From 3c7aa15d2073d81e56e8ba8771a4ab6f23be7ae2 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 10 Jun 2014 18:08:19 +0800 Subject: [PATCH 001/167] NFSD: Using min/max/min_t/max_t for calculate Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs3proc.c | 6 +----- fs/nfsd/nfs3xdr.c | 30 ++++++++---------------------- fs/nfsd/nfs4proc.c | 12 ++++-------- fs/nfsd/nfs4xdr.c | 10 +++------- fs/nfsd/nfsctl.c | 9 +++------ fs/nfsd/nfssvc.c | 11 +++++------ fs/nfsd/nfsxdr.c | 14 ++++++-------- fs/nfsd/vfs.c | 3 +-- 8 files changed, 31 insertions(+), 64 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 401289913130..61ef42c7b0a6 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -157,11 +157,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) * + 1 (xdr opaque byte count) = 26 */ - - resp->count = argp->count; - if (max_blocksize < resp->count) - resp->count = max_blocksize; - + resp->count = min(argp->count, max_blocksize); svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); fh_copy(&resp->fh, &argp->fh); diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index e6c01e80325e..39c5eb3ad33a 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -120,10 +120,7 @@ decode_sattr3(__be32 *p, struct iattr *iap) iap->ia_valid |= ATTR_SIZE; p = xdr_decode_hyper(p, &newsize); - if (newsize <= NFS_OFFSET_MAX) - iap->ia_size = newsize; - else - iap->ia_size = NFS_OFFSET_MAX; + iap->ia_size = min_t(u64, newsize, NFS_OFFSET_MAX); } if ((tmp = ntohl(*p++)) == 1) { /* set to server time */ iap->ia_valid |= ATTR_ATIME; @@ -338,10 +335,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, return 0; p = xdr_decode_hyper(p, &args->offset); - len = args->count = ntohl(*p++); - - if (len > max_blocksize) - len = max_blocksize; + args->count = ntohl(*p++); + len = min(args->count, max_blocksize); /* set up the kvec */ 
v=0; @@ -349,7 +344,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, struct page *p = *(rqstp->rq_next_page++); rqstp->rq_vec[v].iov_base = page_address(p); - rqstp->rq_vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE; + rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); len -= rqstp->rq_vec[v].iov_len; v++; } @@ -484,9 +479,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, } /* now copy next page if there is one */ if (len && !avail && rqstp->rq_arg.page_len) { - avail = rqstp->rq_arg.page_len; - if (avail > PAGE_SIZE) - avail = PAGE_SIZE; + avail = min_t(unsigned int, rqstp->rq_arg.page_len, PAGE_SIZE); old = page_address(rqstp->rq_arg.pages[0]); } while (len && avail && *old) { @@ -571,10 +564,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, args->verf = p; p += 2; args->dircount = ~0; args->count = ntohl(*p++); - - if (args->count > PAGE_SIZE) - args->count = PAGE_SIZE; - + args->count = min_t(u32, args->count, PAGE_SIZE); args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); @@ -595,10 +585,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, args->dircount = ntohl(*p++); args->count = ntohl(*p++); - len = (args->count > max_blocksize) ? 
max_blocksize : - args->count; - args->count = len; - + len = args->count = min(args->count, max_blocksize); while (len > 0) { struct page *p = *(rqstp->rq_next_page++); if (!args->buffer) @@ -913,8 +900,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen, */ /* truncate filename if too long */ - if (namlen > NFS3_MAXNAMLEN) - namlen = NFS3_MAXNAMLEN; + namlen = min(namlen, NFS3_MAXNAMLEN); slen = XDR_QUADLEN(namlen); elen = slen + NFS3_ENTRY_BAGGAGE diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 6851b003f2a4..baa3803f0811 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1529,21 +1529,17 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) u32 maxcount = 0, rlen = 0; maxcount = svc_max_payload(rqstp); - rlen = op->u.read.rd_length; - - if (rlen > maxcount) - rlen = maxcount; + rlen = min(op->u.read.rd_length, maxcount); return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32); } static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { - u32 maxcount = svc_max_payload(rqstp); - u32 rlen = op->u.readdir.rd_maxcount; + u32 maxcount = 0, rlen = 0; - if (rlen > maxcount) - rlen = maxcount; + maxcount = svc_max_payload(rqstp); + rlen = min(op->u.readdir.rd_maxcount, maxcount); return (op_encode_hdr_size + op_encode_verifier_maxsz + XDR_QUADLEN(rlen)) * sizeof(__be32); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 83baf2bfe9e9..30913c83ccb0 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3134,9 +3134,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, len = maxcount; v = 0; - thislen = (void *)xdr->end - (void *)xdr->p; - if (len < thislen) - thislen = len; + thislen = min(len, ((void *)xdr->end - (void *)xdr->p)); p = xdr_reserve_space(xdr, (thislen+3)&~3); WARN_ON_ONCE(!p); resp->rqstp->rq_vec[v].iov_base = p; @@ -3203,10 +3201,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, xdr_commit_encode(xdr); 
maxcount = svc_max_payload(resp->rqstp); - if (maxcount > xdr->buf->buflen - xdr->buf->len) - maxcount = xdr->buf->buflen - xdr->buf->len; - if (maxcount > read->rd_length) - maxcount = read->rd_length; + maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len)); + maxcount = min_t(unsigned long, maxcount, read->rd_length); if (!read->rd_filp) { err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp, diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 51844048937f..6a6f65cc8b34 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -369,8 +369,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) if (maxsize < NFS_FHSIZE) return -EINVAL; - if (maxsize > NFS3_FHSIZE) - maxsize = NFS3_FHSIZE; + maxsize = min(maxsize, NFS3_FHSIZE); if (qword_get(&mesg, mesg, size)>0) return -EINVAL; @@ -871,10 +870,8 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) /* force bsize into allowed range and * required alignment. */ - if (bsize < 1024) - bsize = 1024; - if (bsize > NFSSVC_MAXBLKSIZE) - bsize = NFSSVC_MAXBLKSIZE; + bsize = max_t(int, bsize, 1024); + bsize = min_t(int, bsize, NFSSVC_MAXBLKSIZE); bsize &= ~(1024-1); mutex_lock(&nfsd_mutex); if (nn->nfsd_serv) { diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 1879e43f2868..209474174fe4 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -469,8 +469,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) /* enforce a global maximum number of threads */ tot = 0; for (i = 0; i < n; i++) { - if (nthreads[i] > NFSD_MAXSERVS) - nthreads[i] = NFSD_MAXSERVS; + nthreads[i] = min(nthreads[i], NFSD_MAXSERVS); tot += nthreads[i]; } if (tot > NFSD_MAXSERVS) { @@ -519,11 +518,11 @@ nfsd_svc(int nrservs, struct net *net) mutex_lock(&nfsd_mutex); dprintk("nfsd: creating service\n"); - if (nrservs <= 0) - nrservs = 0; - if (nrservs > NFSD_MAXSERVS) - nrservs = NFSD_MAXSERVS; + + nrservs = max(nrservs, 0); + nrservs = min(nrservs, NFSD_MAXSERVS); 
error = 0; + if (nrservs == 0 && nn->nfsd_serv == NULL) goto out; diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 1ac306b769df..412d7061f9e5 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -257,8 +257,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, len = args->count = ntohl(*p++); p++; /* totalcount - unused */ - if (len > NFSSVC_MAXBLKSIZE_V2) - len = NFSSVC_MAXBLKSIZE_V2; + len = min_t(unsigned int, len, NFSSVC_MAXBLKSIZE_V2); /* set up somewhere to store response. * We take pages, put them on reslist and include in iovec @@ -268,7 +267,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, struct page *p = *(rqstp->rq_next_page++); rqstp->rq_vec[v].iov_base = page_address(p); - rqstp->rq_vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE; + rqstp->rq_vec[v].iov_len = min_t(unsigned int, len, PAGE_SIZE); len -= rqstp->rq_vec[v].iov_len; v++; } @@ -400,9 +399,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, return 0; args->cookie = ntohl(*p++); args->count = ntohl(*p++); - if (args->count > PAGE_SIZE) - args->count = PAGE_SIZE; - + args->count = min_t(u32, args->count, PAGE_SIZE); args->buffer = page_address(*(rqstp->rq_next_page++)); return xdr_argsize_check(rqstp, p); @@ -516,10 +513,11 @@ nfssvc_encode_entry(void *ccdv, const char *name, } if (cd->offset) *cd->offset = htonl(offset); - if (namlen > NFS2_MAXNAMLEN) - namlen = NFS2_MAXNAMLEN;/* truncate filename */ + /* truncate filename */ + namlen = min(namlen, NFS2_MAXNAMLEN); slen = XDR_QUADLEN(namlen); + if ((buflen = cd->buflen - slen - 4) < 0) { cd->common.err = nfserr_toosmall; return -EINVAL; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 140c496f612c..7498099b382f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -2093,8 +2093,7 @@ nfsd_racache_init(int cache_size) if (raparm_hash[0].pb_head) return 0; nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); - if (nperbucket < 2) - nperbucket = 2; + nperbucket = max(2, nperbucket); cache_size = 
nperbucket * RAPARM_HASH_SIZE; dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); From f15a5cf912f05b572d1f9f3772fba019643f4837 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 10 Jun 2014 18:29:39 +0800 Subject: [PATCH 002/167] SUNRPC/NFSD: Change to type of bool for rq_usedeferral and rq_splice_ok rq_usedeferral and rq_splice_ok only ever hold 0 or 1, so define them as bool. Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 4 ++-- include/linux/sunrpc/svc.h | 4 ++-- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- net/sunrpc/svc.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index baa3803f0811..be6734060d2a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1298,7 +1298,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, * Don't use the deferral mechanism for NFSv4; compounds make it * too hard to avoid non-idempotency problems. */ - rqstp->rq_usedeferral = 0; + rqstp->rq_usedeferral = false; /* * According to RFC3010, this takes precedence over all other errors. 
@@ -1417,7 +1417,7 @@ encode_op: BUG_ON(cstate->replay_owner); out: /* Reset deferral mechanism for RPC deferrals */ - rqstp->rq_usedeferral = 1; + rqstp->rq_usedeferral = true; dprintk("nfsv4 compound returned %d\n", ntohl(status)); return status; } diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 1bc7cd05b22e..cf61ecd148e0 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -236,7 +236,7 @@ struct svc_rqst { struct svc_cred rq_cred; /* auth info */ void * rq_xprt_ctxt; /* transport specific context ptr */ struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ - int rq_usedeferral; /* use deferral */ + bool rq_usedeferral; /* use deferral */ size_t rq_xprt_hlen; /* xprt header len */ struct xdr_buf rq_arg; @@ -277,7 +277,7 @@ struct svc_rqst { struct auth_domain * rq_gssclient; /* "gss/"-style peer info */ int rq_cachetype; struct svc_cacherep * rq_cacherep; /* cache info */ - int rq_splice_ok; /* turned off in gss privacy + bool rq_splice_ok; /* turned off in gss privacy * to prevent encrypting page * cache pages */ wait_queue_head_t rq_wait; /* synchronization */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 4ce5eccec1f6..c548ab213f76 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -886,7 +886,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs u32 priv_len, maj_stat; int pad, saved_len, remaining_len, offset; - rqstp->rq_splice_ok = 0; + rqstp->rq_splice_ok = false; priv_len = svc_getnl(&buf->head[0]); if (rqstp->rq_deferred) { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 5de6801cd924..1db5007ddbce 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1086,9 +1086,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto err_short_len; /* Will be turned off only in gss privacy case: */ - rqstp->rq_splice_ok = 1; + 
rqstp->rq_splice_ok = true; /* Will be turned off only when NFSv4 Sessions are used */ - rqstp->rq_usedeferral = 1; + rqstp->rq_usedeferral = true; rqstp->rq_dropme = false; /* Setup reply header */ From 0da22a919d6972f629407f79fc096f29d23a4942 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 10 Jun 2014 22:04:43 +0800 Subject: [PATCH 003/167] NFSD: Using path_get when assigning path for export Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 13b85f94d9e2..ef2d9d62ce2b 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -698,8 +698,8 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) kref_get(&item->ex_client->ref); new->ex_client = item->ex_client; - new->ex_path.dentry = dget(item->ex_path.dentry); - new->ex_path.mnt = mntget(item->ex_path.mnt); + new->ex_path = item->ex_path; + path_get(&item->ex_path); new->ex_fslocs.locations = NULL; new->ex_fslocs.locations_count = 0; new->ex_fslocs.migrated = 0; From bf18f163e89c52e09c96534db45c4274273a0b34 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 10 Jun 2014 22:06:44 +0800 Subject: [PATCH 004/167] NFSD: Using exp_get for export getting Don't use cache_get outside export.h; use exp_get for exports. Signed-off-by: Kinglong Mee Signed-off-by: J. 
Bruce Fields --- fs/nfsd/export.c | 2 +- fs/nfsd/export.h | 3 ++- fs/nfsd/nfs4proc.c | 6 +++--- fs/nfsd/nfsfh.c | 3 +-- fs/nfsd/vfs.c | 3 +-- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index ef2d9d62ce2b..72ffd7cce3c3 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1253,7 +1253,7 @@ static int e_show(struct seq_file *m, void *p) return 0; } - cache_get(&exp->h); + exp_get(exp); if (cache_check(cd, &exp->h, NULL)) return 0; exp_put(exp); diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index cfeea85c5bed..04dc8c167b0c 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -101,9 +101,10 @@ static inline void exp_put(struct svc_export *exp) cache_put(&exp->h, exp->cd); } -static inline void exp_get(struct svc_export *exp) +static inline struct svc_export *exp_get(struct svc_export *exp) { cache_get(&exp->h); + return exp; } struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index be6734060d2a..f3f048724ac7 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -177,7 +177,7 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src) fh_put(dst); dget(src->fh_dentry); if (src->fh_export) - cache_get(&src->fh_export->h); + exp_get(src->fh_export); *dst = *src; } @@ -918,8 +918,8 @@ nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstat default: return nfserr_inval; } - exp_get(cstate->current_fh.fh_export); - sin->sin_exp = cstate->current_fh.fh_export; + + sin->sin_exp = exp_get(cstate->current_fh.fh_export); fh_put(&cstate->current_fh); return nfs_ok; } diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index ec8393418154..6f5cc76a6c6e 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -539,8 +539,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, dentry); fhp->fh_dentry = dget(dentry); /* our internal copy */ - fhp->fh_export = exp; - cache_get(&exp->h); + fhp->fh_export = 
exp_get(exp); if (fhp->fh_handle.fh_version == 0xca) { /* old style filehandle please */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7498099b382f..df7cf61f2cd3 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -189,8 +189,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); dparent = fhp->fh_dentry; - exp = fhp->fh_export; - exp_get(exp); + exp = exp_get(fhp->fh_export); /* Lookup the name, but don't follow links */ if (isdotent(name, len)) { From f419992c1f792f2ce501585853ffc71b8f78caa1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 17 Jun 2014 07:44:11 -0400 Subject: [PATCH 005/167] nfsd: add __force to opaque verifier field casts sparse complains that we're stuffing non-byte-swapped values into __be32's here. Since they're supposed to be opaque, it doesn't matter much. Just add __force to make sparse happy. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 8 ++++++-- fs/nfsd/nfs4state.c | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f3f048724ac7..a6be9d3ee1f0 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -581,8 +581,12 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net) __be32 verf[2]; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - verf[0] = (__be32)nn->nfssvc_boot.tv_sec; - verf[1] = (__be32)nn->nfssvc_boot.tv_usec; + /* + * This is opaque to client, so no need to byte-swap. 
Use + * __force to keep sparse happy + */ + verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec; + verf[1] = (__force __be32)nn->nfssvc_boot.tv_usec; memcpy(verifier->data, verf, sizeof(verifier->data)); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2204e1fe5725..8242385a249c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1421,8 +1421,12 @@ static void gen_confirm(struct nfs4_client *clp) __be32 verf[2]; static u32 i; - verf[0] = (__be32)get_seconds(); - verf[1] = (__be32)i++; + /* + * This is opaque to client, so no need to byte-swap. Use + * __force to keep sparse happy + */ + verf[0] = (__force __be32)get_seconds(); + verf[1] = (__force __be32)i++; memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data)); } From b3d8d1284a8275f7e761df5fc5f80c464ecd23dd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 17 Jun 2014 07:44:12 -0400 Subject: [PATCH 006/167] nfsd: clean up sparse endianness warnings in nfscache.c We currently hash the XID to determine a hash bucket to use for the reply cache entry, which is fed into hash_32 without byte-swapping it. Add __force to make sparse happy, and add some comments to explain why. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfscache.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 6040da8830ff..ff9567633245 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -221,7 +221,12 @@ static void hash_refile(struct svc_cacherep *rp) { hlist_del_init(&rp->c_hash); - hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits)); + /* + * No point in byte swapping c_xid since we're just using it to pick + * a hash bucket. 
+ */ + hlist_add_head(&rp->c_hash, cache_hash + + hash_32((__force u32)rp->c_xid, maskbits)); } /* @@ -356,7 +361,11 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) struct hlist_head *rh; unsigned int entries = 0; - rh = &cache_hash[hash_32(rqstp->rq_xid, maskbits)]; + /* + * No point in byte swapping rq_xid since we're just using it to pick + * a hash bucket. + */ + rh = &cache_hash[hash_32((__force u32)rqstp->rq_xid, maskbits)]; hlist_for_each_entry(rp, rh, c_hash) { ++entries; if (nfsd_cache_match(rqstp, csum, rp)) { From e2afc81919400505481a985fb389475707195c3c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 17 Jun 2014 07:44:13 -0400 Subject: [PATCH 007/167] nfsd: nfsd_splice_read and nfsd_readv should return __be32 The callers expect a __be32 return and the functions they call return __be32, so having these return int is just wrong. Also, nfsd_finish_read can be made static. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 7 ++++--- fs/nfsd/vfs.h | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index df7cf61f2cd3..6ffaa70300ed 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -819,7 +819,8 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe, return __splice_from_pipe(pipe, sd, nfsd_splice_actor); } -__be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err) +static __be32 +nfsd_finish_read(struct file *file, unsigned long *count, int host_err) { if (host_err >= 0) { nfsdstats.io_read += host_err; @@ -830,7 +831,7 @@ __be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err) return nfserrno(host_err); } -int nfsd_splice_read(struct svc_rqst *rqstp, +__be32 nfsd_splice_read(struct svc_rqst *rqstp, struct file *file, loff_t offset, unsigned long *count) { struct splice_desc sd = { @@ -846,7 +847,7 @@ int nfsd_splice_read(struct svc_rqst *rqstp, return nfsd_finish_read(file, count, 
host_err); } -int nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, +__be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *count) { mm_segment_t oldfs; diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 91b6ae3f658b..b84aef50f55d 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -74,9 +74,9 @@ struct raparms; __be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *, struct file **, struct raparms **); void nfsd_put_tmp_read_open(struct file *, struct raparms *); -int nfsd_splice_read(struct svc_rqst *, +__be32 nfsd_splice_read(struct svc_rqst *, struct file *, loff_t, unsigned long *); -int nfsd_readv(struct file *, loff_t, struct kvec *, int, +__be32 nfsd_readv(struct file *, loff_t, struct kvec *, int, unsigned long *); __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, loff_t, struct kvec *, int, unsigned long *); From 94ec938b612eb877bb6622847972dd739ef738b8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 17 Jun 2014 07:44:14 -0400 Subject: [PATCH 008/167] nfsd: add appropriate __force directives to filehandle generation code The filehandle structs all use host-endian values, but will sometimes stuff big-endian values into those fields. This is OK since these values are opaque to the client, but it confuses sparse. Add __force to make it clear that we are doing this intentionally. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfsfh.c | 9 ++++++++- fs/nfsd/nfsfh.h | 15 +++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 6f5cc76a6c6e..e883a5868be6 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -162,7 +162,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) /* deprecated, convert to type 3 */ len = key_len(FSID_ENCODE_DEV)/4; fh->fh_fsid_type = FSID_ENCODE_DEV; - fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1]))); + /* + * struct knfsd_fh uses host-endian fields, which are + * sometimes used to hold net-endian values. This + * confuses sparse, so we must use __force here to + * keep it from complaining. + */ + fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl((__force __be32)fh->fh_fsid[0]), + ntohl((__force __be32)fh->fh_fsid[1]))); fh->fh_fsid[1] = fh->fh_fsid[2]; } data_left -= len; diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index 2e89e70ac15c..08236d70c667 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h @@ -73,8 +73,15 @@ enum fsid_source { extern enum fsid_source fsid_source(struct svc_fh *fhp); -/* This might look a little large to "inline" but in all calls except +/* + * This might look a little large to "inline" but in all calls except * one, 'vers' is constant so moste of the function disappears. + * + * In some cases the values are considered to be host endian and in + * others, net endian. fsidv is always considered to be u32 as the + * callers don't know which it will be. So we must use __force to keep + * sparse from complaining. Since these values are opaque to the + * client, that shouldn't be a problem. 
*/ static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino, u32 fsid, unsigned char *uuid) @@ -82,7 +89,7 @@ static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino, u32 *up; switch(vers) { case FSID_DEV: - fsidv[0] = htonl((MAJOR(dev)<<16) | + fsidv[0] = (__force __u32)htonl((MAJOR(dev)<<16) | MINOR(dev)); fsidv[1] = ino_t_to_u32(ino); break; @@ -90,8 +97,8 @@ static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino, fsidv[0] = fsid; break; case FSID_MAJOR_MINOR: - fsidv[0] = htonl(MAJOR(dev)); - fsidv[1] = htonl(MINOR(dev)); + fsidv[0] = (__force __u32)htonl(MAJOR(dev)); + fsidv[1] = (__force __u32)htonl(MINOR(dev)); fsidv[2] = ino_t_to_u32(ino); break; From f7ce5d284253db9760fc1c3a96b66ec2d9abf0ab Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 17 Jun 2014 06:14:08 -0400 Subject: [PATCH 009/167] nfsd: fix return of nfs4_acl_write_who AFAICT, the only way to hit this error is to pass this function a bogus "who" value. In that case, we probably don't want to return -1 as that could get sent back to the client. Turn this into nfserr_serverfault, which is a more appropriate error for a server bug like this. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4acl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index d714156a19fd..b0cf00d3ee7d 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -935,5 +935,5 @@ __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who) return 0; } WARN_ON_ONCE(1); - return -1; + return nfserr_serverfault; } From d4c8e34fe8beeb7877ce7f8d2da6affd7231b2cb Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 18 Jun 2014 15:00:19 -0400 Subject: [PATCH 010/167] nfsd: properly handle embedded newlines in fault_injection input Currently rpc_pton() fails to handle the case where you echo an address into the file, as it barfs on the newline. 
Ensure that we NULL out the first occurrence of any newline. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/fault_inject.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index 2ed05c3cd43d..f1333fc35b33 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -115,11 +115,19 @@ static ssize_t fault_inject_write(struct file *file, const char __user *buf, struct net *net = current->nsproxy->net_ns; struct sockaddr_storage sa; u64 val; + char *nl; if (copy_from_user(write_buf, buf, size)) return -EFAULT; write_buf[size] = '\0'; + /* Deal with any embedded newlines in the string */ + nl = strchr(write_buf, '\n'); + if (nl) { + size = nl - write_buf; + *nl = '\0'; + } + size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa)); if (size > 0) nfsd_inject_set_client(file_inode(file)->i_private, &sa, size); From b829e9197ad3d8b86dbd5dc1d9bbc5508d214cec Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 19 Jun 2014 16:44:48 -0400 Subject: [PATCH 011/167] nfsd: fix rare symlink decoding bug An NFS operation that creates a new symlink includes the symlink data, which is xdr-encoded as a length followed by the data plus 0 to 3 bytes of zero-padding as required to reach a 4-byte boundary. The vfs, on the other hand, wants null-terminated data. The simple way to handle this would be by copying the data into a newly allocated buffer with space for the final null. The current nfsd_symlink code tries to be more clever by skipping that step in the (likely) case where the byte following the string is already 0. But that assumes that the byte following the string is ours to look at. In fact, it might be the first byte of a page that we can't read, or of some object that another task might modify. Worse, the NFSv4 code tries to fix the problem by actually writing to that byte. 
In the NFSv2/v3 cases this actually appears to be safe: - nfs3svc_decode_symlinkargs explicitly null-terminates the data (after first checking its length and copying it to a new page). - NFSv2 limits symlinks to 1k. The buffer holding the rpc request is always at least a page, and the link data (and previous fields) have maximum lengths that prevent the request from reaching the end of a page. In the NFSv4 case the CREATE op is potentially just one part of a long compound so can end up on the end of a page if you're unlucky. The minimal fix here is to copy and null-terminate in the NFSv4 case. The nfsd_symlink() interface here seems too fragile, though. It should really either do the copy itself every time or just require a null-terminated string. Reported-by: Jeff Layton Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 9 --------- fs/nfsd/nfs4xdr.c | 13 ++++++++++++- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index a6be9d3ee1f0..2b3795a135e8 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -621,15 +621,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, switch (create->cr_type) { case NF4LNK: - /* ugh! we have to null-terminate the linktext, or - * vfs_symlink() will choke. it is always safe to - * null-terminate by brute force, since at worst we - * will overwrite the first byte of the create namelen - * in the XDR buffer, which has already been extracted - * during XDR decode. 
- */ - create->cr_linkname[create->cr_linklen] = 0; - status = nfsd_symlink(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, create->cr_linkname, create->cr_linklen, diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 30913c83ccb0..a1c48b4111d2 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -600,7 +600,18 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create READ_BUF(4); create->cr_linklen = be32_to_cpup(p++); READ_BUF(create->cr_linklen); - SAVEMEM(create->cr_linkname, create->cr_linklen); + /* + * The VFS will want a null-terminated string, and + * null-terminating in place isn't safe since this might + * end on a page boundary: + */ + create->cr_linkname = + kmalloc(create->cr_linklen + 1, GFP_KERNEL); + if (!create->cr_linkname) + return nfserr_jukebox; + memcpy(create->cr_linkname, p, create->cr_linklen); + create->cr_linkname[create->cr_linklen] = '\0'; + defer_free(argp, kfree, create->cr_linkname); break; case NF4BLK: case NF4CHR: From 0aeae33f5d5fbd4af775e7c84795db9254d4a165 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 20 Jun 2014 11:49:49 -0400 Subject: [PATCH 012/167] nfsd: make NFSv2 null terminate symlink data It's simple enough for NFSv2 to null-terminate the symlink data. A bit weird (it depends on knowing that we've already read the following byte, which is either padding or part of the mode), but no worse than the conditional kstrdup it otherwise relies on in nfsd_symlink(). Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsproc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 54c6b3d3cc79..aebe23c45cbe 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -403,8 +403,11 @@ nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp, fh_init(&newfh, NFS_FHSIZE); /* - * Create the link, look up new file and set attrs. 
+ * Crazy hack: the request fits in a page, and already-decoded + * attributes follow argp->tname, so it's safe to just write a + * null to ensure it's null-terminated: */ + argp->tname[argp->tlen] = '\0'; nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, argp->tname, argp->tlen, &newfh, &argp->attrs); From 52ee04330f585d1b5bc40442f07df07248fa3aee Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 20 Jun 2014 11:52:21 -0400 Subject: [PATCH 013/167] nfsd: let nfsd_symlink assume null-terminated data Currently nfsd_symlink has a weird hack to serve callers who don't null-terminate symlink data: it looks ahead at the next byte to see if it's zero, and copies it to a new buffer to null-terminate if not. That means callers don't have to null-terminate, but they *do* have to ensure that the byte following the end of the data is theirs to read. That's a bit subtle, and the NFSv4 code actually got this wrong. So let's just throw out that code and let callers pass null-terminated strings; we've already fixed them to do that. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs3proc.c | 2 +- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfsproc.c | 2 +- fs/nfsd/vfs.c | 17 +++-------------- fs/nfsd/vfs.h | 2 +- 5 files changed, 7 insertions(+), 18 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 61ef42c7b0a6..19ba233cf006 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -282,7 +282,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp, fh_copy(&resp->dirfh, &argp->ffh); fh_init(&resp->fh, NFS3_FHSIZE); nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen, - argp->tname, argp->tlen, + argp->tname, &resp->fh, &argp->attrs); RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2b3795a135e8..7aa83bf34fa9 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -623,7 +623,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, case NF4LNK: status = nfsd_symlink(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - create->cr_linkname, create->cr_linklen, + create->cr_linkname, &resfh, &create->cr_iattr); break; diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index aebe23c45cbe..583ed03877e4 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -409,7 +409,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp, */ argp->tname[argp->tlen] = '\0'; nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, - argp->tname, argp->tlen, + argp->tname, &newfh, &argp->attrs); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6ffaa70300ed..7518c65f9a5a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1504,7 +1504,7 @@ out_nfserr: __be32 nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *fname, int flen, - char *path, int plen, + char *path, struct svc_fh *resfhp, struct iattr *iap) { @@ -1513,7 +1513,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int host_err; err = nfserr_noent; - if (!flen || !plen) + if (!flen || path[0] == '\0') 
goto out; err = nfserr_exist; if (isdotent(fname, flen)) @@ -1534,18 +1534,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, if (IS_ERR(dnew)) goto out_nfserr; - if (unlikely(path[plen] != 0)) { - char *path_alloced = kmalloc(plen+1, GFP_KERNEL); - if (path_alloced == NULL) - host_err = -ENOMEM; - else { - strncpy(path_alloced, path, plen); - path_alloced[plen] = 0; - host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced); - kfree(path_alloced); - } - } else - host_err = vfs_symlink(dentry->d_inode, dnew, path); + host_err = vfs_symlink(dentry->d_inode, dnew, path); err = nfserrno(host_err); if (!err) err = nfserrno(commit_metadata(fhp)); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index b84aef50f55d..20e4b6679e46 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -85,7 +85,7 @@ __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, - char *name, int len, char *path, int plen, + char *name, int len, char *path, struct svc_fh *res, struct iattr *); __be32 nfsd_link(struct svc_rqst *, struct svc_fh *, char *, int, struct svc_fh *); From 7fb84306f55d6cc32ea894d47cbb2faa18c8f45b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 24 Jun 2014 15:06:41 -0400 Subject: [PATCH 014/167] nfsd4: rename cr_linkname->cr_data The name of a link is currently stored in cr_name and cr_namelen, and the content in cr_linkname and cr_linklen. That's confusing. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4xdr.c | 15 +++++++-------- fs/nfsd/xdr4.h | 8 ++++---- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 7aa83bf34fa9..b57c8826ce08 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -623,7 +623,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, case NF4LNK: status = nfsd_symlink(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - create->cr_linkname, + create->cr_data, &resfh, &create->cr_iattr); break; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index a1c48b4111d2..3d0749633d2b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -598,20 +598,19 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create switch (create->cr_type) { case NF4LNK: READ_BUF(4); - create->cr_linklen = be32_to_cpup(p++); - READ_BUF(create->cr_linklen); + create->cr_datalen = be32_to_cpup(p++); + READ_BUF(create->cr_datalen); /* * The VFS will want a null-terminated string, and * null-terminating in place isn't safe since this might * end on a page boundary: */ - create->cr_linkname = - kmalloc(create->cr_linklen + 1, GFP_KERNEL); - if (!create->cr_linkname) + create->cr_data = kmalloc(create->cr_datalen + 1, GFP_KERNEL); + if (!create->cr_data) return nfserr_jukebox; - memcpy(create->cr_linkname, p, create->cr_linklen); - create->cr_linkname[create->cr_linklen] = '\0'; - defer_free(argp, kfree, create->cr_linkname); + memcpy(create->cr_data, p, create->cr_datalen); + create->cr_data[create->cr_datalen] = '\0'; + defer_free(argp, kfree, create->cr_data); break; case NF4BLK: case NF4CHR: diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 18cbb6d9c8a9..b8bf63a21e3b 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -107,8 +107,8 @@ struct nfsd4_create { u32 cr_type; /* request */ union { /* request */ struct { - u32 namelen; - char *name; + u32 datalen; + char *data; } link; /* NF4LNK */ 
struct { u32 specdata1; @@ -121,8 +121,8 @@ struct nfsd4_create { struct nfs4_acl *cr_acl; struct xdr_netobj cr_label; }; -#define cr_linklen u.link.namelen -#define cr_linkname u.link.name +#define cr_datalen u.link.datalen +#define cr_data u.link.data #define cr_specdata1 u.dev.specdata1 #define cr_specdata2 u.dev.specdata2 From ce043ac826f3ad224142f84d860316a5fd05f79c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 24 Jun 2014 16:51:12 -0400 Subject: [PATCH 015/167] nfsd4: remove unused defer_free argument 28e05dd8457c "knfsd: nfsd4: represent nfsv4 acl with array instead of linked list" removed the last user that wanted a custom free function. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 21 +++++++++------------ fs/nfsd/xdr4.h | 1 - 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3d0749633d2b..13f91cec25c3 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -182,16 +182,14 @@ static int zero_clientid(clientid_t *clid) /** * defer_free - mark an allocation as deferred freed - * @argp: NFSv4 compound argument structure to be freed with - * @release: release callback to free @p, typically kfree() - * @p: pointer to be freed + * @argp: NFSv4 compound argument structure + * @p: pointer to be freed (with kfree()) * * Marks @p to be freed when processing the compound operation * described in @argp finishes. 
*/ static int -defer_free(struct nfsd4_compoundargs *argp, - void (*release)(const void *), void *p) +defer_free(struct nfsd4_compoundargs *argp, void *p) { struct tmpbuf *tb; @@ -199,7 +197,6 @@ defer_free(struct nfsd4_compoundargs *argp, if (!tb) return -ENOMEM; tb->buf = p; - tb->release = release; tb->next = argp->to_free; argp->to_free = tb; return 0; @@ -225,7 +222,7 @@ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) BUG_ON(p != argp->tmpp); argp->tmpp = NULL; } - if (defer_free(argp, kfree, p)) { + if (defer_free(argp, p)) { kfree(p); return NULL; } else @@ -296,7 +293,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (*acl == NULL) return nfserr_jukebox; - defer_free(argp, kfree, *acl); + defer_free(argp, *acl); (*acl)->naces = nace; for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) { @@ -422,7 +419,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (!label->data) return nfserr_jukebox; label->len = dummy32; - defer_free(argp, kfree, label->data); + defer_free(argp, label->data); memcpy(label->data, buf, dummy32); } #endif @@ -610,7 +607,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create return nfserr_jukebox; memcpy(create->cr_data, p, create->cr_datalen); create->cr_data[create->cr_datalen] = '\0'; - defer_free(argp, kfree, create->cr_data); + defer_free(argp, create->cr_data); break; case NF4BLK: case NF4CHR: @@ -1486,7 +1483,7 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta goto out; } - defer_free(argp, kfree, stateid); + defer_free(argp, stateid); INIT_LIST_HEAD(&stateid->ts_id_list); list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list); @@ -3972,7 +3969,7 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp) while (args->to_free) { struct tmpbuf *tb = args->to_free; args->to_free = tb->next; - tb->release(tb->buf); + kfree(tb->buf); kfree(tb); } return 1; diff --git 
a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index b8bf63a21e3b..4379cc871607 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -488,7 +488,6 @@ struct nfsd4_compoundargs { __be32 * tmpp; struct tmpbuf { struct tmpbuf *next; - void (*release)(const void *); void *buf; } *to_free; From 29c353b3fe54789706c0a37560ce4548a6362c2c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 24 Jun 2014 17:06:51 -0400 Subject: [PATCH 016/167] nfsd4: define svcxdr_dupstr to share some common code Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 13f91cec25c3..8fb0f3718202 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -202,6 +202,26 @@ defer_free(struct nfsd4_compoundargs *argp, void *p) return 0; } +/* + * For xdr strings that need to be passed to other kernel api's + * as null-terminated strings. + * + * Note null-terminating in place usually isn't safe since the + * buffer might end on a page boundary. 
+ */ +static char * +svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len) +{ + char *p = kmalloc(len + 1, GFP_KERNEL); + + if (!p) + return NULL; + memcpy(p, buf, len); + p[len] = '\0'; + defer_free(argp, p); + return p; +} + /** * savemem - duplicate a chunk of memory for later processing * @argp: NFSv4 compound argument structure to be freed with @@ -415,12 +435,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, return nfserr_badlabel; len += (XDR_QUADLEN(dummy32) << 2); READMEM(buf, dummy32); - label->data = kzalloc(dummy32 + 1, GFP_KERNEL); + label->len = dummy32; + label->data = svcxdr_dupstr(argp, buf, dummy32); if (!label->data) return nfserr_jukebox; - label->len = dummy32; - defer_free(argp, label->data); - memcpy(label->data, buf, dummy32); } #endif @@ -597,17 +615,9 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create READ_BUF(4); create->cr_datalen = be32_to_cpup(p++); READ_BUF(create->cr_datalen); - /* - * The VFS will want a null-terminated string, and - * null-terminating in place isn't safe since this might - * end on a page boundary: - */ - create->cr_data = kmalloc(create->cr_datalen + 1, GFP_KERNEL); + create->cr_data = svcxdr_dupstr(argp, p, create->cr_datalen); if (!create->cr_data) return nfserr_jukebox; - memcpy(create->cr_data, p, create->cr_datalen); - create->cr_data[create->cr_datalen] = '\0'; - defer_free(argp, create->cr_data); break; case NF4BLK: case NF4CHR: From bcaab953b1d3790c724a211f2452b574fd49a7ce Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 24 Jun 2014 17:51:21 -0400 Subject: [PATCH 017/167] nfsd4: remove nfs4_acl_new This is a not-that-useful kmalloc wrapper. And I'd like one of the callers to actually use something other than kmalloc. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/acl.h | 2 +- fs/nfsd/nfs4acl.c | 18 ++++++++---------- fs/nfsd/nfs4xdr.c | 2 +- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h index a986ceb6fd0d..4cd7c69a6cb9 100644 --- a/fs/nfsd/acl.h +++ b/fs/nfsd/acl.h @@ -47,7 +47,7 @@ struct svc_rqst; #define NFS4_ACL_MAX ((PAGE_SIZE - sizeof(struct nfs4_acl)) \ / sizeof(struct nfs4_ace)) -struct nfs4_acl *nfs4_acl_new(int); +int nfs4_acl_bytes(int entries); int nfs4_acl_get_whotype(char *, u32); __be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who); diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index b0cf00d3ee7d..acf6974e6823 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -161,11 +161,12 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, size += 2 * dpacl->a_count; } - *acl = nfs4_acl_new(size); + *acl = kmalloc(nfs4_acl_bytes(size), GFP_KERNEL); if (*acl == NULL) { error = -ENOMEM; goto out; } + (*acl)->naces = 0; _posix_to_nfsv4_one(pacl, *acl, flags & ~NFS4_ACL_TYPE_DEFAULT); @@ -872,16 +873,13 @@ ace2type(struct nfs4_ace *ace) return -1; } -struct nfs4_acl * -nfs4_acl_new(int n) +/* + * return the size of the struct nfs4_acl required to represent an acl + * with @entries entries. 
+ */ +int nfs4_acl_bytes(int entries) { - struct nfs4_acl *acl; - - acl = kmalloc(sizeof(*acl) + n*sizeof(struct nfs4_ace), GFP_KERNEL); - if (acl == NULL) - return NULL; - acl->naces = 0; - return acl; + return sizeof(struct nfs4_acl) + entries * sizeof(struct nfs4_ace); } static struct { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8fb0f3718202..fea41046427c 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -309,7 +309,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (nace > NFS4_ACL_MAX) return nfserr_fbig; - *acl = nfs4_acl_new(nace); + *acl = kmalloc(nfs4_acl_bytes(nace), GFP_KERNEL); if (*acl == NULL) return nfserr_jukebox; From d5e2338324102dcf34aa25aeaf96064cc4d94dce Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 24 Jun 2014 17:43:45 -0400 Subject: [PATCH 018/167] nfsd4: replace defer_free by svcxdr_tmpalloc Avoid an extra allocation for the tmpbuf struct itself, and stop ignoring some allocation failures. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 46 +++++++++++++++++----------------------------- fs/nfsd/xdr4.h | 13 +++++++++---- 2 files changed, 26 insertions(+), 33 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index fea41046427c..46115f2c3074 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -181,25 +181,24 @@ static int zero_clientid(clientid_t *clid) } /** - * defer_free - mark an allocation as deferred freed + * svcxdr_tmpalloc - allocate memory to be freed after compound processing * @argp: NFSv4 compound argument structure * @p: pointer to be freed (with kfree()) * * Marks @p to be freed when processing the compound operation * described in @argp finishes. 
*/ -static int -defer_free(struct nfsd4_compoundargs *argp, void *p) +static void * +svcxdr_tmpalloc(struct nfsd4_compoundargs *argp, u32 len) { - struct tmpbuf *tb; + struct svcxdr_tmpbuf *tb; - tb = kmalloc(sizeof(*tb), GFP_KERNEL); + tb = kmalloc(sizeof(*tb) + len, GFP_KERNEL); if (!tb) - return -ENOMEM; - tb->buf = p; + return NULL; tb->next = argp->to_free; argp->to_free = tb; - return 0; + return tb->buf; } /* @@ -212,13 +211,12 @@ defer_free(struct nfsd4_compoundargs *argp, void *p) static char * svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len) { - char *p = kmalloc(len + 1, GFP_KERNEL); + char *p = svcxdr_tmpalloc(argp, len + 1); if (!p) return NULL; memcpy(p, buf, len); p[len] = '\0'; - defer_free(argp, p); return p; } @@ -234,19 +232,13 @@ svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len) */ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) { - if (p == argp->tmp) { - p = kmemdup(argp->tmp, nbytes, GFP_KERNEL); - if (!p) - return NULL; - } else { - BUG_ON(p != argp->tmpp); - argp->tmpp = NULL; - } - if (defer_free(argp, p)) { - kfree(p); + void *ret; + + ret = svcxdr_tmpalloc(argp, nbytes); + if (!ret) return NULL; - } else - return (char *)p; + memcpy(ret, p, nbytes); + return ret; } static __be32 @@ -309,12 +301,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, if (nace > NFS4_ACL_MAX) return nfserr_fbig; - *acl = kmalloc(nfs4_acl_bytes(nace), GFP_KERNEL); + *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace)); if (*acl == NULL) return nfserr_jukebox; - defer_free(argp, *acl); - (*acl)->naces = nace; for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) { READ_BUF(16); len += 16; @@ -1487,13 +1477,12 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta INIT_LIST_HEAD(&test_stateid->ts_stateid_list); for (i = 0; i < test_stateid->ts_num_ids; i++) { - stateid = kmalloc(sizeof(struct nfsd4_test_stateid_id), GFP_KERNEL); + stateid = 
svcxdr_tmpalloc(argp, sizeof(*stateid)); if (!stateid) { status = nfserrno(-ENOMEM); goto out; } - defer_free(argp, stateid); INIT_LIST_HEAD(&stateid->ts_id_list); list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list); @@ -3977,9 +3966,8 @@ int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp) kfree(args->tmpp); args->tmpp = NULL; while (args->to_free) { - struct tmpbuf *tb = args->to_free; + struct svcxdr_tmpbuf *tb = args->to_free; args->to_free = tb->next; - kfree(tb->buf); kfree(tb); } return 1; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 4379cc871607..efce9010cad4 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -478,6 +478,14 @@ struct nfsd4_op { bool nfsd4_cache_this_op(struct nfsd4_op *); +/* + * Memory needed just for the duration of processing one compound: + */ +struct svcxdr_tmpbuf { + struct svcxdr_tmpbuf *next; + char buf[]; +}; + struct nfsd4_compoundargs { /* scratch variables for XDR decode */ __be32 * p; @@ -486,10 +494,7 @@ struct nfsd4_compoundargs { int pagelen; __be32 tmp[8]; __be32 * tmpp; - struct tmpbuf { - struct tmpbuf *next; - void *buf; - } *to_free; + struct svcxdr_tmpbuf *to_free; struct svc_rqst *rqstp; From 1055414fe19db2db6c8947c0b9ee9c8fe07beea1 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sun, 29 Jun 2014 19:18:17 +0800 Subject: [PATCH 019/167] NFSD: Avoid warning message when compile at i686 arch fs/nfsd/nfs4xdr.c: In function 'nfsd4_encode_readv': >> fs/nfsd/nfs4xdr.c:3137:148: warning: comparison of distinct pointer types lacks a cast [enabled by default] thislen = min(len, ((void *)xdr->end - (void *)xdr->p)); Reported-by: Fengguang Wu Signed-off-by: Kinglong Mee Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 46115f2c3074..9388a4316fa8 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3140,7 +3140,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp, len = maxcount; v = 0; - thislen = min(len, ((void *)xdr->end - (void *)xdr->p)); + thislen = min_t(long, len, ((void *)xdr->end - (void *)xdr->p)); p = xdr_reserve_space(xdr, (thislen+3)&~3); WARN_ON_ONCE(!p); resp->rqstp->rq_vec[v].iov_base = p; From 7e6a72e5f1d42768a9949d73d3337277ff96e026 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 30 Jun 2014 11:48:30 -0400 Subject: [PATCH 020/167] nfsd: fix file access refcount leak when nfsd4_truncate fails nfsd4_process_open2 will currently get access to the file, and then call nfsd4_truncate to (possibly) truncate it. If that operation fails though, then the access references will never be released as the nfs4_ol_stateid is never initialized. Fix by moving the nfsd4_truncate call into nfs4_get_vfs_file, ensuring that the refcounts are properly put if the truncate fails. Signed-off-by: Jeff Layton Signed-off-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 68 ++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8242385a249c..c473bd6d52c8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3046,24 +3046,6 @@ static inline int nfs4_access_to_access(u32 nfs4_access) return flags; } -static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, - struct svc_fh *cur_fh, struct nfsd4_open *open) -{ - __be32 status; - int oflag = nfs4_access_to_omode(open->op_share_access); - int access = nfs4_access_to_access(open->op_share_access); - - if (!fp->fi_fds[oflag]) { - status = nfsd_open(rqstp, cur_fh, S_IFREG, access, - &fp->fi_fds[oflag]); - if (status) - return status; - } - nfs4_file_get_access(fp, oflag); - - return nfs_ok; -} - static inline __be32 nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, struct nfsd4_open *open) @@ -3079,31 +3061,50 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0); } +static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, + struct svc_fh *cur_fh, struct nfsd4_open *open) +{ + __be32 status; + int oflag = nfs4_access_to_omode(open->op_share_access); + int access = nfs4_access_to_access(open->op_share_access); + + if (!fp->fi_fds[oflag]) { + status = nfsd_open(rqstp, cur_fh, S_IFREG, access, + &fp->fi_fds[oflag]); + if (status) + goto out; + } + nfs4_file_get_access(fp, oflag); + + status = nfsd4_truncate(rqstp, cur_fh, open); + if (status) + goto out_put_access; + + return nfs_ok; + +out_put_access: + nfs4_file_put_access(fp, oflag); +out: + return status; +} + static __be32 nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { u32 op_share_access = open->op_share_access; - bool new_access; __be32 status; - new_access = 
!test_access(op_share_access, stp); - if (new_access) { + if (!test_access(op_share_access, stp)) status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open); - if (status) - return status; - } - status = nfsd4_truncate(rqstp, cur_fh, open); - if (status) { - if (new_access) { - int oflag = nfs4_access_to_omode(op_share_access); - nfs4_file_put_access(fp, oflag); - } + else + status = nfsd4_truncate(rqstp, cur_fh, open); + + if (status) return status; - } + /* remember the open */ set_access(op_share_access, stp); set_deny(open->op_share_deny, stp); - return nfs_ok; } @@ -3352,9 +3353,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf goto out; } else { status = nfs4_get_vfs_file(rqstp, fp, current_fh, open); - if (status) - goto out; - status = nfsd4_truncate(rqstp, current_fh, open); if (status) goto out; stp = open->op_stp; From 950e0118d06fae26e07b283b83e96124a2075a1d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Jun 2014 11:48:31 -0400 Subject: [PATCH 021/167] nfsd: Protect addition to the file_hashtbl Current code depends on the client_mutex to guarantee a single struct nfs4_file per inode in the file_hashtbl and make addition atomic with respect to lookup. Rely instead on the state_lock, to make it easier to stop taking the client_mutex here later. To prevent an i_lock/state_lock inversion, change nfsd4_init_file to use ihold instead of igrab. That's also more efficient anyway as we definitely hold a reference to the inode at that point. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 49 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c473bd6d52c8..29788fd0da24 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2611,17 +2611,18 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) { unsigned int hashval = file_hashval(ino); + lockdep_assert_held(&state_lock); + atomic_set(&fp->fi_ref, 1); INIT_LIST_HEAD(&fp->fi_stateids); INIT_LIST_HEAD(&fp->fi_delegations); - fp->fi_inode = igrab(ino); + ihold(ino); + fp->fi_inode = ino; fp->fi_had_conflict = false; fp->fi_lease = NULL; memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); memset(fp->fi_access, 0, sizeof(fp->fi_access)); - spin_lock(&state_lock); hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]); - spin_unlock(&state_lock); } void @@ -2787,23 +2788,49 @@ find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, /* search file_hashtbl[] for file */ static struct nfs4_file * -find_file(struct inode *ino) +find_file_locked(struct inode *ino) { unsigned int hashval = file_hashval(ino); struct nfs4_file *fp; - spin_lock(&state_lock); + lockdep_assert_held(&state_lock); + hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { if (fp->fi_inode == ino) { get_nfs4_file(fp); - spin_unlock(&state_lock); return fp; } } - spin_unlock(&state_lock); return NULL; } +static struct nfs4_file * +find_file(struct inode *ino) +{ + struct nfs4_file *fp; + + spin_lock(&state_lock); + fp = find_file_locked(ino); + spin_unlock(&state_lock); + return fp; +} + +static struct nfs4_file * +find_or_add_file(struct inode *ino, struct nfs4_file *new) +{ + struct nfs4_file *fp; + + spin_lock(&state_lock); + fp = find_file_locked(ino); + if (fp == NULL) { + nfsd4_init_file(new, ino); + fp = new; + } + spin_unlock(&state_lock); + + return fp; +} + /* * Called to check deny when READ with all zero stateid or * WRITE with all zero or all one 
stateid @@ -3325,21 +3352,19 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * and check for delegations in the process of being recalled. * If not found, create the nfs4_file struct */ - fp = find_file(ino); - if (fp) { + fp = find_or_add_file(ino, open->op_file); + if (fp != open->op_file) { if ((status = nfs4_check_open(fp, open, &stp))) goto out; status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; } else { + open->op_file = NULL; status = nfserr_bad_stateid; if (nfsd4_is_deleg_cur(open)) goto out; status = nfserr_jukebox; - fp = open->op_file; - open->op_file = NULL; - nfsd4_init_file(fp, ino); } /* From 1e444f5bc0c468e244ee601b7acbd87f0b6ee7e2 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 1 Jul 2014 17:48:02 +0800 Subject: [PATCH 022/167] NFSD: Remove iattr parameter from nfsd_symlink() Commit db2e747b1499 (vfs: remove mode parameter from vfs_symlink()) removed the mode parameter from vfs_symlink, so iattr is no longer needed by nfsd_symlink; just remove it. Signed-off-by: Kinglong Mee Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs3proc.c | 3 +-- fs/nfsd/nfs4proc.c | 3 +-- fs/nfsd/nfsproc.c | 4 +--- fs/nfsd/vfs.c | 3 +-- fs/nfsd/vfs.h | 2 +- 5 files changed, 5 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 19ba233cf006..fa2525b2e9d7 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -282,8 +282,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp, fh_copy(&resp->dirfh, &argp->ffh); fh_init(&resp->fh, NFS3_FHSIZE); nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen, - argp->tname, - &resp->fh, &argp->attrs); + argp->tname, &resp->fh); RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index b57c8826ce08..9425ffc48809 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -623,8 +623,7 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, case NF4LNK: status = nfsd_symlink(rqstp, &cstate->current_fh, create->cr_name, create->cr_namelen, - create->cr_data, - &resfh, &create->cr_iattr); + create->cr_data, &resfh); break; case NF4BLK: diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 583ed03877e4..eff49552cdc8 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -409,9 +409,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp, */ argp->tname[argp->tlen] = '\0'; nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, - argp->tname, - &newfh, &argp->attrs); - + argp->tname, &newfh); fh_put(&argp->ffh); fh_put(&newfh); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7518c65f9a5a..730f31964597 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1505,8 +1505,7 @@ __be32 nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *fname, int flen, char *path, - struct svc_fh *resfhp, - struct iattr *iap) + struct svc_fh *resfhp) { struct dentry *dentry, *dnew; __be32 err, cerr; diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 20e4b6679e46..c2ff3f14e5f6 100644 --- a/fs/nfsd/vfs.h 
+++ b/fs/nfsd/vfs.h @@ -86,7 +86,7 @@ __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, char *name, int len, char *path, - struct svc_fh *res, struct iattr *); + struct svc_fh *res); __be32 nfsd_link(struct svc_rqst *, struct svc_fh *, char *, int, struct svc_fh *); __be32 nfsd_rename(struct svc_rqst *, From 0f3a24b43bf75adf67df188a85594a8f43b9ee93 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 1 Jul 2014 18:27:53 -0400 Subject: [PATCH 023/167] nfsd: Ensure that nfsd_create_setattr commits files to stable storage Since nfsd_create_setattr strips the mode from the struct iattr, it is quite possible that it will optimise away the call to nfsd_setattr altogether. If this is the case, then we never call commit_metadata() on the newly created file. Also ensure that both nfsd_setattr() and nfsd_create_setattr() fail when the call to commit_metadata fails. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 730f31964597..e1b792ada45b 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -463,7 +463,7 @@ out_put_write_access: if (size_change) put_write_access(inode); if (!err) - commit_metadata(fhp); + err = commit_metadata(fhp); out: return err; } @@ -1121,7 +1121,8 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, iap->ia_valid &= ~(ATTR_UID|ATTR_GID); if (iap->ia_valid) return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); - return 0; + /* Callers expect file metadata to be committed here */ + return commit_metadata(resfhp); } /* HPUX client sometimes creates a file in mode 000, and sets size to 0. @@ -1253,9 +1254,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfsd_create_setattr(rqstp, resfhp, iap); /* - * nfsd_setattr already committed the child. 
Transactional filesystems - * had a chance to commit changes for both parent and child - * simultaneously making the following commit_metadata a noop. + * nfsd_create_setattr already committed the child. Transactional + * filesystems had a chance to commit changes for both parent and + * child * simultaneously making the following commit_metadata a + * noop. */ err2 = nfserrno(commit_metadata(fhp)); if (err2) @@ -1426,7 +1428,8 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfsd_create_setattr(rqstp, resfhp, iap); /* - * nfsd_setattr already committed the child (and possibly also the parent). + * nfsd_create_setattr already committed the child + * (and possibly also the parent). */ if (!err) err = nfserrno(commit_metadata(fhp)); From 5b8db00bae39e5ecd9bafb05478cca5b42564ab1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Jul 2014 16:11:22 -0400 Subject: [PATCH 024/167] nfsd: add a new /proc/fs/nfsd/max_connections file Currently, the maximum number of connections that nfsd will allow is based on the number of threads spawned. While this is fine for a default, there really isn't a clear relationship between the two. The number of threads corresponds to the number of concurrent requests that we want to allow the server to process at any given time. The connection limit corresponds to the maximum number of clients that we want to allow the server to handle. These are two entirely different quantities. Break the dependency on increasing threads in order to allow for more connections, by adding a new per-net parameter that can be set to a non-zero value. The default is still to base it on the number of threads, so there should be no behavior change for anyone who doesn't use it. Cc: Trond Myklebust Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/netns.h | 6 ++++++ fs/nfsd/nfsctl.c | 42 ++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/nfssvc.c | 5 +++++ 3 files changed, 53 insertions(+) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index d32b3aa6600d..113e1aa9b0e8 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -102,6 +102,12 @@ struct nfsd_net { */ struct timeval nfssvc_boot; + /* + * Max number of connections this nfsd container will allow. Defaults + * to '0' which is means that it bases this on the number of threads. + */ + unsigned int max_connections; + struct svc_serv *nfsd_serv; }; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 6a6f65cc8b34..4e042105fb6e 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -39,6 +39,7 @@ enum { NFSD_Versions, NFSD_Ports, NFSD_MaxBlkSize, + NFSD_MaxConnections, NFSD_SupportedEnctypes, /* * The below MUST come last. Otherwise we leave a hole in nfsd_files[] @@ -62,6 +63,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size); static ssize_t write_versions(struct file *file, char *buf, size_t size); static ssize_t write_ports(struct file *file, char *buf, size_t size); static ssize_t write_maxblksize(struct file *file, char *buf, size_t size); +static ssize_t write_maxconn(struct file *file, char *buf, size_t size); #ifdef CONFIG_NFSD_V4 static ssize_t write_leasetime(struct file *file, char *buf, size_t size); static ssize_t write_gracetime(struct file *file, char *buf, size_t size); @@ -77,6 +79,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { [NFSD_Versions] = write_versions, [NFSD_Ports] = write_ports, [NFSD_MaxBlkSize] = write_maxblksize, + [NFSD_MaxConnections] = write_maxconn, #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = write_leasetime, [NFSD_Gracetime] = write_gracetime, @@ -886,6 +889,44 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) nfsd_max_blksize); } +/** + * write_maxconn - Set or report the current max number of connections + * + * 
Input: + * buf: ignored + * size: zero + * OR + * + * Input: + * buf: C string containing an unsigned + * integer value representing the new + * number of max connections + * size: non-zero length of C string in @buf + * Output: + * On success: passed-in buffer filled with '\n'-terminated C string + * containing numeric value of max_connections setting + * for this net namespace; + * return code is the size in bytes of the string + * On error: return code is zero or a negative errno value + */ +static ssize_t write_maxconn(struct file *file, char *buf, size_t size) +{ + char *mesg = buf; + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + unsigned int maxconn = nn->max_connections; + + if (size > 0) { + int rv = get_uint(&mesg, &maxconn); + + if (rv) + return rv; + nn->max_connections = maxconn; + } + + return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%u\n", maxconn); +} + #ifdef CONFIG_NFSD_V4 static ssize_t __nfsd4_write_time(struct file *file, char *buf, size_t size, time_t *time, struct nfsd_net *nn) @@ -1061,6 +1102,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, + [NFSD_MaxConnections] = {"max_connections", &transaction_ops, S_IWUSR|S_IRUGO}, #if defined(CONFIG_SUNRPC_GSS) || defined(CONFIG_SUNRPC_GSS_MODULE) [NFSD_SupportedEnctypes] = {"supported_krb5_enctypes", &supported_enctypes_ops, S_IRUGO}, #endif /* CONFIG_SUNRPC_GSS or CONFIG_SUNRPC_GSS_MODULE */ diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 209474174fe4..5d026dca00ca 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -405,6 +405,7 @@ int nfsd_create_serv(struct net *net) if (nn->nfsd_serv == NULL) return -ENOMEM; + nn->nfsd_serv->sv_maxconn = nn->max_connections; error = 
svc_bind(nn->nfsd_serv, net); if (error < 0) { svc_destroy(nn->nfsd_serv); @@ -563,6 +564,7 @@ nfsd(void *vrqstp) struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list); struct net *net = perm_sock->xpt_net; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); int err; /* Lock module and set up kernel thread */ @@ -596,6 +598,9 @@ nfsd(void *vrqstp) * The main request loop */ for (;;) { + /* Update sv_maxconn if it has changed */ + rqstp->rq_server->sv_maxconn = nn->max_connections; + /* * Find a socket with data available and call its * recvfrom routine. From e17f99b728006fcebcf025b32fa7370bb998fb81 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Jun 2014 11:48:34 -0400 Subject: [PATCH 025/167] nfsd: nfs4_preprocess_seqid_op should only set *stpp on success Not technically a bugfix, since nothing tries to use the return pointer if this function doesn't return success, but it could be a problem with some coming changes. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 29788fd0da24..71c442fb9b3e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3953,6 +3953,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, { __be32 status; struct nfs4_stid *s; + struct nfs4_ol_stateid *stp = NULL; dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__, seqid, STATEID_VAL(stateid)); @@ -3962,11 +3963,14 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, cstate->minorversion, nn); if (status) return status; - *stpp = openlockstateid(s); + stp = openlockstateid(s); if (!nfsd4_has_session(cstate)) - cstate->replay_owner = (*stpp)->st_stateowner; + cstate->replay_owner = stp->st_stateowner; - return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp); + status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp); + if (!status) + *stpp = stp; + return status; } static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, From b607664ee74313c7f3f657a044eda572051e560e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Jun 2014 11:48:35 -0400 Subject: [PATCH 026/167] nfsd: Cleanup nfs4svc_encode_compoundres Move the slot return, put session etc into a helper in fs/nfsd/nfs4state.c instead of open coding in nfs4svc_encode_compoundres. Signed-off-by: Trond Myklebust Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 35 ++++++++++++++++++++++------------- fs/nfsd/nfs4xdr.c | 15 +-------------- fs/nfsd/state.h | 1 - fs/nfsd/xdr4.h | 2 +- 4 files changed, 24 insertions(+), 29 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 71c442fb9b3e..993da47cbc06 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -203,18 +203,6 @@ static void put_client_renew_locked(struct nfs4_client *clp) renew_client_locked(clp); } -void put_client_renew(struct nfs4_client *clp) -{ - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - - if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock)) - return; - if (!is_client_expired(clp)) - renew_client_locked(clp); - spin_unlock(&nn->client_lock); -} - - static inline u32 opaque_hashval(const void *ptr, int nbytes) { @@ -1646,7 +1634,7 @@ out_err: /* * Cache a reply. nfsd4_check_resp_size() has bounded the cache size. */ -void +static void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { struct xdr_buf *buf = resp->xdr.buf; @@ -2418,6 +2406,27 @@ out_put_client: goto out_no_session; } +void +nfsd4_sequence_done(struct nfsd4_compoundres *resp) +{ + struct nfsd4_compound_state *cs = &resp->cstate; + + if (nfsd4_has_session(cs)) { + struct nfs4_client *clp = cs->session->se_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + if (cs->status != nfserr_replay_cache) { + nfsd4_store_cache_entry(resp); + cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; + } + /* Renew the clientid on success and on replay */ + spin_lock(&nn->client_lock); + nfsd4_put_session(cs->session); + put_client_renew_locked(clp); + spin_unlock(&nn->client_lock); + } +} + __be32 nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc) { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 9388a4316fa8..21ffb9b9b768 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4000,7 +4000,6 @@ nfs4svc_encode_compoundres(struct 
svc_rqst *rqstp, __be32 *p, struct nfsd4_compo /* * All that remains is to write the tag and operation count... */ - struct nfsd4_compound_state *cs = &resp->cstate; struct xdr_buf *buf = resp->xdr.buf; WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len + @@ -4014,19 +4013,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo p += XDR_QUADLEN(resp->taglen); *p++ = htonl(resp->opcnt); - if (nfsd4_has_session(cs)) { - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - struct nfs4_client *clp = cs->session->se_client; - if (cs->status != nfserr_replay_cache) { - nfsd4_store_cache_entry(resp); - cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; - } - /* Renew the clientid on success and on replay */ - spin_lock(&nn->client_lock); - nfsd4_put_session(cs->session); - spin_unlock(&nn->client_lock); - put_client_renew(clp); - } + nfsd4_sequence_done(resp); return 1; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 374c66283ac5..62f33b7ec10c 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -476,7 +476,6 @@ extern void nfs4_put_delegation(struct nfs4_delegation *dp); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); -extern void put_client_renew(struct nfs4_client *clp); /* nfs4recover operations */ extern int nfsd4_client_tracking_init(struct net *net); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index efce9010cad4..a30a7418bbb5 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -578,7 +578,6 @@ extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_setclientid_confirm *setclientid_confirm); -extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp); extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_exchange_id *); extern 
__be32 nfsd4_backchannel_ctl(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_backchannel_ctl *); @@ -589,6 +588,7 @@ extern __be32 nfsd4_create_session(struct svc_rqst *, extern __be32 nfsd4_sequence(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_sequence *); +extern void nfsd4_sequence_done(struct nfsd4_compoundres *resp); extern __be32 nfsd4_destroy_session(struct svc_rqst *, struct nfsd4_compound_state *, struct nfsd4_destroy_session *); From db24b3b4b2a510ad0face05aec1c5bfbe89050bb Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Jun 2014 11:48:36 -0400 Subject: [PATCH 027/167] nfsd: declare v4.1+ openowners confirmed on creation There's no need to confirm an openowner in v4.1 and above, so we can go ahead and set NFS4_OO_CONFIRMED when we create openowners in those versions. This will also be necessary when we remove the client_mutex, as it'll be possible for two concurrent opens to race in versions >4.0. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 993da47cbc06..106db71e0eef 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2719,7 +2719,10 @@ static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, u } static struct nfs4_openowner * -alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) { +alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, + struct nfsd4_open *open, + struct nfsd4_compound_state *cstate) +{ struct nfs4_openowner *oo; oo = alloc_stateowner(openowner_slab, &open->op_owner, clp); @@ -2728,6 +2731,8 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str oo->oo_owner.so_is_open_owner = 1; oo->oo_owner.so_seqid = open->op_seqid; oo->oo_flags = NFS4_OO_NEW; + if (nfsd4_has_session(cstate)) + oo->oo_flags |= NFS4_OO_CONFIRMED; oo->oo_time = 0; oo->oo_last_closed_stid = NULL; INIT_LIST_HEAD(&oo->oo_close_lru); @@ -2987,7 +2992,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, clp = oo->oo_owner.so_client; goto alloc_stateid; new_owner: - oo = alloc_init_open_stateowner(strhashval, clp, open); + oo = alloc_init_open_stateowner(strhashval, clp, open, cstate); if (oo == NULL) return nfserr_jukebox; open->op_openowner = oo; @@ -3397,8 +3402,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); if (nfsd4_has_session(&resp->cstate)) { - open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; - if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT; open->op_why_no_deleg = WND4_NOT_WANTED; @@ -3792,8 +3795,9 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, nfs4_lock_state(); - status = 
nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, - &s, cstate->minorversion, nn); + status = nfsd4_lookup_stateid(stateid, + NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, + &s, cstate->minorversion, nn); if (status) goto out; status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); From acf9295b1c4e60fc205e21b7a5c9dc6e1cb2764a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Jun 2014 11:48:37 -0400 Subject: [PATCH 028/167] nfsd: clean up nfsd4_close_open_stateid Minor cleanup that should introduce no behavioral changes. Currently this function just unhashes the stateid and leaves the caller to do the work of the CLOSE processing. Change nfsd4_close_open_stateid so that it handles doing all of the work of closing a stateid. Move the handling of the unhashed stateid into it instead of doing that work in nfsd4_close. This will help isolate some coming changes to stateid handling from nfsd4_close. Signed-off-by: Trond Myklebust Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 106db71e0eef..1e973f67999d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4129,8 +4129,25 @@ out: static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) { - unhash_open_stateid(s); + struct nfs4_client *clp = s->st_stid.sc_client; + struct nfs4_openowner *oo = openowner(s->st_stateowner); + s->st_stid.sc_type = NFS4_CLOSED_STID; + unhash_open_stateid(s); + + if (clp->cl_minorversion) { + free_generic_stateid(s); + if (list_empty(&oo->oo_owner.so_stateids)) + release_openowner(oo); + } else { + oo->oo_last_closed_stid = s; + /* + * In the 4.0 case we need to keep the owners around a + * little while to handle CLOSE replay. 
+ */ + if (list_empty(&oo->oo_owner.so_stateids)) + move_to_close_lru(oo, clp->net); + } } /* @@ -4141,7 +4158,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_close *close) { __be32 status; - struct nfs4_openowner *oo; struct nfs4_ol_stateid *stp; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -4157,28 +4173,10 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_bump_seqid(cstate, status); if (status) goto out; - oo = openowner(stp->st_stateowner); update_stateid(&stp->st_stid.sc_stateid); memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); nfsd4_close_open_stateid(stp); - - if (cstate->minorversion) - free_generic_stateid(stp); - else - oo->oo_last_closed_stid = stp; - - if (list_empty(&oo->oo_owner.so_stateids)) { - if (cstate->minorversion) - release_openowner(oo); - else { - /* - * In the 4.0 case we need to keep the owners around a - * little while to handle CLOSE replay. - */ - move_to_close_lru(oo, SVC_NET(rqstp)); - } - } out: if (!cstate->replay_owner) nfs4_unlock_state(); From 3c87b9b7c05d7775a3d942de588296025023c6d2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Jun 2014 11:48:38 -0400 Subject: [PATCH 029/167] nfsd: lock owners are not per open stateid In the NFSv4 spec, lock stateids are per-file objects. Lockowners are not. This patch replaces the current list of lock owners in the open stateids with a list of lock stateids. Signed-off-by: Trond Myklebust Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 53 ++++++++++++++++++++++++++++++--------------- fs/nfsd/state.h | 3 +-- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1e973f67999d..137fdcce9023 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -695,10 +695,11 @@ static void free_generic_stateid(struct nfs4_ol_stateid *stp) nfs4_free_stid(stateid_slab, &stp->st_stid); } -static void release_lock_stateid(struct nfs4_ol_stateid *stp) +static void __release_lock_stateid(struct nfs4_ol_stateid *stp) { struct file *file; + list_del(&stp->st_locks); unhash_generic_stateid(stp); unhash_stid(&stp->st_stid); file = find_any_file(stp->st_file); @@ -713,12 +714,11 @@ static void unhash_lockowner(struct nfs4_lockowner *lo) struct nfs4_ol_stateid *stp; list_del(&lo->lo_owner.so_strhash); - list_del(&lo->lo_perstateid); list_del(&lo->lo_owner_ino_hash); while (!list_empty(&lo->lo_owner.so_stateids)) { stp = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); - release_lock_stateid(stp); + __release_lock_stateid(stp); } } @@ -734,22 +734,36 @@ static void release_lockowner(struct nfs4_lockowner *lo) nfs4_free_lockowner(lo); } -static void -release_stateid_lockowners(struct nfs4_ol_stateid *open_stp) +static void release_lockowner_if_empty(struct nfs4_lockowner *lo) +{ + if (list_empty(&lo->lo_owner.so_stateids)) + release_lockowner(lo); +} + +static void release_lock_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_lockowner *lo; - while (!list_empty(&open_stp->st_lockowners)) { - lo = list_entry(open_stp->st_lockowners.next, - struct nfs4_lockowner, lo_perstateid); - release_lockowner(lo); + lo = lockowner(stp->st_stateowner); + __release_lock_stateid(stp); + release_lockowner_if_empty(lo); +} + +static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp) +{ + struct nfs4_ol_stateid *stp; + + while (!list_empty(&open_stp->st_locks)) { + stp = 
list_entry(open_stp->st_locks.next, + struct nfs4_ol_stateid, st_locks); + release_lock_stateid(stp); } } static void unhash_open_stateid(struct nfs4_ol_stateid *stp) { unhash_generic_stateid(stp); - release_stateid_lockowners(stp); + release_open_stateid_locks(stp); close_generic_stateid(stp); } @@ -2744,7 +2758,7 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfs4_openowner *oo = open->op_openowner; stp->st_stid.sc_type = NFS4_OPEN_STID; - INIT_LIST_HEAD(&stp->st_lockowners); + INIT_LIST_HEAD(&stp->st_locks); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); stp->st_stateowner = &oo->oo_owner; @@ -4335,7 +4349,6 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s list_add(&lo->lo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); list_add(&lo->lo_owner_ino_hash, &nn->lockowner_ino_hashtbl[inohash]); - list_add(&lo->lo_perstateid, &open_stp->st_lockowners); } /* @@ -4380,6 +4393,7 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; + list_add(&stp->st_locks, &open_stp->st_locks); return stp; } @@ -4967,18 +4981,21 @@ static void nfsd_print_count(struct nfs4_client *clp, unsigned int count, printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type); } -static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_lockowner *)) +static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, + void (*func)(struct nfs4_ol_stateid *)) { struct nfs4_openowner *oop; - struct nfs4_lockowner *lop, *lo_next; struct nfs4_ol_stateid *stp, *st_next; + struct nfs4_ol_stateid *lst, *lst_next; u64 count = 0; list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) { - list_for_each_entry_safe(stp, st_next, &oop->oo_owner.so_stateids, st_perstateowner) { - 
list_for_each_entry_safe(lop, lo_next, &stp->st_lockowners, lo_perstateid) { + list_for_each_entry_safe(stp, st_next, + &oop->oo_owner.so_stateids, st_perstateowner) { + list_for_each_entry_safe(lst, lst_next, + &stp->st_locks, st_locks) { if (func) - func(lop); + func(lst); if (++count == max) return count; } @@ -4990,7 +5007,7 @@ static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, void (*fun u64 nfsd_forget_client_locks(struct nfs4_client *clp, u64 max) { - return nfsd_foreach_client_lock(clp, max, release_lockowner); + return nfsd_foreach_client_lock(clp, max, release_lock_stateid); } u64 nfsd_print_client_locks(struct nfs4_client *clp, u64 max) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 62f33b7ec10c..c1e384a0a40a 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -365,7 +365,6 @@ struct nfs4_openowner { struct nfs4_lockowner { struct nfs4_stateowner lo_owner; /* must be first element */ struct list_head lo_owner_ino_hash; /* hash by owner,file */ - struct list_head lo_perstateid; struct list_head lo_list; /* for temporary uses */ }; @@ -433,7 +432,7 @@ struct nfs4_ol_stateid { struct nfs4_stid st_stid; /* must be first field */ struct list_head st_perfile; struct list_head st_perstateowner; - struct list_head st_lockowners; + struct list_head st_locks; struct nfs4_stateowner * st_stateowner; struct nfs4_file * st_file; unsigned long st_access_bmap; From c53530da4dfede2f080129b58a89ef907e5a0dfd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Jun 2014 11:48:39 -0400 Subject: [PATCH 030/167] nfsd: Allow lockowners to hold several stateids A lockowner can have more than one lock stateid. For instance, if a process has more than one file open and has locks on both, then the same lockowner has more than one stateid associated with it. Change it so that this reality is better reflected by the objects that nfsd uses. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 55 +++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 137fdcce9023..9b6a4f3ec18e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3870,12 +3870,7 @@ nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp) if (check_for_locks(stp->st_file, lo)) return nfserr_locks_held; - /* - * Currently there's a 1-1 lock stateid<->lockowner - * correspondance, and we have to delete the lockowner when we - * delete the lock stateid: - */ - release_lockowner(lo); + release_lockowner_if_empty(lo); return nfs_ok; } @@ -4397,6 +4392,19 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct return stp; } +static struct nfs4_ol_stateid * +find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) +{ + struct nfs4_ol_stateid *lst; + + list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { + if (lst->st_file == fp) + return lst; + } + return NULL; +} + + static int check_lock_length(u64 offset, u64 length) { @@ -4426,25 +4434,28 @@ static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, s lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner, nn); - if (lo) { - if (!cstate->minorversion) + if (!lo) { + strhashval = ownerstr_hashval(cl->cl_clientid.cl_id, + &lock->v.new.owner); + lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); + if (lo == NULL) + return nfserr_jukebox; + } else { + /* with an existing lockowner, seqids must be the same */ + if (!cstate->minorversion && + lock->lk_new_lock_seqid != lo->lo_owner.so_seqid) return nfserr_bad_seqid; - /* XXX: a lockowner always has exactly one stateid: */ - *lst = list_first_entry(&lo->lo_owner.so_stateids, - struct nfs4_ol_stateid, st_perstateowner); - return nfs_ok; } - strhashval = ownerstr_hashval(cl->cl_clientid.cl_id, - &lock->v.new.owner); - lo = 
alloc_init_lock_stateowner(strhashval, cl, ost, lock); - if (lo == NULL) - return nfserr_jukebox; - *lst = alloc_init_lock_stateid(lo, fi, ost); + + *lst = find_lock_stateid(lo, fi); if (*lst == NULL) { - release_lockowner(lo); - return nfserr_jukebox; + *lst = alloc_init_lock_stateid(lo, fi, ost); + if (*lst == NULL) { + release_lockowner_if_empty(lo); + return nfserr_jukebox; + } + *new = true; } - *new = true; return nfs_ok; } @@ -4601,7 +4612,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } out: if (status && new_state) - release_lockowner(lock_sop); + release_lock_stateid(lock_stp); nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) nfs4_unlock_state(); From b3c32bcd9c4b8320aea504477573f0c460d2d57d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Jun 2014 11:48:40 -0400 Subject: [PATCH 031/167] nfsd: NFSv4 lock-owners are not associated to a specific file Just like open-owners, lock-owners are associated with a name, a clientid and, in the case of minor version 0, a sequence id. There is no association to a file. Signed-off-by: Trond Myklebust Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/netns.h | 4 --- fs/nfsd/nfs4state.c | 73 +++++++++------------------------------------ fs/nfsd/state.h | 1 - 3 files changed, 14 insertions(+), 64 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 113e1aa9b0e8..a71d14413d39 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -29,9 +29,6 @@ #define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) #define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) -#define LOCKOWNER_INO_HASH_BITS 8 -#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS) - #define SESSION_HASH_SIZE 512 struct cld_net; @@ -67,7 +64,6 @@ struct nfsd_net { struct list_head *unconf_id_hashtbl; struct rb_root unconf_name_tree; struct list_head *ownerstr_hashtbl; - struct list_head *lockowner_ino_hashtbl; struct list_head *sessionid_hashtbl; /* * client_lru holds client queue ordered by nfs4_client.cl_time diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9b6a4f3ec18e..ff10919eebde 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -714,7 +714,6 @@ static void unhash_lockowner(struct nfs4_lockowner *lo) struct nfs4_ol_stateid *stp; list_del(&lo->lo_owner.so_strhash); - list_del(&lo->lo_owner_ino_hash); while (!list_empty(&lo->lo_owner.so_stateids)) { stp = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); @@ -4225,8 +4224,6 @@ out: #define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) -#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1) - static inline u64 end_offset(u64 start, u64 len) { @@ -4247,13 +4244,6 @@ last_byte_offset(u64 start, u64 len) return end > start ? 
end - 1: NFS4_MAX_UINT64; } -static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername) -{ - return (file_hashval(inode) + cl_id - + opaque_hashval(ownername->data, ownername->len)) - & LOCKOWNER_INO_HASH_MASK; -} - /* * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that * we can't properly handle lock requests that go beyond the (2^63 - 1)-th @@ -4306,46 +4296,23 @@ nevermind: deny->ld_type = NFS4_WRITE_LT; } -static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner) -{ - struct nfs4_ol_stateid *lst; - - if (!same_owner_str(&lo->lo_owner, owner, clid)) - return false; - if (list_empty(&lo->lo_owner.so_stateids)) { - WARN_ON_ONCE(1); - return false; - } - lst = list_first_entry(&lo->lo_owner.so_stateids, - struct nfs4_ol_stateid, st_perstateowner); - return lst->st_file->fi_inode == inode; -} - static struct nfs4_lockowner * -find_lockowner_str(struct inode *inode, clientid_t *clid, - struct xdr_netobj *owner, struct nfsd_net *nn) +find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner, + struct nfsd_net *nn) { - unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner); - struct nfs4_lockowner *lo; + unsigned int strhashval = ownerstr_hashval(clid->cl_id, owner); + struct nfs4_stateowner *so; - list_for_each_entry(lo, &nn->lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) { - if (same_lockowner_ino(lo, inode, clid, owner)) - return lo; + list_for_each_entry(so, &nn->ownerstr_hashtbl[strhashval], so_strhash) { + if (so->so_is_open_owner) + continue; + if (!same_owner_str(so, owner, clid)) + continue; + return lockowner(so); } return NULL; } -static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp) -{ - struct inode *inode = open_stp->st_file->fi_inode; - unsigned int inohash = lockowner_ino_hashval(inode, - clp->cl_clientid.cl_id, 
&lo->lo_owner.so_owner); - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - - list_add(&lo->lo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); - list_add(&lo->lo_owner_ino_hash, &nn->lockowner_ino_hashtbl[inohash]); -} - /* * Alloc a lock owner structure. * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has @@ -4353,10 +4320,10 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s * * strhashval = ownerstr_hashval */ - static struct nfs4_lockowner * alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp, struct nfsd4_lock *lock) { struct nfs4_lockowner *lo; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp); if (!lo) @@ -4366,7 +4333,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str /* It is the openowner seqid that will be incremented in encode in the * case of new lockowners; so increment the lock seqid manually: */ lo->lo_owner.so_seqid = lock->lk_new_lock_seqid + 1; - hash_lockowner(lo, strhashval, clp, open_stp); + list_add(&lo->lo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); return lo; } @@ -4432,8 +4399,7 @@ static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, s unsigned int strhashval; struct nfsd_net *nn = net_generic(cl->net, nfsd_net_id); - lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, - &lock->v.new.owner, nn); + lo = find_lockowner_str(&cl->cl_clientid, &lock->v.new.owner, nn); if (!lo) { strhashval = ownerstr_hashval(cl->cl_clientid.cl_id, &lock->v.new.owner); @@ -4647,7 +4613,6 @@ __be32 nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_lockt *lockt) { - struct inode *inode; struct file_lock *file_lock = NULL; struct nfs4_lockowner *lo; __be32 status; @@ -4670,7 +4635,6 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state 
*cstate, if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) goto out; - inode = cstate->current_fh.fh_dentry->d_inode; file_lock = locks_alloc_lock(); if (!file_lock) { dprintk("NFSD: %s: unable to allocate lock!\n", __func__); @@ -4693,7 +4657,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } - lo = find_lockowner_str(inode, &lockt->lt_clientid, &lockt->lt_owner, nn); + lo = find_lockowner_str(&lockt->lt_clientid, &lockt->lt_owner, nn); if (lo) file_lock->fl_owner = (fl_owner_t)lo; file_lock->fl_pid = current->tgid; @@ -5187,10 +5151,6 @@ static int nfs4_state_create_net(struct net *net) OWNER_HASH_SIZE, GFP_KERNEL); if (!nn->ownerstr_hashtbl) goto err_ownerstr; - nn->lockowner_ino_hashtbl = kmalloc(sizeof(struct list_head) * - LOCKOWNER_INO_HASH_SIZE, GFP_KERNEL); - if (!nn->lockowner_ino_hashtbl) - goto err_lockowner_ino; nn->sessionid_hashtbl = kmalloc(sizeof(struct list_head) * SESSION_HASH_SIZE, GFP_KERNEL); if (!nn->sessionid_hashtbl) @@ -5202,8 +5162,6 @@ static int nfs4_state_create_net(struct net *net) } for (i = 0; i < OWNER_HASH_SIZE; i++) INIT_LIST_HEAD(&nn->ownerstr_hashtbl[i]); - for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++) - INIT_LIST_HEAD(&nn->lockowner_ino_hashtbl[i]); for (i = 0; i < SESSION_HASH_SIZE; i++) INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); nn->conf_name_tree = RB_ROOT; @@ -5219,8 +5177,6 @@ static int nfs4_state_create_net(struct net *net) return 0; err_sessionid: - kfree(nn->lockowner_ino_hashtbl); -err_lockowner_ino: kfree(nn->ownerstr_hashtbl); err_ownerstr: kfree(nn->unconf_id_hashtbl); @@ -5252,7 +5208,6 @@ nfs4_state_destroy_net(struct net *net) } kfree(nn->sessionid_hashtbl); - kfree(nn->lockowner_ino_hashtbl); kfree(nn->ownerstr_hashtbl); kfree(nn->unconf_id_hashtbl); kfree(nn->conf_id_hashtbl); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index c1e384a0a40a..23b110939da1 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -364,7 +364,6 @@ struct nfs4_openowner { struct 
nfs4_lockowner { struct nfs4_stateowner lo_owner; /* must be first element */ - struct list_head lo_owner_ino_hash; /* hash by owner,file */ struct list_head lo_list; /* for temporary uses */ }; From fd44907c2d8f0647903d0c55520a34e24eeeb1cd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Jun 2014 11:48:41 -0400 Subject: [PATCH 032/167] nfsd: clean up nfsd4_release_lockowner Now that we know that we won't have several lockowners with the same, owner->data, we can simplify nfsd4_release_lockowner and get rid of the lo_list in the process. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 46 ++++++++++++++++++++++----------------------- fs/nfsd/state.h | 1 - 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ff10919eebde..86ec359349c6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4783,11 +4783,10 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner) { clientid_t *clid = &rlockowner->rl_clientid; - struct nfs4_stateowner *sop; + struct nfs4_stateowner *sop = NULL, *tmp; struct nfs4_lockowner *lo; struct nfs4_ol_stateid *stp; struct xdr_netobj *owner = &rlockowner->rl_owner; - struct list_head matches; unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); @@ -4802,33 +4801,32 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, goto out; status = nfserr_locks_held; - INIT_LIST_HEAD(&matches); - list_for_each_entry(sop, &nn->ownerstr_hashtbl[hashval], so_strhash) { - if (sop->so_is_open_owner) + /* Find the matching lock stateowner */ + list_for_each_entry(tmp, &nn->ownerstr_hashtbl[hashval], so_strhash) { + if (tmp->so_is_open_owner) continue; - if (!same_owner_str(sop, owner, clid)) - continue; - list_for_each_entry(stp, &sop->so_stateids, - st_perstateowner) { - lo = lockowner(sop); - if 
(check_for_locks(stp->st_file, lo)) - goto out; - list_add(&lo->lo_list, &matches); + if (same_owner_str(tmp, owner, clid)) { + sop = tmp; + break; } } - /* Clients probably won't expect us to return with some (but not all) - * of the lockowner state released; so don't release any until all - * have been checked. */ - status = nfs_ok; - while (!list_empty(&matches)) { - lo = list_entry(matches.next, struct nfs4_lockowner, - lo_list); - /* unhash_stateowner deletes so_perclient only - * for openowners. */ - list_del(&lo->lo_list); - release_lockowner(lo); + + /* No matching owner found, maybe a replay? Just declare victory... */ + if (!sop) { + status = nfs_ok; + goto out; } + + lo = lockowner(sop); + /* see if there are still any locks associated with it */ + list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) { + if (check_for_locks(stp->st_file, lo)) + goto out; + } + + status = nfs_ok; + release_lockowner(lo); out: nfs4_unlock_state(); return status; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 23b110939da1..ab937b5f10ab 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -364,7 +364,6 @@ struct nfs4_openowner { struct nfs4_lockowner { struct nfs4_stateowner lo_owner; /* must be first element */ - struct list_head lo_list; /* for temporary uses */ }; static inline struct nfs4_openowner * openowner(struct nfs4_stateowner *so) From d4e19e70276a320bbc01b76fb50b5c4962ff523a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Jun 2014 11:48:42 -0400 Subject: [PATCH 033/167] nfsd: Don't get a session reference without a client reference If the client were to disappear from underneath us while we're holding a session reference, things would be bad. This cleanup helps ensure that it cannot, which will be a possibility when the client_mutex is removed. Signed-off-by: Trond Myklebust Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 112 +++++++++++++++++++++++++++----------------- fs/nfsd/state.h | 2 - 2 files changed, 68 insertions(+), 46 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 86ec359349c6..29d1ddc098bc 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -103,12 +103,6 @@ static bool is_session_dead(struct nfsd4_session *ses) return ses->se_flags & NFS4_SESSION_DEAD; } -void nfsd4_put_session(struct nfsd4_session *ses) -{ - if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses)) - free_session(ses); -} - static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me) { if (atomic_read(&ses->se_ref) > ref_held_by_me) @@ -117,14 +111,6 @@ static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_b return nfs_ok; } -static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses) -{ - if (is_session_dead(ses)) - return nfserr_badsession; - atomic_inc(&ses->se_ref); - return nfs_ok; -} - void nfs4_unlock_state(void) { @@ -203,6 +189,39 @@ static void put_client_renew_locked(struct nfs4_client *clp) renew_client_locked(clp); } +static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses) +{ + __be32 status; + + if (is_session_dead(ses)) + return nfserr_badsession; + status = get_client_locked(ses->se_client); + if (status) + return status; + atomic_inc(&ses->se_ref); + return nfs_ok; +} + +static void nfsd4_put_session_locked(struct nfsd4_session *ses) +{ + struct nfs4_client *clp = ses->se_client; + + if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses)) + free_session(ses); + put_client_renew_locked(clp); +} + +static void nfsd4_put_session(struct nfsd4_session *ses) +{ + struct nfs4_client *clp = ses->se_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); + nfsd4_put_session_locked(ses); + spin_unlock(&nn->client_lock); +} + + static inline u32 opaque_hashval(const void *ptr, int nbytes) { @@ -1121,7 
+1140,7 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru /* caller must hold client_lock */ static struct nfsd4_session * -find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net) +__find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net) { struct nfsd4_session *elem; int idx; @@ -1141,6 +1160,24 @@ find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net) return NULL; } +static struct nfsd4_session * +find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net, + __be32 *ret) +{ + struct nfsd4_session *session; + __be32 status = nfserr_badsession; + + session = __find_in_sessionid_hashtbl(sessionid, net); + if (!session) + goto out; + status = nfsd4_get_session_locked(session); + if (status) + session = NULL; +out: + *ret = status; + return session; +} + /* caller must hold client_lock */ static void unhash_session(struct nfsd4_session *ses) @@ -2157,17 +2194,17 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, __be32 status; struct nfsd4_conn *conn; struct nfsd4_session *session; - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (!nfsd4_last_compound_op(rqstp)) return nfserr_not_only_op; nfs4_lock_state(); spin_lock(&nn->client_lock); - session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp)); + session = find_in_sessionid_hashtbl(&bcts->sessionid, net, &status); spin_unlock(&nn->client_lock); - status = nfserr_badsession; if (!session) - goto out; + goto out_no_session; status = nfserr_wrong_cred; if (!mach_creds_match(session->se_client, rqstp)) goto out; @@ -2181,6 +2218,8 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, nfsd4_init_conn(rqstp, conn, session); status = nfs_ok; out: + nfsd4_put_session(session); +out_no_session: nfs4_unlock_state(); return status; } @@ -2200,7 +2239,8 @@ 
nfsd4_destroy_session(struct svc_rqst *r, struct nfsd4_session *ses; __be32 status; int ref_held_by_me = 0; - struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id); + struct net *net = SVC_NET(r); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); nfs4_lock_state(); status = nfserr_not_only_op; @@ -2211,14 +2251,12 @@ nfsd4_destroy_session(struct svc_rqst *r, } dump_sessionid(__func__, &sessionid->sessionid); spin_lock(&nn->client_lock); - ses = find_in_sessionid_hashtbl(&sessionid->sessionid, SVC_NET(r)); - status = nfserr_badsession; + ses = find_in_sessionid_hashtbl(&sessionid->sessionid, net, &status); if (!ses) goto out_client_lock; status = nfserr_wrong_cred; if (!mach_creds_match(ses->se_client, r)) - goto out_client_lock; - nfsd4_get_session_locked(ses); + goto out_put_session; status = mark_session_dead_locked(ses, 1 + ref_held_by_me); if (status) goto out_put_session; @@ -2230,7 +2268,7 @@ nfsd4_destroy_session(struct svc_rqst *r, spin_lock(&nn->client_lock); status = nfs_ok; out_put_session: - nfsd4_put_session(ses); + nfsd4_put_session_locked(ses); out_client_lock: spin_unlock(&nn->client_lock); out: @@ -2305,7 +2343,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_conn *conn; __be32 status; int buflen; - struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct net *net = SVC_NET(rqstp); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (resp->opcnt != 1) return nfserr_sequence_pos; @@ -2319,17 +2358,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, return nfserr_jukebox; spin_lock(&nn->client_lock); - status = nfserr_badsession; - session = find_in_sessionid_hashtbl(&seq->sessionid, SVC_NET(rqstp)); + session = find_in_sessionid_hashtbl(&seq->sessionid, net, &status); if (!session) goto out_no_session; clp = session->se_client; - status = get_client_locked(clp); - if (status) - goto out_no_session; - status = nfsd4_get_session_locked(session); - if (status) - goto out_put_client; status = nfserr_too_many_ops; if 
(nfsd4_session_too_many_ops(rqstp, session)) @@ -2413,9 +2445,7 @@ out_no_session: spin_unlock(&nn->client_lock); return status; out_put_session: - nfsd4_put_session(session); -out_put_client: - put_client_renew_locked(clp); + nfsd4_put_session_locked(session); goto out_no_session; } @@ -2425,18 +2455,12 @@ nfsd4_sequence_done(struct nfsd4_compoundres *resp) struct nfsd4_compound_state *cs = &resp->cstate; if (nfsd4_has_session(cs)) { - struct nfs4_client *clp = cs->session->se_client; - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - if (cs->status != nfserr_replay_cache) { nfsd4_store_cache_entry(resp); cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE; } - /* Renew the clientid on success and on replay */ - spin_lock(&nn->client_lock); + /* Drop session reference that was taken in nfsd4_sequence() */ nfsd4_put_session(cs->session); - put_client_renew_locked(clp); - spin_unlock(&nn->client_lock); } } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index ab937b5f10ab..ff160e89701a 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -212,8 +212,6 @@ struct nfsd4_session { struct nfsd4_slot *se_slots[]; /* forward channel slots */ }; -extern void nfsd4_put_session(struct nfsd4_session *ses); - /* formatted contents of nfs4_sessionid */ struct nfsd4_sessionid { clientid_t clientid; From 2dd6e458c3dc1ae598867130dc618eabbe7ccda5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Jun 2014 11:48:43 -0400 Subject: [PATCH 034/167] nfsd: Cleanup - Let nfsd4_lookup_stateid() take a cstate argument The cstate already holds information about the session, and hence the client id, so it makes more sense to pass that information rather than the current practice of passing a 'minor version' number. Signed-off-by: Trond Myklebust Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 29d1ddc098bc..1f8aab8f67ba 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3781,12 +3781,14 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) } } -static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, - struct nfs4_stid **s, bool sessions, - struct nfsd_net *nn) +static __be32 +nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, + stateid_t *stateid, unsigned char typemask, + struct nfs4_stid **s, struct nfsd_net *nn) { struct nfs4_client *cl; __be32 status; + bool sessions = cstate->minorversion != 0; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return nfserr_bad_stateid; @@ -3832,9 +3834,9 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, nfs4_lock_state(); - status = nfsd4_lookup_stateid(stateid, + status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, - &s, cstate->minorversion, nn); + &s, nn); if (status) goto out; status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); @@ -4004,8 +4006,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, seqid, STATEID_VAL(stateid)); *stpp = NULL; - status = nfsd4_lookup_stateid(stateid, typemask, &s, - cstate->minorversion, nn); + status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn); if (status) return status; stp = openlockstateid(s); @@ -4229,8 +4230,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; nfs4_lock_state(); - status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID, &s, - cstate->minorversion, nn); + status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, &s, nn); if (status) goto out; dp = delegstateid(s); From 722b620d1830fce69367b099ef6a83f41a4b9d72 Mon Sep 17 
00:00:00 2001 From: Jeff Layton Date: Thu, 3 Jul 2014 07:54:19 -0400 Subject: [PATCH 035/167] nfsd: properly convert return from commit_metadata to __be32 Commit 2a7420c03e504 (nfsd: Ensure that nfsd_create_setattr commits files to stable storage), added a couple of calls to commit_metadata, but doesn't convert their return codes to __be32 in the appropriate places. Cc: Trond Myklebust Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index e1b792ada45b..f501a9b5c9df 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -463,7 +463,7 @@ out_put_write_access: if (size_change) put_write_access(inode); if (!err) - err = commit_metadata(fhp); + err = nfserrno(commit_metadata(fhp)); out: return err; } @@ -1122,7 +1122,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *resfhp, if (iap->ia_valid) return nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); /* Callers expect file metadata to be committed here */ - return commit_metadata(resfhp); + return nfserrno(commit_metadata(resfhp)); } /* HPUX client sometimes creates a file in mode 000, and sets size to 0. From 62814d6a9bca1de4eb69cee161e01e0f670b486d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 3 Jul 2014 15:15:54 -0400 Subject: [PATCH 036/167] nfsd: add a nfserrno mapping for -E2BIG to nfserr_fbig I saw this pop up with some pynfs testing: [ 123.609992] nfsd: non-standard errno: -7 ...and -7 is -E2BIG. I think what happened is that XFS returned -E2BIG due to some xattr operations with the ACL10 pynfs TEST (I guess it has limited xattr size?). Add a better mapping for that error since it's possible that we'll need it. How about we convert it to NFSERR_FBIG? As Bruce points out, they both have "BIG" in the name so it must be good. Also, turn the printk in this function into a WARN() so that we can get a bit more information about situations that don't have proper mappings. 
Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsproc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index eff49552cdc8..b19c7e8bf64c 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -717,6 +717,7 @@ nfserrno (int errno) { nfserr_noent, -ENOENT }, { nfserr_io, -EIO }, { nfserr_nxio, -ENXIO }, + { nfserr_fbig, -E2BIG }, { nfserr_acces, -EACCES }, { nfserr_exist, -EEXIST }, { nfserr_xdev, -EXDEV }, @@ -751,7 +752,7 @@ nfserrno (int errno) if (nfs_errtbl[i].syserr == errno) return nfs_errtbl[i].nfserr; } - printk (KERN_INFO "nfsd: non-standard errno: %d\n", errno); + WARN(1, "nfsd: non-standard errno: %d\n", errno); return nfserr_io; } From 4b24ca7d30430882a2eaeb9d511990fb4581230d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 30 Jun 2014 11:48:44 -0400 Subject: [PATCH 037/167] nfsd: Allow struct nfsd4_compound_state to cache the nfs4_client We want to use the nfsd4_compound_state to cache the nfs4_client in order to optimise away extra lookups of the clid. In the v4.0 case, we use this to ensure that we only have to look up the client at most once per compound for each call into lookup_clientid. For v4.1+ we set the pointer in the cstate during SEQUENCE processing so we should never need to do a search for it. Signed-off-by: Trond Myklebust Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 74 +++++++++++++++++++++++++++++++-------------- fs/nfsd/xdr4.h | 1 + 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1f8aab8f67ba..c01d81e21602 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -189,6 +189,15 @@ static void put_client_renew_locked(struct nfs4_client *clp) renew_client_locked(clp); } +static void put_client_renew(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); + put_client_renew_locked(clp); + spin_unlock(&nn->client_lock); +} + static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses) { __be32 status; @@ -2391,6 +2400,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, goto out_put_session; cstate->slot = slot; cstate->session = session; + cstate->clp = clp; /* Return the cached reply status and set cstate->status * for nfsd4_proc_compound processing */ status = nfsd4_replay_cache_entry(resp, seq); @@ -2425,6 +2435,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, cstate->slot = slot; cstate->session = session; + cstate->clp = clp; out: switch (clp->cl_cb_state) { @@ -2461,7 +2472,8 @@ nfsd4_sequence_done(struct nfsd4_compoundres *resp) } /* Drop session reference that was taken in nfsd4_sequence() */ nfsd4_put_session(cs->session); - } + } else if (cs->clp) + put_client_renew(cs->clp); } __be32 @@ -2986,6 +2998,38 @@ static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4 return nfserr_bad_seqid; } +static __be32 lookup_clientid(clientid_t *clid, + struct nfsd4_compound_state *cstate, + struct nfsd_net *nn) +{ + struct nfs4_client *found; + + if (cstate->clp) { + found = cstate->clp; + if (!same_clid(&found->cl_clientid, clid)) + return nfserr_stale_clientid; + return nfs_ok; + } + + if (STALE_CLIENTID(clid, nn)) + return nfserr_stale_clientid; + + /* + * For v4.1+ we get the client in the SEQUENCE op. 
If we don't have one + * cached already then we know this is for is for v4.0 and "sessions" + * will be false. + */ + WARN_ON_ONCE(cstate->session); + found = find_confirmed_client(clid, false, nn); + if (!found) + return nfserr_expired; + + /* Cache the nfs4_client in cstate! */ + cstate->clp = found; + atomic_inc(&found->cl_refcount); + return nfs_ok; +} + __be32 nfsd4_process_open1(struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct nfsd_net *nn) @@ -3498,18 +3542,6 @@ void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status) free_generic_stateid(open->op_stp); } -static __be32 lookup_clientid(clientid_t *clid, bool session, struct nfsd_net *nn, struct nfs4_client **clp) -{ - struct nfs4_client *found; - - if (STALE_CLIENTID(clid, nn)) - return nfserr_stale_clientid; - found = find_confirmed_client(clid, session, nn); - if (clp) - *clp = found; - return found ? nfs_ok : nfserr_expired; -} - __be32 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, clientid_t *clid) @@ -3521,9 +3553,10 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); dprintk("process_renew(%08x/%08x): starting\n", clid->cl_boot, clid->cl_id); - status = lookup_clientid(clid, cstate->minorversion, nn, &clp); + status = lookup_clientid(clid, cstate, nn); if (status) goto out; + clp = cstate->clp; status = nfserr_cb_path_down; if (!list_empty(&clp->cl_delegations) && clp->cl_cb_state != NFSD4_CB_UP) @@ -3786,22 +3819,19 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s, struct nfsd_net *nn) { - struct nfs4_client *cl; __be32 status; - bool sessions = cstate->minorversion != 0; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return nfserr_bad_stateid; - status = lookup_clientid(&stateid->si_opaque.so_clid, sessions, - nn, &cl); + status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn); if (status == 
nfserr_stale_clientid) { - if (sessions) + if (cstate->session) return nfserr_bad_stateid; return nfserr_stale_stateid; } if (status) return status; - *s = find_stateid_by_type(cl, stateid, typemask); + *s = find_stateid_by_type(cstate->clp, stateid, typemask); if (!*s) return nfserr_bad_stateid; return nfs_ok; @@ -4651,7 +4681,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); if (!nfsd4_has_session(cstate)) { - status = lookup_clientid(&lockt->lt_clientid, false, nn, NULL); + status = lookup_clientid(&lockt->lt_clientid, cstate, nn); if (status) goto out; } @@ -4820,7 +4850,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, nfs4_lock_state(); - status = lookup_clientid(clid, cstate->minorversion, nn, NULL); + status = lookup_clientid(clid, cstate, nn); if (status) goto out; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index a30a7418bbb5..5abf6c942ddf 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -55,6 +55,7 @@ struct nfsd4_compound_state { struct svc_fh current_fh; struct svc_fh save_fh; struct nfs4_stateowner *replay_owner; + struct nfs4_client *clp; /* For sessions DRC */ struct nfsd4_session *session; struct nfsd4_slot *slot; From 13d6f66b0826029051518a71d513dbb1a1146992 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Jun 2014 11:48:45 -0400 Subject: [PATCH 038/167] nfsd: Convert nfsd4_process_open1() to work with lookup_clientid() ...and have alloc_init_open_stateowner just use the cstate->clp pointer instead of passing in a clp separately. This allows us to use the cached nfs4_client pointer in the cstate instead of having to look it up again. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c01d81e21602..342881985eb7 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2768,10 +2768,10 @@ static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, u } static struct nfs4_openowner * -alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, - struct nfsd4_open *open, +alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, struct nfsd4_compound_state *cstate) { + struct nfs4_client *clp = cstate->clp; struct nfs4_openowner *oo; oo = alloc_stateowner(openowner_slab, &open->op_owner, clp); @@ -3054,10 +3054,10 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, oo = find_openstateowner_str(strhashval, open, cstate->minorversion, nn); open->op_openowner = oo; if (!oo) { - clp = find_confirmed_client(clientid, cstate->minorversion, - nn); - if (clp == NULL) - return nfserr_expired; + status = lookup_clientid(clientid, cstate, nn); + if (status) + return status; + clp = cstate->clp; goto new_owner; } if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { @@ -3073,7 +3073,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, clp = oo->oo_owner.so_client; goto alloc_stateid; new_owner: - oo = alloc_init_open_stateowner(strhashval, clp, open, cstate); + oo = alloc_init_open_stateowner(strhashval, open, cstate); if (oo == NULL) return nfserr_jukebox; open->op_openowner = oo; From 2d91e8953cb046d9eef281ddc608fee31a942f35 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Jun 2014 11:48:46 -0400 Subject: [PATCH 039/167] nfsd: Always use lookup_clientid() in nfsd4_process_open1 In later patches, we'll be moving the stateowner table into the nfs4_client, and by doing this we ensure that we have a cached nfs4_client pointer. Signed-off-by: Trond Myklebust Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 342881985eb7..f82aec4193ce 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3050,19 +3050,19 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, if (open->op_file == NULL) return nfserr_jukebox; + status = lookup_clientid(clientid, cstate, nn); + if (status) + return status; + clp = cstate->clp; + strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner); oo = find_openstateowner_str(strhashval, open, cstate->minorversion, nn); open->op_openowner = oo; if (!oo) { - status = lookup_clientid(clientid, cstate, nn); - if (status) - return status; - clp = cstate->clp; goto new_owner; } if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { /* Replace unconfirmed owners without checking for replay. */ - clp = oo->oo_owner.so_client; release_openowner(oo); open->op_openowner = NULL; goto new_owner; @@ -3070,7 +3070,6 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid); if (status) return status; - clp = oo->oo_owner.so_client; goto alloc_stateid; new_owner: oo = alloc_init_open_stateowner(strhashval, open, cstate); From 0fe492db6003218d5c36765c09cce3a5a9f8a2eb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Jun 2014 11:48:47 -0400 Subject: [PATCH 040/167] nfsd: Convert nfs4_check_open_reclaim() to work with lookup_clientid() lookup_clientid is preferable to find_confirmed_client since it's able to use the cached client in the compound state. Signed-off-by: Trond Myklebust Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4proc.c | 3 +-- fs/nfsd/nfs4state.c | 15 ++++++++++----- fs/nfsd/state.h | 3 ++- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 9425ffc48809..29a617ebe38c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -431,8 +431,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; case NFS4_OPEN_CLAIM_PREVIOUS: status = nfs4_check_open_reclaim(&open->op_clientid, - cstate->minorversion, - nn); + cstate, nn); if (status) goto out; open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f82aec4193ce..324e80fbfea9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4969,16 +4969,21 @@ nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn) * Called from OPEN. Look for clientid in reclaim list. */ __be32 -nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn) +nfs4_check_open_reclaim(clientid_t *clid, + struct nfsd4_compound_state *cstate, + struct nfsd_net *nn) { - struct nfs4_client *clp; + __be32 status; /* find clientid in conf_id_hashtbl */ - clp = find_confirmed_client(clid, sessions, nn); - if (clp == NULL) + status = lookup_clientid(clid, cstate, nn); + if (status) return nfserr_reclaim_bad; - return nfsd4_client_record_check(clp) ? 
nfserr_reclaim_bad : nfs_ok; + if (nfsd4_client_record_check(cstate->clp)) + return nfserr_reclaim_bad; + + return nfs_ok; } #ifdef CONFIG_NFSD_FAULT_INJECTION diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index ff160e89701a..06d1a908a58e 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -457,7 +457,8 @@ void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *) extern void nfs4_release_reclaim(struct nfsd_net *); extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn); -extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn); +extern __be32 nfs4_check_open_reclaim(clientid_t *clid, + struct nfsd4_compound_state *cstate, struct nfsd_net *nn); extern int set_callback_cred(void); extern void nfsd4_init_callback(struct nfsd4_callback *); extern void nfsd4_probe_callback(struct nfs4_client *clp); From 01529e3f817908b394221b0a5d985ae3541641cc Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 7 Jul 2014 22:10:56 +0800 Subject: [PATCH 041/167] NFSD: Fix memory leak in encoding denied lock Commit 8c7424cff6 (nfsd4: don't try to encode conflicting owner if low on space) forgot free conf->data in nfsd4_encode_lockt and before sign conf->data to NULL in nfsd4_encode_lock_denied. Signed-off-by: Kinglong Mee Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4xdr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 21ffb9b9b768..1ad7bd4e346f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2874,6 +2874,7 @@ again: * return the conflicting open: */ if (conf->len) { + kfree(conf->data); conf->len = 0; conf->data = NULL; goto again; @@ -2886,6 +2887,7 @@ again: if (conf->len) { p = xdr_encode_opaque_fixed(p, &ld->ld_clientid, 8); p = xdr_encode_opaque(p, conf->data, conf->len); + kfree(conf->data); } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ p = xdr_encode_hyper(p, (u64)0); /* clientid */ *p++ = cpu_to_be32(0); /* length of owner name */ @@ -2902,7 +2904,7 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid); else if (nfserr == nfserr_denied) nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied); - kfree(lock->lk_denied.ld_owner.data); + return nfserr; } From dff1399f8addf7129c49bb2227469da79cc30b47 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 8 Jul 2014 14:02:49 -0400 Subject: [PATCH 042/167] nfsd: close potential race between delegation break and laundromat Bruce says: There's also a preexisting expire_client/laundromat vs break race: - expire_client/laundromat adds a delegation to its local reaplist using the same dl_recall_lru field that a delegation uses to track its position on the recall lru and drops the state lock. - a concurrent break_lease adds the delegation to the lru. - expire_client/laundromat then walks its reaplist and sees the lru head as just another delegation on the list.... Fix this race by checking the dl_time under the state_lock. If we find that it's not 0, then we know that it has already been queued to the LRU list and that we shouldn't queue it again.
In the case of destroy_client, we must also ensure that we don't hit similar races by ensuring that we don't move any delegations to the reaplist with a dl_time of 0. Just bump the dl_time by one before we drop the state_lock. We're destroying the delegations anyway, so a 1s difference there won't matter. The fault injection code also requires a bit of surgery here: First, in the case of nfsd_forget_client_delegations, we must prevent the same sort of race vs. the delegation break callback. For that, we just increment the dl_time to ensure that a delegation callback can't race in while we're working on it. We can't do that for nfsd_recall_client_delegations, as we need to have it actually queue the delegation, and that won't happen if we increment the dl_time. The state lock is held over that function, so we don't need to worry about these sorts of races there. There is one other potential bug in nfsd_recall_client_delegations though. Entries on the victims list are not dequeued before calling nfsd_break_one_deleg. That's a potential list corruptor, so ensure that we do that there. Reported-by: "J. Bruce Fields" Signed-off-by: Jeff Layton Signed-off-by: J.
Bruce Fields --- fs/nfsd/nfs4state.c | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 324e80fbfea9..63c142059137 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1288,6 +1288,8 @@ destroy_client(struct nfs4_client *clp) while (!list_empty(&clp->cl_delegations)) { dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); list_del_init(&dp->dl_perclnt); + /* Ensure that deleg break won't try to requeue it */ + ++dp->dl_time; list_move(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); @@ -2935,10 +2937,14 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) * it's safe to take a reference: */ atomic_inc(&dp->dl_count); - list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); - - /* Only place dl_time is set; protected by i_lock: */ - dp->dl_time = get_seconds(); + /* + * If the dl_time != 0, then we know that it has already been + * queued for a lease break. Don't queue it again. + */ + if (dp->dl_time == 0) { + list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); + dp->dl_time = get_seconds(); + } block_delegations(&dp->dl_fh); @@ -5083,8 +5089,23 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, lockdep_assert_held(&state_lock); list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) { - if (victims) + if (victims) { + /* + * It's not safe to mess with delegations that have a + * non-zero dl_time. They might have already been broken + * and could be processed by the laundromat outside of + * the state_lock. Just leave them be. + */ + if (dp->dl_time != 0) + continue; + + /* + * Increment dl_time to ensure that delegation breaks + * don't monkey with it now that we are. 
+ */ + ++dp->dl_time; list_move(&dp->dl_recall_lru, victims); + } if (++count == max) break; } @@ -5109,14 +5130,19 @@ u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max) u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max) { - struct nfs4_delegation *dp, *next; + struct nfs4_delegation *dp; LIST_HEAD(victims); u64 count; spin_lock(&state_lock); count = nfsd_find_all_delegations(clp, max, &victims); - list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) + while (!list_empty(&victims)) { + dp = list_first_entry(&victims, struct nfs4_delegation, + dl_recall_lru); + list_del_init(&dp->dl_recall_lru); + dp->dl_time = 0; nfsd_break_one_deleg(dp); + } spin_unlock(&state_lock); return count; From d6c249b4d4cfef894cbda224a7a063d17aacb60a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 8 Jul 2014 14:02:50 -0400 Subject: [PATCH 043/167] nfsd: reduce some spinlocking in put_client_renew No need to take the lock unless the count goes to 0. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 63c142059137..3704789ca4b7 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -193,8 +193,10 @@ static void put_client_renew(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - spin_lock(&nn->client_lock); - put_client_renew_locked(clp); + if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock)) + return; + if (!is_client_expired(clp)) + renew_client_locked(clp); spin_unlock(&nn->client_lock); } From 1d31a2531ae91f8a89c0fffa883ef922c0dbb74d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 10 Jul 2014 14:07:25 -0400 Subject: [PATCH 044/167] nfsd: Add fine grained protection for the nfs4_file->fi_stateids list Access to this list is currently serialized by the client_mutex. 
Add finer grained locking around this list in preparation for its removal. Signed-off-by: Trond Myklebust Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 21 ++++++++++++++++++--- fs/nfsd/state.h | 1 + 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3704789ca4b7..cfb10d060c83 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -708,7 +708,11 @@ release_all_access(struct nfs4_ol_stateid *stp) static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) { + struct nfs4_file *fp = stp->st_file; + + spin_lock(&fp->fi_lock); list_del(&stp->st_perfile); + spin_unlock(&fp->fi_lock); list_del(&stp->st_perstateowner); } @@ -2676,6 +2680,7 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) lockdep_assert_held(&state_lock); atomic_set(&fp->fi_ref, 1); + spin_lock_init(&fp->fi_lock); INIT_LIST_HEAD(&fp->fi_stateids); INIT_LIST_HEAD(&fp->fi_delegations); ihold(ino); @@ -2799,7 +2804,6 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_locks); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); - list_add(&stp->st_perfile, &fp->fi_stateids); stp->st_stateowner = &oo->oo_owner; get_nfs4_file(fp); stp->st_file = fp; @@ -2808,6 +2812,9 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, set_access(open->op_share_access, stp); set_deny(open->op_share_deny, stp); stp->st_openstp = NULL; + spin_lock(&fp->fi_lock); + list_add(&stp->st_perfile, &fp->fi_stateids); + spin_unlock(&fp->fi_lock); } static void @@ -2915,6 +2922,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) return nfs_ok; ret = nfserr_locked; /* Search for conflicting share reservations */ + spin_lock(&fp->fi_lock); list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { if (test_deny(deny_type, stp) || 
test_deny(NFS4_SHARE_DENY_BOTH, stp)) @@ -2922,6 +2930,7 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) } ret = nfs_ok; out: + spin_unlock(&fp->fi_lock); put_nfs4_file(fp); return ret; } @@ -3150,6 +3159,7 @@ nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_st struct nfs4_ol_stateid *local; struct nfs4_openowner *oo = open->op_openowner; + spin_lock(&fp->fi_lock); list_for_each_entry(local, &fp->fi_stateids, st_perfile) { /* ignore lock owners */ if (local->st_stateowner->so_is_open_owner == 0) @@ -3158,9 +3168,12 @@ nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_st if (local->st_stateowner == &oo->oo_owner) *stpp = local; /* check for conflicting share reservations */ - if (!test_share(local, open)) + if (!test_share(local, open)) { + spin_unlock(&fp->fi_lock); return nfserr_share_denied; + } } + spin_unlock(&fp->fi_lock); return nfs_ok; } @@ -4408,7 +4421,6 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct if (stp == NULL) return NULL; stp->st_stid.sc_type = NFS4_LOCK_STID; - list_add(&stp->st_perfile, &fp->fi_stateids); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); stp->st_stateowner = &lo->lo_owner; get_nfs4_file(fp); @@ -4417,6 +4429,9 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; list_add(&stp->st_locks, &open_stp->st_locks); + spin_lock(&fp->fi_lock); + list_add(&stp->st_perfile, &fp->fi_stateids); + spin_unlock(&fp->fi_lock); return stp; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 06d1a908a58e..04737b3ed363 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -377,6 +377,7 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so) /* nfs4_file: a file opened by some number of (open) nfs4_stateowners. 
*/ struct nfs4_file { atomic_t fi_ref; + spinlock_t fi_lock; struct hlist_node fi_hash; /* hash by "struct inode *" */ struct list_head fi_stateids; struct list_head fi_delegations; From de18643dce70e0d7c3dbccb5d2c8f17f04bc24a6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 10 Jul 2014 14:07:26 -0400 Subject: [PATCH 045/167] nfsd: Add locking to the nfs4_file->fi_fds[] array Preparation for removal of the client_mutex, which currently protects this array. While we don't actually need the find_*_file_locked variants just yet, a later patch will. So go ahead and add them now to reduce future churn in this code. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 134 ++++++++++++++++++++++++++++++++++++++------ fs/nfsd/state.h | 26 --------- 2 files changed, 118 insertions(+), 42 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cfb10d060c83..314dc8061461 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -268,6 +268,79 @@ get_nfs4_file(struct nfs4_file *fi) atomic_inc(&fi->fi_ref); } +static struct file * +__nfs4_get_fd(struct nfs4_file *f, int oflag) +{ + if (f->fi_fds[oflag]) + return get_file(f->fi_fds[oflag]); + return NULL; +} + +static struct file * +find_writeable_file_locked(struct nfs4_file *f) +{ + struct file *ret; + + lockdep_assert_held(&f->fi_lock); + + ret = __nfs4_get_fd(f, O_WRONLY); + if (!ret) + ret = __nfs4_get_fd(f, O_RDWR); + return ret; +} + +static struct file * +find_writeable_file(struct nfs4_file *f) +{ + struct file *ret; + + spin_lock(&f->fi_lock); + ret = find_writeable_file_locked(f); + spin_unlock(&f->fi_lock); + + return ret; +} + +static struct file *find_readable_file_locked(struct nfs4_file *f) +{ + struct file *ret; + + lockdep_assert_held(&f->fi_lock); + + ret = __nfs4_get_fd(f, O_RDONLY); + if (!ret) + ret = __nfs4_get_fd(f, O_RDWR); + return ret; +} + +static struct file * 
+find_readable_file(struct nfs4_file *f) +{ + struct file *ret; + + spin_lock(&f->fi_lock); + ret = find_readable_file_locked(f); + spin_unlock(&f->fi_lock); + + return ret; +} + +static struct file * +find_any_file(struct nfs4_file *f) +{ + struct file *ret; + + spin_lock(&f->fi_lock); + ret = __nfs4_get_fd(f, O_RDWR); + if (!ret) { + ret = __nfs4_get_fd(f, O_WRONLY); + if (!ret) + ret = __nfs4_get_fd(f, O_RDONLY); + } + spin_unlock(&f->fi_lock); + return ret; +} + static int num_delegations; unsigned long max_delegations; @@ -316,20 +389,31 @@ static void nfs4_file_get_access(struct nfs4_file *fp, int oflag) __nfs4_file_get_access(fp, oflag); } -static void nfs4_file_put_fd(struct nfs4_file *fp, int oflag) +static struct file *nfs4_file_put_fd(struct nfs4_file *fp, int oflag) { - if (fp->fi_fds[oflag]) { - fput(fp->fi_fds[oflag]); - fp->fi_fds[oflag] = NULL; - } + struct file *filp; + + filp = fp->fi_fds[oflag]; + fp->fi_fds[oflag] = NULL; + return filp; } static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) { - if (atomic_dec_and_test(&fp->fi_access[oflag])) { - nfs4_file_put_fd(fp, oflag); + might_lock(&fp->fi_lock); + + if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) { + struct file *f1 = NULL; + struct file *f2 = NULL; + + f1 = nfs4_file_put_fd(fp, oflag); if (atomic_read(&fp->fi_access[1 - oflag]) == 0) - nfs4_file_put_fd(fp, O_RDWR); + f2 = nfs4_file_put_fd(fp, O_RDWR); + spin_unlock(&fp->fi_lock); + if (f1) + fput(f1); + if (f2) + fput(f2); } } @@ -737,8 +821,10 @@ static void __release_lock_stateid(struct nfs4_ol_stateid *stp) unhash_generic_stateid(stp); unhash_stid(&stp->st_stid); file = find_any_file(stp->st_file); - if (file) + if (file) { locks_remove_posix(file, (fl_owner_t)lockowner(stp->st_stateowner)); + fput(file); + } close_generic_stateid(stp); free_generic_stateid(stp); } @@ -3206,17 +3292,27 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct 
nfs4_file *fp, struct svc_fh *cur_fh, struct nfsd4_open *open) { + struct file *filp = NULL; __be32 status; int oflag = nfs4_access_to_omode(open->op_share_access); int access = nfs4_access_to_access(open->op_share_access); + spin_lock(&fp->fi_lock); if (!fp->fi_fds[oflag]) { - status = nfsd_open(rqstp, cur_fh, S_IFREG, access, - &fp->fi_fds[oflag]); + spin_unlock(&fp->fi_lock); + status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp); if (status) goto out; + spin_lock(&fp->fi_lock); + if (!fp->fi_fds[oflag]) { + fp->fi_fds[oflag] = filp; + filp = NULL; + } } nfs4_file_get_access(fp, oflag); + spin_unlock(&fp->fi_lock); + if (filp) + fput(filp); status = nfsd4_truncate(rqstp, cur_fh, open); if (status) @@ -3301,13 +3397,15 @@ static int nfs4_setlease(struct nfs4_delegation *dp) if (status) goto out_free; fp->fi_lease = fl; - fp->fi_deleg_file = get_file(fl->fl_file); + fp->fi_deleg_file = fl->fl_file; atomic_set(&fp->fi_delegees, 1); spin_lock(&state_lock); hash_delegation_locked(dp, fp); spin_unlock(&state_lock); return 0; out_free: + if (fl->fl_file) + fput(fl->fl_file); locks_free_lock(fl); return status; } @@ -3905,6 +4003,7 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, status = nfserr_serverfault; goto out; } + get_file(file); } break; case NFS4_OPEN_STID: @@ -3932,7 +4031,7 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, } status = nfs_ok; if (file) - *filpp = get_file(file); + *filpp = file; out: nfs4_unlock_state(); return status; @@ -4653,6 +4752,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; } out: + if (filp) + fput(filp); if (status && new_state) release_lock_stateid(lock_stp); nfsd4_bump_seqid(cstate, status); @@ -4793,7 +4894,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!file_lock) { dprintk("NFSD: %s: unable to allocate lock!\n", __func__); status = nfserr_jukebox; - goto out; + goto fput; } 
locks_init_lock(file_lock); file_lock->fl_type = F_UNLCK; @@ -4815,7 +4916,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } update_stateid(&stp->st_stid.sc_stateid); memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); - +fput: + fput(filp); out: nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) @@ -4826,7 +4928,7 @@ out: out_nfserr: status = nfserrno(err); - goto out; + goto fput; } /* diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 04737b3ed363..9f1159d5de56 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -398,32 +398,6 @@ struct nfs4_file { bool fi_had_conflict; }; -/* XXX: for first cut may fall back on returning file that doesn't work - * at all? */ -static inline struct file *find_writeable_file(struct nfs4_file *f) -{ - if (f->fi_fds[O_WRONLY]) - return f->fi_fds[O_WRONLY]; - return f->fi_fds[O_RDWR]; -} - -static inline struct file *find_readable_file(struct nfs4_file *f) -{ - if (f->fi_fds[O_RDONLY]) - return f->fi_fds[O_RDONLY]; - return f->fi_fds[O_RDWR]; -} - -static inline struct file *find_any_file(struct nfs4_file *f) -{ - if (f->fi_fds[O_RDWR]) - return f->fi_fds[O_RDWR]; - else if (f->fi_fds[O_WRONLY]) - return f->fi_fds[O_WRONLY]; - else - return f->fi_fds[O_RDONLY]; -} - /* "ol" stands for "Open or Lock". Better suggestions welcome. */ struct nfs4_ol_stateid { struct nfs4_stid st_stid; /* must be first field */ From e20fcf1e6586ff1620adc345ad2a93d5ee5def59 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 10 Jul 2014 14:07:27 -0400 Subject: [PATCH 046/167] nfsd: clean up helper __release_lock_stateid Use filp_close instead of open coding. filp_close does a bit more than just release the locks and put the filp. It also calls ->flush and dnotify_flush, both of which should be done here anyway. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 314dc8061461..4ab567e7db0f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -821,10 +821,8 @@ static void __release_lock_stateid(struct nfs4_ol_stateid *stp) unhash_generic_stateid(stp); unhash_stid(&stp->st_stid); file = find_any_file(stp->st_file); - if (file) { - locks_remove_posix(file, (fl_owner_t)lockowner(stp->st_stateowner)); - fput(file); - } + if (file) + filp_close(file, (fl_owner_t)lockowner(stp->st_stateowner)); close_generic_stateid(stp); free_generic_stateid(stp); } From 12659651721a1c291ec8e1976925985a2c1bfe7c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 10 Jul 2014 14:07:28 -0400 Subject: [PATCH 047/167] nfsd: refactor nfs4_file_get_access and nfs4_file_put_access Have them take NFS4_SHARE_ACCESS_* flags instead of an open mode. This spares the callers from having to convert it themselves. This also allows us to simplify these functions as we no longer need to do the access_to_omode conversion in either one. Note too that this patch eliminates the WARN_ON in __nfs4_file_get_access. It's valid for now, but in a later patch we'll be bumping the refcounts prior to opening the file in order to close some races, at which point we'll need to remove it anyway. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 48 +++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4ab567e7db0f..a19257f91f25 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -374,19 +374,24 @@ static unsigned int file_hashval(struct inode *ino) static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; -static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag) +static void +__nfs4_file_get_access(struct nfs4_file *fp, u32 access) { - WARN_ON_ONCE(!(fp->fi_fds[oflag] || fp->fi_fds[O_RDWR])); - atomic_inc(&fp->fi_access[oflag]); + if (access & NFS4_SHARE_ACCESS_WRITE) + atomic_inc(&fp->fi_access[O_WRONLY]); + if (access & NFS4_SHARE_ACCESS_READ) + atomic_inc(&fp->fi_access[O_RDONLY]); } -static void nfs4_file_get_access(struct nfs4_file *fp, int oflag) +static __be32 +nfs4_file_get_access(struct nfs4_file *fp, u32 access) { - if (oflag == O_RDWR) { - __nfs4_file_get_access(fp, O_RDONLY); - __nfs4_file_get_access(fp, O_WRONLY); - } else - __nfs4_file_get_access(fp, oflag); + /* Does this access mode make sense? 
*/ + if (access & ~NFS4_SHARE_ACCESS_BOTH) + return nfserr_inval; + + __nfs4_file_get_access(fp, access); + return nfs_ok; } static struct file *nfs4_file_put_fd(struct nfs4_file *fp, int oflag) @@ -417,13 +422,14 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) } } -static void nfs4_file_put_access(struct nfs4_file *fp, int oflag) +static void nfs4_file_put_access(struct nfs4_file *fp, u32 access) { - if (oflag == O_RDWR) { - __nfs4_file_put_access(fp, O_RDONLY); + WARN_ON_ONCE(access & ~NFS4_SHARE_ACCESS_BOTH); + + if (access & NFS4_SHARE_ACCESS_WRITE) __nfs4_file_put_access(fp, O_WRONLY); - } else - __nfs4_file_put_access(fp, oflag); + if (access & NFS4_SHARE_ACCESS_READ) + __nfs4_file_put_access(fp, O_RDONLY); } static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct @@ -784,8 +790,7 @@ release_all_access(struct nfs4_ol_stateid *stp) for (i = 1; i < 4; i++) { if (test_access(i, stp)) - nfs4_file_put_access(stp->st_file, - nfs4_access_to_omode(i)); + nfs4_file_put_access(stp->st_file, i); clear_access(i, stp); } } @@ -3307,10 +3312,12 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, filp = NULL; } } - nfs4_file_get_access(fp, oflag); + status = nfs4_file_get_access(fp, open->op_share_access); spin_unlock(&fp->fi_lock); if (filp) fput(filp); + if (status) + goto out_put_access; status = nfsd4_truncate(rqstp, cur_fh, open); if (status) @@ -3319,7 +3326,7 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, return nfs_ok; out_put_access: - nfs4_file_put_access(fp, oflag); + nfs4_file_put_access(fp, open->op_share_access); out: return status; } @@ -4228,7 +4235,7 @@ static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 a { if (!test_access(access, stp)) return; - nfs4_file_put_access(stp->st_file, nfs4_access_to_omode(access)); + nfs4_file_put_access(stp->st_file, access); clear_access(access, stp); } @@ -4555,11 +4562,10 @@ 
check_lock_length(u64 offset, u64 length) static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) { struct nfs4_file *fp = lock_stp->st_file; - int oflag = nfs4_access_to_omode(access); if (test_access(access, lock_stp)) return; - nfs4_file_get_access(fp, oflag); + __nfs4_file_get_access(fp, access); set_access(access, lock_stp); } From 6d338b51eb6e37b4d6f1459c892f5ec7df0dad88 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 10 Jul 2014 14:07:29 -0400 Subject: [PATCH 048/167] nfsd: remove nfs4_file_put_fd ...and replace it with a simple swap call. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a19257f91f25..c02bad6d7e90 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -394,15 +394,6 @@ nfs4_file_get_access(struct nfs4_file *fp, u32 access) return nfs_ok; } -static struct file *nfs4_file_put_fd(struct nfs4_file *fp, int oflag) -{ - struct file *filp; - - filp = fp->fi_fds[oflag]; - fp->fi_fds[oflag] = NULL; - return filp; -} - static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) { might_lock(&fp->fi_lock); @@ -411,9 +402,9 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) struct file *f1 = NULL; struct file *f2 = NULL; - f1 = nfs4_file_put_fd(fp, oflag); + swap(f1, fp->fi_fds[oflag]); if (atomic_read(&fp->fi_access[1 - oflag]) == 0) - f2 = nfs4_file_put_fd(fp, O_RDWR); + swap(f2, fp->fi_fds[O_RDWR]); spin_unlock(&fp->fi_lock); if (f1) fput(f1); From c11c591fe6682e0d642bf9242e53554a50e5fbc0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 10 Jul 2014 14:07:30 -0400 Subject: [PATCH 049/167] nfsd: shrink st_access_bmap and st_deny_bmap We never use anything above bit #3, so an unsigned long for each is wasteful. 
Shrink them to a char each, and add some WARN_ON_ONCE calls if we try to set or clear bits that would go outside those sizes. Note too that because atomic bitops work on unsigned longs, we have to abandon their use here. That shouldn't be a problem though since we don't really care about the atomicity in this code anyway. Using them was just a convenient way to flip bits. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 38 +++++++++++++++++++++++++++----------- fs/nfsd/state.h | 4 ++-- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c02bad6d7e90..f7f11631c26c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -721,42 +721,58 @@ test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { static inline void set_access(u32 access, struct nfs4_ol_stateid *stp) { - __set_bit(access, &stp->st_access_bmap); + unsigned char mask = 1 << access; + + WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH); + stp->st_access_bmap |= mask; } /* clear share access for a given stateid */ static inline void clear_access(u32 access, struct nfs4_ol_stateid *stp) { - __clear_bit(access, &stp->st_access_bmap); + unsigned char mask = 1 << access; + + WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH); + stp->st_access_bmap &= ~mask; } /* test whether a given stateid has access */ static inline bool test_access(u32 access, struct nfs4_ol_stateid *stp) { - return test_bit(access, &stp->st_access_bmap); + unsigned char mask = 1 << access; + + return (bool)(stp->st_access_bmap & mask); } /* set share deny for a given stateid */ static inline void -set_deny(u32 access, struct nfs4_ol_stateid *stp) +set_deny(u32 deny, struct nfs4_ol_stateid *stp) { - __set_bit(access, &stp->st_deny_bmap); + unsigned char mask = 1 << deny; + + WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH); + stp->st_deny_bmap |= mask; } /* clear share deny for a given stateid */ static inline void 
-clear_deny(u32 access, struct nfs4_ol_stateid *stp) +clear_deny(u32 deny, struct nfs4_ol_stateid *stp) { - __clear_bit(access, &stp->st_deny_bmap); + unsigned char mask = 1 << deny; + + WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH); + stp->st_deny_bmap &= ~mask; } /* test whether a given stateid is denying specific access */ static inline bool -test_deny(u32 access, struct nfs4_ol_stateid *stp) +test_deny(u32 deny, struct nfs4_ol_stateid *stp) { - return test_bit(access, &stp->st_deny_bmap); + unsigned char mask = 1 << deny; + + return (bool)(stp->st_deny_bmap & mask); } static int nfs4_access_to_omode(u32 access) @@ -4282,12 +4298,12 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, goto out; status = nfserr_inval; if (!test_access(od->od_share_access, stp)) { - dprintk("NFSD: access not a subset current bitmap: 0x%lx, input access=%08x\n", + dprintk("NFSD: access not a subset of current bitmap: 0x%hhx, input access=%08x\n", stp->st_access_bmap, od->od_share_access); goto out; } if (!test_deny(od->od_share_deny, stp)) { - dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n", + dprintk("NFSD: deny not a subset of current bitmap: 0x%hhx, input deny=%08x\n", stp->st_deny_bmap, od->od_share_deny); goto out; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 9f1159d5de56..72aee4b4f1ae 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -406,8 +406,8 @@ struct nfs4_ol_stateid { struct list_head st_locks; struct nfs4_stateowner * st_stateowner; struct nfs4_file * st_file; - unsigned long st_access_bmap; - unsigned long st_deny_bmap; + unsigned char st_access_bmap; + unsigned char st_deny_bmap; struct nfs4_ol_stateid * st_openstp; }; From 6eb3a1d096751bcdec8fd9d9bb565fa9cba5897f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 10 Jul 2014 14:07:31 -0400 Subject: [PATCH 050/167] nfsd: set stateid access and deny bits in nfs4_get_vfs_file Cleanup -- ensure that the stateid bits are set at the same time that the file access refcounts are 
incremented. Keeping them coherent like this makes it easier to ensure that we account for all of the references. Since the initialization of the st_*_bmap fields is done when it's hashed, we go ahead and hash the stateid before getting access to the file and unhash it if that function returns error. This will be necessary anyway in a follow-on patch that will overhaul deny mode handling. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f7f11631c26c..0a54fc956463 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3300,7 +3300,8 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, } static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, - struct svc_fh *cur_fh, struct nfsd4_open *open) + struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, + struct nfsd4_open *open) { struct file *filp = NULL; __be32 status; @@ -3330,6 +3331,9 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, if (status) goto out_put_access; + /* Set access and deny bits in stateid */ + set_access(open->op_share_access, stp); + set_deny(open->op_share_deny, stp); return nfs_ok; out_put_access: @@ -3341,20 +3345,15 @@ out: static __be32 nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { - u32 op_share_access = open->op_share_access; __be32 status; - if (!test_access(op_share_access, stp)) - status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open); + if (!test_access(open->op_share_access, stp)) + status = nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open); else status = nfsd4_truncate(rqstp, cur_fh, open); if (status) return status; - - /* remember the open */ - set_access(op_share_access, stp); - set_deny(open->op_share_deny, stp); return 
nfs_ok; } @@ -3602,12 +3601,14 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf if (status) goto out; } else { - status = nfs4_get_vfs_file(rqstp, fp, current_fh, open); - if (status) - goto out; stp = open->op_stp; open->op_stp = NULL; init_open_stateid(stp, fp, open); + status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); + if (status) { + release_open_stateid(stp); + goto out; + } } update_stateid(&stp->st_stid.sc_stateid); memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); From 3b84240a7b756e3fea8eaea5a29e7c10afbd0a47 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 10 Jul 2014 14:07:32 -0400 Subject: [PATCH 051/167] nfsd: clean up reset_union_bmap_deny Fix the "deny" argument type, and start the loop at 1. The 0 iteration is always a noop. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0a54fc956463..5f7294712ad4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4266,10 +4266,11 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac } static void -reset_union_bmap_deny(unsigned long deny, struct nfs4_ol_stateid *stp) +reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp) { int i; - for (i = 0; i < 4; i++) { + + for (i = 1; i < 4; i++) { if ((i & deny) != i) clear_deny(i, stp); } From 7214e8600eee146b6ea79eb6b7b01b343856a7c6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 10 Jul 2014 14:07:33 -0400 Subject: [PATCH 052/167] nfsd: always hold the fi_lock when bumping fi_access refcounts Once we remove the client_mutex, there's an unlikely but possible race that could occur. It will be possible for nfs4_file_put_access to race with nfs4_file_get_access. The refcount will go to zero (briefly) and then bumped back to one. 
If that happens we set ourselves up for a use-after-free and the potential for a lock to race onto the i_flock list as a filp is being torn down. Ensure that we can safely bump the refcount on the file by holding the fi_lock whenever that's done. The only place it currently isn't is in get_lock_access. In order to ensure atomicity with finding the file, use the find_*_file_locked variants and then call get_lock_access to get new access references on the nfs4_file under the same lock. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5f7294712ad4..8f320f2f8b84 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -377,6 +377,8 @@ static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; static void __nfs4_file_get_access(struct nfs4_file *fp, u32 access) { + lockdep_assert_held(&fp->fi_lock); + if (access & NFS4_SHARE_ACCESS_WRITE) atomic_inc(&fp->fi_access[O_WRONLY]); if (access & NFS4_SHARE_ACCESS_READ) @@ -386,6 +388,8 @@ __nfs4_file_get_access(struct nfs4_file *fp, u32 access) static __be32 nfs4_file_get_access(struct nfs4_file *fp, u32 access) { + lockdep_assert_held(&fp->fi_lock); + /* Does this access mode make sense? 
*/ if (access & ~NFS4_SHARE_ACCESS_BOTH) return nfserr_inval; @@ -4572,6 +4576,8 @@ static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) { struct nfs4_file *fp = lock_stp->st_file; + lockdep_assert_held(&fp->fi_lock); + if (test_access(access, lock_stp)) return; __nfs4_file_get_access(fp, access); @@ -4623,6 +4629,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfs4_openowner *open_sop = NULL; struct nfs4_lockowner *lock_sop = NULL; struct nfs4_ol_stateid *lock_stp; + struct nfs4_file *fp; struct file *filp = NULL; struct file_lock *file_lock = NULL; struct file_lock *conflock = NULL; @@ -4703,20 +4710,25 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } + fp = lock_stp->st_file; locks_init_lock(file_lock); switch (lock->lk_type) { case NFS4_READ_LT: case NFS4_READW_LT: - filp = find_readable_file(lock_stp->st_file); + spin_lock(&fp->fi_lock); + filp = find_readable_file_locked(fp); if (filp) get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ); + spin_unlock(&fp->fi_lock); file_lock->fl_type = F_RDLCK; break; case NFS4_WRITE_LT: case NFS4_WRITEW_LT: - filp = find_writeable_file(lock_stp->st_file); + spin_lock(&fp->fi_lock); + filp = find_writeable_file_locked(fp); if (filp) get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE); + spin_unlock(&fp->fi_lock); file_lock->fl_type = F_WRLCK; break; default: From baeb4ff0e50281db6925223a096a506f02993b88 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 10 Jul 2014 14:07:34 -0400 Subject: [PATCH 053/167] nfsd: make deny mode enforcement more efficient and close races in it The current enforcement of deny modes is both inefficient and scattered across several places, which makes it hard to guarantee atomicity. The inefficiency is a problem now, and the lack of atomicity will mean races once the client_mutex is removed. First, we address the inefficiency. 
We have to track deny modes on a per-stateid basis to ensure that open downgrades are sane, but when the server goes to enforce them it has to walk the entire list of stateids and check against each one. Instead of doing that, maintain a per-nfs4_file deny mode. When a file is opened, we simply set any deny bits in that mode that were specified in the OPEN call. We can then use that unified deny mode to do a simple check to see whether there are any conflicts without needing to walk the entire stateid list. The only time we'll need to walk the entire list of stateids is when a stateid that has a deny mode on it is being released, or one is having its deny mode downgraded. In that case, we must walk the entire list and recalculate the fi_share_deny field. Since deny modes are pretty rare today, this should be very rare under normal workloads. To address the potential for races once the client_mutex is removed, protect fi_share_deny with the fi_lock. In nfs4_get_vfs_file, check to make sure that any deny mode we want to apply won't conflict with existing access. If that's ok, then have nfs4_file_get_access check that new access to the file won't conflict with existing deny modes. If that also passes, then get file access references, set the correct access and deny bits in the stateid, and update the fi_share_deny field. If opening the file or truncating it fails, then unwind the whole mess and return the appropriate error. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 182 ++++++++++++++++++++++++++++++-------------- fs/nfsd/state.h | 1 + 2 files changed, 125 insertions(+), 58 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8f320f2f8b84..da88b31c0afe 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -394,10 +394,33 @@ nfs4_file_get_access(struct nfs4_file *fp, u32 access) if (access & ~NFS4_SHARE_ACCESS_BOTH) return nfserr_inval; + /* Does it conflict with a deny mode already set? */ + if ((access & fp->fi_share_deny) != 0) + return nfserr_share_denied; + __nfs4_file_get_access(fp, access); return nfs_ok; } +static __be32 nfs4_file_check_deny(struct nfs4_file *fp, u32 deny) +{ + /* Common case is that there is no deny mode. */ + if (deny) { + /* Does this deny mode make sense? */ + if (deny & ~NFS4_SHARE_DENY_BOTH) + return nfserr_inval; + + if ((deny & NFS4_SHARE_DENY_READ) && + atomic_read(&fp->fi_access[O_RDONLY])) + return nfserr_share_denied; + + if ((deny & NFS4_SHARE_DENY_WRITE) && + atomic_read(&fp->fi_access[O_WRONLY])) + return nfserr_share_denied; + } + return nfs_ok; +} + static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) { might_lock(&fp->fi_lock); @@ -710,17 +733,6 @@ bmap_to_share_mode(unsigned long bmap) { return access; } -static bool -test_share(struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { - unsigned int access, deny; - - access = bmap_to_share_mode(stp->st_access_bmap); - deny = bmap_to_share_mode(stp->st_deny_bmap); - if ((access & open->op_share_deny) || (deny & open->op_share_access)) - return false; - return true; -} - /* set share access for a given stateid */ static inline void set_access(u32 access, struct nfs4_ol_stateid *stp) @@ -793,11 +805,49 @@ static int nfs4_access_to_omode(u32 access) return O_RDONLY; } +/* + * A stateid that had a deny mode associated with it is being released + * or downgraded. Recalculate the deny mode on the file. 
+ */ +static void +recalculate_deny_mode(struct nfs4_file *fp) +{ + struct nfs4_ol_stateid *stp; + + spin_lock(&fp->fi_lock); + fp->fi_share_deny = 0; + list_for_each_entry(stp, &fp->fi_stateids, st_perfile) + fp->fi_share_deny |= bmap_to_share_mode(stp->st_deny_bmap); + spin_unlock(&fp->fi_lock); +} + +static void +reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp) +{ + int i; + bool change = false; + + for (i = 1; i < 4; i++) { + if ((i & deny) != i) { + change = true; + clear_deny(i, stp); + } + } + + /* Recalculate per-file deny mode if there was a change */ + if (change) + recalculate_deny_mode(stp->st_file); +} + /* release all access and file references for a given stateid */ static void release_all_access(struct nfs4_ol_stateid *stp) { int i; + struct nfs4_file *fp = stp->st_file; + + if (fp && stp->st_deny_bmap != 0) + recalculate_deny_mode(fp); for (i = 1; i < 4; i++) { if (test_access(i, stp)) @@ -2787,6 +2837,7 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) fp->fi_inode = ino; fp->fi_had_conflict = false; fp->fi_lease = NULL; + fp->fi_share_deny = 0; memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); memset(fp->fi_access, 0, sizeof(fp->fi_access)); hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]); @@ -3014,22 +3065,15 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) { struct inode *ino = current_fh->fh_dentry->d_inode; struct nfs4_file *fp; - struct nfs4_ol_stateid *stp; - __be32 ret; + __be32 ret = nfs_ok; fp = find_file(ino); if (!fp) - return nfs_ok; - ret = nfserr_locked; - /* Search for conflicting share reservations */ + return ret; + /* Check for conflicting share reservations */ spin_lock(&fp->fi_lock); - list_for_each_entry(stp, &fp->fi_stateids, st_perfile) { - if (test_deny(deny_type, stp) || - test_deny(NFS4_SHARE_DENY_BOTH, stp)) - goto out; - } - ret = nfs_ok; -out: + if (fp->fi_share_deny & deny_type) + ret = nfserr_locked; spin_unlock(&fp->fi_lock); put_nfs4_file(fp); return ret; @@ 
-3265,12 +3309,9 @@ nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_st if (local->st_stateowner->so_is_open_owner == 0) continue; /* remember if we have seen this open owner */ - if (local->st_stateowner == &oo->oo_owner) + if (local->st_stateowner == &oo->oo_owner) { *stpp = local; - /* check for conflicting share reservations */ - if (!test_share(local, open)) { - spin_unlock(&fp->fi_lock); - return nfserr_share_denied; + break; } } spin_unlock(&fp->fi_lock); @@ -3311,56 +3352,91 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, __be32 status; int oflag = nfs4_access_to_omode(open->op_share_access); int access = nfs4_access_to_access(open->op_share_access); + unsigned char old_access_bmap, old_deny_bmap; spin_lock(&fp->fi_lock); + + /* + * Are we trying to set a deny mode that would conflict with + * current access? + */ + status = nfs4_file_check_deny(fp, open->op_share_deny); + if (status != nfs_ok) { + spin_unlock(&fp->fi_lock); + goto out; + } + + /* set access to the file */ + status = nfs4_file_get_access(fp, open->op_share_access); + if (status != nfs_ok) { + spin_unlock(&fp->fi_lock); + goto out; + } + + /* Set access bits in stateid */ + old_access_bmap = stp->st_access_bmap; + set_access(open->op_share_access, stp); + + /* Set new deny mask */ + old_deny_bmap = stp->st_deny_bmap; + set_deny(open->op_share_deny, stp); + fp->fi_share_deny |= (open->op_share_deny & NFS4_SHARE_DENY_BOTH); + if (!fp->fi_fds[oflag]) { spin_unlock(&fp->fi_lock); status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp); if (status) - goto out; + goto out_put_access; spin_lock(&fp->fi_lock); if (!fp->fi_fds[oflag]) { fp->fi_fds[oflag] = filp; filp = NULL; } } - status = nfs4_file_get_access(fp, open->op_share_access); spin_unlock(&fp->fi_lock); if (filp) fput(filp); - if (status) - goto out_put_access; status = nfsd4_truncate(rqstp, cur_fh, open); if (status) goto out_put_access; - - /* Set access and deny bits in 
stateid */ - set_access(open->op_share_access, stp); - set_deny(open->op_share_deny, stp); - return nfs_ok; - -out_put_access: - nfs4_file_put_access(fp, open->op_share_access); out: return status; +out_put_access: + stp->st_access_bmap = old_access_bmap; + nfs4_file_put_access(fp, open->op_share_access); + reset_union_bmap_deny(bmap_to_share_mode(old_deny_bmap), stp); + goto out; } static __be32 nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open) { __be32 status; + unsigned char old_deny_bmap; if (!test_access(open->op_share_access, stp)) - status = nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open); - else - status = nfsd4_truncate(rqstp, cur_fh, open); + return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open); - if (status) + /* test and set deny mode */ + spin_lock(&fp->fi_lock); + status = nfs4_file_check_deny(fp, open->op_share_deny); + if (status == nfs_ok) { + old_deny_bmap = stp->st_deny_bmap; + set_deny(open->op_share_deny, stp); + fp->fi_share_deny |= + (open->op_share_deny & NFS4_SHARE_DENY_BOTH); + } + spin_unlock(&fp->fi_lock); + + if (status != nfs_ok) return status; - return nfs_ok; -} + status = nfsd4_truncate(rqstp, cur_fh, open); + if (status != nfs_ok) + reset_union_bmap_deny(old_deny_bmap, stp); + return status; +} static void nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session) @@ -3582,7 +3658,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ fp = find_or_add_file(ino, open->op_file); if (fp != open->op_file) { - if ((status = nfs4_check_open(fp, open, &stp))) + status = nfs4_check_open(fp, open, &stp); + if (status) goto out; status = nfs4_check_deleg(cl, open, &dp); if (status) @@ -4269,17 +4346,6 @@ static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_ac } } -static void -reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp) -{ - int i; - - for (i = 1; i < 4; i++) { - if ((i & 
deny) != i) - clear_deny(i, stp); - } -} - __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 72aee4b4f1ae..015b972da8ba 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -391,6 +391,7 @@ struct nfs4_file { * + 1 to both of the above if NFS4_SHARE_ACCESS_BOTH is set. */ atomic_t fi_access[2]; + u32 fi_share_deny; struct file *fi_deleg_file; struct file_lock *fi_lease; atomic_t fi_delegees; From a46cb7f2878d22b5df190970416cea40982ec2fb Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 10 Jul 2014 14:07:35 -0400 Subject: [PATCH 054/167] nfsd: cleanup and rename nfs4_check_open Rename it to better describe what it does, and have it just return the stateid instead of a __be32 (which is now always nfs_ok). Also, do the search for an existing stateid after the delegation check, to reduce cleanup if the delegation check returns error. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index da88b31c0afe..225f98c7d00d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3297,10 +3297,10 @@ out: return nfs_ok; } -static __be32 -nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_stateid **stpp) +static struct nfs4_ol_stateid * +nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) { - struct nfs4_ol_stateid *local; + struct nfs4_ol_stateid *local, *ret = NULL; struct nfs4_openowner *oo = open->op_openowner; spin_lock(&fp->fi_lock); @@ -3308,14 +3308,13 @@ nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_ol_st /* ignore lock owners */ if (local->st_stateowner->so_is_open_owner == 0) continue; - /* remember if we have seen this open owner */ if (local->st_stateowner == &oo->oo_owner) { - *stpp = local; + ret = local; break; } } spin_unlock(&fp->fi_lock); - return nfs_ok; + return ret; } static inline int nfs4_access_to_access(u32 nfs4_access) @@ -3658,12 +3657,10 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ fp = find_or_add_file(ino, open->op_file); if (fp != open->op_file) { - status = nfs4_check_open(fp, open, &stp); - if (status) - goto out; status = nfs4_check_deleg(cl, open, &dp); if (status) goto out; + stp = nfsd4_find_existing_open(fp, open); } else { open->op_file = NULL; status = nfserr_bad_stateid; From 255942907e7ff498ab1545b5edce5690833ff640 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 9 Jul 2014 13:49:15 -0500 Subject: [PATCH 055/167] svcrdma: send_write() must not overflow the device's max sge Function send_write() must stop creating sges when it reaches the device max and return the amount sent in the RDMA Write to the caller. Signed-off-by: Steve Wise Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 39 +++++++++++---------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 49fd21a5c215..9f1b50689c0f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -192,6 +192,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, xdr_sge_no++; BUG_ON(xdr_sge_no > vec->count); bc -= sge_bytes; + if (sge_no == xprt->sc_max_sge) + break; } /* Prepare WRITE WR */ @@ -209,7 +211,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, atomic_inc(&rdma_stat_write); if (svc_rdma_send(xprt, &write_wr)) goto err; - return 0; + return write_len - bc; err: svc_rdma_unmap_dma(ctxt); svc_rdma_put_context(ctxt, 0); @@ -225,7 +227,6 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, { u32 xfer_len = rqstp->rq_res.page_len + rqstp->rq_res.tail[0].iov_len; int write_len; - int max_write; u32 xdr_off; int chunk_off; int chunk_no; @@ -239,8 +240,6 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, res_ary = (struct rpcrdma_write_array *) &rdma_resp->rm_body.rm_chunks[1]; - max_write = xprt->sc_max_sge * PAGE_SIZE; - /* Write chunks start at the pagelist */ for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; xfer_len && chunk_no < arg_ary->wc_nchunks; @@ -260,23 +259,21 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, write_len); chunk_off = 0; while (write_len) { - int this_write; - this_write = min(write_len, max_write); ret = send_write(xprt, rqstp, ntohl(arg_ch->rs_handle), rs_offset + chunk_off, xdr_off, - this_write, + write_len, vec); - if (ret) { + if (ret <= 0) { dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", ret); return -EIO; } - chunk_off += this_write; - xdr_off += this_write; - xfer_len -= this_write; - write_len -= this_write; + chunk_off += ret; + xdr_off += ret; + xfer_len -= ret; + write_len -= 
ret; } } /* Update the req with the number of chunks actually used */ @@ -293,7 +290,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, { u32 xfer_len = rqstp->rq_res.len; int write_len; - int max_write; u32 xdr_off; int chunk_no; int chunk_off; @@ -311,8 +307,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, res_ary = (struct rpcrdma_write_array *) &rdma_resp->rm_body.rm_chunks[2]; - max_write = xprt->sc_max_sge * PAGE_SIZE; - /* xdr offset starts at RPC message */ nchunks = ntohl(arg_ary->wc_nchunks); for (xdr_off = 0, chunk_no = 0; @@ -330,24 +324,21 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, write_len); chunk_off = 0; while (write_len) { - int this_write; - - this_write = min(write_len, max_write); ret = send_write(xprt, rqstp, ntohl(ch->rs_handle), rs_offset + chunk_off, xdr_off, - this_write, + write_len, vec); - if (ret) { + if (ret <= 0) { dprintk("svcrdma: RDMA_WRITE failed, ret=%d\n", ret); return -EIO; } - chunk_off += this_write; - xdr_off += this_write; - xfer_len -= this_write; - write_len -= this_write; + chunk_off += ret; + xdr_off += ret; + xfer_len -= ret; + write_len -= ret; } } /* Update the req with the number of chunks actually used */ From 35e634b83cbe23e5673289d1536752968aab8f75 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 9 Jul 2014 21:54:16 +0800 Subject: [PATCH 056/167] NFSD: Check acl returned from get_acl/posix_acl_from_mode Commit 4ac7249ea5 (nfsd: use get_acl and ->set_acl) doesn't check the acl returned from get_acl()/posix_acl_from_mode(). Signed-off-by: Kinglong Mee Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs2acl.c | 8 ++++---- fs/nfsd/nfs3acl.c | 8 ++++---- fs/nfsd/nfs4acl.c | 19 +++++++++++++------ 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 12b023a7ab7d..ac54ea60b3f6 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -54,14 +54,14 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp, if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { acl = get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) { - nfserr = nfserrno(PTR_ERR(acl)); - goto fail; - } if (acl == NULL) { /* Solaris returns the inode's minimum ACL. */ acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); } + if (IS_ERR(acl)) { + nfserr = nfserrno(PTR_ERR(acl)); + goto fail; + } resp->acl_access = acl; } if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 2a514e21dc74..34cbbab6abd7 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -47,14 +47,14 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp, if (resp->mask & (NFS_ACL|NFS_ACLCNT)) { acl = get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) { - nfserr = nfserrno(PTR_ERR(acl)); - goto fail; - } if (acl == NULL) { /* Solaris returns the inode's minimum ACL. 
*/ acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); } + if (IS_ERR(acl)) { + nfserr = nfserrno(PTR_ERR(acl)); + goto fail; + } resp->acl_access = acl; } if (resp->mask & (NFS_DFACL|NFS_DFACLCNT)) { diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index acf6974e6823..59fd76651781 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c @@ -146,17 +146,23 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, int size = 0; pacl = get_acl(inode, ACL_TYPE_ACCESS); - if (!pacl) { + if (!pacl) pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); - if (IS_ERR(pacl)) - return PTR_ERR(pacl); - } + + if (IS_ERR(pacl)) + return PTR_ERR(pacl); + /* allocate for worst case: one (deny, allow) pair each: */ size += 2 * pacl->a_count; if (S_ISDIR(inode->i_mode)) { flags = NFS4_ACL_DIR; dpacl = get_acl(inode, ACL_TYPE_DEFAULT); + if (IS_ERR(dpacl)) { + error = PTR_ERR(dpacl); + goto rel_pacl; + } + if (dpacl) size += 2 * dpacl->a_count; } @@ -173,9 +179,10 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, if (dpacl) _posix_to_nfsv4_one(dpacl, *acl, flags | NFS4_ACL_TYPE_DEFAULT); - out: - posix_acl_release(pacl); +out: posix_acl_release(dpacl); +rel_pacl: + posix_acl_release(pacl); return error; } From d5d5c304b13bc3cade13b8a1b5833c8b3a0975f1 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 9 Jul 2014 21:51:27 +0800 Subject: [PATCH 057/167] NFSD: Fix bad checking of space for padding in splice read Note that the caller has already reserved space for count and eof, so xdr->p has already moved past them, only the padding remains. Signed-off-by: Kinglong Mee Fixes dc97618ddd (nfsd4: separate splice and readv cases) Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4xdr.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1ad7bd4e346f..01023a595163 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3072,11 +3072,8 @@ static __be32 nfsd4_encode_splice_read( __be32 nfserr; __be32 *p = xdr->p - 2; - /* - * Don't inline pages unless we know there's room for eof, - * count, and possible padding: - */ - if (xdr->end - xdr->p < 3) + /* Make sure there will be room for padding if needed */ + if (xdr->end - xdr->p < 1) return nfserr_resource; nfserr = nfsd_splice_read(read->rd_rqstp, file, From e8051c837bd96ad1eabdd46504363431dc5fddc5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Jul 2014 10:31:56 -0400 Subject: [PATCH 058/167] nfsd: eliminate nfsd4_init_callback It's just an obfuscated INIT_WORK call. Just make the work_func_t a non-static symbol and use a normal INIT_WORK call. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4callback.c | 8 ++------ fs/nfsd/nfs4state.c | 4 ++-- fs/nfsd/state.h | 2 +- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 2c73cae9899d..30a71cb46001 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -1011,7 +1011,8 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) run_nfsd4_cb(cb); } -static void nfsd4_do_callback_rpc(struct work_struct *w) +void +nfsd4_do_callback_rpc(struct work_struct *w) { struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work); struct nfs4_client *clp = cb->cb_clp; @@ -1031,11 +1032,6 @@ static void nfsd4_do_callback_rpc(struct work_struct *w) cb->cb_ops, cb); } -void nfsd4_init_callback(struct nfsd4_callback *cb) -{ - INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); -} - void nfsd4_cb_recall(struct nfs4_delegation *dp) { struct nfsd4_callback *cb = &dp->dl_recall; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 225f98c7d00d..56ea4f12803e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -592,7 +592,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle); dp->dl_time = 0; atomic_set(&dp->dl_count, 1); - nfsd4_init_callback(&dp->dl_recall); + INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc); return dp; } @@ -1677,7 +1677,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, spin_unlock(&nn->client_lock); return NULL; } - nfsd4_init_callback(&clp->cl_cb_null); + INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); copy_verf(clp, verf); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 015b972da8ba..20857142773f 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -436,7 +436,7 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, extern __be32 
nfs4_check_open_reclaim(clientid_t *clid, struct nfsd4_compound_state *cstate, struct nfsd_net *nn); extern int set_callback_cred(void); -extern void nfsd4_init_callback(struct nfsd4_callback *); +void nfsd4_do_callback_rpc(struct work_struct *w); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); From 02e1215f9f72ad8c087e21a5701bea0ac18fafd4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Jul 2014 10:31:57 -0400 Subject: [PATCH 059/167] nfsd: Avoid taking state_lock while holding inode lock in nfsd_break_one_deleg state_lock is a heavily contended global lock. We don't want to grab that while simultaneously holding the inode->i_lock. Add a new per-nfs4_file lock that we can use to protect the per-nfs4_file delegation list. Hold that while walking the list in the break_deleg callback and queue the workqueue job for each one. The workqueue job can then take the state_lock and do the list manipulations without the i_lock being held prior to starting the rpc call. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4callback.c | 25 +++++++++++++++--- fs/nfsd/nfs4state.c | 58 +++++++++++++++++++++++++++--------------- fs/nfsd/state.h | 4 ++- 3 files changed, 62 insertions(+), 25 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 30a71cb46001..a88a93e09d69 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -933,7 +933,7 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp) set_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags); /* * Note this won't actually result in a null callback; - * instead, nfsd4_do_callback_rpc() will detect the killed + * instead, nfsd4_run_cb_null() will detect the killed * client, destroy the rpc client, and stop: */ do_probe_callback(clp); @@ -1011,10 +1011,9 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) run_nfsd4_cb(cb); } -void -nfsd4_do_callback_rpc(struct work_struct *w) +static void +nfsd4_run_callback_rpc(struct nfsd4_callback *cb) { - struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work); struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *clnt; @@ -1032,6 +1031,24 @@ nfsd4_do_callback_rpc(struct work_struct *w) cb->cb_ops, cb); } +void +nfsd4_run_cb_null(struct work_struct *w) +{ + struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, + cb_work); + nfsd4_run_callback_rpc(cb); +} + +void +nfsd4_run_cb_recall(struct work_struct *w) +{ + struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, + cb_work); + + nfsd4_prepare_cb_recall(cb->cb_op); + nfsd4_run_callback_rpc(cb); +} + void nfsd4_cb_recall(struct nfs4_delegation *dp) { struct nfsd4_callback *cb = &dp->dl_recall; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 56ea4f12803e..bdf8ac3393bd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -254,6 +254,8 @@ static void nfsd4_free_file(struct nfs4_file *f) static inline void put_nfs4_file(struct nfs4_file *fi) { + might_lock(&state_lock); + if (atomic_dec_and_lock(&fi->fi_ref, 
&state_lock)) { hlist_del(&fi->fi_hash); spin_unlock(&state_lock); @@ -554,6 +556,8 @@ static void block_delegations(struct knfsd_fh *fh) u32 hash; struct bloom_pair *bd = &blocked_delegations; + lockdep_assert_held(&state_lock); + hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); __set_bit(hash&255, bd->set[bd->new]); @@ -592,7 +596,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle); dp->dl_time = 0; atomic_set(&dp->dl_count, 1); - INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc); + INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall); return dp; } @@ -640,7 +644,9 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) lockdep_assert_held(&state_lock); dp->dl_stid.sc_type = NFS4_DELEG_STID; + spin_lock(&fp->fi_lock); list_add(&dp->dl_perfile, &fp->fi_delegations); + spin_unlock(&fp->fi_lock); list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); } @@ -648,14 +654,18 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) static void unhash_delegation(struct nfs4_delegation *dp) { + struct nfs4_file *fp = dp->dl_file; + spin_lock(&state_lock); list_del_init(&dp->dl_perclnt); - list_del_init(&dp->dl_perfile); list_del_init(&dp->dl_recall_lru); + spin_lock(&fp->fi_lock); + list_del_init(&dp->dl_perfile); + spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); - if (dp->dl_file) { - nfs4_put_deleg_lease(dp->dl_file); - put_nfs4_file(dp->dl_file); + if (fp) { + nfs4_put_deleg_lease(fp); + put_nfs4_file(fp); dp->dl_file = NULL; } } @@ -1677,7 +1687,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, spin_unlock(&nn->client_lock); return NULL; } - INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc); + INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_run_cb_null); clp->cl_time = get_seconds(); clear_bit(0, &clp->cl_cb_slot_busy); copy_verf(clp, verf); @@ -3079,30 +3089,38 @@ nfs4_share_conflict(struct svc_fh 
*current_fh, unsigned int deny_type) return ret; } -static void nfsd_break_one_deleg(struct nfs4_delegation *dp) +void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp) { struct nfs4_client *clp = dp->dl_stid.sc_client; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - lockdep_assert_held(&state_lock); - /* We're assuming the state code never drops its reference - * without first removing the lease. Since we're in this lease - * callback (and since the lease code is serialized by the kernel - * lock) we know the server hasn't removed the lease yet, we know - * it's safe to take a reference: */ - atomic_inc(&dp->dl_count); - + /* + * We can't do this in nfsd_break_deleg_cb because it is + * already holding inode->i_lock + */ + spin_lock(&state_lock); + block_delegations(&dp->dl_fh); /* * If the dl_time != 0, then we know that it has already been * queued for a lease break. Don't queue it again. */ if (dp->dl_time == 0) { - list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); dp->dl_time = get_seconds(); + list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); } + spin_unlock(&state_lock); +} - block_delegations(&dp->dl_fh); - +static void nfsd_break_one_deleg(struct nfs4_delegation *dp) +{ + /* + * We're assuming the state code never drops its reference + * without first removing the lease. Since we're in this lease + * callback (and since the lease code is serialized by the kernel + * lock) we know the server hasn't removed the lease yet, we know + * it's safe to take a reference. 
+ */ + atomic_inc(&dp->dl_count); nfsd4_cb_recall(dp); } @@ -3127,11 +3145,11 @@ static void nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; - spin_lock(&state_lock); fp->fi_had_conflict = true; + spin_lock(&fp->fi_lock); list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) nfsd_break_one_deleg(dp); - spin_unlock(&state_lock); + spin_unlock(&fp->fi_lock); } static diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 20857142773f..81b7522e3f67 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -436,7 +436,8 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, extern __be32 nfs4_check_open_reclaim(clientid_t *clid, struct nfsd4_compound_state *cstate, struct nfsd_net *nn); extern int set_callback_cred(void); -void nfsd4_do_callback_rpc(struct work_struct *w); +void nfsd4_run_cb_null(struct work_struct *w); +void nfsd4_run_cb_recall(struct work_struct *w); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); @@ -444,6 +445,7 @@ extern void nfsd4_cb_recall(struct nfs4_delegation *dp); extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); +extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp); extern void nfs4_put_delegation(struct nfs4_delegation *dp); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn); From d564fbec7a8fa22d4b1ad10249eace42ea01513b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 16 Jul 2014 10:31:58 -0400 Subject: [PATCH 060/167] nfsd: nfs4_alloc_init_lease should take a nfs4_file arg No need to pass the delegation pointer in here as it's only used to get the nfs4_file pointer. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index bdf8ac3393bd..1b01a20827ab 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3474,7 +3474,7 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp) return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN; } -static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag) +static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) { struct file_lock *fl; @@ -3486,7 +3486,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int f fl->fl_flags = FL_DELEG; fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; fl->fl_end = OFFSET_MAX; - fl->fl_owner = (fl_owner_t)(dp->dl_file); + fl->fl_owner = (fl_owner_t)fp; fl->fl_pid = current->tgid; return fl; } @@ -3497,7 +3497,7 @@ static int nfs4_setlease(struct nfs4_delegation *dp) struct file_lock *fl; int status; - fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ); + fl = nfs4_alloc_init_lease(fp, NFS4_OPEN_DELEGATE_READ); if (!fl) return -ENOMEM; fl->fl_file = find_readable_file(fp); From b0fc29d6fcd0310a8437123fe6f30b1ae60a62f9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 Jul 2014 10:31:59 -0400 Subject: [PATCH 061/167] nfsd: Ensure stateids remain unique until they are freed Add an extra delegation state to allow the stateid to remain in the idr tree until the last reference has been released. This will be necessary to ensure uniqueness once the client_mutex is removed. [jlayton: reset the sc_type under the state_lock in unhash_delegation] Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 8 ++++---- fs/nfsd/state.h | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1b01a20827ab..fd4deb049ddf 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -616,6 +616,7 @@ void nfs4_put_delegation(struct nfs4_delegation *dp) { if (atomic_dec_and_test(&dp->dl_count)) { + remove_stid(&dp->dl_stid); nfs4_free_stid(deleg_slab, &dp->dl_stid); num_delegations--; } @@ -657,6 +658,7 @@ unhash_delegation(struct nfs4_delegation *dp) struct nfs4_file *fp = dp->dl_file; spin_lock(&state_lock); + dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID; list_del_init(&dp->dl_perclnt); list_del_init(&dp->dl_recall_lru); spin_lock(&fp->fi_lock); @@ -670,19 +672,15 @@ unhash_delegation(struct nfs4_delegation *dp) } } - - static void destroy_revoked_delegation(struct nfs4_delegation *dp) { list_del_init(&dp->dl_recall_lru); - remove_stid(&dp->dl_stid); nfs4_put_delegation(dp); } static void destroy_delegation(struct nfs4_delegation *dp) { unhash_delegation(dp); - remove_stid(&dp->dl_stid); nfs4_put_delegation(dp); } @@ -4036,7 +4034,9 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) return nfs_ok; default: printk("unknown stateid type %x\n", s->sc_type); + /* Fallthrough */ case NFS4_CLOSED_STID: + case NFS4_CLOSED_DELEG_STID: return nfserr_bad_stateid; } } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 81b7522e3f67..996d61eeb357 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -80,6 +80,7 @@ struct nfs4_stid { #define NFS4_CLOSED_STID 8 /* For a deleg stateid kept around only to process free_stateid's: */ #define NFS4_REVOKED_DELEG_STID 16 +#define NFS4_CLOSED_DELEG_STID 32 unsigned char sc_type; stateid_t sc_stateid; struct nfs4_client *sc_client; From ae4b884fc6316b3190be19448cea24b020c1cad6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 15 Jul 2014 12:59:36 -0400 Subject: [PATCH 062/167] nfsd: silence sparse warning about 
accessing credentials sparse says: fs/nfsd/auth.c:31:38: warning: incorrect type in argument 1 (different address spaces) fs/nfsd/auth.c:31:38: expected struct cred const *cred fs/nfsd/auth.c:31:38: got struct cred const [noderef] *real_cred Add a new accessor for the ->real_cred and use that to fetch the pointer. Accessing current->real_cred directly is actually quite safe since we know that they can't go away so this is mostly a cosmetic fixup to silence sparse. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/auth.c | 2 +- include/linux/cred.h | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 72f44823adbb..9d46a0bdd9f9 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -28,7 +28,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) validate_process_creds(); /* discard any old override before preparing the new set */ - revert_creds(get_cred(current->real_cred)); + revert_creds(get_cred(current_real_cred())); new = prepare_creds(); if (!new) return -ENOMEM; diff --git a/include/linux/cred.h b/include/linux/cred.h index f61d6c8f5ef3..b2d0820837c4 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -258,6 +258,15 @@ static inline void put_cred(const struct cred *_cred) #define current_cred() \ rcu_dereference_protected(current->cred, 1) +/** + * current_real_cred - Access the current task's objective credentials + * + * Access the objective credentials of the current task. RCU-safe, + * since nobody else can modify it. + */ +#define current_real_cred() \ + rcu_dereference_protected(current->real_cred, 1) + /** * __task_cred - Access a task's objective credentials * @task: The task to query From 5d6031ca742f9f07b9c9d9322538619f3bd155ac Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Thu, 17 Jul 2014 16:20:39 -0400 Subject: [PATCH 063/167] nfsd4: zero op arguments beyond the 8th compound op The first 8 ops of the compound are zeroed since they're a part of the argument that's zeroed by the memset(rqstp->rq_argp, 0, procp->pc_argsize); in svc_process_common(). But we handle larger compounds by allocating the memory on the fly in nfsd4_decode_compound(). Other than code recently fixed by 01529e3f8179 "NFSD: Fix memory leak in encoding denied lock", I don't know of any examples of code depending on this initialization. But it definitely seems possible, and I'd rather be safe. Compounds this long are unusual so I'm much more worried about failure in these poorly tested cases than about an insignificant performance hit. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 01023a595163..628b430e743e 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1635,7 +1635,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) goto xdr_error; if (argp->opcnt > ARRAY_SIZE(argp->iops)) { - argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); + argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); if (!argp->ops) { argp->ops = argp->iops; dprintk("nfsd: couldn't allocate room for COMPOUND\n"); From 3c45ddf823d679a820adddd53b52c6699c9a05ac Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 16 Jul 2014 15:38:32 -0400 Subject: [PATCH 064/167] svcrdma: Select NFSv4.1 backchannel transport based on forward channel The current code always selects XPRT_TRANSPORT_BC_TCP for the back channel, even when the forward channel was not TCP (eg, RDMA). When a 4.1 mount is attempted with RDMA, the server panics in the TCP BC code when trying to send CB_NULL. Instead, construct the transport protocol number from the forward channel transport or'd with XPRT_TRANSPORT_BC.
Transports that do not support bi-directional RPC will not have registered a "BC" transport, causing create_backchannel_client() to fail immediately. Fixes: https://bugzilla.linux-nfs.org/show_bug.cgi?id=265 Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 3 ++- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svcsock.c | 2 ++ net/sunrpc/xprt.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 + 5 files changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index a88a93e09d69..564d72304613 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -689,7 +689,8 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c clp->cl_cb_session = ses; args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; - args.protocol = XPRT_TRANSPORT_BC_TCP; + args.protocol = conn->cb_xprt->xpt_class->xcl_ident | + XPRT_TRANSPORT_BC; args.authflavor = ses->se_cb_sec.flavor; } /* Create RPC client */ diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 7235040a19b2..5d9d6f84b382 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -33,6 +33,7 @@ struct svc_xprt_class { struct svc_xprt_ops *xcl_ops; struct list_head xcl_list; u32 xcl_max_payload; + int xcl_ident; }; /* diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b507cd327d9b..b2437ee93657 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -692,6 +692,7 @@ static struct svc_xprt_class svc_udp_class = { .xcl_owner = THIS_MODULE, .xcl_ops = &svc_udp_ops, .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP, + .xcl_ident = XPRT_TRANSPORT_UDP, }; static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) @@ -1292,6 +1293,7 @@ static struct svc_xprt_class svc_tcp_class = { .xcl_owner = THIS_MODULE, .xcl_ops = &svc_tcp_ops, .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, + .xcl_ident = XPRT_TRANSPORT_TCP, }; 
void svc_init_xprt_sock(void) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index c3b2b3369e52..51c63165073c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1306,7 +1306,7 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args) } } spin_unlock(&xprt_list_lock); - printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident); + dprintk("RPC: transport (%d) not supported\n", args->ident); return ERR_PTR(-EIO); found: diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index e7323fbbd348..06a5d9235107 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -92,6 +92,7 @@ struct svc_xprt_class svc_rdma_class = { .xcl_owner = THIS_MODULE, .xcl_ops = &svc_rdma_ops, .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, + .xcl_ident = XPRT_TRANSPORT_RDMA, }; struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) From 22cb43855dce2cb1b23c5b8c5c83e9baa4cfde6e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 12 Jul 2014 18:01:02 -0400 Subject: [PATCH 065/167] SUNRPC: xdr_get_next_encode_buffer should be declared static Quell another sparse warning. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/xdr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 23fb4e75e245..290af97bf6f9 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -509,7 +509,8 @@ void xdr_commit_encode(struct xdr_stream *xdr) } EXPORT_SYMBOL_GPL(xdr_commit_encode); -__be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes) +static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, + size_t nbytes) { static __be32 *p; int space_left; From 57a371442112856388c3c2fd4b0867ef3280896a Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" Date: Fri, 18 Jul 2014 15:06:47 -0400 Subject: [PATCH 066/167] nfsd4: CREATE_SESSION should update backchannel immediately nfsd4_probe_callback kicks off some work that will eventually run nfsd4_process_cb_update and update the session flags. In theory we could process a following SEQUENCE call before that update happens resulting in flags that don't accurately represent, for example, the lack of a backchannel. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fd4deb049ddf..10cdb67762f6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1223,10 +1223,8 @@ static void nfsd4_init_conn(struct svc_rqst *rqstp, struct nfsd4_conn *conn, str if (ret) /* oops; xprt is already down: */ nfsd4_conn_lost(&conn->cn_xpt_user); - if (conn->cn_flags & NFS4_CDFC4_BACK) { - /* callback channel may be back up */ - nfsd4_probe_callback(ses->se_client); - } + /* We may have gained or lost a callback channel: */ + nfsd4_probe_callback_sync(ses->se_client); } static struct nfsd4_conn *alloc_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_create_session *cses) From 417c6629b2d81d5a18d29c4bbb6a9a4c64282a36 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 21 Jul 2014 09:34:57 -0400 Subject: [PATCH 067/167] nfsd: fix race that grants unrecallable delegation If nfs4_setlease successfully acquires a new delegation, then another task breaks the delegation before we reach hash_delegation_locked, then the breaking task will see an empty fi_delegations list and do nothing. The client will receive an open reply incorrectly granting a delegation and will never receive a recall. Move more of the delegation fields to be protected by the fi_lock. It's more granular than the state_lock and in later patches we'll want to be able to rely on it in addition to the state_lock. Attempt to acquire a delegation.
If that succeeds, take the spinlocks and then check to see if the file has had a conflict show up since then. If it has, then we assume that the lease is no longer valid and that we shouldn't hand out a delegation. There's also one more potential (but very unlikely) problem. If the lease is broken before the delegation is hashed, then it could leak. In the event that the fi_delegations list is empty, reset the fl_break_time to jiffies so that it's cleaned up ASAP by the normal lease handling code. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++------------- 1 file changed, 68 insertions(+), 26 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 10cdb67762f6..cc477dd55dce 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -624,6 +624,8 @@ nfs4_put_delegation(struct nfs4_delegation *dp) static void nfs4_put_deleg_lease(struct nfs4_file *fp) { + lockdep_assert_held(&state_lock); + if (!fp->fi_lease) return; if (atomic_dec_and_test(&fp->fi_delegees)) { @@ -643,11 +645,10 @@ static void hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { lockdep_assert_held(&state_lock); + lockdep_assert_held(&fp->fi_lock); dp->dl_stid.sc_type = NFS4_DELEG_STID; - spin_lock(&fp->fi_lock); list_add(&dp->dl_perfile, &fp->fi_delegations); - spin_unlock(&fp->fi_lock); list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); } @@ -659,17 +660,18 @@ unhash_delegation(struct nfs4_delegation *dp) spin_lock(&state_lock); dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID; + spin_lock(&fp->fi_lock); list_del_init(&dp->dl_perclnt); list_del_init(&dp->dl_recall_lru); - spin_lock(&fp->fi_lock); list_del_init(&dp->dl_perfile); spin_unlock(&fp->fi_lock); - spin_unlock(&state_lock); if (fp) { nfs4_put_deleg_lease(fp); - put_nfs4_file(fp); dp->dl_file = NULL; } + spin_unlock(&state_lock); + if (fp) + 
put_nfs4_file(fp); } static void destroy_revoked_delegation(struct nfs4_delegation *dp) @@ -3141,10 +3143,19 @@ static void nfsd_break_deleg_cb(struct file_lock *fl) */ fl->fl_break_time = 0; - fp->fi_had_conflict = true; spin_lock(&fp->fi_lock); - list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) - nfsd_break_one_deleg(dp); + fp->fi_had_conflict = true; + /* + * If there are no delegations on the list, then we can't count on this + * lease ever being cleaned up. Set the fl_break_time to jiffies so that + * time_out_leases will do it ASAP. The fact that fi_had_conflict is now + * true should keep any new delegations from being hashed. + */ + if (list_empty(&fp->fi_delegations)) + fl->fl_break_time = jiffies; + else + list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) + nfsd_break_one_deleg(dp); spin_unlock(&fp->fi_lock); } @@ -3491,46 +3502,77 @@ static int nfs4_setlease(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_file; struct file_lock *fl; - int status; + struct file *filp; + int status = 0; fl = nfs4_alloc_init_lease(fp, NFS4_OPEN_DELEGATE_READ); if (!fl) return -ENOMEM; - fl->fl_file = find_readable_file(fp); - status = vfs_setlease(fl->fl_file, fl->fl_type, &fl); - if (status) - goto out_free; - fp->fi_lease = fl; - fp->fi_deleg_file = fl->fl_file; - atomic_set(&fp->fi_delegees, 1); + filp = find_readable_file(fp); + if (!filp) { + /* We should always have a readable file here */ + WARN_ON_ONCE(1); + return -EBADF; + } + fl->fl_file = filp; + status = vfs_setlease(filp, fl->fl_type, &fl); + if (status) { + locks_free_lock(fl); + goto out_fput; + } spin_lock(&state_lock); + spin_lock(&fp->fi_lock); + /* Did the lease get broken before we took the lock? 
*/ + status = -EAGAIN; + if (fp->fi_had_conflict) + goto out_unlock; + /* Race breaker */ + if (fp->fi_lease) { + status = 0; + atomic_inc(&fp->fi_delegees); + hash_delegation_locked(dp, fp); + goto out_unlock; + } + fp->fi_lease = fl; + fp->fi_deleg_file = filp; + atomic_set(&fp->fi_delegees, 1); hash_delegation_locked(dp, fp); + spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); return 0; -out_free: - if (fl->fl_file) - fput(fl->fl_file); - locks_free_lock(fl); +out_unlock: + spin_unlock(&fp->fi_lock); + spin_unlock(&state_lock); +out_fput: + fput(filp); return status; } static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) { + int status = 0; + if (fp->fi_had_conflict) return -EAGAIN; get_nfs4_file(fp); - dp->dl_file = fp; - if (!fp->fi_lease) - return nfs4_setlease(dp); spin_lock(&state_lock); + spin_lock(&fp->fi_lock); + dp->dl_file = fp; + if (!fp->fi_lease) { + spin_unlock(&fp->fi_lock); + spin_unlock(&state_lock); + return nfs4_setlease(dp); + } atomic_inc(&fp->fi_delegees); if (fp->fi_had_conflict) { - spin_unlock(&state_lock); - return -EAGAIN; + status = -EAGAIN; + goto out_unlock; } hash_delegation_locked(dp, fp); +out_unlock: + spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); - return 0; + return status; } static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) From 72c0b0fb9f8a24612b6c33c8adf9e9406818981b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 21 Jul 2014 09:34:58 -0400 Subject: [PATCH 068/167] nfsd: Move the delegation reference counter into the struct nfs4_stid We will want to add reference counting to the lock stateid and open stateids too in later patches. Signed-off-by: Trond Myklebust Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 6 +++--- fs/nfsd/state.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cc477dd55dce..72da0d44e66b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -472,6 +472,7 @@ kmem_cache *slab) stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; /* Will be incremented before return to client: */ stid->sc_stateid.si_generation = 0; + atomic_set(&stid->sc_count, 1); /* * It shouldn't be a problem to reuse an opaque stateid value. @@ -595,7 +596,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv dp->dl_type = NFS4_OPEN_DELEGATE_READ; fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle); dp->dl_time = 0; - atomic_set(&dp->dl_count, 1); INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall); return dp; } @@ -615,7 +615,7 @@ static void nfs4_free_stid(struct kmem_cache *slab, struct nfs4_stid *s) void nfs4_put_delegation(struct nfs4_delegation *dp) { - if (atomic_dec_and_test(&dp->dl_count)) { + if (atomic_dec_and_test(&dp->dl_stid.sc_count)) { remove_stid(&dp->dl_stid); nfs4_free_stid(deleg_slab, &dp->dl_stid); num_delegations--; @@ -3118,7 +3118,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) * lock) we know the server hasn't removed the lease yet, we know * it's safe to take a reference.
*/ - atomic_inc(&dp->dl_count); + atomic_inc(&dp->dl_stid.sc_count); nfsd4_cb_recall(dp); } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 996d61eeb357..e68a9ae30fd7 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -73,6 +73,7 @@ struct nfsd4_callback { }; struct nfs4_stid { + atomic_t sc_count; #define NFS4_OPEN_STID 1 #define NFS4_LOCK_STID 2 #define NFS4_DELEG_STID 4 @@ -91,7 +92,6 @@ struct nfs4_delegation { struct list_head dl_perfile; struct list_head dl_perclnt; struct list_head dl_recall_lru; /* delegation recalled */ - atomic_t dl_count; /* ref count */ struct nfs4_file *dl_file; u32 dl_type; time_t dl_time; From d55a166c961714e18907f4723252f72097cd2d23 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 22 Jul 2014 13:52:06 -0400 Subject: [PATCH 069/167] nfsd: bump dl_time when unhashing delegation There's a potential race between a lease break and DELEGRETURN call. Suppose a lease break comes in and queues the workqueue job for a delegation, but it doesn't run just yet. Then, a DELEGRETURN comes in finds the delegation and calls destroy_delegation on it to unhash it and put its primary reference. Next, the workqueue job runs and queues the delegation back onto the del_recall_lru list, issues the CB_RECALL and puts the final reference. With that, the final reference to the delegation is put, but it's still on the LRU list. When we go to unhash a delegation, it's because we intend to get rid of it soon afterward, so we don't want lease breaks to mess with it once that occurs. Fix this by bumping the dl_time whenever we unhash a delegation, to ensure that lease breaks don't monkey with it. I believe this is a regression due to commit 02e1215f9f7 (nfsd: Avoid taking state_lock while holding inode lock in nfsd_break_one_deleg). Prior to that, the state_lock was held in the lm_break callback itself, and that would have prevented this race. Cc: Trond Myklebust Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 72da0d44e66b..a3a828d17563 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -660,6 +660,8 @@ unhash_delegation(struct nfs4_delegation *dp) spin_lock(&state_lock); dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID; + /* Ensure that deleg break won't try to requeue it */ + ++dp->dl_time; spin_lock(&fp->fi_lock); list_del_init(&dp->dl_perclnt); list_del_init(&dp->dl_recall_lru); From e560e3b510d22e06081a72a4d49e559b9e392659 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Jul 2014 16:00:40 -0400 Subject: [PATCH 070/167] svcrdma: Add zero padding if the client doesn't send it See RFC 5666 section 3.7: clients don't have to send zero XDR padding. BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=246 Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 28 +++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 8f92a61ee2df..e0110270d650 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -435,6 +436,32 @@ static int rdma_read_chunks(struct svcxprt_rdma *xprt, return ret; } +/* + * To avoid a separate RDMA READ just for a handful of zero bytes, + * RFC 5666 section 3.7 allows the client to omit the XDR zero pad + * in chunk lists. 
+ */ +static void +rdma_fix_xdr_pad(struct xdr_buf *buf) +{ + unsigned int page_len = buf->page_len; + unsigned int size = (XDR_QUADLEN(page_len) << 2) - page_len; + unsigned int offset, pg_no; + char *p; + + if (size == 0) + return; + + pg_no = page_len >> PAGE_SHIFT; + offset = page_len & ~PAGE_MASK; + p = page_address(buf->pages[pg_no]); + memset(p + offset, 0, size); + + buf->page_len += size; + buf->buflen += size; + buf->len += size; +} + static int rdma_read_complete(struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head) { @@ -449,6 +476,7 @@ static int rdma_read_complete(struct svc_rqst *rqstp, rqstp->rq_pages[page_no] = head->pages[page_no]; } /* Point rq_arg.pages past header */ + rdma_fix_xdr_pad(&head->arg); rqstp->rq_arg.pages = &rqstp->rq_pages[head->hdr_count]; rqstp->rq_arg.page_len = head->arg.page_len; rqstp->rq_arg.page_base = head->arg.page_base; From 2f6ce8e73caa443201e3d826639b9242cf6ea568 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 23 Jul 2014 13:46:49 -0400 Subject: [PATCH 071/167] nfsd: ensure that st_access_bmap and st_deny_bmap are initialized to 0 Open stateids must be initialized with the st_access_bmap and st_deny_bmap set to 0, so that nfs4_get_vfs_file can properly record their state in old_access_bmap and old_deny_bmap. This bug was introduced in commit baeb4ff0e502 (nfsd: make deny mode enforcement more efficient and close races in it) and was causing the refcounts to end up incorrect when nfs4_get_vfs_file returned an error after bumping the refcounts. This made it impossible to unmount the underlying filesystem after running pynfs tests that involve deny modes. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a3a828d17563..66a3b843a82b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2970,8 +2970,6 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_file = fp; stp->st_access_bmap = 0; stp->st_deny_bmap = 0; - set_access(open->op_share_access, stp); - set_deny(open->op_share_deny, stp); stp->st_openstp = NULL; spin_lock(&fp->fi_lock); list_add(&stp->st_perfile, &fp->fi_stateids); From d9bb5a43277d2dcc514fa693f741bbc38e2e2271 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 22 Jul 2014 17:48:04 -0400 Subject: [PATCH 072/167] svcrdma: Double the default credit limit The RDMA credit limit controls how many concurrent RPCs are allowed per connection. An NFS/RDMA client and server exchange their credit limits in the RPC/RDMA headers. The Linux client and the Solaris client and server allow 32 credits. The Linux server allows only 16, which limits its performance. Set the server's default credit limit to 32, like the other well-known implementations, so the out-of-the-shrinkwrap performance of the Linux server is better. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 5cf99a016368..975da754c778 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -174,8 +174,7 @@ struct svcxprt_rdma { * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ.
*/ #define RPCRDMA_ORD (64/4) #define RPCRDMA_SQ_DEPTH_MULT 8 -#define RPCRDMA_MAX_THREADS 16 -#define RPCRDMA_MAX_REQUESTS 16 +#define RPCRDMA_MAX_REQUESTS 32 #define RPCRDMA_MAX_REQ_SIZE 4096 /* svc_rdma_marshal.c */ From fc8e5a644c2041273a1cee7c6299713ccee319ab Mon Sep 17 00:00:00 2001 From: Himangi Saraogi Date: Wed, 23 Jul 2014 20:12:31 +0530 Subject: [PATCH 073/167] nfsd4: convert comma to semicolon Replace a comma between expression statements by a semicolon. This changes the semantics of the code, but given the current indentation appears to be what is intended. A simplified version of the Coccinelle semantic patch that performs this transformation is as follows: // @r@ expression e1,e2; @@ e1 -, +; e2; // Signed-off-by: Himangi Saraogi Acked-by: Julia Lawall Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 564d72304613..c393d6ca3fce 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -678,7 +678,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c (clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) return -EINVAL; args.client_name = clp->cl_cred.cr_principal; - args.prognumber = conn->cb_prog, + args.prognumber = conn->cb_prog; args.protocol = XPRT_TRANSPORT_TCP; args.authflavor = clp->cl_cred.cr_flavor; clp->cl_cb_ident = conn->cb_ident; From e2cf80d73f283fa573069217bdb899bc554d9edc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 23 Jul 2014 16:17:38 -0400 Subject: [PATCH 074/167] nfsd: Store the filehandle with the struct nfs4_file For use when we may not have a struct inode. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 10 ++++++---- fs/nfsd/state.h | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 66a3b843a82b..859891f30958 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2833,7 +2833,8 @@ static struct nfs4_file *nfsd4_alloc_file(void) } /* OPEN Share state helper functions */ -static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) +static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino, + struct knfsd_fh *fh) { unsigned int hashval = file_hashval(ino); @@ -2845,6 +2846,7 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino) INIT_LIST_HEAD(&fp->fi_delegations); ihold(ino); fp->fi_inode = ino; + fh_copy_shallow(&fp->fi_fhandle, fh); fp->fi_had_conflict = false; fp->fi_lease = NULL; fp->fi_share_deny = 0; @@ -3049,14 +3051,14 @@ find_file(struct inode *ino) } static struct nfs4_file * -find_or_add_file(struct inode *ino, struct nfs4_file *new) +find_or_add_file(struct inode *ino, struct nfs4_file *new, struct knfsd_fh *fh) { struct nfs4_file *fp; spin_lock(&state_lock); fp = find_file_locked(ino); if (fp == NULL) { - nfsd4_init_file(new, ino); + nfsd4_init_file(new, ino, fh); fp = new; } spin_unlock(&state_lock); @@ -3711,7 +3713,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * and check for delegations in the process of being recalled. 
* If not found, create the nfs4_file struct */ - fp = find_or_add_file(ino, open->op_file); + fp = find_or_add_file(ino, open->op_file, &current_fh->fh_handle); if (fp != open->op_file) { status = nfs4_check_deleg(cl, open, &dp); if (status) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index e68a9ae30fd7..33cf950b3873 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -396,6 +396,7 @@ struct nfs4_file { struct file *fi_deleg_file; struct file_lock *fi_lease; atomic_t fi_delegees; + struct knfsd_fh fi_fhandle; struct inode *fi_inode; bool fi_had_conflict; }; From ca94321783786982bee416d57d20c93f71337aa1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 23 Jul 2014 16:17:39 -0400 Subject: [PATCH 075/167] nfsd: Use the filehandle to look up the struct nfs4_file instead of inode This makes more sense anyway since an inode pointer value can change even when the filehandle doesn't. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 859891f30958..ab96718df3cc 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -368,10 +368,22 @@ static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) #define FILE_HASH_BITS 8 #define FILE_HASH_SIZE (1 << FILE_HASH_BITS) -static unsigned int file_hashval(struct inode *ino) +static unsigned int nfsd_fh_hashval(struct knfsd_fh *fh) { - /* XXX: why are we hashing on inode pointer, anyway?
*/ - return hash_ptr(ino, FILE_HASH_BITS); + return jhash2(fh->fh_base.fh_pad, XDR_QUADLEN(fh->fh_size), 0); +} + +static unsigned int file_hashval(struct knfsd_fh *fh) +{ + return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1); +} + +static bool nfsd_fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2) +{ + return fh1->fh_size == fh2->fh_size && + !memcmp(fh1->fh_base.fh_pad, + fh2->fh_base.fh_pad, + fh1->fh_size); } static struct hlist_head file_hashtbl[FILE_HASH_SIZE]; @@ -2836,7 +2848,7 @@ static struct nfs4_file *nfsd4_alloc_file(void) static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino, struct knfsd_fh *fh) { - unsigned int hashval = file_hashval(ino); + unsigned int hashval = file_hashval(fh); lockdep_assert_held(&state_lock); @@ -3023,15 +3035,15 @@ find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, /* search file_hashtbl[] for file */ static struct nfs4_file * -find_file_locked(struct inode *ino) +find_file_locked(struct knfsd_fh *fh) { - unsigned int hashval = file_hashval(ino); + unsigned int hashval = file_hashval(fh); struct nfs4_file *fp; lockdep_assert_held(&state_lock); hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { - if (fp->fi_inode == ino) { + if (nfsd_fh_match(&fp->fi_fhandle, fh)) { get_nfs4_file(fp); return fp; } @@ -3040,12 +3052,12 @@ find_file_locked(struct inode *ino) } static struct nfs4_file * -find_file(struct inode *ino) +find_file(struct knfsd_fh *fh) { struct nfs4_file *fp; spin_lock(&state_lock); - fp = find_file_locked(ino); + fp = find_file_locked(fh); spin_unlock(&state_lock); return fp; } @@ -3056,7 +3068,7 @@ find_or_add_file(struct inode *ino, struct nfs4_file *new, struct knfsd_fh *fh) struct nfs4_file *fp; spin_lock(&state_lock); - fp = find_file_locked(ino); + fp = find_file_locked(fh); if (fp == NULL) { nfsd4_init_file(new, ino, fh); fp = new; @@ -3073,11 +3085,10 @@ find_or_add_file(struct inode *ino, struct nfs4_file *new, struct knfsd_fh *fh) static __be32 
nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) { - struct inode *ino = current_fh->fh_dentry->d_inode; struct nfs4_file *fp; __be32 ret = nfs_ok; - fp = find_file(ino); + fp = find_file(&current_fh->fh_handle); if (!fp) return ret; /* Check for conflicting share reservations */ From b07c54a4a3802f28b0ed7b40b4341b170a3ef78f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 23 Jul 2014 16:17:40 -0400 Subject: [PATCH 076/167] nfsd: nfs4_check_fh - make it actually check the filehandle ...instead of just checking the inode that corresponds to it. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ab96718df3cc..6ced8d566c0b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3951,7 +3951,7 @@ laundromat_main(struct work_struct *laundry) static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) { - if (fhp->fh_dentry->d_inode != stp->st_file->fi_inode) + if (!nfsd_fh_match(&fhp->fh_handle, &stp->st_file->fi_fhandle)) return nfserr_bad_stateid; return nfs_ok; } From f9c00c3ab425ef04ca5a3caa5e9a9f5e0272bb8a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 23 Jul 2014 16:17:41 -0400 Subject: [PATCH 077/167] nfsd: Do not let nfs4_file pin the struct inode Remove the fi_inode field in struct nfs4_file in order to remove the possibility of struct nfs4_file pinning the inode when it does not have any open state. The only place we still need to get to an inode is in check_for_locks, so change it to use find_any_file and use the inode from any that it finds. If it doesn't find one, then just assume there aren't any. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Signed-off-by: J.
Bruce Fields --- fs/nfsd/nfs4state.c | 49 ++++++++++++++++++++++++++------------------- fs/nfsd/state.h | 1 - 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6ced8d566c0b..1dfc8ee85c93 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -70,7 +70,7 @@ static u64 current_sessionid = 1; #define CURRENT_STATEID(stateid) (!memcmp((stateid), &currentstateid, sizeof(stateid_t))) /* forward declarations */ -static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner); +static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner); /* Locking: */ @@ -259,7 +259,6 @@ put_nfs4_file(struct nfs4_file *fi) if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) { hlist_del(&fi->fi_hash); spin_unlock(&state_lock); - iput(fi->fi_inode); nfsd4_free_file(fi); } } @@ -2845,8 +2844,7 @@ static struct nfs4_file *nfsd4_alloc_file(void) } /* OPEN Share state helper functions */ -static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino, - struct knfsd_fh *fh) +static void nfsd4_init_file(struct nfs4_file *fp, struct knfsd_fh *fh) { unsigned int hashval = file_hashval(fh); @@ -2856,8 +2854,6 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino, spin_lock_init(&fp->fi_lock); INIT_LIST_HEAD(&fp->fi_stateids); INIT_LIST_HEAD(&fp->fi_delegations); - ihold(ino); - fp->fi_inode = ino; fh_copy_shallow(&fp->fi_fhandle, fh); fp->fi_had_conflict = false; fp->fi_lease = NULL; @@ -3063,14 +3059,14 @@ find_file(struct knfsd_fh *fh) } static struct nfs4_file * -find_or_add_file(struct inode *ino, struct nfs4_file *new, struct knfsd_fh *fh) +find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh) { struct nfs4_file *fp; spin_lock(&state_lock); fp = find_file_locked(fh); if (fp == NULL) { - nfsd4_init_file(new, ino, fh); + nfsd4_init_file(new, fh); fp = new; } spin_unlock(&state_lock); @@ -3714,7 +3710,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh
*current_fh, struct nf struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfs4_client *cl = open->op_openowner->oo_owner.so_client; struct nfs4_file *fp = NULL; - struct inode *ino = current_fh->fh_dentry->d_inode; struct nfs4_ol_stateid *stp = NULL; struct nfs4_delegation *dp = NULL; __be32 status; @@ -3724,7 +3719,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * and check for delegations in the process of being recalled. * If not found, create the nfs4_file struct */ - fp = find_or_add_file(ino, open->op_file, &current_fh->fh_handle); + fp = find_or_add_file(open->op_file, &current_fh->fh_handle); if (fp != open->op_file) { status = nfs4_check_deleg(cl, open, &dp); if (status) @@ -4663,7 +4658,9 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str } static struct nfs4_ol_stateid * -alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct nfs4_ol_stateid *open_stp) +alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, + struct inode *inode, + struct nfs4_ol_stateid *open_stp) { struct nfs4_ol_stateid *stp; struct nfs4_client *clp = lo->lo_owner.so_client; @@ -4723,6 +4720,7 @@ static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, s struct nfs4_file *fi = ost->st_file; struct nfs4_openowner *oo = openowner(ost->st_stateowner); struct nfs4_client *cl = oo->oo_owner.so_client; + struct inode *inode = cstate->current_fh.fh_dentry->d_inode; struct nfs4_lockowner *lo; unsigned int strhashval; struct nfsd_net *nn = net_generic(cl->net, nfsd_net_id); @@ -4743,7 +4741,7 @@ static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, s *lst = find_lock_stateid(lo, fi); if (*lst == NULL) { - *lst = alloc_init_lock_stateid(lo, fi, ost); + *lst = alloc_init_lock_stateid(lo, fi, inode, ost); if (*lst == NULL) { release_lockowner_if_empty(lo); return nfserr_jukebox; @@ -5092,25 +5090,34 @@ out_nfserr: /* * returns - * 1: locks held
by lockowner - * 0: no locks held by lockowner + * true: locks held by lockowner + * false: no locks held by lockowner */ -static int -check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) +static bool +check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner) { struct file_lock **flpp; - struct inode *inode = filp->fi_inode; - int status = 0; + int status = false; + struct file *filp = find_any_file(fp); + struct inode *inode; + + if (!filp) { + /* Any valid lock stateid should have some sort of access */ + WARN_ON_ONCE(1); + return status; + } + + inode = file_inode(filp); spin_lock(&inode->i_lock); for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { if ((*flpp)->fl_owner == (fl_owner_t)lowner) { - status = 1; - goto out; + status = true; + break; } } -out: spin_unlock(&inode->i_lock); + fput(filp); return status; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 33cf950b3873..0097d4771521 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -397,7 +397,6 @@ struct nfs4_file { struct file_lock *fi_lease; atomic_t fi_delegees; struct knfsd_fh fi_fhandle; - struct inode *fi_inode; bool fi_had_conflict; }; From f83388341b825e03dafa38141ec113b43f9d61d0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 25 Jul 2014 07:34:19 -0400 Subject: [PATCH 078/167] nfsd: simplify stateid allocation and file handling Don't allow stateids to clear the open file pointer until they are being destroyed. In later patches we'll want to rely on the fact that we have a valid file pointer when dealing with the stateid and this will save us from having to do a lot of NULL pointer checks before doing so. Also, move to allocating stateids with kzalloc and get rid of the explicit zeroing of fields. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J.
Bruce Fields --- fs/nfsd/nfs4state.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1dfc8ee85c93..fdbfbcb70914 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -470,7 +470,7 @@ kmem_cache *slab) struct nfs4_stid *stid; int new_id; - stid = kmem_cache_alloc(slab, GFP_KERNEL); + stid = kmem_cache_zalloc(slab, GFP_KERNEL); if (!stid) return NULL; @@ -478,11 +478,9 @@ kmem_cache *slab) if (new_id < 0) goto out_free; stid->sc_client = cl; - stid->sc_type = 0; stid->sc_stateid.si_opaque.so_id = new_id; stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; /* Will be incremented before return to client: */ - stid->sc_stateid.si_generation = 0; atomic_set(&stid->sc_count, 1); /* @@ -603,10 +601,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv INIT_LIST_HEAD(&dp->dl_perfile); INIT_LIST_HEAD(&dp->dl_perclnt); INIT_LIST_HEAD(&dp->dl_recall_lru); - dp->dl_file = NULL; dp->dl_type = NFS4_OPEN_DELEGATE_READ; fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle); - dp->dl_time = 0; INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall); return dp; } @@ -627,6 +623,8 @@ void nfs4_put_delegation(struct nfs4_delegation *dp) { if (atomic_dec_and_test(&dp->dl_stid.sc_count)) { + if (dp->dl_file) + put_nfs4_file(dp->dl_file); remove_stid(&dp->dl_stid); nfs4_free_stid(deleg_slab, &dp->dl_stid); num_delegations--; @@ -678,13 +676,9 @@ unhash_delegation(struct nfs4_delegation *dp) list_del_init(&dp->dl_recall_lru); list_del_init(&dp->dl_perfile); spin_unlock(&fp->fi_lock); - if (fp) { - nfs4_put_deleg_lease(fp); - dp->dl_file = NULL; - } - spin_unlock(&state_lock); if (fp) - put_nfs4_file(fp); + nfs4_put_deleg_lease(fp); + spin_unlock(&state_lock); } static void destroy_revoked_delegation(struct nfs4_delegation *dp) @@ -892,12 +886,12 @@ static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) static void close_generic_stateid(struct
nfs4_ol_stateid *stp) { release_all_access(stp); - put_nfs4_file(stp->st_file); - stp->st_file = NULL; } static void free_generic_stateid(struct nfs4_ol_stateid *stp) { + if (stp->st_file) + put_nfs4_file(stp->st_file); remove_stid(&stp->st_stid); nfs4_free_stid(stateid_slab, &stp->st_stid); } @@ -4469,6 +4463,10 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) if (list_empty(&oo->oo_owner.so_stateids)) release_openowner(oo); } else { + if (s->st_file) { + put_nfs4_file(s->st_file); + s->st_file = NULL; + } oo->oo_last_closed_stid = s; /* * In the 4.0 case we need to keep the owners around a From 4269067696a1e0c6eef99f631aa3877d860df755 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 25 Jul 2014 07:34:20 -0400 Subject: [PATCH 079/167] nfsd: fully unhash delegations when revoking them Ensure that the delegations cannot be found by the laundromat etc once we add them to the various 'revoke' lists. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fdbfbcb70914..618daa0d4109 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -661,13 +661,13 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); } -/* Called under the state lock. 
*/ static void -unhash_delegation(struct nfs4_delegation *dp) +unhash_delegation_locked(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_file; - spin_lock(&state_lock); + lockdep_assert_held(&state_lock); + dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID; /* Ensure that deleg break won't try to requeue it */ ++dp->dl_time; @@ -678,7 +678,6 @@ unhash_delegation(struct nfs4_delegation *dp) spin_unlock(&fp->fi_lock); if (fp) nfs4_put_deleg_lease(fp); - spin_unlock(&state_lock); } static void destroy_revoked_delegation(struct nfs4_delegation *dp) @@ -689,7 +688,9 @@ static void destroy_revoked_delegation(struct nfs4_delegation *dp) static void destroy_delegation(struct nfs4_delegation *dp) { - unhash_delegation(dp); + spin_lock(&state_lock); + unhash_delegation_locked(dp); + spin_unlock(&state_lock); nfs4_put_delegation(dp); } @@ -698,11 +699,10 @@ static void revoke_delegation(struct nfs4_delegation *dp) struct nfs4_client *clp = dp->dl_stid.sc_client; if (clp->cl_minorversion == 0) - destroy_delegation(dp); + destroy_revoked_delegation(dp); else { - unhash_delegation(dp); dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; - list_add(&dp->dl_recall_lru, &clp->cl_revoked); + list_move(&dp->dl_recall_lru, &clp->cl_revoked); } } @@ -1458,15 +1458,14 @@ destroy_client(struct nfs4_client *clp) spin_lock(&state_lock); while (!list_empty(&clp->cl_delegations)) { dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt); - list_del_init(&dp->dl_perclnt); - /* Ensure that deleg break won't try to requeue it */ - ++dp->dl_time; - list_move(&dp->dl_recall_lru, &reaplist); + unhash_delegation_locked(dp); + list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); - destroy_delegation(dp); + list_del_init(&dp->dl_recall_lru); + nfs4_put_delegation(dp); } list_splice_init(&clp->cl_revoked, &reaplist); while (!list_empty(&reaplist)) { @@ -3662,7 
+3661,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; return; out_free: - destroy_delegation(dp); + nfs4_put_delegation(dp); out_no_deleg: open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && @@ -3900,7 +3899,8 @@ nfs4_laundromat(struct nfsd_net *nn) new_timeo = min(new_timeo, t); break; } - list_move(&dp->dl_recall_lru, &reaplist); + unhash_delegation_locked(dp); + list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); list_for_each_safe(pos, next, &reaplist) { @@ -5382,12 +5382,8 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, if (dp->dl_time != 0) continue; - /* - * Increment dl_time to ensure that delegation breaks - * don't monkey with it now that we are. - */ - ++dp->dl_time; - list_move(&dp->dl_recall_lru, victims); + unhash_delegation_locked(dp); + list_add(&dp->dl_recall_lru, victims); } if (++count == max) break; @@ -5642,12 +5638,14 @@ nfs4_state_shutdown_net(struct net *net) spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - list_move(&dp->dl_recall_lru, &reaplist); + unhash_delegation_locked(dp); + list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); - destroy_delegation(dp); + list_del_init(&dp->dl_recall_lru); + nfs4_put_delegation(dp); } nfsd4_client_tracking_exit(net); From 2d4a532d385f635ab8243b88db3136bb52a0bc29 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 25 Jul 2014 07:34:21 -0400 Subject: [PATCH 080/167] nfsd: ensure that clp->cl_revoked list is protected by clp->cl_lock Currently, both destroy_revoked_delegation and revoke_delegation manipulate the cl_revoked list without any locking aside from the client_mutex. 
Ensure that the clp->cl_lock is held when manipulating it, except for the list walking in destroy_client. At that point, the client should no longer be in use, and so it should be safe to walk the list without any locking. That also means that we don't need to do the list_splice_init there either. Also, the fact that revoke_delegation deletes dl_recall_lru list_head without any locking makes it difficult to know whether it's doing so safely in all cases. Move the list_del_init calls into the callers, and add a WARN_ON in the event that it's passed a delegation that has a non-empty list_head. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 618daa0d4109..9c912c004247 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -680,12 +680,6 @@ unhash_delegation_locked(struct nfs4_delegation *dp) nfs4_put_deleg_lease(fp); } -static void destroy_revoked_delegation(struct nfs4_delegation *dp) -{ - list_del_init(&dp->dl_recall_lru); - nfs4_put_delegation(dp); -} - static void destroy_delegation(struct nfs4_delegation *dp) { spin_lock(&state_lock); @@ -698,11 +692,15 @@ static void revoke_delegation(struct nfs4_delegation *dp) { struct nfs4_client *clp = dp->dl_stid.sc_client; + WARN_ON(!list_empty(&dp->dl_recall_lru)); + if (clp->cl_minorversion == 0) - destroy_revoked_delegation(dp); + nfs4_put_delegation(dp); else { dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; - list_move(&dp->dl_recall_lru, &clp->cl_revoked); + spin_lock(&clp->cl_lock); + list_add(&dp->dl_recall_lru, &clp->cl_revoked); + spin_unlock(&clp->cl_lock); } } @@ -1467,10 +1465,10 @@ destroy_client(struct nfs4_client *clp) list_del_init(&dp->dl_recall_lru); nfs4_put_delegation(dp); } - list_splice_init(&clp->cl_revoked, &reaplist); - while (!list_empty(&reaplist)) { + while
(!list_empty(&clp->cl_revoked)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); - destroy_revoked_delegation(dp); + list_del_init(&dp->dl_recall_lru); + nfs4_put_delegation(dp); } while (!list_empty(&clp->cl_openowners)) { oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient); @@ -3903,8 +3901,10 @@ nfs4_laundromat(struct nfsd_net *nn) list_add(&dp->dl_recall_lru, &reaplist); } spin_unlock(&state_lock); - list_for_each_safe(pos, next, &reaplist) { - dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); + while (!list_empty(&reaplist)) { + dp = list_first_entry(&reaplist, struct nfs4_delegation, + dl_recall_lru); + list_del_init(&dp->dl_recall_lru); revoke_delegation(dp); } list_for_each_safe(pos, next, &nn->close_lru) { @@ -4248,7 +4248,10 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, break; case NFS4_REVOKED_DELEG_STID: dp = delegstateid(s); - destroy_revoked_delegation(dp); + spin_lock(&cl->cl_lock); + list_del_init(&dp->dl_recall_lru); + spin_unlock(&cl->cl_lock); + nfs4_put_delegation(dp); ret = nfs_ok; break; default: @@ -5401,8 +5404,10 @@ u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max) count = nfsd_find_all_delegations(clp, max, &victims); spin_unlock(&state_lock); - list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) + list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) { + list_del_init(&dp->dl_recall_lru); revoke_delegation(dp); + } return count; } From 02a3508dba9a58b7bd77cc91f8e941e2dda94d1d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 25 Jul 2014 07:34:22 -0400 Subject: [PATCH 081/167] nfsd: Convert delegation counter to an atomic_long_t type We want to convert to an atomic type so that we don't need to lock across the call to alloc_init_deleg(). Then convert to a long type so that we match the size of 'max_delegations'. None of this is a problem today, but it will be once we remove client_mutex protection. 
Signed-off-by: Trond Myklebust Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9c912c004247..b421b51c3a9e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -342,7 +342,7 @@ find_any_file(struct nfs4_file *f) return ret; } -static int num_delegations; +static atomic_long_t num_delegations; unsigned long max_delegations; /* @@ -582,22 +582,23 @@ static struct nfs4_delegation * alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) { struct nfs4_delegation *dp; + long n; dprintk("NFSD alloc_init_deleg\n"); - if (num_delegations > max_delegations) - return NULL; + n = atomic_long_inc_return(&num_delegations); + if (n < 0 || n > max_delegations) + goto out_dec; if (delegation_blocked(&current_fh->fh_handle)) - return NULL; + goto out_dec; dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); if (dp == NULL) - return dp; + goto out_dec; /* * delegation seqid's are never incremented.
The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid * 0 anyway just for consistency and use 1: */ dp->dl_stid.sc_stateid.si_generation = 1; - num_delegations++; INIT_LIST_HEAD(&dp->dl_perfile); INIT_LIST_HEAD(&dp->dl_perclnt); INIT_LIST_HEAD(&dp->dl_recall_lru); @@ -605,6 +606,9 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle); INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall); return dp; +out_dec: + atomic_long_dec(&num_delegations); + return NULL; } static void remove_stid(struct nfs4_stid *s) @@ -627,7 +631,7 @@ nfs4_put_delegation(struct nfs4_delegation *dp) put_nfs4_file(dp->dl_file); remove_stid(&dp->dl_stid); nfs4_free_stid(deleg_slab, &dp->dl_stid); - num_delegations--; + atomic_long_dec(&num_delegations); } } From f9416e281e53bea6f8e39c21f50fd79c029ba24a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 25 Jul 2014 07:34:23 -0400 Subject: [PATCH 082/167] nfsd: drop unused stp arg to alloc_init_deleg Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J.
Bruce Fields --- fs/nfsd/nfs4state.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b421b51c3a9e..049ef2ce72bf 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -579,7 +579,7 @@ static void block_delegations(struct knfsd_fh *fh) } static struct nfs4_delegation * -alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) +alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh) { struct nfs4_delegation *dp; long n; @@ -3649,7 +3649,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, default: goto out_no_deleg; } - dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh); + dp = alloc_init_deleg(oo->oo_owner.so_client, fh); if (dp == NULL) goto out_no_deleg; status = nfs4_set_delegation(dp, stp->st_file); From 4cf59221c7cb46ce40e17bcfeddb64d759071440 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 25 Jul 2014 07:34:24 -0400 Subject: [PATCH 083/167] nfsd: clean up arguments to nfs4_open_delegation No need to pass in a net pointer since we can derive that. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 049ef2ce72bf..24065e1b2bb2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3607,11 +3607,12 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) * proper support for them. 
*/ static void -nfs4_open_delegation(struct net *net, struct svc_fh *fh, - struct nfsd4_open *open, struct nfs4_ol_stateid *stp) +nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, + struct nfs4_ol_stateid *stp) { struct nfs4_delegation *dp; - struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner); + struct nfs4_openowner *oo = openowner(stp->st_stateowner); + struct nfs4_client *clp = stp->st_stid.sc_client; int cb_up; int status = 0; @@ -3630,7 +3631,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, * Let's not give out any delegations till everyone's * had the chance to reclaim theirs.... */ - if (locks_in_grace(net)) + if (locks_in_grace(clp->net)) goto out_no_deleg; if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) goto out_no_deleg; @@ -3649,7 +3650,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh, default: goto out_no_deleg; } - dp = alloc_init_deleg(oo->oo_owner.so_client, fh); + dp = alloc_init_deleg(clp, fh); if (dp == NULL) goto out_no_deleg; status = nfs4_set_delegation(dp, stp->st_file); @@ -3762,7 +3763,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf * Attempt to hand out a delegation. No error return, because the * OPEN succeeds even if we fail. */ - nfs4_open_delegation(SVC_NET(rqstp), current_fh, open, stp); + nfs4_open_delegation(current_fh, open, stp); nodeleg: status = nfs_ok; From 0b26693c56cc4beae2f913e737b15c12bc2b5b97 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 25 Jul 2014 07:34:25 -0400 Subject: [PATCH 084/167] nfsd: clean up nfs4_set_delegation Move the alloc_init_deleg call into nfs4_set_delegation and change the function to return a pointer to the delegation or an IS_ERR return. This allows us to skip allocating a delegation if the file has already experienced a lease conflict. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 24065e1b2bb2..85d7ac664691 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3552,12 +3552,20 @@ out_fput: return status; } -static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) +static struct nfs4_delegation * +nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, + struct nfs4_file *fp) { - int status = 0; + int status; + struct nfs4_delegation *dp; if (fp->fi_had_conflict) - return -EAGAIN; + return ERR_PTR(-EAGAIN); + + dp = alloc_init_deleg(clp, fh); + if (!dp) + return ERR_PTR(-ENOMEM); + get_nfs4_file(fp); spin_lock(&state_lock); spin_lock(&fp->fi_lock); @@ -3565,7 +3573,8 @@ static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) if (!fp->fi_lease) { spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); - return nfs4_setlease(dp); + status = nfs4_setlease(dp); + goto out; } atomic_inc(&fp->fi_delegees); if (fp->fi_had_conflict) { @@ -3573,10 +3582,16 @@ static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp) goto out_unlock; } hash_delegation_locked(dp, fp); + status = 0; out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); - return status; +out: + if (status) { + nfs4_put_delegation(dp); + return ERR_PTR(status); + } + return dp; } static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status) @@ -3650,12 +3665,9 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, default: goto out_no_deleg; } - dp = alloc_init_deleg(clp, fh); - if (dp == NULL) + dp = nfs4_set_delegation(clp, fh, stp->st_file); + if (IS_ERR(dp)) goto out_no_deleg; - status = nfs4_set_delegation(dp, stp->st_file); - if (status) - goto out_free; memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid)); @@ -3663,8 +3675,6 @@ 
nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, STATEID_VAL(&dp->dl_stid.sc_stateid)); open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; return; -out_free: - nfs4_put_delegation(dp); out_no_deleg: open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS && From f54fe962b88fbecd918feeb49b8838e272184c91 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 25 Jul 2014 07:34:26 -0400 Subject: [PATCH 085/167] nfsd: give block_delegation and delegation_blocked its own spinlock The state lock can be fairly heavily contended, and there's no reason that nfs4_file lookups and delegation_blocked should be mutually exclusive. Let's give the new block_delegation code its own spinlock. It does mean that we'll need to take a different lock in the delegation break code, but that's not generally as critical to performance. Cc: Neil Brown Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 85d7ac664691..ecfddca9b841 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -517,10 +517,11 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) * Each filter is 256 bits. We hash the filehandle to 32bit and use the * low 3 bytes as hash-table indices. * - * 'state_lock', which is always held when block_delegations() is called, + * 'blocked_delegations_lock', which is always taken in block_delegations(), * is used to manage concurrent access. Testing does not need the lock * except when swapping the two filters. 
*/ +static DEFINE_SPINLOCK(blocked_delegations_lock); static struct bloom_pair { int entries, old_entries; time_t swap_time; @@ -536,7 +537,7 @@ static int delegation_blocked(struct knfsd_fh *fh) if (bd->entries == 0) return 0; if (seconds_since_boot() - bd->swap_time > 30) { - spin_lock(&state_lock); + spin_lock(&blocked_delegations_lock); if (seconds_since_boot() - bd->swap_time > 30) { bd->entries -= bd->old_entries; bd->old_entries = bd->entries; @@ -545,7 +546,7 @@ static int delegation_blocked(struct knfsd_fh *fh) bd->new = 1-bd->new; bd->swap_time = seconds_since_boot(); } - spin_unlock(&state_lock); + spin_unlock(&blocked_delegations_lock); } hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); if (test_bit(hash&255, bd->set[0]) && @@ -566,16 +567,16 @@ static void block_delegations(struct knfsd_fh *fh) u32 hash; struct bloom_pair *bd = &blocked_delegations; - lockdep_assert_held(&state_lock); - hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0); + spin_lock(&blocked_delegations_lock); __set_bit(hash&255, bd->set[bd->new]); __set_bit((hash>>8)&255, bd->set[bd->new]); __set_bit((hash>>16)&255, bd->set[bd->new]); if (bd->entries == 0) bd->swap_time = seconds_since_boot(); bd->entries += 1; + spin_unlock(&blocked_delegations_lock); } static struct nfs4_delegation * @@ -3096,16 +3097,16 @@ void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp) struct nfs4_client *clp = dp->dl_stid.sc_client; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + block_delegations(&dp->dl_fh); + /* * We can't do this in nfsd_break_deleg_cb because it is - * already holding inode->i_lock - */ - spin_lock(&state_lock); - block_delegations(&dp->dl_fh); - /* + * already holding inode->i_lock. + * * If the dl_time != 0, then we know that it has already been * queued for a lease break. Don't queue it again. 
*/ + spin_lock(&state_lock); if (dp->dl_time == 0) { dp->dl_time = get_seconds(); list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); From 650ecc8f8ff29a7f0990704f09df232b505b200d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 25 Jul 2014 07:34:27 -0400 Subject: [PATCH 086/167] nfsd: remove dl_fh field from struct nfs4_delegation Now that the nfs4_file has a filehandle in it, we no longer need to keep a per-delegation copy of it. Switch to using the one in the nfs4_file instead. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 2 +- fs/nfsd/nfs4state.c | 3 +-- fs/nfsd/state.h | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index c393d6ca3fce..e9813389687b 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -337,7 +337,7 @@ static void encode_cb_recall4args(struct xdr_stream *xdr, p = xdr_reserve_space(xdr, 4); *p++ = xdr_zero; /* truncate */ - encode_nfs_fh4(xdr, &dp->dl_fh); + encode_nfs_fh4(xdr, &dp->dl_file->fi_fhandle); hdr->nops++; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ecfddca9b841..b0f83beeca75 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -604,7 +604,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh) INIT_LIST_HEAD(&dp->dl_perclnt); INIT_LIST_HEAD(&dp->dl_recall_lru); dp->dl_type = NFS4_OPEN_DELEGATE_READ; - fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle); INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall); return dp; out_dec: @@ -3097,7 +3096,7 @@ void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp) struct nfs4_client *clp = dp->dl_stid.sc_client; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - block_delegations(&dp->dl_fh); + block_delegations(&dp->dl_file->fi_fhandle); /* * We can't do this in nfsd_break_deleg_cb because it is diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 0097d4771521..39747736e83b 100644 --- a/fs/nfsd/state.h +++
b/fs/nfsd/state.h @@ -96,7 +96,6 @@ struct nfs4_delegation { u32 dl_type; time_t dl_time; /* For recall: */ - struct knfsd_fh dl_fh; int dl_retries; struct nfsd4_callback dl_recall; }; From 0971374e2818eef6ebdbd7a37acf6ab7e98ac06c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 24 Jul 2014 23:59:31 -0400 Subject: [PATCH 087/167] SUNRPC: Reduce contention in svc_xprt_enqueue() Ensure that all calls to svc_xprt_enqueue() except svc_xprt_received() check the value of XPT_BUSY, before attempting to grab spinlocks etc. This is to avoid situations such as the following "perf" trace, which shows heavy contention on the pool spinlock: 54.15% nfsd [kernel.kallsyms] [k] _raw_spin_lock_bh | --- _raw_spin_lock_bh | |--71.43%-- svc_xprt_enqueue | | | |--50.31%-- svc_reserve | | | |--31.35%-- svc_xprt_received | | | |--18.34%-- svc_tcp_data_ready ... Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index b4737fbdec13..9cfa391e2bd0 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -23,6 +23,7 @@ static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); static void svc_age_temp_xprts(unsigned long closure); static void svc_delete_xprt(struct svc_xprt *xprt); +static void svc_xprt_do_enqueue(struct svc_xprt *xprt); /* apparently the "standard" is that clients close * idle connections after 5 minutes, servers after @@ -222,11 +223,12 @@ static void svc_xprt_received(struct svc_xprt *xprt) if (!test_bit(XPT_BUSY, &xprt->xpt_flags)) return; /* As soon as we clear busy, the xprt could be closed and - * 'put', so we need a reference to call svc_xprt_enqueue with: + * 'put', so we need a reference to call svc_xprt_do_enqueue with: */ svc_xprt_get(xprt); + smp_mb__before_atomic(); clear_bit(XPT_BUSY, &xprt->xpt_flags); - 
svc_xprt_enqueue(xprt); + svc_xprt_do_enqueue(xprt); svc_xprt_put(xprt); } @@ -335,12 +337,7 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) return false; } -/* - * Queue up a transport with data pending. If there are idle nfsd - * processes, wake 'em up. - * - */ -void svc_xprt_enqueue(struct svc_xprt *xprt) +static void svc_xprt_do_enqueue(struct svc_xprt *xprt) { struct svc_pool *pool; struct svc_rqst *rqstp; @@ -398,6 +395,18 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) out_unlock: spin_unlock_bh(&pool->sp_lock); } + +/* + * Queue up a transport with data pending. If there are idle nfsd + * processes, wake 'em up. + * + */ +void svc_xprt_enqueue(struct svc_xprt *xprt) +{ + if (test_bit(XPT_BUSY, &xprt->xpt_flags)) + return; + svc_xprt_do_enqueue(xprt); +} EXPORT_SYMBOL_GPL(svc_xprt_enqueue); /* From c7fb3f0631b8d66b90e0642a95b948febb3f3cee Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 24 Jul 2014 23:59:32 -0400 Subject: [PATCH 088/167] SUNRPC: svc_tcp_write_space: don't clear SOCK_NOSPACE prematurely If requests are queued in the socket inbuffer waiting for an svc_tcp_has_wspace() requirement to be satisfied, then we do not want to clear the SOCK_NOSPACE flag until we've satisfied that requirement. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- net/sunrpc/svcsock.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b2437ee93657..88db211d4264 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -446,11 +446,31 @@ static void svc_write_space(struct sock *sk) } } +static int svc_tcp_has_wspace(struct svc_xprt *xprt) +{ + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + struct svc_serv *serv = svsk->sk_xprt.xpt_server; + int required; + + if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) + return 1; + required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg; + if (sk_stream_wspace(svsk->sk_sk) >= required || + (sk_stream_min_wspace(svsk->sk_sk) == 0 && + atomic_read(&xprt->xpt_reserved) == 0)) + return 1; + set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); + return 0; +} + static void svc_tcp_write_space(struct sock *sk) { + struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); struct socket *sock = sk->sk_socket; - if (sk_stream_is_writeable(sk) && sock) + if (!sk_stream_is_writeable(sk) || !sock) + return; + if (!svsk || svc_tcp_has_wspace(&svsk->sk_xprt)) clear_bit(SOCK_NOSPACE, &sock->flags); svc_write_space(sk); } @@ -1198,23 +1218,6 @@ static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) svc_putnl(resv, 0); } -static int svc_tcp_has_wspace(struct svc_xprt *xprt) -{ - struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - struct svc_serv *serv = svsk->sk_xprt.xpt_server; - int required; - - if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) - return 1; - required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg; - if (sk_stream_wspace(svsk->sk_sk) >= required || - (sk_stream_min_wspace(svsk->sk_sk) == 0 && - atomic_read(&xprt->xpt_reserved) == 0)) - return 1; - set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); - return 0; -} - static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, struct net *net, struct sockaddr 
*sa, int salen, From 518776800c094a518ae6d303660b57f1400eb1eb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 24 Jul 2014 23:59:33 -0400 Subject: [PATCH 089/167] SUNRPC: Allow svc_reserve() to notify TCP socket that space has been freed Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svc_xprt.c | 2 ++ net/sunrpc/svcsock.c | 9 +++++++++ 3 files changed, 12 insertions(+) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 5d9d6f84b382..ce6e4182a5b2 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -25,6 +25,7 @@ struct svc_xprt_ops { void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); int (*xpo_secure_port)(struct svc_rqst *); + void (*xpo_adjust_wspace)(struct svc_xprt *); }; struct svc_xprt_class { diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 9cfa391e2bd0..6666c6745858 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -448,6 +448,8 @@ void svc_reserve(struct svc_rqst *rqstp, int space) atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved); rqstp->rq_reserved = space; + if (xprt->xpt_ops->xpo_adjust_wspace) + xprt->xpt_ops->xpo_adjust_wspace(xprt); svc_xprt_enqueue(xprt); } } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 88db211d4264..c24a8ff33f8f 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -475,6 +475,14 @@ static void svc_tcp_write_space(struct sock *sk) svc_write_space(sk); } +static void svc_tcp_adjust_wspace(struct svc_xprt *xprt) +{ + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + + if (svc_tcp_has_wspace(xprt)) + clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); +} + /* * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo */ @@ -1289,6 +1297,7 @@ static struct svc_xprt_ops svc_tcp_ops = { .xpo_has_wspace = svc_tcp_has_wspace, .xpo_accept = svc_tcp_accept, .xpo_secure_port = 
svc_sock_secure_port, + .xpo_adjust_wspace = svc_tcp_adjust_wspace, }; static struct svc_xprt_class svc_tcp_class = { From b3fbfe0e7a1d88e3cbaa282c5f6fc50e8c67448c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:37:44 -0400 Subject: [PATCH 090/167] nfsd: print status when nfsd4_open fails to open file it just created It's possible for nfsd to fail opening a file that it has just created. When that happens, we throw a WARN but it doesn't include any info about the error code. Print the status code to give us a bit more info. Our QA group hit some of these warnings under some very heavy stress testing. My suspicion is that they hit the file-max limit, but it's hard to know for sure. Go ahead and add a -ENFILE mapping to nfserr_serverfault to make the error more distinct (and correct). Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 4 +++- fs/nfsd/nfsproc.c | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 29a617ebe38c..8611585f739d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -460,7 +460,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * set, (2) sets open->op_stateid, (3) sets open->op_delegation. */ status = nfsd4_process_open2(rqstp, resfh, open); - WARN_ON(status && open->op_created); + WARN(status && open->op_created, + "nfsd4_process_open2 failed to open newly-created file! 
status=%u\n", + be32_to_cpu(status)); out: if (resfh && resfh != &cstate->current_fh) { fh_dup2(&cstate->current_fh, resfh); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index b19c7e8bf64c..b8680738f588 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -745,6 +745,7 @@ nfserrno (int errno) { nfserr_notsupp, -EOPNOTSUPP }, { nfserr_toosmall, -ETOOSMALL }, { nfserr_serverfault, -ESERVERFAULT }, + { nfserr_serverfault, -ENFILE }, }; int i; From 6011695da2d7c588f2dfe57c318758f0bf1154dd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jul 2014 21:34:06 -0400 Subject: [PATCH 091/167] nfsd: Add reference counting to the lock and open stateids When we remove the client_mutex, we'll need to be able to ensure that these objects aren't destroyed while we're not holding locks. Add a ->free() callback to the struct nfs4_stid, so that we can release a reference to the stid without caring about the contents. Signed-off-by: Trond Myklebust Reviewed-by: Christoph Hellwig Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4callback.c | 2 +- fs/nfsd/nfs4state.c | 103 ++++++++++++++++++++++------------------- fs/nfsd/state.h | 3 +- 3 files changed, 58 insertions(+), 50 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index e9813389687b..8574c708cf8c 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -905,7 +905,7 @@ static void nfsd4_cb_recall_release(void *calldata) spin_lock(&clp->cl_lock); list_del(&cb->cb_per_client); spin_unlock(&clp->cl_lock); - nfs4_put_delegation(dp); + nfs4_put_stid(&dp->dl_stid); } } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b0f83beeca75..60ab22b6e099 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -71,6 +71,7 @@ static u64 current_sessionid = 1; /* forward declarations */ static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner); +static void nfs4_free_ol_stateid(struct nfs4_stid *stid); /* Locking: */ @@ -463,8 +464,8 @@ static void nfs4_file_put_access(struct nfs4_file *fp, u32 access) __nfs4_file_put_access(fp, O_RDONLY); } -static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct -kmem_cache *slab) +static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, + struct kmem_cache *slab) { struct idr *stateids = &cl->cl_stateids; struct nfs4_stid *stid; @@ -500,7 +501,26 @@ out_free: static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) { - return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); + struct nfs4_stid *stid; + struct nfs4_ol_stateid *stp; + + stid = nfs4_alloc_stid(clp, stateid_slab); + if (!stid) + return NULL; + + stp = openlockstateid(stid); + stp->st_stid.sc_free = nfs4_free_ol_stateid; + return stp; +} + +static void nfs4_free_deleg(struct nfs4_stid *stid) +{ + struct nfs4_delegation *dp = delegstateid(stid); + + if (dp->dl_file) + put_nfs4_file(dp->dl_file); + kmem_cache_free(deleg_slab, stid); + atomic_long_dec(&num_delegations); } /* @@ -594,6 +614,8 @@ 
alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh) dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); if (dp == NULL) goto out_dec; + + dp->dl_stid.sc_free = nfs4_free_deleg; /* * delegation seqid's are never incremented. The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid @@ -611,28 +633,15 @@ out_dec: return NULL; } -static void remove_stid(struct nfs4_stid *s) -{ - struct idr *stateids = &s->sc_client->cl_stateids; - - idr_remove(stateids, s->sc_stateid.si_opaque.so_id); -} - -static void nfs4_free_stid(struct kmem_cache *slab, struct nfs4_stid *s) -{ - kmem_cache_free(slab, s); -} - void -nfs4_put_delegation(struct nfs4_delegation *dp) +nfs4_put_stid(struct nfs4_stid *s) { - if (atomic_dec_and_test(&dp->dl_stid.sc_count)) { - if (dp->dl_file) - put_nfs4_file(dp->dl_file); - remove_stid(&dp->dl_stid); - nfs4_free_stid(deleg_slab, &dp->dl_stid); - atomic_long_dec(&num_delegations); - } + struct nfs4_client *clp = s->sc_client; + + if (!atomic_dec_and_test(&s->sc_count)) + return; + idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); + s->sc_free(s); } static void nfs4_put_deleg_lease(struct nfs4_file *fp) @@ -689,7 +698,7 @@ static void destroy_delegation(struct nfs4_delegation *dp) spin_lock(&state_lock); unhash_delegation_locked(dp); spin_unlock(&state_lock); - nfs4_put_delegation(dp); + nfs4_put_stid(&dp->dl_stid); } static void revoke_delegation(struct nfs4_delegation *dp) @@ -699,7 +708,7 @@ static void revoke_delegation(struct nfs4_delegation *dp) WARN_ON(!list_empty(&dp->dl_recall_lru)); if (clp->cl_minorversion == 0) - nfs4_put_delegation(dp); + nfs4_put_stid(&dp->dl_stid); else { dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID; spin_lock(&clp->cl_lock); @@ -885,17 +894,14 @@ static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) list_del(&stp->st_perstateowner); } -static void close_generic_stateid(struct nfs4_ol_stateid *stp) +static void nfs4_free_ol_stateid(struct nfs4_stid *stid) { - 
release_all_access(stp); -} + struct nfs4_ol_stateid *stp = openlockstateid(stid); -static void free_generic_stateid(struct nfs4_ol_stateid *stp) -{ + release_all_access(stp); if (stp->st_file) put_nfs4_file(stp->st_file); - remove_stid(&stp->st_stid); - nfs4_free_stid(stateid_slab, &stp->st_stid); + kmem_cache_free(stateid_slab, stid); } static void __release_lock_stateid(struct nfs4_ol_stateid *stp) @@ -908,8 +914,7 @@ static void __release_lock_stateid(struct nfs4_ol_stateid *stp) file = find_any_file(stp->st_file); if (file) filp_close(file, (fl_owner_t)lockowner(stp->st_stateowner)); - close_generic_stateid(stp); - free_generic_stateid(stp); + nfs4_put_stid(&stp->st_stid); } static void unhash_lockowner(struct nfs4_lockowner *lo) @@ -966,13 +971,12 @@ static void unhash_open_stateid(struct nfs4_ol_stateid *stp) { unhash_generic_stateid(stp); release_open_stateid_locks(stp); - close_generic_stateid(stp); } static void release_open_stateid(struct nfs4_ol_stateid *stp) { unhash_open_stateid(stp); - free_generic_stateid(stp); + nfs4_put_stid(&stp->st_stid); } static void unhash_openowner(struct nfs4_openowner *oo) @@ -993,7 +997,7 @@ static void release_last_closed_stateid(struct nfs4_openowner *oo) struct nfs4_ol_stateid *s = oo->oo_last_closed_stid; if (s) { - free_generic_stateid(s); + nfs4_put_stid(&s->st_stid); oo->oo_last_closed_stid = NULL; } } @@ -1467,12 +1471,12 @@ destroy_client(struct nfs4_client *clp) while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); - nfs4_put_delegation(dp); + nfs4_put_stid(&dp->dl_stid); } while (!list_empty(&clp->cl_revoked)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); - nfs4_put_delegation(dp); + nfs4_put_stid(&dp->dl_stid); } while (!list_empty(&clp->cl_openowners)) { oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient); @@ -3588,7 +3592,7 @@ out_unlock: 
spin_unlock(&state_lock); out: if (status) { - nfs4_put_delegation(dp); + nfs4_put_stid(&dp->dl_stid); return ERR_PTR(status); } return dp; @@ -3818,7 +3822,7 @@ void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status) if (open->op_file) nfsd4_free_file(open->op_file); if (open->op_stp) - free_generic_stateid(open->op_stp); + nfs4_put_stid(&open->op_stp->st_stid); } __be32 @@ -4266,7 +4270,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, spin_lock(&cl->cl_lock); list_del_init(&dp->dl_recall_lru); spin_unlock(&cl->cl_lock); - nfs4_put_delegation(dp); + nfs4_put_stid(s); ret = nfs_ok; break; default: @@ -4477,19 +4481,22 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) unhash_open_stateid(s); if (clp->cl_minorversion) { - free_generic_stateid(s); if (list_empty(&oo->oo_owner.so_stateids)) release_openowner(oo); + nfs4_put_stid(&s->st_stid); } else { + /* + * In the 4.0 case we need to keep the owners around a + * little while to handle CLOSE replay. We still do need + * to release any file access that is held by them + * before returning however. + */ + release_all_access(s); if (s->st_file) { put_nfs4_file(s->st_file); s->st_file = NULL; } oo->oo_last_closed_stid = s; - /* - * In the 4.0 case we need to keep the owners around a - * little while to handle CLOSE replay. 
- */ if (list_empty(&oo->oo_owner.so_stateids)) move_to_close_lru(oo, clp->net); } @@ -5665,7 +5672,7 @@ nfs4_state_shutdown_net(struct net *net) list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); - nfs4_put_delegation(dp); + nfs4_put_stid(&dp->dl_stid); } nfsd4_client_tracking_exit(net); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 39747736e83b..32c466265ac1 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -85,6 +85,7 @@ struct nfs4_stid { unsigned char sc_type; stateid_t sc_stateid; struct nfs4_client *sc_client; + void (*sc_free)(struct nfs4_stid *); }; struct nfs4_delegation { @@ -429,6 +430,7 @@ extern __be32 nfs4_preprocess_stateid_op(struct net *net, stateid_t *stateid, int flags, struct file **filp); extern void nfs4_lock_state(void); extern void nfs4_unlock_state(void); +void nfs4_put_stid(struct nfs4_stid *s); void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); extern void nfs4_release_reclaim(struct nfsd_net *); extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, @@ -446,7 +448,6 @@ extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp); -extern void nfs4_put_delegation(struct nfs4_delegation *dp); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); From 11b9164adad7cd119b82b1f2c911a6d9bc67f1cc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jul 2014 21:34:08 -0400 Subject: [PATCH 092/167] nfsd: Add a struct nfs4_file field to struct nfs4_stid All stateids are associated with a nfs4_file. Let's consolidate. 
Replace delegation->dl_file with the dl_stid.sc_file, and nfs4_ol_stateid->st_file with st_stid.sc_file. Signed-off-by: Trond Myklebust Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 2 +- fs/nfsd/nfs4state.c | 69 +++++++++++++++++++++--------------------- fs/nfsd/state.h | 3 +- 3 files changed, 36 insertions(+), 38 deletions(-) diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 8574c708cf8c..e0be57b0f79b 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -337,7 +337,7 @@ static void encode_cb_recall4args(struct xdr_stream *xdr, p = xdr_reserve_space(xdr, 4); *p++ = xdr_zero; /* truncate */ - encode_nfs_fh4(xdr, &dp->dl_file->fi_fhandle); + encode_nfs_fh4(xdr, &dp->dl_stid.sc_file->fi_fhandle); hdr->nops++; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 60ab22b6e099..344cd1ac3f67 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -515,10 +515,6 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) static void nfs4_free_deleg(struct nfs4_stid *stid) { - struct nfs4_delegation *dp = delegstateid(stid); - - if (dp->dl_file) - put_nfs4_file(dp->dl_file); kmem_cache_free(deleg_slab, stid); atomic_long_dec(&num_delegations); } @@ -636,12 +632,15 @@ out_dec: void nfs4_put_stid(struct nfs4_stid *s) { + struct nfs4_file *fp = s->sc_file; struct nfs4_client *clp = s->sc_client; if (!atomic_dec_and_test(&s->sc_count)) return; idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); s->sc_free(s); + if (fp) + put_nfs4_file(fp); } static void nfs4_put_deleg_lease(struct nfs4_file *fp) @@ -677,7 +676,7 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) static void unhash_delegation_locked(struct nfs4_delegation *dp) { - struct nfs4_file *fp = dp->dl_file; + struct nfs4_file *fp = dp->dl_stid.sc_file; lockdep_assert_held(&state_lock); @@ -864,7 +863,7 @@ reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp) /* 
Recalculate per-file deny mode if there was a change */ if (change) - recalculate_deny_mode(stp->st_file); + recalculate_deny_mode(stp->st_stid.sc_file); } /* release all access and file references for a given stateid */ @@ -872,21 +871,21 @@ static void release_all_access(struct nfs4_ol_stateid *stp) { int i; - struct nfs4_file *fp = stp->st_file; + struct nfs4_file *fp = stp->st_stid.sc_file; if (fp && stp->st_deny_bmap != 0) recalculate_deny_mode(fp); for (i = 1; i < 4; i++) { if (test_access(i, stp)) - nfs4_file_put_access(stp->st_file, i); + nfs4_file_put_access(stp->st_stid.sc_file, i); clear_access(i, stp); } } static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) { - struct nfs4_file *fp = stp->st_file; + struct nfs4_file *fp = stp->st_stid.sc_file; spin_lock(&fp->fi_lock); list_del(&stp->st_perfile); @@ -899,8 +898,6 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid) struct nfs4_ol_stateid *stp = openlockstateid(stid); release_all_access(stp); - if (stp->st_file) - put_nfs4_file(stp->st_file); kmem_cache_free(stateid_slab, stid); } @@ -911,7 +908,7 @@ static void __release_lock_stateid(struct nfs4_ol_stateid *stp) list_del(&stp->st_locks); unhash_generic_stateid(stp); unhash_stid(&stp->st_stid); - file = find_any_file(stp->st_file); + file = find_any_file(stp->st_stid.sc_file); if (file) filp_close(file, (fl_owner_t)lockowner(stp->st_stateowner)); nfs4_put_stid(&stp->st_stid); @@ -2976,7 +2973,7 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); stp->st_stateowner = &oo->oo_owner; get_nfs4_file(fp); - stp->st_file = fp; + stp->st_stid.sc_file = fp; stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; @@ -3097,10 +3094,10 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp) { - struct nfs4_client *clp = dp->dl_stid.sc_client; - struct nfsd_net *nn = 
net_generic(clp->net, nfsd_net_id); + struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net, + nfsd_net_id); - block_delegations(&dp->dl_file->fi_fhandle); + block_delegations(&dp->dl_stid.sc_file->fi_fhandle); /* * We can't do this in nfsd_break_deleg_cb because it is @@ -3508,7 +3505,7 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) static int nfs4_setlease(struct nfs4_delegation *dp) { - struct nfs4_file *fp = dp->dl_file; + struct nfs4_file *fp = dp->dl_stid.sc_file; struct file_lock *fl; struct file *filp; int status = 0; @@ -3573,7 +3570,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, get_nfs4_file(fp); spin_lock(&state_lock); spin_lock(&fp->fi_lock); - dp->dl_file = fp; + dp->dl_stid.sc_file = fp; if (!fp->fi_lease) { spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); @@ -3669,7 +3666,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, default: goto out_no_deleg; } - dp = nfs4_set_delegation(clp, fh, stp->st_file); + dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file); if (IS_ERR(dp)) goto out_no_deleg; @@ -3959,7 +3956,7 @@ laundromat_main(struct work_struct *laundry) static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp) { - if (!nfsd_fh_match(&fhp->fh_handle, &stp->st_file->fi_fhandle)) + if (!nfsd_fh_match(&fhp->fh_handle, &stp->st_stid.sc_file->fi_fhandle)) return nfserr_bad_stateid; return nfs_ok; } @@ -4167,7 +4164,7 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, if (status) goto out; if (filpp) { - file = dp->dl_file->fi_deleg_file; + file = dp->dl_stid.sc_file->fi_deleg_file; if (!file) { WARN_ON_ONCE(1); status = nfserr_serverfault; @@ -4189,10 +4186,12 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, if (status) goto out; if (filpp) { + struct nfs4_file *fp = stp->st_stid.sc_file; + if (flags & RD_STATE) - file = find_readable_file(stp->st_file); + file = 
find_readable_file(fp); else - file = find_writeable_file(stp->st_file); + file = find_writeable_file(fp); } break; default: @@ -4212,7 +4211,7 @@ nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); - if (check_for_locks(stp->st_file, lo)) + if (check_for_locks(stp->st_stid.sc_file, lo)) return nfserr_locks_held; release_lockowner_if_empty(lo); return nfs_ok; @@ -4403,7 +4402,7 @@ static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 a { if (!test_access(access, stp)) return; - nfs4_file_put_access(stp->st_file, access); + nfs4_file_put_access(stp->st_stid.sc_file, access); clear_access(access, stp); } @@ -4492,9 +4491,9 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) * before returning however. */ release_all_access(s); - if (s->st_file) { - put_nfs4_file(s->st_file); - s->st_file = NULL; + if (s->st_stid.sc_file) { + put_nfs4_file(s->st_stid.sc_file); + s->st_stid.sc_file = NULL; } oo->oo_last_closed_stid = s; if (list_empty(&oo->oo_owner.so_stateids)) @@ -4695,7 +4694,7 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); stp->st_stateowner = &lo->lo_owner; get_nfs4_file(fp); - stp->st_file = fp; + stp->st_stid.sc_file = fp; stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; @@ -4712,7 +4711,7 @@ find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) struct nfs4_ol_stateid *lst; list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { - if (lst->st_file == fp) + if (lst->st_stid.sc_file == fp) return lst; } return NULL; @@ -4728,7 +4727,7 @@ check_lock_length(u64 offset, u64 length) static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) { - struct nfs4_file *fp = lock_stp->st_file; + struct nfs4_file *fp = lock_stp->st_stid.sc_file; lockdep_assert_held(&fp->fi_lock); @@ -4740,7 +4739,7 
@@ static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) { - struct nfs4_file *fi = ost->st_file; + struct nfs4_file *fi = ost->st_stid.sc_file; struct nfs4_openowner *oo = openowner(ost->st_stateowner); struct nfs4_client *cl = oo->oo_owner.so_client; struct inode *inode = cstate->current_fh.fh_dentry->d_inode; @@ -4865,7 +4864,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } - fp = lock_stp->st_file; + fp = lock_stp->st_stid.sc_file; locks_init_lock(file_lock); switch (lock->lk_type) { case NFS4_READ_LT: @@ -5065,7 +5064,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &stp, nn); if (status) goto out; - filp = find_any_file(stp->st_file); + filp = find_any_file(stp->st_stid.sc_file); if (!filp) { status = nfserr_lock_range; goto out; @@ -5188,7 +5187,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, lo = lockowner(sop); /* see if there are still any locks associated with it */ list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) { - if (check_for_locks(stp->st_file, lo)) + if (check_for_locks(stp->st_stid.sc_file, lo)) goto out; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 32c466265ac1..af1d9c42e939 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -85,6 +85,7 @@ struct nfs4_stid { unsigned char sc_type; stateid_t sc_stateid; struct nfs4_client *sc_client; + struct nfs4_file *sc_file; void (*sc_free)(struct nfs4_stid *); }; @@ -93,7 +94,6 @@ struct nfs4_delegation { struct list_head dl_perfile; struct list_head dl_perclnt; struct list_head dl_recall_lru; /* delegation recalled */ - struct nfs4_file *dl_file; u32 dl_type; time_t dl_time; /* For recall: */ @@ -407,7 +407,6 @@ struct nfs4_ol_stateid { struct list_head st_perstateowner; struct list_head st_locks; struct 
nfs4_stateowner * st_stateowner; - struct nfs4_file * st_file; unsigned char st_access_bmap; unsigned char st_deny_bmap; struct nfs4_ol_stateid * st_openstp; From 4770d722014b99e5438c0d1dc44db31ac4547af1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:10 -0400 Subject: [PATCH 093/167] nfsd4: use cl_lock to synchronize all stateid idr calls Currently, this is serialized by the client_mutex, which is slated for removal. Add finer-grained locking here. Also, do some cleanup around find_stateid to prepare for taking references. Signed-off-by: Trond Myklebust Signed-off-by: Benny Halevy Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 344cd1ac3f67..bb37cc4dd573 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -467,7 +467,6 @@ static void nfs4_file_put_access(struct nfs4_file *fp, u32 access) static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab) { - struct idr *stateids = &cl->cl_stateids; struct nfs4_stid *stid; int new_id; @@ -475,7 +474,11 @@ static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, if (!stid) return NULL; - new_id = idr_alloc_cyclic(stateids, stid, 0, 0, GFP_KERNEL); + idr_preload(GFP_KERNEL); + spin_lock(&cl->cl_lock); + new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 0, 0, GFP_NOWAIT); + spin_unlock(&cl->cl_lock); + idr_preload_end(); if (new_id < 0) goto out_free; stid->sc_client = cl; @@ -635,9 +638,12 @@ nfs4_put_stid(struct nfs4_stid *s) struct nfs4_file *fp = s->sc_file; struct nfs4_client *clp = s->sc_client; - if (!atomic_dec_and_test(&s->sc_count)) + might_lock(&clp->cl_lock); + + if (!atomic_dec_and_lock(&s->sc_count, &clp->cl_lock)) return; idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); + spin_unlock(&clp->cl_lock); 
s->sc_free(s); if (fp) put_nfs4_file(fp); @@ -1652,7 +1658,8 @@ static void gen_confirm(struct nfs4_client *clp) memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data)); } -static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t) +static struct nfs4_stid * +find_stateid_locked(struct nfs4_client *cl, stateid_t *t) { struct nfs4_stid *ret; @@ -1662,16 +1669,28 @@ static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t) return ret; } -static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) +static struct nfs4_stid * +find_stateid(struct nfs4_client *cl, stateid_t *t) +{ + struct nfs4_stid *ret; + + spin_lock(&cl->cl_lock); + ret = find_stateid_locked(cl, t); + spin_unlock(&cl->cl_lock); + return ret; +} + +static struct nfs4_stid * +find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) { struct nfs4_stid *s; - s = find_stateid(cl, t); - if (!s) - return NULL; - if (typemask & s->sc_type) - return s; - return NULL; + spin_lock(&cl->cl_lock); + s = find_stateid_locked(cl, t); + if (s != NULL && !(typemask & s->sc_type)) + s = NULL; + spin_unlock(&cl->cl_lock); + return s; } static struct nfs4_client *create_client(struct xdr_netobj name, From b49e084d8c7df1632bb2b94ae1a21c8a4cf2d8a4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:11 -0400 Subject: [PATCH 094/167] nfsd: do filp_close in sc_free callback for lock stateids Releasing locks when we unhash the stateid instead of doing so only when the stateid is actually released will be problematic in later patches when we need to protect the unhashing with spinlocks. Move it into the sc_free operation instead. Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index bb37cc4dd573..8ce5894133aa 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -502,7 +502,7 @@ out_free: return NULL; } -static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) +static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) { struct nfs4_stid *stid; struct nfs4_ol_stateid *stp; @@ -907,16 +907,23 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid) kmem_cache_free(stateid_slab, stid); } -static void __release_lock_stateid(struct nfs4_ol_stateid *stp) +static void nfs4_free_lock_stateid(struct nfs4_stid *stid) { + struct nfs4_ol_stateid *stp = openlockstateid(stid); + struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); struct file *file; + file = find_any_file(stp->st_stid.sc_file); + if (file) + filp_close(file, (fl_owner_t)lo); + nfs4_free_ol_stateid(stid); +} + +static void __release_lock_stateid(struct nfs4_ol_stateid *stp) +{ list_del(&stp->st_locks); unhash_generic_stateid(stp); unhash_stid(&stp->st_stid); - file = find_any_file(stp->st_stid.sc_file); - if (file) - filp_close(file, (fl_owner_t)lockowner(stp->st_stateowner)); nfs4_put_stid(&stp->st_stid); } @@ -3287,7 +3294,7 @@ new_owner: return nfserr_jukebox; open->op_openowner = oo; alloc_stateid: - open->op_stp = nfs4_alloc_stateid(clp); + open->op_stp = nfs4_alloc_open_stateid(clp); if (!open->op_stp) return nfserr_jukebox; return nfs_ok; @@ -4703,17 +4710,20 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct inode *inode, struct nfs4_ol_stateid *open_stp) { + struct nfs4_stid *s; struct nfs4_ol_stateid *stp; struct nfs4_client *clp = lo->lo_owner.so_client; - stp = nfs4_alloc_stateid(clp); - if (stp == NULL) + s = nfs4_alloc_stid(clp, stateid_slab); + if (s == NULL) return NULL; + stp = openlockstateid(s); 
stp->st_stid.sc_type = NFS4_LOCK_STID; list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); stp->st_stateowner = &lo->lo_owner; get_nfs4_file(fp); stp->st_stid.sc_file = fp; + stp->st_stid.sc_free = nfs4_free_lock_stateid; stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; From 1c755dc1ada95adc9aa41102baada73659397b80 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:12 -0400 Subject: [PATCH 095/167] nfsd: Add locking to protect the state owner lists Change to using the clp->cl_lock for this. For now, there's a lot of cl_lock thrashing, but in later patches we'll eliminate that and close the potential races that can occur when releasing the cl_lock while walking the lists. For now, the client_mutex prevents those races. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8ce5894133aa..3ac6e2fdabe5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -893,6 +893,8 @@ static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_file *fp = stp->st_stid.sc_file; + lockdep_assert_held(&stp->st_stateowner->so_client->cl_lock); + spin_lock(&fp->fi_lock); list_del(&stp->st_perfile); spin_unlock(&fp->fi_lock); @@ -921,9 +923,13 @@ static void nfs4_free_lock_stateid(struct nfs4_stid *stid) static void __release_lock_stateid(struct nfs4_ol_stateid *stp) { + struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); + + spin_lock(&oo->oo_owner.so_client->cl_lock); list_del(&stp->st_locks); unhash_generic_stateid(stp); unhash_stid(&stp->st_stid); + spin_unlock(&oo->oo_owner.so_client->cl_lock); nfs4_put_stid(&stp->st_stid); } @@ -967,20 +973,26 @@ static void release_lock_stateid(struct nfs4_ol_stateid *stp) } static void release_open_stateid_locks(struct nfs4_ol_stateid 
*open_stp) + __releases(&open_stp->st_stateowner->so_client->cl_lock) + __acquires(&open_stp->st_stateowner->so_client->cl_lock) { struct nfs4_ol_stateid *stp; while (!list_empty(&open_stp->st_locks)) { stp = list_entry(open_stp->st_locks.next, struct nfs4_ol_stateid, st_locks); + spin_unlock(&open_stp->st_stateowner->so_client->cl_lock); release_lock_stateid(stp); + spin_lock(&open_stp->st_stateowner->so_client->cl_lock); } } static void unhash_open_stateid(struct nfs4_ol_stateid *stp) { + spin_lock(&stp->st_stateowner->so_client->cl_lock); unhash_generic_stateid(stp); release_open_stateid_locks(stp); + spin_unlock(&stp->st_stateowner->so_client->cl_lock); } static void release_open_stateid(struct nfs4_ol_stateid *stp) @@ -2996,16 +3008,18 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_locks); - list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); stp->st_stateowner = &oo->oo_owner; get_nfs4_file(fp); stp->st_stid.sc_file = fp; stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; + spin_lock(&oo->oo_owner.so_client->cl_lock); + list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); spin_lock(&fp->fi_lock); list_add(&stp->st_perfile, &fp->fi_stateids); spin_unlock(&fp->fi_lock); + spin_unlock(&oo->oo_owner.so_client->cl_lock); } static void @@ -4711,6 +4725,7 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct nfs4_ol_stateid *open_stp) { struct nfs4_stid *s; + struct nfs4_openowner *oo = openowner(open_stp->st_stateowner); struct nfs4_ol_stateid *stp; struct nfs4_client *clp = lo->lo_owner.so_client; @@ -4719,7 +4734,6 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, return NULL; stp = openlockstateid(s); stp->st_stid.sc_type = NFS4_LOCK_STID; - list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); stp->st_stateowner = &lo->lo_owner; get_nfs4_file(fp); 
stp->st_stid.sc_file = fp; @@ -4727,10 +4741,13 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; + spin_lock(&oo->oo_owner.so_client->cl_lock); list_add(&stp->st_locks, &open_stp->st_locks); + list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); spin_lock(&fp->fi_lock); list_add(&stp->st_perfile, &fp->fi_stateids); spin_unlock(&fp->fi_lock); + spin_unlock(&oo->oo_owner.so_client->cl_lock); return stp; } From 356a95ece7aab38ae464e1041da26dcc1dff7ad2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:13 -0400 Subject: [PATCH 096/167] nfsd: clean up races in lock stateid searching and creation Preparation for removal of the client_mutex. Currently, no lock aside from the client_mutex is held when calling find_lock_state. Ensure that the cl_lock is held by adding a lockdep assertion. Once we remove the client_mutex, it'll be possible for another thread to race in and insert a lock state for the same file after we search but before we insert a new one. Ensure that doesn't happen by redoing the search after allocating a new stid that we plan to insert. If one is found just put the one that was allocated. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 71 +++++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 22 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3ac6e2fdabe5..59d44873b68b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4719,20 +4719,15 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str return lo; } -static struct nfs4_ol_stateid * -alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, - struct inode *inode, - struct nfs4_ol_stateid *open_stp) +static void +init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, + struct nfs4_file *fp, struct inode *inode, + struct nfs4_ol_stateid *open_stp) { - struct nfs4_stid *s; - struct nfs4_openowner *oo = openowner(open_stp->st_stateowner); - struct nfs4_ol_stateid *stp; struct nfs4_client *clp = lo->lo_owner.so_client; - s = nfs4_alloc_stid(clp, stateid_slab); - if (s == NULL) - return NULL; - stp = openlockstateid(s); + lockdep_assert_held(&clp->cl_lock); + stp->st_stid.sc_type = NFS4_LOCK_STID; stp->st_stateowner = &lo->lo_owner; get_nfs4_file(fp); @@ -4741,20 +4736,20 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; - spin_lock(&oo->oo_owner.so_client->cl_lock); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); spin_lock(&fp->fi_lock); list_add(&stp->st_perfile, &fp->fi_stateids); spin_unlock(&fp->fi_lock); - spin_unlock(&oo->oo_owner.so_client->cl_lock); - return stp; } static struct nfs4_ol_stateid * find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) { struct nfs4_ol_stateid *lst; + struct nfs4_client *clp = lo->lo_owner.so_client; + + lockdep_assert_held(&clp->cl_lock); list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { if (lst->st_stid.sc_file == fp) @@ -4763,6 
+4758,38 @@ find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) return NULL; } +static struct nfs4_ol_stateid * +find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi, + struct inode *inode, struct nfs4_ol_stateid *ost, + bool *new) +{ + struct nfs4_stid *ns = NULL; + struct nfs4_ol_stateid *lst; + struct nfs4_openowner *oo = openowner(ost->st_stateowner); + struct nfs4_client *clp = oo->oo_owner.so_client; + + spin_lock(&clp->cl_lock); + lst = find_lock_stateid(lo, fi); + if (lst == NULL) { + spin_unlock(&clp->cl_lock); + ns = nfs4_alloc_stid(clp, stateid_slab); + if (ns == NULL) + return NULL; + + spin_lock(&clp->cl_lock); + lst = find_lock_stateid(lo, fi); + if (likely(!lst)) { + lst = openlockstateid(ns); + init_lock_stateid(lst, lo, fi, inode, ost); + ns = NULL; + *new = true; + } + } + spin_unlock(&clp->cl_lock); + if (ns) + nfs4_put_stid(ns); + return lst; +} static int check_lock_length(u64 offset, u64 length) @@ -4783,7 +4810,11 @@ static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access) set_access(access, lock_stp); } -static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) +static __be32 +lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, + struct nfs4_ol_stateid *ost, + struct nfsd4_lock *lock, + struct nfs4_ol_stateid **lst, bool *new) { struct nfs4_file *fi = ost->st_stid.sc_file; struct nfs4_openowner *oo = openowner(ost->st_stateowner); @@ -4807,14 +4838,10 @@ static __be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, s return nfserr_bad_seqid; } - *lst = find_lock_stateid(lo, fi); + *lst = find_or_create_lock_stateid(lo, fi, inode, ost, new); if (*lst == NULL) { - *lst = alloc_init_lock_stateid(lo, fi, inode, ost); - if (*lst == NULL) { - release_lockowner_if_empty(lo); - return nfserr_jukebox; - } - *new = true; + release_lockowner_if_empty(lo); 
+ return nfserr_jukebox; } return nfs_ok; } From 1af71cc8014e78e975ca47929c957228019a579b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:14 -0400 Subject: [PATCH 097/167] nfsd: ensure atomicity in nfsd4_free_stateid and nfsd4_validate_stateid Hold the cl_lock over the bulk of these functions. In addition to ensuring that they aren't freed prematurely, this will also help prevent a potential race that could be introduced later. Once we remove the client_mutex, it'll be possible for FREE_STATEID and CLOSE to race and for both to try to put the "persistent" reference to the stateid. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 71 +++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 59d44873b68b..f4c7bf96c9fd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1688,17 +1688,6 @@ find_stateid_locked(struct nfs4_client *cl, stateid_t *t) return ret; } -static struct nfs4_stid * -find_stateid(struct nfs4_client *cl, stateid_t *t) -{ - struct nfs4_stid *ret; - - spin_lock(&cl->cl_lock); - ret = find_stateid_locked(cl, t); - spin_unlock(&cl->cl_lock); - return ret; -} - static struct nfs4_stid * find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) { @@ -4098,10 +4087,10 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) { struct nfs4_stid *s; struct nfs4_ol_stateid *ols; - __be32 status; + __be32 status = nfserr_bad_stateid; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) - return nfserr_bad_stateid; + return status; /* Client debugging aid. 
*/ if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) { char addr_str[INET6_ADDRSTRLEN]; @@ -4109,34 +4098,42 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) sizeof(addr_str)); pr_warn_ratelimited("NFSD: client %s testing state ID " "with incorrect client ID\n", addr_str); - return nfserr_bad_stateid; + return status; } - s = find_stateid(cl, stateid); + spin_lock(&cl->cl_lock); + s = find_stateid_locked(cl, stateid); if (!s) - return nfserr_bad_stateid; + goto out_unlock; status = check_stateid_generation(stateid, &s->sc_stateid, 1); if (status) - return status; + goto out_unlock; switch (s->sc_type) { case NFS4_DELEG_STID: - return nfs_ok; + status = nfs_ok; + break; case NFS4_REVOKED_DELEG_STID: - return nfserr_deleg_revoked; + status = nfserr_deleg_revoked; + break; case NFS4_OPEN_STID: case NFS4_LOCK_STID: ols = openlockstateid(s); if (ols->st_stateowner->so_is_open_owner && !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) - return nfserr_bad_stateid; - return nfs_ok; + status = nfserr_bad_stateid; + else + status = nfs_ok; + break; default: printk("unknown stateid type %x\n", s->sc_type); /* Fallthrough */ case NFS4_CLOSED_STID: case NFS4_CLOSED_DELEG_STID: - return nfserr_bad_stateid; + status = nfserr_bad_stateid; } +out_unlock: + spin_unlock(&cl->cl_lock); + return status; } static __be32 @@ -4287,34 +4284,38 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 ret = nfserr_bad_stateid; nfs4_lock_state(); - s = find_stateid(cl, stateid); + spin_lock(&cl->cl_lock); + s = find_stateid_locked(cl, stateid); if (!s) - goto out; + goto out_unlock; switch (s->sc_type) { case NFS4_DELEG_STID: ret = nfserr_locks_held; - goto out; + break; case NFS4_OPEN_STID: + ret = check_stateid_generation(stateid, &s->sc_stateid, 1); + if (ret) + break; + ret = nfserr_locks_held; + break; case NFS4_LOCK_STID: ret = check_stateid_generation(stateid, &s->sc_stateid, 1); if (ret) - 
goto out; - if (s->sc_type == NFS4_LOCK_STID) - ret = nfsd4_free_lock_stateid(openlockstateid(s)); - else - ret = nfserr_locks_held; - break; + break; + spin_unlock(&cl->cl_lock); + ret = nfsd4_free_lock_stateid(openlockstateid(s)); + goto out; case NFS4_REVOKED_DELEG_STID: dp = delegstateid(s); - spin_lock(&cl->cl_lock); list_del_init(&dp->dl_recall_lru); spin_unlock(&cl->cl_lock); nfs4_put_stid(s); ret = nfs_ok; - break; - default: - ret = nfserr_bad_stateid; + goto out; + /* Default falls through and returns nfserr_bad_stateid */ } +out_unlock: + spin_unlock(&cl->cl_lock); out: nfs4_unlock_state(); return ret; From 3d0fabd5a48fbf6e7097c17325295ae778137fe3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jul 2014 21:34:15 -0400 Subject: [PATCH 098/167] nfsd: Add reference counting to lock stateids Ensure that nfsd4_lock() references the lock stateid while it is manipulating it. Not currently necessary, but will be once the client_mutex is removed. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f4c7bf96c9fd..1ddf4da0023d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4729,6 +4729,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, lockdep_assert_held(&clp->cl_lock); + atomic_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_LOCK_STID; stp->st_stateowner = &lo->lo_owner; get_nfs4_file(fp); @@ -4753,8 +4754,10 @@ find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp) lockdep_assert_held(&clp->cl_lock); list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) { - if (lst->st_stid.sc_file == fp) + if (lst->st_stid.sc_file == fp) { + atomic_inc(&lst->st_stid.sc_count); return lst; + } } return NULL; } @@ -4856,7 +4859,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct nfs4_openowner *open_sop = NULL; struct nfs4_lockowner *lock_sop = NULL; - struct nfs4_ol_stateid *lock_stp; + struct nfs4_ol_stateid *lock_stp = NULL; struct nfs4_file *fp; struct file *filp = NULL; struct file_lock *file_lock = NULL; @@ -4910,11 +4913,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; status = lookup_or_create_lock_state(cstate, open_stp, lock, &lock_stp, &new_state); - } else + } else { status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, &lock->lk_old_lock_stateid, NFS4_LOCK_STID, &lock_stp, nn); + /* FIXME: move into nfs4_preprocess_seqid_op */ + if (!status) + atomic_inc(&lock_stp->st_stid.sc_count); + } if (status) goto out; lock_sop = lockowner(lock_stp->st_stateowner); @@ -5007,6 +5014,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, out: if (filp) fput(filp); + if (lock_stp) + nfs4_put_stid(&lock_stp->st_stid); if (status && new_state) release_lock_stateid(lock_stp); nfsd4_bump_seqid(cstate, status); From 
858cc57336dd98ca54dff417b55a86aa101f5fb0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jul 2014 21:34:16 -0400 Subject: [PATCH 099/167] nfsd: nfsd4_locku() must reference the lock stateid Ensure that nfsd4_locku() keeps a reference to the lock stateid until it is done working with it. Necessary step toward client_mutex removal. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1ddf4da0023d..4f191456d737 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5147,10 +5147,12 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &stp, nn); if (status) goto out; + /* FIXME: move into nfs4_preprocess_seqid_op */ + atomic_inc(&stp->st_stid.sc_count); filp = find_any_file(stp->st_stid.sc_file); if (!filp) { status = nfserr_lock_range; - goto out; + goto put_stateid; } file_lock = locks_alloc_lock(); if (!file_lock) { @@ -5180,6 +5182,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); fput: fput(filp); +put_stateid: + nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) From 67cb1279be27345ba6855c1aab9e90ef5f5ac645 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jul 2014 21:34:17 -0400 Subject: [PATCH 100/167] nfsd: Ensure that nfs4_open_delegation() references the delegation stateid Ensure that nfs4_open_delegation() keeps a reference to the delegation stateid until it is done working with it. Necessary step toward client_mutex removal. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4f191456d737..2df6af93120e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -674,6 +674,7 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) lockdep_assert_held(&state_lock); lockdep_assert_held(&fp->fi_lock); + atomic_inc(&dp->dl_stid.sc_count); dp->dl_stid.sc_type = NFS4_DELEG_STID; list_add(&dp->dl_perfile, &fp->fi_delegations); list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); @@ -3704,6 +3705,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, dprintk("NFSD: delegation stateid=" STATEID_FMT "\n", STATEID_VAL(&dp->dl_stid.sc_stateid)); open->op_delegate_type = NFS4_OPEN_DELEGATE_READ; + nfs4_put_stid(&dp->dl_stid); return; out_no_deleg: open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE; From dcd94cc2e75cb1457d4d2dcfa0b360baee4b8764 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jul 2014 21:34:18 -0400 Subject: [PATCH 101/167] nfsd: nfsd4_process_open2() must reference the delegation stateid Ensure that nfsd4_process_open2() keeps a reference to the delegation stateid until it is done working with it. Necessary step toward client_mutex removal. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2df6af93120e..5cb6305036cd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3325,6 +3325,8 @@ static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, statei ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID); if (!ret) return NULL; + /* FIXME: move into find_stateid_by_type */ + atomic_inc(&ret->sc_count); return delegstateid(ret); } @@ -3340,14 +3342,18 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open, { int flags; __be32 status = nfserr_bad_stateid; + struct nfs4_delegation *deleg; - *dp = find_deleg_stateid(cl, &open->op_delegate_stateid); - if (*dp == NULL) + deleg = find_deleg_stateid(cl, &open->op_delegate_stateid); + if (deleg == NULL) goto out; flags = share_access_to_flags(open->op_share_access); - status = nfs4_check_delegmode(*dp, flags); - if (status) - *dp = NULL; + status = nfs4_check_delegmode(deleg, flags); + if (status) { + nfs4_put_stid(&deleg->dl_stid); + goto out; + } + *dp = deleg; out: if (!nfsd4_is_deleg_cur(open)) return nfs_ok; @@ -3828,6 +3834,8 @@ out: if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) && !nfsd4_has_session(&resp->cstate)) open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; + if (dp) + nfs4_put_stid(&dp->dl_stid); return status; } From d6f2bc5dcf58259b6c3f206ae8f14087300b5bcf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jul 2014 21:34:19 -0400 Subject: [PATCH 102/167] nfsd: nfsd4_process_open2() must reference the open stateid Ensure that nfsd4_process_open2() keeps a reference to the open stateid until it is done working with it. Necessary step toward client_mutex removal. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5cb6305036cd..f3018cb26769 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2996,6 +2996,7 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { struct nfs4_openowner *oo = open->op_openowner; + atomic_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_locks); stp->st_stateowner = &oo->oo_owner; @@ -3376,6 +3377,7 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open) continue; if (local->st_stateowner == &oo->oo_owner) { ret = local; + atomic_inc(&ret->st_stid.sc_count); break; } } @@ -3836,6 +3838,8 @@ out: open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; if (dp) nfs4_put_stid(&dp->dl_stid); + if (stp) + nfs4_put_stid(&stp->st_stid); return status; } From 8a0b589d8fd0e63579982cbfda099a2e09b52811 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jul 2014 21:34:20 -0400 Subject: [PATCH 103/167] nfsd: Prepare nfsd4_close() for open stateid referencing Prepare nfsd4_close for a future where nfs4_preprocess_seqid_op() hands it a fully referenced open stateid. Necessary step toward client_mutex removal. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f3018cb26769..4e50f14f5bc1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4579,10 +4579,15 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_bump_seqid(cstate, status); if (status) goto out; + /* FIXME: move into nfs4_preprocess_seqid_op */ + atomic_inc(&stp->st_stid.sc_count); update_stateid(&stp->st_stid.sc_stateid); memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); nfsd4_close_open_stateid(stp); + + /* put reference from nfs4_preprocess_seqid_op */ + nfs4_put_stid(&stp->st_stid); out: if (!cstate->replay_owner) nfs4_unlock_state(); From 2585fc79584684666b3d107179e43484dfb4da13 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jul 2014 21:34:21 -0400 Subject: [PATCH 104/167] nfsd: nfsd4_open_confirm() must reference the open stateid Ensure that nfsd4_open_confirm() keeps a reference to the open stateid until it is done working with it. Necessary step toward client_mutex removal. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4e50f14f5bc1..8e18ca49555f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4434,10 +4434,12 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, NFS4_OPEN_STID, &stp, nn); if (status) goto out; + /* FIXME: move into nfs4_preprocess_seqid_op */ + atomic_inc(&stp->st_stid.sc_count); oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; if (oo->oo_flags & NFS4_OO_CONFIRMED) - goto out; + goto put_stateid; oo->oo_flags |= NFS4_OO_CONFIRMED; update_stateid(&stp->st_stid.sc_stateid); memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); @@ -4446,6 +4448,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_client_record_create(oo->oo_owner.so_client); status = nfs_ok; +put_stateid: + nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) From 0667b1e9d8a7c0a315da8bd7f454ef4361ceb3ac Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jul 2014 21:34:22 -0400 Subject: [PATCH 105/167] nfsd: Add reference counting to nfs4_preprocess_confirmed_seqid_op Ensure that all the callers put the open stateid after use. Necessary step toward client_mutex removal. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 8e18ca49555f..a777666044f1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4405,6 +4405,8 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs NFS4_OPEN_STID, stpp, nn); if (status) return status; + /* FIXME: move into nfs4_preprocess_seqid_op */ + atomic_inc(&(*stpp)->st_stid.sc_count); oo = openowner((*stpp)->st_stateowner); if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) return nfserr_bad_stateid; @@ -4509,12 +4511,12 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, if (!test_access(od->od_share_access, stp)) { dprintk("NFSD: access not a subset of current bitmap: 0x%hhx, input access=%08x\n", stp->st_access_bmap, od->od_share_access); - goto out; + goto put_stateid; } if (!test_deny(od->od_share_deny, stp)) { dprintk("NFSD: deny not a subset of current bitmap: 0x%hhx, input deny=%08x\n", stp->st_deny_bmap, od->od_share_deny); - goto out; + goto put_stateid; } nfs4_stateid_downgrade(stp, od->od_share_access); @@ -4523,6 +4525,8 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, update_stateid(&stp->st_stid.sc_stateid); memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); status = nfs_ok; +put_stateid: + nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); if (!cstate->replay_owner) @@ -4883,6 +4887,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfs4_openowner *open_sop = NULL; struct nfs4_lockowner *lock_sop = NULL; struct nfs4_ol_stateid *lock_stp = NULL; + struct nfs4_ol_stateid *open_stp = NULL; struct nfs4_file *fp; struct file *filp = NULL; struct file_lock *file_lock = NULL; @@ -4910,8 +4915,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_lock_state(); if (lock->lk_is_new) { - struct nfs4_ol_stateid *open_stp = NULL; - if (nfsd4_has_session(cstate)) /* See 
rfc 5661 18.10.3: given clientid is ignored: */ memcpy(&lock->v.new.clientid, @@ -5039,6 +5042,8 @@ out: fput(filp); if (lock_stp) nfs4_put_stid(&lock_stp->st_stid); + if (open_stp) + nfs4_put_stid(&open_stp->st_stid); if (status && new_state) release_lock_stateid(lock_stp); nfsd4_bump_seqid(cstate, status); From 4cbfc9f7046a31721075ecde333519867807ecf8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jul 2014 21:34:23 -0400 Subject: [PATCH 106/167] nfsd: Migrate the stateid reference into nfs4_preprocess_seqid_op Allow nfs4_preprocess_seqid_op to take the stateid reference, instead of having all the callers do so. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a777666044f1..b0c0f4cdf503 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4390,8 +4390,11 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, cstate->replay_owner = stp->st_stateowner; status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp); - if (!status) + if (!status) { + /* FIXME: move into find_stateid_by_type */ + atomic_inc(&stp->st_stid.sc_count); *stpp = stp; + } return status; } @@ -4400,16 +4403,18 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs { __be32 status; struct nfs4_openowner *oo; + struct nfs4_ol_stateid *stp; status = nfs4_preprocess_seqid_op(cstate, seqid, stateid, - NFS4_OPEN_STID, stpp, nn); + NFS4_OPEN_STID, &stp, nn); if (status) return status; - /* FIXME: move into nfs4_preprocess_seqid_op */ - atomic_inc(&(*stpp)->st_stid.sc_count); - oo = openowner((*stpp)->st_stateowner); - if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) + oo = openowner(stp->st_stateowner); + if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { + nfs4_put_stid(&stp->st_stid); return nfserr_bad_stateid; + } + *stpp = stp; return nfs_ok; } @@ -4436,8 +4441,6 @@ 
nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, NFS4_OPEN_STID, &stp, nn); if (status) goto out; - /* FIXME: move into nfs4_preprocess_seqid_op */ - atomic_inc(&stp->st_stid.sc_count); oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; if (oo->oo_flags & NFS4_OO_CONFIRMED) @@ -4587,8 +4590,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfsd4_bump_seqid(cstate, status); if (status) goto out; - /* FIXME: move into nfs4_preprocess_seqid_op */ - atomic_inc(&stp->st_stid.sc_count); update_stateid(&stp->st_stid.sc_stateid); memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); @@ -4944,9 +4945,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, lock->lk_old_lock_seqid, &lock->lk_old_lock_stateid, NFS4_LOCK_STID, &lock_stp, nn); - /* FIXME: move into nfs4_preprocess_seqid_op */ - if (!status) - atomic_inc(&lock_stp->st_stid.sc_count); } if (status) goto out; @@ -5175,8 +5173,6 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &stp, nn); if (status) goto out; - /* FIXME: move into nfs4_preprocess_seqid_op */ - atomic_inc(&stp->st_stid.sc_count); filp = find_any_file(stp->st_stid.sc_file); if (!filp) { status = nfserr_lock_range; From fd9110113c434562c287f222cb4e30befb15ecdd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jul 2014 21:34:24 -0400 Subject: [PATCH 107/167] nfsd: Migrate the stateid reference into nfs4_lookup_stateid() Allow nfs4_lookup_stateid to take the stateid reference, instead of having all the callers do so. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b0c0f4cdf503..a4a49a3b464c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4170,6 +4170,8 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, *s = find_stateid_by_type(cstate->clp, stateid, typemask); if (!*s) return nfserr_bad_stateid; + /* FIXME: move into find_stateid_by_type */ + atomic_inc(&(*s)->sc_count); return nfs_ok; } @@ -4204,7 +4206,7 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s, nn); if (status) - goto out; + goto unlock_state; status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); if (status) goto out; @@ -4253,6 +4255,8 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, if (file) *filpp = file; out: + nfs4_put_stid(s); +unlock_state: nfs4_unlock_state(); return status; } @@ -4390,11 +4394,10 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, cstate->replay_owner = stp->st_stateowner; status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp); - if (!status) { - /* FIXME: move into find_stateid_by_type */ - atomic_inc(&stp->st_stid.sc_count); + if (!status) *stpp = stp; - } + else + nfs4_put_stid(&stp->st_stid); return status; } @@ -4623,9 +4626,11 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dp = delegstateid(s); status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate)); if (status) - goto out; + goto put_stateid; destroy_delegation(dp); +put_stateid: + nfs4_put_stid(&dp->dl_stid); out: nfs4_unlock_state(); From 2d3f96689ffc757628c6d4038cacaaeb72a03345 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jul 2014 21:34:25 -0400 Subject: [PATCH 108/167] nfsd: Migrate the stateid reference into 
nfs4_find_stateid_by_type() Allow nfs4_find_stateid_by_type to take the stateid reference, while still holding the &cl->cl_lock. Necessary step toward client_mutex removal. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a4a49a3b464c..653de6b14665 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1696,8 +1696,12 @@ find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) spin_lock(&cl->cl_lock); s = find_stateid_locked(cl, t); - if (s != NULL && !(typemask & s->sc_type)) - s = NULL; + if (s != NULL) { + if (typemask & s->sc_type) + atomic_inc(&s->sc_count); + else + s = NULL; + } spin_unlock(&cl->cl_lock); return s; } @@ -3326,8 +3330,6 @@ static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, statei ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID); if (!ret) return NULL; - /* FIXME: move into find_stateid_by_type */ - atomic_inc(&ret->sc_count); return delegstateid(ret); } @@ -4170,8 +4172,6 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, *s = find_stateid_by_type(cstate->clp, stateid, typemask); if (!*s) return nfserr_bad_stateid; - /* FIXME: move into find_stateid_by_type */ - atomic_inc(&(*s)->sc_count); return nfs_ok; } From 6b180f0b57af0295e8dc2602a7a4781241766340 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:26 -0400 Subject: [PATCH 109/167] nfsd: Add reference counting to state owners The way stateowners are managed today is somewhat awkward. They need to be explicitly destroyed, even though the stateids reference them. This will be particularly problematic when we remove the client_mutex. We may create a new stateowner and attempt to open a file or set a lock, and have that fail. In the meantime, another RPC may come in that uses that same stateowner and succeed. 
We can't have the first task tearing down the stateowner in that situation. To fix this, we need to change how stateowners are tracked altogether. Refcount them and only destroy them once all stateids that reference them have been destroyed. This patch starts by adding the refcounting necessary to do that. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 49 ++++++++++++++++++++++++++++++++------------- fs/nfsd/state.h | 22 +++++++++++++------- 2 files changed, 50 insertions(+), 21 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 653de6b14665..5a93e5fafd4a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -890,6 +890,14 @@ release_all_access(struct nfs4_ol_stateid *stp) } } +static void nfs4_put_stateowner(struct nfs4_stateowner *sop) +{ + if (!atomic_dec_and_test(&sop->so_count)) + return; + kfree(sop->so_owner.data); + sop->so_ops->so_free(sop); +} + static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_file *fp = stp->st_stid.sc_file; @@ -946,16 +954,10 @@ static void unhash_lockowner(struct nfs4_lockowner *lo) } } -static void nfs4_free_lockowner(struct nfs4_lockowner *lo) -{ - kfree(lo->lo_owner.so_owner.data); - kmem_cache_free(lockowner_slab, lo); -} - static void release_lockowner(struct nfs4_lockowner *lo) { unhash_lockowner(lo); - nfs4_free_lockowner(lo); + nfs4_put_stateowner(&lo->lo_owner); } static void release_lockowner_if_empty(struct nfs4_lockowner *lo) @@ -1025,18 +1027,12 @@ static void release_last_closed_stateid(struct nfs4_openowner *oo) } } -static void nfs4_free_openowner(struct nfs4_openowner *oo) -{ - kfree(oo->oo_owner.so_owner.data); - kmem_cache_free(openowner_slab, oo); -} - static void release_openowner(struct nfs4_openowner *oo) { unhash_openowner(oo); list_del(&oo->oo_close_lru); release_last_closed_stateid(oo); - nfs4_free_openowner(oo); + nfs4_put_stateowner(&oo->oo_owner); } static inline int @@ -2964,6 
+2960,7 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj INIT_LIST_HEAD(&sop->so_stateids); sop->so_client = clp; init_nfs4_replay(&sop->so_replay); + atomic_set(&sop->so_count, 1); return sop; } @@ -2975,6 +2972,17 @@ static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, u list_add(&oo->oo_perclient, &clp->cl_openowners); } +static void nfs4_free_openowner(struct nfs4_stateowner *so) +{ + struct nfs4_openowner *oo = openowner(so); + + kmem_cache_free(openowner_slab, oo); +} + +static const struct nfs4_stateowner_operations openowner_ops = { + .so_free = nfs4_free_openowner, +}; + static struct nfs4_openowner * alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, struct nfsd4_compound_state *cstate) @@ -2985,6 +2993,7 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, oo = alloc_stateowner(openowner_slab, &open->op_owner, clp); if (!oo) return NULL; + oo->oo_owner.so_ops = &openowner_ops; oo->oo_owner.so_is_open_owner = 1; oo->oo_owner.so_seqid = open->op_seqid; oo->oo_flags = NFS4_OO_NEW; @@ -4729,6 +4738,17 @@ find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner, return NULL; } +static void nfs4_free_lockowner(struct nfs4_stateowner *sop) +{ + struct nfs4_lockowner *lo = lockowner(sop); + + kmem_cache_free(lockowner_slab, lo); +} + +static const struct nfs4_stateowner_operations lockowner_ops = { + .so_free = nfs4_free_lockowner, +}; + /* * Alloc a lock owner structure. 
* Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has @@ -4749,6 +4769,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str /* It is the openowner seqid that will be incremented in encode in the * case of new lockowners; so increment the lock seqid manually: */ lo->lo_owner.so_seqid = lock->lk_new_lock_seqid + 1; + lo->lo_owner.so_ops = &lockowner_ops; list_add(&lo->lo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); return lo; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index af1d9c42e939..dc725deb4aa8 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -331,16 +331,24 @@ struct nfs4_replay { char rp_ibuf[NFSD4_REPLAY_ISIZE]; }; +struct nfs4_stateowner; + +struct nfs4_stateowner_operations { + void (*so_free)(struct nfs4_stateowner *); +}; + struct nfs4_stateowner { - struct list_head so_strhash; /* hash by op_name */ - struct list_head so_stateids; - struct nfs4_client * so_client; + struct list_head so_strhash; + struct list_head so_stateids; + struct nfs4_client *so_client; + const struct nfs4_stateowner_operations *so_ops; /* after increment in ENCODE_SEQID_OP_TAIL, represents the next * sequence id expected from the client: */ - u32 so_seqid; - struct xdr_netobj so_owner; /* open owner name */ - struct nfs4_replay so_replay; - bool so_is_open_owner; + atomic_t so_count; + u32 so_seqid; + struct xdr_netobj so_owner; /* open owner name */ + struct nfs4_replay so_replay; + bool so_is_open_owner; }; struct nfs4_openowner { From 58fb12e6a42f30adf209f8f41385a3bbb2c82420 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:27 -0400 Subject: [PATCH 110/167] nfsd: Add a mutex to protect the NFSv4.0 open owner replay cache We don't want to rely on the client_mutex for protection in the case of NFSv4 open owners. Instead, we add a mutex that will only be taken for NFSv4.0 state mutating operations, and that will be released once the entire compound is done. 
Also, ensure that nfsd4_cstate_assign_replay/nfsd4_cstate_clear_replay take a reference to the stateowner when they are using it for NFSv4.0 open and lock replay caching. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 12 +++--------- fs/nfsd/nfs4state.c | 47 +++++++++++++++++++++++++++++++-------------- fs/nfsd/nfs4xdr.c | 2 -- fs/nfsd/state.h | 1 + fs/nfsd/xdr4.h | 5 ++++- 5 files changed, 41 insertions(+), 26 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 8611585f739d..29cf395b694e 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -469,12 +469,9 @@ out: fh_put(resfh); kfree(resfh); } - nfsd4_cleanup_open_state(open, status); - if (open->op_openowner && !nfsd4_has_session(cstate)) - cstate->replay_owner = &open->op_openowner->oo_owner; + nfsd4_cleanup_open_state(cstate, open, status); nfsd4_bump_seqid(cstate, status); - if (!cstate->replay_owner) - nfs4_unlock_state(); + nfs4_unlock_state(); return status; } @@ -1395,10 +1392,7 @@ encode_op: args->ops, args->opcnt, resp->opcnt, op->opnum, be32_to_cpu(status)); - if (cstate->replay_owner) { - nfs4_unlock_state(); - cstate->replay_owner = NULL; - } + nfsd4_cstate_clear_replay(cstate); /* XXX Ugh, we need to get rid of this kind of special case: */ if (op->opnum == OP_READ && op->u.read.rd_filp) fput(op->u.read.rd_filp); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5a93e5fafd4a..749608b914b4 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1069,7 +1069,7 @@ void nfsd4_bump_seqid(struct nfsd4_compound_state *cstate, __be32 nfserr) return; if (!seqid_mutating_err(ntohl(nfserr))) { - cstate->replay_owner = NULL; + nfsd4_cstate_clear_replay(cstate); return; } if (!so) @@ -2940,6 +2940,28 @@ static void init_nfs4_replay(struct nfs4_replay *rp) rp->rp_status = nfserr_serverfault; rp->rp_buflen = 0; rp->rp_buf = rp->rp_ibuf; + mutex_init(&rp->rp_mutex); +} + +static void 
nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate, + struct nfs4_stateowner *so) +{ + if (!nfsd4_has_session(cstate)) { + mutex_lock(&so->so_replay.rp_mutex); + cstate->replay_owner = so; + atomic_inc(&so->so_count); + } +} + +void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate) +{ + struct nfs4_stateowner *so = cstate->replay_owner; + + if (so != NULL) { + cstate->replay_owner = NULL; + mutex_unlock(&so->so_replay.rp_mutex); + nfs4_put_stateowner(so); + } } static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj *owner, struct nfs4_client *clp) @@ -3855,7 +3877,8 @@ out: return status; } -void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status) +void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, + struct nfsd4_open *open, __be32 status) { if (open->op_openowner) { struct nfs4_openowner *oo = open->op_openowner; @@ -3869,6 +3892,8 @@ void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status) } else oo->oo_flags &= ~NFS4_OO_NEW; } + if (open->op_openowner) + nfsd4_cstate_assign_replay(cstate, &oo->oo_owner); } if (open->op_file) nfsd4_free_file(open->op_file); @@ -4399,8 +4424,7 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, if (status) return status; stp = openlockstateid(s); - if (!nfsd4_has_session(cstate)) - cstate->replay_owner = stp->st_stateowner; + nfsd4_cstate_assign_replay(cstate, stp->st_stateowner); status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp); if (!status) @@ -4469,8 +4493,7 @@ put_stateid: nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); - if (!cstate->replay_owner) - nfs4_unlock_state(); + nfs4_unlock_state(); return status; } @@ -4544,8 +4567,7 @@ put_stateid: nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); - if (!cstate->replay_owner) - nfs4_unlock_state(); + nfs4_unlock_state(); return status; } @@ -4610,8 +4632,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct 
nfsd4_compound_state *cstate, /* put reference from nfs4_preprocess_seqid_op */ nfs4_put_stid(&stp->st_stid); out: - if (!cstate->replay_owner) - nfs4_unlock_state(); + nfs4_unlock_state(); return status; } @@ -5071,8 +5092,7 @@ out: if (status && new_state) release_lock_stateid(lock_stp); nfsd4_bump_seqid(cstate, status); - if (!cstate->replay_owner) - nfs4_unlock_state(); + nfs4_unlock_state(); if (file_lock) locks_free_lock(file_lock); if (conflock) @@ -5236,8 +5256,7 @@ put_stateid: nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); - if (!cstate->replay_owner) - nfs4_unlock_state(); + nfs4_unlock_state(); if (file_lock) locks_free_lock(file_lock); return status; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 628b430e743e..72a2a82e11a4 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3925,8 +3925,6 @@ status: * * XDR note: do not encode rp->rp_buflen: the buffer contains the * previously sent already encoded operation. - * - * called with nfs4_lock_state() held */ void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index dc725deb4aa8..9cba295812f6 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -328,6 +328,7 @@ struct nfs4_replay { unsigned int rp_buflen; char *rp_buf; struct knfsd_fh rp_openfh; + struct mutex rp_mutex; char rp_ibuf[NFSD4_REPLAY_ISIZE]; }; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 5abf6c942ddf..465e7799742a 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -599,7 +599,9 @@ extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *, struct nfsd4_open *open, struct nfsd_net *nn); extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open); -extern void nfsd4_cleanup_open_state(struct nfsd4_open *open, __be32 status); +extern void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate); +extern void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, + struct 
nfsd4_open *open, __be32 status); extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc); extern __be32 nfsd4_close(struct svc_rqst *rqstp, @@ -630,6 +632,7 @@ extern __be32 nfsd4_test_stateid(struct svc_rqst *rqstp, extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid); extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr); + #endif /* From 5db1c03feb00a72846927172d81744790e601b97 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:28 -0400 Subject: [PATCH 111/167] nfsd: clean up lockowner refcounting when finding them Ensure that when finding or creating a lockowner, we get a reference to it. For now, we also take an extra reference when a lockowner is created that can be put when release_lockowner is called, but we'll remove that in a later patch once we change how references are held. Since we no longer destroy lockowners in the event of an error in nfsd4_lock, we must change how the seqid gets bumped in the lk_is_new case. Instead of doing so on creation, do it manually in nfsd4_lock. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 45 +++++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 749608b914b4..eaa5f9ebf444 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4754,6 +4754,7 @@ find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner, continue; if (!same_owner_str(so, owner, clid)) continue; + atomic_inc(&so->so_count); return lockowner(so); } return NULL; @@ -4787,9 +4788,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str return NULL; INIT_LIST_HEAD(&lo->lo_owner.so_stateids); lo->lo_owner.so_is_open_owner = 0; - /* It is the openowner seqid that will be incremented in encode in the - * case of new lockowners; so increment the lock seqid manually: */ - lo->lo_owner.so_seqid = lock->lk_new_lock_seqid + 1; + lo->lo_owner.so_seqid = lock->lk_new_lock_seqid; lo->lo_owner.so_ops = &lockowner_ops; list_add(&lo->lo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); return lo; @@ -4895,6 +4894,7 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new) { + __be32 status; struct nfs4_file *fi = ost->st_stid.sc_file; struct nfs4_openowner *oo = openowner(ost->st_stateowner); struct nfs4_client *cl = oo->oo_owner.so_client; @@ -4910,19 +4910,26 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); if (lo == NULL) return nfserr_jukebox; + /* FIXME: extra reference for new lockowners for the client */ + atomic_inc(&lo->lo_owner.so_count); } else { /* with an existing lockowner, seqids must be the same */ + status = nfserr_bad_seqid; if (!cstate->minorversion && lock->lk_new_lock_seqid != lo->lo_owner.so_seqid) - return nfserr_bad_seqid; + goto out; } *lst = find_or_create_lock_stateid(lo, fi, inode, ost, new); if (*lst == NULL) { 
release_lockowner_if_empty(lo); - return nfserr_jukebox; + status = nfserr_jukebox; + goto out; } - return nfs_ok; + status = nfs_ok; +out: + nfs4_put_stateowner(&lo->lo_owner); + return status; } /* @@ -4941,9 +4948,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct file_lock *file_lock = NULL; struct file_lock *conflock = NULL; __be32 status = 0; - bool new_state = false; int lkflg; int err; + bool new = false; struct net *net = SVC_NET(rqstp); struct nfsd_net *nn = net_generic(net, nfsd_net_id); @@ -4986,7 +4993,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &lock->v.new.clientid)) goto out; status = lookup_or_create_lock_state(cstate, open_stp, lock, - &lock_stp, &new_state); + &lock_stp, &new); } else { status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, @@ -5085,12 +5092,24 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, out: if (filp) fput(filp); - if (lock_stp) + if (lock_stp) { + /* Bump seqid manually if the 4.0 replay owner is openowner */ + if (cstate->replay_owner && + cstate->replay_owner != &lock_sop->lo_owner && + seqid_mutating_err(ntohl(status))) + lock_sop->lo_owner.so_seqid++; + + /* + * If this is a new, never-before-used stateid, and we are + * returning an error, then just go ahead and release it. 
+ */ + if (status && new) + release_lock_stateid(lock_stp); + nfs4_put_stid(&lock_stp->st_stid); + } if (open_stp) nfs4_put_stid(&open_stp->st_stid); - if (status && new_state) - release_lock_stateid(lock_stp); nfsd4_bump_seqid(cstate, status); nfs4_unlock_state(); if (file_lock) @@ -5125,7 +5144,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_lockt *lockt) { struct file_lock *file_lock = NULL; - struct nfs4_lockowner *lo; + struct nfs4_lockowner *lo = NULL; __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); @@ -5188,6 +5207,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_set_lock_denied(file_lock, &lockt->lt_denied); } out: + if (lo) + nfs4_put_stateowner(&lo->lo_owner); nfs4_unlock_state(); if (file_lock) locks_free_lock(file_lock); From 8f4b54c53f0d9c67cf922c8a780b8d9075e20e07 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:29 -0400 Subject: [PATCH 112/167] nfsd: add an operation for unhashing a stateowner Allow stateowners to be unhashed and destroyed when the last reference is put. The unhashing must be idempotent. In a future patch, we'll add some locking around it, but for now it's only protected by the client_mutex. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 49 ++++++++++++++++++++++++++++++++++----------- fs/nfsd/state.h | 1 + 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index eaa5f9ebf444..906c8604de30 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -894,6 +894,7 @@ static void nfs4_put_stateowner(struct nfs4_stateowner *sop) { if (!atomic_dec_and_test(&sop->so_count)) return; + sop->so_ops->so_unhash(sop); kfree(sop->so_owner.data); sop->so_ops->so_free(sop); } @@ -943,10 +944,14 @@ static void __release_lock_stateid(struct nfs4_ol_stateid *stp) } static void unhash_lockowner(struct nfs4_lockowner *lo) +{ + list_del_init(&lo->lo_owner.so_strhash); +} + +static void release_lockowner_stateids(struct nfs4_lockowner *lo) { struct nfs4_ol_stateid *stp; - list_del(&lo->lo_owner.so_strhash); while (!list_empty(&lo->lo_owner.so_stateids)) { stp = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); @@ -957,6 +962,7 @@ static void unhash_lockowner(struct nfs4_lockowner *lo) static void release_lockowner(struct nfs4_lockowner *lo) { unhash_lockowner(lo); + release_lockowner_stateids(lo); nfs4_put_stateowner(&lo->lo_owner); } @@ -1006,15 +1012,8 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp) static void unhash_openowner(struct nfs4_openowner *oo) { - struct nfs4_ol_stateid *stp; - - list_del(&oo->oo_owner.so_strhash); - list_del(&oo->oo_perclient); - while (!list_empty(&oo->oo_owner.so_stateids)) { - stp = list_first_entry(&oo->oo_owner.so_stateids, - struct nfs4_ol_stateid, st_perstateowner); - release_open_stateid(stp); - } + list_del_init(&oo->oo_owner.so_strhash); + list_del_init(&oo->oo_perclient); } static void release_last_closed_stateid(struct nfs4_openowner *oo) @@ -1027,9 +1026,21 @@ static void release_last_closed_stateid(struct nfs4_openowner *oo) } } +static void release_openowner_stateids(struct nfs4_openowner *oo) +{ + struct nfs4_ol_stateid *stp; 
+ + while (!list_empty(&oo->oo_owner.so_stateids)) { + stp = list_first_entry(&oo->oo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + release_open_stateid(stp); + } +} + static void release_openowner(struct nfs4_openowner *oo) { unhash_openowner(oo); + release_openowner_stateids(oo); list_del(&oo->oo_close_lru); release_last_closed_stateid(oo); nfs4_put_stateowner(&oo->oo_owner); @@ -2994,6 +3005,13 @@ static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, u list_add(&oo->oo_perclient, &clp->cl_openowners); } +static void nfs4_unhash_openowner(struct nfs4_stateowner *so) +{ + struct nfs4_openowner *oo = openowner(so); + + unhash_openowner(oo); +} + static void nfs4_free_openowner(struct nfs4_stateowner *so) { struct nfs4_openowner *oo = openowner(so); @@ -3002,7 +3020,8 @@ static void nfs4_free_openowner(struct nfs4_stateowner *so) } static const struct nfs4_stateowner_operations openowner_ops = { - .so_free = nfs4_free_openowner, + .so_unhash = nfs4_unhash_openowner, + .so_free = nfs4_free_openowner, }; static struct nfs4_openowner * @@ -4760,6 +4779,11 @@ find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner, return NULL; } +static void nfs4_unhash_lockowner(struct nfs4_stateowner *sop) +{ + unhash_lockowner(lockowner(sop)); +} + static void nfs4_free_lockowner(struct nfs4_stateowner *sop) { struct nfs4_lockowner *lo = lockowner(sop); @@ -4768,7 +4792,8 @@ static void nfs4_free_lockowner(struct nfs4_stateowner *sop) } static const struct nfs4_stateowner_operations lockowner_ops = { - .so_free = nfs4_free_lockowner, + .so_unhash = nfs4_unhash_lockowner, + .so_free = nfs4_free_lockowner, }; /* diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 9cba295812f6..232246039db0 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -335,6 +335,7 @@ struct nfs4_replay { struct nfs4_stateowner; struct nfs4_stateowner_operations { + void (*so_unhash)(struct nfs4_stateowner *); void (*so_free)(struct nfs4_stateowner *); }; 
From e4f1dd7fc23f92efdaaa07d001b3dd5391505580 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jul 2014 21:34:30 -0400 Subject: [PATCH 113/167] nfsd: Make lock stateid take a reference to the lockowner A necessary step toward client_mutex removal. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 906c8604de30..88225f0bbc12 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -928,6 +928,8 @@ static void nfs4_free_lock_stateid(struct nfs4_stid *stid) file = find_any_file(stp->st_stid.sc_file); if (file) filp_close(file, (fl_owner_t)lo); + if (stp->st_stateowner) + nfs4_put_stateowner(stp->st_stateowner); nfs4_free_ol_stateid(stid); } @@ -4831,6 +4833,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, atomic_inc(&stp->st_stid.sc_count); stp->st_stid.sc_type = NFS4_LOCK_STID; stp->st_stateowner = &lo->lo_owner; + atomic_inc(&lo->lo_owner.so_count); get_nfs4_file(fp); stp->st_stid.sc_file = fp; stp->st_stid.sc_free = nfs4_free_lock_stateid; From 5adfd8850ba1463fe675c2df1829fb439e7c7b0f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:31 -0400 Subject: [PATCH 114/167] nfsd: clean up refcounting for lockowners Ensure that lockowner references are only held by lockstateids and operations that are in-progress. With this, we can get rid of release_lockowner_if_empty, which will be racy once we remove client_mutex protection. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 88225f0bbc12..c86fe66254b0 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -933,7 +933,7 @@ static void nfs4_free_lock_stateid(struct nfs4_stid *stid) nfs4_free_ol_stateid(stid); } -static void __release_lock_stateid(struct nfs4_ol_stateid *stp) +static void release_lock_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); @@ -957,7 +957,7 @@ static void release_lockowner_stateids(struct nfs4_lockowner *lo) while (!list_empty(&lo->lo_owner.so_stateids)) { stp = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); - __release_lock_stateid(stp); + release_lock_stateid(stp); } } @@ -968,21 +968,6 @@ static void release_lockowner(struct nfs4_lockowner *lo) nfs4_put_stateowner(&lo->lo_owner); } -static void release_lockowner_if_empty(struct nfs4_lockowner *lo) -{ - if (list_empty(&lo->lo_owner.so_stateids)) - release_lockowner(lo); -} - -static void release_lock_stateid(struct nfs4_ol_stateid *stp) -{ - struct nfs4_lockowner *lo; - - lo = lockowner(stp->st_stateowner); - __release_lock_stateid(stp); - release_lockowner_if_empty(lo); -} - static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp) __releases(&open_stp->st_stateowner->so_client->cl_lock) __acquires(&open_stp->st_stateowner->so_client->cl_lock) @@ -4323,7 +4308,7 @@ nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp) if (check_for_locks(stp->st_stid.sc_file, lo)) return nfserr_locks_held; - release_lockowner_if_empty(lo); + release_lock_stateid(stp); return nfs_ok; } @@ -4938,8 +4923,6 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); if (lo == NULL) return nfserr_jukebox; - /* FIXME: extra reference for new lockowners for the client 
*/ - atomic_inc(&lo->lo_owner.so_count); } else { /* with an existing lockowner, seqids must be the same */ status = nfserr_bad_seqid; @@ -4950,7 +4933,6 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, *lst = find_or_create_lock_stateid(lo, fi, inode, ost, new); if (*lst == NULL) { - release_lockowner_if_empty(lo); status = nfserr_jukebox; goto out; } @@ -5379,6 +5361,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, continue; if (same_owner_str(tmp, owner, clid)) { sop = tmp; + atomic_inc(&sop->so_count); break; } } @@ -5392,8 +5375,10 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, lo = lockowner(sop); /* see if there are still any locks associated with it */ list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) { - if (check_for_locks(stp->st_stid.sc_file, lo)) + if (check_for_locks(stp->st_stid.sc_file, lo)) { + nfs4_put_stateowner(sop); goto out; + } } status = nfs_ok; From d3134b1049c3db8fdac0c6dc9430facf30015a15 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:32 -0400 Subject: [PATCH 115/167] nfsd: make openstateids hold references to their openowners Change it so that only openstateids hold persistent references to openowners. References can still be held by compounds in progress. With this, we can get rid of NFS4_OO_NEW. It's possible that we will create a new openowner in the process of doing the open, but something later fails. In the meantime, another task could find that openowner and start using it on a successful open. If that occurs we don't necessarily want to tear it down, just put the reference that the failing compound holds. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 71 ++++++++++++++++++++------------------------- fs/nfsd/state.h | 1 - 2 files changed, 31 insertions(+), 41 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c86fe66254b0..b61319401826 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -916,6 +916,8 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid) struct nfs4_ol_stateid *stp = openlockstateid(stid); release_all_access(stp); + if (stp->st_stateowner) + nfs4_put_stateowner(stp->st_stateowner); kmem_cache_free(stateid_slab, stid); } @@ -928,8 +930,6 @@ static void nfs4_free_lock_stateid(struct nfs4_stid *stid) file = find_any_file(stp->st_stid.sc_file); if (file) filp_close(file, (fl_owner_t)lo); - if (stp->st_stateowner) - nfs4_put_stateowner(stp->st_stateowner); nfs4_free_ol_stateid(stid); } @@ -1008,8 +1008,9 @@ static void release_last_closed_stateid(struct nfs4_openowner *oo) struct nfs4_ol_stateid *s = oo->oo_last_closed_stid; if (s) { - nfs4_put_stid(&s->st_stid); + list_del_init(&oo->oo_close_lru); oo->oo_last_closed_stid = NULL; + nfs4_put_stid(&s->st_stid); } } @@ -1028,7 +1029,6 @@ static void release_openowner(struct nfs4_openowner *oo) { unhash_openowner(oo); release_openowner_stateids(oo); - list_del(&oo->oo_close_lru); release_last_closed_stateid(oo); nfs4_put_stateowner(&oo->oo_owner); } @@ -1497,6 +1497,7 @@ destroy_client(struct nfs4_client *clp) } while (!list_empty(&clp->cl_openowners)) { oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient); + atomic_inc(&oo->oo_owner.so_count); release_openowner(oo); } nfsd4_shutdown_callback(clp); @@ -3024,7 +3025,7 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, oo->oo_owner.so_ops = &openowner_ops; oo->oo_owner.so_is_open_owner = 1; oo->oo_owner.so_seqid = open->op_seqid; - oo->oo_flags = NFS4_OO_NEW; + oo->oo_flags = 0; if (nfsd4_has_session(cstate)) oo->oo_flags |= NFS4_OO_CONFIRMED; oo->oo_time = 0; @@ -3041,6 +3042,7 
@@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_locks); stp->st_stateowner = &oo->oo_owner; + atomic_inc(&stp->st_stateowner->so_count); get_nfs4_file(fp); stp->st_stid.sc_file = fp; stp->st_access_bmap = 0; @@ -3054,13 +3056,27 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, spin_unlock(&oo->oo_owner.so_client->cl_lock); } +/* + * In the 4.0 case we need to keep the owners around a little while to handle + * CLOSE replay. We still do need to release any file access that is held by + * them before returning however. + */ static void -move_to_close_lru(struct nfs4_openowner *oo, struct net *net) +move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) { - struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfs4_openowner *oo = openowner(s->st_stateowner); + struct nfsd_net *nn = net_generic(s->st_stid.sc_client->net, + nfsd_net_id); dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo); + release_all_access(s); + if (s->st_stid.sc_file) { + put_nfs4_file(s->st_stid.sc_file); + s->st_stid.sc_file = NULL; + } + release_last_closed_stateid(oo); + oo->oo_last_closed_stid = s; list_move_tail(&oo->oo_close_lru, &nn->close_lru); oo->oo_time = get_seconds(); } @@ -3091,6 +3107,7 @@ find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, if ((bool)clp->cl_minorversion != sessions) return NULL; renew_client(oo->oo_owner.so_client); + atomic_inc(&oo->oo_owner.so_count); return oo; } } @@ -3887,19 +3904,10 @@ void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, struct nfsd4_open *open, __be32 status) { if (open->op_openowner) { - struct nfs4_openowner *oo = open->op_openowner; + struct nfs4_stateowner *so = &open->op_openowner->oo_owner; - if (!list_empty(&oo->oo_owner.so_stateids)) - list_del_init(&oo->oo_close_lru); - if (oo->oo_flags & NFS4_OO_NEW) { - if (status) { - release_openowner(oo); - 
open->op_openowner = NULL; - } else - oo->oo_flags &= ~NFS4_OO_NEW; - } - if (open->op_openowner) - nfsd4_cstate_assign_replay(cstate, &oo->oo_owner); + nfsd4_cstate_assign_replay(cstate, so); + nfs4_put_stateowner(so); } if (open->op_file) nfsd4_free_file(open->op_file); @@ -4015,7 +4023,7 @@ nfs4_laundromat(struct nfsd_net *nn) new_timeo = min(new_timeo, t); break; } - release_openowner(oo); + release_last_closed_stateid(oo); } new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); nfs4_unlock_state(); @@ -4580,31 +4588,14 @@ out: static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) { struct nfs4_client *clp = s->st_stid.sc_client; - struct nfs4_openowner *oo = openowner(s->st_stateowner); s->st_stid.sc_type = NFS4_CLOSED_STID; unhash_open_stateid(s); - if (clp->cl_minorversion) { - if (list_empty(&oo->oo_owner.so_stateids)) - release_openowner(oo); + if (clp->cl_minorversion) nfs4_put_stid(&s->st_stid); - } else { - /* - * In the 4.0 case we need to keep the owners around a - * little while to handle CLOSE replay. We still do need - * to release any file access that is held by them - * before returning however. 
- */ - release_all_access(s); - if (s->st_stid.sc_file) { - put_nfs4_file(s->st_stid.sc_file); - s->st_stid.sc_file = NULL; - } - oo->oo_last_closed_stid = s; - if (list_empty(&oo->oo_owner.so_stateids)) - move_to_close_lru(oo, clp->net); - } + else + move_to_close_lru(s, clp->net); } /* diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 232246039db0..e073c86f389c 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -367,7 +367,6 @@ struct nfs4_openowner { struct nfs4_ol_stateid *oo_last_closed_stid; time_t oo_time; /* time of placement on so_close_lru */ #define NFS4_OO_CONFIRMED 1 -#define NFS4_OO_NEW 4 unsigned char oo_flags; }; From b401be22b5cf059290ee98106bc780e087407d45 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:33 -0400 Subject: [PATCH 116/167] nfsd: don't allow CLOSE to proceed until refcount on stateid drops Once we remove client_mutex protection, it'll be possible to have an in-flight operation using an openstateid when a CLOSE call comes in. If that happens, we can't just put the sc_file reference and clear its pointer without risking an oops. Fix this by ensuring that v4.0 CLOSE operations wait for the refcount to drop before proceeding to do so. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b61319401826..6d26d26751f5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -85,6 +85,12 @@ static DEFINE_MUTEX(client_mutex); */ static DEFINE_SPINLOCK(state_lock); +/* + * A waitqueue for all in-progress 4.0 CLOSE operations that are waiting for + * the refcount on the open stateid to drop. 
+ */ +static DECLARE_WAIT_QUEUE_HEAD(close_wq); + static struct kmem_cache *openowner_slab; static struct kmem_cache *lockowner_slab; static struct kmem_cache *file_slab; @@ -640,8 +646,10 @@ nfs4_put_stid(struct nfs4_stid *s) might_lock(&clp->cl_lock); - if (!atomic_dec_and_lock(&s->sc_count, &clp->cl_lock)) + if (!atomic_dec_and_lock(&s->sc_count, &clp->cl_lock)) { + wake_up_all(&close_wq); return; + } idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); spin_unlock(&clp->cl_lock); s->sc_free(s); @@ -3070,6 +3078,17 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo); + /* + * We know that we hold one reference via nfsd4_close, and another + * "persistent" reference for the client. If the refcount is higher + * than 2, then there are still calls in progress that are using this + * stateid. We can't put the sc_file reference until they are finished. + * Wait for the refcount to drop to 2. Since it has been unhashed, + * there should be no danger of the refcount going back up again at + * this point. + */ + wait_event(close_wq, atomic_read(&s->st_stid.sc_count) == 2); + release_all_access(s); if (s->st_stid.sc_file) { put_nfs4_file(s->st_stid.sc_file); From 7ffb588086e941aa0a46a33e2bf2bf3c0963ed98 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jul 2014 21:34:34 -0400 Subject: [PATCH 117/167] nfsd: Protect adding/removing open state owners using client_lock Once we remove client mutex protection, we'll need to ensure that stateowner lookup and creation are atomic between concurrent compounds. Ensure that alloc_init_open_stateowner checks the hashtable under the client_lock before adding a new element. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 118 ++++++++++++++++++++++++++++++-------------- 1 file changed, 80 insertions(+), 38 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6d26d26751f5..c4bb7f2b29d9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -239,6 +239,53 @@ static void nfsd4_put_session(struct nfsd4_session *ses) spin_unlock(&nn->client_lock); } +static int +same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, + clientid_t *clid) +{ + return (sop->so_owner.len == owner->len) && + 0 == memcmp(sop->so_owner.data, owner->data, owner->len) && + (sop->so_client->cl_clientid.cl_id == clid->cl_id); +} + +static struct nfs4_openowner * +find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open, + bool sessions, struct nfsd_net *nn) +{ + struct nfs4_stateowner *so; + struct nfs4_openowner *oo; + struct nfs4_client *clp; + + lockdep_assert_held(&nn->client_lock); + + list_for_each_entry(so, &nn->ownerstr_hashtbl[hashval], so_strhash) { + if (!so->so_is_open_owner) + continue; + if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { + oo = openowner(so); + clp = oo->oo_owner.so_client; + if ((bool)clp->cl_minorversion != sessions) + break; + renew_client_locked(clp); + atomic_inc(&so->so_count); + return oo; + } + } + return NULL; +} + +static struct nfs4_openowner * +find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, + bool sessions, struct nfsd_net *nn) +{ + struct nfs4_openowner *oo; + + spin_lock(&nn->client_lock); + oo = find_openstateowner_str_locked(hashval, open, sessions, nn); + spin_unlock(&nn->client_lock); + return oo; +} + static inline u32 opaque_hashval(const void *ptr, int nbytes) @@ -1005,8 +1052,13 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp) nfs4_put_stid(&stp->st_stid); } -static void unhash_openowner(struct nfs4_openowner *oo) +static void unhash_openowner_locked(struct nfs4_openowner *oo) { + struct nfsd_net *nn = 
net_generic(oo->oo_owner.so_client->net, + nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); + list_del_init(&oo->oo_owner.so_strhash); list_del_init(&oo->oo_perclient); } @@ -1025,18 +1077,29 @@ static void release_last_closed_stateid(struct nfs4_openowner *oo) static void release_openowner_stateids(struct nfs4_openowner *oo) { struct nfs4_ol_stateid *stp; + struct nfsd_net *nn = net_generic(oo->oo_owner.so_client->net, + nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); while (!list_empty(&oo->oo_owner.so_stateids)) { stp = list_first_entry(&oo->oo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); + spin_unlock(&nn->client_lock); release_open_stateid(stp); + spin_lock(&nn->client_lock); } } static void release_openowner(struct nfs4_openowner *oo) { - unhash_openowner(oo); + struct nfsd_net *nn = net_generic(oo->oo_owner.so_client->net, + nfsd_net_id); + + spin_lock(&nn->client_lock); + unhash_openowner_locked(oo); release_openowner_stateids(oo); + spin_unlock(&nn->client_lock); release_last_closed_stateid(oo); nfs4_put_stateowner(&oo->oo_owner); } @@ -3004,8 +3067,11 @@ static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, u static void nfs4_unhash_openowner(struct nfs4_stateowner *so) { struct nfs4_openowner *oo = openowner(so); + struct nfsd_net *nn = net_generic(so->so_client->net, nfsd_net_id); - unhash_openowner(oo); + spin_lock(&nn->client_lock); + unhash_openowner_locked(oo); + spin_unlock(&nn->client_lock); } static void nfs4_free_openowner(struct nfs4_stateowner *so) @@ -3025,7 +3091,8 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, struct nfsd4_compound_state *cstate) { struct nfs4_client *clp = cstate->clp; - struct nfs4_openowner *oo; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct nfs4_openowner *oo, *ret; oo = alloc_stateowner(openowner_slab, &open->op_owner, clp); if (!oo) @@ -3039,7 +3106,15 @@ alloc_init_open_stateowner(unsigned int strhashval, 
struct nfsd4_open *open, oo->oo_time = 0; oo->oo_last_closed_stid = NULL; INIT_LIST_HEAD(&oo->oo_close_lru); - hash_openowner(oo, clp, strhashval); + spin_lock(&nn->client_lock); + ret = find_openstateowner_str_locked(strhashval, + open, clp->cl_minorversion, nn); + if (ret == NULL) { + hash_openowner(oo, clp, strhashval); + ret = oo; + } else + nfs4_free_openowner(&oo->oo_owner); + spin_unlock(&nn->client_lock); return oo; } @@ -3100,39 +3175,6 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) oo->oo_time = get_seconds(); } -static int -same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, - clientid_t *clid) -{ - return (sop->so_owner.len == owner->len) && - 0 == memcmp(sop->so_owner.data, owner->data, owner->len) && - (sop->so_client->cl_clientid.cl_id == clid->cl_id); -} - -static struct nfs4_openowner * -find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, - bool sessions, struct nfsd_net *nn) -{ - struct nfs4_stateowner *so; - struct nfs4_openowner *oo; - struct nfs4_client *clp; - - list_for_each_entry(so, &nn->ownerstr_hashtbl[hashval], so_strhash) { - if (!so->so_is_open_owner) - continue; - if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { - oo = openowner(so); - clp = oo->oo_owner.so_client; - if ((bool)clp->cl_minorversion != sessions) - return NULL; - renew_client(oo->oo_owner.so_client); - atomic_inc(&oo->oo_owner.so_count); - return oo; - } - } - return NULL; -} - /* search file_hashtbl[] for file */ static struct nfs4_file * find_file_locked(struct knfsd_fh *fh) From c58c6610ec248134b0b0ede2ac80859bf1cefa29 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jul 2014 21:34:35 -0400 Subject: [PATCH 118/167] nfsd: Protect adding/removing lock owners using client_lock Once we remove client mutex protection, we'll need to ensure that stateowner lookup and creation are atomic between concurrent compounds. 
Ensure that alloc_init_lock_stateowner checks the hashtable under the client_lock before adding a new element. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 69 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 8 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c4bb7f2b29d9..7c15918d20f0 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1000,26 +1000,42 @@ static void release_lock_stateid(struct nfs4_ol_stateid *stp) nfs4_put_stid(&stp->st_stid); } -static void unhash_lockowner(struct nfs4_lockowner *lo) +static void unhash_lockowner_locked(struct nfs4_lockowner *lo) { + struct nfsd_net *nn = net_generic(lo->lo_owner.so_client->net, + nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); + list_del_init(&lo->lo_owner.so_strhash); } static void release_lockowner_stateids(struct nfs4_lockowner *lo) { + struct nfsd_net *nn = net_generic(lo->lo_owner.so_client->net, + nfsd_net_id); struct nfs4_ol_stateid *stp; + lockdep_assert_held(&nn->client_lock); + while (!list_empty(&lo->lo_owner.so_stateids)) { stp = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); + spin_unlock(&nn->client_lock); release_lock_stateid(stp); + spin_lock(&nn->client_lock); } } static void release_lockowner(struct nfs4_lockowner *lo) { - unhash_lockowner(lo); + struct nfsd_net *nn = net_generic(lo->lo_owner.so_client->net, + nfsd_net_id); + + spin_lock(&nn->client_lock); + unhash_lockowner_locked(lo); release_lockowner_stateids(lo); + spin_unlock(&nn->client_lock); nfs4_put_stateowner(&lo->lo_owner); } @@ -4801,7 +4817,7 @@ nevermind: } static struct nfs4_lockowner * -find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner, +find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj *owner, struct nfsd_net *nn) { unsigned int strhashval = ownerstr_hashval(clid->cl_id, owner); @@ -4818,9 +4834,25 @@ find_lockowner_str(clientid_t *clid, struct 
xdr_netobj *owner, return NULL; } +static struct nfs4_lockowner * +find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner, + struct nfsd_net *nn) +{ + struct nfs4_lockowner *lo; + + spin_lock(&nn->client_lock); + lo = find_lockowner_str_locked(clid, owner, nn); + spin_unlock(&nn->client_lock); + return lo; +} + static void nfs4_unhash_lockowner(struct nfs4_stateowner *sop) { - unhash_lockowner(lockowner(sop)); + struct nfsd_net *nn = net_generic(sop->so_client->net, nfsd_net_id); + + spin_lock(&nn->client_lock); + unhash_lockowner_locked(lockowner(sop)); + spin_unlock(&nn->client_lock); } static void nfs4_free_lockowner(struct nfs4_stateowner *sop) @@ -4843,9 +4875,12 @@ static const struct nfs4_stateowner_operations lockowner_ops = { * strhashval = ownerstr_hashval */ static struct nfs4_lockowner * -alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp, struct nfsd4_lock *lock) { - struct nfs4_lockowner *lo; +alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, + struct nfs4_ol_stateid *open_stp, + struct nfsd4_lock *lock) +{ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct nfs4_lockowner *lo, *ret; lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp); if (!lo) @@ -4854,7 +4889,16 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str lo->lo_owner.so_is_open_owner = 0; lo->lo_owner.so_seqid = lock->lk_new_lock_seqid; lo->lo_owner.so_ops = &lockowner_ops; - list_add(&lo->lo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); + spin_lock(&nn->client_lock); + ret = find_lockowner_str_locked(&clp->cl_clientid, + &lock->lk_new_owner, nn); + if (ret == NULL) { + list_add(&lo->lo_owner.so_strhash, + &nn->ownerstr_hashtbl[strhashval]); + ret = lo; + } else + nfs4_free_lockowner(&lo->lo_owner); + spin_unlock(&nn->client_lock); return lo; } @@ -5395,6 +5439,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, unsigned int hashval 
= ownerstr_hashval(clid->cl_id, owner); __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfs4_client *clp; dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); @@ -5408,6 +5453,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, status = nfserr_locks_held; /* Find the matching lock stateowner */ + spin_lock(&nn->client_lock); list_for_each_entry(tmp, &nn->ownerstr_hashtbl[hashval], so_strhash) { if (tmp->so_is_open_owner) continue; @@ -5417,6 +5463,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, break; } } + spin_unlock(&nn->client_lock); /* No matching owner found, maybe a replay? Just declare victory... */ if (!sop) { @@ -5426,16 +5473,22 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, lo = lockowner(sop); /* see if there are still any locks associated with it */ + clp = cstate->clp; + spin_lock(&clp->cl_lock); list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) { if (check_for_locks(stp->st_stid.sc_file, lo)) { - nfs4_put_stateowner(sop); + spin_unlock(&clp->cl_lock); goto out; } } + spin_unlock(&clp->cl_lock); status = nfs_ok; + sop = NULL; release_lockowner(lo); out: + if (sop) + nfs4_put_stateowner(sop); nfs4_unlock_state(); return status; } From d4f0489f38512027fdf5190d5d1d8007e155e88f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 29 Jul 2014 21:34:36 -0400 Subject: [PATCH 119/167] nfsd: Move the open owner hash table into struct nfs4_client Preparation for removing the client_mutex. Convert the open owner hash table into a per-client table and protect it using the nfs4_client->cl_lock spin lock. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/netns.h | 1 - fs/nfsd/nfs4state.c | 187 ++++++++++++++++++++------------------------ fs/nfsd/state.h | 1 + 3 files changed, 86 insertions(+), 103 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index a71d14413d39..e1f479c162b5 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -63,7 +63,6 @@ struct nfsd_net { struct rb_root conf_name_tree; struct list_head *unconf_id_hashtbl; struct rb_root unconf_name_tree; - struct list_head *ownerstr_hashtbl; struct list_head *sessionid_hashtbl; /* * client_lru holds client queue ordered by nfs4_client.cl_time diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 7c15918d20f0..4af4e5eff491 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -240,35 +240,27 @@ static void nfsd4_put_session(struct nfsd4_session *ses) } static int -same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, - clientid_t *clid) +same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner) { return (sop->so_owner.len == owner->len) && - 0 == memcmp(sop->so_owner.data, owner->data, owner->len) && - (sop->so_client->cl_clientid.cl_id == clid->cl_id); + 0 == memcmp(sop->so_owner.data, owner->data, owner->len); } static struct nfs4_openowner * find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open, - bool sessions, struct nfsd_net *nn) + struct nfs4_client *clp) { struct nfs4_stateowner *so; - struct nfs4_openowner *oo; - struct nfs4_client *clp; - lockdep_assert_held(&nn->client_lock); + lockdep_assert_held(&clp->cl_lock); - list_for_each_entry(so, &nn->ownerstr_hashtbl[hashval], so_strhash) { + list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[hashval], + so_strhash) { if (!so->so_is_open_owner) continue; - if (same_owner_str(so, &open->op_owner, &open->op_clientid)) { - oo = openowner(so); - clp = oo->oo_owner.so_client; - if ((bool)clp->cl_minorversion != sessions) - break; - renew_client_locked(clp); + if (same_owner_str(so, &open->op_owner)) { 
atomic_inc(&so->so_count); - return oo; + return openowner(so); } } return NULL; @@ -276,17 +268,16 @@ find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open, static struct nfs4_openowner * find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open, - bool sessions, struct nfsd_net *nn) + struct nfs4_client *clp) { struct nfs4_openowner *oo; - spin_lock(&nn->client_lock); - oo = find_openstateowner_str_locked(hashval, open, sessions, nn); - spin_unlock(&nn->client_lock); + spin_lock(&clp->cl_lock); + oo = find_openstateowner_str_locked(hashval, open, clp); + spin_unlock(&clp->cl_lock); return oo; } - static inline u32 opaque_hashval(const void *ptr, int nbytes) { @@ -408,12 +399,11 @@ unsigned long max_delegations; #define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS) #define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1) -static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername) +static unsigned int ownerstr_hashval(struct xdr_netobj *ownername) { unsigned int ret; ret = opaque_hashval(ownername->data, ownername->len); - ret += clientid; return ret & OWNER_HASH_MASK; } @@ -1002,40 +992,37 @@ static void release_lock_stateid(struct nfs4_ol_stateid *stp) static void unhash_lockowner_locked(struct nfs4_lockowner *lo) { - struct nfsd_net *nn = net_generic(lo->lo_owner.so_client->net, - nfsd_net_id); + struct nfs4_client *clp = lo->lo_owner.so_client; - lockdep_assert_held(&nn->client_lock); + lockdep_assert_held(&clp->cl_lock); list_del_init(&lo->lo_owner.so_strhash); } static void release_lockowner_stateids(struct nfs4_lockowner *lo) { - struct nfsd_net *nn = net_generic(lo->lo_owner.so_client->net, - nfsd_net_id); + struct nfs4_client *clp = lo->lo_owner.so_client; struct nfs4_ol_stateid *stp; - lockdep_assert_held(&nn->client_lock); + lockdep_assert_held(&clp->cl_lock); while (!list_empty(&lo->lo_owner.so_stateids)) { stp = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); - 
spin_unlock(&nn->client_lock); + spin_unlock(&clp->cl_lock); release_lock_stateid(stp); - spin_lock(&nn->client_lock); + spin_lock(&clp->cl_lock); } } static void release_lockowner(struct nfs4_lockowner *lo) { - struct nfsd_net *nn = net_generic(lo->lo_owner.so_client->net, - nfsd_net_id); + struct nfs4_client *clp = lo->lo_owner.so_client; - spin_lock(&nn->client_lock); + spin_lock(&clp->cl_lock); unhash_lockowner_locked(lo); release_lockowner_stateids(lo); - spin_unlock(&nn->client_lock); + spin_unlock(&clp->cl_lock); nfs4_put_stateowner(&lo->lo_owner); } @@ -1070,10 +1057,9 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp) static void unhash_openowner_locked(struct nfs4_openowner *oo) { - struct nfsd_net *nn = net_generic(oo->oo_owner.so_client->net, - nfsd_net_id); + struct nfs4_client *clp = oo->oo_owner.so_client; - lockdep_assert_held(&nn->client_lock); + lockdep_assert_held(&clp->cl_lock); list_del_init(&oo->oo_owner.so_strhash); list_del_init(&oo->oo_perclient); @@ -1093,29 +1079,27 @@ static void release_last_closed_stateid(struct nfs4_openowner *oo) static void release_openowner_stateids(struct nfs4_openowner *oo) { struct nfs4_ol_stateid *stp; - struct nfsd_net *nn = net_generic(oo->oo_owner.so_client->net, - nfsd_net_id); + struct nfs4_client *clp = oo->oo_owner.so_client; - lockdep_assert_held(&nn->client_lock); + lockdep_assert_held(&clp->cl_lock); while (!list_empty(&oo->oo_owner.so_stateids)) { stp = list_first_entry(&oo->oo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); - spin_unlock(&nn->client_lock); + spin_unlock(&clp->cl_lock); release_open_stateid(stp); - spin_lock(&nn->client_lock); + spin_lock(&clp->cl_lock); } } static void release_openowner(struct nfs4_openowner *oo) { - struct nfsd_net *nn = net_generic(oo->oo_owner.so_client->net, - nfsd_net_id); + struct nfs4_client *clp = oo->oo_owner.so_client; - spin_lock(&nn->client_lock); + spin_lock(&clp->cl_lock); unhash_openowner_locked(oo); 
release_openowner_stateids(oo); - spin_unlock(&nn->client_lock); + spin_unlock(&clp->cl_lock); release_last_closed_stateid(oo); nfs4_put_stateowner(&oo->oo_owner); } @@ -1497,15 +1481,20 @@ STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn) static struct nfs4_client *alloc_client(struct xdr_netobj name) { struct nfs4_client *clp; + int i; clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL); if (clp == NULL) return NULL; clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL); - if (clp->cl_name.data == NULL) { - kfree(clp); - return NULL; - } + if (clp->cl_name.data == NULL) + goto err_no_name; + clp->cl_ownerstr_hashtbl = kmalloc(sizeof(struct list_head) * + OWNER_HASH_SIZE, GFP_KERNEL); + if (!clp->cl_ownerstr_hashtbl) + goto err_no_hashtbl; + for (i = 0; i < OWNER_HASH_SIZE; i++) + INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]); clp->cl_name.len = name.len; INIT_LIST_HEAD(&clp->cl_sessions); idr_init(&clp->cl_stateids); @@ -1520,6 +1509,11 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) spin_lock_init(&clp->cl_lock); rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); return clp; +err_no_hashtbl: + kfree(clp->cl_name.data); +err_no_name: + kfree(clp); + return NULL; } static void @@ -1538,6 +1532,7 @@ free_client(struct nfs4_client *clp) } rpc_destroy_wait_queue(&clp->cl_cb_waitq); free_svc_cred(&clp->cl_cred); + kfree(clp->cl_ownerstr_hashtbl); kfree(clp->cl_name.data); idr_destroy(&clp->cl_stateids); kfree(clp); @@ -3074,20 +3069,20 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval) { - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + lockdep_assert_held(&clp->cl_lock); - list_add(&oo->oo_owner.so_strhash, &nn->ownerstr_hashtbl[strhashval]); + list_add(&oo->oo_owner.so_strhash, + &clp->cl_ownerstr_hashtbl[strhashval]); list_add(&oo->oo_perclient, &clp->cl_openowners); } 
static void nfs4_unhash_openowner(struct nfs4_stateowner *so) { - struct nfs4_openowner *oo = openowner(so); - struct nfsd_net *nn = net_generic(so->so_client->net, nfsd_net_id); + struct nfs4_client *clp = so->so_client; - spin_lock(&nn->client_lock); - unhash_openowner_locked(oo); - spin_unlock(&nn->client_lock); + spin_lock(&clp->cl_lock); + unhash_openowner_locked(openowner(so)); + spin_unlock(&clp->cl_lock); } static void nfs4_free_openowner(struct nfs4_stateowner *so) @@ -3107,7 +3102,6 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, struct nfsd4_compound_state *cstate) { struct nfs4_client *clp = cstate->clp; - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct nfs4_openowner *oo, *ret; oo = alloc_stateowner(openowner_slab, &open->op_owner, clp); @@ -3122,15 +3116,14 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open, oo->oo_time = 0; oo->oo_last_closed_stid = NULL; INIT_LIST_HEAD(&oo->oo_close_lru); - spin_lock(&nn->client_lock); - ret = find_openstateowner_str_locked(strhashval, - open, clp->cl_minorversion, nn); + spin_lock(&clp->cl_lock); + ret = find_openstateowner_str_locked(strhashval, open, clp); if (ret == NULL) { hash_openowner(oo, clp, strhashval); ret = oo; } else nfs4_free_openowner(&oo->oo_owner); - spin_unlock(&nn->client_lock); + spin_unlock(&clp->cl_lock); return oo; } @@ -3412,8 +3405,8 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate, return status; clp = cstate->clp; - strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner); - oo = find_openstateowner_str(strhashval, open, cstate->minorversion, nn); + strhashval = ownerstr_hashval(&open->op_owner); + oo = find_openstateowner_str(strhashval, open, clp); open->op_openowner = oo; if (!oo) { goto new_owner; @@ -4818,15 +4811,16 @@ nevermind: static struct nfs4_lockowner * find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj *owner, - struct nfsd_net *nn) + struct nfs4_client *clp) { - unsigned 
int strhashval = ownerstr_hashval(clid->cl_id, owner); + unsigned int strhashval = ownerstr_hashval(owner); struct nfs4_stateowner *so; - list_for_each_entry(so, &nn->ownerstr_hashtbl[strhashval], so_strhash) { + list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[strhashval], + so_strhash) { if (so->so_is_open_owner) continue; - if (!same_owner_str(so, owner, clid)) + if (!same_owner_str(so, owner)) continue; atomic_inc(&so->so_count); return lockowner(so); @@ -4836,23 +4830,23 @@ find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj *owner, static struct nfs4_lockowner * find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner, - struct nfsd_net *nn) + struct nfs4_client *clp) { struct nfs4_lockowner *lo; - spin_lock(&nn->client_lock); - lo = find_lockowner_str_locked(clid, owner, nn); - spin_unlock(&nn->client_lock); + spin_lock(&clp->cl_lock); + lo = find_lockowner_str_locked(clid, owner, clp); + spin_unlock(&clp->cl_lock); return lo; } static void nfs4_unhash_lockowner(struct nfs4_stateowner *sop) { - struct nfsd_net *nn = net_generic(sop->so_client->net, nfsd_net_id); + struct nfs4_client *clp = sop->so_client; - spin_lock(&nn->client_lock); + spin_lock(&clp->cl_lock); unhash_lockowner_locked(lockowner(sop)); - spin_unlock(&nn->client_lock); + spin_unlock(&clp->cl_lock); } static void nfs4_free_lockowner(struct nfs4_stateowner *sop) @@ -4879,7 +4873,6 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp, struct nfsd4_lock *lock) { - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct nfs4_lockowner *lo, *ret; lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp); @@ -4889,16 +4882,16 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, lo->lo_owner.so_is_open_owner = 0; lo->lo_owner.so_seqid = lock->lk_new_lock_seqid; lo->lo_owner.so_ops = &lockowner_ops; - spin_lock(&nn->client_lock); + spin_lock(&clp->cl_lock); ret = 
find_lockowner_str_locked(&clp->cl_clientid, - &lock->lk_new_owner, nn); + &lock->lk_new_owner, clp); if (ret == NULL) { list_add(&lo->lo_owner.so_strhash, - &nn->ownerstr_hashtbl[strhashval]); + &clp->cl_ownerstr_hashtbl[strhashval]); ret = lo; } else nfs4_free_lockowner(&lo->lo_owner); - spin_unlock(&nn->client_lock); + spin_unlock(&clp->cl_lock); return lo; } @@ -5010,12 +5003,10 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct inode *inode = cstate->current_fh.fh_dentry->d_inode; struct nfs4_lockowner *lo; unsigned int strhashval; - struct nfsd_net *nn = net_generic(cl->net, nfsd_net_id); - lo = find_lockowner_str(&cl->cl_clientid, &lock->v.new.owner, nn); + lo = find_lockowner_str(&cl->cl_clientid, &lock->v.new.owner, cl); if (!lo) { - strhashval = ownerstr_hashval(cl->cl_clientid.cl_id, - &lock->v.new.owner); + strhashval = ownerstr_hashval(&lock->v.new.owner); lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock); if (lo == NULL) return nfserr_jukebox; @@ -5293,7 +5284,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; } - lo = find_lockowner_str(&lockt->lt_clientid, &lockt->lt_owner, nn); + lo = find_lockowner_str(&lockt->lt_clientid, &lockt->lt_owner, + cstate->clp); if (lo) file_lock->fl_owner = (fl_owner_t)lo; file_lock->fl_pid = current->tgid; @@ -5436,7 +5428,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfs4_lockowner *lo; struct nfs4_ol_stateid *stp; struct xdr_netobj *owner = &rlockowner->rl_owner; - unsigned int hashval = ownerstr_hashval(clid->cl_id, owner); + unsigned int hashval = ownerstr_hashval(owner); __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct nfs4_client *clp; @@ -5452,29 +5444,29 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, status = nfserr_locks_held; + clp = cstate->clp; /* Find the matching lock stateowner */ - spin_lock(&nn->client_lock); - list_for_each_entry(tmp, &nn->ownerstr_hashtbl[hashval], so_strhash) 
{ + spin_lock(&clp->cl_lock); + list_for_each_entry(tmp, &clp->cl_ownerstr_hashtbl[hashval], + so_strhash) { if (tmp->so_is_open_owner) continue; - if (same_owner_str(tmp, owner, clid)) { + if (same_owner_str(tmp, owner)) { sop = tmp; atomic_inc(&sop->so_count); break; } } - spin_unlock(&nn->client_lock); /* No matching owner found, maybe a replay? Just declare victory... */ if (!sop) { + spin_unlock(&clp->cl_lock); status = nfs_ok; goto out; } lo = lockowner(sop); /* see if there are still any locks associated with it */ - clp = cstate->clp; - spin_lock(&clp->cl_lock); list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) { if (check_for_locks(stp->st_stid.sc_file, lo)) { spin_unlock(&clp->cl_lock); @@ -5829,10 +5821,6 @@ static int nfs4_state_create_net(struct net *net) CLIENT_HASH_SIZE, GFP_KERNEL); if (!nn->unconf_id_hashtbl) goto err_unconf_id; - nn->ownerstr_hashtbl = kmalloc(sizeof(struct list_head) * - OWNER_HASH_SIZE, GFP_KERNEL); - if (!nn->ownerstr_hashtbl) - goto err_ownerstr; nn->sessionid_hashtbl = kmalloc(sizeof(struct list_head) * SESSION_HASH_SIZE, GFP_KERNEL); if (!nn->sessionid_hashtbl) @@ -5842,8 +5830,6 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]); INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]); } - for (i = 0; i < OWNER_HASH_SIZE; i++) - INIT_LIST_HEAD(&nn->ownerstr_hashtbl[i]); for (i = 0; i < SESSION_HASH_SIZE; i++) INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]); nn->conf_name_tree = RB_ROOT; @@ -5859,8 +5845,6 @@ static int nfs4_state_create_net(struct net *net) return 0; err_sessionid: - kfree(nn->ownerstr_hashtbl); -err_ownerstr: kfree(nn->unconf_id_hashtbl); err_unconf_id: kfree(nn->conf_id_hashtbl); @@ -5890,7 +5874,6 @@ nfs4_state_destroy_net(struct net *net) } kfree(nn->sessionid_hashtbl); - kfree(nn->ownerstr_hashtbl); kfree(nn->unconf_id_hashtbl); kfree(nn->conf_id_hashtbl); put_net(net); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index e073c86f389c..73a209dc352b 100644 --- 
a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -235,6 +235,7 @@ struct nfsd4_sessionid { struct nfs4_client { struct list_head cl_idhash; /* hash by cl_clientid.id */ struct rb_node cl_namenode; /* link into by-name trees */ + struct list_head *cl_ownerstr_hashtbl; struct list_head cl_openowners; struct idr cl_stateids; /* stateid lookup */ struct list_head cl_delegations; From 882e9d25e11d644b24e578866c688d3f8f0d3712 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:37 -0400 Subject: [PATCH 120/167] nfsd: clean up and reorganize release_lockowner Do more within the main loop, and simplify the function a bit. Also, there's no need to take a stateowner reference unless we're going to call release_lockowner. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 49 +++++++++++++++++---------------------------- 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4af4e5eff491..cd7d7df03afa 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5424,8 +5424,8 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner) { clientid_t *clid = &rlockowner->rl_clientid; - struct nfs4_stateowner *sop = NULL, *tmp; - struct nfs4_lockowner *lo; + struct nfs4_stateowner *sop; + struct nfs4_lockowner *lo = NULL; struct nfs4_ol_stateid *stp; struct xdr_netobj *owner = &rlockowner->rl_owner; unsigned int hashval = ownerstr_hashval(owner); @@ -5442,45 +5442,32 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, if (status) goto out; - status = nfserr_locks_held; - clp = cstate->clp; /* Find the matching lock stateowner */ spin_lock(&clp->cl_lock); - list_for_each_entry(tmp, &clp->cl_ownerstr_hashtbl[hashval], + list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval], so_strhash) { - if (tmp->so_is_open_owner) + + if (sop->so_is_open_owner || !same_owner_str(sop, owner)) continue; - if (same_owner_str(tmp, owner)) { - sop = tmp; - 
atomic_inc(&sop->so_count); - break; - } - } - /* No matching owner found, maybe a replay? Just declare victory... */ - if (!sop) { - spin_unlock(&clp->cl_lock); - status = nfs_ok; - goto out; - } - - lo = lockowner(sop); - /* see if there are still any locks associated with it */ - list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) { - if (check_for_locks(stp->st_stid.sc_file, lo)) { - spin_unlock(&clp->cl_lock); - goto out; + /* see if there are still any locks associated with it */ + lo = lockowner(sop); + list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) { + if (check_for_locks(stp->st_stid.sc_file, lo)) { + status = nfserr_locks_held; + spin_unlock(&clp->cl_lock); + goto out; + } } + + atomic_inc(&sop->so_count); + break; } spin_unlock(&clp->cl_lock); - - status = nfs_ok; - sop = NULL; - release_lockowner(lo); + if (lo) + release_lockowner(lo); out: - if (sop) - nfs4_put_stateowner(sop); nfs4_unlock_state(); return status; } From a819ecc1bbb0c795184c80afeec0e7a6ef508ef5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:38 -0400 Subject: [PATCH 121/167] nfsd: add locking to stateowner release Once we remove the client_mutex, we'll need to properly protect the stateowner reference counts using the cl_lock. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cd7d7df03afa..9b342e164407 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -937,9 +937,14 @@ release_all_access(struct nfs4_ol_stateid *stp) static void nfs4_put_stateowner(struct nfs4_stateowner *sop) { - if (!atomic_dec_and_test(&sop->so_count)) + struct nfs4_client *clp = sop->so_client; + + might_lock(&clp->cl_lock); + + if (!atomic_dec_and_lock(&sop->so_count, &clp->cl_lock)) return; sop->so_ops->so_unhash(sop); + spin_unlock(&clp->cl_lock); kfree(sop->so_owner.data); sop->so_ops->so_free(sop); } @@ -3078,11 +3083,7 @@ static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, u static void nfs4_unhash_openowner(struct nfs4_stateowner *so) { - struct nfs4_client *clp = so->so_client; - - spin_lock(&clp->cl_lock); unhash_openowner_locked(openowner(so)); - spin_unlock(&clp->cl_lock); } static void nfs4_free_openowner(struct nfs4_stateowner *so) @@ -4842,11 +4843,7 @@ find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner, static void nfs4_unhash_lockowner(struct nfs4_stateowner *sop) { - struct nfs4_client *clp = sop->so_client; - - spin_lock(&clp->cl_lock); unhash_lockowner_locked(lockowner(sop)); - spin_unlock(&clp->cl_lock); } static void nfs4_free_lockowner(struct nfs4_stateowner *sop) From 3c1c995cc2e49f6f7504586ad07c5d80c6aa3301 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:39 -0400 Subject: [PATCH 122/167] nfsd: optimize destroy_lockowner cl_lock thrashing Reduce the cl_lock thrashing in destroy_lockowner. Unhash all of the lockstateids on the lockowner's list. Put the reference under the lock and see if it was the last one. If so, then add it to a private list to be destroyed after we drop the lock. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 57 +++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9b342e164407..9358cbe2283d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -983,14 +983,23 @@ static void nfs4_free_lock_stateid(struct nfs4_stid *stid) nfs4_free_ol_stateid(stid); } +static void unhash_lock_stateid(struct nfs4_ol_stateid *stp) +{ + struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); + + lockdep_assert_held(&oo->oo_owner.so_client->cl_lock); + + list_del_init(&stp->st_locks); + unhash_generic_stateid(stp); + unhash_stid(&stp->st_stid); +} + static void release_lock_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); spin_lock(&oo->oo_owner.so_client->cl_lock); - list_del(&stp->st_locks); - unhash_generic_stateid(stp); - unhash_stid(&stp->st_stid); + unhash_lock_stateid(stp); spin_unlock(&oo->oo_owner.so_client->cl_lock); nfs4_put_stid(&stp->st_stid); } @@ -1004,30 +1013,38 @@ static void unhash_lockowner_locked(struct nfs4_lockowner *lo) list_del_init(&lo->lo_owner.so_strhash); } -static void release_lockowner_stateids(struct nfs4_lockowner *lo) -{ - struct nfs4_client *clp = lo->lo_owner.so_client; - struct nfs4_ol_stateid *stp; - - lockdep_assert_held(&clp->cl_lock); - - while (!list_empty(&lo->lo_owner.so_stateids)) { - stp = list_first_entry(&lo->lo_owner.so_stateids, - struct nfs4_ol_stateid, st_perstateowner); - spin_unlock(&clp->cl_lock); - release_lock_stateid(stp); - spin_lock(&clp->cl_lock); - } -} - static void release_lockowner(struct nfs4_lockowner *lo) { struct nfs4_client *clp = lo->lo_owner.so_client; + struct nfs4_ol_stateid *stp; + struct list_head reaplist; + + INIT_LIST_HEAD(&reaplist); spin_lock(&clp->cl_lock); unhash_lockowner_locked(lo); - release_lockowner_stateids(lo); + while (!list_empty(&lo->lo_owner.so_stateids)) { + stp = 
list_first_entry(&lo->lo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + unhash_lock_stateid(stp); + /* + * We now know that no new references can be added to the + * stateid. If ours is the last one, finish the unhashing + * and put it on the list to be reaped. + */ + if (atomic_dec_and_test(&stp->st_stid.sc_count)) { + idr_remove(&clp->cl_stateids, + stp->st_stid.sc_stateid.si_opaque.so_id); + list_add(&stp->st_locks, &reaplist); + } + } spin_unlock(&clp->cl_lock); + while (!list_empty(&reaplist)) { + stp = list_first_entry(&reaplist, struct nfs4_ol_stateid, + st_locks); + list_del(&stp->st_locks); + stp->st_stid.sc_free(&stp->st_stid); + } nfs4_put_stateowner(&lo->lo_owner); } From fc5a96c3b70d00c863f69ff4ea7f5dfddbcbc0d8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:40 -0400 Subject: [PATCH 123/167] nfsd: close potential race in nfsd4_free_stateid Once we remove the client_mutex, it'll be possible for the sc_type of a lock stateid to change after it's found and checked, but before we can go to destroy it. If that happens, we can end up putting the persistent reference to the stateid more than once, and unhash it more than once. Fix this by unhashing the lock stateid prior to dropping the cl_lock but after finding it. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9358cbe2283d..9c7dcbb68094 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4397,17 +4397,6 @@ unlock_state: return status; } -static __be32 -nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp) -{ - struct nfs4_lockowner *lo = lockowner(stp->st_stateowner); - - if (check_for_locks(stp->st_stid.sc_file, lo)) - return nfserr_locks_held; - release_lock_stateid(stp); - return nfs_ok; -} - /* * Test if the stateid is valid */ @@ -4434,6 +4423,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stateid_t *stateid = &free_stateid->fr_stateid; struct nfs4_stid *s; struct nfs4_delegation *dp; + struct nfs4_ol_stateid *stp; struct nfs4_client *cl = cstate->session->se_client; __be32 ret = nfserr_bad_stateid; @@ -4456,8 +4446,15 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ret = check_stateid_generation(stateid, &s->sc_stateid, 1); if (ret) break; + stp = openlockstateid(s); + ret = nfserr_locks_held; + if (check_for_locks(stp->st_stid.sc_file, + lockowner(stp->st_stateowner))) + break; + unhash_lock_stateid(stp); spin_unlock(&cl->cl_lock); - ret = nfsd4_free_lock_stateid(openlockstateid(s)); + nfs4_put_stid(s); + ret = nfs_ok; goto out; case NFS4_REVOKED_DELEG_STID: dp = delegstateid(s); From 2c41beb0e5cf22fe3ab4c4adc3cedd5f732b2a7e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:41 -0400 Subject: [PATCH 124/167] nfsd: reduce cl_lock thrashing in release_openowner Releasing an openowner is a bit inefficient as it can potentially thrash the cl_lock if you have a lot of stateids attached to it. Once we remove the client_mutex, it'll also potentially be dangerous to do this. 
Add some functions to make it easier to defer the part of putting a generic stateid reference that needs to be done outside the cl_lock while doing the parts that must be done while holding it under a single lock. First we unhash each open stateid. Then we call put_ol_stateid_locked which will put the reference to an nfs4_ol_stateid. If it turns out to be the last reference, it'll go ahead and remove the stid from the IDR tree and put it onto the reaplist using the st_locks list_head. Then, after dropping the lock we'll call free_ol_stateid_reaplist to walk the list of stateids that are fully unhashed and ready to be freed, and free each of them. This function can sleep, so it must be done outside any spinlocks. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 100 ++++++++++++++++++++++++++++---------------- 1 file changed, 65 insertions(+), 35 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9c7dcbb68094..879342bc16b2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -983,6 +983,30 @@ static void nfs4_free_lock_stateid(struct nfs4_stid *stid) nfs4_free_ol_stateid(stid); } +/* + * Put the persistent reference to an already unhashed generic stateid, while + * holding the cl_lock. If it's the last reference, then put it onto the + * reaplist for later destruction. 
+ */ +static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp, + struct list_head *reaplist) +{ + struct nfs4_stid *s = &stp->st_stid; + struct nfs4_client *clp = s->sc_client; + + lockdep_assert_held(&clp->cl_lock); + + WARN_ON_ONCE(!list_empty(&stp->st_locks)); + + if (!atomic_dec_and_test(&s->sc_count)) { + wake_up_all(&close_wq); + return; + } + + idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); + list_add(&stp->st_locks, reaplist); +} + static void unhash_lock_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); @@ -1013,6 +1037,25 @@ static void unhash_lockowner_locked(struct nfs4_lockowner *lo) list_del_init(&lo->lo_owner.so_strhash); } +/* + * Free a list of generic stateids that were collected earlier after being + * fully unhashed. + */ +static void +free_ol_stateid_reaplist(struct list_head *reaplist) +{ + struct nfs4_ol_stateid *stp; + + might_sleep(); + + while (!list_empty(reaplist)) { + stp = list_first_entry(reaplist, struct nfs4_ol_stateid, + st_locks); + list_del(&stp->st_locks); + stp->st_stid.sc_free(&stp->st_stid); + } +} + static void release_lockowner(struct nfs4_lockowner *lo) { struct nfs4_client *clp = lo->lo_owner.so_client; @@ -1027,24 +1070,10 @@ static void release_lockowner(struct nfs4_lockowner *lo) stp = list_first_entry(&lo->lo_owner.so_stateids, struct nfs4_ol_stateid, st_perstateowner); unhash_lock_stateid(stp); - /* - * We now know that no new references can be added to the - * stateid. If ours is the last one, finish the unhashing - * and put it on the list to be reaped. 
- */ - if (atomic_dec_and_test(&stp->st_stid.sc_count)) { - idr_remove(&clp->cl_stateids, - stp->st_stid.sc_stateid.si_opaque.so_id); - list_add(&stp->st_locks, &reaplist); - } + put_ol_stateid_locked(stp, &reaplist); } spin_unlock(&clp->cl_lock); - while (!list_empty(&reaplist)) { - stp = list_first_entry(&reaplist, struct nfs4_ol_stateid, - st_locks); - list_del(&stp->st_locks); - stp->st_stid.sc_free(&stp->st_stid); - } + free_ol_stateid_reaplist(&reaplist); nfs4_put_stateowner(&lo->lo_owner); } @@ -1065,16 +1094,21 @@ static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp) static void unhash_open_stateid(struct nfs4_ol_stateid *stp) { - spin_lock(&stp->st_stateowner->so_client->cl_lock); + lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); + unhash_generic_stateid(stp); release_open_stateid_locks(stp); - spin_unlock(&stp->st_stateowner->so_client->cl_lock); } static void release_open_stateid(struct nfs4_ol_stateid *stp) { + LIST_HEAD(reaplist); + + spin_lock(&stp->st_stid.sc_client->cl_lock); unhash_open_stateid(stp); - nfs4_put_stid(&stp->st_stid); + put_ol_stateid_locked(stp, &reaplist); + spin_unlock(&stp->st_stid.sc_client->cl_lock); + free_ol_stateid_reaplist(&reaplist); } static void unhash_openowner_locked(struct nfs4_openowner *oo) @@ -1098,30 +1132,24 @@ static void release_last_closed_stateid(struct nfs4_openowner *oo) } } -static void release_openowner_stateids(struct nfs4_openowner *oo) +static void release_openowner(struct nfs4_openowner *oo) { struct nfs4_ol_stateid *stp; struct nfs4_client *clp = oo->oo_owner.so_client; + struct list_head reaplist; - lockdep_assert_held(&clp->cl_lock); - - while (!list_empty(&oo->oo_owner.so_stateids)) { - stp = list_first_entry(&oo->oo_owner.so_stateids, - struct nfs4_ol_stateid, st_perstateowner); - spin_unlock(&clp->cl_lock); - release_open_stateid(stp); - spin_lock(&clp->cl_lock); - } -} - -static void release_openowner(struct nfs4_openowner *oo) -{ - struct nfs4_client *clp = 
oo->oo_owner.so_client; + INIT_LIST_HEAD(&reaplist); spin_lock(&clp->cl_lock); unhash_openowner_locked(oo); - release_openowner_stateids(oo); + while (!list_empty(&oo->oo_owner.so_stateids)) { + stp = list_first_entry(&oo->oo_owner.so_stateids, + struct nfs4_ol_stateid, st_perstateowner); + unhash_open_stateid(stp); + put_ol_stateid_locked(stp, &reaplist); + } spin_unlock(&clp->cl_lock); + free_ol_stateid_reaplist(&reaplist); release_last_closed_stateid(oo); nfs4_put_stateowner(&oo->oo_owner); } @@ -4675,7 +4703,9 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) struct nfs4_client *clp = s->st_stid.sc_client; s->st_stid.sc_type = NFS4_CLOSED_STID; + spin_lock(&clp->cl_lock); unhash_open_stateid(s); + spin_unlock(&clp->cl_lock); if (clp->cl_minorversion) nfs4_put_stid(&s->st_stid); From d83017f94c290c56010e194cdbc45e59894ccae2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:42 -0400 Subject: [PATCH 125/167] nfsd: don't thrash the cl_lock while freeing an open stateid When we remove the client_mutex, we'll have a potential race between FREE_STATEID and CLOSE. The root of the problem is that we are walking the st_locks list, dropping the spinlock and then trying to release the persistent reference to the lockstateid. In between, a FREE_STATEID call can come along and take the lock, find the stateid and then try to put the reference. That leads to a double put. Fix this by not releasing the cl_lock in order to release each lock stateid. Use put_ol_stateid_locked to unhash them and gather them onto a list, and free_ol_stateid_reaplist to free any that end up on the list. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 879342bc16b2..1f67a96c4941 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1077,27 +1077,26 @@ static void release_lockowner(struct nfs4_lockowner *lo) nfs4_put_stateowner(&lo->lo_owner); } -static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp) - __releases(&open_stp->st_stateowner->so_client->cl_lock) - __acquires(&open_stp->st_stateowner->so_client->cl_lock) +static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp, + struct list_head *reaplist) { struct nfs4_ol_stateid *stp; while (!list_empty(&open_stp->st_locks)) { stp = list_entry(open_stp->st_locks.next, struct nfs4_ol_stateid, st_locks); - spin_unlock(&open_stp->st_stateowner->so_client->cl_lock); - release_lock_stateid(stp); - spin_lock(&open_stp->st_stateowner->so_client->cl_lock); + unhash_lock_stateid(stp); + put_ol_stateid_locked(stp, reaplist); } } -static void unhash_open_stateid(struct nfs4_ol_stateid *stp) +static void unhash_open_stateid(struct nfs4_ol_stateid *stp, + struct list_head *reaplist) { lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); unhash_generic_stateid(stp); - release_open_stateid_locks(stp); + release_open_stateid_locks(stp, reaplist); } static void release_open_stateid(struct nfs4_ol_stateid *stp) @@ -1105,7 +1104,7 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp) LIST_HEAD(reaplist); spin_lock(&stp->st_stid.sc_client->cl_lock); - unhash_open_stateid(stp); + unhash_open_stateid(stp, &reaplist); put_ol_stateid_locked(stp, &reaplist); spin_unlock(&stp->st_stid.sc_client->cl_lock); free_ol_stateid_reaplist(&reaplist); @@ -1145,7 +1144,7 @@ static void release_openowner(struct nfs4_openowner *oo) while (!list_empty(&oo->oo_owner.so_stateids)) { stp = list_first_entry(&oo->oo_owner.so_stateids, struct nfs4_ol_stateid, 
st_perstateowner); - unhash_open_stateid(stp); + unhash_open_stateid(stp, &reaplist); put_ol_stateid_locked(stp, &reaplist); } spin_unlock(&clp->cl_lock); @@ -4701,16 +4700,21 @@ out: static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s) { struct nfs4_client *clp = s->st_stid.sc_client; + LIST_HEAD(reaplist); s->st_stid.sc_type = NFS4_CLOSED_STID; spin_lock(&clp->cl_lock); - unhash_open_stateid(s); - spin_unlock(&clp->cl_lock); + unhash_open_stateid(s, &reaplist); - if (clp->cl_minorversion) - nfs4_put_stid(&s->st_stid); - else + if (clp->cl_minorversion) { + put_ol_stateid_locked(s, &reaplist); + spin_unlock(&clp->cl_lock); + free_ol_stateid_reaplist(&reaplist); + } else { + spin_unlock(&clp->cl_lock); + free_ol_stateid_reaplist(&reaplist); move_to_close_lru(s, clp->net); + } } /* From 4ae098d327c599c9a8e2eecedcc2c192b537ff4e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 29 Jul 2014 21:34:43 -0400 Subject: [PATCH 126/167] nfsd: rename unhash_generic_stateid to unhash_ol_stateid ...to better match other functions that deal with open/lock stateids. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1f67a96c4941..52ec47de1185 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -949,7 +949,7 @@ static void nfs4_put_stateowner(struct nfs4_stateowner *sop) sop->so_ops->so_free(sop); } -static void unhash_generic_stateid(struct nfs4_ol_stateid *stp) +static void unhash_ol_stateid(struct nfs4_ol_stateid *stp) { struct nfs4_file *fp = stp->st_stid.sc_file; @@ -1014,7 +1014,7 @@ static void unhash_lock_stateid(struct nfs4_ol_stateid *stp) lockdep_assert_held(&oo->oo_owner.so_client->cl_lock); list_del_init(&stp->st_locks); - unhash_generic_stateid(stp); + unhash_ol_stateid(stp); unhash_stid(&stp->st_stid); } @@ -1095,7 +1095,7 @@ static void unhash_open_stateid(struct nfs4_ol_stateid *stp, { lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); - unhash_generic_stateid(stp); + unhash_ol_stateid(stp); release_open_stateid_locks(stp, reaplist); } From d9499a95716db0d4bc9b67e88fd162133e7d6b08 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 30 Jul 2014 21:26:05 +0800 Subject: [PATCH 127/167] NFSD: Decrease nfsd_users in nfsd_startup_generic fail A memory allocation failure could cause nfsd_startup_generic to fail, in which case nfsd_users would be incorrectly left elevated. After nfsd restarts nfsd_startup_generic will then succeed without doing anything--the first consequence is likely nfs4_start_net finding a bad laundry_wq and crashing. Signed-off-by: Kinglong Mee Fixes: 4539f14981ce "nfsd: replace boolean nfsd_up flag by users counter" Cc: stable@vger.kernel.org Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfssvc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 5d026dca00ca..752d56bbe0ba 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -221,7 +221,8 @@ static int nfsd_startup_generic(int nrservs) */ ret = nfsd_racache_init(2*nrservs); if (ret) - return ret; + goto dec_users; + ret = nfs4_state_start(); if (ret) goto out_racache; @@ -229,6 +230,8 @@ static int nfsd_startup_generic(int nrservs) out_racache: nfsd_racache_shutdown(); +dec_users: + nfsd_users--; return ret; } From 83e452fee81cf67a8e08fd843291a7cff62a3dc7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 31 Jul 2014 16:10:08 -0400 Subject: [PATCH 128/167] nfsd4: fix out of date comment Signed-off-by: J. Bruce Fields --- fs/nfsd/state.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 73a209dc352b..0b234500f104 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -345,7 +345,7 @@ struct nfs4_stateowner { struct list_head so_stateids; struct nfs4_client *so_client; const struct nfs4_stateowner_operations *so_ops; - /* after increment in ENCODE_SEQID_OP_TAIL, represents the next + /* after increment in nfsd4_bump_seqid, represents the next * sequence id expected from the client: */ atomic_t so_count; u32 so_seqid; From 4beb345b37fc099e98f40d20c94da6c07654005c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jul 2014 08:27:02 -0400 Subject: [PATCH 129/167] nfsd: Ensure struct nfs4_client is unhashed before we try to destroy it When we remove the client_mutex protection, we will need to ensure that it can't be found by other threads while we're destroying it. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 52ec47de1185..cb630db015b0 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1588,12 +1588,23 @@ free_client(struct nfs4_client *clp) } /* must be called under the client_lock */ -static inline void +static void unhash_client_locked(struct nfs4_client *clp) { + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct nfsd4_session *ses; - list_del(&clp->cl_lru); + /* Mark the client as expired! */ + clp->cl_time = 0; + /* Make it invisible */ + if (!list_empty(&clp->cl_idhash)) { + list_del_init(&clp->cl_idhash); + if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags)) + rb_erase(&clp->cl_namenode, &nn->conf_name_tree); + else + rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); + } + list_del_init(&clp->cl_lru); spin_lock(&clp->cl_lock); list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) list_del_init(&ses->se_hash); @@ -1601,7 +1612,17 @@ unhash_client_locked(struct nfs4_client *clp) } static void -destroy_client(struct nfs4_client *clp) +unhash_client(struct nfs4_client *clp) +{ + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); + unhash_client_locked(clp); + spin_unlock(&nn->client_lock); +} + +static void +__destroy_client(struct nfs4_client *clp) { struct nfs4_openowner *oo; struct nfs4_delegation *dp; @@ -1634,22 +1655,24 @@ destroy_client(struct nfs4_client *clp) nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); - list_del(&clp->cl_idhash); - if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags)) - rb_erase(&clp->cl_namenode, &nn->conf_name_tree); - else - rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); spin_lock(&nn->client_lock); - unhash_client_locked(clp); WARN_ON_ONCE(atomic_read(&clp->cl_refcount)); free_client(clp); 
spin_unlock(&nn->client_lock); } +static void +destroy_client(struct nfs4_client *clp) +{ + unhash_client(clp); + __destroy_client(clp); +} + static void expire_client(struct nfs4_client *clp) { + unhash_client(clp); nfsd4_client_record_remove(clp); - destroy_client(clp); + __destroy_client(clp); } static void copy_verf(struct nfs4_client *target, nfs4_verifier *source) From 4864af97e02d1ef6aa78963195a64ed2ed7752c3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jul 2014 08:27:03 -0400 Subject: [PATCH 130/167] nfsd: Ensure that the laundromat unhashes the client before releasing locks If we leave the client on the confirmed/unconfirmed tables, and leave the sessions visible on the sessionid_hashtbl, then someone might find them before we've had a chance to destroy them. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cb630db015b0..a374592e7dcf 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4125,13 +4125,15 @@ nfs4_laundromat(struct nfsd_net *nn) clp->cl_clientid.cl_id); continue; } - list_move(&clp->cl_lru, &reaplist); + unhash_client_locked(clp); + list_add(&clp->cl_lru, &reaplist); } spin_unlock(&nn->client_lock); list_for_each_safe(pos, next, &reaplist) { clp = list_entry(pos, struct nfs4_client, cl_lru); dprintk("NFSD: purging unused client (clientid %08x)\n", clp->cl_clientid.cl_id); + list_del_init(&clp->cl_lru); expire_client(clp); } spin_lock(&state_lock); From 425510f5c8f1643a01668f48c43c77b8275e9227 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jul 2014 08:27:04 -0400 Subject: [PATCH 131/167] nfsd: Don't require client_lock in free_client The struct nfs4_client is supposed to be invisible and unreferenced before it gets here. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a374592e7dcf..256e9032f49c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1416,9 +1416,6 @@ static void __free_session(struct nfsd4_session *ses) static void free_session(struct nfsd4_session *ses) { - struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id); - - lockdep_assert_held(&nn->client_lock); nfsd4_del_conns(ses); nfsd4_put_drc_mem(&ses->se_fchannel); __free_session(ses); @@ -1568,9 +1565,6 @@ err_no_name: static void free_client(struct nfs4_client *clp) { - struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id); - - lockdep_assert_held(&nn->client_lock); while (!list_empty(&clp->cl_sessions)) { struct nfsd4_session *ses; ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, @@ -1627,7 +1621,6 @@ __destroy_client(struct nfs4_client *clp) struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct list_head reaplist; - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); INIT_LIST_HEAD(&reaplist); spin_lock(&state_lock); @@ -1655,10 +1648,7 @@ __destroy_client(struct nfs4_client *clp) nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); - spin_lock(&nn->client_lock); - WARN_ON_ONCE(atomic_read(&clp->cl_refcount)); free_client(clp); - spin_unlock(&nn->client_lock); } static void @@ -1862,7 +1852,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, struct sockaddr *sa = svc_addr(rqstp); int ret; struct net *net = SVC_NET(rqstp); - struct nfsd_net *nn = net_generic(net, nfsd_net_id); clp = alloc_client(name); if (clp == NULL) @@ -1870,9 +1859,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred); if (ret) { - spin_lock(&nn->client_lock); free_client(clp); - spin_unlock(&nn->client_lock); return NULL; } 
INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_run_cb_null); From 5cc40fd7b623b306adfe1eba1b509e95890358f5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jul 2014 08:27:05 -0400 Subject: [PATCH 132/167] nfsd: Move create_client() call outside the lock For efficiency reasons, and because we want to use spin locks instead of relying on the client_mutex. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 256e9032f49c..4b42cb95e315 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2181,6 +2181,10 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, return nfserr_encr_alg_unsupp; } + new = create_client(exid->clname, rqstp, &verf); + if (new == NULL) + return nfserr_jukebox; + /* Cases below refer to rfc 5661 section 18.35.4: */ nfs4_lock_state(); conf = find_confirmed_client_by_name(&exid->clname, nn); @@ -2207,7 +2211,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, } /* case 6 */ exid->flags |= EXCHGID4_FLAG_CONFIRMED_R; - new = conf; goto out_copy; } if (!creds_match) { /* case 3 */ @@ -2220,7 +2223,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, } if (verfs_match) { /* case 2 */ conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R; - new = conf; goto out_copy; } /* case 5, client reboot */ @@ -2238,29 +2240,28 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, /* case 1 (normal case) */ out_new: - new = create_client(exid->clname, rqstp, &verf); - if (new == NULL) { - status = nfserr_jukebox; - goto out; - } new->cl_minorversion = cstate->minorversion; new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED); gen_clid(new, nn); add_to_unconfirmed(new); + conf = new; + new = NULL; out_copy: - exid->clientid.cl_boot = new->cl_clientid.cl_boot; - exid->clientid.cl_id = new->cl_clientid.cl_id; + exid->clientid.cl_boot = conf->cl_clientid.cl_boot; + exid->clientid.cl_id = 
conf->cl_clientid.cl_id; - exid->seqid = new->cl_cs_slot.sl_seqid + 1; - nfsd4_set_ex_flags(new, exid); + exid->seqid = conf->cl_cs_slot.sl_seqid + 1; + nfsd4_set_ex_flags(conf, exid); dprintk("nfsd4_exchange_id seqid %d flags %x\n", - new->cl_cs_slot.sl_seqid, new->cl_exchange_flags); + conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags); status = nfs_ok; out: nfs4_unlock_state(); + if (new) + free_client(new); return status; } @@ -2903,6 +2904,9 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + new = create_client(clname, rqstp, &clverifier); + if (new == NULL) + return nfserr_jukebox; /* Cases below refer to rfc 3530 section 14.2.33: */ nfs4_lock_state(); conf = find_confirmed_client_by_name(&clname, nn); @@ -2923,10 +2927,6 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, unconf = find_unconfirmed_client_by_name(&clname, nn); if (unconf) expire_client(unconf); - status = nfserr_jukebox; - new = create_client(clname, rqstp, &clverifier); - if (new == NULL) - goto out; if (conf && same_verf(&conf->cl_verifier, &clverifier)) /* case 1: probable callback update */ copy_clid(new, conf); @@ -2938,9 +2938,12 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; setclid->se_clientid.cl_id = new->cl_clientid.cl_id; memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data)); + new = NULL; status = nfs_ok; out: nfs4_unlock_state(); + if (new) + free_client(new); return status; } From 3dbacee6e127e7595f83654251cf129cbadc2c26 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jul 2014 08:27:06 -0400 Subject: [PATCH 133/167] nfsd: Protect unconfirmed client creation using client_lock ...instead of relying on the client_mutex. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4b42cb95e315..f149e30475db 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1923,7 +1923,7 @@ add_to_unconfirmed(struct nfs4_client *clp) add_clp_to_name_tree(clp, &nn->unconf_name_tree); idhashval = clientid_hashval(clp->cl_clientid.cl_id); list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]); - renew_client(clp); + renew_client_locked(clp); } static void @@ -1937,7 +1937,7 @@ move_to_confirmed(struct nfs4_client *clp) rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); add_clp_to_name_tree(clp, &nn->conf_name_tree); set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); - renew_client(clp); + renew_client_locked(clp); } static struct nfs4_client * @@ -1950,7 +1950,7 @@ find_client_in_id_table(struct list_head *tbl, clientid_t *clid, bool sessions) if (same_clid(&clp->cl_clientid, clid)) { if ((bool)clp->cl_minorversion != sessions) return NULL; - renew_client(clp); + renew_client_locked(clp); return clp; } } @@ -2152,7 +2152,8 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_exchange_id *exid) { - struct nfs4_client *unconf, *conf, *new; + struct nfs4_client *conf, *new; + struct nfs4_client *unconf = NULL; __be32 status; char addr_str[INET6_ADDRSTRLEN]; nfs4_verifier verf = exid->verifier; @@ -2187,6 +2188,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, /* Cases below refer to rfc 5661 section 18.35.4: */ nfs4_lock_state(); + spin_lock(&nn->client_lock); conf = find_confirmed_client_by_name(&exid->clname, nn); if (conf) { bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred); @@ -2218,7 +2220,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, status = nfserr_clid_inuse; goto out; } - expire_client(conf); goto out_new; } if (verfs_match) { /* case 2 */ @@ -2226,6 +2227,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, 
goto out_copy; } /* case 5, client reboot */ + conf = NULL; goto out_new; } @@ -2236,17 +2238,18 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, unconf = find_unconfirmed_client_by_name(&exid->clname, nn); if (unconf) /* case 4, possible retry or client restart */ - expire_client(unconf); + unhash_client_locked(unconf); /* case 1 (normal case) */ out_new: + if (conf) + unhash_client_locked(conf); new->cl_minorversion = cstate->minorversion; new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED); gen_clid(new, nn); add_to_unconfirmed(new); - conf = new; - new = NULL; + swap(new, conf); out_copy: exid->clientid.cl_boot = conf->cl_clientid.cl_boot; exid->clientid.cl_id = conf->cl_clientid.cl_id; @@ -2259,9 +2262,12 @@ out_copy: status = nfs_ok; out: + spin_unlock(&nn->client_lock); nfs4_unlock_state(); if (new) - free_client(new); + expire_client(new); + if (unconf) + expire_client(unconf); return status; } @@ -2900,7 +2906,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { struct xdr_netobj clname = setclid->se_name; nfs4_verifier clverifier = setclid->se_verf; - struct nfs4_client *conf, *unconf, *new; + struct nfs4_client *conf, *new; + struct nfs4_client *unconf = NULL; __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); @@ -2909,6 +2916,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return nfserr_jukebox; /* Cases below refer to rfc 3530 section 14.2.33: */ nfs4_lock_state(); + spin_lock(&nn->client_lock); conf = find_confirmed_client_by_name(&clname, nn); if (conf) { /* case 0: */ @@ -2926,7 +2934,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } unconf = find_unconfirmed_client_by_name(&clname, nn); if (unconf) - expire_client(unconf); + unhash_client_locked(unconf); if (conf && same_verf(&conf->cl_verifier, &clverifier)) /* case 1: probable callback update */ copy_clid(new, conf); @@ -2941,9 +2949,12 @@ nfsd4_setclientid(struct 
svc_rqst *rqstp, struct nfsd4_compound_state *cstate, new = NULL; status = nfs_ok; out: + spin_unlock(&nn->client_lock); nfs4_unlock_state(); if (new) free_client(new); + if (unconf) + expire_client(unconf); return status; } From d20c11d86d8f821a64eac7d6c8f296f06d935f4f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2014 08:27:07 -0400 Subject: [PATCH 134/167] nfsd: Protect session creation and client confirm using client_lock In particular, we want to ensure that the move_to_confirmed() is protected by the nn->client_lock spin lock, so that we can use that when looking up the clientid etc. instead of relying on the client_mutex. Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 65 +++++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f149e30475db..52a4677f6f35 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -137,17 +137,6 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp) return nfs_ok; } -static __be32 mark_client_expired(struct nfs4_client *clp) -{ - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - __be32 ret; - - spin_lock(&nn->client_lock); - ret = mark_client_expired_locked(clp); - spin_unlock(&nn->client_lock); - return ret; -} - static __be32 get_client_locked(struct nfs4_client *clp) { if (is_client_expired(clp)) @@ -1437,12 +1426,10 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru new->se_cb_sec = cses->cb_sec; atomic_set(&new->se_ref, 0); idx = hash_sessionid(&new->se_sessionid); - spin_lock(&nn->client_lock); list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]); spin_lock(&clp->cl_lock); list_add(&new->se_perclnt, &clp->cl_sessions); spin_unlock(&clp->cl_lock); - spin_unlock(&nn->client_lock); if (cses->flags & SESSION4_BACK_CHAN) { struct sockaddr *sa = svc_addr(rqstp); @@ -2411,6 +2398,7 @@ 
nfsd4_create_session(struct svc_rqst *rqstp, { struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; + struct nfs4_client *old = NULL; struct nfsd4_session *new; struct nfsd4_conn *conn; struct nfsd4_clid_slot *cs_slot = NULL; @@ -2437,6 +2425,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out_free_session; nfs4_lock_state(); + spin_lock(&nn->client_lock); unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn); conf = find_confirmed_client(&cr_ses->clientid, true, nn); WARN_ON_ONCE(conf && unconf); @@ -2455,7 +2444,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out_free_conn; } } else if (unconf) { - struct nfs4_client *old; if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { status = nfserr_clid_inuse; @@ -2473,10 +2461,10 @@ nfsd4_create_session(struct svc_rqst *rqstp, } old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { - status = mark_client_expired(old); + status = mark_client_expired_locked(old); if (status) goto out_free_conn; - expire_client(old); + unhash_client_locked(old); } move_to_confirmed(unconf); conf = unconf; @@ -2492,20 +2480,29 @@ nfsd4_create_session(struct svc_rqst *rqstp, cr_ses->flags &= ~SESSION4_RDMA; init_session(rqstp, new, conf, cr_ses); - nfsd4_init_conn(rqstp, conn, new); + nfsd4_get_session_locked(new); memcpy(cr_ses->sessionid.data, new->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); cs_slot->sl_seqid++; cr_ses->seqid = cs_slot->sl_seqid; - /* cache solo and embedded create sessions under the state lock */ + /* cache solo and embedded create sessions under the client_lock */ nfsd4_cache_create_session(cr_ses, cs_slot, status); + spin_unlock(&nn->client_lock); + /* init connection and backchannel */ + nfsd4_init_conn(rqstp, conn, new); + nfsd4_put_session(new); nfs4_unlock_state(); + if (old) + expire_client(old); return status; out_free_conn: + spin_unlock(&nn->client_lock); nfs4_unlock_state(); free_conn(conn); + 
if (old) + expire_client(old); out_free_session: __free_session(new); out_release_drc_mem: @@ -2965,6 +2962,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm) { struct nfs4_client *conf, *unconf; + struct nfs4_client *old = NULL; nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = &setclientid_confirm->sc_clientid; __be32 status; @@ -2974,6 +2972,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, return nfserr_stale_clientid; nfs4_lock_state(); + spin_lock(&nn->client_lock); conf = find_confirmed_client(clid, false, nn); unconf = find_unconfirmed_client(clid, false, nn); /* @@ -2997,21 +2996,29 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, } status = nfs_ok; if (conf) { /* case 1: callback update */ + old = unconf; + unhash_client_locked(old); nfsd4_change_callback(conf, &unconf->cl_cb_conn); - nfsd4_probe_callback(conf); - expire_client(unconf); } else { /* case 3: normal case; new or rebooted client */ - conf = find_confirmed_client_by_name(&unconf->cl_name, nn); - if (conf) { - status = mark_client_expired(conf); + old = find_confirmed_client_by_name(&unconf->cl_name, nn); + if (old) { + status = mark_client_expired_locked(old); if (status) goto out; - expire_client(conf); + unhash_client_locked(old); } move_to_confirmed(unconf); - nfsd4_probe_callback(unconf); + conf = unconf; } + get_client_locked(conf); + spin_unlock(&nn->client_lock); + nfsd4_probe_callback(conf); + spin_lock(&nn->client_lock); + put_client_renew_locked(conf); out: + spin_unlock(&nn->client_lock); + if (old) + expire_client(old); nfs4_unlock_state(); return status; } @@ -5648,7 +5655,13 @@ nfs4_check_open_reclaim(clientid_t *clid, u64 nfsd_forget_client(struct nfs4_client *clp, u64 max) { - if (mark_client_expired(clp)) + __be32 ret; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + spin_lock(&nn->client_lock); + ret = mark_client_expired_locked(clp); + spin_unlock(&nn->client_lock); 
+ if (ret != nfs_ok) return 0; expire_client(clp); return 1; From 6b10ad193d391c295146f23cbe8523e48df78999 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jul 2014 08:27:08 -0400 Subject: [PATCH 135/167] nfsd: Protect nfsd4_destroy_clientid using client_lock ...instead of relying on the client_mutex. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 52a4677f6f35..68383b09c7dc 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2826,22 +2826,23 @@ nfsd4_sequence_done(struct nfsd4_compoundres *resp) __be32 nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc) { - struct nfs4_client *conf, *unconf, *clp; + struct nfs4_client *conf, *unconf; + struct nfs4_client *clp = NULL; __be32 status = 0; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); nfs4_lock_state(); + spin_lock(&nn->client_lock); unconf = find_unconfirmed_client(&dc->clientid, true, nn); conf = find_confirmed_client(&dc->clientid, true, nn); WARN_ON_ONCE(conf && unconf); if (conf) { - clp = conf; - if (client_has_state(conf)) { status = nfserr_clientid_busy; goto out; } + clp = conf; } else if (unconf) clp = unconf; else { @@ -2849,12 +2850,16 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta goto out; } if (!mach_creds_match(clp, rqstp)) { + clp = NULL; status = nfserr_wrong_cred; goto out; } - expire_client(clp); + unhash_client_locked(clp); out: + spin_unlock(&nn->client_lock); nfs4_unlock_state(); + if (clp) + expire_client(clp); return status; } From 3e339f964b74b7223ab128f36f4b2aaf9dc12eb9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jul 2014 08:27:09 -0400 Subject: [PATCH 136/167] nfsd: Ensure lookup_clientid() takes client_lock Ensure that the client lookup is done safely under the 
client_lock, so we're not relying on the client_mutex. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 68383b09c7dc..f9d077d800ee 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3451,13 +3451,17 @@ static __be32 lookup_clientid(clientid_t *clid, * will be false. */ WARN_ON_ONCE(cstate->session); + spin_lock(&nn->client_lock); found = find_confirmed_client(clid, false, nn); - if (!found) + if (!found) { + spin_unlock(&nn->client_lock); return nfserr_expired; + } + atomic_inc(&found->cl_refcount); + spin_unlock(&nn->client_lock); /* Cache the nfs4_client in cstate! */ cstate->clp = found; - atomic_inc(&found->cl_refcount); return nfs_ok; } From 0a880a28f8add9b134a26f6e058c40199a2ffbc8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jul 2014 08:27:10 -0400 Subject: [PATCH 137/167] nfsd: Add lockdep assertions to document the nfs4_client/session locking Signed-off-by: Trond Myklebust Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f9d077d800ee..e7dfd4e9d942 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -139,6 +139,10 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp) static __be32 get_client_locked(struct nfs4_client *clp) { + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); + if (is_client_expired(clp)) return nfserr_expired; atomic_inc(&clp->cl_refcount); @@ -179,6 +183,10 @@ renew_client(struct nfs4_client *clp) static void put_client_renew_locked(struct nfs4_client *clp) { + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); + if (!atomic_dec_and_test(&clp->cl_refcount)) return; if (!is_client_expired(clp)) @@ -212,6 +220,9 @@ static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses) static void nfsd4_put_session_locked(struct nfsd4_session *ses) { struct nfs4_client *clp = ses->se_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses)) free_session(ses); @@ -1453,6 +1464,8 @@ __find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net) int idx; struct nfsd_net *nn = net_generic(net, nfsd_net_id); + lockdep_assert_held(&nn->client_lock); + dump_sessionid(__func__, sessionid); idx = hash_sessionid(sessionid); /* Search in the appropriate list */ @@ -1489,6 +1502,11 @@ out: static void unhash_session(struct nfsd4_session *ses) { + struct nfs4_client *clp = ses->se_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + + lockdep_assert_held(&nn->client_lock); + list_del(&ses->se_hash); spin_lock(&ses->se_client->cl_lock); list_del(&ses->se_perclnt); @@ -1575,6 +1593,8 @@ unhash_client_locked(struct nfs4_client *clp) struct nfsd_net *nn = 
net_generic(clp->net, nfsd_net_id); struct nfsd4_session *ses; + lockdep_assert_held(&nn->client_lock); + /* Mark the client as expired! */ clp->cl_time = 0; /* Make it invisible */ @@ -1906,6 +1926,8 @@ add_to_unconfirmed(struct nfs4_client *clp) unsigned int idhashval; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + lockdep_assert_held(&nn->client_lock); + clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags); add_clp_to_name_tree(clp, &nn->unconf_name_tree); idhashval = clientid_hashval(clp->cl_clientid.cl_id); @@ -1919,6 +1941,8 @@ move_to_confirmed(struct nfs4_client *clp) unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id); struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + lockdep_assert_held(&nn->client_lock); + dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]); rb_erase(&clp->cl_namenode, &nn->unconf_name_tree); @@ -1949,6 +1973,7 @@ find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) { struct list_head *tbl = nn->conf_id_hashtbl; + lockdep_assert_held(&nn->client_lock); return find_client_in_id_table(tbl, clid, sessions); } @@ -1957,6 +1982,7 @@ find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn) { struct list_head *tbl = nn->unconf_id_hashtbl; + lockdep_assert_held(&nn->client_lock); return find_client_in_id_table(tbl, clid, sessions); } @@ -1968,12 +1994,14 @@ static bool clp_used_exchangeid(struct nfs4_client *clp) static struct nfs4_client * find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) { + lockdep_assert_held(&nn->client_lock); return find_clp_in_name_tree(name, &nn->conf_name_tree); } static struct nfs4_client * find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn) { + lockdep_assert_held(&nn->client_lock); return find_clp_in_name_tree(name, &nn->unconf_name_tree); } @@ -4907,6 +4935,8 @@ find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj 
*owner, unsigned int strhashval = ownerstr_hashval(owner); struct nfs4_stateowner *so; + lockdep_assert_held(&clp->cl_lock); + list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[strhashval], so_strhash) { if (so->so_is_open_owner) From 217526e7ecc9f6f243e976772e81eab7ab986a4c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2014 08:27:11 -0400 Subject: [PATCH 138/167] nfsd: protect the close_lru list and oo_last_closed_stid with client_lock Currently, it's protected by the client_mutex. Move it so that the list and the fields in the openowner are protected by the client_lock. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e7dfd4e9d942..818480035453 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1122,13 +1122,19 @@ static void unhash_openowner_locked(struct nfs4_openowner *oo) static void release_last_closed_stateid(struct nfs4_openowner *oo) { - struct nfs4_ol_stateid *s = oo->oo_last_closed_stid; + struct nfsd_net *nn = net_generic(oo->oo_owner.so_client->net, + nfsd_net_id); + struct nfs4_ol_stateid *s; + spin_lock(&nn->client_lock); + s = oo->oo_last_closed_stid; if (s) { list_del_init(&oo->oo_close_lru); oo->oo_last_closed_stid = NULL; - nfs4_put_stid(&s->st_stid); } + spin_unlock(&nn->client_lock); + if (s) + nfs4_put_stid(&s->st_stid); } static void release_openowner(struct nfs4_openowner *oo) @@ -3265,6 +3271,7 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, static void move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) { + struct nfs4_ol_stateid *last; struct nfs4_openowner *oo = openowner(s->st_stateowner); struct nfsd_net *nn = net_generic(s->st_stid.sc_client->net, nfsd_net_id); @@ -3287,10 +3294,15 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net) 
put_nfs4_file(s->st_stid.sc_file); s->st_stid.sc_file = NULL; } - release_last_closed_stateid(oo); + + spin_lock(&nn->client_lock); + last = oo->oo_last_closed_stid; oo->oo_last_closed_stid = s; list_move_tail(&oo->oo_close_lru, &nn->close_lru); oo->oo_time = get_seconds(); + spin_unlock(&nn->client_lock); + if (last) + nfs4_put_stid(&last->st_stid); } /* search file_hashtbl[] for file */ @@ -4148,6 +4160,7 @@ nfs4_laundromat(struct nfsd_net *nn) struct nfs4_client *clp; struct nfs4_openowner *oo; struct nfs4_delegation *dp; + struct nfs4_ol_stateid *stp; struct list_head *pos, *next, reaplist; time_t cutoff = get_seconds() - nn->nfsd4_lease; time_t t, new_timeo = nn->nfsd4_lease; @@ -4201,15 +4214,26 @@ nfs4_laundromat(struct nfsd_net *nn) list_del_init(&dp->dl_recall_lru); revoke_delegation(dp); } - list_for_each_safe(pos, next, &nn->close_lru) { - oo = container_of(pos, struct nfs4_openowner, oo_close_lru); - if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) { + + spin_lock(&nn->client_lock); + while (!list_empty(&nn->close_lru)) { + oo = list_first_entry(&nn->close_lru, struct nfs4_openowner, + oo_close_lru); + if (time_after((unsigned long)oo->oo_time, + (unsigned long)cutoff)) { t = oo->oo_time - cutoff; new_timeo = min(new_timeo, t); break; } - release_last_closed_stateid(oo); + list_del_init(&oo->oo_close_lru); + stp = oo->oo_last_closed_stid; + oo->oo_last_closed_stid = NULL; + spin_unlock(&nn->client_lock); + nfs4_put_stid(&stp->st_stid); + spin_lock(&nn->client_lock); } + spin_unlock(&nn->client_lock); + new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); nfs4_unlock_state(); return new_timeo; From 97403d95e1a7f5b257e90aad1f3284953bc72671 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2014 08:27:12 -0400 Subject: [PATCH 139/167] nfsd: move unhash_client_locked call into mark_client_expired_locked All the callers except for the fault injection code call it directly afterward, and in the fault injection case it 
won't hurt to do so anyway. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 818480035453..56999cbe84a7 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -129,14 +129,6 @@ static bool is_client_expired(struct nfs4_client *clp) return clp->cl_time == 0; } -static __be32 mark_client_expired_locked(struct nfs4_client *clp) -{ - if (atomic_read(&clp->cl_refcount)) - return nfserr_jukebox; - clp->cl_time = 0; - return nfs_ok; -} - static __be32 get_client_locked(struct nfs4_client *clp) { struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); @@ -1628,6 +1620,14 @@ unhash_client(struct nfs4_client *clp) spin_unlock(&nn->client_lock); } +static __be32 mark_client_expired_locked(struct nfs4_client *clp) +{ + if (atomic_read(&clp->cl_refcount)) + return nfserr_jukebox; + unhash_client_locked(clp); + return nfs_ok; +} + static void __destroy_client(struct nfs4_client *clp) { @@ -2498,7 +2498,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, status = mark_client_expired_locked(old); if (status) goto out_free_conn; - unhash_client_locked(old); } move_to_confirmed(unconf); conf = unconf; @@ -3044,7 +3043,6 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, status = mark_client_expired_locked(old); if (status) goto out; - unhash_client_locked(old); } move_to_confirmed(unconf); conf = unconf; @@ -4183,7 +4181,6 @@ nfs4_laundromat(struct nfsd_net *nn) clp->cl_clientid.cl_id); continue; } - unhash_client_locked(clp); list_add(&clp->cl_lru, &reaplist); } spin_unlock(&nn->client_lock); From 7abea1e8e81ad7ba100bd97c4881027c73e2db3e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2014 08:27:13 -0400 Subject: [PATCH 140/167] nfsd: don't destroy client if mark_client_expired_locked fails If it fails, it means that the client is in use and so destroying it would be bad. 
Currently, the client_mutex prevents this from happening, but once we remove it, we won't be able to rely on that. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 56999cbe84a7..43e66fc1b90d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2496,8 +2496,10 @@ nfsd4_create_session(struct svc_rqst *rqstp, old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { status = mark_client_expired_locked(old); - if (status) + if (status) { + old = NULL; goto out_free_conn; + } } move_to_confirmed(unconf); conf = unconf; @@ -3041,8 +3043,10 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, old = find_confirmed_client_by_name(&unconf->cl_name, nn); if (old) { status = mark_client_expired_locked(old); - if (status) + if (status) { + old = NULL; goto out; + } } move_to_confirmed(unconf); conf = unconf; From fb94d766af0571ab82d5f63e871a73d985d6d6b0 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 5 Aug 2014 21:20:27 +0800 Subject: [PATCH 141/167] NFSD: Put the reference of nfs4_file when freeing stid After testing NFSv4 locking, I restarted the nfsd service and got the following messages: [ 5677.403419] nfsd: last server has exited, flushing export cache [ 5677.463728] ============================================================================= [ 5677.463942] BUG nfsd4_files (Tainted: G B OE): Objects remaining in nfsd4_files on kmem_cache_close() [ 5677.464055] ----------------------------------------------------------------------------- [ 5677.464203] INFO: Slab 0xffffea0000233400 objects=28 used=1 fp=0xffff880008cd3d98 flags=0x3ffc0000004080 [ 5677.464318] CPU: 0 PID: 3772 Comm: rmmod Tainted: G B OE 3.16.0-rc2+ #29 [ 5677.464420] Hardware name: VMware, Inc. 
VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/31/2013 [ 5677.464538] 0000000000000000 0000000036af2c9f ffff88000ce97d68 ffffffff816eacfa [ 5677.464643] ffffea0000233400 ffff88000ce97e40 ffffffff811cda44 ffffffff00000020 [ 5677.464774] ffff88000ce97e50 ffff88000ce97e00 656a624f00000008 616d657220737463 [ 5677.464875] Call Trace: [ 5677.464925] [] dump_stack+0x45/0x56 [ 5677.464983] [] slab_err+0xb4/0xe0 [ 5677.465040] [] ? __kmalloc+0x117/0x290 [ 5677.465099] [] ? on_each_cpu_cond+0xac/0xf0 [ 5677.465158] [] ? kmem_cache_close+0x110/0x2e0 [ 5677.465218] [] kmem_cache_close+0x130/0x2e0 [ 5677.465279] [] ? kobject_cleanup+0x91/0x1b0 [ 5677.465338] [] __kmem_cache_shutdown+0xe/0x10 [ 5677.465399] [] kmem_cache_destroy+0x48/0x100 [ 5677.465466] [] nfsd4_free_slabs+0x2d/0x50 [nfsd] [ 5677.465530] [] exit_nfsd+0x34/0x6ad [nfsd] [ 5677.465589] [] SyS_delete_module+0x162/0x200 [ 5677.465649] [] ? do_notify_resume+0x59/0x90 [ 5677.465759] [] system_call_fastpath+0x16/0x1b [ 5677.465822] INFO: Object 0xffff880008cd0000 @offset=0 [ 5677.465882] INFO: Allocated in nfsd4_process_open1+0x61/0x350 [nfsd] age=7599 cpu=0 pid=3253 [ 5677.466115] __slab_alloc+0x3b0/0x4b1 [ 5677.466166] kmem_cache_alloc+0x1e4/0x240 [ 5677.466220] nfsd4_process_open1+0x61/0x350 [nfsd] [ 5677.466276] nfsd4_open+0xee/0x860 [nfsd] [ 5677.466329] nfsd4_proc_compound+0x4d7/0x7f0 [nfsd] [ 5677.466384] nfsd_dispatch+0xbb/0x200 [nfsd] [ 5677.466447] svc_process_common+0x453/0x6f0 [sunrpc] [ 5677.466506] svc_process+0x103/0x170 [sunrpc] [ 5677.466559] nfsd+0x117/0x190 [nfsd] [ 5677.466609] kthread+0xd8/0xf0 [ 5677.466656] ret_from_fork+0x7c/0xb0 [ 5677.466775] kmem_cache_destroy nfsd4_files: Slab cache still has objects [ 5677.466839] CPU: 0 PID: 3772 Comm: rmmod Tainted: G B OE 3.16.0-rc2+ #29 [ 5677.466937] Hardware name: VMware, Inc. 
VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/31/2013 [ 5677.467049] 0000000000000000 0000000036af2c9f ffff88000ce97eb0 ffffffff816eacfa [ 5677.467150] ffff880020bb2d00 ffff88000ce97ed0 ffffffff8119bdd9 0000000000000000 [ 5677.467250] ffffffffa06065c0 ffff88000ce97ee0 ffffffffa05ef78d ffff88000ce97ef0 [ 5677.467351] Call Trace: [ 5677.467397] [] dump_stack+0x45/0x56 [ 5677.467454] [] kmem_cache_destroy+0xf9/0x100 [ 5677.467516] [] nfsd4_free_slabs+0x2d/0x50 [nfsd] [ 5677.467579] [] exit_nfsd+0x34/0x6ad [nfsd] [ 5677.467639] [] SyS_delete_module+0x162/0x200 [ 5677.467765] [] ? do_notify_resume+0x59/0x90 [ 5677.467826] [] system_call_fastpath+0x16/0x1b Signed-off-by: Kinglong Mee Reviewed-by: Jeff Layton Fixes: 11b9164adad7 "nfsd: Add a struct nfs4_file field to struct nfs4_stid" Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 43e66fc1b90d..028ae55e5f7a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1037,6 +1037,7 @@ static void free_ol_stateid_reaplist(struct list_head *reaplist) { struct nfs4_ol_stateid *stp; + struct nfs4_file *fp; might_sleep(); @@ -1044,7 +1045,10 @@ free_ol_stateid_reaplist(struct list_head *reaplist) stp = list_first_entry(reaplist, struct nfs4_ol_stateid, st_locks); list_del(&stp->st_locks); + fp = stp->st_stid.sc_file; stp->st_stid.sc_free(&stp->st_stid); + if (fp) + put_nfs4_file(fp); } } From fd699b8a48c0ca36e782cf705794358b3e4b8c25 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2014 08:27:14 -0400 Subject: [PATCH 142/167] nfsd: don't destroy clients that are busy It's possible that we'll have an in-progress call on some of the clients while a rogue EXCHANGE_ID or DESTROY_CLIENTID call comes in. Be sure to try and mark the client expired first, so that the refcount is respected. This will only be a problem once the client_mutex is removed. 
Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 028ae55e5f7a..037bb924ce63 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2267,8 +2267,11 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, /* case 1 (normal case) */ out_new: - if (conf) - unhash_client_locked(conf); + if (conf) { + status = mark_client_expired_locked(conf); + if (status) + goto out; + } new->cl_minorversion = cstate->minorversion; new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED); @@ -2881,6 +2884,9 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta status = nfserr_clientid_busy; goto out; } + status = mark_client_expired_locked(conf); + if (status) + goto out; clp = conf; } else if (unconf) clp = unconf; From 294ac32e99861f6efee548a6b7afb27c32cd502f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2014 08:27:15 -0400 Subject: [PATCH 143/167] nfsd: protect clid and verifier generation with client_lock The clid counter is a global counter currently. Move it to be a per-net property so that it can be properly protected by the nn->client_lock instead of relying on the client_mutex. The verifier generator is also potentially racy if there are two simultaneous callers. Generate the verifier when we generate the clid value, so it's also created under the client_lock. With this, there's no need to keep two counters as they'd always be in sync anyway, so just use the clientid_counter for both. As Trond points out, what would be best is to eventually move this code to use IDR instead of the hash tables. That would also help ensure uniqueness, but that's probably best done as a separate project. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/netns.h | 6 +++--- fs/nfsd/nfs4state.c | 21 +++++++++------------ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index e1f479c162b5..3831ef6e5c75 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -92,9 +92,7 @@ struct nfsd_net { bool nfsd_net_up; bool lockd_up; - /* - * Time of server startup - */ + /* Time of server startup */ struct timeval nfssvc_boot; /* @@ -103,6 +101,8 @@ struct nfsd_net { */ unsigned int max_connections; + u32 clientid_counter; + struct svc_serv *nfsd_serv; }; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 037bb924ce63..2cb559017ac9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1812,28 +1812,26 @@ static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp) return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal); } -static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) -{ - static u32 current_clientid = 1; - - clp->cl_clientid.cl_boot = nn->boot_time; - clp->cl_clientid.cl_id = current_clientid++; -} - -static void gen_confirm(struct nfs4_client *clp) +static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn) { __be32 verf[2]; - static u32 i; /* * This is opaque to client, so no need to byte-swap. 
Use * __force to keep sparse happy */ verf[0] = (__force __be32)get_seconds(); - verf[1] = (__force __be32)i++; + verf[1] = (__force __be32)nn->clientid_counter; memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data)); } +static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn) +{ + clp->cl_clientid.cl_boot = nn->boot_time; + clp->cl_clientid.cl_id = nn->clientid_counter++; + gen_confirm(clp, nn); +} + static struct nfs4_stid * find_stateid_locked(struct nfs4_client *cl, stateid_t *t) { @@ -1884,7 +1882,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, clear_bit(0, &clp->cl_cb_slot_busy); copy_verf(clp, verf); rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa); - gen_confirm(clp); clp->cl_cb_session = NULL; clp->net = net; return clp; From c96223d3b6b2794b6262d1a31d35694760cff5b2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2014 08:27:16 -0400 Subject: [PATCH 144/167] nfsd: abstract out the get and set routines into the fault injection ops Now that we've added more granular locking in other places, it's time to address the fault injection code. This code is currently quite reliant on the client_mutex for protection. Start to change this by adding a new set of fault injection op vectors. For now they all use the legacy ones. In later patches we'll add new routines that can deal with more granular locking. Also, move some of the printk routines into the callers to make the results of the operations more uniform. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/fault_inject.c | 129 +++++++++++++++++++++++++---------------- 1 file changed, 78 insertions(+), 51 deletions(-) diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index f1333fc35b33..b1159900d934 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -17,79 +17,50 @@ struct nfsd_fault_inject_op { char *file; + u64 (*get)(struct nfsd_fault_inject_op *); + u64 (*set_val)(struct nfsd_fault_inject_op *, u64); + u64 (*set_clnt)(struct nfsd_fault_inject_op *, + struct sockaddr_storage *, size_t); u64 (*forget)(struct nfs4_client *, u64); u64 (*print)(struct nfs4_client *, u64); }; -static struct nfsd_fault_inject_op inject_ops[] = { - { - .file = "forget_clients", - .forget = nfsd_forget_client, - .print = nfsd_print_client, - }, - { - .file = "forget_locks", - .forget = nfsd_forget_client_locks, - .print = nfsd_print_client_locks, - }, - { - .file = "forget_openowners", - .forget = nfsd_forget_client_openowners, - .print = nfsd_print_client_openowners, - }, - { - .file = "forget_delegations", - .forget = nfsd_forget_client_delegations, - .print = nfsd_print_client_delegations, - }, - { - .file = "recall_delegations", - .forget = nfsd_recall_client_delegations, - .print = nfsd_print_client_delegations, - }, -}; - -static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op); static struct dentry *debug_dir; -static void nfsd_inject_set(struct nfsd_fault_inject_op *op, u64 val) +static u64 nfsd_inject_set(struct nfsd_fault_inject_op *op, u64 val) { - u64 count = 0; - - if (val == 0) - printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file); - else - printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val); + u64 count; nfs4_lock_state(); count = nfsd_for_n_state(val, op->forget); nfs4_unlock_state(); - printk(KERN_INFO "NFSD: %s: found %llu", op->file, count); + return count; } -static void nfsd_inject_set_client(struct nfsd_fault_inject_op *op, +static u64 
nfsd_inject_set_client(struct nfsd_fault_inject_op *op, struct sockaddr_storage *addr, size_t addr_size) { - char buf[INET6_ADDRSTRLEN]; struct nfs4_client *clp; - u64 count; + u64 count = 0; nfs4_lock_state(); clp = nfsd_find_client(addr, addr_size); - if (clp) { + if (clp) count = op->forget(clp, 0); - rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); - printk(KERN_INFO "NFSD [%s]: Client %s had %llu state object(s)\n", op->file, buf, count); - } nfs4_unlock_state(); + return count; } -static void nfsd_inject_get(struct nfsd_fault_inject_op *op, u64 *val) +static u64 nfsd_inject_get(struct nfsd_fault_inject_op *op) { + u64 count; + nfs4_lock_state(); - *val = nfsd_for_n_state(0, op->print); + count = nfsd_for_n_state(0, op->print); nfs4_unlock_state(); + + return count; } static ssize_t fault_inject_read(struct file *file, char __user *buf, @@ -99,9 +70,10 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf, char read_buf[25]; size_t size; loff_t pos = *ppos; + struct nfsd_fault_inject_op *op = file_inode(file)->i_private; if (!pos) - nfsd_inject_get(file_inode(file)->i_private, &val); + val = op->get(op); size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); return simple_read_from_buffer(buf, len, ppos, read_buf, size); @@ -114,6 +86,7 @@ static ssize_t fault_inject_write(struct file *file, const char __user *buf, size_t size = min(sizeof(write_buf) - 1, len); struct net *net = current->nsproxy->net_ns; struct sockaddr_storage sa; + struct nfsd_fault_inject_op *op = file_inode(file)->i_private; u64 val; char *nl; @@ -129,11 +102,20 @@ static ssize_t fault_inject_write(struct file *file, const char __user *buf, } size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa)); - if (size > 0) - nfsd_inject_set_client(file_inode(file)->i_private, &sa, size); - else { + if (size > 0) { + val = op->set_clnt(op, &sa, size); + if (val) + pr_info("NFSD [%s]: Client %s had %llu state object(s)\n", + op->file, write_buf, 
val); + } else { val = simple_strtoll(write_buf, NULL, 0); - nfsd_inject_set(file_inode(file)->i_private, val); + if (val == 0) + pr_info("NFSD Fault Injection: %s (all)", op->file); + else + pr_info("NFSD Fault Injection: %s (n = %llu)", + op->file, val); + val = op->set_val(op, val); + pr_info("NFSD: %s: found %llu", op->file, val); } return len; /* on success, claim we got the whole input */ } @@ -149,6 +131,51 @@ void nfsd_fault_inject_cleanup(void) debugfs_remove_recursive(debug_dir); } +static struct nfsd_fault_inject_op inject_ops[] = { + { + .file = "forget_clients", + .get = nfsd_inject_get, + .set_val = nfsd_inject_set, + .set_clnt = nfsd_inject_set_client, + .forget = nfsd_forget_client, + .print = nfsd_print_client, + }, + { + .file = "forget_locks", + .get = nfsd_inject_get, + .set_val = nfsd_inject_set, + .set_clnt = nfsd_inject_set_client, + .forget = nfsd_forget_client_locks, + .print = nfsd_print_client_locks, + }, + { + .file = "forget_openowners", + .get = nfsd_inject_get, + .set_val = nfsd_inject_set, + .set_clnt = nfsd_inject_set_client, + .forget = nfsd_forget_client_openowners, + .print = nfsd_print_client_openowners, + }, + { + .file = "forget_delegations", + .get = nfsd_inject_get, + .set_val = nfsd_inject_set, + .set_clnt = nfsd_inject_set_client, + .forget = nfsd_forget_client_delegations, + .print = nfsd_print_client_delegations, + }, + { + .file = "recall_delegations", + .get = nfsd_inject_get, + .set_val = nfsd_inject_set, + .set_clnt = nfsd_inject_set_client, + .forget = nfsd_recall_client_delegations, + .print = nfsd_print_client_delegations, + }, +}; + +#define NUM_INJECT_OPS (sizeof(inject_ops)/sizeof(struct nfsd_fault_inject_op)) + int nfsd_fault_inject_init(void) { unsigned int i; From 7ec0e36f1a35c9c241726f6639178fafda654e09 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2014 08:27:17 -0400 Subject: [PATCH 145/167] nfsd: add a forget_clients "get" routine with proper locking Add a new "get" routine for 
forget_clients that relies on the client_lock instead of the client_mutex. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/fault_inject.c | 3 +-- fs/nfsd/nfs4state.c | 30 ++++++++++++++++++++++-------- fs/nfsd/state.h | 4 +++- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index b1159900d934..a0387fd47e14 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -134,11 +134,10 @@ void nfsd_fault_inject_cleanup(void) static struct nfsd_fault_inject_op inject_ops[] = { { .file = "forget_clients", - .get = nfsd_inject_get, + .get = nfsd_inject_print_clients, .set_val = nfsd_inject_set, .set_clnt = nfsd_inject_set_client, .forget = nfsd_forget_client, - .print = nfsd_print_client, }, { .file = "forget_locks", diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2cb559017ac9..2225e1103742 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5723,6 +5723,28 @@ nfs4_check_open_reclaim(clientid_t *clid, } #ifdef CONFIG_NFSD_FAULT_INJECTION +u64 +nfsd_inject_print_clients(struct nfsd_fault_inject_op *op) +{ + struct nfs4_client *clp; + u64 count = 0; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + char buf[INET6_ADDRSTRLEN]; + + if (!nfsd_netns_ready(nn)) + return 0; + + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); + pr_info("NFS Client: %s\n", buf); + ++count; + } + spin_unlock(&nn->client_lock); + + return count; +} u64 nfsd_forget_client(struct nfs4_client *clp, u64 max) { @@ -5738,14 +5760,6 @@ u64 nfsd_forget_client(struct nfs4_client *clp, u64 max) return 1; } -u64 nfsd_print_client(struct nfs4_client *clp, u64 num) -{ - char buf[INET6_ADDRSTRLEN]; - rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf)); - printk(KERN_INFO "NFS Client: %s\n", buf); - return 1; -} - static void nfsd_print_count(struct nfs4_client 
*clp, unsigned int count, const char *type) { diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 0b234500f104..7c7580ea9680 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -471,18 +471,20 @@ extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time); /* nfs fault injection functions */ #ifdef CONFIG_NFSD_FAULT_INJECTION +struct nfsd_fault_inject_op; + int nfsd_fault_inject_init(void); void nfsd_fault_inject_cleanup(void); u64 nfsd_for_n_state(u64, u64 (*)(struct nfs4_client *, u64)); struct nfs4_client *nfsd_find_client(struct sockaddr_storage *, size_t); +u64 nfsd_inject_print_clients(struct nfsd_fault_inject_op *op); u64 nfsd_forget_client(struct nfs4_client *, u64); u64 nfsd_forget_client_locks(struct nfs4_client*, u64); u64 nfsd_forget_client_openowners(struct nfs4_client *, u64); u64 nfsd_forget_client_delegations(struct nfs4_client *, u64); u64 nfsd_recall_client_delegations(struct nfs4_client *, u64); -u64 nfsd_print_client(struct nfs4_client *, u64); u64 nfsd_print_client_locks(struct nfs4_client *, u64); u64 nfsd_print_client_openowners(struct nfs4_client *, u64); u64 nfsd_print_client_delegations(struct nfs4_client *, u64); From a0926d15271a0139606d54d0521c527746e2815b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2014 08:27:18 -0400 Subject: [PATCH 146/167] nfsd: add a forget_client set_clnt routine ...that relies on the client_lock instead of client_mutex. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/fault_inject.c | 2 +- fs/nfsd/nfs4state.c | 28 ++++++++++++++++++++++++++++ fs/nfsd/state.h | 3 +++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index a0387fd47e14..5f3ead0c72fb 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -136,7 +136,7 @@ static struct nfsd_fault_inject_op inject_ops[] = { .file = "forget_clients", .get = nfsd_inject_print_clients, .set_val = nfsd_inject_set, - .set_clnt = nfsd_inject_set_client, + .set_clnt = nfsd_inject_forget_client, .forget = nfsd_forget_client, }, { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2225e1103742..c4c28f8f48a1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5760,6 +5760,34 @@ u64 nfsd_forget_client(struct nfs4_client *clp, u64 max) return 1; } +u64 +nfsd_inject_forget_client(struct nfsd_fault_inject_op *op, + struct sockaddr_storage *addr, size_t addr_size) +{ + u64 count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + clp = nfsd_find_client(addr, addr_size); + if (clp) { + if (mark_client_expired_locked(clp) == nfs_ok) + ++count; + else + clp = NULL; + } + spin_unlock(&nn->client_lock); + + if (clp) + expire_client(clp); + + return count; +} + static void nfsd_print_count(struct nfs4_client *clp, unsigned int count, const char *type) { diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 7c7580ea9680..77a1903d58ab 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -480,6 +480,9 @@ struct nfs4_client *nfsd_find_client(struct sockaddr_storage *, size_t); u64 nfsd_inject_print_clients(struct nfsd_fault_inject_op *op); u64 nfsd_forget_client(struct nfs4_client *, u64); +u64 nfsd_inject_forget_client(struct nfsd_fault_inject_op *, + struct sockaddr_storage *, size_t); + u64 nfsd_forget_client_locks(struct nfs4_client*, u64); u64 
nfsd_forget_client_openowners(struct nfs4_client *, u64); u64 nfsd_forget_client_delegations(struct nfs4_client *, u64); From 69fc9edf987ca451831575b1e5450a9fe49fbfe0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2014 08:27:19 -0400 Subject: [PATCH 147/167] nfsd: add nfsd_inject_forget_clients ...which uses the client_lock for protection instead of client_mutex. Also remove nfsd_forget_client as there are no more callers. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/fault_inject.c | 3 +-- fs/nfsd/nfs4state.c | 42 ++++++++++++++++++++++++++++-------------- fs/nfsd/state.h | 2 +- 3 files changed, 30 insertions(+), 17 deletions(-) diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index 5f3ead0c72fb..76ecdff37ea2 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -135,9 +135,8 @@ static struct nfsd_fault_inject_op inject_ops[] = { { .file = "forget_clients", .get = nfsd_inject_print_clients, - .set_val = nfsd_inject_set, + .set_val = nfsd_inject_forget_clients, .set_clnt = nfsd_inject_forget_client, - .forget = nfsd_forget_client, }, { .file = "forget_locks", diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c4c28f8f48a1..226d89e2c7b2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5746,20 +5746,6 @@ nfsd_inject_print_clients(struct nfsd_fault_inject_op *op) return count; } -u64 nfsd_forget_client(struct nfs4_client *clp, u64 max) -{ - __be32 ret; - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); - - spin_lock(&nn->client_lock); - ret = mark_client_expired_locked(clp); - spin_unlock(&nn->client_lock); - if (ret != nfs_ok) - return 0; - expire_client(clp); - return 1; -} - u64 nfsd_inject_forget_client(struct nfsd_fault_inject_op *op, struct sockaddr_storage *addr, size_t addr_size) @@ -5788,6 +5774,34 @@ nfsd_inject_forget_client(struct nfsd_fault_inject_op *op, return count; } +u64 +nfsd_inject_forget_clients(struct nfsd_fault_inject_op *op, u64 max) +{ + u64 count 
= 0; + struct nfs4_client *clp, *next; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { + if (mark_client_expired_locked(clp) == nfs_ok) { + list_add(&clp->cl_lru, &reaplist); + if (max != 0 && ++count >= max) + break; + } + } + spin_unlock(&nn->client_lock); + + list_for_each_entry_safe(clp, next, &reaplist, cl_lru) + expire_client(clp); + + return count; +} + static void nfsd_print_count(struct nfs4_client *clp, unsigned int count, const char *type) { diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 77a1903d58ab..eb3b35a74795 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -479,9 +479,9 @@ u64 nfsd_for_n_state(u64, u64 (*)(struct nfs4_client *, u64)); struct nfs4_client *nfsd_find_client(struct sockaddr_storage *, size_t); u64 nfsd_inject_print_clients(struct nfsd_fault_inject_op *op); -u64 nfsd_forget_client(struct nfs4_client *, u64); u64 nfsd_inject_forget_client(struct nfsd_fault_inject_op *, struct sockaddr_storage *, size_t); +u64 nfsd_inject_forget_clients(struct nfsd_fault_inject_op *, u64); u64 nfsd_forget_client_locks(struct nfs4_client*, u64); u64 nfsd_forget_client_openowners(struct nfs4_client *, u64); From 3738d50e7f6d04dd58d219cf9111bf927c17c6f2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2014 08:27:20 -0400 Subject: [PATCH 148/167] nfsd: add a list_head arg to nfsd_foreach_client_lock In a later patch, we'll want to collect the locks onto a list for later destruction. If "func" is defined and "collect" is defined, then we'll add the lock stateid to the list. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 226d89e2c7b2..b661294144ba 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5811,6 +5811,7 @@ static void nfsd_print_count(struct nfs4_client *clp, unsigned int count, } static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, + struct list_head *collect, void (*func)(struct nfs4_ol_stateid *)) { struct nfs4_openowner *oop; @@ -5823,8 +5824,12 @@ static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, &oop->oo_owner.so_stateids, st_perstateowner) { list_for_each_entry_safe(lst, lst_next, &stp->st_locks, st_locks) { - if (func) + if (func) { func(lst); + if (collect) + list_add(&lst->st_locks, + collect); + } if (++count == max) return count; } @@ -5836,12 +5841,12 @@ static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, u64 nfsd_forget_client_locks(struct nfs4_client *clp, u64 max) { - return nfsd_foreach_client_lock(clp, max, release_lock_stateid); + return nfsd_foreach_client_lock(clp, max, NULL, release_lock_stateid); } u64 nfsd_print_client_locks(struct nfs4_client *clp, u64 max) { - u64 count = nfsd_foreach_client_lock(clp, max, NULL); + u64 count = nfsd_foreach_client_lock(clp, max, NULL, NULL); nfsd_print_count(clp, count, "locked files"); return count; } From 016200c37341b62df14ec642b0b30b4b70bc09af Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2014 08:27:21 -0400 Subject: [PATCH 149/167] nfsd: add more granular locking to forget_locks fault injector ...instead of relying on the client_mutex. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/fault_inject.c | 8 +-- fs/nfsd/nfs4state.c | 132 ++++++++++++++++++++++++++++++++++++++--- fs/nfsd/state.h | 7 ++- 3 files changed, 131 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index 76ecdff37ea2..a444d821d2a5 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -140,11 +140,9 @@ static struct nfsd_fault_inject_op inject_ops[] = { }, { .file = "forget_locks", - .get = nfsd_inject_get, - .set_val = nfsd_inject_set, - .set_clnt = nfsd_inject_set_client, - .forget = nfsd_forget_client_locks, - .print = nfsd_print_client_locks, + .get = nfsd_inject_print_locks, + .set_val = nfsd_inject_forget_locks, + .set_clnt = nfsd_inject_forget_client_locks, }, { .file = "forget_openowners", diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b661294144ba..48ae0a66d512 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5723,6 +5723,12 @@ nfs4_check_open_reclaim(clientid_t *clid, } #ifdef CONFIG_NFSD_FAULT_INJECTION +static inline void +put_client(struct nfs4_client *clp) +{ + atomic_dec(&clp->cl_refcount); +} + u64 nfsd_inject_print_clients(struct nfsd_fault_inject_op *op) { @@ -5810,6 +5816,22 @@ static void nfsd_print_count(struct nfs4_client *clp, unsigned int count, printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type); } +static void +nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst, + struct list_head *collect) +{ + struct nfs4_client *clp = lst->st_stid.sc_client; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + + if (!collect) + return; + + lockdep_assert_held(&nn->client_lock); + atomic_inc(&clp->cl_refcount); + list_add(&lst->st_locks, collect); +} + static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, struct list_head *collect, void (*func)(struct nfs4_ol_stateid *)) @@ -5819,6 +5841,7 @@ static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, struct nfs4_ol_stateid *lst, 
*lst_next; u64 count = 0; + spin_lock(&clp->cl_lock); list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) { list_for_each_entry_safe(stp, st_next, &oop->oo_owner.so_stateids, st_perstateowner) { @@ -5826,31 +5849,122 @@ static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max, &stp->st_locks, st_locks) { if (func) { func(lst); - if (collect) - list_add(&lst->st_locks, - collect); + nfsd_inject_add_lock_to_list(lst, + collect); } - if (++count == max) - return count; + ++count; + /* + * Despite the fact that these functions deal + * with 64-bit integers for "count", we must + * ensure that it doesn't blow up the + * clp->cl_refcount. Throw a warning if we + * start to approach INT_MAX here. + */ + WARN_ON_ONCE(count == (INT_MAX / 2)); + if (count == max) + goto out; } } } +out: + spin_unlock(&clp->cl_lock); return count; } -u64 nfsd_forget_client_locks(struct nfs4_client *clp, u64 max) +static u64 +nfsd_collect_client_locks(struct nfs4_client *clp, struct list_head *collect, + u64 max) { - return nfsd_foreach_client_lock(clp, max, NULL, release_lock_stateid); + return nfsd_foreach_client_lock(clp, max, collect, unhash_lock_stateid); } -u64 nfsd_print_client_locks(struct nfs4_client *clp, u64 max) +static u64 +nfsd_print_client_locks(struct nfs4_client *clp) { - u64 count = nfsd_foreach_client_lock(clp, max, NULL, NULL); + u64 count = nfsd_foreach_client_lock(clp, 0, NULL, NULL); nfsd_print_count(clp, count, "locked files"); return count; } +u64 +nfsd_inject_print_locks(struct nfsd_fault_inject_op *op) +{ + struct nfs4_client *clp; + u64 count = 0; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + + if (!nfsd_netns_ready(nn)) + return 0; + + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) + count += nfsd_print_client_locks(clp); + spin_unlock(&nn->client_lock); + + return count; +} + +static void +nfsd_reap_locks(struct list_head *reaplist) +{ + struct nfs4_client *clp; + struct 
nfs4_ol_stateid *stp, *next; + + list_for_each_entry_safe(stp, next, reaplist, st_locks) { + list_del_init(&stp->st_locks); + clp = stp->st_stid.sc_client; + nfs4_put_stid(&stp->st_stid); + put_client(clp); + } +} + +u64 +nfsd_inject_forget_client_locks(struct nfsd_fault_inject_op *op, + struct sockaddr_storage *addr, size_t addr_size) +{ + unsigned int count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + clp = nfsd_find_client(addr, addr_size); + if (clp) + count = nfsd_collect_client_locks(clp, &reaplist, 0); + spin_unlock(&nn->client_lock); + nfsd_reap_locks(&reaplist); + return count; +} + +u64 +nfsd_inject_forget_locks(struct nfsd_fault_inject_op *op, u64 max) +{ + u64 count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + count += nfsd_collect_client_locks(clp, &reaplist, max - count); + if (max != 0 && count >= max) + break; + } + spin_unlock(&nn->client_lock); + nfsd_reap_locks(&reaplist); + return count; +} + static u64 nfsd_foreach_client_open(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_openowner *)) { struct nfs4_openowner *oop, *next; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index eb3b35a74795..028947688d57 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -483,12 +483,15 @@ u64 nfsd_inject_forget_client(struct nfsd_fault_inject_op *, struct sockaddr_storage *, size_t); u64 nfsd_inject_forget_clients(struct nfsd_fault_inject_op *, u64); -u64 nfsd_forget_client_locks(struct nfs4_client*, u64); +u64 nfsd_inject_print_locks(struct nfsd_fault_inject_op *); +u64 nfsd_inject_forget_client_locks(struct nfsd_fault_inject_op *, + struct sockaddr_storage *, 
size_t); +u64 nfsd_inject_forget_locks(struct nfsd_fault_inject_op *, u64); + u64 nfsd_forget_client_openowners(struct nfs4_client *, u64); u64 nfsd_forget_client_delegations(struct nfs4_client *, u64); u64 nfsd_recall_client_delegations(struct nfs4_client *, u64); -u64 nfsd_print_client_locks(struct nfs4_client *, u64); u64 nfsd_print_client_openowners(struct nfs4_client *, u64); u64 nfsd_print_client_delegations(struct nfs4_client *, u64); #else /* CONFIG_NFSD_FAULT_INJECTION */ From 82e05efaec9b5b1528771b30c27d060961576827 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2014 08:27:22 -0400 Subject: [PATCH 150/167] nfsd: add more granular locking to forget_openowners fault injector ...instead of relying on the client_mutex. Also, fix up the printk output that is generated when the file is read. It currently says that it's reporting the number of open files, but it's actually reporting the number of openowners. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/fault_inject.c | 8 +-- fs/nfsd/nfs4state.c | 122 ++++++++++++++++++++++++++++++++++++++--- fs/nfsd/state.h | 7 ++- 3 files changed, 122 insertions(+), 15 deletions(-) diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index a444d821d2a5..d4472cd19807 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -146,11 +146,9 @@ static struct nfsd_fault_inject_op inject_ops[] = { }, { .file = "forget_openowners", - .get = nfsd_inject_get, - .set_val = nfsd_inject_set, - .set_clnt = nfsd_inject_set_client, - .forget = nfsd_forget_client_openowners, - .print = nfsd_print_client_openowners, + .get = nfsd_inject_print_openowners, + .set_val = nfsd_inject_forget_openowners, + .set_clnt = nfsd_inject_forget_client_openowners, }, { .file = "forget_delegations", diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 48ae0a66d512..20bffa8c976c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5965,30 +5965,136 @@ nfsd_inject_forget_locks(struct 
nfsd_fault_inject_op *op, u64 max) return count; } -static u64 nfsd_foreach_client_open(struct nfs4_client *clp, u64 max, void (*func)(struct nfs4_openowner *)) +static u64 +nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max, + struct list_head *collect, + void (*func)(struct nfs4_openowner *)) { struct nfs4_openowner *oop, *next; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); u64 count = 0; + lockdep_assert_held(&nn->client_lock); + + spin_lock(&clp->cl_lock); list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) { - if (func) + if (func) { func(oop); - if (++count == max) + if (collect) { + atomic_inc(&clp->cl_refcount); + list_add(&oop->oo_perclient, collect); + } + } + ++count; + /* + * Despite the fact that these functions deal with + * 64-bit integers for "count", we must ensure that + * it doesn't blow up the clp->cl_refcount. Throw a + * warning if we start to approach INT_MAX here. + */ + WARN_ON_ONCE(count == (INT_MAX / 2)); + if (count == max) break; } + spin_unlock(&clp->cl_lock); return count; } -u64 nfsd_forget_client_openowners(struct nfs4_client *clp, u64 max) +static u64 +nfsd_print_client_openowners(struct nfs4_client *clp) { - return nfsd_foreach_client_open(clp, max, release_openowner); + u64 count = nfsd_foreach_client_openowner(clp, 0, NULL, NULL); + + nfsd_print_count(clp, count, "openowners"); + return count; } -u64 nfsd_print_client_openowners(struct nfs4_client *clp, u64 max) +static u64 +nfsd_collect_client_openowners(struct nfs4_client *clp, + struct list_head *collect, u64 max) { - u64 count = nfsd_foreach_client_open(clp, max, NULL); - nfsd_print_count(clp, count, "open files"); + return nfsd_foreach_client_openowner(clp, max, collect, + unhash_openowner_locked); +} + +u64 +nfsd_inject_print_openowners(struct nfsd_fault_inject_op *op) +{ + struct nfs4_client *clp; + u64 count = 0; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + + if 
(!nfsd_netns_ready(nn)) + return 0; + + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) + count += nfsd_print_client_openowners(clp); + spin_unlock(&nn->client_lock); + + return count; +} + +static void +nfsd_reap_openowners(struct list_head *reaplist) +{ + struct nfs4_client *clp; + struct nfs4_openowner *oop, *next; + + list_for_each_entry_safe(oop, next, reaplist, oo_perclient) { + list_del_init(&oop->oo_perclient); + clp = oop->oo_owner.so_client; + release_openowner(oop); + put_client(clp); + } +} + +u64 +nfsd_inject_forget_client_openowners(struct nfsd_fault_inject_op *op, + struct sockaddr_storage *addr, size_t addr_size) +{ + unsigned int count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + clp = nfsd_find_client(addr, addr_size); + if (clp) + count = nfsd_collect_client_openowners(clp, &reaplist, 0); + spin_unlock(&nn->client_lock); + nfsd_reap_openowners(&reaplist); + return count; +} + +u64 +nfsd_inject_forget_openowners(struct nfsd_fault_inject_op *op, u64 max) +{ + u64 count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + count += nfsd_collect_client_openowners(clp, &reaplist, + max - count); + if (max != 0 && count >= max) + break; + } + spin_unlock(&nn->client_lock); + nfsd_reap_openowners(&reaplist); return count; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 028947688d57..faaf6af7b28d 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -488,11 +488,14 @@ u64 nfsd_inject_forget_client_locks(struct nfsd_fault_inject_op *, struct sockaddr_storage *, size_t); u64 nfsd_inject_forget_locks(struct nfsd_fault_inject_op *, u64); 
-u64 nfsd_forget_client_openowners(struct nfs4_client *, u64); +u64 nfsd_inject_print_openowners(struct nfsd_fault_inject_op *); +u64 nfsd_inject_forget_client_openowners(struct nfsd_fault_inject_op *, + struct sockaddr_storage *, size_t); +u64 nfsd_inject_forget_openowners(struct nfsd_fault_inject_op *, u64); + u64 nfsd_forget_client_delegations(struct nfs4_client *, u64); u64 nfsd_recall_client_delegations(struct nfs4_client *, u64); -u64 nfsd_print_client_openowners(struct nfs4_client *, u64); u64 nfsd_print_client_delegations(struct nfs4_client *, u64); #else /* CONFIG_NFSD_FAULT_INJECTION */ static inline int nfsd_fault_inject_init(void) { return 0; } From 98d5c7c5bd378aa1a22549200f49de3ed79d4d0a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2014 08:27:23 -0400 Subject: [PATCH 151/167] nfsd: add more granular locking to *_delegations fault injectors ...instead of relying on the client_mutex. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/fault_inject.c | 16 ++- fs/nfsd/nfs4state.c | 217 ++++++++++++++++++++++++++++++++--------- fs/nfsd/state.h | 11 ++- 3 files changed, 183 insertions(+), 61 deletions(-) diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index d4472cd19807..2479dba71c3c 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -152,19 +152,15 @@ static struct nfsd_fault_inject_op inject_ops[] = { }, { .file = "forget_delegations", - .get = nfsd_inject_get, - .set_val = nfsd_inject_set, - .set_clnt = nfsd_inject_set_client, - .forget = nfsd_forget_client_delegations, - .print = nfsd_print_client_delegations, + .get = nfsd_inject_print_delegations, + .set_val = nfsd_inject_forget_delegations, + .set_clnt = nfsd_inject_forget_client_delegations, }, { .file = "recall_delegations", - .get = nfsd_inject_get, - .set_val = nfsd_inject_set, - .set_clnt = nfsd_inject_set_client, - .forget = nfsd_recall_client_delegations, - .print = nfsd_print_client_delegations, + .get = 
nfsd_inject_print_delegations, + .set_val = nfsd_inject_recall_delegations, + .set_clnt = nfsd_inject_recall_client_delegations, }, }; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 20bffa8c976c..d18bbb1e334d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -6102,9 +6102,13 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, struct list_head *victims) { struct nfs4_delegation *dp, *next; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); u64 count = 0; - lockdep_assert_held(&state_lock); + lockdep_assert_held(&nn->client_lock); + + spin_lock(&state_lock); list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) { if (victims) { /* @@ -6116,70 +6120,189 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max, if (dp->dl_time != 0) continue; + atomic_inc(&clp->cl_refcount); unhash_delegation_locked(dp); list_add(&dp->dl_recall_lru, victims); } - if (++count == max) + ++count; + /* + * Despite the fact that these functions deal with + * 64-bit integers for "count", we must ensure that + * it doesn't blow up the clp->cl_refcount. Throw a + * warning if we start to approach INT_MAX here. 
+ */ + WARN_ON_ONCE(count == (INT_MAX / 2)); + if (count == max) break; } + spin_unlock(&state_lock); return count; } -u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max) +static u64 +nfsd_print_client_delegations(struct nfs4_client *clp) { - struct nfs4_delegation *dp, *next; - LIST_HEAD(victims); - u64 count; - - spin_lock(&state_lock); - count = nfsd_find_all_delegations(clp, max, &victims); - spin_unlock(&state_lock); - - list_for_each_entry_safe(dp, next, &victims, dl_recall_lru) { - list_del_init(&dp->dl_recall_lru); - revoke_delegation(dp); - } - - return count; -} - -u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max) -{ - struct nfs4_delegation *dp; - LIST_HEAD(victims); - u64 count; - - spin_lock(&state_lock); - count = nfsd_find_all_delegations(clp, max, &victims); - while (!list_empty(&victims)) { - dp = list_first_entry(&victims, struct nfs4_delegation, - dl_recall_lru); - list_del_init(&dp->dl_recall_lru); - dp->dl_time = 0; - nfsd_break_one_deleg(dp); - } - spin_unlock(&state_lock); - - return count; -} - -u64 nfsd_print_client_delegations(struct nfs4_client *clp, u64 max) -{ - u64 count = 0; - - spin_lock(&state_lock); - count = nfsd_find_all_delegations(clp, max, NULL); - spin_unlock(&state_lock); + u64 count = nfsd_find_all_delegations(clp, 0, NULL); nfsd_print_count(clp, count, "delegations"); return count; } +u64 +nfsd_inject_print_delegations(struct nfsd_fault_inject_op *op) +{ + struct nfs4_client *clp; + u64 count = 0; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + + if (!nfsd_netns_ready(nn)) + return 0; + + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) + count += nfsd_print_client_delegations(clp); + spin_unlock(&nn->client_lock); + + return count; +} + +static void +nfsd_forget_delegations(struct list_head *reaplist) +{ + struct nfs4_client *clp; + struct nfs4_delegation *dp, *next; + + list_for_each_entry_safe(dp, next, reaplist, 
dl_recall_lru) { + list_del_init(&dp->dl_recall_lru); + clp = dp->dl_stid.sc_client; + revoke_delegation(dp); + put_client(clp); + } +} + +u64 +nfsd_inject_forget_client_delegations(struct nfsd_fault_inject_op *op, + struct sockaddr_storage *addr, size_t addr_size) +{ + u64 count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + clp = nfsd_find_client(addr, addr_size); + if (clp) + count = nfsd_find_all_delegations(clp, 0, &reaplist); + spin_unlock(&nn->client_lock); + + nfsd_forget_delegations(&reaplist); + return count; +} + +u64 +nfsd_inject_forget_delegations(struct nfsd_fault_inject_op *op, u64 max) +{ + u64 count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + count += nfsd_find_all_delegations(clp, max - count, &reaplist); + if (max != 0 && count >= max) + break; + } + spin_unlock(&nn->client_lock); + nfsd_forget_delegations(&reaplist); + return count; +} + +static void +nfsd_recall_delegations(struct list_head *reaplist) +{ + struct nfs4_client *clp; + struct nfs4_delegation *dp, *next; + + list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) { + list_del_init(&dp->dl_recall_lru); + clp = dp->dl_stid.sc_client; + /* + * We skipped all entries that had a zero dl_time before, + * so we can now reset the dl_time back to 0. If a delegation + * break comes in now, then it won't make any difference since + * we're recalling it either way. 
+ */ + spin_lock(&state_lock); + dp->dl_time = 0; + spin_unlock(&state_lock); + nfsd_break_one_deleg(dp); + put_client(clp); + } +} + +u64 +nfsd_inject_recall_client_delegations(struct nfsd_fault_inject_op *op, + struct sockaddr_storage *addr, + size_t addr_size) +{ + u64 count = 0; + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + clp = nfsd_find_client(addr, addr_size); + if (clp) + count = nfsd_find_all_delegations(clp, 0, &reaplist); + spin_unlock(&nn->client_lock); + + nfsd_recall_delegations(&reaplist); + return count; +} + +u64 +nfsd_inject_recall_delegations(struct nfsd_fault_inject_op *op, u64 max) +{ + u64 count = 0; + struct nfs4_client *clp, *next; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + LIST_HEAD(reaplist); + + if (!nfsd_netns_ready(nn)) + return count; + + spin_lock(&nn->client_lock); + list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { + count += nfsd_find_all_delegations(clp, max - count, &reaplist); + if (max != 0 && ++count >= max) + break; + } + spin_unlock(&nn->client_lock); + nfsd_recall_delegations(&reaplist); + return count; +} + u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64)) { struct nfs4_client *clp, *next; u64 count = 0; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); if (!nfsd_netns_ready(nn)) return 0; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index faaf6af7b28d..0a35e7bea5f7 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -493,10 +493,13 @@ u64 nfsd_inject_forget_client_openowners(struct nfsd_fault_inject_op *, struct sockaddr_storage *, size_t); u64 nfsd_inject_forget_openowners(struct nfsd_fault_inject_op *, u64); -u64 nfsd_forget_client_delegations(struct nfs4_client *, u64); -u64 
nfsd_recall_client_delegations(struct nfs4_client *, u64); - -u64 nfsd_print_client_delegations(struct nfs4_client *, u64); +u64 nfsd_inject_print_delegations(struct nfsd_fault_inject_op *); +u64 nfsd_inject_forget_client_delegations(struct nfsd_fault_inject_op *, + struct sockaddr_storage *, size_t); +u64 nfsd_inject_forget_delegations(struct nfsd_fault_inject_op *, u64); +u64 nfsd_inject_recall_client_delegations(struct nfsd_fault_inject_op *, + struct sockaddr_storage *, size_t); +u64 nfsd_inject_recall_delegations(struct nfsd_fault_inject_op *, u64); #else /* CONFIG_NFSD_FAULT_INJECTION */ static inline int nfsd_fault_inject_init(void) { return 0; } static inline void nfsd_fault_inject_cleanup(void) {} From 285abdee5335921b6a41f9719c1fc56c478ac561 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2014 08:27:24 -0400 Subject: [PATCH 152/167] nfsd: remove old fault injection infrastructure Remove the old nfsd_for_n_state function and move nfsd_find_client higher up into the file to get rid of forward declaration. Remove the struct nfsd_fault_inject_op arguments from the operations as they are no longer needed by any of them. Finally, remove the old "standard" get and set routines, which also eliminates the client_mutex from this code. Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/fault_inject.c | 51 +++---------------------- fs/nfsd/nfs4state.c | 87 ++++++++++++++++-------------------------- fs/nfsd/state.h | 37 +++++++----------- 3 files changed, 53 insertions(+), 122 deletions(-) diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index 2479dba71c3c..c16bf5af6831 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -17,52 +17,13 @@ struct nfsd_fault_inject_op { char *file; - u64 (*get)(struct nfsd_fault_inject_op *); - u64 (*set_val)(struct nfsd_fault_inject_op *, u64); - u64 (*set_clnt)(struct nfsd_fault_inject_op *, - struct sockaddr_storage *, size_t); - u64 (*forget)(struct nfs4_client *, u64); - u64 (*print)(struct nfs4_client *, u64); + u64 (*get)(void); + u64 (*set_val)(u64); + u64 (*set_clnt)(struct sockaddr_storage *, size_t); }; static struct dentry *debug_dir; -static u64 nfsd_inject_set(struct nfsd_fault_inject_op *op, u64 val) -{ - u64 count; - - nfs4_lock_state(); - count = nfsd_for_n_state(val, op->forget); - nfs4_unlock_state(); - return count; -} - -static u64 nfsd_inject_set_client(struct nfsd_fault_inject_op *op, - struct sockaddr_storage *addr, - size_t addr_size) -{ - struct nfs4_client *clp; - u64 count = 0; - - nfs4_lock_state(); - clp = nfsd_find_client(addr, addr_size); - if (clp) - count = op->forget(clp, 0); - nfs4_unlock_state(); - return count; -} - -static u64 nfsd_inject_get(struct nfsd_fault_inject_op *op) -{ - u64 count; - - nfs4_lock_state(); - count = nfsd_for_n_state(0, op->print); - nfs4_unlock_state(); - - return count; -} - static ssize_t fault_inject_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { @@ -73,7 +34,7 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf, struct nfsd_fault_inject_op *op = file_inode(file)->i_private; if (!pos) - val = op->get(op); + val = op->get(); size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val); return simple_read_from_buffer(buf, len, ppos, read_buf, size); @@ -103,7 
+64,7 @@ static ssize_t fault_inject_write(struct file *file, const char __user *buf, size = rpc_pton(net, write_buf, size, (struct sockaddr *)&sa, sizeof(sa)); if (size > 0) { - val = op->set_clnt(op, &sa, size); + val = op->set_clnt(&sa, size); if (val) pr_info("NFSD [%s]: Client %s had %llu state object(s)\n", op->file, write_buf, val); @@ -114,7 +75,7 @@ static ssize_t fault_inject_write(struct file *file, const char __user *buf, else pr_info("NFSD Fault Injection: %s (n = %llu)", op->file, val); - val = op->set_val(op, val); + val = op->set_val(val); pr_info("NFSD: %s: found %llu", op->file, val); } return len; /* on success, claim we got the whole input */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index d18bbb1e334d..90aa953420b6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5729,8 +5729,25 @@ put_client(struct nfs4_client *clp) atomic_dec(&clp->cl_refcount); } +static struct nfs4_client * +nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size) +{ + struct nfs4_client *clp; + struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, + nfsd_net_id); + + if (!nfsd_netns_ready(nn)) + return NULL; + + list_for_each_entry(clp, &nn->client_lru, cl_lru) { + if (memcmp(&clp->cl_addr, addr, addr_size) == 0) + return clp; + } + return NULL; +} + u64 -nfsd_inject_print_clients(struct nfsd_fault_inject_op *op) +nfsd_inject_print_clients(void) { struct nfs4_client *clp; u64 count = 0; @@ -5753,8 +5770,7 @@ nfsd_inject_print_clients(struct nfsd_fault_inject_op *op) } u64 -nfsd_inject_forget_client(struct nfsd_fault_inject_op *op, - struct sockaddr_storage *addr, size_t addr_size) +nfsd_inject_forget_client(struct sockaddr_storage *addr, size_t addr_size) { u64 count = 0; struct nfs4_client *clp; @@ -5781,7 +5797,7 @@ nfsd_inject_forget_client(struct nfsd_fault_inject_op *op, } u64 -nfsd_inject_forget_clients(struct nfsd_fault_inject_op *op, u64 max) +nfsd_inject_forget_clients(u64 max) { u64 count = 0; struct nfs4_client *clp, 
*next; @@ -5888,7 +5904,7 @@ nfsd_print_client_locks(struct nfs4_client *clp) } u64 -nfsd_inject_print_locks(struct nfsd_fault_inject_op *op) +nfsd_inject_print_locks(void) { struct nfs4_client *clp; u64 count = 0; @@ -5921,8 +5937,7 @@ nfsd_reap_locks(struct list_head *reaplist) } u64 -nfsd_inject_forget_client_locks(struct nfsd_fault_inject_op *op, - struct sockaddr_storage *addr, size_t addr_size) +nfsd_inject_forget_client_locks(struct sockaddr_storage *addr, size_t addr_size) { unsigned int count = 0; struct nfs4_client *clp; @@ -5943,7 +5958,7 @@ nfsd_inject_forget_client_locks(struct nfsd_fault_inject_op *op, } u64 -nfsd_inject_forget_locks(struct nfsd_fault_inject_op *op, u64 max) +nfsd_inject_forget_locks(u64 max) { u64 count = 0; struct nfs4_client *clp; @@ -6020,7 +6035,7 @@ nfsd_collect_client_openowners(struct nfs4_client *clp, } u64 -nfsd_inject_print_openowners(struct nfsd_fault_inject_op *op) +nfsd_inject_print_openowners(void) { struct nfs4_client *clp; u64 count = 0; @@ -6053,8 +6068,8 @@ nfsd_reap_openowners(struct list_head *reaplist) } u64 -nfsd_inject_forget_client_openowners(struct nfsd_fault_inject_op *op, - struct sockaddr_storage *addr, size_t addr_size) +nfsd_inject_forget_client_openowners(struct sockaddr_storage *addr, + size_t addr_size) { unsigned int count = 0; struct nfs4_client *clp; @@ -6075,7 +6090,7 @@ nfsd_inject_forget_client_openowners(struct nfsd_fault_inject_op *op, } u64 -nfsd_inject_forget_openowners(struct nfsd_fault_inject_op *op, u64 max) +nfsd_inject_forget_openowners(u64 max) { u64 count = 0; struct nfs4_client *clp; @@ -6149,7 +6164,7 @@ nfsd_print_client_delegations(struct nfs4_client *clp) } u64 -nfsd_inject_print_delegations(struct nfsd_fault_inject_op *op) +nfsd_inject_print_delegations(void) { struct nfs4_client *clp; u64 count = 0; @@ -6182,8 +6197,8 @@ nfsd_forget_delegations(struct list_head *reaplist) } u64 -nfsd_inject_forget_client_delegations(struct nfsd_fault_inject_op *op, - struct sockaddr_storage 
*addr, size_t addr_size) +nfsd_inject_forget_client_delegations(struct sockaddr_storage *addr, + size_t addr_size) { u64 count = 0; struct nfs4_client *clp; @@ -6205,7 +6220,7 @@ nfsd_inject_forget_client_delegations(struct nfsd_fault_inject_op *op, } u64 -nfsd_inject_forget_delegations(struct nfsd_fault_inject_op *op, u64 max) +nfsd_inject_forget_delegations(u64 max) { u64 count = 0; struct nfs4_client *clp; @@ -6251,8 +6266,7 @@ nfsd_recall_delegations(struct list_head *reaplist) } u64 -nfsd_inject_recall_client_delegations(struct nfsd_fault_inject_op *op, - struct sockaddr_storage *addr, +nfsd_inject_recall_client_delegations(struct sockaddr_storage *addr, size_t addr_size) { u64 count = 0; @@ -6275,7 +6289,7 @@ nfsd_inject_recall_client_delegations(struct nfsd_fault_inject_op *op, } u64 -nfsd_inject_recall_delegations(struct nfsd_fault_inject_op *op, u64 max) +nfsd_inject_recall_delegations(u64 max) { u64 count = 0; struct nfs4_client *clp, *next; @@ -6296,41 +6310,6 @@ nfsd_inject_recall_delegations(struct nfsd_fault_inject_op *op, u64 max) nfsd_recall_delegations(&reaplist); return count; } - -u64 nfsd_for_n_state(u64 max, u64 (*func)(struct nfs4_client *, u64)) -{ - struct nfs4_client *clp, *next; - u64 count = 0; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, - nfsd_net_id); - - if (!nfsd_netns_ready(nn)) - return 0; - - list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) { - count += func(clp, max - count); - if ((max != 0) && (count >= max)) - break; - } - - return count; -} - -struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size) -{ - struct nfs4_client *clp; - struct nfsd_net *nn = net_generic(current->nsproxy->net_ns, nfsd_net_id); - - if (!nfsd_netns_ready(nn)) - return NULL; - - list_for_each_entry(clp, &nn->client_lru, cl_lru) { - if (memcmp(&clp->cl_addr, addr, addr_size) == 0) - return clp; - } - return NULL; -} - #endif /* CONFIG_NFSD_FAULT_INJECTION */ /* diff --git a/fs/nfsd/state.h 
b/fs/nfsd/state.h index 0a35e7bea5f7..a02358f2442c 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -471,35 +471,26 @@ extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time); /* nfs fault injection functions */ #ifdef CONFIG_NFSD_FAULT_INJECTION -struct nfsd_fault_inject_op; - int nfsd_fault_inject_init(void); void nfsd_fault_inject_cleanup(void); -u64 nfsd_for_n_state(u64, u64 (*)(struct nfs4_client *, u64)); -struct nfs4_client *nfsd_find_client(struct sockaddr_storage *, size_t); -u64 nfsd_inject_print_clients(struct nfsd_fault_inject_op *op); -u64 nfsd_inject_forget_client(struct nfsd_fault_inject_op *, - struct sockaddr_storage *, size_t); -u64 nfsd_inject_forget_clients(struct nfsd_fault_inject_op *, u64); +u64 nfsd_inject_print_clients(void); +u64 nfsd_inject_forget_client(struct sockaddr_storage *, size_t); +u64 nfsd_inject_forget_clients(u64); -u64 nfsd_inject_print_locks(struct nfsd_fault_inject_op *); -u64 nfsd_inject_forget_client_locks(struct nfsd_fault_inject_op *, - struct sockaddr_storage *, size_t); -u64 nfsd_inject_forget_locks(struct nfsd_fault_inject_op *, u64); +u64 nfsd_inject_print_locks(void); +u64 nfsd_inject_forget_client_locks(struct sockaddr_storage *, size_t); +u64 nfsd_inject_forget_locks(u64); -u64 nfsd_inject_print_openowners(struct nfsd_fault_inject_op *); -u64 nfsd_inject_forget_client_openowners(struct nfsd_fault_inject_op *, - struct sockaddr_storage *, size_t); -u64 nfsd_inject_forget_openowners(struct nfsd_fault_inject_op *, u64); +u64 nfsd_inject_print_openowners(void); +u64 nfsd_inject_forget_client_openowners(struct sockaddr_storage *, size_t); +u64 nfsd_inject_forget_openowners(u64); -u64 nfsd_inject_print_delegations(struct nfsd_fault_inject_op *); -u64 nfsd_inject_forget_client_delegations(struct nfsd_fault_inject_op *, - struct sockaddr_storage *, size_t); -u64 nfsd_inject_forget_delegations(struct nfsd_fault_inject_op *, u64); -u64 nfsd_inject_recall_client_delegations(struct 
nfsd_fault_inject_op *, - struct sockaddr_storage *, size_t); -u64 nfsd_inject_recall_delegations(struct nfsd_fault_inject_op *, u64); +u64 nfsd_inject_print_delegations(void); +u64 nfsd_inject_forget_client_delegations(struct sockaddr_storage *, size_t); +u64 nfsd_inject_forget_delegations(u64); +u64 nfsd_inject_recall_client_delegations(struct sockaddr_storage *, size_t); +u64 nfsd_inject_recall_delegations(u64); #else /* CONFIG_NFSD_FAULT_INJECTION */ static inline int nfsd_fault_inject_init(void) { return 0; } static inline void nfsd_fault_inject_cleanup(void) {} From c2d1d6a8f096ffbd3bde2490d313d16f35ceaa1b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jul 2014 08:27:25 -0400 Subject: [PATCH 153/167] nfsd: Remove nfs4_lock_state(): nfs4_preprocess_stateid_op() Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 90aa953420b6..3199ab50d63c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4465,13 +4465,11 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) return check_special_stateids(net, current_fh, stateid, flags); - nfs4_lock_state(); - status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s, nn); if (status) - goto unlock_state; + return status; status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate)); if (status) goto out; @@ -4521,8 +4519,6 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, *filpp = file; out: nfs4_put_stid(s); -unlock_state: - nfs4_unlock_state(); return status; } From e7d5dc19ce9800b86dd9e41ff36cc418e9da1fce Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jul 2014 08:27:26 -0400 Subject: [PATCH 154/167] nfsd: Remove nfs4_lock_state(): 
nfsd4_test_stateid/nfsd4_free_stateid Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3199ab50d63c..712d7e75e7dd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4532,11 +4532,9 @@ nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_test_stateid_id *stateid; struct nfs4_client *cl = cstate->session->se_client; - nfs4_lock_state(); list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list) stateid->ts_id_status = nfsd4_validate_stateid(cl, &stateid->ts_id_stateid); - nfs4_unlock_state(); return nfs_ok; } @@ -4552,7 +4550,6 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfs4_client *cl = cstate->session->se_client; __be32 ret = nfserr_bad_stateid; - nfs4_lock_state(); spin_lock(&cl->cl_lock); s = find_stateid_locked(cl, stateid); if (!s) @@ -4593,7 +4590,6 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, out_unlock: spin_unlock(&cl->cl_lock); out: - nfs4_unlock_state(); return ret; } From 51f5e78355db2e9b4d5d9093f83be3567178d236 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jul 2014 08:27:27 -0400 Subject: [PATCH 155/167] nfsd: Remove nfs4_lock_state(): nfsd4_release_lockowner Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 712d7e75e7dd..0f9e8426b2dd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5577,11 +5577,9 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); - nfs4_lock_state(); - status = lookup_clientid(clid, cstate, nn); if (status) - goto out; + return status; clp = cstate->clp; /* Find the matching lock stateowner */ @@ -5598,7 +5596,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, if (check_for_locks(stp->st_stid.sc_file, lo)) { status = nfserr_locks_held; spin_unlock(&clp->cl_lock); - goto out; + return status; } } @@ -5608,8 +5606,6 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, spin_unlock(&clp->cl_lock); if (lo) release_lockowner(lo); -out: - nfs4_unlock_state(); return status; } From 2dd7f2ad4e34a3487e071ad3ef88b0a1ae7a97f2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jul 2014 08:27:28 -0400 Subject: [PATCH 156/167] nfsd: Remove nfs4_lock_state(): nfsd4_lock/locku/lockt() Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0f9e8426b2dd..2712cd3e77a0 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5205,8 +5205,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; } - nfs4_lock_state(); - if (lock->lk_is_new) { if (nfsd4_has_session(cstate)) /* See rfc 5661 18.10.3: given clientid is ignored: */ @@ -5349,7 +5347,6 @@ out: if (open_stp) nfs4_put_stid(&open_stp->st_stid); nfsd4_bump_seqid(cstate, status); - nfs4_unlock_state(); if (file_lock) locks_free_lock(file_lock); if (conflock) @@ -5392,8 +5389,6 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (check_lock_length(lockt->lt_offset, lockt->lt_length)) return nfserr_inval; - nfs4_lock_state(); - if (!nfsd4_has_session(cstate)) { status = lookup_clientid(&lockt->lt_clientid, cstate, nn); if (status) @@ -5448,7 +5443,6 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, out: if (lo) nfs4_put_stateowner(&lo->lo_owner); - nfs4_unlock_state(); if (file_lock) locks_free_lock(file_lock); return status; @@ -5472,8 +5466,6 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (check_lock_length(locku->lu_offset, locku->lu_length)) return nfserr_inval; - nfs4_lock_state(); - status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid, &locku->lu_stateid, NFS4_LOCK_STID, &stp, nn); @@ -5516,7 +5508,6 @@ put_stateid: nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); - nfs4_unlock_state(); if (file_lock) locks_free_lock(file_lock); return status; From 36626a2ecfcf23530bf5439679d3a4d040d6cc0d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jul 2014 08:27:29 -0400 Subject: [PATCH 157/167] nfsd: Remove nfs4_lock_state(): nfsd4_open_downgrade + nfsd4_close Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2712cd3e77a0..cde72d85991d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4713,7 +4713,6 @@ put_stateid: nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); - nfs4_unlock_state(); return status; } @@ -4760,7 +4759,6 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__, od->od_deleg_want); - nfs4_lock_state(); status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid, &od->od_stateid, &stp, nn); if (status) @@ -4826,7 +4824,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("NFSD: nfsd4_close on file %pd\n", cstate->current_fh.fh_dentry); - nfs4_lock_state(); status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid, &close->cl_stateid, NFS4_OPEN_STID|NFS4_CLOSED_STID, @@ -4842,7 +4839,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* put reference from nfs4_preprocess_seqid_op */ nfs4_put_stid(&stp->st_stid); out: - nfs4_unlock_state(); return status; } From 084d4d4549d88f7a27f455f9fe8a03fd9842f5ba Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jul 2014 08:27:30 -0400 Subject: [PATCH 158/167] nfsd: Remove nfs4_lock_state(): nfsd4_delegreturn() Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cde72d85991d..733e653e67b8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4855,7 +4855,6 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0))) return status; - nfs4_lock_state(); status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, &s, nn); if (status) goto out; @@ -4868,8 +4867,6 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, put_stateid: nfs4_put_stid(&dp->dl_stid); out: - nfs4_unlock_state(); - return status; } From 3234975f477f746c22d076ea178a79ea104b2ca7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jul 2014 08:27:31 -0400 Subject: [PATCH 159/167] nfsd: Remove nfs4_lock_state(): nfsd4_open and nfsd4_open_confirm Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 3 --- fs/nfsd/nfs4state.c | 6 ------ 2 files changed, 9 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 29cf395b694e..5e0dc528a0e8 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -385,8 +385,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (nfsd4_has_session(cstate)) copy_clientid(&open->op_clientid, cstate->session); - nfs4_lock_state(); - /* check seqid for replay. set nfs4_owner */ resp = rqstp->rq_resp; status = nfsd4_process_open1(&resp->cstate, open, nn); @@ -471,7 +469,6 @@ out: } nfsd4_cleanup_open_state(cstate, open, status); nfsd4_bump_seqid(cstate, status); - nfs4_unlock_state(); return status; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 733e653e67b8..5b3452a00cb2 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4007,9 +4007,6 @@ static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open, */ } -/* - * called with nfs4_lock_state() held. 
- */ __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { @@ -4690,8 +4687,6 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (status) return status; - nfs4_lock_state(); - status = nfs4_preprocess_seqid_op(cstate, oc->oc_seqid, &oc->oc_req_stateid, NFS4_OPEN_STID, &stp, nn); @@ -4785,7 +4780,6 @@ put_stateid: nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); - nfs4_unlock_state(); return status; } From 3974552dcea94619b0f51c5a52f90671067cbcec Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jul 2014 08:27:32 -0400 Subject: [PATCH 160/167] nfsd: Remove nfs4_lock_state(): exchange_id, create/destroy_session() Also destroy_clientid and bind_conn_to_session. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5b3452a00cb2..e6b27ede3388 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2209,7 +2209,6 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, return nfserr_jukebox; /* Cases below refer to rfc 5661 section 18.35.4: */ - nfs4_lock_state(); spin_lock(&nn->client_lock); conf = find_confirmed_client_by_name(&exid->clname, nn); if (conf) { @@ -2288,7 +2287,6 @@ out_copy: out: spin_unlock(&nn->client_lock); - nfs4_unlock_state(); if (new) expire_client(new); if (unconf) @@ -2462,7 +2460,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, if (!conn) goto out_free_session; - nfs4_lock_state(); spin_lock(&nn->client_lock); unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn); conf = find_confirmed_client(&cr_ses->clientid, true, nn); @@ -2532,13 +2529,11 @@ nfsd4_create_session(struct svc_rqst *rqstp, /* init connection and backchannel */ nfsd4_init_conn(rqstp, conn, new); nfsd4_put_session(new); - nfs4_unlock_state(); if (old) expire_client(old); return status; out_free_conn: 
spin_unlock(&nn->client_lock); - nfs4_unlock_state(); free_conn(conn); if (old) expire_client(old); @@ -2594,7 +2589,6 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, if (!nfsd4_last_compound_op(rqstp)) return nfserr_not_only_op; - nfs4_lock_state(); spin_lock(&nn->client_lock); session = find_in_sessionid_hashtbl(&bcts->sessionid, net, &status); spin_unlock(&nn->client_lock); @@ -2615,7 +2609,6 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, out: nfsd4_put_session(session); out_no_session: - nfs4_unlock_state(); return status; } @@ -2637,7 +2630,6 @@ nfsd4_destroy_session(struct svc_rqst *r, struct net *net = SVC_NET(r); struct nfsd_net *nn = net_generic(net, nfsd_net_id); - nfs4_lock_state(); status = nfserr_not_only_op; if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) { if (!nfsd4_last_compound_op(r)) @@ -2667,7 +2659,6 @@ out_put_session: out_client_lock: spin_unlock(&nn->client_lock); out: - nfs4_unlock_state(); return status; } @@ -2870,7 +2861,6 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta __be32 status = 0; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - nfs4_lock_state(); spin_lock(&nn->client_lock); unconf = find_unconfirmed_client(&dc->clientid, true, nn); conf = find_confirmed_client(&dc->clientid, true, nn); @@ -2899,7 +2889,6 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta unhash_client_locked(clp); out: spin_unlock(&nn->client_lock); - nfs4_unlock_state(); if (clp) expire_client(clp); return status; From cb86fb1428499160eea1be7612127952eec69122 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jul 2014 08:27:33 -0400 Subject: [PATCH 161/167] nfsd: Remove nfs4_lock_state(): setclientid, setclientid_confirm, renew Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e6b27ede3388..a4ae38b61933 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2948,7 +2948,6 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (new == NULL) return nfserr_jukebox; /* Cases below refer to rfc 3530 section 14.2.33: */ - nfs4_lock_state(); spin_lock(&nn->client_lock); conf = find_confirmed_client_by_name(&clname, nn); if (conf) { @@ -2983,7 +2982,6 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, status = nfs_ok; out: spin_unlock(&nn->client_lock); - nfs4_unlock_state(); if (new) free_client(new); if (unconf) @@ -3006,7 +3004,6 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, if (STALE_CLIENTID(clid, nn)) return nfserr_stale_clientid; - nfs4_lock_state(); spin_lock(&nn->client_lock); conf = find_confirmed_client(clid, false, nn); @@ -3056,7 +3053,6 @@ out: spin_unlock(&nn->client_lock); if (old) expire_client(old); - nfs4_unlock_state(); return status; } @@ -4113,7 +4109,6 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - nfs4_lock_state(); dprintk("process_renew(%08x/%08x): starting\n", clid->cl_boot, clid->cl_id); status = lookup_clientid(clid, cstate, nn); @@ -4126,7 +4121,6 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; status = nfs_ok; out: - nfs4_unlock_state(); return status; } From 05149dd4dcfb45a57def3f6277bc636110a45b7a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 30 Jul 2014 08:27:34 -0400 Subject: [PATCH 162/167] nfsd: Remove nfs4_lock_state(): reclaim_complete() Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a4ae38b61933..3b9a1a6348c6 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2909,7 +2909,6 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta return nfs_ok; } - nfs4_lock_state(); status = nfserr_complete_already; if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->session->se_client->cl_flags)) @@ -2929,7 +2928,6 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta status = nfs_ok; nfsd4_client_record_create(cstate->session->se_client); out: - nfs4_unlock_state(); return status; } From dab6ef2415a45b644ae6015ea9687803a1e967fa Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2014 08:27:35 -0400 Subject: [PATCH 163/167] nfsd: remove nfs4_lock_state: nfs4_laundromat Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 3b9a1a6348c6..f5d434c89813 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4152,8 +4152,6 @@ nfs4_laundromat(struct nfsd_net *nn) time_t cutoff = get_seconds() - nn->nfsd4_lease; time_t t, new_timeo = nn->nfsd4_lease; - nfs4_lock_state(); - dprintk("NFSD: laundromat service - starting\n"); nfsd4_end_grace(nn); INIT_LIST_HEAD(&reaplist); @@ -4221,7 +4219,6 @@ nfs4_laundromat(struct nfsd_net *nn) spin_unlock(&nn->client_lock); new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); - nfs4_unlock_state(); return new_timeo; } From 74cf76df0f7fa39f4d9d6c43487774d43b31eb80 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2014 08:27:36 -0400 Subject: [PATCH 164/167] nfsd: remove nfs4_lock_state: nfs4_state_shutdown_net Signed-off-by: Jeff Layton Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f5d434c89813..b900af81791a 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -6406,7 +6406,6 @@ nfs4_state_shutdown_net(struct net *net) cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); - nfs4_lock_state(); INIT_LIST_HEAD(&reaplist); spin_lock(&state_lock); list_for_each_safe(pos, next, &nn->del_recall_lru) { @@ -6423,7 +6422,6 @@ nfs4_state_shutdown_net(struct net *net) nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); - nfs4_unlock_state(); } void From b687f6863eed050aa56fe176e513025907ecd287 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 30 Jul 2014 08:27:37 -0400 Subject: [PATCH 165/167] nfsd: remove the client_mutex and the nfs4_lock/unlock_state wrappers Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 15 --------------- fs/nfsd/state.h | 2 -- 2 files changed, 17 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b900af81791a..2e80a59e7e91 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -75,9 +75,6 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid); /* Locking: */ -/* Currently used for almost all code touching nfsv4 state: */ -static DEFINE_MUTEX(client_mutex); - /* * Currently used for the del_recall_lru and file hash table. 
In an * effort to decrease the scope of the client_mutex, this spinlock may @@ -97,12 +94,6 @@ static struct kmem_cache *file_slab; static struct kmem_cache *stateid_slab; static struct kmem_cache *deleg_slab; -void -nfs4_lock_state(void) -{ - mutex_lock(&client_mutex); -} - static void free_session(struct nfsd4_session *); static bool is_session_dead(struct nfsd4_session *ses) @@ -118,12 +109,6 @@ static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_b return nfs_ok; } -void -nfs4_unlock_state(void) -{ - mutex_unlock(&client_mutex); -} - static bool is_client_expired(struct nfs4_client *clp) { return clp->cl_time == 0; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index a02358f2442c..32a7c290d027 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -437,8 +437,6 @@ struct nfsd_net; extern __be32 nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, stateid_t *stateid, int flags, struct file **filp); -extern void nfs4_lock_state(void); -extern void nfs4_unlock_state(void); void nfs4_put_stid(struct nfs4_stid *s); void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *); extern void nfs4_release_reclaim(struct nfsd_net *); From 14a571a8ecc69feadce8973bd67090a7d0430123 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 5 Aug 2014 15:13:30 -0400 Subject: [PATCH 166/167] nfsd: add some comments to the nfsd4 object definitions Add some comments that describe what each of these objects is, and how they relate to one another. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/netns.h | 8 ++++ fs/nfsd/state.h | 100 ++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 101 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 3831ef6e5c75..ea6749a32760 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -34,6 +34,14 @@ struct cld_net; struct nfsd4_client_tracking_ops; +/* + * Represents a nfsd "container". 
With respect to nfsv4 state tracking, the + * fields of interest are the *_id_hashtbls and the *_name_tree. These track + * the nfs4_client objects by either short or long form clientid. + * + * Each nfsd_net runs a nfs4_laundromat workqueue job when necessary to clean + * up expired clients and delegations within the container. + */ struct nfsd_net { struct cld_net *cld_net; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 32a7c290d027..4a89e00d7461 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -72,6 +72,11 @@ struct nfsd4_callback { bool cb_done; }; +/* + * A core object that represents a "common" stateid. These are generally + * embedded within the different (more specific) stateid objects and contain + * fields that are of general use to any stateid. + */ struct nfs4_stid { atomic_t sc_count; #define NFS4_OPEN_STID 1 @@ -89,6 +94,27 @@ struct nfs4_stid { void (*sc_free)(struct nfs4_stid *); }; +/* + * Represents a delegation stateid. The nfs4_client holds references to these + * and they are put when it is being destroyed or when the delegation is + * returned by the client: + * + * o 1 reference as long as a delegation is still in force (taken when it's + * alloc'd, put when it's returned or revoked) + * + * o 1 reference as long as a recall rpc is in progress (taken when the lease + * is broken, put when the rpc exits) + * + * o 1 more ephemeral reference for each nfsd thread currently doing something + * with that delegation without holding the cl_lock + * + * If the server attempts to recall a delegation and the client doesn't do so + * before a timeout, the server may also revoke the delegation. In that case, + * the object will either be destroyed (v4.0) or moved to a per-client list of + * revoked delegations (v4.1+). + * + * This object is a superset of the nfs4_stid. 
+ */ struct nfs4_delegation { struct nfs4_stid dl_stid; /* must be first field */ struct list_head dl_perfile; @@ -195,6 +221,11 @@ struct nfsd4_conn { unsigned char cn_flags; }; +/* + * Representation of a v4.1+ session. These are refcounted in a similar fashion + * to the nfs4_client. References are only taken when the server is actively + * working on the object (primarily during the processing of compounds). + */ struct nfsd4_session { atomic_t se_ref; struct list_head se_hash; /* hash by sessionid */ @@ -224,13 +255,30 @@ struct nfsd4_sessionid { /* * struct nfs4_client - one per client. Clientids live here. - * o Each nfs4_client is hashed by clientid. * - * o Each nfs4_clients is also hashed by name - * (the opaque quantity initially sent by the client to identify itself). + * The initial object created by an NFS client using SETCLIENTID (for NFSv4.0) + * or EXCHANGE_ID (for NFSv4.1+). These objects are refcounted and timestamped. + * Each nfsd_net_ns object contains a set of these and they are tracked via + * short and long form clientid. They are hashed and searched for under the + * per-nfsd_net client_lock spinlock. + * + * References to it are only held during the processing of compounds, and in + * certain other operations. In their "resting state" they have a refcount of + * 0. If they are not renewed within a lease period, they become eligible for + * destruction by the laundromat. + * + * These objects can also be destroyed prematurely by the fault injection code, + * or if the client sends certain forms of SETCLIENTID or EXCHANGE_ID updates. + * Care is taken *not* to do this however when the objects have an elevated + * refcount. + * + * o Each nfs4_client is hashed by clientid + * + * o Each nfs4_clients is also hashed by name (the opaque quantity initially + * sent by the client to identify itself). 
* - * o cl_perclient list is used to ensure no dangling stateowner references - * when we expire the nfs4_client + * o cl_perclient list is used to ensure no dangling stateowner references + * when we expire the nfs4_client */ struct nfs4_client { struct list_head cl_idhash; /* hash by cl_clientid.id */ @@ -340,6 +388,12 @@ struct nfs4_stateowner_operations { void (*so_free)(struct nfs4_stateowner *); }; +/* + * A core object that represents either an open or lock owner. The object and + * lock owner objects have one of these embedded within them. Refcounts and + * other fields common to both owner types are contained within these + * structures. + */ struct nfs4_stateowner { struct list_head so_strhash; struct list_head so_stateids; @@ -354,6 +408,12 @@ struct nfs4_stateowner { bool so_is_open_owner; }; +/* + * When a file is opened, the client provides an open state owner opaque string + * that indicates the "owner" of that open. These objects are refcounted. + * References to it are held by each open state associated with it. This object + * is a superset of the nfs4_stateowner struct. + */ struct nfs4_openowner { struct nfs4_stateowner oo_owner; /* must be first field */ struct list_head oo_perclient; @@ -371,6 +431,12 @@ struct nfs4_openowner { unsigned char oo_flags; }; +/* + * Represents a generic "lockowner". Similar to an openowner. References to it + * are held by the lock stateids that are created on its behalf. This object is + * a superset of the nfs4_stateowner struct (or would be if it needed any extra + * fields). + */ struct nfs4_lockowner { struct nfs4_stateowner lo_owner; /* must be first element */ }; @@ -385,7 +451,14 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so) return container_of(so, struct nfs4_lockowner, lo_owner); } -/* nfs4_file: a file opened by some number of (open) nfs4_stateowners. */ +/* + * nfs4_file: a file opened by some number of (open) nfs4_stateowners. + * + * These objects are global. 
nfsd only keeps one instance of a nfs4_file per + * inode (though it may keep multiple file descriptors open per inode). These + * are tracked in the file_hashtbl which is protected by the state_lock + * spinlock. + */ struct nfs4_file { atomic_t fi_ref; spinlock_t fi_lock; @@ -410,7 +483,20 @@ struct nfs4_file { bool fi_had_conflict; }; -/* "ol" stands for "Open or Lock". Better suggestions welcome. */ +/* + * A generic struct representing either a open or lock stateid. The nfs4_client + * holds a reference to each of these objects, and they in turn hold a + * reference to their respective stateowners. The client's reference is + * released in response to a close or unlock (depending on whether it's an open + * or lock stateid) or when the client is being destroyed. + * + * In the case of v4.0 open stateids, these objects are preserved for a little + * while after close in order to handle CLOSE replays. Those are eventually + * reclaimed via a LRU scheme by the laundromat. + * + * This object is a superset of the nfs4_stid. "ol" stands for "Open or Lock". + * Better suggestions welcome. + */ struct nfs4_ol_stateid { struct nfs4_stid st_stid; /* must be first field */ struct list_head st_perfile; From d1e458fe671baf1e60afafc88bda090202a412f1 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 31 Jul 2014 15:26:07 -0500 Subject: [PATCH 167/167] svcrdma: remove rdma_create_qp() failure recovery logic In svc_rdma_accept(), if rdma_create_qp() fails, there is useless logic to try and call rdma_create_qp() again with reduced sge depths. The assumption, I guess, was that perhaps the initial sge depths chosen were too big. However they initial depths are selected based on the rdma device attribute max_sge returned from ib_query_device(). If rdma_create_qp() fails, it would not be because the max_send_sge and max_recv_sge values passed in exceed the device's max. So just remove this code. Signed-off-by: Steve Wise Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 06a5d9235107..374feb44afea 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -943,23 +943,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr); if (ret) { - /* - * XXX: This is a hack. We need a xx_request_qp interface - * that will adjust the qp_attr's with a best-effort - * number - */ - qp_attr.cap.max_send_sge -= 2; - qp_attr.cap.max_recv_sge -= 2; - ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, - &qp_attr); - if (ret) { - dprintk("svcrdma: failed to create QP, ret=%d\n", ret); - goto errout; - } - newxprt->sc_max_sge = qp_attr.cap.max_send_sge; - newxprt->sc_max_sge = qp_attr.cap.max_recv_sge; - newxprt->sc_sq_depth = qp_attr.cap.max_send_wr; - newxprt->sc_max_requests = qp_attr.cap.max_recv_wr; + dprintk("svcrdma: failed to create QP, ret=%d\n", ret); + goto errout; } newxprt->sc_qp = newxprt->sc_cm_id->qp;