From fd42ba8223fd698ea4e48407e5d5cc99f6befdb9 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 4 May 2023 16:47:11 -0400 Subject: [PATCH 01/33] NFSv4.2: Clean up: Move the encode_copy_commit() function Move the function to be with the other encode_*() functions, instead of in the middle of the nfs4_xdr_enc_*() section. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index a6df815a140c..dfac3f62c7ed 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -317,6 +317,18 @@ static void encode_copy(struct xdr_stream *xdr, encode_nl4_server(xdr, args->cp_src); } +static void encode_copy_commit(struct xdr_stream *xdr, + const struct nfs42_copy_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr); + p = reserve_space(xdr, 12); + p = xdr_encode_hyper(p, args->dst_pos); + *p = cpu_to_be32(args->count); +} + static void encode_offload_cancel(struct xdr_stream *xdr, const struct nfs42_offload_status_args *args, struct compound_hdr *hdr) @@ -671,18 +683,6 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req, encode_nops(&hdr); } -static void encode_copy_commit(struct xdr_stream *xdr, - const struct nfs42_copy_args *args, - struct compound_hdr *hdr) -{ - __be32 *p; - - encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr); - p = reserve_space(xdr, 12); - p = xdr_encode_hyper(p, args->dst_pos); - *p = cpu_to_be32(args->count); -} - /* * Encode COPY request */ From 04b4c9fb07bfb196378fd449f6125dfeadb9acc5 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 4 May 2023 16:47:12 -0400 Subject: [PATCH 02/33] NFSv4.2: Clean up: move decode_*xattr() functions Move them out of the encode_*() section and into the decode_*() section where it belongs. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 326 +++++++++++++++++++++++----------------------- 1 file changed, 162 insertions(+), 164 deletions(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index dfac3f62c7ed..09e735bcee09 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -464,20 +464,6 @@ static void encode_setxattr(struct xdr_stream *xdr, xdr_write_pages(xdr, arg->xattr_pages, 0, arg->xattr_len); } -static int decode_setxattr(struct xdr_stream *xdr, - struct nfs4_change_info *cinfo) -{ - int status; - - status = decode_op_hdr(xdr, OP_SETXATTR); - if (status) - goto out; - status = decode_change_info(xdr, cinfo); -out: - return status; -} - - static void encode_getxattr(struct xdr_stream *xdr, const char *name, struct compound_hdr *hdr) { @@ -485,43 +471,6 @@ static void encode_getxattr(struct xdr_stream *xdr, const char *name, encode_string(xdr, strlen(name), name); } -static int decode_getxattr(struct xdr_stream *xdr, - struct nfs42_getxattrres *res, - struct rpc_rqst *req) -{ - int status; - __be32 *p; - u32 len, rdlen; - - status = decode_op_hdr(xdr, OP_GETXATTR); - if (status) - return status; - - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - return -EIO; - - len = be32_to_cpup(p); - - /* - * Only check against the page length here. The actual - * requested length may be smaller, but that is only - * checked against after possibly caching a valid reply. - */ - if (len > req->rq_rcv_buf.page_len) - return -ERANGE; - - res->xattr_len = len; - - if (len > 0) { - rdlen = xdr_read_pages(xdr, len); - if (rdlen < len) - return -EIO; - } - - return 0; -} - static void encode_removexattr(struct xdr_stream *xdr, const char *name, struct compound_hdr *hdr) { @@ -529,21 +478,6 @@ static void encode_removexattr(struct xdr_stream *xdr, const char *name, encode_string(xdr, strlen(name), name); } - -static int decode_removexattr(struct xdr_stream *xdr, - struct nfs4_change_info *cinfo) -{ - int status; - - status = decode_op_hdr(xdr, OP_REMOVEXATTR); - if (status) - goto out; - - status = decode_change_info(xdr, cinfo); -out: - return status; -} - static void encode_listxattrs(struct xdr_stream *xdr, const struct nfs42_listxattrsargs *arg, struct compound_hdr *hdr) @@ -565,104 +499,6 @@ static void encode_listxattrs(struct xdr_stream *xdr, *p = cpu_to_be32(arg->count + 8 + 4); } -static int decode_listxattrs(struct xdr_stream *xdr, - struct nfs42_listxattrsres *res) -{ - int status; - __be32 *p; - u32 count, len, ulen; - size_t left, copied; - char *buf; - - status = decode_op_hdr(xdr, OP_LISTXATTRS); - if (status) { - /* - * Special case: for LISTXATTRS, NFS4ERR_TOOSMALL - * should be translated to ERANGE. - */ - if (status == -ETOOSMALL) - status = -ERANGE; - /* - * Special case: for LISTXATTRS, NFS4ERR_NOXATTR - * should be translated to success with zero-length reply. - */ - if (status == -ENODATA) { - res->eof = true; - status = 0; - } - goto out; - } - - p = xdr_inline_decode(xdr, 8); - if (unlikely(!p)) - return -EIO; - - xdr_decode_hyper(p, &res->cookie); - - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - return -EIO; - - left = res->xattr_len; - buf = res->xattr_buf; - - count = be32_to_cpup(p); - copied = 0; - - /* - * We have asked for enough room to encode the maximum number - * of possible attribute names, so everything should fit. - * - * But, don't rely on that assumption. Just decode entries - * until they don't fit anymore, just in case the server did - * something odd. - */ - while (count--) { - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - return -EIO; - - len = be32_to_cpup(p); - if (len > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) { - status = -ERANGE; - goto out; - } - - p = xdr_inline_decode(xdr, len); - if (unlikely(!p)) - return -EIO; - - ulen = len + XATTR_USER_PREFIX_LEN + 1; - if (buf) { - if (ulen > left) { - status = -ERANGE; - goto out; - } - - memcpy(buf, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); - memcpy(buf + XATTR_USER_PREFIX_LEN, p, len); - - buf[ulen - 1] = 0; - buf += ulen; - left -= ulen; - } - copied += ulen; - } - - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - return -EIO; - - res->eof = be32_to_cpup(p); - res->copied = copied; - -out: - if (status == -ERANGE && res->xattr_len == XATTR_LIST_MAX) - status = -E2BIG; - - return status; -} - /* * Encode ALLOCATE request */ @@ -1192,6 +1028,168 @@ static int decode_layouterror(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_LAYOUTERROR); } +static int decode_setxattr(struct xdr_stream *xdr, + struct nfs4_change_info *cinfo) +{ + int status; + + status = decode_op_hdr(xdr, OP_SETXATTR); + if (status) + goto out; + status = decode_change_info(xdr, cinfo); +out: + return status; +} + +static int decode_getxattr(struct xdr_stream *xdr, + struct nfs42_getxattrres *res, + struct rpc_rqst *req) +{ + int status; + __be32 *p; + u32 len, rdlen; + + status = decode_op_hdr(xdr, OP_GETXATTR); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + len = be32_to_cpup(p); + + /* + * Only check against the page length here. The actual + * requested length may be smaller, but that is only + * checked against after possibly caching a valid reply. + */ + if (len > req->rq_rcv_buf.page_len) + return -ERANGE; + + res->xattr_len = len; + + if (len > 0) { + rdlen = xdr_read_pages(xdr, len); + if (rdlen < len) + return -EIO; + } + + return 0; +} + +static int decode_removexattr(struct xdr_stream *xdr, + struct nfs4_change_info *cinfo) +{ + int status; + + status = decode_op_hdr(xdr, OP_REMOVEXATTR); + if (status) + goto out; + + status = decode_change_info(xdr, cinfo); +out: + return status; +} + +static int decode_listxattrs(struct xdr_stream *xdr, + struct nfs42_listxattrsres *res) +{ + int status; + __be32 *p; + u32 count, len, ulen; + size_t left, copied; + char *buf; + + status = decode_op_hdr(xdr, OP_LISTXATTRS); + if (status) { + /* + * Special case: for LISTXATTRS, NFS4ERR_TOOSMALL + * should be translated to ERANGE. + */ + if (status == -ETOOSMALL) + status = -ERANGE; + /* + * Special case: for LISTXATTRS, NFS4ERR_NOXATTR + * should be translated to success with zero-length reply. + */ + if (status == -ENODATA) { + res->eof = true; + status = 0; + } + goto out; + } + + p = xdr_inline_decode(xdr, 8); + if (unlikely(!p)) + return -EIO; + + xdr_decode_hyper(p, &res->cookie); + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + left = res->xattr_len; + buf = res->xattr_buf; + + count = be32_to_cpup(p); + copied = 0; + + /* + * We have asked for enough room to encode the maximum number + * of possible attribute names, so everything should fit. + * + * But, don't rely on that assumption. Just decode entries + * until they don't fit anymore, just in case the server did + * something odd. + */ + while (count--) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + len = be32_to_cpup(p); + if (len > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN)) { + status = -ERANGE; + goto out; + } + + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return -EIO; + + ulen = len + XATTR_USER_PREFIX_LEN + 1; + if (buf) { + if (ulen > left) { + status = -ERANGE; + goto out; + } + + memcpy(buf, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); + memcpy(buf + XATTR_USER_PREFIX_LEN, p, len); + + buf[ulen - 1] = 0; + buf += ulen; + left -= ulen; + } + copied += ulen; + } + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -EIO; + + res->eof = be32_to_cpup(p); + res->copied = copied; + +out: + if (status == -ERANGE && res->xattr_len == XATTR_LIST_MAX) + status = -E2BIG; + + return status; +} + /* * Decode ALLOCATE request */ From 31f1bd8f89f5903583ca544c7700b141062aea9d Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 4 May 2023 16:47:13 -0400 Subject: [PATCH 03/33] NFSv4.2: Clean up: Move nfs4_xdr_enc_*xattr() functions They should be in the nfs4_xdr_enc_*() section, and not at the bottom of the file. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 154 +++++++++++++++++++++++++--------------------- 1 file changed, 83 insertions(+), 71 deletions(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 09e735bcee09..51560c7d468d 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -707,6 +707,89 @@ static void nfs4_xdr_enc_layouterror(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode SETXATTR request + */ +static void nfs4_xdr_enc_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr, + const void *data) +{ + const struct nfs42_setxattrargs *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_setxattr(xdr, args, &hdr); + encode_nops(&hdr); +} + +/* + * Encode GETXATTR request + */ +static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr, + const void *data) +{ + const struct nfs42_getxattrargs *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + uint32_t replen; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + replen = hdr.replen + op_decode_hdr_maxsz + 1; + encode_getxattr(xdr, args->xattr_name, &hdr); + + rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->xattr_len, + replen); + + encode_nops(&hdr); +} + +/* + * Encode LISTXATTR request + */ +static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req, + struct xdr_stream *xdr, const void *data) +{ + const struct nfs42_listxattrsargs *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + uint32_t replen; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + replen = hdr.replen + op_decode_hdr_maxsz + 2 + 1; + encode_listxattrs(xdr, args, &hdr); + + rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, replen); + + encode_nops(&hdr); +} + +/* + * Encode REMOVEXATTR request + */ +static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req, + struct xdr_stream *xdr, const void *data) +{ + const struct nfs42_removexattrargs *args = data; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_removexattr(xdr, args->xattr_name, &hdr); + encode_nops(&hdr); +} + static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_ALLOCATE); @@ -1480,21 +1563,6 @@ out: } #ifdef CONFIG_NFS_V4_2 -static void nfs4_xdr_enc_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr, - const void *data) -{ - const struct nfs42_setxattrargs *args = data; - struct compound_hdr hdr = { - .minorversion = nfs4_xdr_minorversion(&args->seq_args), - }; - - encode_compound_hdr(xdr, req, &hdr); - encode_sequence(xdr, &args->seq_args, &hdr); - encode_putfh(xdr, args->fh, &hdr); - encode_setxattr(xdr, args, &hdr); - encode_nops(&hdr); -} - static int nfs4_xdr_dec_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr, void *data) { @@ -1517,27 +1585,6 @@ out: return status; } -static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr, - const void *data) -{ - const struct nfs42_getxattrargs *args = data; - struct compound_hdr hdr = { - .minorversion = nfs4_xdr_minorversion(&args->seq_args), - }; - uint32_t replen; - - encode_compound_hdr(xdr, req, &hdr); - encode_sequence(xdr, &args->seq_args, &hdr); - encode_putfh(xdr, args->fh, &hdr); - replen = hdr.replen + op_decode_hdr_maxsz + 1; - encode_getxattr(xdr, args->xattr_name, &hdr); - - rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->xattr_len, - replen); - - encode_nops(&hdr); -} - static int nfs4_xdr_dec_getxattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr, void *data) { @@ -1559,26 +1606,6 @@ out: return status; } -static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req, - struct xdr_stream *xdr, const void *data) -{ - const struct nfs42_listxattrsargs *args = data; - struct compound_hdr hdr = { - .minorversion = nfs4_xdr_minorversion(&args->seq_args), - }; - uint32_t replen; - - encode_compound_hdr(xdr, req, &hdr); - encode_sequence(xdr, &args->seq_args, &hdr); - encode_putfh(xdr, args->fh, &hdr); - replen = hdr.replen + op_decode_hdr_maxsz + 2 + 1; - encode_listxattrs(xdr, args, &hdr); - - rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, replen); - - encode_nops(&hdr); -} - static int nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp, struct xdr_stream *xdr, void *data) { @@ -1602,21 +1629,6 @@ out: return status; } -static void nfs4_xdr_enc_removexattr(struct rpc_rqst *req, - struct xdr_stream *xdr, const void *data) -{ - const struct nfs42_removexattrargs *args = data; - struct compound_hdr hdr = { - .minorversion = nfs4_xdr_minorversion(&args->seq_args), - }; - - encode_compound_hdr(xdr, req, &hdr); - encode_sequence(xdr, &args->seq_args, &hdr); - encode_putfh(xdr, args->fh, &hdr); - encode_removexattr(xdr, args->xattr_name, &hdr); - encode_nops(&hdr); -} - static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req, struct xdr_stream *xdr, void *data) { From d594097367b836482db291a4bec54f67cfda2374 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 4 May 2023 16:47:14 -0400 Subject: [PATCH 04/33] NFSv4.2: Clean up nfs4_xdr_dec_*xattr() functions I add commends above each function to match the style of the other nfs4_xdr_dec_*() functions. I also remove the unnecessary #ifdef CONFIG_NFS_V4_2 that was added around this code, since we are already in a v4.2-only file. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 51560c7d468d..1d74135715c5 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1562,7 +1562,9 @@ out: return status; } -#ifdef CONFIG_NFS_V4_2 +/* + * Decode SETXATTR request + */ static int nfs4_xdr_dec_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr, void *data) { @@ -1585,6 +1587,9 @@ out: return status; } +/* + * Decode GETXATTR request + */ static int nfs4_xdr_dec_getxattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr, void *data) { @@ -1606,6 +1611,9 @@ out: return status; } +/* + * Decode LISTXATTR request + */ static int nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp, struct xdr_stream *xdr, void *data) { @@ -1629,6 +1637,9 @@ out: return status; } +/* + * Decode REMOVEXATTR request + */ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req, struct xdr_stream *xdr, void *data) { @@ -1650,5 +1661,4 @@ static int nfs4_xdr_dec_removexattr(struct rpc_rqst *req, out: return status; } -#endif #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ From 64edd55d0f1908220f6a4a53ff40c2b42b1bbfd5 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 4 May 2023 16:47:15 -0400 Subject: [PATCH 05/33] NFSv4.2: Clean up xattr size macros Fold them into the other NFS v4.2 operations in the right spots and adjust spacing to keep the same style. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 96 +++++++++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 49 deletions(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 1d74135715c5..215b8700e504 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -7,6 +7,9 @@ #include "nfs42.h" +/* Not limited by NFS itself, limited by the generic xattr code */ +#define nfs4_xattr_name_maxsz XDR_QUADLEN(XATTR_NAME_MAX) + #define encode_fallocate_maxsz (encode_stateid_maxsz + \ 2 /* offset */ + \ 2 /* length */) @@ -89,6 +92,18 @@ 2 /* dst offset */ + \ 2 /* count */) #define decode_clone_maxsz (op_decode_hdr_maxsz) +#define encode_getxattr_maxsz (op_encode_hdr_maxsz + 1 + \ + nfs4_xattr_name_maxsz) +#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + pagepad_maxsz) +#define encode_setxattr_maxsz (op_encode_hdr_maxsz + \ + 1 + nfs4_xattr_name_maxsz + 1) +#define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) +#define encode_listxattrs_maxsz (op_encode_hdr_maxsz + 2 + 1) +#define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1 + 1) +#define encode_removexattr_maxsz (op_encode_hdr_maxsz + 1 + \ + nfs4_xattr_name_maxsz) +#define decode_removexattr_maxsz (op_decode_hdr_maxsz + \ + decode_change_info_maxsz) #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ @@ -186,55 +201,38 @@ decode_putfh_maxsz + \ decode_clone_maxsz + \ decode_getattr_maxsz) - -/* Not limited by NFS itself, limited by the generic xattr code */ -#define nfs4_xattr_name_maxsz XDR_QUADLEN(XATTR_NAME_MAX) - -#define encode_getxattr_maxsz (op_encode_hdr_maxsz + 1 + \ - nfs4_xattr_name_maxsz) -#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + pagepad_maxsz) -#define encode_setxattr_maxsz (op_encode_hdr_maxsz + \ - 1 + nfs4_xattr_name_maxsz + 1) -#define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) -#define encode_listxattrs_maxsz (op_encode_hdr_maxsz + 2 + 1) -#define decode_listxattrs_maxsz (op_decode_hdr_maxsz + 2 + 1 + 1 + 1) -#define encode_removexattr_maxsz (op_encode_hdr_maxsz + 1 + \ - nfs4_xattr_name_maxsz) -#define decode_removexattr_maxsz (op_decode_hdr_maxsz + \ - decode_change_info_maxsz) - -#define NFS4_enc_getxattr_sz (compound_encode_hdr_maxsz + \ - encode_sequence_maxsz + \ - encode_putfh_maxsz + \ - encode_getxattr_maxsz) -#define NFS4_dec_getxattr_sz (compound_decode_hdr_maxsz + \ - decode_sequence_maxsz + \ - decode_putfh_maxsz + \ - decode_getxattr_maxsz) -#define NFS4_enc_setxattr_sz (compound_encode_hdr_maxsz + \ - encode_sequence_maxsz + \ - encode_putfh_maxsz + \ - encode_setxattr_maxsz) -#define NFS4_dec_setxattr_sz (compound_decode_hdr_maxsz + \ - decode_sequence_maxsz + \ - decode_putfh_maxsz + \ - decode_setxattr_maxsz) -#define NFS4_enc_listxattrs_sz (compound_encode_hdr_maxsz + \ - encode_sequence_maxsz + \ - encode_putfh_maxsz + \ - encode_listxattrs_maxsz) -#define NFS4_dec_listxattrs_sz (compound_decode_hdr_maxsz + \ - decode_sequence_maxsz + \ - decode_putfh_maxsz + \ - decode_listxattrs_maxsz) -#define NFS4_enc_removexattr_sz (compound_encode_hdr_maxsz + \ - encode_sequence_maxsz + \ - encode_putfh_maxsz + \ - encode_removexattr_maxsz) -#define NFS4_dec_removexattr_sz (compound_decode_hdr_maxsz + \ - decode_sequence_maxsz + \ - decode_putfh_maxsz + \ - decode_removexattr_maxsz) +#define NFS4_enc_getxattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_getxattr_maxsz) +#define NFS4_dec_getxattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_getxattr_maxsz) +#define NFS4_enc_setxattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_setxattr_maxsz) +#define NFS4_dec_setxattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_setxattr_maxsz) +#define NFS4_enc_listxattrs_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_listxattrs_maxsz) +#define NFS4_dec_listxattrs_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_listxattrs_maxsz) +#define NFS4_enc_removexattr_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_removexattr_maxsz) +#define NFS4_dec_removexattr_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_removexattr_maxsz) /* * These values specify the maximum amount of data that is not From 86e2e1f6d9215bfec88b82c16936ba0f3ddaeb00 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 4 May 2023 16:47:16 -0400 Subject: [PATCH 06/33] NFSv4.2: SETXATTR should update ctime Otherwise, `stat` will report a stale value to users. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 25 +++++++++++++++++++++---- fs/nfs/nfs42xdr.c | 11 ++++++++--- include/linux/nfs_xdr.h | 3 +++ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 93e306bf4430..63802d195556 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -1190,15 +1190,19 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name, const void *buf, size_t buflen, int flags) { struct nfs_server *server = NFS_SERVER(inode); + __u32 bitmask[NFS_BITMASK_SZ]; struct page *pages[NFS4XATTR_MAXPAGES]; struct nfs42_setxattrargs arg = { .fh = NFS_FH(inode), + .bitmask = bitmask, .xattr_pages = pages, .xattr_len = buflen, .xattr_name = name, .xattr_flags = flags, }; - struct nfs42_setxattrres res; + struct nfs42_setxattrres res = { + .server = server, + }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETXATTR], .rpc_argp = &arg, @@ -1210,13 +1214,22 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name, if (buflen > server->sxasize) return -ERANGE; + res.fattr = nfs_alloc_fattr(); + if (!res.fattr) + return -ENOMEM; + if (buflen > 0) { np = nfs4_buf_to_pages_noslab(buf, buflen, arg.xattr_pages); - if (np < 0) - return np; + if (np < 0) { + ret = np; + goto out; + } } else np = 0; + nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask, + inode, NFS_INO_INVALID_CHANGE); + ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); trace_nfs4_setxattr(inode, name, ret); @@ -1224,9 +1237,13 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name, for (; np > 0; np--) put_page(pages[np - 1]); - if (!ret) + if (!ret) { nfs4_update_changeattr(inode, &res.cinfo, timestamp, 0); + ret = nfs_post_op_update_inode(inode, res.fattr); + } +out: + kfree(res.fattr); return ret; } diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 215b8700e504..95234208dc9e 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -212,11 +212,13 @@ #define NFS4_enc_setxattr_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ - encode_setxattr_maxsz) + encode_setxattr_maxsz + \ + encode_getattr_maxsz) #define NFS4_dec_setxattr_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ - decode_setxattr_maxsz) + decode_setxattr_maxsz + \ + decode_getattr_maxsz) #define NFS4_enc_listxattrs_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -720,6 +722,7 @@ static void nfs4_xdr_enc_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr, encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); encode_setxattr(xdr, args, &hdr); + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -1579,8 +1582,10 @@ static int nfs4_xdr_dec_setxattr(struct rpc_rqst *req, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status) goto out; - status = decode_setxattr(xdr, &res->cinfo); + if (status) + goto out; + status = decode_getfattr(xdr, res->fattr, res->server); out: return status; } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 29a1b39794bf..12bbb5c63664 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1528,6 +1528,7 @@ struct nfs42_seek_res { struct nfs42_setxattrargs { struct nfs4_sequence_args seq_args; struct nfs_fh *fh; + const u32 *bitmask; const char *xattr_name; u32 xattr_flags; size_t xattr_len; @@ -1537,6 +1538,8 @@ struct nfs42_setxattrargs { struct nfs42_setxattrres { struct nfs4_sequence_res seq_res; struct nfs4_change_info cinfo; + struct nfs_fattr *fattr; + const struct nfs_server *server; }; struct nfs42_getxattrargs { From 4388ce05fa38b17e7d9ddabffcb16ed778ee417c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 11 May 2023 08:06:24 +1000 Subject: [PATCH 07/33] SUNRPC: support abstract unix socket addresses An "abtract" address for an AF_UNIX socket start with a nul and can contain any bytes for the given length, but traditionally doesn't contain other nuls. When reported, the leading nul is replaced by '@'. sunrpc currently rejects connections to these addresses and reports them as an empty string. To provide support for future use of these addresses, allow them for outgoing connections and report them more usefully. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 8 ++++++-- net/sunrpc/xprtsock.c | 9 +++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d2ee56634308..18f70854f528 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -565,8 +565,12 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) servername[0] = '\0'; switch (args->address->sa_family) { case AF_LOCAL: - snprintf(servername, sizeof(servername), "%s", - sun->sun_path); + if (sun->sun_path[0]) + snprintf(servername, sizeof(servername), "%s", + sun->sun_path); + else + snprintf(servername, sizeof(servername), "@%s", + sun->sun_path+1); break; case AF_INET: snprintf(servername, sizeof(servername), "%pI4", diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 5f9030b81c9e..515328a8dafe 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -253,7 +253,12 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) switch (sap->sa_family) { case AF_LOCAL: sun = xs_addr_un(xprt); - strscpy(buf, sun->sun_path, sizeof(buf)); + if (sun->sun_path[0]) { + strscpy(buf, sun->sun_path, sizeof(buf)); + } else { + buf[0] = '@'; + strscpy(buf+1, sun->sun_path+1, sizeof(buf)-1); + } xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); break; @@ -2858,7 +2863,7 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) switch (sun->sun_family) { case AF_LOCAL: - if (sun->sun_path[0] != '/') { + if (sun->sun_path[0] != '/' && sun->sun_path[0] != '\0') { dprintk("RPC: bad AF_LOCAL address: %s\n", sun->sun_path); ret = ERR_PTR(-EINVAL); From 626590ea4c93814808a8c4e5ffd2aa0d27f05d4b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 11 May 2023 08:06:24 +1000 Subject: [PATCH 08/33] SUNRPC: attempt to reach rpcbind with an abstract socket name NFS is primarily name-spaced using network namespaces. However it contacts rpcbind (and gss_proxy) using AF_UNIX sockets which are name-spaced using the mount namespaces. This requires a container using NFSv3 (the form that requires rpcbind) to manage both network and mount namespaces, which can seem an unnecessary burden. As NFS is primarily a network service it makes sense to use network namespaces as much as possible, and to prefer to communicate with an rpcbind running in the same network namespace. This can be done, while preserving the benefits of AF_UNIX sockets, by using an abstract socket address. An abstract address has a nul at the start of sun_path, and a length that is exactly the complete size of the sockaddr_un up to the end of the name, NOT including any trailing nul (which is not part of the address). Abstract addresses are local to a network namespace - regular AF_UNIX path names a resolved in the mount namespace ignoring the network namespace. This patch causes rpcb to first try an abstract address before continuing with regular AF_UNIX and then IP addresses. This ensures backwards compatibility. Choosing the name needs some care as the same address will be configured for rpcbind, and needs to be built in to libtirpc for this enhancement to be fully successful. There is no formal standard for choosing abstract addresses. The defacto standard appears to be to use a path name similar to what would be used for a filesystem AF_UNIX address - but with a leading nul. In that case "\0/var/run/rpcbind.sock" seems like the best choice. However at this time /var/run is deprecated in favour of /run, so "\0/run/rpcbind.sock" might be better. Though as we are deliberately moving away from using the filesystem it might seem more sensible to explicitly break the connection and just have "\0rpcbind.socket" using the same name as the systemd unit file.. This patch chooses the second option, which seems least likely to raise objections. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 5a8e6d46809a..5988a5c5ff3f 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -36,6 +36,7 @@ #include "netns.h" #define RPCBIND_SOCK_PATHNAME "/var/run/rpcbind.sock" +#define RPCBIND_SOCK_ABSTRACT_NAME "\0/run/rpcbind.sock" #define RPCBIND_PROGRAM (100000u) #define RPCBIND_PORT (111u) @@ -216,21 +217,22 @@ static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt, sn->rpcb_users = 1; } +/* Evaluate to actual length of the `sockaddr_un' structure. */ +# define SUN_LEN(ptr) (offsetof(struct sockaddr_un, sun_path) \ + + 1 + strlen((ptr)->sun_path + 1)) + /* * Returns zero on success, otherwise a negative errno value * is returned. */ -static int rpcb_create_local_unix(struct net *net) +static int rpcb_create_af_local(struct net *net, + const struct sockaddr_un *addr) { - static const struct sockaddr_un rpcb_localaddr_rpcbind = { - .sun_family = AF_LOCAL, - .sun_path = RPCBIND_SOCK_PATHNAME, - }; struct rpc_create_args args = { .net = net, .protocol = XPRT_TRANSPORT_LOCAL, - .address = (struct sockaddr *)&rpcb_localaddr_rpcbind, - .addrsize = sizeof(rpcb_localaddr_rpcbind), + .address = (struct sockaddr *)addr, + .addrsize = SUN_LEN(addr), .servername = "localhost", .program = &rpcb_program, .version = RPCBVERS_2, @@ -269,6 +271,26 @@ out: return result; } +static int rpcb_create_local_abstract(struct net *net) +{ + static const struct sockaddr_un rpcb_localaddr_abstract = { + .sun_family = AF_LOCAL, + .sun_path = RPCBIND_SOCK_ABSTRACT_NAME, + }; + + return rpcb_create_af_local(net, &rpcb_localaddr_abstract); +} + +static int rpcb_create_local_unix(struct net *net) +{ + static const struct sockaddr_un rpcb_localaddr_unix = { + .sun_family = AF_LOCAL, + .sun_path = RPCBIND_SOCK_PATHNAME, + }; + + return rpcb_create_af_local(net, &rpcb_localaddr_unix); +} + /* * Returns zero on success, otherwise a negative errno value * is returned. @@ -332,7 +354,8 @@ int rpcb_create_local(struct net *net) if (rpcb_get_local(net)) goto out; - if (rpcb_create_local_unix(net) != 0) + if (rpcb_create_local_abstract(net) != 0 && + rpcb_create_local_unix(net) != 0) result = rpcb_create_local_net(net); out: From 9e8ab85a7ea74b0698f14df9b828927b6db03bd2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 7 Jun 2023 09:56:43 -0400 Subject: [PATCH 09/33] NFS: Improvements for fs_context-related tracepoints Add some missing observability to the fs_context tracepoints added by commit 33ce83ef0bb0 ("NFS: Replace fs_context-related dprintk() call sites with tracepoints"). Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/fs_context.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 9bcd53d5c7d4..5626d358ee2e 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -791,16 +791,19 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, ctx->mount_server.addrlen = len; break; case Opt_nconnect: + trace_nfs_mount_assign(param->key, param->string); if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_CONNECTIONS) goto out_of_bounds; ctx->nfs_server.nconnect = result.uint_32; break; case Opt_max_connect: + trace_nfs_mount_assign(param->key, param->string); if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_TRANSPORTS) goto out_of_bounds; ctx->nfs_server.max_connect = result.uint_32; break; case Opt_lookupcache: + trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { case Opt_lookupcache_all: ctx->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); @@ -817,6 +820,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, } break; case Opt_local_lock: + trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { case Opt_local_lock_all: ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | @@ -837,6 +841,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, } break; case Opt_write: + trace_nfs_mount_assign(param->key, param->string); switch (result.uint_32) { case Opt_write_lazy: ctx->flags &= From 500053191297fcf73023ff057da6d2aa35f738e0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 7 Jun 2023 09:57:10 -0400 Subject: [PATCH 10/33] SUNRPC: Plumb an API for setting transport layer security Add an initial set of policies along with fields for upper layers to pass the requested policy down to the transport layer. Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 2 ++ include/linux/sunrpc/xprt.h | 17 +++++++++++++++++ net/sunrpc/clnt.c | 4 ++++ 3 files changed, 23 insertions(+) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 770ef2cb5775..063692cd2a60 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -58,6 +58,7 @@ struct rpc_clnt { cl_noretranstimeo: 1,/* No retransmit timeouts */ cl_autobind : 1,/* use getport() */ cl_chatty : 1;/* be verbose */ + struct xprtsec_parms cl_xprtsec; /* transport security policy */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ const struct rpc_timeout *cl_timeout; /* Timeout strategy */ @@ -139,6 +140,7 @@ struct rpc_create_args { struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ const struct cred *cred; unsigned int max_connect; + struct xprtsec_parms xprtsec; }; struct rpc_add_xprt_test { diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index b9f59aabee53..9e7f12c240c5 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -129,6 +129,21 @@ struct rpc_rqst { #define rq_svec rq_snd_buf.head #define rq_slen rq_snd_buf.len +/* RPC transport layer security policies */ +enum xprtsec_policies { + RPC_XPRTSEC_NONE = 0, + RPC_XPRTSEC_TLS_ANON, + RPC_XPRTSEC_TLS_X509, +}; + +struct xprtsec_parms { + enum xprtsec_policies policy; + + /* authentication material */ + key_serial_t cert_serial; + key_serial_t privkey_serial; +}; + struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); @@ -229,6 +244,7 @@ struct rpc_xprt { */ unsigned long bind_timeout, reestablish_timeout; + struct xprtsec_parms xprtsec; unsigned int connect_cookie; /* A cookie that gets bumped every time the transport is reconnected */ @@ -333,6 +349,7 @@ struct xprt_create { struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ struct rpc_xprt_switch *bc_xps; unsigned int flags; + struct xprtsec_parms xprtsec; }; struct xprt_class { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 18f70854f528..8364b74a0f81 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -385,6 +385,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, if (!clnt) goto out_err; clnt->cl_parent = parent ? : clnt; + clnt->cl_xprtsec = args->xprtsec; err = rpc_alloc_clid(clnt); if (err) @@ -532,6 +533,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) .addrlen = args->addrsize, .servername = args->servername, .bc_xprt = args->bc_xprt, + .xprtsec = args->xprtsec, }; char servername[48]; struct rpc_clnt *clnt; @@ -731,6 +733,7 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt, struct rpc_clnt *parent; int err; + args->xprtsec = clnt->cl_xprtsec; xprt = xprt_create_transport(args); if (IS_ERR(xprt)) return PTR_ERR(xprt); @@ -3050,6 +3053,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, if (!xprtargs->ident) xprtargs->ident = ident; + xprtargs->xprtsec = clnt->cl_xprtsec; xprt = xprt_create_transport(xprtargs); if (IS_ERR(xprt)) { ret = PTR_ERR(xprt); From 97d1c83c3ff40759f64784210da21ca6225d8422 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 7 Jun 2023 09:57:37 -0400 Subject: [PATCH 11/33] SUNRPC: Trace the rpc_create_args Pass the upper layer's rpc_create_args to the rpc_clnt_new() tracepoint so additional parts of the upper layer's request can be recorded. Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/trace/events/sunrpc.h | 52 ++++++++++++++++++++++++++++------- net/sunrpc/clnt.c | 2 +- 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 31bc7025cb44..34784f29a63d 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -139,36 +139,68 @@ DEFINE_RPC_CLNT_EVENT(release); DEFINE_RPC_CLNT_EVENT(replace_xprt); DEFINE_RPC_CLNT_EVENT(replace_xprt_err); +TRACE_DEFINE_ENUM(RPC_XPRTSEC_NONE); +TRACE_DEFINE_ENUM(RPC_XPRTSEC_TLS_X509); + +#define rpc_show_xprtsec_policy(policy) \ + __print_symbolic(policy, \ + { RPC_XPRTSEC_NONE, "none" }, \ + { RPC_XPRTSEC_TLS_ANON, "tls-anon" }, \ + { RPC_XPRTSEC_TLS_X509, "tls-x509" }) + +#define rpc_show_create_flags(flags) \ + __print_flags(flags, "|", \ + { RPC_CLNT_CREATE_HARDRTRY, "HARDRTRY" }, \ + { RPC_CLNT_CREATE_AUTOBIND, "AUTOBIND" }, \ + { RPC_CLNT_CREATE_NONPRIVPORT, "NONPRIVPORT" }, \ + { RPC_CLNT_CREATE_NOPING, "NOPING" }, \ + { RPC_CLNT_CREATE_DISCRTRY, "DISCRTRY" }, \ + { RPC_CLNT_CREATE_QUIET, "QUIET" }, \ + { RPC_CLNT_CREATE_INFINITE_SLOTS, \ + "INFINITE_SLOTS" }, \ + { RPC_CLNT_CREATE_NO_IDLE_TIMEOUT, \ + "NO_IDLE_TIMEOUT" }, \ + { RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT, \ + "NO_RETRANS_TIMEOUT" }, \ + { RPC_CLNT_CREATE_SOFTERR, "SOFTERR" }, \ + { RPC_CLNT_CREATE_REUSEPORT, "REUSEPORT" }) + TRACE_EVENT(rpc_clnt_new, TP_PROTO( const struct rpc_clnt *clnt, const struct rpc_xprt *xprt, - const char *program, - const char *server + const struct rpc_create_args *args ), - TP_ARGS(clnt, xprt, program, server), + TP_ARGS(clnt, xprt, args), TP_STRUCT__entry( __field(unsigned int, client_id) + __field(unsigned long, xprtsec) + __field(unsigned long, flags) + __string(program, clnt->cl_program->name) + __string(server, xprt->servername) __string(addr, xprt->address_strings[RPC_DISPLAY_ADDR]) __string(port, xprt->address_strings[RPC_DISPLAY_PORT]) - __string(program, program) - __string(server, server) ), TP_fast_assign( __entry->client_id = clnt->cl_clid; + __entry->xprtsec = args->xprtsec.policy; + __entry->flags = args->flags; + __assign_str(program, clnt->cl_program->name); + __assign_str(server, xprt->servername); __assign_str(addr, xprt->address_strings[RPC_DISPLAY_ADDR]); __assign_str(port, xprt->address_strings[RPC_DISPLAY_PORT]); - __assign_str(program, program); - __assign_str(server, server); ), - TP_printk("client=" SUNRPC_TRACE_CLID_SPECIFIER - " peer=[%s]:%s program=%s server=%s", + TP_printk("client=" SUNRPC_TRACE_CLID_SPECIFIER " peer=[%s]:%s" + " program=%s server=%s xprtsec=%s flags=%s", __entry->client_id, __get_str(addr), __get_str(port), - __get_str(program), __get_str(server)) + __get_str(program), __get_str(server), + rpc_show_xprtsec_policy(__entry->xprtsec), + rpc_show_create_flags(__entry->flags) + ) ); TRACE_EVENT(rpc_clnt_new_err, diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8364b74a0f81..ba34cfcf459a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -435,7 +435,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, if (parent) refcount_inc(&parent->cl_count); - trace_rpc_clnt_new(clnt, xprt, program->name, args->servername); + trace_rpc_clnt_new(clnt, xprt, args); return clnt; out_no_path: From 120726526e5ee3dfac11bd417e266a7e411f3315 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 7 Jun 2023 09:58:04 -0400 Subject: [PATCH 12/33] SUNRPC: Add RPC client support for the RPC_AUTH_TLS auth flavor The new authentication flavor is used only to discover peer support for RPC-over-TLS. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/auth.h | 2 + net/sunrpc/Makefile | 2 +- net/sunrpc/auth.c | 2 +- net/sunrpc/auth_tls.c | 175 ++++++++++++++++++++++++++++++++++++ net/sunrpc/clnt.c | 3 + 5 files changed, 182 insertions(+), 2 deletions(-) create mode 100644 net/sunrpc/auth_tls.c diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 3e6ce288a7fc..61e58327b1aa 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -120,6 +120,7 @@ struct rpc_authops { struct rpcsec_gss_info *); int (*key_timeout)(struct rpc_auth *, struct rpc_cred *); + int (*ping)(struct rpc_clnt *clnt); }; struct rpc_credops { @@ -144,6 +145,7 @@ struct rpc_credops { extern const struct rpc_authops authunix_ops; extern const struct rpc_authops authnull_ops; +extern const struct rpc_authops authtls_ops; int __init rpc_init_authunix(void); int __init rpcauth_init_module(void); diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 1c8de397d6ad..f89c10fe7e6a 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ - auth.o auth_null.o auth_unix.o \ + auth.o auth_null.o auth_tls.o auth_unix.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ addr.o rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o sysfs.o \ diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index fb75a883503f..2f16f9d17966 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -32,7 +32,7 @@ static unsigned int auth_hashbits = RPC_CREDCACHE_DEFAULT_HASHBITS; static const struct rpc_authops __rcu *auth_flavors[RPC_AUTH_MAXFLAVOR] = { [RPC_AUTH_NULL] = (const struct rpc_authops __force __rcu *)&authnull_ops, [RPC_AUTH_UNIX] = (const struct rpc_authops __force __rcu *)&authunix_ops, - NULL, /* others can be loadable modules */ + [RPC_AUTH_TLS] = (const struct rpc_authops __force __rcu *)&authtls_ops, }; static LIST_HEAD(cred_unused); diff --git a/net/sunrpc/auth_tls.c b/net/sunrpc/auth_tls.c new file mode 100644 index 000000000000..de7678f8a23d --- /dev/null +++ b/net/sunrpc/auth_tls.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, 2022 Oracle. All rights reserved. + * + * The AUTH_TLS credential is used only to probe a remote peer + * for RPC-over-TLS support. + */ + +#include +#include +#include + +static const char *starttls_token = "STARTTLS"; +static const size_t starttls_len = 8; + +static struct rpc_auth tls_auth; +static struct rpc_cred tls_cred; + +static void tls_encode_probe(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + const void *obj) +{ +} + +static int tls_decode_probe(struct rpc_rqst *rqstp, struct xdr_stream *xdr, + void *obj) +{ + return 0; +} + +static const struct rpc_procinfo rpcproc_tls_probe = { + .p_encode = tls_encode_probe, + .p_decode = tls_decode_probe, +}; + +static void rpc_tls_probe_call_prepare(struct rpc_task *task, void *data) +{ + task->tk_flags &= ~RPC_TASK_NO_RETRANS_TIMEOUT; + rpc_call_start(task); +} + +static void rpc_tls_probe_call_done(struct rpc_task *task, void *data) +{ +} + +static const struct rpc_call_ops rpc_tls_probe_ops = { + .rpc_call_prepare = rpc_tls_probe_call_prepare, + .rpc_call_done = rpc_tls_probe_call_done, +}; + +static int tls_probe(struct rpc_clnt *clnt) +{ + struct rpc_message msg = { + .rpc_proc = &rpcproc_tls_probe, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = clnt, + .rpc_message = &msg, + .rpc_op_cred = &tls_cred, + .callback_ops = &rpc_tls_probe_ops, + .flags = RPC_TASK_SOFT | RPC_TASK_SOFTCONN, + }; + struct rpc_task *task; + int status; + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + status = task->tk_status; + rpc_put_task(task); + return status; +} + +static struct rpc_auth *tls_create(const struct rpc_auth_create_args *args, + struct rpc_clnt *clnt) +{ + refcount_inc(&tls_auth.au_count); + return &tls_auth; +} + +static void tls_destroy(struct rpc_auth *auth) +{ +} + +static struct rpc_cred *tls_lookup_cred(struct rpc_auth *auth, + struct auth_cred *acred, int flags) +{ + return get_rpccred(&tls_cred); +} + +static void tls_destroy_cred(struct rpc_cred *cred) +{ +} + +static int tls_match(struct auth_cred *acred, struct rpc_cred *cred, int taskflags) +{ + return 1; +} + +static int tls_marshal(struct rpc_task *task, struct xdr_stream *xdr) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 4 * XDR_UNIT); + if (!p) + return -EMSGSIZE; + /* Credential */ + *p++ = rpc_auth_tls; + *p++ = xdr_zero; + /* Verifier */ + *p++ = rpc_auth_null; + *p = xdr_zero; + return 0; +} + +static int tls_refresh(struct rpc_task *task) +{ + set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_rqstp->rq_cred->cr_flags); + return 0; +} + +static int tls_validate(struct rpc_task *task, struct xdr_stream *xdr) +{ + __be32 *p; + void *str; + + p = xdr_inline_decode(xdr, XDR_UNIT); + if (!p) + return -EIO; + if (*p != rpc_auth_null) + return -EIO; + if (xdr_stream_decode_opaque_inline(xdr, &str, starttls_len) != starttls_len) + return -EIO; + if (memcmp(str, starttls_token, starttls_len)) + return -EIO; + return 0; +} + +const struct rpc_authops authtls_ops = { + .owner = THIS_MODULE, + .au_flavor = RPC_AUTH_TLS, + .au_name = "NULL", + .create = tls_create, + .destroy = tls_destroy, + .lookup_cred = tls_lookup_cred, + .ping = tls_probe, +}; + +static struct rpc_auth tls_auth = { + .au_cslack = NUL_CALLSLACK, + .au_rslack = NUL_REPLYSLACK, + .au_verfsize = NUL_REPLYSLACK, + .au_ralign = NUL_REPLYSLACK, + .au_ops = &authtls_ops, + .au_flavor = RPC_AUTH_TLS, + .au_count = REFCOUNT_INIT(1), +}; + +static const struct rpc_credops tls_credops = { + .cr_name = "AUTH_TLS", + .crdestroy = tls_destroy_cred, + .crmatch = tls_match, + .crmarshal = tls_marshal, + .crwrap_req = rpcauth_wrap_req_encode, + .crrefresh = tls_refresh, + .crvalidate = tls_validate, + .crunwrap_resp = rpcauth_unwrap_resp_decode, +}; + +static struct rpc_cred tls_cred = { + .cr_lru = LIST_HEAD_INIT(tls_cred.cr_lru), + .cr_auth = &tls_auth, + .cr_ops = &tls_credops, + .cr_count = REFCOUNT_INIT(2), + .cr_flags = 1UL << RPCAUTH_CRED_UPTODATE, +}; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index ba34cfcf459a..640c76ab2f1a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2833,6 +2833,9 @@ static int rpc_ping(struct rpc_clnt *clnt) struct rpc_task *task; int status; + if (clnt->cl_auth->au_ops->ping) + return clnt->cl_auth->au_ops->ping(clnt); + task = rpc_call_null_helper(clnt, NULL, NULL, 0, NULL, NULL); if (IS_ERR(task)) return PTR_ERR(task); From 0d3ca07ffda9291843bb0b4b39dea43535bb1f13 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 7 Jun 2023 09:58:22 -0400 Subject: [PATCH 13/33] SUNRPC: Ignore data_ready callbacks during TLS handshakes The RPC header parser doesn't recognize TLS handshake traffic, so it will close the connection prematurely with an error. To avoid that, shunt the transport's data_ready callback when there is a TLS handshake in progress. The XPRT_SOCK_IGNORE_RECV flag will be toggled by code added in a subsequent patch. Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 1 + net/sunrpc/xprtsock.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 38284f25eddf..daef030f4848 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -90,5 +90,6 @@ struct sock_xprt { #define XPRT_SOCK_WAKE_DISCONNECT (7) #define XPRT_SOCK_CONNECT_SENT (8) #define XPRT_SOCK_NOSPACE (9) +#define XPRT_SOCK_IGNORE_RECV (10) #endif /* _LINUX_SUNRPC_XPRTSOCK_H */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 515328a8dafe..0b2739d6e1a0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -700,6 +700,8 @@ static void xs_poll_check_readable(struct sock_xprt *transport) { clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); + if (test_bit(XPRT_SOCK_IGNORE_RECV, &transport->sock_state)) + return; if (!xs_poll_socket_readable(transport)) return; if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) @@ -1385,6 +1387,10 @@ static void xs_data_ready(struct sock *sk) trace_xs_data_ready(xprt); transport->old_data_ready(sk); + + if (test_bit(XPRT_SOCK_IGNORE_RECV, &transport->sock_state)) + return; + /* Any data means we had a useful conversation, so * then we don't need to delay the next reconnect */ From dea034b963c8901bdcc3d3880c04f0d75c95112f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 7 Jun 2023 09:58:49 -0400 Subject: [PATCH 14/33] SUNRPC: Capture CMSG metadata on client-side receive kTLS sockets use CMSG to report decryption errors and the need for session re-keying. For RPC-with-TLS, an "application data" message contains a ULP payload, and that is passed along to the RPC client. An "alert" message triggers connection reset. Everything else is discarded. Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 49 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0b2739d6e1a0..7e2f962d1f66 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -47,6 +47,8 @@ #include #include #include +#include + #include #include #include @@ -347,13 +349,56 @@ xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp) return want; } +static int +xs_sock_process_cmsg(struct socket *sock, struct msghdr *msg, + struct cmsghdr *cmsg, int ret) +{ + if (cmsg->cmsg_level == SOL_TLS && + cmsg->cmsg_type == TLS_GET_RECORD_TYPE) { + u8 content_type = *((u8 *)CMSG_DATA(cmsg)); + + switch (content_type) { + case TLS_RECORD_TYPE_DATA: + /* TLS sets EOR at the end of each application data + * record, even though there might be more frames + * waiting to be decrypted. + */ + msg->msg_flags &= ~MSG_EOR; + break; + case TLS_RECORD_TYPE_ALERT: + ret = -ENOTCONN; + break; + default: + ret = -EAGAIN; + } + } + return ret; +} + +static int +xs_sock_recv_cmsg(struct socket *sock, struct msghdr *msg, int flags) +{ + union { + struct cmsghdr cmsg; + u8 buf[CMSG_SPACE(sizeof(u8))]; + } u; + int ret; + + msg->msg_control = &u; + msg->msg_controllen = sizeof(u); + ret = sock_recvmsg(sock, msg, flags); + if (msg->msg_controllen != sizeof(u)) + ret = xs_sock_process_cmsg(sock, msg, &u.cmsg, ret); + return ret; +} + static ssize_t xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek) { ssize_t ret; if (seek != 0) iov_iter_advance(&msg->msg_iter, seek); - ret = sock_recvmsg(sock, msg, flags); + ret = xs_sock_recv_cmsg(sock, msg, flags); return ret > 0 ? ret + seek : ret; } @@ -379,7 +424,7 @@ xs_read_discard(struct socket *sock, struct msghdr *msg, int flags, size_t count) { iov_iter_discard(&msg->msg_iter, ITER_DEST, count); - return sock_recvmsg(sock, msg, flags); + return xs_sock_recv_cmsg(sock, msg, flags); } #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE From 75eb6af7acdf566c68d61e98e67ee2f235201c02 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 7 Jun 2023 09:59:15 -0400 Subject: [PATCH 15/33] SUNRPC: Add a TCP-with-TLS RPC transport class Use the new TLS handshake API to enable the SunRPC client code to request a TLS handshake. This implements support for RFC 9289, only on TCP sockets. Upper layers such as NFS use RPC-with-TLS to protect in-transit traffic. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 + include/linux/sunrpc/xprtsock.h | 2 + include/trace/events/sunrpc.h | 44 ++++ net/sunrpc/sysfs.c | 1 + net/sunrpc/xprtsock.c | 370 ++++++++++++++++++++++++++++++++ 5 files changed, 418 insertions(+) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 9e7f12c240c5..b52411bcfe4e 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -200,6 +200,7 @@ enum xprt_transports { XPRT_TRANSPORT_RDMA = 256, XPRT_TRANSPORT_BC_RDMA = XPRT_TRANSPORT_RDMA | XPRT_TRANSPORT_BC, XPRT_TRANSPORT_LOCAL = 257, + XPRT_TRANSPORT_TCP_TLS = 258, }; struct rpc_sysfs_xprt; diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index daef030f4848..700a1e6c047c 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -57,9 +57,11 @@ struct sock_xprt { struct work_struct error_worker; struct work_struct recv_worker; struct mutex recv_mutex; + struct completion handshake_done; struct sockaddr_storage srcaddr; unsigned short srcport; int xprt_err; + struct rpc_clnt *clnt; /* * UDP socket buffer size parameters diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 34784f29a63d..7cd4bbd6904c 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1525,6 +1525,50 @@ TRACE_EVENT(rpcb_unregister, ) ); +/** + ** RPC-over-TLS tracepoints + **/ + +DECLARE_EVENT_CLASS(rpc_tls_class, + TP_PROTO( + const struct rpc_clnt *clnt, + const struct rpc_xprt *xprt + ), + + TP_ARGS(clnt, xprt), + + TP_STRUCT__entry( + __field(unsigned long, requested_policy) + __field(u32, version) + __string(servername, xprt->servername) + __string(progname, clnt->cl_program->name) + ), + + TP_fast_assign( + __entry->requested_policy = clnt->cl_xprtsec.policy; + __entry->version = clnt->cl_vers; + __assign_str(servername, xprt->servername); + __assign_str(progname, clnt->cl_program->name) + ), + + TP_printk("server=%s %sv%u requested_policy=%s", + __get_str(servername), __get_str(progname), __entry->version, + rpc_show_xprtsec_policy(__entry->requested_policy) + ) +); + +#define DEFINE_RPC_TLS_EVENT(name) \ + DEFINE_EVENT(rpc_tls_class, rpc_tls_##name, \ + TP_PROTO( \ + const struct rpc_clnt *clnt, \ + const struct rpc_xprt *xprt \ + ), \ + TP_ARGS(clnt, xprt)) + +DEFINE_RPC_TLS_EVENT(unavailable); +DEFINE_RPC_TLS_EVENT(not_started); + + /* Record an xdr_buf containing a fully-formed RPC message */ DECLARE_EVENT_CLASS(svc_xdr_msg_class, TP_PROTO( diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 0d0db4e1064e..5c8ecdaaa985 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -239,6 +239,7 @@ static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj, if (!xprt) return 0; if (!(xprt->xprt_class->ident == XPRT_TRANSPORT_TCP || + xprt->xprt_class->ident == XPRT_TRANSPORT_TCP_TLS || xprt->xprt_class->ident == XPRT_TRANSPORT_RDMA)) { xprt_put(xprt); return -EOPNOTSUPP; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7e2f962d1f66..9f010369100a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -98,6 +99,7 @@ static struct ctl_table_header *sunrpc_table_header; static struct xprt_class xs_local_transport; static struct xprt_class xs_udp_transport; static struct xprt_class xs_tcp_transport; +static struct xprt_class xs_tcp_tls_transport; static struct xprt_class xs_bc_tcp_transport; /* @@ -189,6 +191,11 @@ static struct ctl_table xs_tunables_table[] = { */ #define XS_IDLE_DISC_TO (5U * 60 * HZ) +/* + * TLS handshake timeout. + */ +#define XS_TLS_HANDSHAKE_TO (10U * HZ) + #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # undef RPC_DEBUG_DATA # define RPCDBG_FACILITY RPCDBG_TRANS @@ -1243,6 +1250,8 @@ static void xs_reset_transport(struct sock_xprt *transport) if (atomic_read(&transport->xprt.swapper)) sk_clear_memalloc(sk); + tls_handshake_cancel(sk); + kernel_sock_shutdown(sock, SHUT_RDWR); mutex_lock(&transport->recv_mutex); @@ -2416,6 +2425,267 @@ out_unlock: current_restore_flags(pflags, PF_MEMALLOC); } +/* + * Transfer the connected socket to @upper_transport, then mark that + * xprt CONNECTED. + */ +static int xs_tcp_tls_finish_connecting(struct rpc_xprt *lower_xprt, + struct sock_xprt *upper_transport) +{ + struct sock_xprt *lower_transport = + container_of(lower_xprt, struct sock_xprt, xprt); + struct rpc_xprt *upper_xprt = &upper_transport->xprt; + + if (!upper_transport->inet) { + struct socket *sock = lower_transport->sock; + struct sock *sk = sock->sk; + + /* Avoid temporary address, they are bad for long-lived + * connections such as NFS mounts. + * RFC4941, section 3.6 suggests that: + * Individual applications, which have specific + * knowledge about the normal duration of connections, + * MAY override this as appropriate. + */ + if (xs_addr(upper_xprt)->sa_family == PF_INET6) + ip6_sock_set_addr_preferences(sk, IPV6_PREFER_SRC_PUBLIC); + + xs_tcp_set_socket_timeouts(upper_xprt, sock); + tcp_sock_set_nodelay(sk); + + lock_sock(sk); + + /* @sk is already connected, so it now has the RPC callbacks. + * Reach into @lower_transport to save the original ones. + */ + upper_transport->old_data_ready = lower_transport->old_data_ready; + upper_transport->old_state_change = lower_transport->old_state_change; + upper_transport->old_write_space = lower_transport->old_write_space; + upper_transport->old_error_report = lower_transport->old_error_report; + sk->sk_user_data = upper_xprt; + + /* socket options */ + sock_reset_flag(sk, SOCK_LINGER); + + xprt_clear_connected(upper_xprt); + + upper_transport->sock = sock; + upper_transport->inet = sk; + upper_transport->file = lower_transport->file; + + release_sock(sk); + + /* Reset lower_transport before shutting down its clnt */ + mutex_lock(&lower_transport->recv_mutex); + lower_transport->inet = NULL; + lower_transport->sock = NULL; + lower_transport->file = NULL; + + xprt_clear_connected(lower_xprt); + xs_sock_reset_connection_flags(lower_xprt); + xs_stream_reset_connect(lower_transport); + mutex_unlock(&lower_transport->recv_mutex); + } + + if (!xprt_bound(upper_xprt)) + return -ENOTCONN; + + xs_set_memalloc(upper_xprt); + + if (!xprt_test_and_set_connected(upper_xprt)) { + upper_xprt->connect_cookie++; + clear_bit(XPRT_SOCK_CONNECTING, &upper_transport->sock_state); + xprt_clear_connecting(upper_xprt); + + upper_xprt->stat.connect_count++; + upper_xprt->stat.connect_time += (long)jiffies - + upper_xprt->stat.connect_start; + xs_run_error_worker(upper_transport, XPRT_SOCK_WAKE_PENDING); + } + return 0; +} + +/** + * xs_tls_handshake_done - TLS handshake completion handler + * @data: address of xprt to wake + * @status: status of handshake + * @peerid: serial number of key containing the remote's identity + * + */ +static void xs_tls_handshake_done(void *data, int status, key_serial_t peerid) +{ + struct rpc_xprt *lower_xprt = data; + struct sock_xprt *lower_transport = + container_of(lower_xprt, struct sock_xprt, xprt); + + lower_transport->xprt_err = status ? -EACCES : 0; + complete(&lower_transport->handshake_done); + xprt_put(lower_xprt); +} + +static int xs_tls_handshake_sync(struct rpc_xprt *lower_xprt, struct xprtsec_parms *xprtsec) +{ + struct sock_xprt *lower_transport = + container_of(lower_xprt, struct sock_xprt, xprt); + struct tls_handshake_args args = { + .ta_sock = lower_transport->sock, + .ta_done = xs_tls_handshake_done, + .ta_data = xprt_get(lower_xprt), + .ta_peername = lower_xprt->servername, + }; + struct sock *sk = lower_transport->inet; + int rc; + + init_completion(&lower_transport->handshake_done); + set_bit(XPRT_SOCK_IGNORE_RECV, &lower_transport->sock_state); + lower_transport->xprt_err = -ETIMEDOUT; + switch (xprtsec->policy) { + case RPC_XPRTSEC_TLS_ANON: + rc = tls_client_hello_anon(&args, GFP_KERNEL); + if (rc) + goto out_put_xprt; + break; + case RPC_XPRTSEC_TLS_X509: + args.ta_my_cert = xprtsec->cert_serial; + args.ta_my_privkey = xprtsec->privkey_serial; + rc = tls_client_hello_x509(&args, GFP_KERNEL); + if (rc) + goto out_put_xprt; + break; + default: + rc = -EACCES; + goto out_put_xprt; + } + + rc = wait_for_completion_interruptible_timeout(&lower_transport->handshake_done, + XS_TLS_HANDSHAKE_TO); + if (rc <= 0) { + if (!tls_handshake_cancel(sk)) { + if (rc == 0) + rc = -ETIMEDOUT; + goto out_put_xprt; + } + } + + rc = lower_transport->xprt_err; + +out: + xs_stream_reset_connect(lower_transport); + clear_bit(XPRT_SOCK_IGNORE_RECV, &lower_transport->sock_state); + return rc; + +out_put_xprt: + xprt_put(lower_xprt); + goto out; +} + +/** + * xs_tcp_tls_setup_socket - establish a TLS session on a TCP socket + * @work: queued work item + * + * Invoked by a work queue tasklet. + * + * For RPC-with-TLS, there is a two-stage connection process. + * + * The "upper-layer xprt" is visible to the RPC consumer. Once it has + * been marked connected, the consumer knows that a TCP connection and + * a TLS session have been established. + * + * A "lower-layer xprt", created in this function, handles the mechanics + * of connecting the TCP socket, performing the RPC_AUTH_TLS probe, and + * then driving the TLS handshake. Once all that is complete, the upper + * layer xprt is marked connected. + */ +static void xs_tcp_tls_setup_socket(struct work_struct *work) +{ + struct sock_xprt *upper_transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_clnt *upper_clnt = upper_transport->clnt; + struct rpc_xprt *upper_xprt = &upper_transport->xprt; + struct rpc_create_args args = { + .net = upper_xprt->xprt_net, + .protocol = upper_xprt->prot, + .address = (struct sockaddr *)&upper_xprt->addr, + .addrsize = upper_xprt->addrlen, + .timeout = upper_clnt->cl_timeout, + .servername = upper_xprt->servername, + .program = upper_clnt->cl_program, + .prognumber = upper_clnt->cl_prog, + .version = upper_clnt->cl_vers, + .authflavor = RPC_AUTH_TLS, + .cred = upper_clnt->cl_cred, + .xprtsec = { + .policy = RPC_XPRTSEC_NONE, + }, + }; + unsigned int pflags = current->flags; + struct rpc_clnt *lower_clnt; + struct rpc_xprt *lower_xprt; + int status; + + if (atomic_read(&upper_xprt->swapper)) + current->flags |= PF_MEMALLOC; + + xs_stream_start_connect(upper_transport); + + /* This implicitly sends an RPC_AUTH_TLS probe */ + lower_clnt = rpc_create(&args); + if (IS_ERR(lower_clnt)) { + trace_rpc_tls_unavailable(upper_clnt, upper_xprt); + clear_bit(XPRT_SOCK_CONNECTING, &upper_transport->sock_state); + xprt_clear_connecting(upper_xprt); + xprt_wake_pending_tasks(upper_xprt, PTR_ERR(lower_clnt)); + xs_run_error_worker(upper_transport, XPRT_SOCK_WAKE_PENDING); + goto out_unlock; + } + + /* RPC_AUTH_TLS probe was successful. Try a TLS handshake on + * the lower xprt. + */ + rcu_read_lock(); + lower_xprt = rcu_dereference(lower_clnt->cl_xprt); + rcu_read_unlock(); + status = xs_tls_handshake_sync(lower_xprt, &upper_xprt->xprtsec); + if (status) { + trace_rpc_tls_not_started(upper_clnt, upper_xprt); + goto out_close; + } + + status = xs_tcp_tls_finish_connecting(lower_xprt, upper_transport); + if (status) + goto out_close; + + trace_rpc_socket_connect(upper_xprt, upper_transport->sock, 0); + if (!xprt_test_and_set_connected(upper_xprt)) { + upper_xprt->connect_cookie++; + clear_bit(XPRT_SOCK_CONNECTING, &upper_transport->sock_state); + xprt_clear_connecting(upper_xprt); + + upper_xprt->stat.connect_count++; + upper_xprt->stat.connect_time += (long)jiffies - + upper_xprt->stat.connect_start; + xs_run_error_worker(upper_transport, XPRT_SOCK_WAKE_PENDING); + } + rpc_shutdown_client(lower_clnt); + +out_unlock: + current_restore_flags(pflags, PF_MEMALLOC); + upper_transport->clnt = NULL; + xprt_unlock_connect(upper_xprt, upper_transport); + return; + +out_close: + rpc_shutdown_client(lower_clnt); + + /* xprt_force_disconnect() wakes tasks with a fixed tk_status code. + * Wake them first here to ensure they get our tk_status code. + */ + xprt_wake_pending_tasks(upper_xprt, status); + xs_tcp_force_close(upper_xprt); + xprt_clear_connecting(upper_xprt); + goto out_unlock; +} + /** * xs_connect - connect a socket to a remote endpoint * @xprt: pointer to transport structure @@ -2447,6 +2717,7 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) } else dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); + transport->clnt = task->tk_client; queue_delayed_work(xprtiod_workqueue, &transport->connect_worker, delay); @@ -3100,6 +3371,94 @@ out_err: return ret; } +/** + * xs_setup_tcp_tls - Set up transport to use a TCP with TLS + * @args: rpc transport creation arguments + * + */ +static struct rpc_xprt *xs_setup_tcp_tls(struct xprt_create *args) +{ + struct sockaddr *addr = args->dstaddr; + struct rpc_xprt *xprt; + struct sock_xprt *transport; + struct rpc_xprt *ret; + unsigned int max_slot_table_size = xprt_max_tcp_slot_table_entries; + + if (args->flags & XPRT_CREATE_INFINITE_SLOTS) + max_slot_table_size = RPC_MAX_SLOT_TABLE_LIMIT; + + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, + max_slot_table_size); + if (IS_ERR(xprt)) + return xprt; + transport = container_of(xprt, struct sock_xprt, xprt); + + xprt->prot = IPPROTO_TCP; + xprt->xprt_class = &xs_tcp_transport; + xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; + + xprt->bind_timeout = XS_BIND_TO; + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + xprt->idle_timeout = XS_IDLE_DISC_TO; + + xprt->ops = &xs_tcp_ops; + xprt->timeout = &xs_tcp_default_timeout; + + xprt->max_reconnect_timeout = xprt->timeout->to_maxval; + xprt->connect_timeout = xprt->timeout->to_initval * + (xprt->timeout->to_retries + 1); + + INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn); + INIT_WORK(&transport->error_worker, xs_error_handle); + + switch (args->xprtsec.policy) { + case RPC_XPRTSEC_TLS_ANON: + case RPC_XPRTSEC_TLS_X509: + xprt->xprtsec = args->xprtsec; + INIT_DELAYED_WORK(&transport->connect_worker, + xs_tcp_tls_setup_socket); + break; + default: + ret = ERR_PTR(-EACCES); + goto out_err; + } + + switch (addr->sa_family) { + case AF_INET: + if (((struct sockaddr_in *)addr)->sin_port != htons(0)) + xprt_set_bound(xprt); + + xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); + break; + case AF_INET6: + if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) + xprt_set_bound(xprt); + + xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); + break; + default: + ret = ERR_PTR(-EAFNOSUPPORT); + goto out_err; + } + + if (xprt_bound(xprt)) + dprintk("RPC: set up xprt to %s (port %s) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT], + xprt->address_strings[RPC_DISPLAY_PROTO]); + else + dprintk("RPC: set up xprt to %s (autobind) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PROTO]); + + if (try_module_get(THIS_MODULE)) + return xprt; + ret = ERR_PTR(-EINVAL); +out_err: + xs_xprt_free(xprt); + return ret; +} + /** * xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket * @args: rpc transport creation arguments @@ -3209,6 +3568,15 @@ static struct xprt_class xs_tcp_transport = { .netid = { "tcp", "tcp6", "" }, }; +static struct xprt_class xs_tcp_tls_transport = { + .list = LIST_HEAD_INIT(xs_tcp_tls_transport.list), + .name = "tcp-with-tls", + .owner = THIS_MODULE, + .ident = XPRT_TRANSPORT_TCP_TLS, + .setup = xs_setup_tcp_tls, + .netid = { "tcp", "tcp6", "" }, +}; + static struct xprt_class xs_bc_tcp_transport = { .list = LIST_HEAD_INIT(xs_bc_tcp_transport.list), .name = "tcp NFSv4.1 backchannel", @@ -3230,6 +3598,7 @@ int init_socket_xprt(void) xprt_register_transport(&xs_local_transport); xprt_register_transport(&xs_udp_transport); xprt_register_transport(&xs_tcp_transport); + xprt_register_transport(&xs_tcp_tls_transport); xprt_register_transport(&xs_bc_tcp_transport); return 0; @@ -3249,6 +3618,7 @@ void cleanup_socket_xprt(void) xprt_unregister_transport(&xs_local_transport); xprt_unregister_transport(&xs_udp_transport); xprt_unregister_transport(&xs_tcp_transport); + xprt_unregister_transport(&xs_tcp_tls_transport); xprt_unregister_transport(&xs_bc_tcp_transport); } From 6c0a8c5fcf7158e889dbdd077f67c81984704710 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 7 Jun 2023 09:59:42 -0400 Subject: [PATCH 16/33] NFS: Have struct nfs_client carry a TLS policy field The new field is used to match struct nfs_clients that have the same TLS policy setting. Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 8 ++++++++ fs/nfs/internal.h | 1 + fs/nfs/nfs3client.c | 1 + fs/nfs/nfs4client.c | 20 +++++++++++++++----- include/linux/nfs_fs_sb.h | 3 ++- 5 files changed, 27 insertions(+), 6 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f50e025ae406..9bfdade0f6e6 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -184,6 +184,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) clp->cl_net = get_net(cl_init->net); clp->cl_principal = "*"; + clp->cl_xprtsec = cl_init->xprtsec; return clp; error_cleanup: @@ -326,6 +327,10 @@ again: sap)) continue; + /* Match the xprt security policy */ + if (clp->cl_xprtsec.policy != data->xprtsec.policy) + continue; + refcount_inc(&clp->cl_count); return clp; } @@ -675,6 +680,9 @@ static int nfs_init_server(struct nfs_server *server, .cred = server->cred, .nconnect = ctx->nfs_server.nconnect, .init_flags = (1UL << NFS_CS_REUSEPORT), + .xprtsec = { + .policy = RPC_XPRTSEC_NONE, + }, }; struct nfs_client *clp; int error; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 3cc027d3bd58..5c986c0d3cce 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -81,6 +81,7 @@ struct nfs_client_initdata { struct net *net; const struct rpc_timeout *timeparms; const struct cred *cred; + struct xprtsec_parms xprtsec; }; /* diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 669cda757a5c..8fa187a9c46d 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -93,6 +93,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, .net = mds_clp->cl_net, .timeparms = &ds_timeout, .cred = mds_srv->cred, + .xprtsec = mds_clp->cl_xprtsec, }; struct nfs_client *clp; char buf[INET6_ADDRSTRLEN + 1]; diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index d3051b051a56..75ed8354576b 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -896,7 +896,8 @@ static int nfs4_set_client(struct nfs_server *server, int proto, const struct rpc_timeout *timeparms, u32 minorversion, unsigned int nconnect, unsigned int max_connect, - struct net *net) + struct net *net, + struct xprtsec_parms *xprtsec) { struct nfs_client_initdata cl_init = { .hostname = hostname, @@ -909,6 +910,7 @@ static int nfs4_set_client(struct nfs_server *server, .net = net, .timeparms = timeparms, .cred = server->cred, + .xprtsec = *xprtsec, }; struct nfs_client *clp; @@ -978,6 +980,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, .net = mds_clp->cl_net, .timeparms = &ds_timeout, .cred = mds_srv->cred, + .xprtsec = mds_srv->nfs_client->cl_xprtsec, }; char buf[INET6_ADDRSTRLEN + 1]; @@ -1127,6 +1130,9 @@ out: static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct xprtsec_parms xprtsec = { + .policy = RPC_XPRTSEC_NONE, + }; struct rpc_timeout timeparms; int error; @@ -1157,7 +1163,8 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc) ctx->minorversion, ctx->nfs_server.nconnect, ctx->nfs_server.max_connect, - fc->net_ns); + fc->net_ns, + &xprtsec); if (error < 0) return error; @@ -1247,7 +1254,8 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) parent_client->cl_mvops->minor_version, parent_client->cl_nconnect, parent_client->cl_max_connect, - parent_client->cl_net); + parent_client->cl_net, + &parent_client->cl_xprtsec); if (!error) goto init_server; #endif /* IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) */ @@ -1263,7 +1271,8 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) parent_client->cl_mvops->minor_version, parent_client->cl_nconnect, parent_client->cl_max_connect, - parent_client->cl_net); + parent_client->cl_net, + &parent_client->cl_xprtsec); if (error < 0) goto error; @@ -1336,7 +1345,8 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, error = nfs4_set_client(server, hostname, sap, salen, buf, clp->cl_proto, clnt->cl_timeout, clp->cl_minorversion, - clp->cl_nconnect, clp->cl_max_connect, net); + clp->cl_nconnect, clp->cl_max_connect, + net, &clp->cl_xprtsec); clear_bit(NFS_MIG_TSM_POSSIBLE, &server->mig_status); if (error != 0) { nfs_server_insert_lists(server); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index ea2f7e6b1b0b..fa5a592de798 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -63,7 +63,8 @@ struct nfs_client { u32 cl_minorversion;/* NFSv4 minorversion */ unsigned int cl_nconnect; /* Number of connections */ unsigned int cl_max_connect; /* max number of xprts allowed */ - const char * cl_principal; /* used for machine cred */ + const char * cl_principal; /* used for machine cred */ + struct xprtsec_parms cl_xprtsec; /* xprt security policy */ #if IS_ENABLED(CONFIG_NFS_V4) struct list_head cl_ds_clients; /* auth flavor data servers */ From c8407f2e560c53c4c73e77cb5604c8a408dbe7f7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 7 Jun 2023 10:00:09 -0400 Subject: [PATCH 17/33] NFS: Add an "xprtsec=" NFS mount option After some discussion, we decided that controlling transport layer security policy should be separate from the setting for the user authentication flavor. To accomplish this, add a new NFS mount option to select a transport layer security policy for RPC operations associated with the mount point. xprtsec=none - Transport layer security is forced off. xprtsec=tls - Establish an encryption-only TLS session. If the initial handshake fails, the mount fails. If TLS is not available on a reconnect, drop the connection and try again. xprtsec=mtls - Both sides authenticate and an encrypted session is created. If the initial handshake fails, the mount fails. If TLS is not available on a reconnect, drop the connection and try again. To support client peer authentication (mtls), the handshake daemon will have configurable default authentication material (certificate or pre-shared key). In the future, mount options can be added that can provide this material on a per-mount basis. Updates to mount.nfs (to support xprtsec=auto) and nfs(5) will be sent under separate cover. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 6 ++--- fs/nfs/fs_context.c | 62 +++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/internal.h | 1 + fs/nfs/nfs3client.c | 8 ++++-- fs/nfs/nfs4client.c | 28 ++++++++++++-------- fs/nfs/super.c | 12 +++++++++ 6 files changed, 102 insertions(+), 15 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 9bfdade0f6e6..d5441e60d7e1 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -463,6 +463,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, switch (proto) { case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_TCP_TLS: case XPRT_TRANSPORT_RDMA: if (retrans == NFS_UNSPEC_RETRANS) to->to_retries = NFS_DEF_TCP_RETRANS; @@ -515,6 +516,7 @@ int nfs_create_rpc_client(struct nfs_client *clp, .version = clp->rpc_ops->version, .authflavor = flavor, .cred = cl_init->cred, + .xprtsec = cl_init->xprtsec, }; if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags)) @@ -680,9 +682,7 @@ static int nfs_init_server(struct nfs_server *server, .cred = server->cred, .nconnect = ctx->nfs_server.nconnect, .init_flags = (1UL << NFS_CS_REUSEPORT), - .xprtsec = { - .policy = RPC_XPRTSEC_NONE, - }, + .xprtsec = ctx->xprtsec, }; struct nfs_client *clp; int error; diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 5626d358ee2e..853e8d609bb3 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -18,6 +18,9 @@ #include #include #include + +#include + #include "nfs.h" #include "internal.h" @@ -88,6 +91,7 @@ enum nfs_param { Opt_vers, Opt_wsize, Opt_write, + Opt_xprtsec, }; enum { @@ -194,6 +198,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = { fsparam_string("vers", Opt_vers), fsparam_enum ("write", Opt_write, nfs_param_enums_write), fsparam_u32 ("wsize", Opt_wsize), + fsparam_string("xprtsec", Opt_xprtsec), {} }; @@ -267,6 +272,20 @@ static const struct constant_table nfs_secflavor_tokens[] = { {} }; +enum { + Opt_xprtsec_none, + Opt_xprtsec_tls, + Opt_xprtsec_mtls, + nr__Opt_xprtsec +}; + +static const struct constant_table nfs_xprtsec_policies[] = { + { "none", Opt_xprtsec_none }, + { "tls", Opt_xprtsec_tls }, + { "mtls", Opt_xprtsec_mtls }, + {} +}; + /* * Sanity-check a server address provided by the mount command. * @@ -320,9 +339,21 @@ static int nfs_validate_transport_protocol(struct fs_context *fc, default: ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; } + + if (ctx->xprtsec.policy != RPC_XPRTSEC_NONE) + switch (ctx->nfs_server.protocol) { + case XPRT_TRANSPORT_TCP: + ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP_TLS; + break; + default: + goto out_invalid_xprtsec_policy; + } + return 0; out_invalid_transport_udp: return nfs_invalf(fc, "NFS: Unsupported transport protocol udp"); +out_invalid_xprtsec_policy: + return nfs_invalf(fc, "NFS: Transport does not support xprtsec"); } /* @@ -430,6 +461,29 @@ static int nfs_parse_security_flavors(struct fs_context *fc, return 0; } +static int nfs_parse_xprtsec_policy(struct fs_context *fc, + struct fs_parameter *param) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + + trace_nfs_mount_assign(param->key, param->string); + + switch (lookup_constant(nfs_xprtsec_policies, param->string, -1)) { + case Opt_xprtsec_none: + ctx->xprtsec.policy = RPC_XPRTSEC_NONE; + break; + case Opt_xprtsec_tls: + ctx->xprtsec.policy = RPC_XPRTSEC_TLS_ANON; + break; + case Opt_xprtsec_mtls: + ctx->xprtsec.policy = RPC_XPRTSEC_TLS_X509; + break; + default: + return nfs_invalf(fc, "NFS: Unrecognized transport security policy"); + } + return 0; +} + static int nfs_parse_version_string(struct fs_context *fc, const char *string) { @@ -696,6 +750,11 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, if (ret < 0) return ret; break; + case Opt_xprtsec: + ret = nfs_parse_xprtsec_policy(fc, param); + if (ret < 0) + return ret; + break; case Opt_proto: if (!param->string) @@ -1574,6 +1633,9 @@ static int nfs_init_fs_context(struct fs_context *fc) ctx->selected_flavor = RPC_AUTH_MAXFLAVOR; ctx->minorversion = 0; ctx->need_mount = true; + ctx->xprtsec.policy = RPC_XPRTSEC_NONE; + ctx->xprtsec.cert_serial = TLS_NO_CERT; + ctx->xprtsec.privkey_serial = TLS_NO_PRIVKEY; fc->s_iflags |= SB_I_STABLE_WRITES; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5c986c0d3cce..0019c7578f9d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -102,6 +102,7 @@ struct nfs_fs_context { unsigned int bsize; struct nfs_auth_info auth_info; rpc_authflavor_t selected_flavor; + struct xprtsec_parms xprtsec; char *client_address; unsigned int version; unsigned int minorversion; diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 8fa187a9c46d..0844f1651e0f 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -103,8 +103,12 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, return ERR_PTR(-EINVAL); cl_init.hostname = buf; - if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) - cl_init.nconnect = mds_clp->cl_nconnect; + switch (ds_proto) { + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_TCP_TLS: + if (mds_clp->cl_nconnect > 1) + cl_init.nconnect = mds_clp->cl_nconnect; + } if (mds_srv->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 75ed8354576b..321854942ce1 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -918,8 +918,11 @@ static int nfs4_set_client(struct nfs_server *server, __set_bit(NFS_CS_REUSEPORT, &cl_init.init_flags); else cl_init.max_connect = max_connect; - if (proto == XPRT_TRANSPORT_TCP) + switch (proto) { + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_TCP_TLS: cl_init.nconnect = nconnect; + } if (server->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); @@ -988,9 +991,13 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, return ERR_PTR(-EINVAL); cl_init.hostname = buf; - if (mds_clp->cl_nconnect > 1 && ds_proto == XPRT_TRANSPORT_TCP) { - cl_init.nconnect = mds_clp->cl_nconnect; - cl_init.max_connect = NFS_MAX_TRANSPORTS; + switch (ds_proto) { + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_TCP_TLS: + if (mds_clp->cl_nconnect > 1) { + cl_init.nconnect = mds_clp->cl_nconnect; + cl_init.max_connect = NFS_MAX_TRANSPORTS; + } } if (mds_srv->flags & NFS_MOUNT_NORESVPORT) @@ -1130,9 +1137,6 @@ out: static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc) { struct nfs_fs_context *ctx = nfs_fc2context(fc); - struct xprtsec_parms xprtsec = { - .policy = RPC_XPRTSEC_NONE, - }; struct rpc_timeout timeparms; int error; @@ -1164,7 +1168,7 @@ static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc) ctx->nfs_server.nconnect, ctx->nfs_server.max_connect, fc->net_ns, - &xprtsec); + &ctx->xprtsec); if (error < 0) return error; @@ -1226,8 +1230,8 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_client *parent_client; struct nfs_server *server, *parent_server; + int proto, error; bool auth_probe; - int error; server = nfs_alloc_server(); if (!server) @@ -1260,13 +1264,16 @@ struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) goto init_server; #endif /* IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) */ + proto = XPRT_TRANSPORT_TCP; + if (parent_client->cl_xprtsec.policy != RPC_XPRTSEC_NONE) + proto = XPRT_TRANSPORT_TCP_TLS; rpc_set_port(&ctx->nfs_server.address, NFS_PORT); error = nfs4_set_client(server, ctx->nfs_server.hostname, &ctx->nfs_server._address, ctx->nfs_server.addrlen, parent_client->cl_ipaddr, - XPRT_TRANSPORT_TCP, + proto, parent_server->client->cl_timeout, parent_client->cl_mvops->minor_version, parent_client->cl_nconnect, @@ -1323,6 +1330,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, .dstaddr = (struct sockaddr *)sap, .addrlen = salen, .servername = hostname, + /* cel: bleh. We might need to pass TLS parameters here */ }; char buf[INET6_ADDRSTRLEN + 1]; struct sockaddr_storage address; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 30e53e93049e..059b0beabc1b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -59,6 +59,8 @@ #include #include +#include + #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" @@ -491,6 +493,16 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, seq_printf(m, ",timeo=%lu", 10U * nfss->client->cl_timeout->to_initval / HZ); seq_printf(m, ",retrans=%u", nfss->client->cl_timeout->to_retries); seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); + switch (clp->cl_xprtsec.policy) { + case RPC_XPRTSEC_TLS_ANON: + seq_puts(m, ",xprtsec=tls"); + break; + case RPC_XPRTSEC_TLS_X509: + seq_puts(m, ",xprtsec=mtls"); + break; + default: + break; + } if (version != 4) nfs_show_mountd_options(m, nfss, showdefaults); From 8b18a2edecc0741b0eecf8b18fdb356a0f8682de Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 15 Jun 2023 14:07:22 -0400 Subject: [PATCH 18/33] NFS: rename nfs_client_kset to nfs_kset Be brief and match the subsystem name. There's no need to distinguish this kset variable from the server. Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/sysfs.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 0cbcd2dfa732..81d98727b79f 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -18,7 +18,7 @@ #include "sysfs.h" struct kobject *nfs_client_kobj; -static struct kset *nfs_client_kset; +static struct kset *nfs_kset; static void nfs_netns_object_release(struct kobject *kobj) { @@ -55,13 +55,13 @@ static struct kobject *nfs_netns_object_alloc(const char *name, int nfs_sysfs_init(void) { - nfs_client_kset = kset_create_and_add("nfs", NULL, fs_kobj); - if (!nfs_client_kset) + nfs_kset = kset_create_and_add("nfs", NULL, fs_kobj); + if (!nfs_kset) return -ENOMEM; - nfs_client_kobj = nfs_netns_object_alloc("net", nfs_client_kset, NULL); + nfs_client_kobj = nfs_netns_object_alloc("net", nfs_kset, NULL); if (!nfs_client_kobj) { - kset_unregister(nfs_client_kset); - nfs_client_kset = NULL; + kset_unregister(nfs_kset); + nfs_kset = NULL; return -ENOMEM; } return 0; @@ -70,7 +70,7 @@ int nfs_sysfs_init(void) void nfs_sysfs_exit(void) { kobject_put(nfs_client_kobj); - kset_unregister(nfs_client_kset); + kset_unregister(nfs_kset); } static ssize_t nfs_netns_identifier_show(struct kobject *kobj, @@ -159,7 +159,7 @@ static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent, p = kzalloc(sizeof(*p), GFP_KERNEL); if (p) { p->net = net; - p->kobject.kset = nfs_client_kset; + p->kobject.kset = nfs_kset; if (kobject_init_and_add(&p->kobject, &nfs_netns_client_type, parent, "nfs_client") == 0) return p; From d5082ace6c8ddefd19b8f7b7164580d972fdb103 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 15 Jun 2023 14:07:23 -0400 Subject: [PATCH 19/33] NFS: rename nfs_client_kobj to nfs_net_kobj Match the variable names to the sysfs structure. Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/sysfs.c | 10 +++++----- fs/nfs/sysfs.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 81d98727b79f..8f89aeca6272 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -17,7 +17,7 @@ #include "netns.h" #include "sysfs.h" -struct kobject *nfs_client_kobj; +struct kobject *nfs_net_kobj; static struct kset *nfs_kset; static void nfs_netns_object_release(struct kobject *kobj) @@ -58,8 +58,8 @@ int nfs_sysfs_init(void) nfs_kset = kset_create_and_add("nfs", NULL, fs_kobj); if (!nfs_kset) return -ENOMEM; - nfs_client_kobj = nfs_netns_object_alloc("net", nfs_kset, NULL); - if (!nfs_client_kobj) { + nfs_net_kobj = nfs_netns_object_alloc("net", nfs_kset, NULL); + if (!nfs_net_kobj) { kset_unregister(nfs_kset); nfs_kset = NULL; return -ENOMEM; @@ -69,7 +69,7 @@ int nfs_sysfs_init(void) void nfs_sysfs_exit(void) { - kobject_put(nfs_client_kobj); + kobject_put(nfs_net_kobj); kset_unregister(nfs_kset); } @@ -172,7 +172,7 @@ void nfs_netns_sysfs_setup(struct nfs_net *netns, struct net *net) { struct nfs_netns_client *clp; - clp = nfs_netns_client_alloc(nfs_client_kobj, net); + clp = nfs_netns_client_alloc(nfs_net_kobj, net); if (clp) { netns->nfs_client = clp; kobject_uevent(&clp->kobject, KOBJ_ADD); diff --git a/fs/nfs/sysfs.h b/fs/nfs/sysfs.h index 5501ef573c32..0423aaf388c9 100644 --- a/fs/nfs/sysfs.h +++ b/fs/nfs/sysfs.h @@ -14,7 +14,7 @@ struct nfs_netns_client { const char __rcu *identifier; }; -extern struct kobject *nfs_client_kobj; +extern struct kobject *nfs_net_kobj; extern int nfs_sysfs_init(void); extern void nfs_sysfs_exit(void); From 943aef2dbcf75f81c4574903131bd9559cee4fd1 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 15 Jun 2023 14:07:24 -0400 Subject: [PATCH 20/33] NFS: Open-code the nfs_kset kset_create_and_add() In preparation to make objects below /sys/fs/nfs namespace aware, we need to define our own kobj_type for the nfs kset so that we can add the .child_ns_type member in a following patch. No functional change here, only the unrolling of kset_create_and_add(). Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/sysfs.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 8f89aeca6272..9adb8ac08d9a 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -25,12 +25,23 @@ static void nfs_netns_object_release(struct kobject *kobj) kfree(kobj); } +static void nfs_kset_release(struct kobject *kobj) +{ + struct kset *kset = container_of(kobj, struct kset, kobj); + kfree(kset); +} + static const struct kobj_ns_type_operations *nfs_netns_object_child_ns_type( const struct kobject *kobj) { return &net_ns_type_operations; } +static struct kobj_type nfs_kset_type = { + .release = nfs_kset_release, + .sysfs_ops = &kobj_sysfs_ops, +}; + static struct kobj_type nfs_netns_object_type = { .release = nfs_netns_object_release, .sysfs_ops = &kobj_sysfs_ops, @@ -55,13 +66,32 @@ static struct kobject *nfs_netns_object_alloc(const char *name, int nfs_sysfs_init(void) { - nfs_kset = kset_create_and_add("nfs", NULL, fs_kobj); + int ret; + + nfs_kset = kzalloc(sizeof(*nfs_kset), GFP_KERNEL); if (!nfs_kset) return -ENOMEM; + + ret = kobject_set_name(&nfs_kset->kobj, "nfs"); + if (ret) { + kfree(nfs_kset); + return ret; + } + + nfs_kset->kobj.parent = fs_kobj; + nfs_kset->kobj.ktype = &nfs_kset_type; + nfs_kset->kobj.kset = NULL; + + ret = kset_register(nfs_kset); + if (ret) { + kfree(nfs_kset); + return ret; + } + nfs_net_kobj = nfs_netns_object_alloc("net", nfs_kset, NULL); if (!nfs_net_kobj) { kset_unregister(nfs_kset); - nfs_kset = NULL; + kfree(nfs_kset); return -ENOMEM; } return 0; From e96f9268eea626126021641eefeed02f8669f584 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 15 Jun 2023 14:07:25 -0400 Subject: [PATCH 21/33] NFS: Make all of /sys/fs/nfs network-namespace unique Expand the NFS network-namespaced sysfs from /sys/fs/nfs/net down one level into /sys/fs/nfs by moving the "net" kobject onto struct nfs_netns_client and setting it up during network namespace init. This prepares the way for superblock kobjects within /sys/fs/nfs that will only be visible to matching network namespaces. Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/sysfs.c | 69 +++++++++++++++++++++++--------------------------- fs/nfs/sysfs.h | 1 + 2 files changed, 33 insertions(+), 37 deletions(-) diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 9adb8ac08d9a..90256a3a714e 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -17,14 +17,8 @@ #include "netns.h" #include "sysfs.h" -struct kobject *nfs_net_kobj; static struct kset *nfs_kset; -static void nfs_netns_object_release(struct kobject *kobj) -{ - kfree(kobj); -} - static void nfs_kset_release(struct kobject *kobj) { struct kset *kset = container_of(kobj, struct kset, kobj); @@ -40,30 +34,9 @@ static const struct kobj_ns_type_operations *nfs_netns_object_child_ns_type( static struct kobj_type nfs_kset_type = { .release = nfs_kset_release, .sysfs_ops = &kobj_sysfs_ops, -}; - -static struct kobj_type nfs_netns_object_type = { - .release = nfs_netns_object_release, - .sysfs_ops = &kobj_sysfs_ops, .child_ns_type = nfs_netns_object_child_ns_type, }; -static struct kobject *nfs_netns_object_alloc(const char *name, - struct kset *kset, struct kobject *parent) -{ - struct kobject *kobj; - - kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); - if (kobj) { - kobj->kset = kset; - if (kobject_init_and_add(kobj, &nfs_netns_object_type, - parent, "%s", name) == 0) - return kobj; - kobject_put(kobj); - } - return NULL; -} - int nfs_sysfs_init(void) { int ret; @@ -88,18 +61,11 @@ int nfs_sysfs_init(void) return ret; } - nfs_net_kobj = nfs_netns_object_alloc("net", nfs_kset, NULL); - if (!nfs_net_kobj) { - kset_unregister(nfs_kset); - kfree(nfs_kset); - return -ENOMEM; - } return 0; } void nfs_sysfs_exit(void) { - kobject_put(nfs_net_kobj); kset_unregister(nfs_kset); } @@ -157,7 +123,6 @@ static void nfs_netns_client_release(struct kobject *kobj) kobject); kfree(rcu_dereference_raw(c->identifier)); - kfree(c); } static const void *nfs_netns_client_namespace(const struct kobject *kobj) @@ -181,6 +146,25 @@ static struct kobj_type nfs_netns_client_type = { .namespace = nfs_netns_client_namespace, }; +static void nfs_netns_object_release(struct kobject *kobj) +{ + struct nfs_netns_client *c = container_of(kobj, + struct nfs_netns_client, + nfs_net_kobj); + kfree(c); +} + +static const void *nfs_netns_namespace(const struct kobject *kobj) +{ + return container_of(kobj, struct nfs_netns_client, nfs_net_kobj)->net; +} + +static struct kobj_type nfs_netns_object_type = { + .release = nfs_netns_object_release, + .sysfs_ops = &kobj_sysfs_ops, + .namespace = nfs_netns_namespace, +}; + static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent, struct net *net) { @@ -190,9 +174,18 @@ static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent, if (p) { p->net = net; p->kobject.kset = nfs_kset; + p->nfs_net_kobj.kset = nfs_kset; + + if (kobject_init_and_add(&p->nfs_net_kobj, &nfs_netns_object_type, + parent, "net") != 0) { + kobject_put(&p->nfs_net_kobj); + return NULL; + } + if (kobject_init_and_add(&p->kobject, &nfs_netns_client_type, - parent, "nfs_client") == 0) + &p->nfs_net_kobj, "nfs_client") == 0) return p; + kobject_put(&p->kobject); } return NULL; @@ -202,7 +195,7 @@ void nfs_netns_sysfs_setup(struct nfs_net *netns, struct net *net) { struct nfs_netns_client *clp; - clp = nfs_netns_client_alloc(nfs_net_kobj, net); + clp = nfs_netns_client_alloc(&nfs_kset->kobj, net); if (clp) { netns->nfs_client = clp; kobject_uevent(&clp->kobject, KOBJ_ADD); @@ -217,6 +210,8 @@ void nfs_netns_sysfs_destroy(struct nfs_net *netns) kobject_uevent(&clp->kobject, KOBJ_REMOVE); kobject_del(&clp->kobject); kobject_put(&clp->kobject); + kobject_del(&clp->nfs_net_kobj); + kobject_put(&clp->nfs_net_kobj); netns->nfs_client = NULL; } } diff --git a/fs/nfs/sysfs.h b/fs/nfs/sysfs.h index 0423aaf388c9..dc4cc9809d1b 100644 --- a/fs/nfs/sysfs.h +++ b/fs/nfs/sysfs.h @@ -10,6 +10,7 @@ struct nfs_netns_client { struct kobject kobject; + struct kobject nfs_net_kobj; struct net *net; const char __rcu *identifier; }; From 1c7251187dc067a6d460cf33ca67da9c1dd87807 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 15 Jun 2023 14:07:26 -0400 Subject: [PATCH 22/33] NFS: add superblock sysfs entries Create a sysfs directory for each mount that corresponds to the mount's nfs_server struct. As the mount is being constructed, use the name "server-n", but rename it to the "MAJOR:MINOR" of the mount after assigning a device_id. The rename approach allows us to populate the mount's directory with links to the various rpc_client objects during the mount's construction. The naming convention (MAJOR:MINOR) can be used to reference a particular NFS mount's sysfs tree. Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 16 +++++++++++ fs/nfs/nfs4client.c | 3 ++ fs/nfs/super.c | 6 +++- fs/nfs/sysfs.c | 59 +++++++++++++++++++++++++++++++++++++++ fs/nfs/sysfs.h | 5 ++++ include/linux/nfs_fs_sb.h | 2 ++ 6 files changed, 90 insertions(+), 1 deletion(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index d5441e60d7e1..e95672a9bcd6 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -698,6 +698,7 @@ static int nfs_init_server(struct nfs_server *server, return PTR_ERR(clp); server->nfs_client = clp; + nfs_sysfs_add_server(server); /* Initialise the client representation from the mount data */ server->flags = ctx->flags; @@ -952,6 +953,8 @@ void nfs_server_remove_lists(struct nfs_server *server) } EXPORT_SYMBOL_GPL(nfs_server_remove_lists); +static DEFINE_IDA(s_sysfs_ids); + /* * Allocate and initialise a server record */ @@ -963,6 +966,12 @@ struct nfs_server *nfs_alloc_server(void) if (!server) return NULL; + server->s_sysfs_id = ida_alloc(&s_sysfs_ids, GFP_KERNEL); + if (server->s_sysfs_id < 0) { + kfree(server); + return NULL; + } + server->client = server->client_acl = ERR_PTR(-EINVAL); /* Zero out the NFS state stuff */ @@ -1009,6 +1018,10 @@ void nfs_free_server(struct nfs_server *server) nfs_put_client(server->nfs_client); + nfs_sysfs_remove_server(server); + kobject_put(&server->kobj); + ida_free(&s_sysfs_ids, server->s_sysfs_id); + ida_destroy(&server->lockowner_id); ida_destroy(&server->openowner_id); nfs_free_iostats(server->io_stats); @@ -1110,6 +1123,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, server->fsid = fattr->fsid; + nfs_sysfs_add_server(server); + error = nfs_init_server_rpcclient(server, source->client->cl_timeout, flavor); @@ -1393,6 +1408,7 @@ error_0: void nfs_fs_proc_exit(void) { remove_proc_subtree("fs/nfsfs", NULL); + ida_destroy(&s_sysfs_ids); } #endif /* CONFIG_PROC_FS */ diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 321854942ce1..a098a41811d6 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -18,6 +18,7 @@ #include "nfs4idmap.h" #include "pnfs.h" #include "netns.h" +#include "sysfs.h" #define NFSDBG_FACILITY NFSDBG_CLIENT @@ -952,6 +953,8 @@ static int nfs4_set_client(struct nfs_server *server, set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state); server->nfs_client = clp; + nfs_sysfs_add_server(server); + return 0; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 059b0beabc1b..2284f749d892 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -70,6 +70,8 @@ #include "nfs4session.h" #include "pnfs.h" #include "nfs.h" +#include "netns.h" +#include "sysfs.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -1089,6 +1091,7 @@ static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx) &sb->s_blocksize_bits); nfs_super_set_maxbytes(sb, server->maxfilesize); + nfs_sysfs_move_server_to_sb(sb); server->has_sec_mnt_opts = ctx->has_sec_mnt_opts; } @@ -1331,13 +1334,14 @@ error_splat_super: } /* - * Destroy an NFS2/3 superblock + * Destroy an NFS superblock */ void nfs_kill_super(struct super_block *s) { struct nfs_server *server = NFS_SB(s); dev_t dev = s->s_dev; + nfs_sysfs_move_sb_to_server(server); generic_shutdown_super(s); nfs_fscache_release_super_cookie(s); diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 90256a3a714e..0ff24f133a02 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -215,3 +215,62 @@ void nfs_netns_sysfs_destroy(struct nfs_net *netns) netns->nfs_client = NULL; } } + +static void nfs_sysfs_sb_release(struct kobject *kobj) +{ + /* no-op: why? see lib/kobject.c kobject_cleanup() */ +} + +static const void *nfs_netns_server_namespace(const struct kobject *kobj) +{ + return container_of(kobj, struct nfs_server, kobj)->nfs_client->cl_net; +} + +static struct kobj_type nfs_sb_ktype = { + .release = nfs_sysfs_sb_release, + .sysfs_ops = &kobj_sysfs_ops, + .namespace = nfs_netns_server_namespace, + .child_ns_type = nfs_netns_object_child_ns_type, +}; + +void nfs_sysfs_add_server(struct nfs_server *server) +{ + int ret; + + ret = kobject_init_and_add(&server->kobj, &nfs_sb_ktype, + &nfs_kset->kobj, "server-%d", server->s_sysfs_id); + if (ret < 0) + pr_warn("NFS: nfs sysfs add server-%d failed (%d)\n", + server->s_sysfs_id, ret); +} +EXPORT_SYMBOL_GPL(nfs_sysfs_add_server); + +void nfs_sysfs_move_server_to_sb(struct super_block *s) +{ + struct nfs_server *server = s->s_fs_info; + int ret; + + ret = kobject_rename(&server->kobj, s->s_id); + if (ret < 0) + pr_warn("NFS: rename sysfs %s failed (%d)\n", + server->kobj.name, ret); +} + +void nfs_sysfs_move_sb_to_server(struct nfs_server *server) +{ + const char *s; + int ret = -ENOMEM; + + s = kasprintf(GFP_KERNEL, "server-%d", server->s_sysfs_id); + if (s) + ret = kobject_rename(&server->kobj, s); + if (ret < 0) + pr_warn("NFS: rename sysfs %s failed (%d)\n", + server->kobj.name, ret); +} + +/* unlink, not dec-ref */ +void nfs_sysfs_remove_server(struct nfs_server *server) +{ + kobject_del(&server->kobj); +} diff --git a/fs/nfs/sysfs.h b/fs/nfs/sysfs.h index dc4cc9809d1b..c9f5e3677eb5 100644 --- a/fs/nfs/sysfs.h +++ b/fs/nfs/sysfs.h @@ -23,4 +23,9 @@ extern void nfs_sysfs_exit(void); void nfs_netns_sysfs_setup(struct nfs_net *netns, struct net *net); void nfs_netns_sysfs_destroy(struct nfs_net *netns); +void nfs_sysfs_add_server(struct nfs_server *s); +void nfs_sysfs_move_server_to_sb(struct super_block *s); +void nfs_sysfs_move_sb_to_server(struct nfs_server *s); +void nfs_sysfs_remove_server(struct nfs_server *s); + #endif diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index fa5a592de798..4bed0b6c79c7 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -184,6 +184,7 @@ struct nfs_server { change_attr_type;/* Description of change attribute */ struct nfs_fsid fsid; + int s_sysfs_id; /* sysfs dentry index */ __u64 maxfilesize; /* maximum file size */ struct timespec64 time_delta; /* smallest time granularity */ unsigned long mount_time; /* when this fs was mounted */ @@ -260,6 +261,7 @@ struct nfs_server { /* User namespace info */ const struct cred *cred; bool has_sec_mnt_opts; + struct kobject kobj; }; /* Server capabilities */ From e13b549319a684dd80c4cc25e9567a5c84007e32 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 15 Jun 2023 14:07:27 -0400 Subject: [PATCH 23/33] NFS: Add sysfs links to sunrpc clients for nfs_clients For the general and state management nfs_client under each mount, create symlinks to their respective rpc_client sysfs entries. Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 5 +++++ fs/nfs/nfs4client.c | 1 + fs/nfs/sysfs.c | 20 ++++++++++++++++++++ fs/nfs/sysfs.h | 2 ++ include/linux/sunrpc/clnt.h | 8 +++++++- net/sunrpc/sysfs.h | 7 ------- 6 files changed, 35 insertions(+), 8 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index e95672a9bcd6..745c661429f2 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -628,6 +628,7 @@ int nfs_init_server_rpcclient(struct nfs_server *server, if (server->flags & NFS_MOUNT_SOFT) server->client->cl_softrtry = 1; + nfs_sysfs_link_rpc_client(server, server->client, NULL); return 0; } EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient); @@ -699,6 +700,7 @@ static int nfs_init_server(struct nfs_server *server, server->nfs_client = clp; nfs_sysfs_add_server(server); + nfs_sysfs_link_rpc_client(server, clp->cl_rpcclient, "_state"); /* Initialise the client representation from the mount data */ server->flags = ctx->flags; @@ -1125,6 +1127,9 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, nfs_sysfs_add_server(server); + nfs_sysfs_link_rpc_client(server, + server->nfs_client->cl_rpcclient, "_state"); + error = nfs_init_server_rpcclient(server, source->client->cl_timeout, flavor); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index a098a41811d6..d9114a754db7 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -954,6 +954,7 @@ static int nfs4_set_client(struct nfs_server *server, server->nfs_client = clp; nfs_sysfs_add_server(server); + nfs_sysfs_link_rpc_client(server, clp->cl_rpcclient, "_state"); return 0; } diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 0ff24f133a02..7009de149158 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -216,6 +216,26 @@ void nfs_netns_sysfs_destroy(struct nfs_net *netns) } } +#define RPC_CLIENT_NAME_SIZE 64 + +void nfs_sysfs_link_rpc_client(struct nfs_server *server, + struct rpc_clnt *clnt, const char *uniq) +{ + char name[RPC_CLIENT_NAME_SIZE]; + int ret; + + strcpy(name, clnt->cl_program->name); + strcat(name, uniq ? uniq : ""); + strcat(name, "_client"); + + ret = sysfs_create_link_nowarn(&server->kobj, + &clnt->cl_sysfs->kobject, name); + if (ret < 0) + pr_warn("NFS: can't create link to %s in sysfs (%d)\n", + name, ret); +} +EXPORT_SYMBOL_GPL(nfs_sysfs_link_rpc_client); + static void nfs_sysfs_sb_release(struct kobject *kobj) { /* no-op: why? see lib/kobject.c kobject_cleanup() */ diff --git a/fs/nfs/sysfs.h b/fs/nfs/sysfs.h index c9f5e3677eb5..c5d1990cade5 100644 --- a/fs/nfs/sysfs.h +++ b/fs/nfs/sysfs.h @@ -23,6 +23,8 @@ extern void nfs_sysfs_exit(void); void nfs_netns_sysfs_setup(struct nfs_net *netns, struct net *net); void nfs_netns_sysfs_destroy(struct nfs_net *netns); +void nfs_sysfs_link_rpc_client(struct nfs_server *server, + struct rpc_clnt *clnt, const char *sysfs_prefix); void nfs_sysfs_add_server(struct nfs_server *s); void nfs_sysfs_move_server_to_sb(struct super_block *s); void nfs_sysfs_move_sb_to_server(struct nfs_server *s); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 063692cd2a60..88cdf6e3012a 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -30,7 +30,13 @@ #include struct rpc_inode; -struct rpc_sysfs_client; +struct rpc_sysfs_client { + struct kobject kobject; + struct net *net; + struct rpc_clnt *clnt; + struct rpc_xprt_switch *xprt_switch; +}; + /* * The high-level client handle diff --git a/net/sunrpc/sysfs.h b/net/sunrpc/sysfs.h index 6620cebd1037..d2dd77a0a0e9 100644 --- a/net/sunrpc/sysfs.h +++ b/net/sunrpc/sysfs.h @@ -5,13 +5,6 @@ #ifndef __SUNRPC_SYSFS_H #define __SUNRPC_SYSFS_H -struct rpc_sysfs_client { - struct kobject kobject; - struct net *net; - struct rpc_clnt *clnt; - struct rpc_xprt_switch *xprt_switch; -}; - struct rpc_sysfs_xprt_switch { struct kobject kobject; struct net *net; From d97c05897757a5d7fa131073d04a2fb29b5836ee Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 15 Jun 2023 14:07:28 -0400 Subject: [PATCH 24/33] NFS: add a sysfs link to the lockd rpc_client After lockd is started, add a symlink for lockd's rpc_client under NFS' superblock sysfs. Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/lockd/clntlock.c | 6 ++++++ fs/nfs/client.c | 1 + include/linux/lockd/bind.h | 2 ++ 3 files changed, 9 insertions(+) diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index e3972aa3045a..5d85715be763 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -93,6 +93,12 @@ void nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host, struct block->b_status = nlm_lck_blocked; } +struct rpc_clnt *nlmclnt_rpc_clnt(struct nlm_host *host) +{ + return host->h_rpcclnt; +} +EXPORT_SYMBOL_GPL(nlmclnt_rpc_clnt); + /* * Queue up a lock for blocking so that the GRANTED request can see it */ diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 745c661429f2..48c9d8411c0e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -599,6 +599,7 @@ static int nfs_start_lockd(struct nfs_server *server) server->nlm_host = host; server->destroy = nfs_destroy_server; + nfs_sysfs_link_rpc_client(server, nlmclnt_rpc_clnt(host), NULL); return 0; } diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 3bc9f7410e21..c53c81242e72 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -20,6 +20,7 @@ /* Dummy declarations */ struct svc_rqst; struct rpc_task; +struct rpc_clnt; /* * This is the set of functions for lockd->nfsd communication @@ -56,6 +57,7 @@ struct nlmclnt_initdata { extern struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init); extern void nlmclnt_done(struct nlm_host *host); +extern struct rpc_clnt *nlmclnt_rpc_clnt(struct nlm_host *host); /* * NLM client operations provide a means to modify RPC processing of NLM From f4057ffd0e134e54a727e00c3c9b0d9a5051eadf Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 15 Jun 2023 14:07:29 -0400 Subject: [PATCH 25/33] NFS: add a sysfs link to the acl rpc_client Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/nfs3client.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 0844f1651e0f..eff3802c5e03 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -4,6 +4,8 @@ #include #include "internal.h" #include "nfs3_fs.h" +#include "netns.h" +#include "sysfs.h" #ifdef CONFIG_NFS_V3_ACL static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; @@ -31,6 +33,8 @@ static void nfs_init_server_aclclient(struct nfs_server *server) if (IS_ERR(server->client_acl)) goto out_noacl; + nfs_sysfs_link_rpc_client(server, server->client_acl, NULL); + /* No errors! Assume that Sun nfsacls are supported */ server->caps |= NFS_CAP_ACLS; return; From d9615d166c7ede67bf16bdd0772e35e124f305f5 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 15 Jun 2023 14:07:30 -0400 Subject: [PATCH 26/33] NFS: add sysfs shutdown knob Within each nfs_server sysfs tree, add an entry named "shutdown". Writing 1 to this file will set the cl_shutdown bit on the rpc_clnt structs associated with that mount. If cl_shutdown is set, the task scheduler immediately returns -EIO for new tasks. Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/sysfs.c | 54 ++++++++++++++++++++++++++++++++++++- include/linux/nfs_fs_sb.h | 1 + include/linux/sunrpc/clnt.h | 3 ++- net/sunrpc/clnt.c | 5 ++++ 4 files changed, 61 insertions(+), 2 deletions(-) diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 7009de149158..1fedbaff10e9 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "nfs4_fs.h" #include "netns.h" @@ -216,6 +217,50 @@ void nfs_netns_sysfs_destroy(struct nfs_net *netns) } } +static ssize_t +shutdown_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct nfs_server *server = container_of(kobj, struct nfs_server, kobj); + bool shutdown = server->flags & NFS_MOUNT_SHUTDOWN; + return sysfs_emit(buf, "%d\n", shutdown); +} + +static ssize_t +shutdown_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct nfs_server *server; + int ret, val; + + server = container_of(kobj, struct nfs_server, kobj); + + ret = kstrtoint(buf, 0, &val); + if (ret) + return ret; + + if (val != 1) + return -EINVAL; + + /* already shut down? */ + if (server->flags & NFS_MOUNT_SHUTDOWN) + goto out; + + server->flags |= NFS_MOUNT_SHUTDOWN; + server->client->cl_shutdown = 1; + server->nfs_client->cl_rpcclient->cl_shutdown = 1; + + if (!IS_ERR(server->client_acl)) + server->client_acl->cl_shutdown = 1; + + if (server->nlm_host) + server->nlm_host->h_rpcclnt->cl_shutdown = 1; +out: + return count; +} + +static struct kobj_attribute nfs_sysfs_attr_shutdown = __ATTR_RW(shutdown); + #define RPC_CLIENT_NAME_SIZE 64 void nfs_sysfs_link_rpc_client(struct nfs_server *server, @@ -259,9 +304,16 @@ void nfs_sysfs_add_server(struct nfs_server *server) ret = kobject_init_and_add(&server->kobj, &nfs_sb_ktype, &nfs_kset->kobj, "server-%d", server->s_sysfs_id); - if (ret < 0) + if (ret < 0) { pr_warn("NFS: nfs sysfs add server-%d failed (%d)\n", server->s_sysfs_id, ret); + return; + } + ret = sysfs_create_file_ns(&server->kobj, &nfs_sysfs_attr_shutdown.attr, + nfs_netns_server_namespace(&server->kobj)); + if (ret < 0) + pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n", + server->s_sysfs_id, ret); } EXPORT_SYMBOL_GPL(nfs_sysfs_add_server); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 4bed0b6c79c7..20eeba8b009d 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -154,6 +154,7 @@ struct nfs_server { #define NFS_MOUNT_WRITE_EAGER 0x01000000 #define NFS_MOUNT_WRITE_WAIT 0x02000000 #define NFS_MOUNT_TRUNK_DISCOVERY 0x04000000 +#define NFS_MOUNT_SHUTDOWN 0x08000000 unsigned int fattr_valid; /* Valid attributes */ unsigned int caps; /* server capabilities */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 88cdf6e3012a..4f41d839face 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -63,7 +63,8 @@ struct rpc_clnt { cl_discrtry : 1,/* disconnect before retry */ cl_noretranstimeo: 1,/* No retransmit timeouts */ cl_autobind : 1,/* use getport() */ - cl_chatty : 1;/* be verbose */ + cl_chatty : 1,/* be verbose */ + cl_shutdown : 1;/* rpc immediate -EIO */ struct xprtsec_parms cl_xprtsec; /* transport security policy */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 640c76ab2f1a..d7c697af3762 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1724,6 +1724,11 @@ call_start(struct rpc_task *task) trace_rpc_request(task); + if (task->tk_client->cl_shutdown) { + rpc_call_rpcerror(task, -EIO); + return; + } + /* Increment call count (version might not be valid for ping) */ if (clnt->cl_program->version[clnt->cl_vers]) clnt->cl_program->version[clnt->cl_vers]->counts[idx]++; From 7d3e26a054c88477b26adda3542d66271a943968 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 15 Jun 2023 14:07:31 -0400 Subject: [PATCH 27/33] NFS: Cancel all existing RPC tasks when shutdown Walk existing RPC tasks and cancel them with -EIO when the client is shutdown. Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/sysfs.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 1fedbaff10e9..acda8f033d30 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -217,6 +217,17 @@ void nfs_netns_sysfs_destroy(struct nfs_net *netns) } } +static bool shutdown_match_client(const struct rpc_task *task, const void *data) +{ + return true; +} + +static void shutdown_client(struct rpc_clnt *clnt) +{ + clnt->cl_shutdown = 1; + rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL); +} + static ssize_t shutdown_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -247,14 +258,14 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr, goto out; server->flags |= NFS_MOUNT_SHUTDOWN; - server->client->cl_shutdown = 1; - server->nfs_client->cl_rpcclient->cl_shutdown = 1; + shutdown_client(server->client); + shutdown_client(server->nfs_client->cl_rpcclient); if (!IS_ERR(server->client_acl)) - server->client_acl->cl_shutdown = 1; + shutdown_client(server->client_acl); if (server->nlm_host) - server->nlm_host->h_rpcclnt->cl_shutdown = 1; + shutdown_client(server->nlm_host->h_rpcclnt); out: return count; } From 6ad477a69ad81bcdd515559fba2887ae71c9c0cc Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 15 Jun 2023 14:07:32 -0400 Subject: [PATCH 28/33] NFSv4: Clean up some shutdown loops If a SEQUENCE call receives -EIO for a shutdown client, it will retry the RPC call. Instead of doing that for a shutdown client, just bail out. Likewise, if the state manager decides to perform recovery for a shutdown client, it will continuously retry. As above, just bail out. Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- fs/nfs/nfs4state.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d3665390c4cb..6fcee85e30ca 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -9371,7 +9371,7 @@ static void nfs41_sequence_call_done(struct rpc_task *task, void *data) return; trace_nfs4_sequence(clp, task->tk_status); - if (task->tk_status < 0) { + if (task->tk_status < 0 && !task->tk_client->cl_shutdown) { dprintk("%s ERROR %d\n", __func__, task->tk_status); if (refcount_read(&clp->cl_count) == 1) return; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index bbe49315d99e..e079987af4a3 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1210,6 +1210,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) struct task_struct *task; char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1]; + if (clp->cl_rpcclient->cl_shutdown) + return; + set_bit(NFS4CLNT_RUN_MANAGER, &clp->cl_state); if (test_and_set_bit(NFS4CLNT_MANAGER_AVAILABLE, &clp->cl_state) != 0) { wake_up_var(&clp->cl_state); From 7f7ab336898f281e58540ef781a8fb375acc32a9 Mon Sep 17 00:00:00 2001 From: Qi Zheng Date: Thu, 15 Jun 2023 11:19:46 +0000 Subject: [PATCH 29/33] NFSv4.2: fix wrong shrinker_id Currently, the list_lru::shrinker_id corresponding to the nfs4_xattr shrinkers is wrong: >>> prog["nfs4_xattr_cache_lru"].shrinker_id (int)0 >>> prog["nfs4_xattr_entry_lru"].shrinker_id (int)0 >>> prog["nfs4_xattr_large_entry_lru"].shrinker_id (int)0 >>> prog["nfs4_xattr_cache_shrinker"].id (int)18 >>> prog["nfs4_xattr_entry_shrinker"].id (int)19 >>> prog["nfs4_xattr_large_entry_shrinker"].id (int)20 This is not what we expect, which will cause these shrinkers not to be found in shrink_slab_memcg(). We should assign shrinker::id before calling list_lru_init_memcg(), so that the corresponding list_lru::shrinker_id will be assigned the correct value like below: >>> prog["nfs4_xattr_cache_lru"].shrinker_id (int)16 >>> prog["nfs4_xattr_entry_lru"].shrinker_id (int)17 >>> prog["nfs4_xattr_large_entry_lru"].shrinker_id (int)18 >>> prog["nfs4_xattr_cache_shrinker"].id (int)16 >>> prog["nfs4_xattr_entry_shrinker"].id (int)17 >>> prog["nfs4_xattr_large_entry_shrinker"].id (int)18 So just do it. Fixes: 95ad37f90c33 ("NFSv4.2: add client side xattr caching.") Signed-off-by: Qi Zheng Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xattr.c | 79 +++++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 35 deletions(-) diff --git a/fs/nfs/nfs42xattr.c b/fs/nfs/nfs42xattr.c index 76ae11834206..911f634ba3da 100644 --- a/fs/nfs/nfs42xattr.c +++ b/fs/nfs/nfs42xattr.c @@ -991,6 +991,29 @@ static void nfs4_xattr_cache_init_once(void *p) INIT_LIST_HEAD(&cache->dispose); } +static int nfs4_xattr_shrinker_init(struct shrinker *shrinker, + struct list_lru *lru, const char *name) +{ + int ret = 0; + + ret = register_shrinker(shrinker, name); + if (ret) + return ret; + + ret = list_lru_init_memcg(lru, shrinker); + if (ret) + unregister_shrinker(shrinker); + + return ret; +} + +static void nfs4_xattr_shrinker_destroy(struct shrinker *shrinker, + struct list_lru *lru) +{ + unregister_shrinker(shrinker); + list_lru_destroy(lru); +} + int __init nfs4_xattr_cache_init(void) { int ret = 0; @@ -1002,44 +1025,30 @@ int __init nfs4_xattr_cache_init(void) if (nfs4_xattr_cache_cachep == NULL) return -ENOMEM; - ret = list_lru_init_memcg(&nfs4_xattr_large_entry_lru, - &nfs4_xattr_large_entry_shrinker); - if (ret) - goto out4; - - ret = list_lru_init_memcg(&nfs4_xattr_entry_lru, - &nfs4_xattr_entry_shrinker); - if (ret) - goto out3; - - ret = list_lru_init_memcg(&nfs4_xattr_cache_lru, - &nfs4_xattr_cache_shrinker); - if (ret) - goto out2; - - ret = register_shrinker(&nfs4_xattr_cache_shrinker, "nfs-xattr_cache"); + ret = nfs4_xattr_shrinker_init(&nfs4_xattr_cache_shrinker, + &nfs4_xattr_cache_lru, + "nfs-xattr_cache"); if (ret) goto out1; - ret = register_shrinker(&nfs4_xattr_entry_shrinker, "nfs-xattr_entry"); + ret = nfs4_xattr_shrinker_init(&nfs4_xattr_entry_shrinker, + &nfs4_xattr_entry_lru, + "nfs-xattr_entry"); if (ret) - goto out; + goto out2; - ret = register_shrinker(&nfs4_xattr_large_entry_shrinker, - "nfs-xattr_large_entry"); + ret = nfs4_xattr_shrinker_init(&nfs4_xattr_large_entry_shrinker, + &nfs4_xattr_large_entry_lru, + "nfs-xattr_large_entry"); if (!ret) return 0; - unregister_shrinker(&nfs4_xattr_entry_shrinker); -out: - unregister_shrinker(&nfs4_xattr_cache_shrinker); -out1: - list_lru_destroy(&nfs4_xattr_cache_lru); + nfs4_xattr_shrinker_destroy(&nfs4_xattr_entry_shrinker, + &nfs4_xattr_entry_lru); out2: - list_lru_destroy(&nfs4_xattr_entry_lru); -out3: - list_lru_destroy(&nfs4_xattr_large_entry_lru); -out4: + nfs4_xattr_shrinker_destroy(&nfs4_xattr_cache_shrinker, + &nfs4_xattr_cache_lru); +out1: kmem_cache_destroy(nfs4_xattr_cache_cachep); return ret; @@ -1047,11 +1056,11 @@ out4: void nfs4_xattr_cache_exit(void) { - unregister_shrinker(&nfs4_xattr_large_entry_shrinker); - unregister_shrinker(&nfs4_xattr_entry_shrinker); - unregister_shrinker(&nfs4_xattr_cache_shrinker); - list_lru_destroy(&nfs4_xattr_large_entry_lru); - list_lru_destroy(&nfs4_xattr_entry_lru); - list_lru_destroy(&nfs4_xattr_cache_lru); + nfs4_xattr_shrinker_destroy(&nfs4_xattr_large_entry_shrinker, + &nfs4_xattr_large_entry_lru); + nfs4_xattr_shrinker_destroy(&nfs4_xattr_entry_shrinker, + &nfs4_xattr_entry_lru); + nfs4_xattr_shrinker_destroy(&nfs4_xattr_cache_shrinker, + &nfs4_xattr_cache_lru); kmem_cache_destroy(nfs4_xattr_cache_cachep); } From c907e72f58ed979a24a9fdcadfbc447c51d5e509 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Sun, 18 Jun 2023 17:32:25 -0400 Subject: [PATCH 30/33] NFSv4.1: freeze the session table upon receiving NFS4ERR_BADSESSION When the client received NFS4ERR_BADSESSION, it schedules recovery and start the state manager thread which in turn freezes the session table and does not allow for any new requests to use the no-longer valid session. However, it is possible that before the state manager thread runs, a new operation would use the released slot that received BADSESSION and was therefore not updated its sequence number. Such re-use of the slot can lead the application errors. Fixes: 5c441544f045 ("NFSv4.x: Handle bad/dead sessions correctly in nfs41_sequence_process()") Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6fcee85e30ca..212971ddb149 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -921,6 +921,7 @@ out: out_noaction: return ret; session_recover: + set_bit(NFS4_SLOT_TBL_DRAINING, &session->fc_slot_table.slot_tbl_state); nfs4_schedule_session_recovery(session, status); dprintk("%s ERROR: %d Reset session\n", __func__, status); nfs41_sequence_free_slot(res); From cded49ba366220ae7009d71c5804baa01acfb860 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 12 Jun 2023 09:34:04 -0400 Subject: [PATCH 31/33] nfs: don't report STATX_BTIME in ->getattr NFS doesn't properly support reporting the btime in getattr (yet), but 61a968b4f05e mistakenly added it to the request_mask. This causes statx for STATX_BTIME to report a zeroed out btime instead of properly clearing the flag. Cc: stable@vger.kernel.org # v6.3+ Fixes: 61a968b4f05e ("nfs: report the inode version in getattr if requested") Signed-off-by: Jeff Layton Link: https://bugzilla.redhat.com/show_bug.cgi?id=2214134 Reported-by: Boyang Xue Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a910b9a638c5..8172dd4135a1 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -845,7 +845,7 @@ int nfs_getattr(struct mnt_idmap *idmap, const struct path *path, request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID | STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME | - STATX_INO | STATX_SIZE | STATX_BLOCKS | STATX_BTIME | + STATX_INO | STATX_SIZE | STATX_BLOCKS | STATX_CHANGE_COOKIE; if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) { From e901f17b0742e36c9d79885a912b666cc1deb210 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 27 Jun 2023 06:12:11 -0400 Subject: [PATCH 32/33] NFS: Don't cleanup sysfs superblock entry if uninitialized Its possible to end up in nfs_free_server() before the server's superblock sysfs entry has been initialized, in which case calling kobject_put() will emit a WARNING. Check if the kobject has been initialized before cleaning it up. Fixes: 1c7251187dc0 ("NFS: add superblock sysfs entries") Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 48c9d8411c0e..e4c5f193ed5e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1021,8 +1021,10 @@ void nfs_free_server(struct nfs_server *server) nfs_put_client(server->nfs_client); - nfs_sysfs_remove_server(server); - kobject_put(&server->kobj); + if (server->kobj.state_initialized) { + nfs_sysfs_remove_server(server); + kobject_put(&server->kobj); + } ida_free(&s_sysfs_ids, server->s_sysfs_id); ida_destroy(&server->lockowner_id); From 5b4a82a0724af1dfd1320826e0266117b6a57fbd Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 27 Jun 2023 14:31:49 -0400 Subject: [PATCH 33/33] Revert "NFSv4: Retry LOCK on OLD_STATEID during delegation return" Olga Kornievskaia reports that this patch breaks NFSv4.0 state recovery. It also introduces additional complexity in the error paths for cases not related to the original problem. Let's revert it for now, and address the original problem in another manner. This reverts commit f5ea16137a3fa2858620dc9084466491c128535f. Fixes: f5ea16137a3f ("NFSv4: Retry LOCK on OLD_STATEID during delegation return") Reported-by: Kornievskaia, Olga Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 212971ddb149..e1a886b58354 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7160,7 +7160,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) { struct nfs4_lockdata *data = calldata; struct nfs4_lock_state *lsp = data->lsp; - struct nfs_server *server = NFS_SERVER(d_inode(data->ctx->dentry)); if (!nfs4_sequence_done(task, &data->res.seq_res)) return; @@ -7168,7 +7167,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) data->rpc_status = task->tk_status; switch (task->tk_status) { case 0: - renew_lease(server, data->timestamp); + renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)), + data->timestamp); if (data->arg.new_lock && !data->cancelled) { data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); if (locks_lock_inode_wait(lsp->ls_state->inode, &data->fl) < 0) @@ -7189,8 +7189,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) if (!nfs4_stateid_match(&data->arg.open_stateid, &lsp->ls_state->open_stateid)) goto out_restart; - else if (nfs4_async_handle_error(task, server, lsp->ls_state, NULL) == -EAGAIN) - goto out_restart; } else if (!nfs4_stateid_match(&data->arg.lock_stateid, &lsp->ls_stateid)) goto out_restart;