pNFS/flexfiles: Fix a deadlock on LAYOUTGET
We encountered a deadlock where the SEQUENCE that accompanied the LAYOUTGET triggered a session drain, while ff_layout_alloc_lseg triggered a GETDEVICEINFO. The GETDEVICEINFO hung waiting for the session drain, while the LAYOUTGET held the slot waiting for alloc_lseg to finish.

Avoid this by moving the call to nfs4_find_get_deviceid out of ff_layout_alloc_lseg and into nfs4_ff_layout_prepare_ds.

Signed-off-by: Fred Isaman <fred.isaman@gmail.com>
[dros@primarydata.com: pNFS/flexfiles: fix races in ff_layout_mirror_valid]
Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
This commit is contained in:
		
							parent
							
								
									2f065ddb64
								
							
						
					
					
						commit
						65990d1afb
					
				| @ -183,7 +183,7 @@ ff_layout_add_mirror(struct pnfs_layout_hdr *lo, | |||||||
| 
 | 
 | ||||||
| 	spin_lock(&inode->i_lock); | 	spin_lock(&inode->i_lock); | ||||||
| 	list_for_each_entry(pos, &ff_layout->mirrors, mirrors) { | 	list_for_each_entry(pos, &ff_layout->mirrors, mirrors) { | ||||||
| 		if (mirror->mirror_ds != pos->mirror_ds) | 		if (memcmp(&mirror->devid, &pos->devid, sizeof(pos->devid)) != 0) | ||||||
| 			continue; | 			continue; | ||||||
| 		if (!ff_mirror_match_fh(mirror, pos)) | 		if (!ff_mirror_match_fh(mirror, pos)) | ||||||
| 			continue; | 			continue; | ||||||
| @ -360,19 +360,6 @@ static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void ff_layout_mark_devices_valid(struct nfs4_ff_layout_segment *fls) |  | ||||||
| { |  | ||||||
| 	struct nfs4_deviceid_node *node; |  | ||||||
| 	int i; |  | ||||||
| 
 |  | ||||||
| 	if (!(fls->flags & FF_FLAGS_NO_IO_THRU_MDS)) |  | ||||||
| 		return; |  | ||||||
| 	for (i = 0; i < fls->mirror_array_cnt; i++) { |  | ||||||
| 		node = &fls->mirror_array[i]->mirror_ds->id_node; |  | ||||||
| 		clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static struct pnfs_layout_segment * | static struct pnfs_layout_segment * | ||||||
| ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, | ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, | ||||||
| 		     struct nfs4_layoutget_res *lgr, | 		     struct nfs4_layoutget_res *lgr, | ||||||
| @ -426,8 +413,6 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, | |||||||
| 
 | 
 | ||||||
| 	for (i = 0; i < fls->mirror_array_cnt; i++) { | 	for (i = 0; i < fls->mirror_array_cnt; i++) { | ||||||
| 		struct nfs4_ff_layout_mirror *mirror; | 		struct nfs4_ff_layout_mirror *mirror; | ||||||
| 		struct nfs4_deviceid devid; |  | ||||||
| 		struct nfs4_deviceid_node *idnode; |  | ||||||
| 		struct auth_cred acred = { .group_info = ff_zero_group }; | 		struct auth_cred acred = { .group_info = ff_zero_group }; | ||||||
| 		struct rpc_cred	__rcu *cred; | 		struct rpc_cred	__rcu *cred; | ||||||
| 		u32 ds_count, fh_count, id; | 		u32 ds_count, fh_count, id; | ||||||
| @ -452,24 +437,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, | |||||||
| 		fls->mirror_array[i]->ds_count = ds_count; | 		fls->mirror_array[i]->ds_count = ds_count; | ||||||
| 
 | 
 | ||||||
| 		/* deviceid */ | 		/* deviceid */ | ||||||
| 		rc = decode_deviceid(&stream, &devid); | 		rc = decode_deviceid(&stream, &fls->mirror_array[i]->devid); | ||||||
| 		if (rc) | 		if (rc) | ||||||
| 			goto out_err_free; | 			goto out_err_free; | ||||||
| 
 | 
 | ||||||
| 		idnode = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode), |  | ||||||
| 						&devid, lh->plh_lc_cred, |  | ||||||
| 						gfp_flags); |  | ||||||
| 		/*
 |  | ||||||
| 		 * upon success, mirror_ds is allocated by previous |  | ||||||
| 		 * getdeviceinfo, or newly by .alloc_deviceid_node |  | ||||||
| 		 * nfs4_find_get_deviceid failure is indeed getdeviceinfo falure |  | ||||||
| 		 */ |  | ||||||
| 		if (idnode) |  | ||||||
| 			fls->mirror_array[i]->mirror_ds = |  | ||||||
| 				FF_LAYOUT_MIRROR_DS(idnode); |  | ||||||
| 		else |  | ||||||
| 			goto out_err_free; |  | ||||||
| 
 |  | ||||||
| 		/* efficiency */ | 		/* efficiency */ | ||||||
| 		rc = -EIO; | 		rc = -EIO; | ||||||
| 		p = xdr_inline_decode(&stream, 4); | 		p = xdr_inline_decode(&stream, 4); | ||||||
| @ -567,8 +538,6 @@ out_sort_mirrors: | |||||||
| 	rc = ff_layout_check_layout(lgr); | 	rc = ff_layout_check_layout(lgr); | ||||||
| 	if (rc) | 	if (rc) | ||||||
| 		goto out_err_free; | 		goto out_err_free; | ||||||
| 	ff_layout_mark_devices_valid(fls); |  | ||||||
| 
 |  | ||||||
| 	ret = &fls->generic_hdr; | 	ret = &fls->generic_hdr; | ||||||
| 	dprintk("<-- %s (success)\n", __func__); | 	dprintk("<-- %s (success)\n", __func__); | ||||||
| out_free_page: | out_free_page: | ||||||
| @ -2332,7 +2301,7 @@ ff_layout_mirror_prepare_stats(struct pnfs_layout_hdr *lo, | |||||||
| 	list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) { | 	list_for_each_entry(mirror, &ff_layout->mirrors, mirrors) { | ||||||
| 		if (i >= dev_limit) | 		if (i >= dev_limit) | ||||||
| 			break; | 			break; | ||||||
| 		if (!mirror->mirror_ds) | 		if (IS_ERR_OR_NULL(mirror->mirror_ds)) | ||||||
| 			continue; | 			continue; | ||||||
| 		if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags)) | 		if (!test_and_clear_bit(NFS4_FF_MIRROR_STAT_AVAIL, &mirror->flags)) | ||||||
| 			continue; | 			continue; | ||||||
|  | |||||||
| @ -74,6 +74,7 @@ struct nfs4_ff_layout_mirror { | |||||||
| 	struct list_head		mirrors; | 	struct list_head		mirrors; | ||||||
| 	u32				ds_count; | 	u32				ds_count; | ||||||
| 	u32				efficiency; | 	u32				efficiency; | ||||||
|  | 	struct nfs4_deviceid		devid; | ||||||
| 	struct nfs4_ff_layout_ds	*mirror_ds; | 	struct nfs4_ff_layout_ds	*mirror_ds; | ||||||
| 	u32				fh_versions_cnt; | 	u32				fh_versions_cnt; | ||||||
| 	struct nfs_fh			*fh_versions; | 	struct nfs_fh			*fh_versions; | ||||||
| @ -211,7 +212,6 @@ nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, | |||||||
| 				 struct inode *inode); | 				 struct inode *inode); | ||||||
| struct rpc_cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, | struct rpc_cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, | ||||||
| 				       u32 ds_idx, struct rpc_cred *mdscred); | 				       u32 ds_idx, struct rpc_cred *mdscred); | ||||||
| bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); |  | ||||||
| bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg); | bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg); | ||||||
| bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg); | bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg); | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -20,9 +20,11 @@ | |||||||
| static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS; | static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS; | ||||||
| static unsigned int dataserver_retrans; | static unsigned int dataserver_retrans; | ||||||
| 
 | 
 | ||||||
|  | static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); | ||||||
|  | 
 | ||||||
| void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds) | void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds) | ||||||
| { | { | ||||||
| 	if (mirror_ds) | 	if (!IS_ERR_OR_NULL(mirror_ds)) | ||||||
| 		nfs4_put_deviceid_node(&mirror_ds->id_node); | 		nfs4_put_deviceid_node(&mirror_ds->id_node); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| @ -182,12 +184,29 @@ static void ff_layout_mark_devid_invalid(struct pnfs_layout_segment *lseg, | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg, | static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg, | ||||||
| 		struct nfs4_ff_layout_mirror *mirror) | 				   struct nfs4_ff_layout_mirror *mirror, | ||||||
|  | 				   bool create) | ||||||
| { | { | ||||||
| 	if (mirror == NULL || mirror->mirror_ds == NULL) { | 	if (mirror == NULL || IS_ERR(mirror->mirror_ds)) | ||||||
| 		pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, | 		goto outerr; | ||||||
| 					lseg); | 	if (mirror->mirror_ds == NULL) { | ||||||
| 		return false; | 		if (create) { | ||||||
|  | 			struct nfs4_deviceid_node *node; | ||||||
|  | 			struct pnfs_layout_hdr *lh = lseg->pls_layout; | ||||||
|  | 			struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV); | ||||||
|  | 
 | ||||||
|  | 			node = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode), | ||||||
|  | 					&mirror->devid, lh->plh_lc_cred, | ||||||
|  | 					GFP_KERNEL); | ||||||
|  | 			if (node) | ||||||
|  | 				mirror_ds = FF_LAYOUT_MIRROR_DS(node); | ||||||
|  | 
 | ||||||
|  | 			/* check for race with another call to this function */ | ||||||
|  | 			if (cmpxchg(&mirror->mirror_ds, NULL, mirror_ds) && | ||||||
|  | 			    mirror_ds != ERR_PTR(-ENODEV)) | ||||||
|  | 				nfs4_put_deviceid_node(node); | ||||||
|  | 		} else | ||||||
|  | 			goto outerr; | ||||||
| 	} | 	} | ||||||
| 	if (mirror->mirror_ds->ds == NULL) { | 	if (mirror->mirror_ds->ds == NULL) { | ||||||
| 		struct nfs4_deviceid_node *devid; | 		struct nfs4_deviceid_node *devid; | ||||||
| @ -196,6 +215,9 @@ static bool ff_layout_mirror_valid(struct pnfs_layout_segment *lseg, | |||||||
| 		return false; | 		return false; | ||||||
| 	} | 	} | ||||||
| 	return true; | 	return true; | ||||||
|  | outerr: | ||||||
|  | 	pnfs_error_mark_layout_for_return(lseg->pls_layout->plh_inode, lseg); | ||||||
|  | 	return false; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static void extend_ds_error(struct nfs4_ff_layout_ds_err *err, | static void extend_ds_error(struct nfs4_ff_layout_ds_err *err, | ||||||
| @ -323,7 +345,7 @@ nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx) | |||||||
| 	struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx); | 	struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx); | ||||||
| 	struct nfs_fh *fh = NULL; | 	struct nfs_fh *fh = NULL; | ||||||
| 
 | 
 | ||||||
| 	if (!ff_layout_mirror_valid(lseg, mirror)) { | 	if (!ff_layout_mirror_valid(lseg, mirror, false)) { | ||||||
| 		pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n", | 		pr_err_ratelimited("NFS: %s: No data server for mirror offset index %d\n", | ||||||
| 			__func__, mirror_idx); | 			__func__, mirror_idx); | ||||||
| 		goto out; | 		goto out; | ||||||
| @ -363,7 +385,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, | |||||||
| 	struct nfs_server *s = NFS_SERVER(ino); | 	struct nfs_server *s = NFS_SERVER(ino); | ||||||
| 	unsigned int max_payload; | 	unsigned int max_payload; | ||||||
| 
 | 
 | ||||||
| 	if (!ff_layout_mirror_valid(lseg, mirror)) { | 	if (!ff_layout_mirror_valid(lseg, mirror, true)) { | ||||||
| 		pr_err_ratelimited("NFS: %s: No data server for offset index %d\n", | 		pr_err_ratelimited("NFS: %s: No data server for offset index %d\n", | ||||||
| 			__func__, ds_idx); | 			__func__, ds_idx); | ||||||
| 		goto out; | 		goto out; | ||||||
| @ -547,7 +569,11 @@ static bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg) | |||||||
| 
 | 
 | ||||||
| 	for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { | 	for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { | ||||||
| 		mirror = FF_LAYOUT_COMP(lseg, idx); | 		mirror = FF_LAYOUT_COMP(lseg, idx); | ||||||
| 		if (mirror && mirror->mirror_ds) { | 		if (mirror) { | ||||||
|  | 			if (!mirror->mirror_ds) | ||||||
|  | 				return true; | ||||||
|  | 			if (IS_ERR(mirror->mirror_ds)) | ||||||
|  | 				continue; | ||||||
| 			devid = &mirror->mirror_ds->id_node; | 			devid = &mirror->mirror_ds->id_node; | ||||||
| 			if (!ff_layout_test_devid_unavailable(devid)) | 			if (!ff_layout_test_devid_unavailable(devid)) | ||||||
| 				return true; | 				return true; | ||||||
| @ -565,8 +591,10 @@ static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg) | |||||||
| 
 | 
 | ||||||
| 	for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { | 	for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { | ||||||
| 		mirror = FF_LAYOUT_COMP(lseg, idx); | 		mirror = FF_LAYOUT_COMP(lseg, idx); | ||||||
| 		if (!mirror || !mirror->mirror_ds) | 		if (!mirror || IS_ERR(mirror->mirror_ds)) | ||||||
| 			return false; | 			return false; | ||||||
|  | 		if (!mirror->mirror_ds) | ||||||
|  | 			continue; | ||||||
| 		devid = &mirror->mirror_ds->id_node; | 		devid = &mirror->mirror_ds->id_node; | ||||||
| 		if (ff_layout_test_devid_unavailable(devid)) | 		if (ff_layout_test_devid_unavailable(devid)) | ||||||
| 			return false; | 			return false; | ||||||
| @ -575,7 +603,7 @@ static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg) | |||||||
| 	return FF_LAYOUT_MIRROR_COUNT(lseg) != 0; | 	return FF_LAYOUT_MIRROR_COUNT(lseg) != 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg) | static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg) | ||||||
| { | { | ||||||
| 	if (lseg->pls_range.iomode == IOMODE_READ) | 	if (lseg->pls_range.iomode == IOMODE_READ) | ||||||
| 		return  ff_read_layout_has_available_ds(lseg); | 		return  ff_read_layout_has_available_ds(lseg); | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user