ceph: fix flush_dirty_caps race with caps migration
flush_dirty_caps() used to loop on the first entry of the cap_dirty list, on the assumption that calling ceph_check_caps() would remove that entry from the list. This isn't true for caps that are being migrated between MDSs, where we've received the EXPORT but not yet the IMPORT. Instead, do a safe list iteration, and pin the next inode on the list by taking a reference to it and setting the CEPH_I_NOFLUSH flag. Signed-off-by: Sage Weil <sage@newdream.net>
This commit is contained in:
		
							parent
							
								
									7af8f1e4aa
								
							
						
					
					
						commit
						e9964c1023
					
				| @ -1573,6 +1573,11 @@ retry_locked: | ||||
| 		} | ||||
| 
 | ||||
| ack: | ||||
| 		if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { | ||||
| 			dout(" skipping %p I_NOFLUSH set\n", inode); | ||||
| 			continue; | ||||
| 		} | ||||
| 
 | ||||
| 		if (session && session != cap->session) { | ||||
| 			dout("oops, wrong session %p mutex\n", session); | ||||
| 			mutex_unlock(&session->s_mutex); | ||||
| @ -1652,6 +1657,10 @@ static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, | ||||
| 
 | ||||
| retry: | ||||
| 	spin_lock(&inode->i_lock); | ||||
| 	if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { | ||||
| 		dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode); | ||||
| 		goto out; | ||||
| 	} | ||||
| 	if (ci->i_dirty_caps && ci->i_auth_cap) { | ||||
| 		struct ceph_cap *cap = ci->i_auth_cap; | ||||
| 		int used = __ceph_caps_used(ci); | ||||
| @ -2747,16 +2756,38 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) | ||||
|  */ | ||||
| void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) | ||||
| { | ||||
| 	struct ceph_inode_info *ci; | ||||
| 	struct inode *inode; | ||||
| 	struct ceph_inode_info *ci, *nci = NULL; | ||||
| 	struct inode *inode, *ninode = NULL; | ||||
| 	struct list_head *p, *n; | ||||
| 
 | ||||
| 	dout("flush_dirty_caps\n"); | ||||
| 	spin_lock(&mdsc->cap_dirty_lock); | ||||
| 	while (!list_empty(&mdsc->cap_dirty)) { | ||||
| 		ci = list_first_entry(&mdsc->cap_dirty, | ||||
| 				      struct ceph_inode_info, | ||||
| 				      i_dirty_item); | ||||
| 		inode = igrab(&ci->vfs_inode); | ||||
| 	list_for_each_safe(p, n, &mdsc->cap_dirty) { | ||||
| 		if (nci) { | ||||
| 			ci = nci; | ||||
| 			inode = ninode; | ||||
| 			ci->i_ceph_flags &= ~CEPH_I_NOFLUSH; | ||||
| 			dout("flush_dirty_caps inode %p (was next inode)\n", | ||||
| 			     inode); | ||||
| 		} else { | ||||
| 			ci = list_entry(p, struct ceph_inode_info, | ||||
| 					i_dirty_item); | ||||
| 			inode = igrab(&ci->vfs_inode); | ||||
| 			BUG_ON(!inode); | ||||
| 			dout("flush_dirty_caps inode %p\n", inode); | ||||
| 		} | ||||
| 		if (n != &mdsc->cap_dirty) { | ||||
| 			nci = list_entry(n, struct ceph_inode_info, | ||||
| 					 i_dirty_item); | ||||
| 			ninode = igrab(&nci->vfs_inode); | ||||
| 			BUG_ON(!ninode); | ||||
| 			nci->i_ceph_flags |= CEPH_I_NOFLUSH; | ||||
| 			dout("flush_dirty_caps next inode %p, noflush\n", | ||||
| 			     ninode); | ||||
| 		} else { | ||||
| 			nci = NULL; | ||||
| 			ninode = NULL; | ||||
| 		} | ||||
| 		spin_unlock(&mdsc->cap_dirty_lock); | ||||
| 		if (inode) { | ||||
| 			ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, | ||||
|  | ||||
| @ -289,6 +289,7 @@ struct ceph_inode_xattrs_info { | ||||
| #define CEPH_I_COMPLETE  1  /* we have complete directory cached */ | ||||
| #define CEPH_I_NODELAY   4  /* do not delay cap release */ | ||||
| #define CEPH_I_FLUSH     8  /* do not delay flush of dirty metadata */ | ||||
| #define CEPH_I_NOFLUSH  16  /* do not flush dirty caps */ | ||||
| 
 | ||||
| struct ceph_inode_info { | ||||
| 	struct ceph_vino i_vino;   /* ceph ino + snap */ | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user