Btrfs: race free update of commit root for ro snapshots
This is a better solution for the problem addressed in the following
commit:
    Btrfs: update commit root on snapshot creation after orphan cleanup
    (3821f34888)
The previous solution wasn't the best because of 2 reasons:
    1) It added another full transaction commit, which is more expensive
       than just swapping the commit root with the root;
    2) If a reboot happened after the first transaction commit (the one
       that creates the snapshot) and before the second transaction commit,
       then we would end up with the same problem if a send using that
       snapshot was requested before the first transaction commit after
       the reboot.
This change addresses those 2 issues. The second issue is addressed by
switching the commit root in the dentry lookup VFS callback, which is
also called by the snapshot/subvol creation ioctl and performs orphan
cleanup if needed. Like the vfs, the ioctl locks the parent inode too,
preventing race issues between a dentry lookup and snapshot creation.
Cc: Alex Lyakas <alex.btrfs@zadarastorage.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
			
			
This commit is contained in:
		
							parent
							
								
									87fa3bb078
								
							
						
					
					
						commit
						9c3b306e1c
					
				| @ -5181,6 +5181,42 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | ||||
| 			iput(inode); | ||||
| 			inode = ERR_PTR(ret); | ||||
| 		} | ||||
| 		/*
 | ||||
| 		 * If orphan cleanup did remove any orphans, it means the tree | ||||
| 		 * was modified and therefore the commit root is not the same as | ||||
| 		 * the current root anymore. This is a problem, because send | ||||
| 		 * uses the commit root and therefore can see inode items that | ||||
| 		 * don't exist in the current root anymore, and for example make | ||||
| 		 * calls to btrfs_iget, which will do tree lookups based on the | ||||
| 		 * current root and not on the commit root. Those lookups will | ||||
| 		 * fail, returning a -ESTALE error, and making send fail with | ||||
| 		 * that error. So make sure a send does not see any orphans we | ||||
| 		 * have just removed, and that it will see the same inodes | ||||
| 		 * regardless of whether a transaction commit happened before | ||||
| 		 * it started (meaning that the commit root will be the same as | ||||
| 		 * the current root) or not. | ||||
| 		 */ | ||||
| 		if (sub_root->node != sub_root->commit_root) { | ||||
| 			u64 sub_flags = btrfs_root_flags(&sub_root->root_item); | ||||
| 
 | ||||
| 			if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) { | ||||
| 				struct extent_buffer *eb; | ||||
| 
 | ||||
| 				/*
 | ||||
| 				 * Assert we can't have races between dentry | ||||
| 				 * lookup called through the snapshot creation | ||||
| 				 * ioctl and the VFS. | ||||
| 				 */ | ||||
| 				ASSERT(mutex_is_locked(&dir->i_mutex)); | ||||
| 
 | ||||
| 				down_write(&root->fs_info->commit_root_sem); | ||||
| 				eb = sub_root->commit_root; | ||||
| 				sub_root->commit_root = | ||||
| 					btrfs_root_node(sub_root); | ||||
| 				up_write(&root->fs_info->commit_root_sem); | ||||
| 				free_extent_buffer(eb); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return inode; | ||||
|  | ||||
| @ -711,39 +711,6 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | ||||
| 	if (ret) | ||||
| 		goto fail; | ||||
| 
 | ||||
| 	ret = btrfs_orphan_cleanup(pending_snapshot->snap); | ||||
| 	if (ret) | ||||
| 		goto fail; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If orphan cleanup did remove any orphans, it means the tree was | ||||
| 	 * modified and therefore the commit root is not the same as the | ||||
| 	 * current root anymore. This is a problem, because send uses the | ||||
| 	 * commit root and therefore can see inode items that don't exist | ||||
| 	 * in the current root anymore, and for example make calls to | ||||
| 	 * btrfs_iget, which will do tree lookups based on the current root | ||||
| 	 * and not on the commit root. Those lookups will fail, returning a | ||||
| 	 * -ESTALE error, and making send fail with that error. So make sure | ||||
| 	 * a send does not see any orphans we have just removed, and that it | ||||
| 	 * will see the same inodes regardless of whether a transaction | ||||
| 	 * commit happened before it started (meaning that the commit root | ||||
| 	 * will be the same as the current root) or not. | ||||
| 	 */ | ||||
| 	if (readonly && pending_snapshot->snap->node != | ||||
| 	    pending_snapshot->snap->commit_root) { | ||||
| 		trans = btrfs_join_transaction(pending_snapshot->snap); | ||||
| 		if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) { | ||||
| 			ret = PTR_ERR(trans); | ||||
| 			goto fail; | ||||
| 		} | ||||
| 		if (!IS_ERR(trans)) { | ||||
| 			ret = btrfs_commit_transaction(trans, | ||||
| 						       pending_snapshot->snap); | ||||
| 			if (ret) | ||||
| 				goto fail; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); | ||||
| 	if (IS_ERR(inode)) { | ||||
| 		ret = PTR_ERR(inode); | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user