fsnotify: Provide framework for dropping SRCU lock in ->handle_event
fanotify wants to drop the fsnotify_mark_srcu lock while waiting for a response from userspace so that the whole notification subsystem is not blocked during that time. This patch provides a framework for safely getting a reference to a mark found in the object list, which pins the mark in that list. We can then drop fsnotify_mark_srcu, wait for the userspace response, and then safely continue iteration of the object list once we reacquire fsnotify_mark_srcu. Reviewed-by: Miklos Szeredi <mszeredi@redhat.com> Reviewed-by: Amir Goldstein <amir73il@gmail.com> Signed-off-by: Jan Kara <jack@suse.cz>
This commit is contained in:
		
							parent
							
								
									f09b04a03e
								
							
						
					
					
						commit
						abc77577a6
					
				| @ -8,6 +8,12 @@ | ||||
| 
 | ||||
| #include "../mount.h" | ||||
| 
 | ||||
| struct fsnotify_iter_info {	/* State handed to
 | ||||
| 				 * fsnotify_prepare_user_wait() and
 | ||||
| 				 * fsnotify_finish_user_wait() */ | ||||
| 	struct fsnotify_mark *inode_mark;	/* inode mark to pin; may be NULL */ | ||||
| 	struct fsnotify_mark *vfsmount_mark;	/* vfsmount mark to pin; may be NULL */ | ||||
| 	int srcu_idx;	/* fsnotify_mark_srcu read-side index: passed to
 | ||||
| 			 * srcu_read_unlock() and refreshed from
 | ||||
| 			 * srcu_read_lock() */ | ||||
| }; | ||||
| 
 | ||||
| /* destroy all events sitting in this groups notification queue */ | ||||
| extern void fsnotify_flush_notify(struct fsnotify_group *group); | ||||
| 
 | ||||
|  | ||||
| @ -126,6 +126,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) | ||||
| 	/* set to 0 when there are no external references to this group */ | ||||
| 	atomic_set(&group->refcnt, 1); | ||||
| 	atomic_set(&group->num_marks, 0); | ||||
| 	atomic_set(&group->user_waits, 0); | ||||
| 
 | ||||
| 	spin_lock_init(&group->notification_lock); | ||||
| 	INIT_LIST_HEAD(&group->notification_list); | ||||
|  | ||||
| @ -109,6 +109,16 @@ void fsnotify_get_mark(struct fsnotify_mark *mark) | ||||
| 	atomic_inc(&mark->refcnt); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Try to take a reference on a mark that was reached by lockless traversal | ||||
|  * of an object list. By the time we look at it the mark may already have | ||||
|  * been unlinked from the list and be waiting for the SRCU period to end | ||||
|  * before it is destroyed, so the reference is taken only while the refcount | ||||
|  * is still non-zero. | ||||
|  * | ||||
|  * Returns true when the reference was obtained, false otherwise. | ||||
|  */ | ||||
| static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) | ||||
| { | ||||
| 	bool pinned = atomic_inc_not_zero(&mark->refcnt); | ||||
| 
 | ||||
| 	return pinned; | ||||
| } | ||||
| 
 | ||||
| static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) | ||||
| { | ||||
| 	u32 new_mask = 0; | ||||
| @ -243,6 +253,72 @@ void fsnotify_put_mark(struct fsnotify_mark *mark) | ||||
| 			   FSNOTIFY_REAPER_DELAY); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Pin the marks recorded in @iter_info so that fsnotify_mark_srcu can be | ||||
|  * dropped while the caller waits for userspace. | ||||
|  * | ||||
|  * On success the group's user_waits count has been raised, every non-NULL | ||||
|  * mark in @iter_info holds an extra reference, the SRCU read lock identified | ||||
|  * by iter_info->srcu_idx has been released, and true is returned. | ||||
|  * | ||||
|  * On failure (no mark given, or a mark reference could not be taken) any | ||||
|  * partial state is undone, the SRCU read lock is left held, and false is | ||||
|  * returned. | ||||
|  */ | ||||
| bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) | ||||
| { | ||||
| 	struct fsnotify_mark *imark = iter_info->inode_mark; | ||||
| 	struct fsnotify_mark *vmark = iter_info->vfsmount_mark; | ||||
| 	struct fsnotify_group *group; | ||||
| 
 | ||||
| 	/* At least one mark is needed to find the group to account against. */ | ||||
| 	if (WARN_ON_ONCE(!imark && !vmark)) | ||||
| 		return false; | ||||
| 
 | ||||
| 	group = imark ? imark->group : vmark->group; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * The waiter count must be raised before any mark reference is | ||||
| 	 * taken: grabbing the reference is an atomic op too, so this | ||||
| 	 * increment is visible before any effect of the refcount increment. | ||||
| 	 */ | ||||
| 	atomic_inc(&group->user_waits); | ||||
| 
 | ||||
| 	/* Either grab can fail if the mark is being removed. */ | ||||
| 	if (imark && !fsnotify_get_mark_safe(imark)) | ||||
| 		goto out_wait; | ||||
| 	if (vmark && !fsnotify_get_mark_safe(vmark)) | ||||
| 		goto out_inode; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Both marks are now pinned by refcount in the inode / vfsmount | ||||
| 	 * lists, so the SRCU lock can go and list iteration can safely be | ||||
| 	 * resumed once userspace returns. | ||||
| 	 */ | ||||
| 	srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx); | ||||
| 	return true; | ||||
| 
 | ||||
| out_inode: | ||||
| 	/* The vfsmount grab failed: give back the inode reference, if any. */ | ||||
| 	if (imark) | ||||
| 		fsnotify_put_mark(imark); | ||||
| out_wait: | ||||
| 	/* Undo the waiter accounting; wake a shutdown waiter if we were last. */ | ||||
| 	if (atomic_dec_and_test(&group->user_waits) && group->shutdown) | ||||
| 		wake_up(&group->notification_waitq); | ||||
| 	return false; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Counterpart of fsnotify_prepare_user_wait(): retake fsnotify_mark_srcu | ||||
|  * (storing the new index in iter_info->srcu_idx), drop the references held | ||||
|  * on the inode / vfsmount marks in @iter_info and decrement the group's | ||||
|  * user_waits count, waking notification_waitq when the count reaches zero | ||||
|  * and the group is shutting down. | ||||
|  */ | ||||
| void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) | ||||
| { | ||||
| 	struct fsnotify_group *group = NULL; | ||||
| 
 | ||||
| 	iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); | ||||
| 	/* Read the group before the put: the put may drop the last reference. */ | ||||
| 	if (iter_info->inode_mark) { | ||||
| 		group = iter_info->inode_mark->group; | ||||
| 		fsnotify_put_mark(iter_info->inode_mark); | ||||
| 	} | ||||
| 	if (iter_info->vfsmount_mark) { | ||||
| 		group = iter_info->vfsmount_mark->group; | ||||
| 		fsnotify_put_mark(iter_info->vfsmount_mark); | ||||
| 	} | ||||
| 	/*
 | ||||
| 	 * With neither mark set there is no group to account against. | ||||
| 	 * fsnotify_prepare_user_wait() refuses that case, so this call is | ||||
| 	 * unbalanced: warn instead of dereferencing a NULL group below. | ||||
| 	 */ | ||||
| 	if (WARN_ON_ONCE(!group)) | ||||
| 		return; | ||||
| 	/*
 | ||||
| 	 * We abuse notification_waitq on group shutdown for waiting for all | ||||
| 	 * marks pinned when waiting for userspace. | ||||
| 	 */ | ||||
| 	if (atomic_dec_and_test(&group->user_waits) && group->shutdown) | ||||
| 		wake_up(&group->notification_waitq); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Mark mark as detached, remove it from group list. Mark still stays in object | ||||
|  * list until its last reference is dropped. Note that we rely on mark being | ||||
| @ -647,6 +723,12 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group) | ||||
| 		fsnotify_free_mark(mark); | ||||
| 		fsnotify_put_mark(mark); | ||||
| 	} | ||||
| 	/*
 | ||||
| 	 * Some marks can still be pinned when waiting for response from | ||||
| 	 * userspace. Wait for those now. fsnotify_prepare_user_wait() will | ||||
| 	 * not succeed now so this wait is race-free. | ||||
| 	 */ | ||||
| 	wait_event(group->notification_waitq, !atomic_read(&group->user_waits)); | ||||
| } | ||||
| 
 | ||||
| /* Destroy all marks attached to inode / vfsmount */ | ||||
|  | ||||
| @ -80,6 +80,7 @@ struct fsnotify_event; | ||||
| struct fsnotify_mark; | ||||
| struct fsnotify_event_private_data; | ||||
| struct fsnotify_fname; | ||||
| struct fsnotify_iter_info; | ||||
| 
 | ||||
| /*
 | ||||
|  * Each group must define these ops.  The fsnotify infrastructure will call | ||||
| @ -163,6 +164,8 @@ struct fsnotify_group { | ||||
| 	struct fsnotify_event *overflow_event;	/* Event we queue when the
 | ||||
| 						 * notification list is too | ||||
| 						 * full */ | ||||
| 	atomic_t user_waits;		/* Number of tasks waiting for user
 | ||||
| 					 * response */ | ||||
| 
 | ||||
| 	/* groups can define private fields here or use the void *private */ | ||||
| 	union { | ||||
| @ -368,6 +371,8 @@ extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, un | ||||
| extern void fsnotify_get_mark(struct fsnotify_mark *mark); | ||||
| extern void fsnotify_put_mark(struct fsnotify_mark *mark); | ||||
| extern void fsnotify_unmount_inodes(struct super_block *sb); | ||||
| extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); | ||||
| extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); | ||||
| 
 | ||||
| /* put here because inotify does some weird stuff when destroying watches */ | ||||
| extern void fsnotify_init_event(struct fsnotify_event *event, | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user