linux/fs/notify/group.c
Lizhi Xu cad3f4a22c inotify: Fix possible deadlock in fsnotify_destroy_mark
[Syzbot reported]
WARNING: possible circular locking dependency detected
6.11.0-rc4-syzkaller-00019-gb311c1b497e5 #0 Not tainted
------------------------------------------------------
kswapd0/78 is trying to acquire lock:
ffff88801b8d8930 (&group->mark_mutex){+.+.}-{3:3}, at: fsnotify_group_lock include/linux/fsnotify_backend.h:270 [inline]
ffff88801b8d8930 (&group->mark_mutex){+.+.}-{3:3}, at: fsnotify_destroy_mark+0x38/0x3c0 fs/notify/mark.c:578

but task is already holding lock:
ffffffff8ea2fd60 (fs_reclaim){+.+.}-{0:0}, at: balance_pgdat mm/vmscan.c:6841 [inline]
ffffffff8ea2fd60 (fs_reclaim){+.+.}-{0:0}, at: kswapd+0xbb4/0x35a0 mm/vmscan.c:7223

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (fs_reclaim){+.+.}-{0:0}:
       ...
       kmem_cache_alloc_noprof+0x3d/0x2a0 mm/slub.c:4044
       inotify_new_watch fs/notify/inotify/inotify_user.c:599 [inline]
       inotify_update_watch fs/notify/inotify/inotify_user.c:647 [inline]
       __do_sys_inotify_add_watch fs/notify/inotify/inotify_user.c:786 [inline]
       __se_sys_inotify_add_watch+0x72e/0x1070 fs/notify/inotify/inotify_user.c:729
       do_syscall_x64 arch/x86/entry/common.c:52 [inline]
       do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83
       entry_SYSCALL_64_after_hwframe+0x77/0x7f

-> #0 (&group->mark_mutex){+.+.}-{3:3}:
       ...
       __mutex_lock+0x136/0xd70 kernel/locking/mutex.c:752
       fsnotify_group_lock include/linux/fsnotify_backend.h:270 [inline]
       fsnotify_destroy_mark+0x38/0x3c0 fs/notify/mark.c:578
       fsnotify_destroy_marks+0x14a/0x660 fs/notify/mark.c:934
       fsnotify_inoderemove include/linux/fsnotify.h:264 [inline]
       dentry_unlink_inode+0x2e0/0x430 fs/dcache.c:403
       __dentry_kill+0x20d/0x630 fs/dcache.c:610
       shrink_kill+0xa9/0x2c0 fs/dcache.c:1055
       shrink_dentry_list+0x2c0/0x5b0 fs/dcache.c:1082
       prune_dcache_sb+0x10f/0x180 fs/dcache.c:1163
       super_cache_scan+0x34f/0x4b0 fs/super.c:221
       do_shrink_slab+0x701/0x1160 mm/shrinker.c:435
       shrink_slab+0x1093/0x14d0 mm/shrinker.c:662
       shrink_one+0x43b/0x850 mm/vmscan.c:4815
       shrink_many mm/vmscan.c:4876 [inline]
       lru_gen_shrink_node mm/vmscan.c:4954 [inline]
       shrink_node+0x3799/0x3de0 mm/vmscan.c:5934
       kswapd_shrink_node mm/vmscan.c:6762 [inline]
       balance_pgdat mm/vmscan.c:6954 [inline]
       kswapd+0x1bcd/0x35a0 mm/vmscan.c:7223

[Analysis]
The problem is that inotify_new_watch() is using GFP_KERNEL to allocate
new watches under group->mark_mutex, however if dentry reclaim races
with unlinking of an inode, it can end up dropping the last dentry reference
for an unlinked inode resulting in removal of fsnotify mark from reclaim
context which wants to acquire group->mark_mutex as well.

This scenario shows that all notification groups are in principle prone
to this kind of a deadlock (previously, we considered only fanotify and
dnotify to be problematic for other reasons) so make sure all
allocations under group->mark_mutex happen with GFP_NOFS.

Reported-and-tested-by: syzbot+c679f13773f295d2da53@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=c679f13773f295d2da53
Signed-off-by: Lizhi Xu <lizhi.xu@windriver.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20240927143642.2369508-1-lizhi.xu@windriver.com
2024-10-02 15:14:29 +02:00

161 lines
4.3 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
*/
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/srcu.h>
#include <linux/rculist.h>
#include <linux/wait.h>
#include <linux/memcontrol.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
#include <linux/atomic.h>
/*
* Final freeing of a group
*/
static void fsnotify_final_destroy_group(struct fsnotify_group *group)
{
if (group->ops->free_group_priv)
group->ops->free_group_priv(group);
mem_cgroup_put(group->memcg);
mutex_destroy(&group->mark_mutex);
kfree(group);
}
/*
* Stop queueing new events for this group. Once this function returns
* fsnotify_add_event() will not add any new events to the group's queue.
*/
void fsnotify_group_stop_queueing(struct fsnotify_group *group)
{
spin_lock(&group->notification_lock);
group->shutdown = true;
spin_unlock(&group->notification_lock);
}
/*
* Trying to get rid of a group. Remove all marks, flush all events and release
* the group reference.
* Note that another thread calling fsnotify_clear_marks_by_group() may still
* hold a ref to the group.
*/
void fsnotify_destroy_group(struct fsnotify_group *group)
{
/*
* Stop queueing new events. The code below is careful enough to not
* require this but fanotify needs to stop queuing events even before
* fsnotify_destroy_group() is called and this makes the other callers
* of fsnotify_destroy_group() to see the same behavior.
*/
fsnotify_group_stop_queueing(group);
/* Clear all marks for this group and queue them for destruction */
fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_ANY);
/*
* Some marks can still be pinned when waiting for response from
* userspace. Wait for those now. fsnotify_prepare_user_wait() will
* not succeed now so this wait is race-free.
*/
wait_event(group->notification_waitq, !atomic_read(&group->user_waits));
/*
* Wait until all marks get really destroyed. We could actually destroy
* them ourselves instead of waiting for worker to do it, however that
* would be racy as worker can already be processing some marks before
* we even entered fsnotify_destroy_group().
*/
fsnotify_wait_marks_destroyed();
/*
* Since we have waited for fsnotify_mark_srcu in
* fsnotify_mark_destroy_list() there can be no outstanding event
* notification against this group. So clearing the notification queue
* of all events is reliable now.
*/
fsnotify_flush_notify(group);
/*
* Destroy overflow event (we cannot use fsnotify_destroy_event() as
* that deliberately ignores overflow events.
*/
if (group->overflow_event)
group->ops->free_event(group, group->overflow_event);
fsnotify_put_group(group);
}
/*
* Get reference to a group.
*/
void fsnotify_get_group(struct fsnotify_group *group)
{
refcount_inc(&group->refcnt);
}
/*
* Drop a reference to a group. Free it if it's through.
*/
void fsnotify_put_group(struct fsnotify_group *group)
{
if (refcount_dec_and_test(&group->refcnt))
fsnotify_final_destroy_group(group);
}
EXPORT_SYMBOL_GPL(fsnotify_put_group);
static struct fsnotify_group *__fsnotify_alloc_group(
const struct fsnotify_ops *ops,
int flags, gfp_t gfp)
{
struct fsnotify_group *group;
group = kzalloc(sizeof(struct fsnotify_group), gfp);
if (!group)
return ERR_PTR(-ENOMEM);
/* set to 0 when there a no external references to this group */
refcount_set(&group->refcnt, 1);
atomic_set(&group->user_waits, 0);
spin_lock_init(&group->notification_lock);
INIT_LIST_HEAD(&group->notification_list);
init_waitqueue_head(&group->notification_waitq);
group->max_events = UINT_MAX;
mutex_init(&group->mark_mutex);
INIT_LIST_HEAD(&group->marks_list);
group->ops = ops;
group->flags = flags;
return group;
}
/*
* Create a new fsnotify_group and hold a reference for the group returned.
*/
struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops,
int flags)
{
gfp_t gfp = (flags & FSNOTIFY_GROUP_USER) ? GFP_KERNEL_ACCOUNT :
GFP_KERNEL;
return __fsnotify_alloc_group(ops, flags, gfp);
}
EXPORT_SYMBOL_GPL(fsnotify_alloc_group);
int fsnotify_fasync(int fd, struct file *file, int on)
{
struct fsnotify_group *group = file->private_data;
return fasync_helper(fd, file, on, &group->fsn_fa) >= 0 ? 0 : -EIO;
}