linux/fs/notify/group.c
Eric Paris 90586523eb fsnotify: unified filesystem notification backend
fsnotify is a backend for filesystem notification.  fsnotify does
not provide any userspace interface but does provide the basis
needed for other notification schemes such as dnotify.  fsnotify
can be extended to be the backend for inotify or the upcoming
fanotify.  fsnotify provides a mechanism for "groups" to register for
some set of filesystem events and to then deliver those events to
those groups for processing.

fsnotify has a number of benefits, the first being actually shrinking the size
of an inode.  Before fsnotify to support both dnotify and inotify an inode had

        unsigned long           i_dnotify_mask; /* Directory notify events */
        struct dnotify_struct   *i_dnotify; /* for directory notifications */
        struct list_head        inotify_watches; /* watches on this inode */
        struct mutex            inotify_mutex;  /* protects the watches list

But with fsnotify this same functionallity (and more) is done with just

        __u32                   i_fsnotify_mask; /* all events for this inode */
        struct hlist_head       i_fsnotify_mark_entries; /* marks on this inode */

That's right, inotify, dnotify, and fanotify all in 64 bits.  We used that
much space just in inotify_watches alone, before this patch set.

fsnotify object lifetime and locking is MUCH better than what we have today.
inotify locking is incredibly complex.  See 8f7b0ba1c8 as an example of
what's been busted since inception.  inotify needs to know internal semantics
of superblock destruction and unmounting to function.  The inode pinning and
vfs contortions are horrible.

no fsnotify implementers do allocation under locks.  This means things like
f04b30de3 which (due to an overabundance of caution) changes GFP_KERNEL to
GFP_NOFS can be reverted.  There are no longer any allocation rules when using
or implementing your own fsnotify listener.

fsnotify paves the way for fanotify.  In brief fanotify is a notification
mechanism that delivers the lisener both an 'event' and an open file descriptor
to the object in question.  This means that fanotify is pathname agnostic.
Some on lkml may not care for the original companies or users that pushed for
TALPA, but fanotify was designed with flexibility and input for other users in
mind.  The readahead group expressed interest in fanotify as it could be used
to profile disk access on boot without breaking the audit system.  The desktop
search groups have also expressed interest in fanotify as it solves a number
of the race conditions and problems present with managing inotify when more
than a limited number of specific files are of interest.  fanotify can provide
for a userspace access control system which makes it a clean interface for AV
vendors to hook without trying to do binary patching on the syscall table,
LSM, and everywhere else they do their things today.  With this patch series
fanotify can be implemented in less than 1200 lines of easy to review code.
Almost all of which is the socket based user interface.

This patch series builds fsnotify to the point that it can implement
dnotify and inotify_user.  Patches exist and will be sent soon after
acceptance to finish the in kernel inotify conversion (audit) and implement
fanotify.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
2009-06-11 14:57:52 -04:00

199 lines
5.5 KiB
C

/*
* Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/srcu.h>
#include <linux/rculist.h>
#include <linux/wait.h>
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
#include <asm/atomic.h>
/* protects writes to fsnotify_groups and fsnotify_mask */
static DEFINE_MUTEX(fsnotify_grp_mutex);
/* protects reads while running the fsnotify_groups list */
struct srcu_struct fsnotify_grp_srcu;
/* all groups registered to receive filesystem notifications */
LIST_HEAD(fsnotify_groups);
/* bitwise OR of all events (FS_*) interesting to some group on this system */
__u32 fsnotify_mask;
/*
* When a new group registers or changes it's set of interesting events
* this function updates the fsnotify_mask to contain all interesting events
*/
void fsnotify_recalc_global_mask(void)
{
struct fsnotify_group *group;
__u32 mask = 0;
int idx;
idx = srcu_read_lock(&fsnotify_grp_srcu);
list_for_each_entry_rcu(group, &fsnotify_groups, group_list)
mask |= group->mask;
srcu_read_unlock(&fsnotify_grp_srcu, idx);
fsnotify_mask = mask;
}
/*
* Take a reference to a group so things found under the fsnotify_grp_mutex
* can't get freed under us
*/
static void fsnotify_get_group(struct fsnotify_group *group)
{
atomic_inc(&group->refcnt);
}
/*
* Final freeing of a group
*/
static void fsnotify_destroy_group(struct fsnotify_group *group)
{
if (group->ops->free_group_priv)
group->ops->free_group_priv(group);
kfree(group);
}
/*
* Remove this group from the global list of groups that will get events
* this can be done even if there are still references and things still using
* this group. This just stops the group from getting new events.
*/
static void __fsnotify_evict_group(struct fsnotify_group *group)
{
BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
if (group->on_group_list)
list_del_rcu(&group->group_list);
group->on_group_list = 0;
}
/*
* Called when a group is no longer interested in getting events. This can be
* used if a group is misbehaving or if for some reason a group should no longer
* get any filesystem events.
*/
void fsnotify_evict_group(struct fsnotify_group *group)
{
mutex_lock(&fsnotify_grp_mutex);
__fsnotify_evict_group(group);
mutex_unlock(&fsnotify_grp_mutex);
}
/*
* Drop a reference to a group. Free it if it's through.
*/
void fsnotify_put_group(struct fsnotify_group *group)
{
if (!atomic_dec_and_mutex_lock(&group->refcnt, &fsnotify_grp_mutex))
return;
/*
* OK, now we know that there's no other users *and* we hold mutex,
* so no new references will appear
*/
__fsnotify_evict_group(group);
/*
* now it's off the list, so the only thing we might care about is
* srcu access....
*/
mutex_unlock(&fsnotify_grp_mutex);
synchronize_srcu(&fsnotify_grp_srcu);
/* and now it is really dead. _Nothing_ could be seeing it */
fsnotify_recalc_global_mask();
fsnotify_destroy_group(group);
}
/*
* Simply run the fsnotify_groups list and find a group which matches
* the given parameters. If a group is found we take a reference to that
* group.
*/
static struct fsnotify_group *fsnotify_find_group(unsigned int group_num, __u32 mask,
const struct fsnotify_ops *ops)
{
struct fsnotify_group *group_iter;
struct fsnotify_group *group = NULL;
BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
list_for_each_entry_rcu(group_iter, &fsnotify_groups, group_list) {
if (group_iter->group_num == group_num) {
if ((group_iter->mask == mask) &&
(group_iter->ops == ops)) {
fsnotify_get_group(group_iter);
group = group_iter;
} else
group = ERR_PTR(-EEXIST);
}
}
return group;
}
/*
* Either finds an existing group which matches the group_num, mask, and ops or
* creates a new group and adds it to the global group list. In either case we
* take a reference for the group returned.
*/
struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
const struct fsnotify_ops *ops)
{
struct fsnotify_group *group, *tgroup;
/* very low use, simpler locking if we just always alloc */
group = kmalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
if (!group)
return ERR_PTR(-ENOMEM);
atomic_set(&group->refcnt, 1);
group->on_group_list = 0;
group->group_num = group_num;
group->mask = mask;
group->ops = ops;
mutex_lock(&fsnotify_grp_mutex);
tgroup = fsnotify_find_group(group_num, mask, ops);
if (tgroup) {
/* group already exists */
mutex_unlock(&fsnotify_grp_mutex);
/* destroy the new one we made */
fsnotify_put_group(group);
return tgroup;
}
/* group not found, add a new one */
list_add_rcu(&group->group_list, &fsnotify_groups);
group->on_group_list = 1;
mutex_unlock(&fsnotify_grp_mutex);
if (mask)
fsnotify_recalc_global_mask();
return group;
}