linux/fs/sysfs/inode.c

318 lines
7.8 KiB
C
Raw Normal View History

/*
* inode.c - basic inode and dentry operations.
*
* sysfs is Copyright (c) 2001-3 Patrick Mochel
*
* Please see Documentation/filesystems/sysfs.txt for more information.
*/
#undef DEBUG
#include <linux/pagemap.h>
#include <linux/namei.h>
#include <linux/backing-dev.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <asm/semaphore.h>
#include "sysfs.h"
/*
 * Superblock handed to iget_locked()/ilookup() for every inode lookup in
 * this file.  Only declared here; the definition lives outside this file.
 */
extern struct super_block * sysfs_sb;
/*
 * Address space operations installed on sysfs inode mappings by
 * sysfs_init_inode(); every hook points at the matching simple_* helper.
 */
static const struct address_space_operations sysfs_aops = {
	.readpage	= simple_readpage,
	.prepare_write	= simple_prepare_write,
	.commit_write	= simple_commit_write,
};
/*
 * Backing device info installed on sysfs inode mappings by
 * sysfs_init_inode(): zero readahead pages, and the NO_ACCT_DIRTY and
 * NO_WRITEBACK capability flags set.
 */
static struct backing_dev_info sysfs_backing_dev_info = {
	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
	.ra_pages	= 0,	/* No readahead */
};
/*
 * Inode operations installed by sysfs_init_inode(); only ->setattr is
 * filled in.
 */
static const struct inode_operations sysfs_inode_operations = {
	.setattr	= sysfs_setattr,
};
/**
 *	sysfs_delete_inode - delete a sysfs inode
 *	@inode: inode being deleted
 *
 *	When sysfs_is_shadowed_inode() reports @inode as shadowed, its
 *	->i_op table is kfree()d and the pointer cleared before the inode
 *	is passed on to generic_delete_inode().
 */
void sysfs_delete_inode(struct inode *inode)
{
	/* Free the shadowed directory inode operations */
	if (sysfs_is_shadowed_inode(inode)) {
		kfree(inode->i_op);
		inode->i_op = NULL;
	}

	/*
	 * Plain call rather than "return generic_delete_inode(inode);":
	 * a return statement with an expression is not allowed in a
	 * function returning void.
	 */
	generic_delete_inode(inode);
}
int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
{
struct inode * inode = dentry->d_inode;
struct sysfs_dirent * sd = dentry->d_fsdata;
struct iattr * sd_iattr;
unsigned int ia_valid = iattr->ia_valid;
int error;
if (!sd)
return -EINVAL;
sd_iattr = sd->s_iattr;
error = inode_change_ok(inode, iattr);
if (error)
return error;
error = inode_setattr(inode, iattr);
if (error)
return error;
if (!sd_iattr) {
/* setting attributes for the first time, allocate now */
sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL);
if (!sd_iattr)
return -ENOMEM;
/* assign default attributes */
sd_iattr->ia_mode = sd->s_mode;
sd_iattr->ia_uid = 0;
sd_iattr->ia_gid = 0;
sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
sd->s_iattr = sd_iattr;
}
/* attributes were changed atleast once in past */
if (ia_valid & ATTR_UID)
sd_iattr->ia_uid = iattr->ia_uid;
if (ia_valid & ATTR_GID)
sd_iattr->ia_gid = iattr->ia_gid;
if (ia_valid & ATTR_ATIME)
sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime,
inode->i_sb->s_time_gran);
if (ia_valid & ATTR_MTIME)
sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime,
inode->i_sb->s_time_gran);
if (ia_valid & ATTR_CTIME)
sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime,
inode->i_sb->s_time_gran);
if (ia_valid & ATTR_MODE) {
umode_t mode = iattr->ia_mode;
if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
mode &= ~S_ISGID;
sd_iattr->ia_mode = sd->s_mode = mode;
}
return error;
}
/*
 * Give @inode default attributes: the supplied @mode, uid and gid 0, and
 * all three timestamps set to the same current-time value.
 */
static inline void set_default_inode_attr(struct inode * inode, mode_t mode)
{
	/* sample the clock once and copy it to the other two stamps */
	inode->i_ctime = CURRENT_TIME;
	inode->i_mtime = inode->i_ctime;
	inode->i_atime = inode->i_ctime;

	inode->i_gid = 0;
	inode->i_uid = 0;
	inode->i_mode = mode;
}
/*
 * Copy mode, ownership and the three timestamps from @iattr into @inode.
 */
static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
{
	/* timestamps */
	inode->i_ctime = iattr->ia_ctime;
	inode->i_mtime = iattr->ia_mtime;
	inode->i_atime = iattr->ia_atime;

	/* ownership and permissions */
	inode->i_gid = iattr->ia_gid;
	inode->i_uid = iattr->ia_uid;
	inode->i_mode = iattr->ia_mode;
}
/*
 * sysfs takes i_mutex in a different lock order than other filesystems:
 * sysfs i_mutex is acquired in many places with subsystem locks held.
 * At the same time, many of the VFS locking rules do not apply to sysfs
 * at all (cross-directory rename, for example).  To untangle this mess
 * (which gives false positives in lockdep), sysfs inodes get their own
 * lock class for i_mutex.
 */
static struct lock_class_key sysfs_inode_imutex_key;
/**
 *	sysfs_init_inode - set up an inode for a sysfs_dirent
 *	@sd: sysfs_dirent the inode belongs to
 *	@inode: inode to initialize
 *
 *	Installs the sysfs address space operations, backing device info
 *	and inode operations, takes the inode number from @sd and puts
 *	->i_mutex into the sysfs lockdep class.  Attributes come from
 *	@sd->s_iattr when it is set, otherwise defaults based on
 *	@sd->s_mode are used.
 */
void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
{
	inode->i_ino = sd->s_ino;
	inode->i_blocks = 0;
	inode->i_op = &sysfs_inode_operations;
	inode->i_mapping->a_ops = &sysfs_aops;
	inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
	lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);

	if (!sd->s_iattr) {
		/* nothing was ever changed on this dirent, use defaults */
		set_default_inode_attr(inode, sd->s_mode);
	} else {
		/*
		 * The sysfs_dirent carries non-default attributes; load
		 * them into the new inode from that persistent copy.
		 */
		set_inode_attr(inode, sd->s_iattr);
	}
}
/**
* sysfs_get_inode - get inode for sysfs_dirent
* @sd: sysfs_dirent to allocate inode for
*
* Get inode for @sd. If such inode doesn't exist, a new inode
* is allocated and basics are initialized. New inode is
* returned locked.
*
* LOCKING:
* Kernel thread context (may sleep).
*
* RETURNS:
* Pointer to allocated inode on success, NULL on failure.
*/
struct inode * sysfs_get_inode(struct sysfs_dirent *sd)
{
struct inode *inode;
inode = iget_locked(sysfs_sb, sd->s_ino);
if (inode && (inode->i_state & I_NEW))
sysfs_init_inode(sd, inode);
return inode;
}
/**
* sysfs_instantiate - instantiate dentry
* @dentry: dentry to be instantiated
* @inode: inode associated with @sd
*
* Unlock @inode if locked and instantiate @dentry with @inode.
*
* LOCKING:
* None.
*/
void sysfs_instantiate(struct dentry *dentry, struct inode *inode)
{
BUG_ON(!dentry || dentry->d_inode);
if (inode->i_state & I_NEW) {
unlock_new_inode(inode);
if (dentry->d_parent && dentry->d_parent->d_inode) {
struct inode *p_inode = dentry->d_parent->d_inode;
p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
}
}
d_instantiate(dentry, inode);
}
/**
 * sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
 * @sd: target sysfs_dirent
 *
 * Drop dentry for @sd.  @sd must have been unlinked from its
 * parent on entry to this function such that it can't be looked
 * up anymore.
 *
 * @sd->s_dentry which is protected with sysfs_lock points to the
 * currently associated dentry but we're not holding a reference
 * to it and racing with dput().  Grab dcache_lock and verify
 * dentry before dropping it.  If @sd->s_dentry is NULL or dput()
 * beats us, no need to bother.
 *
 * Afterwards the link count and timestamps of the inode for @sd
 * and of the inode for its parent are adjusted, but only for
 * inodes that ilookup() actually finds.
 */
void sysfs_drop_dentry(struct sysfs_dirent *sd)
{
struct dentry *dentry = NULL;
struct timespec curtime;
struct inode *inode;
/* We're not holding a reference to ->s_dentry dentry but the
 * field will stay valid as long as sysfs_lock is held.
 */
spin_lock(&sysfs_lock);
spin_lock(&dcache_lock);
/* drop dentry if it's there and dput() didn't kill it yet */
if (sd->s_dentry && sd->s_dentry->d_inode) {
/* take our own reference, then unhash under the dentry's d_lock */
dentry = dget_locked(sd->s_dentry);
spin_lock(&dentry->d_lock);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
}
spin_unlock(&dcache_lock);
spin_unlock(&sysfs_lock);
/* dentry is still NULL here if the check above failed.
 * NOTE(review): this relies on dput() accepting NULL - confirm.
 */
dput(dentry);
/* XXX: unpin if directory, this will go away soon */
if (sysfs_type(sd) == SYSFS_DIR)
dput(dentry);
/* adjust nlink and update timestamp */
curtime = CURRENT_TIME;
inode = ilookup(sysfs_sb, sd->s_ino);
if (inode) {
mutex_lock(&inode->i_mutex);
inode->i_ctime = curtime;
drop_nlink(inode);
/* a directory loses a second link on its own inode */
if (sysfs_type(sd) == SYSFS_DIR)
drop_nlink(inode);
mutex_unlock(&inode->i_mutex);
iput(inode);
}
/* adjust nlink and update timestamp of the parent */
inode = ilookup(sysfs_sb, sd->s_parent->s_ino);
if (inode) {
mutex_lock(&inode->i_mutex);
inode->i_ctime = inode->i_mtime = curtime;
/* the parent's link count only changes when a directory goes away */
if (sysfs_type(sd) == SYSFS_DIR)
drop_nlink(inode);
mutex_unlock(&inode->i_mutex);
iput(inode);
}
}
int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
{
struct dentry *dir;
struct sysfs_dirent **pos, *sd;
int found = 0;
if (!dir_sd)
return -ENOENT;
dir = dir_sd->s_dentry;
if (dir->d_inode == NULL)
/* no inode means this hasn't been made visible yet */
return -ENOENT;
sysfs: suppress lockdep warnings Lockdep issues the following warning: [ 9.064000] ============================================= [ 9.064000] [ INFO: possible recursive locking detected ] [ 9.064000] 2.6.20-rc3-mm1 #3 [ 9.064000] --------------------------------------------- [ 9.064000] init/1 is trying to acquire lock: [ 9.064000] (&sysfs_inode_imutex_key){--..}, at: [<c03e6afc>] mutex_lock+0x1c/0x1f [ 9.064000] [ 9.064000] but task is already holding lock: [ 9.064000] (&sysfs_inode_imutex_key){--..}, at: [<c03e6afc>] mutex_lock+0x1c/0x1f [ 9.065000] [ 9.065000] other info that might help us debug this: [ 9.065000] 2 locks held by init/1: [ 9.065000] #0: (tty_mutex){--..}, at: [<c03e6afc>] mutex_lock+0x1c/0x1f [ 9.065000] #1: (&sysfs_inode_imutex_key){--..}, at: [<c03e6afc>] mutex_lock+0x1c/0x1f [ 9.065000] [ 9.065000] stack backtrace: [ 9.065000] [<c010390d>] show_trace_log_lvl+0x1a/0x30 [ 9.066000] [<c0103935>] show_trace+0x12/0x14 [ 9.066000] [<c0103a2f>] dump_stack+0x16/0x18 [ 9.066000] [<c0138cb8>] print_deadlock_bug+0xb9/0xc3 [ 9.066000] [<c0138d17>] check_deadlock+0x55/0x5a [ 9.066000] [<c013a953>] __lock_acquire+0x371/0xbf0 [ 9.066000] [<c013b7a9>] lock_acquire+0x69/0x83 [ 9.066000] [<c03e6b7e>] __mutex_lock_slowpath+0x75/0x2d1 [ 9.066000] [<c03e6afc>] mutex_lock+0x1c/0x1f [ 9.066000] [<c01b249c>] sysfs_drop_dentry+0xb1/0x133 [ 9.066000] [<c01b25d1>] sysfs_hash_and_remove+0xb3/0x142 [ 9.066000] [<c01b30ed>] sysfs_remove_file+0xd/0x10 [ 9.067000] [<c02849e0>] device_remove_file+0x23/0x2e [ 9.067000] [<c02850b2>] device_del+0x188/0x1e6 [ 9.067000] [<c028511b>] device_unregister+0xb/0x15 [ 9.067000] [<c0285318>] device_destroy+0x9c/0xa9 [ 9.067000] [<c0261431>] vcs_remove_sysfs+0x1c/0x3b [ 9.067000] [<c0267a08>] con_close+0x5e/0x6b [ 9.067000] [<c02598f2>] release_dev+0x4c4/0x6e5 [ 9.067000] [<c0259faa>] tty_release+0x12/0x1c [ 9.067000] [<c0174872>] __fput+0x177/0x1a0 [ 9.067000] [<c01746f5>] fput+0x3b/0x41 [ 9.068000] [<c0172ee1>] filp_close+0x36/0x65 [ 
9.068000] [<c0172f73>] sys_close+0x63/0xa4 [ 9.068000] [<c0102a96>] sysenter_past_esp+0x5f/0x99 [ 9.068000] ======================= This is due to sysfs_hash_and_remove() holding dir->d_inode->i_mutex before calling sysfs_drop_dentry() which calls orphan_all_buffers() which in turn takes node->i_mutex. Signed-off-by: Frederik Deweerdt <frederik.deweerdt@gmail.com> Cc: Oliver Neukum <oliver@neukum.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
2007-01-05 20:04:33 +00:00
mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
for (pos = &dir_sd->s_children; *pos; pos = &(*pos)->s_sibling) {
sd = *pos;
if (!sysfs_type(sd))
continue;
if (!strcmp(sd->s_name, name)) {
sd->s_flags |= SYSFS_FLAG_REMOVED;
*pos = sd->s_sibling;
sd->s_sibling = NULL;
found = 1;
break;
}
}
mutex_unlock(&dir->d_inode->i_mutex);
sysfs: implement sysfs_dirent active reference and immediate disconnect sysfs: implement sysfs_dirent active reference and immediate disconnect Opening a sysfs node references its associated kobject, so userland can arbitrarily prolong lifetime of a kobject which complicates lifetime rules in drivers. This patch implements active reference and makes the association between kobject and sysfs immediately breakable. Now each sysfs_dirent has two reference counts - s_count and s_active. s_count is a regular reference count which guarantees that the containing sysfs_dirent is accessible. As long as s_count reference is held, all sysfs internal fields in sysfs_dirent are accessible including s_parent and s_name. The newly added s_active is active reference count. This is acquired by invoking sysfs_get_active() and it's the caller's responsibility to ensure sysfs_dirent itself is accessible (should be holding s_count one way or the other). Dereferencing sysfs_dirent to access objects out of sysfs proper requires active reference. This includes access to the associated kobjects, attributes and ops. The active references can be drained and denied by calling sysfs_deactivate(). All active sysfs_dirents must be deactivated after deletion but before the default reference is dropped. This enables immediate disconnect of sysfs nodes. Once a sysfs_dirent is deleted, it won't access any entity external to sysfs proper. Because attr/bin_attr ops access both the node itself and its parent for kobject, they need to hold active references to both. sysfs_get/put_active_two() helpers are provided to help grabbing both references. Parent's is acquired first and released last. Unlike other operations, mmapped area lingers on after mmap() is finished and the module implement implementing it and kobj need to stay referenced till all the mapped pages are gone. 
This is accomplished by holding one set of active references to the bin_attr and its parent if there have been any mmap during lifetime of an openfile. The references are dropped when the openfile is released. This change makes sysfs lifetime rules independent from both kobject's and module's. It not only fixes several race conditions caused by sysfs not holding onto the proper module when referencing kobject, but also helps fixing and simplifying lifetime management in driver model and drivers by taking sysfs out of the equation. Please read the following message for more info. http://article.gmane.org/gmane.linux.kernel/510293 Signed-off-by: Tejun Heo <htejun@gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
2007-06-13 18:45:16 +00:00
if (!found)
return -ENOENT;
sysfs_drop_dentry(sd);
sysfs: implement sysfs_dirent active reference and immediate disconnect sysfs: implement sysfs_dirent active reference and immediate disconnect Opening a sysfs node references its associated kobject, so userland can arbitrarily prolong lifetime of a kobject which complicates lifetime rules in drivers. This patch implements active reference and makes the association between kobject and sysfs immediately breakable. Now each sysfs_dirent has two reference counts - s_count and s_active. s_count is a regular reference count which guarantees that the containing sysfs_dirent is accessible. As long as s_count reference is held, all sysfs internal fields in sysfs_dirent are accessible including s_parent and s_name. The newly added s_active is active reference count. This is acquired by invoking sysfs_get_active() and it's the caller's responsibility to ensure sysfs_dirent itself is accessible (should be holding s_count one way or the other). Dereferencing sysfs_dirent to access objects out of sysfs proper requires active reference. This includes access to the associated kobjects, attributes and ops. The active references can be drained and denied by calling sysfs_deactivate(). All active sysfs_dirents must be deactivated after deletion but before the default reference is dropped. This enables immediate disconnect of sysfs nodes. Once a sysfs_dirent is deleted, it won't access any entity external to sysfs proper. Because attr/bin_attr ops access both the node itself and its parent for kobject, they need to hold active references to both. sysfs_get/put_active_two() helpers are provided to help grabbing both references. Parent's is acquired first and released last. Unlike other operations, mmapped area lingers on after mmap() is finished and the module implement implementing it and kobj need to stay referenced till all the mapped pages are gone. 
This is accomplished by holding one set of active references to the bin_attr and its parent if there have been any mmap during lifetime of an openfile. The references are dropped when the openfile is released. This change makes sysfs lifetime rules independent from both kobject's and module's. It not only fixes several race conditions caused by sysfs not holding onto the proper module when referencing kobject, but also helps fixing and simplifying lifetime management in driver model and drivers by taking sysfs out of the equation. Please read the following message for more info. http://article.gmane.org/gmane.linux.kernel/510293 Signed-off-by: Tejun Heo <htejun@gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
2007-06-13 18:45:16 +00:00
sysfs_deactivate(sd);
sysfs_put(sd);
sysfs: implement sysfs_dirent active reference and immediate disconnect sysfs: implement sysfs_dirent active reference and immediate disconnect Opening a sysfs node references its associated kobject, so userland can arbitrarily prolong lifetime of a kobject which complicates lifetime rules in drivers. This patch implements active reference and makes the association between kobject and sysfs immediately breakable. Now each sysfs_dirent has two reference counts - s_count and s_active. s_count is a regular reference count which guarantees that the containing sysfs_dirent is accessible. As long as s_count reference is held, all sysfs internal fields in sysfs_dirent are accessible including s_parent and s_name. The newly added s_active is active reference count. This is acquired by invoking sysfs_get_active() and it's the caller's responsibility to ensure sysfs_dirent itself is accessible (should be holding s_count one way or the other). Dereferencing sysfs_dirent to access objects out of sysfs proper requires active reference. This includes access to the associated kobjects, attributes and ops. The active references can be drained and denied by calling sysfs_deactivate(). All active sysfs_dirents must be deactivated after deletion but before the default reference is dropped. This enables immediate disconnect of sysfs nodes. Once a sysfs_dirent is deleted, it won't access any entity external to sysfs proper. Because attr/bin_attr ops access both the node itself and its parent for kobject, they need to hold active references to both. sysfs_get/put_active_two() helpers are provided to help grabbing both references. Parent's is acquired first and released last. Unlike other operations, mmapped area lingers on after mmap() is finished and the module implement implementing it and kobj need to stay referenced till all the mapped pages are gone. 
This is accomplished by holding one set of active references to the bin_attr and its parent if there have been any mmap during lifetime of an openfile. The references are dropped when the openfile is released. This change makes sysfs lifetime rules independent from both kobject's and module's. It not only fixes several race conditions caused by sysfs not holding onto the proper module when referencing kobject, but also helps fixing and simplifying lifetime management in driver model and drivers by taking sysfs out of the equation. Please read the following message for more info. http://article.gmane.org/gmane.linux.kernel/510293 Signed-off-by: Tejun Heo <htejun@gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
2007-06-13 18:45:16 +00:00
return 0;
}