mirror of
https://github.com/torvalds/linux.git
synced 2024-11-05 11:32:04 +00:00
fa0d7e3de6
RCU free the struct inode. This will allow: - Subsequent store-free path walking patch. The inode must be consulted for permissions when walking, so an RCU inode reference is a must. - sb_inode_list_lock to be moved inside i_lock because sb list walkers who want to take i_lock no longer need to take sb_inode_list_lock to walk the list in the first place. This will simplify and optimize locking. - Could remove some nested trylock loops in dcache code - Could potentially simplify things a bit in VM land. Do not need to take the page lock to follow page->mapping. The downsides of this is the performance cost of using RCU. In a simple creat/unlink microbenchmark, performance drops by about 10% due to inability to reuse cache-hot slab objects. As iterations increase and RCU freeing starts kicking over, this increases to about 20%. In cases where inode lifetimes are longer (ie. many inodes may be allocated during the average life span of a single inode), a lot of this cache reuse is not applicable, so the regression caused by this patch is smaller. The cache-hot regression could largely be avoided by using SLAB_DESTROY_BY_RCU, however this adds some complexity to list walking and store-free path walking, so I prefer to implement this at a later date, if it is shown to be a win in real situations. I haven't found a regression in any non-micro benchmark so I doubt it will be a problem. Signed-off-by: Nick Piggin <npiggin@kernel.dk>
362 lines
9.0 KiB
C
362 lines
9.0 KiB
C
/*
|
|
* Copyright (c) 2000-2001 Christoph Hellwig.
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions, and the following disclaimer,
|
|
* without modification.
|
|
* 2. The name of the author may not be used to endorse or promote products
|
|
* derived from this software without specific prior written permission.
|
|
*
|
|
* Alternatively, this software may be distributed under the terms of the
|
|
* GNU General Public License ("GPL").
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
|
|
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
/*
|
|
* Veritas filesystem driver - inode routines.
|
|
*/
|
|
#include <linux/fs.h>
|
|
#include <linux/buffer_head.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/slab.h>
|
|
|
|
#include "vxfs.h"
|
|
#include "vxfs_inode.h"
|
|
#include "vxfs_extern.h"
|
|
|
|
|
|
/*
 * Cache for the in-core VxFS inode copies: vxfs_blkiget() and __vxfs_iget()
 * allocate struct vxfs_inode_info objects from it, vxfs_i_callback() frees
 * them back into it.
 */
struct kmem_cache *vxfs_inode_cachep;
|
|
|
|
|
|
#ifdef DIAGNOSTIC
|
|
/*
|
|
* Dump inode contents (partially).
|
|
*/
|
|
void
|
|
vxfs_dumpi(struct vxfs_inode_info *vip, ino_t ino)
|
|
{
|
|
printk(KERN_DEBUG "\n\n");
|
|
if (ino)
|
|
printk(KERN_DEBUG "dumping vxfs inode %ld\n", ino);
|
|
else
|
|
printk(KERN_DEBUG "dumping unknown vxfs inode\n");
|
|
|
|
printk(KERN_DEBUG "---------------------------\n");
|
|
printk(KERN_DEBUG "mode is %x\n", vip->vii_mode);
|
|
printk(KERN_DEBUG "nlink:%u, uid:%u, gid:%u\n",
|
|
vip->vii_nlink, vip->vii_uid, vip->vii_gid);
|
|
printk(KERN_DEBUG "size:%Lx, blocks:%u\n",
|
|
vip->vii_size, vip->vii_blocks);
|
|
printk(KERN_DEBUG "orgtype:%u\n", vip->vii_orgtype);
|
|
}
|
|
#endif
|
|
|
|
|
|
/**
|
|
* vxfs_blkiget - find inode based on extent #
|
|
* @sbp: superblock of the filesystem we search in
|
|
* @extent: number of the extent to search
|
|
* @ino: inode number to search
|
|
*
|
|
* Description:
|
|
* vxfs_blkiget searches inode @ino in the filesystem described by
|
|
* @sbp in the extent @extent.
|
|
* Returns the matching VxFS inode on success, else a NULL pointer.
|
|
*
|
|
* NOTE:
|
|
* While __vxfs_iget uses the pagecache vxfs_blkiget uses the
|
|
* buffercache. This function should not be used outside the
|
|
* read_super() method, otherwise the data may be incoherent.
|
|
*/
|
|
struct vxfs_inode_info *
|
|
vxfs_blkiget(struct super_block *sbp, u_long extent, ino_t ino)
|
|
{
|
|
struct buffer_head *bp;
|
|
u_long block, offset;
|
|
|
|
block = extent + ((ino * VXFS_ISIZE) / sbp->s_blocksize);
|
|
offset = ((ino % (sbp->s_blocksize / VXFS_ISIZE)) * VXFS_ISIZE);
|
|
bp = sb_bread(sbp, block);
|
|
|
|
if (bp && buffer_mapped(bp)) {
|
|
struct vxfs_inode_info *vip;
|
|
struct vxfs_dinode *dip;
|
|
|
|
if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, GFP_KERNEL)))
|
|
goto fail;
|
|
dip = (struct vxfs_dinode *)(bp->b_data + offset);
|
|
memcpy(vip, dip, sizeof(*vip));
|
|
#ifdef DIAGNOSTIC
|
|
vxfs_dumpi(vip, ino);
|
|
#endif
|
|
brelse(bp);
|
|
return (vip);
|
|
}
|
|
|
|
fail:
|
|
printk(KERN_WARNING "vxfs: unable to read block %ld\n", block);
|
|
brelse(bp);
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* __vxfs_iget - generic find inode facility
|
|
* @sbp: VFS superblock
|
|
* @ino: inode number
|
|
* @ilistp: inode list
|
|
*
|
|
* Description:
|
|
* Search the for inode number @ino in the filesystem
|
|
* described by @sbp. Use the specified inode table (@ilistp).
|
|
* Returns the matching VxFS inode on success, else an error code.
|
|
*/
|
|
static struct vxfs_inode_info *
|
|
__vxfs_iget(ino_t ino, struct inode *ilistp)
|
|
{
|
|
struct page *pp;
|
|
u_long offset;
|
|
|
|
offset = (ino % (PAGE_SIZE / VXFS_ISIZE)) * VXFS_ISIZE;
|
|
pp = vxfs_get_page(ilistp->i_mapping, ino * VXFS_ISIZE / PAGE_SIZE);
|
|
|
|
if (!IS_ERR(pp)) {
|
|
struct vxfs_inode_info *vip;
|
|
struct vxfs_dinode *dip;
|
|
caddr_t kaddr = (char *)page_address(pp);
|
|
|
|
if (!(vip = kmem_cache_alloc(vxfs_inode_cachep, GFP_KERNEL)))
|
|
goto fail;
|
|
dip = (struct vxfs_dinode *)(kaddr + offset);
|
|
memcpy(vip, dip, sizeof(*vip));
|
|
#ifdef DIAGNOSTIC
|
|
vxfs_dumpi(vip, ino);
|
|
#endif
|
|
vxfs_put_page(pp);
|
|
return (vip);
|
|
}
|
|
|
|
printk(KERN_WARNING "vxfs: error on page %p\n", pp);
|
|
return ERR_CAST(pp);
|
|
|
|
fail:
|
|
printk(KERN_WARNING "vxfs: unable to read inode %ld\n", (unsigned long)ino);
|
|
vxfs_put_page(pp);
|
|
return ERR_PTR(-ENOMEM);
|
|
}
|
|
|
|
/**
|
|
* vxfs_stiget - find inode using the structural inode list
|
|
* @sbp: VFS superblock
|
|
* @ino: inode #
|
|
*
|
|
* Description:
|
|
* Find inode @ino in the filesystem described by @sbp using
|
|
* the structural inode list.
|
|
* Returns the matching VxFS inode on success, else a NULL pointer.
|
|
*/
|
|
struct vxfs_inode_info *
|
|
vxfs_stiget(struct super_block *sbp, ino_t ino)
|
|
{
|
|
struct vxfs_inode_info *vip;
|
|
|
|
vip = __vxfs_iget(ino, VXFS_SBI(sbp)->vsi_stilist);
|
|
return IS_ERR(vip) ? NULL : vip;
|
|
}
|
|
|
|
/**
|
|
* vxfs_transmod - mode for a VxFS inode
|
|
* @vip: VxFS inode
|
|
*
|
|
* Description:
|
|
* vxfs_transmod returns a Linux mode_t for a given
|
|
* VxFS inode structure.
|
|
*/
|
|
static __inline__ mode_t
|
|
vxfs_transmod(struct vxfs_inode_info *vip)
|
|
{
|
|
mode_t ret = vip->vii_mode & ~VXFS_TYPE_MASK;
|
|
|
|
if (VXFS_ISFIFO(vip))
|
|
ret |= S_IFIFO;
|
|
if (VXFS_ISCHR(vip))
|
|
ret |= S_IFCHR;
|
|
if (VXFS_ISDIR(vip))
|
|
ret |= S_IFDIR;
|
|
if (VXFS_ISBLK(vip))
|
|
ret |= S_IFBLK;
|
|
if (VXFS_ISLNK(vip))
|
|
ret |= S_IFLNK;
|
|
if (VXFS_ISREG(vip))
|
|
ret |= S_IFREG;
|
|
if (VXFS_ISSOC(vip))
|
|
ret |= S_IFSOCK;
|
|
|
|
return (ret);
|
|
}
|
|
|
|
/**
|
|
* vxfs_iinit- helper to fill inode fields
|
|
* @ip: VFS inode
|
|
* @vip: VxFS inode
|
|
*
|
|
* Description:
|
|
* vxfs_instino is a helper function to fill in all relevant
|
|
* fields in @ip from @vip.
|
|
*/
|
|
static void
|
|
vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip)
|
|
{
|
|
|
|
ip->i_mode = vxfs_transmod(vip);
|
|
ip->i_uid = (uid_t)vip->vii_uid;
|
|
ip->i_gid = (gid_t)vip->vii_gid;
|
|
|
|
ip->i_nlink = vip->vii_nlink;
|
|
ip->i_size = vip->vii_size;
|
|
|
|
ip->i_atime.tv_sec = vip->vii_atime;
|
|
ip->i_ctime.tv_sec = vip->vii_ctime;
|
|
ip->i_mtime.tv_sec = vip->vii_mtime;
|
|
ip->i_atime.tv_nsec = 0;
|
|
ip->i_ctime.tv_nsec = 0;
|
|
ip->i_mtime.tv_nsec = 0;
|
|
|
|
ip->i_blocks = vip->vii_blocks;
|
|
ip->i_generation = vip->vii_gen;
|
|
|
|
ip->i_private = vip;
|
|
|
|
}
|
|
|
|
/**
|
|
* vxfs_get_fake_inode - get fake inode structure
|
|
* @sbp: filesystem superblock
|
|
* @vip: fspriv inode
|
|
*
|
|
* Description:
|
|
* vxfs_fake_inode gets a fake inode (not in the inode hash) for a
|
|
* superblock, vxfs_inode pair.
|
|
* Returns the filled VFS inode.
|
|
*/
|
|
struct inode *
|
|
vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip)
|
|
{
|
|
struct inode *ip = NULL;
|
|
|
|
if ((ip = new_inode(sbp))) {
|
|
ip->i_ino = get_next_ino();
|
|
vxfs_iinit(ip, vip);
|
|
ip->i_mapping->a_ops = &vxfs_aops;
|
|
}
|
|
return (ip);
|
|
}
|
|
|
|
/**
 * vxfs_put_fake_inode - free faked inode
 * @ip:		VFS inode
 *
 * Description:
 *  vxfs_put_fake_inode drops the reference on @ip by handing it
 *  to iput().
 */
void
vxfs_put_fake_inode(struct inode *ip)
{
	iput(ip);
}
|
|
|
|
/**
|
|
* vxfs_iget - get an inode
|
|
* @sbp: the superblock to get the inode for
|
|
* @ino: the number of the inode to get
|
|
*
|
|
* Description:
|
|
* vxfs_read_inode creates an inode, reads the disk inode for @ino and fills
|
|
* in all relevant fields in the new inode.
|
|
*/
|
|
struct inode *
|
|
vxfs_iget(struct super_block *sbp, ino_t ino)
|
|
{
|
|
struct vxfs_inode_info *vip;
|
|
const struct address_space_operations *aops;
|
|
struct inode *ip;
|
|
|
|
ip = iget_locked(sbp, ino);
|
|
if (!ip)
|
|
return ERR_PTR(-ENOMEM);
|
|
if (!(ip->i_state & I_NEW))
|
|
return ip;
|
|
|
|
vip = __vxfs_iget(ino, VXFS_SBI(sbp)->vsi_ilist);
|
|
if (IS_ERR(vip)) {
|
|
iget_failed(ip);
|
|
return ERR_CAST(vip);
|
|
}
|
|
|
|
vxfs_iinit(ip, vip);
|
|
|
|
if (VXFS_ISIMMED(vip))
|
|
aops = &vxfs_immed_aops;
|
|
else
|
|
aops = &vxfs_aops;
|
|
|
|
if (S_ISREG(ip->i_mode)) {
|
|
ip->i_fop = &generic_ro_fops;
|
|
ip->i_mapping->a_ops = aops;
|
|
} else if (S_ISDIR(ip->i_mode)) {
|
|
ip->i_op = &vxfs_dir_inode_ops;
|
|
ip->i_fop = &vxfs_dir_operations;
|
|
ip->i_mapping->a_ops = aops;
|
|
} else if (S_ISLNK(ip->i_mode)) {
|
|
if (!VXFS_ISIMMED(vip)) {
|
|
ip->i_op = &page_symlink_inode_operations;
|
|
ip->i_mapping->a_ops = &vxfs_aops;
|
|
} else {
|
|
ip->i_op = &vxfs_immed_symlink_iops;
|
|
vip->vii_immed.vi_immed[ip->i_size] = '\0';
|
|
}
|
|
} else
|
|
init_special_inode(ip, ip->i_mode, old_decode_dev(vip->vii_rdev));
|
|
|
|
unlock_new_inode(ip);
|
|
return ip;
|
|
}
|
|
|
|
static void vxfs_i_callback(struct rcu_head *head)
|
|
{
|
|
struct inode *inode = container_of(head, struct inode, i_rcu);
|
|
INIT_LIST_HEAD(&inode->i_dentry);
|
|
kmem_cache_free(vxfs_inode_cachep, inode->i_private);
|
|
}
|
|
|
|
/**
|
|
* vxfs_evict_inode - remove inode from main memory
|
|
* @ip: inode to discard.
|
|
*
|
|
* Description:
|
|
* vxfs_evict_inode() is called on the final iput and frees the private
|
|
* inode area.
|
|
*/
|
|
void
|
|
vxfs_evict_inode(struct inode *ip)
|
|
{
|
|
truncate_inode_pages(&ip->i_data, 0);
|
|
end_writeback(ip);
|
|
call_rcu(&ip->i_rcu, vxfs_i_callback);
|
|
}
|