From 9c64daff9d5afb102dfe64a26829e26725538e58 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 8 Jun 2009 15:22:24 -0400 Subject: [PATCH 01/11] ext3: avoid unnecessary spinlock in critical POSIX ACL path If a filesystem supports POSIX ACL's, the VFS layer expects the filesystem to do POSIX ACL checks on any files not owned by the caller, and it does this for every single pathname component that it looks up. That obviously can be pretty expensive if the filesystem isn't careful about it, especially with locking. That's doubly sad, since the common case tends to be that there are no ACL's associated with the files in question. ext3 already caches the ACL data so that it doesn't have to look it up over and over again, but it does so by taking the inode->i_lock spinlock on every lookup. Which is a noticeable overhead even if it's a private lock, especially on CPU's where the serialization is expensive (eg Intel Netburst aka 'P4'). For the special case of not actually having any ACL's, all that locking is unnecessary. Even if somebody else were to be changing the ACL's on another CPU, we simply don't care - if we've seen a NULL ACL, we might as well use it. So just load the ACL speculatively without any locking, and if it was NULL, just use it. If it's non-NULL (either because we had a cached entry, or because the cache hasn't been filled in at all), it means that we'll need to get the lock and re-load it properly. This is noticeable even on Nehalem, which does locking quite well (much better than P4). From lmbench: Processor, Processes - times in microseconds - smaller is better -------------------------------------------------------------------- Host OS Mhz null null open slct fork exec sh call I/O stat clos TCP proc proc proc --------- ------------- ---- ---- ---- ---- ---- ---- ---- ---- ---- - before: nehalem.l Linux 2.6.30- 3193 0.04 0.09 0.95 1.45 2.18 69.1 273. 1141 nehalem.l Linux 2.6.30- 3193 0.04 0.09 0.95 1.48 2.28 69.9 253. 
1140 nehalem.l Linux 2.6.30- 3193 0.04 0.10 0.95 1.42 2.19 68.6 284. 1141 - after: nehalem.l Linux 2.6.30- 3193 0.04 0.09 0.92 1.44 2.12 68.3 282. 1094 nehalem.l Linux 2.6.30- 3193 0.04 0.09 0.92 1.39 2.20 67.0 308. 1123 nehalem.l Linux 2.6.30- 3193 0.04 0.09 0.92 1.39 2.36 67.4 293. 1148 where you can see what appears to be a roughly 3% improvement in stat and open/close latencies from just the removal of the locking overhead. Of course, this only matters for files you don't own (the owner never needs to do the ACL checks), but that's the common case for libraries, header files, and executables. As well as for the base components of any absolute pathname, even if you are the owner of the final file. [ At some point we probably want to move this ACL caching logic entirely into the VFS layer (and only call down to the filesystem when uncached), but in the meantime this improves ext3 a bit. A similar fix to btrfs makes a much bigger difference (15x improvement in lmbench) due to broken caching. 
] Signed-off-by: Linus Torvalds Signed-off-by: "Theodore Ts'o" Acked-by: Jan Kara Cc: Al Viro Signed-off-by: Al Viro --- fs/ext3/acl.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index d81ef2fdb08e..e0c745451715 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -129,12 +129,15 @@ fail: static inline struct posix_acl * ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl) { - struct posix_acl *acl = EXT3_ACL_NOT_CACHED; + struct posix_acl *acl = ACCESS_ONCE(*i_acl); - spin_lock(&inode->i_lock); - if (*i_acl != EXT3_ACL_NOT_CACHED) - acl = posix_acl_dup(*i_acl); - spin_unlock(&inode->i_lock); + if (acl) { + spin_lock(&inode->i_lock); + acl = *i_acl; + if (acl != EXT3_ACL_NOT_CACHED) + acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); + } return acl; } From 210ad6aedb332e73167ece5af9bd47f0da8c2aca Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 8 Jun 2009 15:22:25 -0400 Subject: [PATCH 02/11] ext4: avoid unnecessary spinlock in critical POSIX ACL path If a filesystem supports POSIX ACL's, the VFS layer expects the filesystem to do POSIX ACL checks on any files not owned by the caller, and it does this for every single pathname component that it looks up. That obviously can be pretty expensive if the filesystem isn't careful about it, especially with locking. That's doubly sad, since the common case tends to be that there are no ACL's associated with the files in question. ext4 already caches the ACL data so that it doesn't have to look it up over and over again, but it does so by taking the inode->i_lock spinlock on every lookup. Which is a noticeable overhead even if it's a private lock, especially on CPU's where the serialization is expensive (eg Intel Netburst aka 'P4'). For the special case of not actually having any ACL's, all that locking is unnecessary. 
Even if somebody else were to be changing the ACL's on another CPU, we simply don't care - if we've seen a NULL ACL, we might as well use it. So just load the ACL speculatively without any locking, and if it was NULL, just use it. If it's non-NULL (either because we had a cached entry, or because the cache hasn't been filled in at all), it means that we'll need to get the lock and re-load it properly. (This commit was ported from a patch originally authored by Linus for ext3.) Signed-off-by: "Theodore Ts'o" Signed-off-by: Al Viro --- fs/ext4/acl.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 647e0d65a284..605aeed96d68 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -129,12 +129,15 @@ fail: static inline struct posix_acl * ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl) { - struct posix_acl *acl = EXT4_ACL_NOT_CACHED; + struct posix_acl *acl = ACCESS_ONCE(*i_acl); - spin_lock(&inode->i_lock); - if (*i_acl != EXT4_ACL_NOT_CACHED) - acl = posix_acl_dup(*i_acl); - spin_unlock(&inode->i_lock); + if (acl) { + spin_lock(&inode->i_lock); + acl = *i_acl; + if (acl != EXT4_ACL_NOT_CACHED) + acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); + } return acl; } From b0895513f499b8f786d292ce48589ca210ca1d6e Mon Sep 17 00:00:00 2001 From: "J. R. Okajima" Date: Wed, 17 Jun 2009 01:16:50 +0900 Subject: [PATCH 03/11] remove unlock_kernel() left accidentally commit 337eb00a2c3a421999c39c94ce7e33545ee8baa7 Push BKL down into ->remount_fs() and commit 4aa98cf768b6f2ea4b204620d949a665959214f6 Push BKL down into do_remount_sb() were incorrectly merged. The former removes one pair of lock/unlock_kernel(), but the latter adds several unlock_kernel(). Finally, a few unlock_kernel() calls were left. Signed-off-by: J. R. 
Okajima Signed-off-by: Al Viro --- fs/super.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/fs/super.c b/fs/super.c index 83b47416d006..d40d53a22fb5 100644 --- a/fs/super.c +++ b/fs/super.c @@ -545,24 +545,18 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) { if (force) mark_files_ro(sb); - else if (!fs_may_remount_ro(sb)) { - unlock_kernel(); + else if (!fs_may_remount_ro(sb)) return -EBUSY; - } retval = vfs_dq_off(sb, 1); - if (retval < 0 && retval != -ENOSYS) { - unlock_kernel(); + if (retval < 0 && retval != -ENOSYS) return -EBUSY; - } } remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); if (sb->s_op->remount_fs) { retval = sb->s_op->remount_fs(sb, &flags, data); - if (retval) { - unlock_kernel(); + if (retval) return retval; - } } sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); if (remount_rw) From fe36adf47eb1f7f4972559efa30ce3d2d3f977f2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2009 13:35:01 -0400 Subject: [PATCH 04/11] No instance of ->bmap() needs BKL Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 2 +- fs/ioctl.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 3120f8dd2c31..229d7b7c50a3 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -187,7 +187,7 @@ readpages: no write_begin: no locks the page yes write_end: no yes, unlocks yes perform_write: no n/a yes -bmap: yes +bmap: no invalidatepage: no yes releasepage: no yes direct_IO: no diff --git a/fs/ioctl.c b/fs/ioctl.c index 286f38dfc6c0..001f8d3118f2 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -70,9 +70,7 @@ static int ioctl_fibmap(struct file *filp, int __user *p) res = get_user(block, p); if (res) return res; - lock_kernel(); res = mapping->a_ops->bmap(mapping, block); - unlock_kernel(); 
return put_user(res, p); } From 66c6af2e8ba55d4d6691c136b42f2423ab9598ec Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2009 14:15:00 -0400 Subject: [PATCH 05/11] fuse doesn't need BKL in ->umount_begin() Signed-off-by: Al Viro --- fs/fuse/inode.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f0df55a52929..d8673ccf90b7 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -19,7 +19,6 @@ #include #include #include -#include MODULE_AUTHOR("Miklos Szeredi "); MODULE_DESCRIPTION("Filesystem in Userspace"); @@ -260,9 +259,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, static void fuse_umount_begin(struct super_block *sb) { - lock_kernel(); fuse_abort_conn(get_fuse_conn_super(sb)); - unlock_kernel(); } static void fuse_send_destroy(struct fuse_conn *fc) From ee450f796f6c4f3a563c914cb93ccfa91a1f7580 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2009 14:17:21 -0400 Subject: [PATCH 06/11] 9P doesn't need BKL in ->umount_begin() Signed-off-by: Al Viro --- fs/9p/vfs_super.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index ab5547ff29a1..38d695d66a0b 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include @@ -231,10 +230,8 @@ v9fs_umount_begin(struct super_block *sb) { struct v9fs_session_info *v9ses; - lock_kernel(); v9ses = sb->s_fs_info; v9fs_session_cancel(v9ses); - unlock_kernel(); } static const struct super_operations v9fs_super_ops = { From 608ba50bd0225d95469154feba8f00a6457848c1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2009 14:52:13 -0400 Subject: [PATCH 07/11] Cleanup of adfs headers Signed-off-by: Al Viro --- fs/adfs/adfs.h | 55 ++++++++++++++++++++++++++++++++++++++ fs/adfs/dir.c | 8 ------ fs/adfs/dir_f.c | 8 ------ fs/adfs/dir_fplus.c | 8 ------ fs/adfs/file.c | 4 --- fs/adfs/inode.c | 10 ------- fs/adfs/map.c | 6 ----- fs/adfs/super.c | 17 
++---------- include/linux/adfs_fs.h | 13 --------- include/linux/adfs_fs_i.h | 24 ----------------- include/linux/adfs_fs_sb.h | 38 -------------------------- 11 files changed, 57 insertions(+), 134 deletions(-) delete mode 100644 include/linux/adfs_fs_i.h delete mode 100644 include/linux/adfs_fs_sb.h diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index a6665f37f456..9cc18775b832 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -1,3 +1,6 @@ +#include +#include + /* Internal data structures for ADFS */ #define ADFS_FREE_FRAG 0 @@ -16,6 +19,58 @@ struct buffer_head; +/* + * adfs file system inode data in memory + */ +struct adfs_inode_info { + loff_t mmu_private; + unsigned long parent_id; /* object id of parent */ + __u32 loadaddr; /* RISC OS load address */ + __u32 execaddr; /* RISC OS exec address */ + unsigned int filetype; /* RISC OS file type */ + unsigned int attr; /* RISC OS permissions */ + unsigned int stamped:1; /* RISC OS file has date/time */ + struct inode vfs_inode; +}; + +/* + * Forward-declare this + */ +struct adfs_discmap; +struct adfs_dir_ops; + +/* + * ADFS file system superblock data in memory + */ +struct adfs_sb_info { + struct adfs_discmap *s_map; /* bh list containing map */ + struct adfs_dir_ops *s_dir; /* directory operations */ + + uid_t s_uid; /* owner uid */ + gid_t s_gid; /* owner gid */ + umode_t s_owner_mask; /* ADFS owner perm -> unix perm */ + umode_t s_other_mask; /* ADFS other perm -> unix perm */ + + __u32 s_ids_per_zone; /* max. 
no ids in one zone */ + __u32 s_idlen; /* length of ID in map */ + __u32 s_map_size; /* sector size of a map */ + unsigned long s_size; /* total size (in blocks) of this fs */ + signed int s_map2blk; /* shift left by this for map->sector */ + unsigned int s_log2sharesize;/* log2 share size */ + __le32 s_version; /* disc format version */ + unsigned int s_namelen; /* maximum number of characters in name */ +}; + +static inline struct adfs_sb_info *ADFS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct adfs_inode_info *ADFS_I(struct inode *inode) +{ + return container_of(inode, struct adfs_inode_info, vfs_inode); +} + /* * Directory handling */ diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index 4d4073447d1a..23aa52f548a0 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -9,15 +9,7 @@ * * Common directory handling for ADFS */ -#include -#include -#include -#include -#include -#include #include -#include /* for file_fsync() */ - #include "adfs.h" /* diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c index 31df6adf0de6..bafc71222e25 100644 --- a/fs/adfs/dir_f.c +++ b/fs/adfs/dir_f.c @@ -9,15 +9,7 @@ * * E and F format directory handling */ -#include -#include -#include -#include -#include -#include #include -#include - #include "adfs.h" #include "dir_f.h" diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c index 139e0f345f18..1796bb352d05 100644 --- a/fs/adfs/dir_fplus.c +++ b/fs/adfs/dir_fplus.c @@ -7,15 +7,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
*/ -#include -#include -#include -#include -#include -#include #include -#include - #include "adfs.h" #include "dir_fplus.h" diff --git a/fs/adfs/file.c b/fs/adfs/file.c index 8224d54a2afb..005ea34d1758 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c @@ -19,10 +19,6 @@ * * adfs regular file handling primitives */ -#include -#include /* for file_fsync() */ -#include - #include "adfs.h" const struct file_operations adfs_file_operations = { diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 05b3a677201d..798cb071d132 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -7,17 +7,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include -#include -#include -#include -#include -#include -#include #include -#include #include - #include "adfs.h" /* @@ -395,4 +386,3 @@ int adfs_write_inode(struct inode *inode, int wait) unlock_kernel(); return ret; } -MODULE_LICENSE("GPL"); diff --git a/fs/adfs/map.c b/fs/adfs/map.c index 568081b93f73..d1a5932bb0f1 100644 --- a/fs/adfs/map.c +++ b/fs/adfs/map.c @@ -7,14 +7,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include -#include -#include -#include #include - #include - #include "adfs.h" /* diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 0ec5aaf47aa7..aad92f0a1048 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -8,26 +8,12 @@ * published by the Free Software Foundation. 
*/ #include -#include -#include -#include -#include -#include -#include -#include #include #include -#include #include -#include #include #include - -#include -#include - -#include - +#include #include "adfs.h" #include "dir_f.h" #include "dir_fplus.h" @@ -534,3 +520,4 @@ static void __exit exit_adfs_fs(void) module_init(init_adfs_fs) module_exit(exit_adfs_fs) +MODULE_LICENSE("GPL"); diff --git a/include/linux/adfs_fs.h b/include/linux/adfs_fs.h index ef788c2085a1..b19801f73890 100644 --- a/include/linux/adfs_fs.h +++ b/include/linux/adfs_fs.h @@ -41,8 +41,6 @@ struct adfs_discrecord { #define ADFS_DR_SIZE_BITS (ADFS_DR_SIZE << 3) #ifdef __KERNEL__ -#include -#include /* * Calculate the boot block checksum on an ADFS drive. Note that this will * appear to be correct if the sector contains all zeros, so also check that @@ -60,17 +58,6 @@ static inline int adfs_checkbblk(unsigned char *ptr) return (result & 0xff) != ptr[511]; } - -static inline struct adfs_sb_info *ADFS_SB(struct super_block *sb) -{ - return sb->s_fs_info; -} - -static inline struct adfs_inode_info *ADFS_I(struct inode *inode) -{ - return container_of(inode, struct adfs_inode_info, vfs_inode); -} - #endif #endif diff --git a/include/linux/adfs_fs_i.h b/include/linux/adfs_fs_i.h deleted file mode 100644 index cb543034e54f..000000000000 --- a/include/linux/adfs_fs_i.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * linux/include/linux/adfs_fs_i.h - * - * Copyright (C) 1997 Russell King - */ - -#ifndef _ADFS_FS_I -#define _ADFS_FS_I - -/* - * adfs file system inode data in memory - */ -struct adfs_inode_info { - loff_t mmu_private; - unsigned long parent_id; /* object id of parent */ - __u32 loadaddr; /* RISC OS load address */ - __u32 execaddr; /* RISC OS exec address */ - unsigned int filetype; /* RISC OS file type */ - unsigned int attr; /* RISC OS permissions */ - unsigned int stamped:1; /* RISC OS file has date/time */ - struct inode vfs_inode; -}; - -#endif diff --git a/include/linux/adfs_fs_sb.h 
b/include/linux/adfs_fs_sb.h deleted file mode 100644 index d9bf05c02ccc..000000000000 --- a/include/linux/adfs_fs_sb.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * linux/include/linux/adfs_fs_sb.h - * - * Copyright (C) 1997-1999 Russell King - */ - -#ifndef _ADFS_FS_SB -#define _ADFS_FS_SB - -/* - * Forward-declare this - */ -struct adfs_discmap; -struct adfs_dir_ops; - -/* - * ADFS file system superblock data in memory - */ -struct adfs_sb_info { - struct adfs_discmap *s_map; /* bh list containing map */ - struct adfs_dir_ops *s_dir; /* directory operations */ - - uid_t s_uid; /* owner uid */ - gid_t s_gid; /* owner gid */ - umode_t s_owner_mask; /* ADFS owner perm -> unix perm */ - umode_t s_other_mask; /* ADFS other perm -> unix perm */ - - __u32 s_ids_per_zone; /* max. no ids in one zone */ - __u32 s_idlen; /* length of ID in map */ - __u32 s_map_size; /* sector size of a map */ - unsigned long s_size; /* total size (in blocks) of this fs */ - signed int s_map2blk; /* shift left by this for map->sector */ - unsigned int s_log2sharesize;/* log2 share size */ - __le32 s_version; /* disc format version */ - unsigned int s_namelen; /* maximum number of characters in name */ -}; - -#endif From 536c94901eb8f2eb6fccf81ae6be814899a9f6e8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2009 23:24:50 -0400 Subject: [PATCH 08/11] befs ->put_super() doesn't need BKL Signed-off-by: Al Viro --- fs/befs/linuxvfs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 9367b6297d84..02c06138bc6a 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -737,8 +737,6 @@ parse_options(char *options, befs_mount_options * opts) static void befs_put_super(struct super_block *sb) { - lock_kernel(); - kfree(BEFS_SB(sb)->mount_opts.iocharset); BEFS_SB(sb)->mount_opts.iocharset = NULL; @@ -749,8 +747,6 @@ befs_put_super(struct super_block *sb) kfree(sb->s_fs_info); sb->s_fs_info = NULL; - - unlock_kernel(); } /* Allocate private field 
of the superblock, fill it. From e7ec952f6aa6ac1649ac49eb5e4de5b92c829d1e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2009 23:35:46 -0400 Subject: [PATCH 09/11] get rid of BKL in fs/efs Only readdir() really needed it, and that's easily fixable by switching to generic_file_llseek() Signed-off-by: Al Viro --- fs/efs/dir.c | 5 +---- fs/efs/namei.c | 9 +-------- fs/efs/symlink.c | 7 +------ 3 files changed, 3 insertions(+), 18 deletions(-) diff --git a/fs/efs/dir.c b/fs/efs/dir.c index 49308a29798a..7ee6f7e3a608 100644 --- a/fs/efs/dir.c +++ b/fs/efs/dir.c @@ -5,12 +5,12 @@ */ #include -#include #include "efs.h" static int efs_readdir(struct file *, void *, filldir_t); const struct file_operations efs_dir_operations = { + .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = efs_readdir, }; @@ -33,8 +33,6 @@ static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) { if (inode->i_size & (EFS_DIRBSIZE-1)) printk(KERN_WARNING "EFS: WARNING: readdir(): directory size not a multiple of EFS_DIRBSIZE\n"); - lock_kernel(); - /* work out where this entry can be found */ block = filp->f_pos >> EFS_DIRBSIZE_BITS; @@ -107,7 +105,6 @@ static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) { filp->f_pos = (block << EFS_DIRBSIZE_BITS) | slot; out: - unlock_kernel(); return 0; } diff --git a/fs/efs/namei.c b/fs/efs/namei.c index c3fb5f9c4a44..1511bf9e5f80 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c @@ -8,7 +8,6 @@ #include #include -#include #include #include "efs.h" @@ -63,16 +62,12 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei efs_ino_t inodenum; struct inode * inode = NULL; - lock_kernel(); inodenum = efs_find_entry(dir, dentry->d_name.name, dentry->d_name.len); if (inodenum) { inode = efs_iget(dir->i_sb, inodenum); - if (IS_ERR(inode)) { - unlock_kernel(); + if (IS_ERR(inode)) return ERR_CAST(inode); - } } - unlock_kernel(); return d_splice_alias(inode, dentry); } @@ -115,11 
+110,9 @@ struct dentry *efs_get_parent(struct dentry *child) struct dentry *parent = ERR_PTR(-ENOENT); efs_ino_t ino; - lock_kernel(); ino = efs_find_entry(child->d_inode, "..", 2); if (ino) parent = d_obtain_alias(efs_iget(child->d_inode->i_sb, ino)); - unlock_kernel(); return parent; } diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c index 41911ec83aaf..75117d0dac2b 100644 --- a/fs/efs/symlink.c +++ b/fs/efs/symlink.c @@ -9,7 +9,6 @@ #include #include #include -#include #include "efs.h" static int efs_symlink_readpage(struct file *file, struct page *page) @@ -22,9 +21,8 @@ static int efs_symlink_readpage(struct file *file, struct page *page) err = -ENAMETOOLONG; if (size > 2 * EFS_BLOCKSIZE) - goto fail_notlocked; + goto fail; - lock_kernel(); /* read first 512 bytes of link target */ err = -EIO; bh = sb_bread(inode->i_sb, efs_bmap(inode, 0)); @@ -40,14 +38,11 @@ static int efs_symlink_readpage(struct file *file, struct page *page) brelse(bh); } link[size] = '\0'; - unlock_kernel(); SetPageUptodate(page); kunmap(page); unlock_page(page); return 0; fail: - unlock_kernel(); -fail_notlocked: SetPageError(page); kunmap(page); unlock_page(page); From cc46759a8c0ac4c6f13aa4b0f470305c05f600e1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2009 23:47:45 -0400 Subject: [PATCH 10/11] get rid of BKL in fs/minix Signed-off-by: Al Viro --- fs/minix/bitmap.c | 25 +++++++++++++------------ fs/minix/dir.c | 5 +---- fs/minix/inode.c | 4 ---- 3 files changed, 14 insertions(+), 20 deletions(-) diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 3aebe322271a..6ac693faae49 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c @@ -12,13 +12,14 @@ /* bitmap.c contains the code that handles the inode and block bitmaps */ #include "minix.h" -#include #include #include #include static const int nibblemap[] = { 4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0 }; +static DEFINE_SPINLOCK(bitmap_lock); + static unsigned long count_free(struct buffer_head *map[], unsigned numblocks, __u32 
numbits) { unsigned i, j, sum = 0; @@ -69,11 +70,11 @@ void minix_free_block(struct inode *inode, unsigned long block) return; } bh = sbi->s_zmap[zone]; - lock_kernel(); + spin_lock(&bitmap_lock); if (!minix_test_and_clear_bit(bit, bh->b_data)) printk("minix_free_block (%s:%lu): bit already cleared\n", sb->s_id, block); - unlock_kernel(); + spin_unlock(&bitmap_lock); mark_buffer_dirty(bh); return; } @@ -88,18 +89,18 @@ int minix_new_block(struct inode * inode) struct buffer_head *bh = sbi->s_zmap[i]; int j; - lock_kernel(); + spin_lock(&bitmap_lock); j = minix_find_first_zero_bit(bh->b_data, bits_per_zone); if (j < bits_per_zone) { minix_set_bit(j, bh->b_data); - unlock_kernel(); + spin_unlock(&bitmap_lock); mark_buffer_dirty(bh); j += i * bits_per_zone + sbi->s_firstdatazone-1; if (j < sbi->s_firstdatazone || j >= sbi->s_nzones) break; return j; } - unlock_kernel(); + spin_unlock(&bitmap_lock); } return 0; } @@ -211,10 +212,10 @@ void minix_free_inode(struct inode * inode) minix_clear_inode(inode); /* clear on-disk copy */ bh = sbi->s_imap[ino]; - lock_kernel(); + spin_lock(&bitmap_lock); if (!minix_test_and_clear_bit(bit, bh->b_data)) printk("minix_free_inode: bit %lu already cleared\n", bit); - unlock_kernel(); + spin_unlock(&bitmap_lock); mark_buffer_dirty(bh); out: clear_inode(inode); /* clear in-memory copy */ @@ -237,7 +238,7 @@ struct inode * minix_new_inode(const struct inode * dir, int * error) j = bits_per_zone; bh = NULL; *error = -ENOSPC; - lock_kernel(); + spin_lock(&bitmap_lock); for (i = 0; i < sbi->s_imap_blocks; i++) { bh = sbi->s_imap[i]; j = minix_find_first_zero_bit(bh->b_data, bits_per_zone); @@ -245,17 +246,17 @@ struct inode * minix_new_inode(const struct inode * dir, int * error) break; } if (!bh || j >= bits_per_zone) { - unlock_kernel(); + spin_unlock(&bitmap_lock); iput(inode); return NULL; } if (minix_test_and_set_bit(j, bh->b_data)) { /* shouldn't happen */ - unlock_kernel(); + spin_unlock(&bitmap_lock); printk("minix_new_inode: bit 
already set\n"); iput(inode); return NULL; } - unlock_kernel(); + spin_unlock(&bitmap_lock); mark_buffer_dirty(bh); j += i * bits_per_zone; if (!j || j > sbi->s_ninodes) { diff --git a/fs/minix/dir.c b/fs/minix/dir.c index e5f206467e40..d407e7a0b6fe 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -11,7 +11,6 @@ #include "minix.h" #include #include -#include #include typedef struct minix_dir_entry minix_dirent; @@ -20,6 +19,7 @@ typedef struct minix3_dir_entry minix3_dirent; static int minix_readdir(struct file *, void *, filldir_t); const struct file_operations minix_dir_operations = { + .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = minix_readdir, .fsync = simple_fsync, @@ -102,8 +102,6 @@ static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir) char *name; __u32 inumber; - lock_kernel(); - pos = (pos + chunk_size-1) & ~(chunk_size-1); if (pos >= inode->i_size) goto done; @@ -146,7 +144,6 @@ static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir) done: filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset; - unlock_kernel(); return 0; } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index f91a23693597..74ea82d72164 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -35,8 +35,6 @@ static void minix_put_super(struct super_block *sb) int i; struct minix_sb_info *sbi = minix_sb(sb); - lock_kernel(); - if (!(sb->s_flags & MS_RDONLY)) { if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ sbi->s_ms->s_state = sbi->s_mount_state; @@ -50,8 +48,6 @@ static void minix_put_super(struct super_block *sb) kfree(sbi->s_imap); sb->s_fs_info = NULL; kfree(sbi); - - unlock_kernel(); } static struct kmem_cache * minix_inode_cachep; From 5ac3455a843d2ca77333c954eea83aa4514c8199 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2009 23:59:37 -0400 Subject: [PATCH 11/11] get rid of BKL in fs/sysv Signed-off-by: Al Viro --- fs/sysv/dir.c | 5 +---- fs/sysv/inode.c | 11 ----------- 2 files changed, 
1 insertion(+), 15 deletions(-) diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index c7798079e644..4e50286a4cc3 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -15,13 +15,13 @@ #include #include -#include #include #include "sysv.h" static int sysv_readdir(struct file *, void *, filldir_t); const struct file_operations sysv_dir_operations = { + .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = sysv_readdir, .fsync = simple_fsync, @@ -74,8 +74,6 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir) unsigned long n = pos >> PAGE_CACHE_SHIFT; unsigned long npages = dir_pages(inode); - lock_kernel(); - pos = (pos + SYSV_DIRSIZE-1) & ~(SYSV_DIRSIZE-1); if (pos >= inode->i_size) goto done; @@ -113,7 +111,6 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir) done: filp->f_pos = ((loff_t)n << PAGE_CACHE_SHIFT) | offset; - unlock_kernel(); return 0; } diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 479923456a54..9824743832a7 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -21,7 +21,6 @@ * the superblock. 
*/ -#include #include #include #include @@ -37,7 +36,6 @@ static int sysv_sync_fs(struct super_block *sb, int wait) unsigned long time = get_seconds(), old_time; lock_super(sb); - lock_kernel(); /* * If we are going to write out the super block, @@ -52,7 +50,6 @@ static int sysv_sync_fs(struct super_block *sb, int wait) mark_buffer_dirty(sbi->s_bh2); } - unlock_kernel(); unlock_super(sb); return 0; @@ -82,8 +79,6 @@ static void sysv_put_super(struct super_block *sb) { struct sysv_sb_info *sbi = SYSV_SB(sb); - lock_kernel(); - if (sb->s_dirt) sysv_write_super(sb); @@ -99,8 +94,6 @@ static void sysv_put_super(struct super_block *sb) brelse(sbi->s_bh2); kfree(sbi); - - unlock_kernel(); } static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -275,7 +268,6 @@ int sysv_write_inode(struct inode *inode, int wait) return -EIO; } - lock_kernel(); raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode); raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(inode->i_uid)); raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(inode->i_gid)); @@ -291,7 +283,6 @@ int sysv_write_inode(struct inode *inode, int wait) for (block = 0; block < 10+1+1+1; block++) write3byte(sbi, (u8 *)&si->i_data[block], &raw_inode->i_data[3*block]); - unlock_kernel(); mark_buffer_dirty(bh); if (wait) { sync_dirty_buffer(bh); @@ -315,9 +306,7 @@ static void sysv_delete_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); inode->i_size = 0; sysv_truncate(inode); - lock_kernel(); sysv_free_inode(inode); - unlock_kernel(); } static struct kmem_cache *sysv_inode_cachep;