Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull ext3 improvements, cleanups, reiserfs fix from Jan Kara:
 "various cleanups for ext2, ext3, udf, isofs, a documentation update
  for quota, and a fix of a race in reiserfs readdir implementation"

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  reiserfs: fix race in readdir
  ext2: acl: remove unneeded include of linux/capability.h
  ext3: explicitly remove inode from orphan list after failed direct io
  fs/isofs/inode.c add __init to init_inodecache()
  ext3: Speedup WB_SYNC_ALL pass
  fs/quota/Kconfig: Update filesystems
  ext3: Update outdated comment before ext3_ordered_writepage()
  ext3: Update PF_MEMALLOC handling in ext3_write_inode()
  ext2/3: use prandom_u32() instead of get_random_bytes()
  ext3: remove an unneeded check in ext3_new_blocks()
  ext3: remove unneeded check in ext3_ordered_writepage()
  fs: Mark function as static in ext3/xattr_security.c
  fs: Mark function as static in ext3/dir.c
  fs: Mark function as static in ext2/xattr_security.c
  ext3: Add __init macro to init_inodecache
  ext2: Add __init macro to init_inodecache
  udf: Add __init macro to init_inodecache
  fs: udf: parse_options: blocksize check
This commit is contained in:
Linus Torvalds 2014-04-07 17:59:17 -07:00
commit a7963eb7f4
14 changed files with 51 additions and 83 deletions

View File

@ -4,7 +4,6 @@
* Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
*/
#include <linux/capability.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/slab.h>

View File

@ -284,7 +284,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
int best_ndir = inodes_per_group;
int best_group = -1;
get_random_bytes(&group, sizeof(group));
group = prandom_u32();
parent_group = (unsigned)group % ngroups;
for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups;

View File

@ -192,7 +192,7 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode);
}
static int init_inodecache(void)
static int __init init_inodecache(void)
{
ext2_inode_cachep = kmem_cache_create("ext2_inode_cache",
sizeof(struct ext2_inode_info),

View File

@ -42,8 +42,8 @@ ext2_xattr_security_set(struct dentry *dentry, const char *name,
value, size, flags);
}
int ext2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
void *fs_info)
static int ext2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
void *fs_info)
{
const struct xattr *xattr;
int err = 0;

View File

@ -1727,10 +1727,7 @@ allocated:
percpu_counter_sub(&sbi->s_freeblocks_counter, num);
BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
err = ext3_journal_dirty_metadata(handle, gdp_bh);
if (!fatal)
fatal = err;
fatal = ext3_journal_dirty_metadata(handle, gdp_bh);
if (fatal)
goto out;

View File

@ -275,7 +275,7 @@ static inline loff_t ext3_get_htree_eof(struct file *filp)
* NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX)
* will be invalid once the directory was converted into a dx directory
*/
loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence)
static loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
int dx_dir = is_dx_dir(inode);

View File

@ -215,7 +215,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
int best_ndir = inodes_per_group;
int best_group = -1;
get_random_bytes(&group, sizeof(group));
group = prandom_u32();
parent_group = (unsigned)group % ngroups;
for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups;

View File

@ -1559,56 +1559,17 @@ static int buffer_unmapped(handle_t *handle, struct buffer_head *bh)
}
/*
* Note that we always start a transaction even if we're not journalling
* data. This is to preserve ordering: any hole instantiation within
* __block_write_full_page -> ext3_get_block() should be journalled
* along with the data so we don't crash and then get metadata which
* Note that whenever we need to map blocks we start a transaction even if
* we're not journalling data. This is to preserve ordering: any hole
* instantiation within __block_write_full_page -> ext3_get_block() should be
* journalled along with the data so we don't crash and then get metadata which
* refers to old data.
*
* In all journalling modes block_write_full_page() will start the I/O.
*
* Problem:
*
* ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
* ext3_writepage()
*
* Similar for:
*
* ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ...
*
* Same applies to ext3_get_block(). We will deadlock on various things like
* lock_journal and i_truncate_mutex.
*
* Setting PF_MEMALLOC here doesn't work - too many internal memory
* allocations fail.
*
* 16May01: If we're reentered then journal_current_handle() will be
* non-zero. We simply *return*.
*
* 1 July 2001: @@@ FIXME:
* In journalled data mode, a data buffer may be metadata against the
* current transaction. But the same file is part of a shared mapping
* and someone does a writepage() on it.
*
* We will move the buffer onto the async_data list, but *after* it has
* been dirtied. So there's a small window where we have dirty data on
* BJ_Metadata.
*
* Note that this only applies to the last partial page in the file. The
* bit which block_write_full_page() uses prepare/commit for. (That's
* broken code anyway: it's wrong for msync()).
*
* It's a rare case: affects the final partial page, for journalled data
* where the file is subject to bith write() and writepage() in the same
* transction. To fix it we'll need a custom block_write_full_page().
* We'll probably need that anyway for journalling writepage() output.
*
* We don't honour synchronous mounts for writepage(). That would be
* disastrous. Any write() or metadata operation will sync the fs for
* us.
*
* AKPM2: if all the page's buffers are mapped to disk and !data=journal,
* we don't need to open a transaction here.
*/
static int ext3_ordered_writepage(struct page *page,
struct writeback_control *wbc)
@ -1673,12 +1634,9 @@ static int ext3_ordered_writepage(struct page *page,
* block_write_full_page() succeeded. Otherwise they are unmapped,
* and generally junk.
*/
if (ret == 0) {
err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
if (ret == 0)
ret = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
NULL, journal_dirty_data_fn);
if (!ret)
ret = err;
}
walk_page_buffers(handle, page_bufs, 0,
PAGE_CACHE_SIZE, NULL, bput_one);
err = ext3_journal_stop(handle);
@ -1925,6 +1883,8 @@ retry:
* and pretend the write failed... */
ext3_truncate_failed_direct_write(inode);
ret = PTR_ERR(handle);
if (inode->i_nlink)
ext3_orphan_del(NULL, inode);
goto out;
}
if (inode->i_nlink)
@ -3212,21 +3172,20 @@ out_brelse:
*
* We are called from a few places:
*
* - Within generic_file_write() for O_SYNC files.
* - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files.
* Here, there will be no transaction running. We wait for any running
* transaction to commit.
*
* - Within sys_sync(), kupdate and such.
* We wait on commit, if tol to.
* - Within flush work (for sys_sync(), kupdate and such).
* We wait on commit, if told to.
*
* - Within prune_icache() (PF_MEMALLOC == true)
* Here we simply return. We can't afford to block kswapd on the
* journal commit.
* - Within iput_final() -> write_inode_now()
* We wait on commit, if told to.
*
* In all cases it is actually safe for us to return without doing anything,
* because the inode has been copied into a raw inode buffer in
* ext3_mark_inode_dirty(). This is a correctness thing for O_SYNC and for
* knfsd.
* ext3_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL
* writeback.
*
* Note that we are absolutely dependent upon all inode dirtiers doing the
* right thing: they *must* call mark_inode_dirty() after dirtying info in
@ -3238,13 +3197,13 @@ out_brelse:
* stuff();
* inode->i_size = expr;
*
* is in error because a kswapd-driven write_inode() could occur while
* `stuff()' is running, and the new i_size will be lost. Plus the inode
* will no longer be on the superblock's dirty inode list.
* is in error because write_inode() could occur while `stuff()' is running,
* and the new i_size will be lost. Plus the inode will no longer be on the
* superblock's dirty inode list.
*/
int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
{
if (current->flags & PF_MEMALLOC)
if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
return 0;
if (ext3_journal_current_handle()) {
@ -3253,7 +3212,12 @@ int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
return -EIO;
}
if (wbc->sync_mode != WB_SYNC_ALL)
/*
* No need to force transaction in WB_SYNC_NONE mode. Also
* ext3_sync_fs() will force the commit after everything is
* written.
*/
if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
return 0;
return ext3_force_commit(inode->i_sb);

View File

@ -527,7 +527,7 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode);
}
static int init_inodecache(void)
static int __init init_inodecache(void)
{
ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
sizeof(struct ext3_inode_info),

View File

@ -43,8 +43,9 @@ ext3_xattr_security_set(struct dentry *dentry, const char *name,
name, value, size, flags);
}
int ext3_initxattrs(struct inode *inode, const struct xattr *xattr_array,
void *fs_info)
static int ext3_initxattrs(struct inode *inode,
const struct xattr *xattr_array,
void *fs_info)
{
const struct xattr *xattr;
handle_t *handle = fs_info;

View File

@ -93,7 +93,7 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode);
}
static int init_inodecache(void)
static int __init init_inodecache(void)
{
isofs_inode_cachep = kmem_cache_create("isofs_inode_cache",
sizeof(struct iso_inode_info),

View File

@ -8,9 +8,10 @@ config QUOTA
help
If you say Y here, you will be able to set per user limits for disk
usage (also called disk quotas). Currently, it works for the
ext2, ext3, and reiserfs file system. ext3 also supports journalled
quotas for which you don't need to run quotacheck(8) after an unclean
shutdown.
ext2, ext3, ext4, jfs, ocfs2 and reiserfs file systems.
Note that gfs2 and xfs use their own quota system.
Ext3, ext4 and reiserfs also support journaled quotas for which
you don't need to run quotacheck(8) after an unclean shutdown.
For further details, read the Quota mini-HOWTO, available from
<http://www.tldp.org/docs.html#howto>, or the documentation provided
with the quota tools. Probably the quota support is only useful for

View File

@ -125,6 +125,7 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
int d_reclen;
char *d_name;
ino_t d_ino;
loff_t cur_pos = deh_offset(deh);
if (!de_visible(deh))
/* it is hidden entry */
@ -196,8 +197,9 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
if (local_buf != small_buf) {
kfree(local_buf);
}
// next entry should be looked for with such offset
next_pos = deh_offset(deh) + 1;
/* deh_offset(deh) may be invalid now. */
next_pos = cur_pos + 1;
if (item_moved(&tmp_ih, &path_to_entry)) {
set_cpu_key_k_offset(&pos_key,

View File

@ -175,7 +175,7 @@ static void init_once(void *foo)
inode_init_once(&ei->vfs_inode);
}
static int init_inodecache(void)
static int __init init_inodecache(void)
{
udf_inode_cachep = kmem_cache_create("udf_inode_cache",
sizeof(struct udf_inode_info),
@ -505,6 +505,7 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
while ((p = strsep(&options, ",")) != NULL) {
substring_t args[MAX_OPT_ARGS];
int token;
unsigned n;
if (!*p)
continue;
@ -516,7 +517,10 @@ static int udf_parse_options(char *options, struct udf_options *uopt,
case Opt_bs:
if (match_int(&args[0], &option))
return 0;
uopt->blocksize = option;
n = option;
if (n != 512 && n != 1024 && n != 2048 && n != 4096)
return 0;
uopt->blocksize = n;
uopt->flags |= (1 << UDF_FLAG_BLOCKSIZE_SET);
break;
case Opt_unhide: