forked from Minki/linux
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (49 commits) ext4: Avoid corrupting the uninitialized bit in the extent during truncate ext4: Don't treat a truncation of a zero-length file as replace-via-truncate ext4: fix dx_map_entry to support 256k directory blocks ext4: truncate the file properly if we fail to copy data from userspace ext4: Avoid leaking blocks after a block allocation failure ext4: Change all super.c messages to print the device ext4: Get rid of EXTEND_DISKSIZE flag of ext4_get_blocks_handle() ext4: super.c whitespace cleanup jbd2: Fix minor typos in comments in fs/jbd2/journal.c ext4: Clean up calls to ext4_get_group_desc() ext4: remove unused function __ext4_write_dirty_metadata ext2: Fix memory leak in ext2_fill_super() in case of a failed mount ext3: Fix memory leak in ext3_fill_super() in case of a failed mount ext4: Fix memory leak in ext4_fill_super() in case of a failed mount ext4: down i_data_sem only for read when walking tree for fiemap ext4: Add a comprehensive block validity check to ext4_get_blocks() ext4: Clean up ext4_get_blocks() so it does not depend on bh_result->b_state ext4: Merge ext4_da_get_block_write() into mpage_da_map_blocks() ext4: Add BUG_ON debugging checks to noalloc_get_block_write() ext4: Add documentation to the ext4_*get_block* functions ...
This commit is contained in:
commit
e893123c73
@ -2935,6 +2935,8 @@ int submit_bh(int rw, struct buffer_head * bh)
|
||||
BUG_ON(!buffer_locked(bh));
|
||||
BUG_ON(!buffer_mapped(bh));
|
||||
BUG_ON(!bh->b_end_io);
|
||||
BUG_ON(buffer_delay(bh));
|
||||
BUG_ON(buffer_unwritten(bh));
|
||||
|
||||
/*
|
||||
* Mask in barrier bit for a write (could be either a WRITE or a
|
||||
|
@ -1093,6 +1093,7 @@ failed_mount:
|
||||
brelse(bh);
|
||||
failed_sbi:
|
||||
sb->s_fs_info = NULL;
|
||||
kfree(sbi->s_blockgroup_lock);
|
||||
kfree(sbi);
|
||||
return ret;
|
||||
}
|
||||
|
@ -2021,6 +2021,7 @@ failed_mount:
|
||||
brelse(bh);
|
||||
out_fail:
|
||||
sb->s_fs_info = NULL;
|
||||
kfree(sbi->s_blockgroup_lock);
|
||||
kfree(sbi);
|
||||
lock_kernel();
|
||||
return ret;
|
||||
|
@ -5,8 +5,8 @@
|
||||
obj-$(CONFIG_EXT4_FS) += ext4.o
|
||||
|
||||
ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
|
||||
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
|
||||
ext4_jbd2.o migrate.o mballoc.o
|
||||
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
|
||||
ext4_jbd2.o migrate.o mballoc.o block_validity.o
|
||||
|
||||
ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
|
||||
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include <linux/buffer_head.h>
|
||||
#include "ext4.h"
|
||||
#include "ext4_jbd2.h"
|
||||
#include "group.h"
|
||||
#include "mballoc.h"
|
||||
|
||||
/*
|
||||
@ -88,6 +87,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
|
||||
ext4_group_t block_group, struct ext4_group_desc *gdp)
|
||||
{
|
||||
int bit, bit_max;
|
||||
ext4_group_t ngroups = ext4_get_groups_count(sb);
|
||||
unsigned free_blocks, group_blocks;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
|
||||
@ -123,7 +123,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
|
||||
bit_max += ext4_bg_num_gdb(sb, block_group);
|
||||
}
|
||||
|
||||
if (block_group == sbi->s_groups_count - 1) {
|
||||
if (block_group == ngroups - 1) {
|
||||
/*
|
||||
* Even though mke2fs always initializes the first and last group
|
||||
* if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need
|
||||
@ -131,7 +131,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
|
||||
*/
|
||||
group_blocks = ext4_blocks_count(sbi->s_es) -
|
||||
le32_to_cpu(sbi->s_es->s_first_data_block) -
|
||||
(EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1));
|
||||
(EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1));
|
||||
} else {
|
||||
group_blocks = EXT4_BLOCKS_PER_GROUP(sb);
|
||||
}
|
||||
@ -205,18 +205,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
|
||||
{
|
||||
unsigned int group_desc;
|
||||
unsigned int offset;
|
||||
ext4_group_t ngroups = ext4_get_groups_count(sb);
|
||||
struct ext4_group_desc *desc;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
|
||||
if (block_group >= sbi->s_groups_count) {
|
||||
if (block_group >= ngroups) {
|
||||
ext4_error(sb, "ext4_get_group_desc",
|
||||
"block_group >= groups_count - "
|
||||
"block_group = %u, groups_count = %u",
|
||||
block_group, sbi->s_groups_count);
|
||||
block_group, ngroups);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
smp_rmb();
|
||||
|
||||
group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
|
||||
offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
|
||||
@ -326,16 +326,16 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
|
||||
unlock_buffer(bh);
|
||||
return bh;
|
||||
}
|
||||
spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
|
||||
ext4_lock_group(sb, block_group);
|
||||
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
|
||||
ext4_init_block_bitmap(sb, bh, block_group, desc);
|
||||
set_bitmap_uptodate(bh);
|
||||
set_buffer_uptodate(bh);
|
||||
spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
|
||||
ext4_unlock_group(sb, block_group);
|
||||
unlock_buffer(bh);
|
||||
return bh;
|
||||
}
|
||||
spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
|
||||
ext4_unlock_group(sb, block_group);
|
||||
if (buffer_uptodate(bh)) {
|
||||
/*
|
||||
* if not uninit if bh is uptodate,
|
||||
@ -451,7 +451,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
|
||||
down_write(&grp->alloc_sem);
|
||||
for (i = 0, blocks_freed = 0; i < count; i++) {
|
||||
BUFFER_TRACE(bitmap_bh, "clear bit");
|
||||
if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
|
||||
if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
|
||||
bit + i, bitmap_bh->b_data)) {
|
||||
ext4_error(sb, __func__,
|
||||
"bit already cleared for block %llu",
|
||||
@ -461,11 +461,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
|
||||
blocks_freed++;
|
||||
}
|
||||
}
|
||||
spin_lock(sb_bgl_lock(sbi, block_group));
|
||||
ext4_lock_group(sb, block_group);
|
||||
blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc);
|
||||
ext4_free_blks_set(sb, desc, blk_free_count);
|
||||
desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
|
||||
spin_unlock(sb_bgl_lock(sbi, block_group));
|
||||
ext4_unlock_group(sb, block_group);
|
||||
percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed);
|
||||
|
||||
if (sbi->s_log_groups_per_flex) {
|
||||
@ -665,7 +665,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
|
||||
ext4_fsblk_t desc_count;
|
||||
struct ext4_group_desc *gdp;
|
||||
ext4_group_t i;
|
||||
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
|
||||
ext4_group_t ngroups = ext4_get_groups_count(sb);
|
||||
#ifdef EXT4FS_DEBUG
|
||||
struct ext4_super_block *es;
|
||||
ext4_fsblk_t bitmap_count;
|
||||
@ -677,7 +677,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
|
||||
bitmap_count = 0;
|
||||
gdp = NULL;
|
||||
|
||||
smp_rmb();
|
||||
for (i = 0; i < ngroups; i++) {
|
||||
gdp = ext4_get_group_desc(sb, i, NULL);
|
||||
if (!gdp)
|
||||
@ -700,7 +699,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
|
||||
return bitmap_count;
|
||||
#else
|
||||
desc_count = 0;
|
||||
smp_rmb();
|
||||
for (i = 0; i < ngroups; i++) {
|
||||
gdp = ext4_get_group_desc(sb, i, NULL);
|
||||
if (!gdp)
|
||||
|
244
fs/ext4/block_validity.c
Normal file
244
fs/ext4/block_validity.c
Normal file
@ -0,0 +1,244 @@
|
||||
/*
|
||||
* linux/fs/ext4/block_validity.c
|
||||
*
|
||||
* Copyright (C) 2009
|
||||
* Theodore Ts'o (tytso@mit.edu)
|
||||
*
|
||||
* Track which blocks in the filesystem are metadata blocks that
|
||||
* should never be used as data blocks by files or directories.
|
||||
*/
|
||||
|
||||
#include <linux/time.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/quotaops.h>
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/version.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/mutex.h>
|
||||
#include "ext4.h"
|
||||
|
||||
struct ext4_system_zone {
|
||||
struct rb_node node;
|
||||
ext4_fsblk_t start_blk;
|
||||
unsigned int count;
|
||||
};
|
||||
|
||||
static struct kmem_cache *ext4_system_zone_cachep;
|
||||
|
||||
int __init init_ext4_system_zone(void)
|
||||
{
|
||||
ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone,
|
||||
SLAB_RECLAIM_ACCOUNT);
|
||||
if (ext4_system_zone_cachep == NULL)
|
||||
return -ENOMEM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void exit_ext4_system_zone(void)
|
||||
{
|
||||
kmem_cache_destroy(ext4_system_zone_cachep);
|
||||
}
|
||||
|
||||
static inline int can_merge(struct ext4_system_zone *entry1,
|
||||
struct ext4_system_zone *entry2)
|
||||
{
|
||||
if ((entry1->start_blk + entry1->count) == entry2->start_blk)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark a range of blocks as belonging to the "system zone" --- that
|
||||
* is, filesystem metadata blocks which should never be used by
|
||||
* inodes.
|
||||
*/
|
||||
static int add_system_zone(struct ext4_sb_info *sbi,
|
||||
ext4_fsblk_t start_blk,
|
||||
unsigned int count)
|
||||
{
|
||||
struct ext4_system_zone *new_entry = NULL, *entry;
|
||||
struct rb_node **n = &sbi->system_blks.rb_node, *node;
|
||||
struct rb_node *parent = NULL, *new_node = NULL;
|
||||
|
||||
while (*n) {
|
||||
parent = *n;
|
||||
entry = rb_entry(parent, struct ext4_system_zone, node);
|
||||
if (start_blk < entry->start_blk)
|
||||
n = &(*n)->rb_left;
|
||||
else if (start_blk >= (entry->start_blk + entry->count))
|
||||
n = &(*n)->rb_right;
|
||||
else {
|
||||
if (start_blk + count > (entry->start_blk +
|
||||
entry->count))
|
||||
entry->count = (start_blk + count -
|
||||
entry->start_blk);
|
||||
new_node = *n;
|
||||
new_entry = rb_entry(new_node, struct ext4_system_zone,
|
||||
node);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!new_entry) {
|
||||
new_entry = kmem_cache_alloc(ext4_system_zone_cachep,
|
||||
GFP_KERNEL);
|
||||
if (!new_entry)
|
||||
return -ENOMEM;
|
||||
new_entry->start_blk = start_blk;
|
||||
new_entry->count = count;
|
||||
new_node = &new_entry->node;
|
||||
|
||||
rb_link_node(new_node, parent, n);
|
||||
rb_insert_color(new_node, &sbi->system_blks);
|
||||
}
|
||||
|
||||
/* Can we merge to the left? */
|
||||
node = rb_prev(new_node);
|
||||
if (node) {
|
||||
entry = rb_entry(node, struct ext4_system_zone, node);
|
||||
if (can_merge(entry, new_entry)) {
|
||||
new_entry->start_blk = entry->start_blk;
|
||||
new_entry->count += entry->count;
|
||||
rb_erase(node, &sbi->system_blks);
|
||||
kmem_cache_free(ext4_system_zone_cachep, entry);
|
||||
}
|
||||
}
|
||||
|
||||
/* Can we merge to the right? */
|
||||
node = rb_next(new_node);
|
||||
if (node) {
|
||||
entry = rb_entry(node, struct ext4_system_zone, node);
|
||||
if (can_merge(new_entry, entry)) {
|
||||
new_entry->count += entry->count;
|
||||
rb_erase(node, &sbi->system_blks);
|
||||
kmem_cache_free(ext4_system_zone_cachep, entry);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void debug_print_tree(struct ext4_sb_info *sbi)
|
||||
{
|
||||
struct rb_node *node;
|
||||
struct ext4_system_zone *entry;
|
||||
int first = 1;
|
||||
|
||||
printk(KERN_INFO "System zones: ");
|
||||
node = rb_first(&sbi->system_blks);
|
||||
while (node) {
|
||||
entry = rb_entry(node, struct ext4_system_zone, node);
|
||||
printk("%s%llu-%llu", first ? "" : ", ",
|
||||
entry->start_blk, entry->start_blk + entry->count - 1);
|
||||
first = 0;
|
||||
node = rb_next(node);
|
||||
}
|
||||
printk("\n");
|
||||
}
|
||||
|
||||
int ext4_setup_system_zone(struct super_block *sb)
|
||||
{
|
||||
ext4_group_t ngroups = ext4_get_groups_count(sb);
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
struct ext4_group_desc *gdp;
|
||||
ext4_group_t i;
|
||||
int flex_size = ext4_flex_bg_size(sbi);
|
||||
int ret;
|
||||
|
||||
if (!test_opt(sb, BLOCK_VALIDITY)) {
|
||||
if (EXT4_SB(sb)->system_blks.rb_node)
|
||||
ext4_release_system_zone(sb);
|
||||
return 0;
|
||||
}
|
||||
if (EXT4_SB(sb)->system_blks.rb_node)
|
||||
return 0;
|
||||
|
||||
for (i=0; i < ngroups; i++) {
|
||||
if (ext4_bg_has_super(sb, i) &&
|
||||
((i < 5) || ((i % flex_size) == 0)))
|
||||
add_system_zone(sbi, ext4_group_first_block_no(sb, i),
|
||||
sbi->s_gdb_count + 1);
|
||||
gdp = ext4_get_group_desc(sb, i, NULL);
|
||||
ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = add_system_zone(sbi, ext4_inode_table(sb, gdp),
|
||||
sbi->s_itb_per_group);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (test_opt(sb, DEBUG))
|
||||
debug_print_tree(EXT4_SB(sb));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Called when the filesystem is unmounted */
|
||||
void ext4_release_system_zone(struct super_block *sb)
|
||||
{
|
||||
struct rb_node *n = EXT4_SB(sb)->system_blks.rb_node;
|
||||
struct rb_node *parent;
|
||||
struct ext4_system_zone *entry;
|
||||
|
||||
while (n) {
|
||||
/* Do the node's children first */
|
||||
if (n->rb_left) {
|
||||
n = n->rb_left;
|
||||
continue;
|
||||
}
|
||||
if (n->rb_right) {
|
||||
n = n->rb_right;
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
* The node has no children; free it, and then zero
|
||||
* out parent's link to it. Finally go to the
|
||||
* beginning of the loop and try to free the parent
|
||||
* node.
|
||||
*/
|
||||
parent = rb_parent(n);
|
||||
entry = rb_entry(n, struct ext4_system_zone, node);
|
||||
kmem_cache_free(ext4_system_zone_cachep, entry);
|
||||
if (!parent)
|
||||
EXT4_SB(sb)->system_blks.rb_node = NULL;
|
||||
else if (parent->rb_left == n)
|
||||
parent->rb_left = NULL;
|
||||
else if (parent->rb_right == n)
|
||||
parent->rb_right = NULL;
|
||||
n = parent;
|
||||
}
|
||||
EXT4_SB(sb)->system_blks.rb_node = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns 1 if the passed-in block region (start_blk,
|
||||
* start_blk+count) is valid; 0 if some part of the block region
|
||||
* overlaps with filesystem metadata blocks.
|
||||
*/
|
||||
int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
|
||||
unsigned int count)
|
||||
{
|
||||
struct ext4_system_zone *entry;
|
||||
struct rb_node *n = sbi->system_blks.rb_node;
|
||||
|
||||
if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
|
||||
(start_blk + count > ext4_blocks_count(sbi->s_es)))
|
||||
return 0;
|
||||
while (n) {
|
||||
entry = rb_entry(n, struct ext4_system_zone, node);
|
||||
if (start_blk + count - 1 < entry->start_blk)
|
||||
n = n->rb_left;
|
||||
else if (start_blk >= (entry->start_blk + entry->count))
|
||||
n = n->rb_right;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
@ -131,8 +131,7 @@ static int ext4_readdir(struct file *filp,
|
||||
struct buffer_head *bh = NULL;
|
||||
|
||||
map_bh.b_state = 0;
|
||||
err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh,
|
||||
0, 0, 0);
|
||||
err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0);
|
||||
if (err > 0) {
|
||||
pgoff_t index = map_bh.b_blocknr >>
|
||||
(PAGE_CACHE_SHIFT - inode->i_blkbits);
|
||||
|
356
fs/ext4/ext4.h
356
fs/ext4/ext4.h
@ -21,7 +21,14 @@
|
||||
#include <linux/magic.h>
|
||||
#include <linux/jbd2.h>
|
||||
#include <linux/quota.h>
|
||||
#include "ext4_i.h"
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/seqlock.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/timer.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/blockgroup_lock.h>
|
||||
#include <linux/percpu_counter.h>
|
||||
|
||||
/*
|
||||
* The fourth extended filesystem constants/structures
|
||||
@ -46,6 +53,19 @@
|
||||
#define ext4_debug(f, a...) do {} while (0)
|
||||
#endif
|
||||
|
||||
/* data type for block offset of block group */
|
||||
typedef int ext4_grpblk_t;
|
||||
|
||||
/* data type for filesystem-wide blocks number */
|
||||
typedef unsigned long long ext4_fsblk_t;
|
||||
|
||||
/* data type for file logical block number */
|
||||
typedef __u32 ext4_lblk_t;
|
||||
|
||||
/* data type for block group number */
|
||||
typedef unsigned int ext4_group_t;
|
||||
|
||||
|
||||
/* prefer goal again. length */
|
||||
#define EXT4_MB_HINT_MERGE 1
|
||||
/* blocks already reserved */
|
||||
@ -179,9 +199,6 @@ struct flex_groups {
|
||||
#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
|
||||
#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
|
||||
|
||||
#ifdef __KERNEL__
|
||||
#include "ext4_sb.h"
|
||||
#endif
|
||||
/*
|
||||
* Macro-instructions used to manage group descriptors
|
||||
*/
|
||||
@ -297,10 +314,23 @@ struct ext4_new_group_data {
|
||||
};
|
||||
|
||||
/*
|
||||
* Following is used by preallocation code to tell get_blocks() that we
|
||||
* want uninitialzed extents.
|
||||
* Flags used by ext4_get_blocks()
|
||||
*/
|
||||
#define EXT4_CREATE_UNINITIALIZED_EXT 2
|
||||
/* Allocate any needed blocks and/or convert an uninitialized
|
||||
extent to be an initialized extent */
|
||||
#define EXT4_GET_BLOCKS_CREATE 0x0001
|
||||
/* Request the creation of an uninitialized extent */
|
||||
#define EXT4_GET_BLOCKS_UNINIT_EXT 0x0002
|
||||
#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\
|
||||
EXT4_GET_BLOCKS_CREATE)
|
||||
/* Caller is from the delayed allocation writeout path,
|
||||
so set the magic i_delalloc_reserve_flag after taking the
|
||||
inode allocation semaphore */
|
||||
#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004
|
||||
/* Call ext4_da_update_reserve_space() after successfully
|
||||
allocating the blocks */
|
||||
#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008
|
||||
|
||||
|
||||
/*
|
||||
* ioctl commands
|
||||
@ -515,6 +545,110 @@ do { \
|
||||
|
||||
#endif /* defined(__KERNEL__) || defined(__linux__) */
|
||||
|
||||
/*
|
||||
* storage for cached extent
|
||||
*/
|
||||
struct ext4_ext_cache {
|
||||
ext4_fsblk_t ec_start;
|
||||
ext4_lblk_t ec_block;
|
||||
__u32 ec_len; /* must be 32bit to return holes */
|
||||
__u32 ec_type;
|
||||
};
|
||||
|
||||
/*
|
||||
* fourth extended file system inode data in memory
|
||||
*/
|
||||
struct ext4_inode_info {
|
||||
__le32 i_data[15]; /* unconverted */
|
||||
__u32 i_flags;
|
||||
ext4_fsblk_t i_file_acl;
|
||||
__u32 i_dtime;
|
||||
|
||||
/*
|
||||
* i_block_group is the number of the block group which contains
|
||||
* this file's inode. Constant across the lifetime of the inode,
|
||||
* it is used for making block allocation decisions - we try to
|
||||
* place a file's data blocks near its inode block, and new inodes
|
||||
* near to their parent directory's inode.
|
||||
*/
|
||||
ext4_group_t i_block_group;
|
||||
__u32 i_state; /* Dynamic state flags for ext4 */
|
||||
|
||||
ext4_lblk_t i_dir_start_lookup;
|
||||
#ifdef CONFIG_EXT4_FS_XATTR
|
||||
/*
|
||||
* Extended attributes can be read independently of the main file
|
||||
* data. Taking i_mutex even when reading would cause contention
|
||||
* between readers of EAs and writers of regular file data, so
|
||||
* instead we synchronize on xattr_sem when reading or changing
|
||||
* EAs.
|
||||
*/
|
||||
struct rw_semaphore xattr_sem;
|
||||
#endif
|
||||
#ifdef CONFIG_EXT4_FS_POSIX_ACL
|
||||
struct posix_acl *i_acl;
|
||||
struct posix_acl *i_default_acl;
|
||||
#endif
|
||||
|
||||
struct list_head i_orphan; /* unlinked but open inodes */
|
||||
|
||||
/*
|
||||
* i_disksize keeps track of what the inode size is ON DISK, not
|
||||
* in memory. During truncate, i_size is set to the new size by
|
||||
* the VFS prior to calling ext4_truncate(), but the filesystem won't
|
||||
* set i_disksize to 0 until the truncate is actually under way.
|
||||
*
|
||||
* The intent is that i_disksize always represents the blocks which
|
||||
* are used by this file. This allows recovery to restart truncate
|
||||
* on orphans if we crash during truncate. We actually write i_disksize
|
||||
* into the on-disk inode when writing inodes out, instead of i_size.
|
||||
*
|
||||
* The only time when i_disksize and i_size may be different is when
|
||||
* a truncate is in progress. The only things which change i_disksize
|
||||
* are ext4_get_block (growth) and ext4_truncate (shrinkth).
|
||||
*/
|
||||
loff_t i_disksize;
|
||||
|
||||
/*
|
||||
* i_data_sem is for serialising ext4_truncate() against
|
||||
* ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
|
||||
* data tree are chopped off during truncate. We can't do that in
|
||||
* ext4 because whenever we perform intermediate commits during
|
||||
* truncate, the inode and all the metadata blocks *must* be in a
|
||||
* consistent state which allows truncation of the orphans to restart
|
||||
* during recovery. Hence we must fix the get_block-vs-truncate race
|
||||
* by other means, so we have i_data_sem.
|
||||
*/
|
||||
struct rw_semaphore i_data_sem;
|
||||
struct inode vfs_inode;
|
||||
struct jbd2_inode jinode;
|
||||
|
||||
struct ext4_ext_cache i_cached_extent;
|
||||
/*
|
||||
* File creation time. Its function is same as that of
|
||||
* struct timespec i_{a,c,m}time in the generic inode.
|
||||
*/
|
||||
struct timespec i_crtime;
|
||||
|
||||
/* mballoc */
|
||||
struct list_head i_prealloc_list;
|
||||
spinlock_t i_prealloc_lock;
|
||||
|
||||
/* ialloc */
|
||||
ext4_group_t i_last_alloc_group;
|
||||
|
||||
/* allocation reservation info for delalloc */
|
||||
unsigned int i_reserved_data_blocks;
|
||||
unsigned int i_reserved_meta_blocks;
|
||||
unsigned int i_allocated_meta_blocks;
|
||||
unsigned short i_delalloc_reserved_flag;
|
||||
|
||||
/* on-disk additional length */
|
||||
__u16 i_extra_isize;
|
||||
|
||||
spinlock_t i_block_reservation_lock;
|
||||
};
|
||||
|
||||
/*
|
||||
* File system states
|
||||
*/
|
||||
@ -560,6 +694,7 @@ do { \
|
||||
#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
|
||||
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
|
||||
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
|
||||
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
|
||||
|
||||
/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
|
||||
#ifndef _LINUX_EXT2_FS_H
|
||||
@ -689,6 +824,137 @@ struct ext4_super_block {
|
||||
};
|
||||
|
||||
#ifdef __KERNEL__
|
||||
/*
|
||||
* fourth extended-fs super-block data in memory
|
||||
*/
|
||||
struct ext4_sb_info {
|
||||
unsigned long s_desc_size; /* Size of a group descriptor in bytes */
|
||||
unsigned long s_inodes_per_block;/* Number of inodes per block */
|
||||
unsigned long s_blocks_per_group;/* Number of blocks in a group */
|
||||
unsigned long s_inodes_per_group;/* Number of inodes in a group */
|
||||
unsigned long s_itb_per_group; /* Number of inode table blocks per group */
|
||||
unsigned long s_gdb_count; /* Number of group descriptor blocks */
|
||||
unsigned long s_desc_per_block; /* Number of group descriptors per block */
|
||||
ext4_group_t s_groups_count; /* Number of groups in the fs */
|
||||
unsigned long s_overhead_last; /* Last calculated overhead */
|
||||
unsigned long s_blocks_last; /* Last seen block count */
|
||||
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
|
||||
struct buffer_head * s_sbh; /* Buffer containing the super block */
|
||||
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
|
||||
struct buffer_head **s_group_desc;
|
||||
unsigned long s_mount_opt;
|
||||
ext4_fsblk_t s_sb_block;
|
||||
uid_t s_resuid;
|
||||
gid_t s_resgid;
|
||||
unsigned short s_mount_state;
|
||||
unsigned short s_pad;
|
||||
int s_addr_per_block_bits;
|
||||
int s_desc_per_block_bits;
|
||||
int s_inode_size;
|
||||
int s_first_ino;
|
||||
unsigned int s_inode_readahead_blks;
|
||||
spinlock_t s_next_gen_lock;
|
||||
u32 s_next_generation;
|
||||
u32 s_hash_seed[4];
|
||||
int s_def_hash_version;
|
||||
int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
|
||||
struct percpu_counter s_freeblocks_counter;
|
||||
struct percpu_counter s_freeinodes_counter;
|
||||
struct percpu_counter s_dirs_counter;
|
||||
struct percpu_counter s_dirtyblocks_counter;
|
||||
struct blockgroup_lock *s_blockgroup_lock;
|
||||
struct proc_dir_entry *s_proc;
|
||||
struct kobject s_kobj;
|
||||
struct completion s_kobj_unregister;
|
||||
|
||||
/* Journaling */
|
||||
struct inode *s_journal_inode;
|
||||
struct journal_s *s_journal;
|
||||
struct list_head s_orphan;
|
||||
struct mutex s_orphan_lock;
|
||||
struct mutex s_resize_lock;
|
||||
unsigned long s_commit_interval;
|
||||
u32 s_max_batch_time;
|
||||
u32 s_min_batch_time;
|
||||
struct block_device *journal_bdev;
|
||||
#ifdef CONFIG_JBD2_DEBUG
|
||||
struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
|
||||
wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
|
||||
#endif
|
||||
#ifdef CONFIG_QUOTA
|
||||
char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
|
||||
int s_jquota_fmt; /* Format of quota to use */
|
||||
#endif
|
||||
unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
|
||||
struct rb_root system_blks;
|
||||
|
||||
#ifdef EXTENTS_STATS
|
||||
/* ext4 extents stats */
|
||||
unsigned long s_ext_min;
|
||||
unsigned long s_ext_max;
|
||||
unsigned long s_depth_max;
|
||||
spinlock_t s_ext_stats_lock;
|
||||
unsigned long s_ext_blocks;
|
||||
unsigned long s_ext_extents;
|
||||
#endif
|
||||
|
||||
/* for buddy allocator */
|
||||
struct ext4_group_info ***s_group_info;
|
||||
struct inode *s_buddy_cache;
|
||||
long s_blocks_reserved;
|
||||
spinlock_t s_reserve_lock;
|
||||
spinlock_t s_md_lock;
|
||||
tid_t s_last_transaction;
|
||||
unsigned short *s_mb_offsets;
|
||||
unsigned int *s_mb_maxs;
|
||||
|
||||
/* tunables */
|
||||
unsigned long s_stripe;
|
||||
unsigned int s_mb_stream_request;
|
||||
unsigned int s_mb_max_to_scan;
|
||||
unsigned int s_mb_min_to_scan;
|
||||
unsigned int s_mb_stats;
|
||||
unsigned int s_mb_order2_reqs;
|
||||
unsigned int s_mb_group_prealloc;
|
||||
/* where last allocation was done - for stream allocation */
|
||||
unsigned long s_mb_last_group;
|
||||
unsigned long s_mb_last_start;
|
||||
|
||||
/* history to debug policy */
|
||||
struct ext4_mb_history *s_mb_history;
|
||||
int s_mb_history_cur;
|
||||
int s_mb_history_max;
|
||||
int s_mb_history_num;
|
||||
spinlock_t s_mb_history_lock;
|
||||
int s_mb_history_filter;
|
||||
|
||||
/* stats for buddy allocator */
|
||||
spinlock_t s_mb_pa_lock;
|
||||
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
|
||||
atomic_t s_bal_success; /* we found long enough chunks */
|
||||
atomic_t s_bal_allocated; /* in blocks */
|
||||
atomic_t s_bal_ex_scanned; /* total extents scanned */
|
||||
atomic_t s_bal_goals; /* goal hits */
|
||||
atomic_t s_bal_breaks; /* too long searches */
|
||||
atomic_t s_bal_2orders; /* 2^order hits */
|
||||
spinlock_t s_bal_lock;
|
||||
unsigned long s_mb_buddies_generated;
|
||||
unsigned long long s_mb_generation_time;
|
||||
atomic_t s_mb_lost_chunks;
|
||||
atomic_t s_mb_preallocated;
|
||||
atomic_t s_mb_discarded;
|
||||
|
||||
/* locality groups */
|
||||
struct ext4_locality_group *s_locality_groups;
|
||||
|
||||
/* for write statistics */
|
||||
unsigned long s_sectors_written_start;
|
||||
u64 s_kbytes_written;
|
||||
|
||||
unsigned int s_log_groups_per_flex;
|
||||
struct flex_groups *s_flex_groups;
|
||||
};
|
||||
|
||||
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
|
||||
{
|
||||
return sb->s_fs_info;
|
||||
@ -704,7 +970,6 @@ static inline struct timespec ext4_current_time(struct inode *inode)
|
||||
current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
|
||||
}
|
||||
|
||||
|
||||
static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
|
||||
{
|
||||
return ino == EXT4_ROOT_INO ||
|
||||
@ -1014,6 +1279,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
|
||||
ext4_group_t block_group,
|
||||
struct buffer_head ** bh);
|
||||
extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
|
||||
struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
|
||||
ext4_group_t block_group);
|
||||
extern unsigned ext4_init_block_bitmap(struct super_block *sb,
|
||||
struct buffer_head *bh,
|
||||
ext4_group_t group,
|
||||
struct ext4_group_desc *desc);
|
||||
#define ext4_free_blocks_after_init(sb, group, desc) \
|
||||
ext4_init_block_bitmap(sb, NULL, group, desc)
|
||||
|
||||
/* dir.c */
|
||||
extern int ext4_check_dir_entry(const char *, struct inode *,
|
||||
@ -1038,6 +1311,11 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
|
||||
extern unsigned long ext4_count_free_inodes(struct super_block *);
|
||||
extern unsigned long ext4_count_dirs(struct super_block *);
|
||||
extern void ext4_check_inodes_bitmap(struct super_block *);
|
||||
extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
|
||||
struct buffer_head *bh,
|
||||
ext4_group_t group,
|
||||
struct ext4_group_desc *desc);
|
||||
extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
|
||||
|
||||
/* mballoc.c */
|
||||
extern long ext4_mb_stats;
|
||||
@ -1123,6 +1401,8 @@ extern void ext4_abort(struct super_block *, const char *, const char *, ...)
|
||||
__attribute__ ((format (printf, 3, 4)));
|
||||
extern void ext4_warning(struct super_block *, const char *, const char *, ...)
|
||||
__attribute__ ((format (printf, 3, 4)));
|
||||
extern void ext4_msg(struct super_block *, const char *, const char *, ...)
|
||||
__attribute__ ((format (printf, 3, 4)));
|
||||
extern void ext4_grp_locked_error(struct super_block *, ext4_group_t,
|
||||
const char *, const char *, ...)
|
||||
__attribute__ ((format (printf, 4, 5)));
|
||||
@ -1161,6 +1441,10 @@ extern void ext4_used_dirs_set(struct super_block *sb,
|
||||
struct ext4_group_desc *bg, __u32 count);
|
||||
extern void ext4_itable_unused_set(struct super_block *sb,
|
||||
struct ext4_group_desc *bg, __u32 count);
|
||||
extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
|
||||
struct ext4_group_desc *gdp);
|
||||
extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
|
||||
struct ext4_group_desc *gdp);
|
||||
|
||||
static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
|
||||
{
|
||||
@ -1228,6 +1512,18 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
|
||||
return grp_info[indexv][indexh];
|
||||
}
|
||||
|
||||
/*
|
||||
* Reading s_groups_count requires using smp_rmb() afterwards. See
|
||||
* the locking protocol documented in the comments of ext4_group_add()
|
||||
* in resize.c
|
||||
*/
|
||||
static inline ext4_group_t ext4_get_groups_count(struct super_block *sb)
|
||||
{
|
||||
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
|
||||
|
||||
smp_rmb();
|
||||
return ngroups;
|
||||
}
|
||||
|
||||
static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi,
|
||||
ext4_group_t block_group)
|
||||
@ -1283,33 +1579,25 @@ struct ext4_group_info {
|
||||
};
|
||||
|
||||
#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
|
||||
#define EXT4_GROUP_INFO_LOCKED_BIT 1
|
||||
|
||||
#define EXT4_MB_GRP_NEED_INIT(grp) \
|
||||
(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
|
||||
|
||||
static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb,
|
||||
ext4_group_t group)
|
||||
{
|
||||
return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group);
|
||||
}
|
||||
|
||||
static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
|
||||
{
|
||||
struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
|
||||
|
||||
bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
|
||||
spin_lock(ext4_group_lock_ptr(sb, group));
|
||||
}
|
||||
|
||||
static inline void ext4_unlock_group(struct super_block *sb,
|
||||
ext4_group_t group)
|
||||
{
|
||||
struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
|
||||
|
||||
bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
|
||||
}
|
||||
|
||||
static inline int ext4_is_group_locked(struct super_block *sb,
|
||||
ext4_group_t group)
|
||||
{
|
||||
struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
|
||||
|
||||
return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
|
||||
&(grinfo->bb_state));
|
||||
spin_unlock(ext4_group_lock_ptr(sb, group));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1326,11 +1614,21 @@ extern const struct file_operations ext4_file_operations;
|
||||
/* namei.c */
|
||||
extern const struct inode_operations ext4_dir_inode_operations;
|
||||
extern const struct inode_operations ext4_special_inode_operations;
|
||||
extern struct dentry *ext4_get_parent(struct dentry *child);
|
||||
|
||||
/* symlink.c */
|
||||
extern const struct inode_operations ext4_symlink_inode_operations;
|
||||
extern const struct inode_operations ext4_fast_symlink_inode_operations;
|
||||
|
||||
/* block_validity */
|
||||
extern void ext4_release_system_zone(struct super_block *sb);
|
||||
extern int ext4_setup_system_zone(struct super_block *sb);
|
||||
extern int __init init_ext4_system_zone(void);
|
||||
extern void exit_ext4_system_zone(void);
|
||||
extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
|
||||
ext4_fsblk_t start_blk,
|
||||
unsigned int count);
|
||||
|
||||
/* extents.c */
|
||||
extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
|
||||
extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
|
||||
@ -1338,17 +1636,15 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks,
|
||||
int chunk);
|
||||
extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
|
||||
ext4_lblk_t iblock, unsigned int max_blocks,
|
||||
struct buffer_head *bh_result,
|
||||
int create, int extend_disksize);
|
||||
struct buffer_head *bh_result, int flags);
|
||||
extern void ext4_ext_truncate(struct inode *);
|
||||
extern void ext4_ext_init(struct super_block *);
|
||||
extern void ext4_ext_release(struct super_block *);
|
||||
extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
|
||||
loff_t len);
|
||||
extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
|
||||
sector_t block, unsigned int max_blocks,
|
||||
struct buffer_head *bh, int create,
|
||||
int extend_disksize, int flag);
|
||||
extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
|
||||
sector_t block, unsigned int max_blocks,
|
||||
struct buffer_head *bh, int flags);
|
||||
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
__u64 start, __u64 len);
|
||||
|
||||
|
140
fs/ext4/ext4_i.h
140
fs/ext4/ext4_i.h
@ -1,140 +0,0 @@
|
||||
/*
|
||||
* ext4_i.h
|
||||
*
|
||||
* Copyright (C) 1992, 1993, 1994, 1995
|
||||
* Remy Card (card@masi.ibp.fr)
|
||||
* Laboratoire MASI - Institut Blaise Pascal
|
||||
* Universite Pierre et Marie Curie (Paris VI)
|
||||
*
|
||||
* from
|
||||
*
|
||||
* linux/include/linux/minix_fs_i.h
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
*/
|
||||
|
||||
#ifndef _EXT4_I
|
||||
#define _EXT4_I
|
||||
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/seqlock.h>
|
||||
#include <linux/mutex.h>
|
||||
|
||||
/* data type for block offset of block group */
|
||||
typedef int ext4_grpblk_t;
|
||||
|
||||
/* data type for filesystem-wide blocks number */
|
||||
typedef unsigned long long ext4_fsblk_t;
|
||||
|
||||
/* data type for file logical block number */
|
||||
typedef __u32 ext4_lblk_t;
|
||||
|
||||
/* data type for block group number */
|
||||
typedef unsigned int ext4_group_t;
|
||||
|
||||
/*
|
||||
* storage for cached extent
|
||||
*/
|
||||
struct ext4_ext_cache {
|
||||
ext4_fsblk_t ec_start;
|
||||
ext4_lblk_t ec_block;
|
||||
__u32 ec_len; /* must be 32bit to return holes */
|
||||
__u32 ec_type;
|
||||
};
|
||||
|
||||
/*
|
||||
* fourth extended file system inode data in memory
|
||||
*/
|
||||
struct ext4_inode_info {
|
||||
__le32 i_data[15]; /* unconverted */
|
||||
__u32 i_flags;
|
||||
ext4_fsblk_t i_file_acl;
|
||||
__u32 i_dtime;
|
||||
|
||||
/*
|
||||
* i_block_group is the number of the block group which contains
|
||||
* this file's inode. Constant across the lifetime of the inode,
|
||||
* it is used for making block allocation decisions - we try to
|
||||
* place a file's data blocks near its inode block, and new inodes
|
||||
* near to their parent directory's inode.
|
||||
*/
|
||||
ext4_group_t i_block_group;
|
||||
__u32 i_state; /* Dynamic state flags for ext4 */
|
||||
|
||||
ext4_lblk_t i_dir_start_lookup;
|
||||
#ifdef CONFIG_EXT4_FS_XATTR
|
||||
/*
|
||||
* Extended attributes can be read independently of the main file
|
||||
* data. Taking i_mutex even when reading would cause contention
|
||||
* between readers of EAs and writers of regular file data, so
|
||||
* instead we synchronize on xattr_sem when reading or changing
|
||||
* EAs.
|
||||
*/
|
||||
struct rw_semaphore xattr_sem;
|
||||
#endif
|
||||
#ifdef CONFIG_EXT4_FS_POSIX_ACL
|
||||
struct posix_acl *i_acl;
|
||||
struct posix_acl *i_default_acl;
|
||||
#endif
|
||||
|
||||
struct list_head i_orphan; /* unlinked but open inodes */
|
||||
|
||||
/*
|
||||
* i_disksize keeps track of what the inode size is ON DISK, not
|
||||
* in memory. During truncate, i_size is set to the new size by
|
||||
* the VFS prior to calling ext4_truncate(), but the filesystem won't
|
||||
* set i_disksize to 0 until the truncate is actually under way.
|
||||
*
|
||||
* The intent is that i_disksize always represents the blocks which
|
||||
* are used by this file. This allows recovery to restart truncate
|
||||
* on orphans if we crash during truncate. We actually write i_disksize
|
||||
* into the on-disk inode when writing inodes out, instead of i_size.
|
||||
*
|
||||
* The only time when i_disksize and i_size may be different is when
|
||||
* a truncate is in progress. The only things which change i_disksize
|
||||
* are ext4_get_block (growth) and ext4_truncate (shrinkth).
|
||||
*/
|
||||
loff_t i_disksize;
|
||||
|
||||
/*
|
||||
* i_data_sem is for serialising ext4_truncate() against
|
||||
* ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
|
||||
* data tree are chopped off during truncate. We can't do that in
|
||||
* ext4 because whenever we perform intermediate commits during
|
||||
* truncate, the inode and all the metadata blocks *must* be in a
|
||||
* consistent state which allows truncation of the orphans to restart
|
||||
* during recovery. Hence we must fix the get_block-vs-truncate race
|
||||
* by other means, so we have i_data_sem.
|
||||
*/
|
||||
struct rw_semaphore i_data_sem;
|
||||
struct inode vfs_inode;
|
||||
struct jbd2_inode jinode;
|
||||
|
||||
struct ext4_ext_cache i_cached_extent;
|
||||
/*
|
||||
* File creation time. Its function is same as that of
|
||||
* struct timespec i_{a,c,m}time in the generic inode.
|
||||
*/
|
||||
struct timespec i_crtime;
|
||||
|
||||
/* mballoc */
|
||||
struct list_head i_prealloc_list;
|
||||
spinlock_t i_prealloc_lock;
|
||||
|
||||
/* ialloc */
|
||||
ext4_group_t i_last_alloc_group;
|
||||
|
||||
/* allocation reservation info for delalloc */
|
||||
unsigned int i_reserved_data_blocks;
|
||||
unsigned int i_reserved_meta_blocks;
|
||||
unsigned int i_allocated_meta_blocks;
|
||||
unsigned short i_delalloc_reserved_flag;
|
||||
|
||||
/* on-disk additional length */
|
||||
__u16 i_extra_isize;
|
||||
|
||||
spinlock_t i_block_reservation_lock;
|
||||
};
|
||||
|
||||
#endif /* _EXT4_I */
|
@ -1,161 +0,0 @@
|
||||
/*
|
||||
* ext4_sb.h
|
||||
*
|
||||
* Copyright (C) 1992, 1993, 1994, 1995
|
||||
* Remy Card (card@masi.ibp.fr)
|
||||
* Laboratoire MASI - Institut Blaise Pascal
|
||||
* Universite Pierre et Marie Curie (Paris VI)
|
||||
*
|
||||
* from
|
||||
*
|
||||
* linux/include/linux/minix_fs_sb.h
|
||||
*
|
||||
* Copyright (C) 1991, 1992 Linus Torvalds
|
||||
*/
|
||||
|
||||
#ifndef _EXT4_SB
|
||||
#define _EXT4_SB
|
||||
|
||||
#ifdef __KERNEL__
|
||||
#include <linux/timer.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/blockgroup_lock.h>
|
||||
#include <linux/percpu_counter.h>
|
||||
#endif
|
||||
#include <linux/rbtree.h>
|
||||
|
||||
/*
|
||||
* fourth extended-fs super-block data in memory
|
||||
*/
|
||||
struct ext4_sb_info {
|
||||
unsigned long s_desc_size; /* Size of a group descriptor in bytes */
|
||||
unsigned long s_inodes_per_block;/* Number of inodes per block */
|
||||
unsigned long s_blocks_per_group;/* Number of blocks in a group */
|
||||
unsigned long s_inodes_per_group;/* Number of inodes in a group */
|
||||
unsigned long s_itb_per_group; /* Number of inode table blocks per group */
|
||||
unsigned long s_gdb_count; /* Number of group descriptor blocks */
|
||||
unsigned long s_desc_per_block; /* Number of group descriptors per block */
|
||||
ext4_group_t s_groups_count; /* Number of groups in the fs */
|
||||
unsigned long s_overhead_last; /* Last calculated overhead */
|
||||
unsigned long s_blocks_last; /* Last seen block count */
|
||||
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
|
||||
struct buffer_head * s_sbh; /* Buffer containing the super block */
|
||||
struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
|
||||
struct buffer_head **s_group_desc;
|
||||
unsigned long s_mount_opt;
|
||||
ext4_fsblk_t s_sb_block;
|
||||
uid_t s_resuid;
|
||||
gid_t s_resgid;
|
||||
unsigned short s_mount_state;
|
||||
unsigned short s_pad;
|
||||
int s_addr_per_block_bits;
|
||||
int s_desc_per_block_bits;
|
||||
int s_inode_size;
|
||||
int s_first_ino;
|
||||
unsigned int s_inode_readahead_blks;
|
||||
spinlock_t s_next_gen_lock;
|
||||
u32 s_next_generation;
|
||||
u32 s_hash_seed[4];
|
||||
int s_def_hash_version;
|
||||
int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
|
||||
struct percpu_counter s_freeblocks_counter;
|
||||
struct percpu_counter s_freeinodes_counter;
|
||||
struct percpu_counter s_dirs_counter;
|
||||
struct percpu_counter s_dirtyblocks_counter;
|
||||
struct blockgroup_lock *s_blockgroup_lock;
|
||||
struct proc_dir_entry *s_proc;
|
||||
struct kobject s_kobj;
|
||||
struct completion s_kobj_unregister;
|
||||
|
||||
/* Journaling */
|
||||
struct inode *s_journal_inode;
|
||||
struct journal_s *s_journal;
|
||||
struct list_head s_orphan;
|
||||
unsigned long s_commit_interval;
|
||||
u32 s_max_batch_time;
|
||||
u32 s_min_batch_time;
|
||||
struct block_device *journal_bdev;
|
||||
#ifdef CONFIG_JBD2_DEBUG
|
||||
struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
|
||||
wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
|
||||
#endif
|
||||
#ifdef CONFIG_QUOTA
|
||||
char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
|
||||
int s_jquota_fmt; /* Format of quota to use */
|
||||
#endif
|
||||
unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
|
||||
|
||||
#ifdef EXTENTS_STATS
|
||||
/* ext4 extents stats */
|
||||
unsigned long s_ext_min;
|
||||
unsigned long s_ext_max;
|
||||
unsigned long s_depth_max;
|
||||
spinlock_t s_ext_stats_lock;
|
||||
unsigned long s_ext_blocks;
|
||||
unsigned long s_ext_extents;
|
||||
#endif
|
||||
|
||||
/* for buddy allocator */
|
||||
struct ext4_group_info ***s_group_info;
|
||||
struct inode *s_buddy_cache;
|
||||
long s_blocks_reserved;
|
||||
spinlock_t s_reserve_lock;
|
||||
spinlock_t s_md_lock;
|
||||
tid_t s_last_transaction;
|
||||
unsigned short *s_mb_offsets;
|
||||
unsigned int *s_mb_maxs;
|
||||
|
||||
/* tunables */
|
||||
unsigned long s_stripe;
|
||||
unsigned int s_mb_stream_request;
|
||||
unsigned int s_mb_max_to_scan;
|
||||
unsigned int s_mb_min_to_scan;
|
||||
unsigned int s_mb_stats;
|
||||
unsigned int s_mb_order2_reqs;
|
||||
unsigned int s_mb_group_prealloc;
|
||||
/* where last allocation was done - for stream allocation */
|
||||
unsigned long s_mb_last_group;
|
||||
unsigned long s_mb_last_start;
|
||||
|
||||
/* history to debug policy */
|
||||
struct ext4_mb_history *s_mb_history;
|
||||
int s_mb_history_cur;
|
||||
int s_mb_history_max;
|
||||
int s_mb_history_num;
|
||||
spinlock_t s_mb_history_lock;
|
||||
int s_mb_history_filter;
|
||||
|
||||
/* stats for buddy allocator */
|
||||
spinlock_t s_mb_pa_lock;
|
||||
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
|
||||
atomic_t s_bal_success; /* we found long enough chunks */
|
||||
atomic_t s_bal_allocated; /* in blocks */
|
||||
atomic_t s_bal_ex_scanned; /* total extents scanned */
|
||||
atomic_t s_bal_goals; /* goal hits */
|
||||
atomic_t s_bal_breaks; /* too long searches */
|
||||
atomic_t s_bal_2orders; /* 2^order hits */
|
||||
spinlock_t s_bal_lock;
|
||||
unsigned long s_mb_buddies_generated;
|
||||
unsigned long long s_mb_generation_time;
|
||||
atomic_t s_mb_lost_chunks;
|
||||
atomic_t s_mb_preallocated;
|
||||
atomic_t s_mb_discarded;
|
||||
|
||||
/* locality groups */
|
||||
struct ext4_locality_group *s_locality_groups;
|
||||
|
||||
/* for write statistics */
|
||||
unsigned long s_sectors_written_start;
|
||||
u64 s_kbytes_written;
|
||||
|
||||
unsigned int s_log_groups_per_flex;
|
||||
struct flex_groups *s_flex_groups;
|
||||
};
|
||||
|
||||
static inline spinlock_t *
|
||||
sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group)
|
||||
{
|
||||
return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group);
|
||||
}
|
||||
|
||||
#endif /* _EXT4_SB */
|
@ -326,32 +326,18 @@ ext4_ext_max_entries(struct inode *inode, int depth)
|
||||
|
||||
static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
|
||||
{
|
||||
ext4_fsblk_t block = ext_pblock(ext), valid_block;
|
||||
ext4_fsblk_t block = ext_pblock(ext);
|
||||
int len = ext4_ext_get_actual_len(ext);
|
||||
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
|
||||
|
||||
valid_block = le32_to_cpu(es->s_first_data_block) +
|
||||
EXT4_SB(inode->i_sb)->s_gdb_count;
|
||||
if (unlikely(block <= valid_block ||
|
||||
((block + len) > ext4_blocks_count(es))))
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
|
||||
}
|
||||
|
||||
static int ext4_valid_extent_idx(struct inode *inode,
|
||||
struct ext4_extent_idx *ext_idx)
|
||||
{
|
||||
ext4_fsblk_t block = idx_pblock(ext_idx), valid_block;
|
||||
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
|
||||
ext4_fsblk_t block = idx_pblock(ext_idx);
|
||||
|
||||
valid_block = le32_to_cpu(es->s_first_data_block) +
|
||||
EXT4_SB(inode->i_sb)->s_gdb_count;
|
||||
if (unlikely(block <= valid_block ||
|
||||
(block >= ext4_blocks_count(es))))
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
|
||||
}
|
||||
|
||||
static int ext4_valid_extent_entries(struct inode *inode,
|
||||
@ -2097,12 +2083,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
|
||||
ex = EXT_LAST_EXTENT(eh);
|
||||
|
||||
ex_ee_block = le32_to_cpu(ex->ee_block);
|
||||
if (ext4_ext_is_uninitialized(ex))
|
||||
uninitialized = 1;
|
||||
ex_ee_len = ext4_ext_get_actual_len(ex);
|
||||
|
||||
while (ex >= EXT_FIRST_EXTENT(eh) &&
|
||||
ex_ee_block + ex_ee_len > start) {
|
||||
|
||||
if (ext4_ext_is_uninitialized(ex))
|
||||
uninitialized = 1;
|
||||
else
|
||||
uninitialized = 0;
|
||||
|
||||
ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len);
|
||||
path[depth].p_ext = ex;
|
||||
|
||||
@ -2784,7 +2774,7 @@ fix_extent_len:
|
||||
int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
|
||||
ext4_lblk_t iblock,
|
||||
unsigned int max_blocks, struct buffer_head *bh_result,
|
||||
int create, int extend_disksize)
|
||||
int flags)
|
||||
{
|
||||
struct ext4_ext_path *path = NULL;
|
||||
struct ext4_extent_header *eh;
|
||||
@ -2793,7 +2783,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
|
||||
int err = 0, depth, ret, cache_type;
|
||||
unsigned int allocated = 0;
|
||||
struct ext4_allocation_request ar;
|
||||
loff_t disksize;
|
||||
|
||||
__clear_bit(BH_New, &bh_result->b_state);
|
||||
ext_debug("blocks %u/%u requested for inode %u\n",
|
||||
@ -2803,7 +2792,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
|
||||
cache_type = ext4_ext_in_cache(inode, iblock, &newex);
|
||||
if (cache_type) {
|
||||
if (cache_type == EXT4_EXT_CACHE_GAP) {
|
||||
if (!create) {
|
||||
if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
|
||||
/*
|
||||
* block isn't allocated yet and
|
||||
* user doesn't want to allocate it
|
||||
@ -2869,9 +2858,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
|
||||
EXT4_EXT_CACHE_EXTENT);
|
||||
goto out;
|
||||
}
|
||||
if (create == EXT4_CREATE_UNINITIALIZED_EXT)
|
||||
if (flags & EXT4_GET_BLOCKS_UNINIT_EXT)
|
||||
goto out;
|
||||
if (!create) {
|
||||
if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
|
||||
if (allocated > max_blocks)
|
||||
allocated = max_blocks;
|
||||
/*
|
||||
* We have blocks reserved already. We
|
||||
* return allocated blocks so that delalloc
|
||||
@ -2879,8 +2870,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
|
||||
* the buffer head will be unmapped so that
|
||||
* a read from the block returns 0s.
|
||||
*/
|
||||
if (allocated > max_blocks)
|
||||
allocated = max_blocks;
|
||||
set_buffer_unwritten(bh_result);
|
||||
bh_result->b_bdev = inode->i_sb->s_bdev;
|
||||
bh_result->b_blocknr = newblock;
|
||||
@ -2903,7 +2892,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
|
||||
* requested block isn't allocated yet;
|
||||
* we couldn't try to create block if create flag is zero
|
||||
*/
|
||||
if (!create) {
|
||||
if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
|
||||
/*
|
||||
* put just found gap into cache to speed up
|
||||
* subsequent requests
|
||||
@ -2932,10 +2921,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
|
||||
* EXT_UNINIT_MAX_LEN.
|
||||
*/
|
||||
if (max_blocks > EXT_INIT_MAX_LEN &&
|
||||
create != EXT4_CREATE_UNINITIALIZED_EXT)
|
||||
!(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
|
||||
max_blocks = EXT_INIT_MAX_LEN;
|
||||
else if (max_blocks > EXT_UNINIT_MAX_LEN &&
|
||||
create == EXT4_CREATE_UNINITIALIZED_EXT)
|
||||
(flags & EXT4_GET_BLOCKS_UNINIT_EXT))
|
||||
max_blocks = EXT_UNINIT_MAX_LEN;
|
||||
|
||||
/* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
|
||||
@ -2966,7 +2955,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
|
||||
/* try to insert new extent into found leaf and return */
|
||||
ext4_ext_store_pblock(&newex, newblock);
|
||||
newex.ee_len = cpu_to_le16(ar.len);
|
||||
if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */
|
||||
if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) /* Mark uninitialized */
|
||||
ext4_ext_mark_uninitialized(&newex);
|
||||
err = ext4_ext_insert_extent(handle, inode, path, &newex);
|
||||
if (err) {
|
||||
@ -2983,18 +2972,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
|
||||
newblock = ext_pblock(&newex);
|
||||
allocated = ext4_ext_get_actual_len(&newex);
|
||||
outnew:
|
||||
if (extend_disksize) {
|
||||
disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits;
|
||||
if (disksize > i_size_read(inode))
|
||||
disksize = i_size_read(inode);
|
||||
if (disksize > EXT4_I(inode)->i_disksize)
|
||||
EXT4_I(inode)->i_disksize = disksize;
|
||||
}
|
||||
|
||||
set_buffer_new(bh_result);
|
||||
|
||||
/* Cache only when it is _not_ an uninitialized extent */
|
||||
if (create != EXT4_CREATE_UNINITIALIZED_EXT)
|
||||
if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
|
||||
ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
|
||||
EXT4_EXT_CACHE_EXTENT);
|
||||
out:
|
||||
@ -3150,9 +3131,10 @@ retry:
|
||||
ret = PTR_ERR(handle);
|
||||
break;
|
||||
}
|
||||
ret = ext4_get_blocks_wrap(handle, inode, block,
|
||||
max_blocks, &map_bh,
|
||||
EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
|
||||
map_bh.b_state = 0;
|
||||
ret = ext4_get_blocks(handle, inode, block,
|
||||
max_blocks, &map_bh,
|
||||
EXT4_GET_BLOCKS_CREATE_UNINIT_EXT);
|
||||
if (ret <= 0) {
|
||||
#ifdef EXT4FS_DEBUG
|
||||
WARN_ON(ret <= 0);
|
||||
@ -3195,7 +3177,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
|
||||
void *data)
|
||||
{
|
||||
struct fiemap_extent_info *fieinfo = data;
|
||||
unsigned long blksize_bits = inode->i_sb->s_blocksize_bits;
|
||||
unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
|
||||
__u64 logical;
|
||||
__u64 physical;
|
||||
__u64 length;
|
||||
@ -3242,9 +3224,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
|
||||
*
|
||||
* XXX this might miss a single-block extent at EXT_MAX_BLOCK
|
||||
*/
|
||||
if (logical + length - 1 == EXT_MAX_BLOCK ||
|
||||
ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK)
|
||||
if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
|
||||
newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
|
||||
loff_t size = i_size_read(inode);
|
||||
loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
|
||||
|
||||
flags |= FIEMAP_EXTENT_LAST;
|
||||
if ((flags & FIEMAP_EXTENT_DELALLOC) &&
|
||||
logical+length > size)
|
||||
length = (size - logical + bs - 1) & ~(bs-1);
|
||||
}
|
||||
|
||||
error = fiemap_fill_next_extent(fieinfo, logical, physical,
|
||||
length, flags);
|
||||
@ -3318,10 +3307,10 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
* Walk the extent tree gathering extent information.
|
||||
* ext4_ext_fiemap_cb will push extents back to user.
|
||||
*/
|
||||
down_write(&EXT4_I(inode)->i_data_sem);
|
||||
down_read(&EXT4_I(inode)->i_data_sem);
|
||||
error = ext4_ext_walk_space(inode, start_blk, len_blks,
|
||||
ext4_ext_fiemap_cb, fieinfo);
|
||||
up_write(&EXT4_I(inode)->i_data_sem);
|
||||
up_read(&EXT4_I(inode)->i_data_sem);
|
||||
}
|
||||
|
||||
return error;
|
||||
|
@ -1,29 +0,0 @@
|
||||
/*
|
||||
* linux/fs/ext4/group.h
|
||||
*
|
||||
* Copyright (C) 2007 Cluster File Systems, Inc
|
||||
*
|
||||
* Author: Andreas Dilger <adilger@clusterfs.com>
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_EXT4_GROUP_H
|
||||
#define _LINUX_EXT4_GROUP_H
|
||||
|
||||
extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
|
||||
struct ext4_group_desc *gdp);
|
||||
extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
|
||||
struct ext4_group_desc *gdp);
|
||||
struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
|
||||
ext4_group_t block_group);
|
||||
extern unsigned ext4_init_block_bitmap(struct super_block *sb,
|
||||
struct buffer_head *bh,
|
||||
ext4_group_t group,
|
||||
struct ext4_group_desc *desc);
|
||||
#define ext4_free_blocks_after_init(sb, group, desc) \
|
||||
ext4_init_block_bitmap(sb, NULL, group, desc)
|
||||
extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
|
||||
struct buffer_head *bh,
|
||||
ext4_group_t group,
|
||||
struct ext4_group_desc *desc);
|
||||
extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
|
||||
#endif /* _LINUX_EXT4_GROUP_H */
|
@ -27,7 +27,6 @@
|
||||
#include "ext4_jbd2.h"
|
||||
#include "xattr.h"
|
||||
#include "acl.h"
|
||||
#include "group.h"
|
||||
|
||||
/*
|
||||
* ialloc.c contains the inodes allocation and deallocation routines
|
||||
@ -123,16 +122,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
|
||||
unlock_buffer(bh);
|
||||
return bh;
|
||||
}
|
||||
spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
|
||||
ext4_lock_group(sb, block_group);
|
||||
if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
|
||||
ext4_init_inode_bitmap(sb, bh, block_group, desc);
|
||||
set_bitmap_uptodate(bh);
|
||||
set_buffer_uptodate(bh);
|
||||
spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
|
||||
ext4_unlock_group(sb, block_group);
|
||||
unlock_buffer(bh);
|
||||
return bh;
|
||||
}
|
||||
spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
|
||||
ext4_unlock_group(sb, block_group);
|
||||
if (buffer_uptodate(bh)) {
|
||||
/*
|
||||
* if not uninit if bh is uptodate,
|
||||
@ -247,9 +246,8 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
|
||||
goto error_return;
|
||||
|
||||
/* Ok, now we can actually update the inode bitmaps.. */
|
||||
spin_lock(sb_bgl_lock(sbi, block_group));
|
||||
cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
|
||||
spin_unlock(sb_bgl_lock(sbi, block_group));
|
||||
cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group),
|
||||
bit, bitmap_bh->b_data);
|
||||
if (!cleared)
|
||||
ext4_error(sb, "ext4_free_inode",
|
||||
"bit already cleared for inode %lu", ino);
|
||||
@ -261,7 +259,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
|
||||
if (fatal) goto error_return;
|
||||
|
||||
if (gdp) {
|
||||
spin_lock(sb_bgl_lock(sbi, block_group));
|
||||
ext4_lock_group(sb, block_group);
|
||||
count = ext4_free_inodes_count(sb, gdp) + 1;
|
||||
ext4_free_inodes_set(sb, gdp, count);
|
||||
if (is_directory) {
|
||||
@ -277,7 +275,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
|
||||
}
|
||||
gdp->bg_checksum = ext4_group_desc_csum(sbi,
|
||||
block_group, gdp);
|
||||
spin_unlock(sb_bgl_lock(sbi, block_group));
|
||||
ext4_unlock_group(sb, block_group);
|
||||
percpu_counter_inc(&sbi->s_freeinodes_counter);
|
||||
if (is_directory)
|
||||
percpu_counter_dec(&sbi->s_dirs_counter);
|
||||
@ -316,7 +314,7 @@ error_return:
|
||||
static int find_group_dir(struct super_block *sb, struct inode *parent,
|
||||
ext4_group_t *best_group)
|
||||
{
|
||||
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
|
||||
ext4_group_t ngroups = ext4_get_groups_count(sb);
|
||||
unsigned int freei, avefreei;
|
||||
struct ext4_group_desc *desc, *best_desc = NULL;
|
||||
ext4_group_t group;
|
||||
@ -349,11 +347,10 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
|
||||
{
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
struct ext4_group_desc *desc;
|
||||
struct buffer_head *bh;
|
||||
struct flex_groups *flex_group = sbi->s_flex_groups;
|
||||
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
|
||||
ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group);
|
||||
ext4_group_t ngroups = sbi->s_groups_count;
|
||||
ext4_group_t ngroups = ext4_get_groups_count(sb);
|
||||
int flex_size = ext4_flex_bg_size(sbi);
|
||||
ext4_group_t best_flex = parent_fbg_group;
|
||||
int blocks_per_flex = sbi->s_blocks_per_group * flex_size;
|
||||
@ -362,7 +359,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent,
|
||||
ext4_group_t n_fbg_groups;
|
||||
ext4_group_t i;
|
||||
|
||||
n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >>
|
||||
n_fbg_groups = (ngroups + flex_size - 1) >>
|
||||
sbi->s_log_groups_per_flex;
|
||||
|
||||
find_close_to_parent:
|
||||
@ -404,7 +401,7 @@ find_close_to_parent:
|
||||
found_flexbg:
|
||||
for (i = best_flex * flex_size; i < ngroups &&
|
||||
i < (best_flex + 1) * flex_size; i++) {
|
||||
desc = ext4_get_group_desc(sb, i, &bh);
|
||||
desc = ext4_get_group_desc(sb, i, NULL);
|
||||
if (ext4_free_inodes_count(sb, desc)) {
|
||||
*best_group = i;
|
||||
goto out;
|
||||
@ -478,20 +475,21 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
|
||||
{
|
||||
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
ext4_group_t ngroups = sbi->s_groups_count;
|
||||
ext4_group_t real_ngroups = ext4_get_groups_count(sb);
|
||||
int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
|
||||
unsigned int freei, avefreei;
|
||||
ext4_fsblk_t freeb, avefreeb;
|
||||
unsigned int ndirs;
|
||||
int max_dirs, min_inodes;
|
||||
ext4_grpblk_t min_blocks;
|
||||
ext4_group_t i, grp, g;
|
||||
ext4_group_t i, grp, g, ngroups;
|
||||
struct ext4_group_desc *desc;
|
||||
struct orlov_stats stats;
|
||||
int flex_size = ext4_flex_bg_size(sbi);
|
||||
|
||||
ngroups = real_ngroups;
|
||||
if (flex_size > 1) {
|
||||
ngroups = (ngroups + flex_size - 1) >>
|
||||
ngroups = (real_ngroups + flex_size - 1) >>
|
||||
sbi->s_log_groups_per_flex;
|
||||
parent_group >>= sbi->s_log_groups_per_flex;
|
||||
}
|
||||
@ -543,7 +541,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
|
||||
*/
|
||||
grp *= flex_size;
|
||||
for (i = 0; i < flex_size; i++) {
|
||||
if (grp+i >= sbi->s_groups_count)
|
||||
if (grp+i >= real_ngroups)
|
||||
break;
|
||||
desc = ext4_get_group_desc(sb, grp+i, NULL);
|
||||
if (desc && ext4_free_inodes_count(sb, desc)) {
|
||||
@ -583,7 +581,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
|
||||
}
|
||||
|
||||
fallback:
|
||||
ngroups = sbi->s_groups_count;
|
||||
ngroups = real_ngroups;
|
||||
avefreei = freei / ngroups;
|
||||
fallback_retry:
|
||||
parent_group = EXT4_I(parent)->i_block_group;
|
||||
@ -613,9 +611,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
|
||||
ext4_group_t *group, int mode)
|
||||
{
|
||||
ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
|
||||
ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
|
||||
ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
|
||||
struct ext4_group_desc *desc;
|
||||
ext4_group_t i, last;
|
||||
int flex_size = ext4_flex_bg_size(EXT4_SB(sb));
|
||||
|
||||
/*
|
||||
@ -708,10 +705,10 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
|
||||
|
||||
/*
|
||||
* claim the inode from the inode bitmap. If the group
|
||||
* is uninit we need to take the group's sb_bgl_lock
|
||||
* is uninit we need to take the group's ext4_group_lock
|
||||
* and clear the uninit flag. The inode bitmap update
|
||||
* and group desc uninit flag clear should be done
|
||||
* after holding sb_bgl_lock so that ext4_read_inode_bitmap
|
||||
* after holding ext4_group_lock so that ext4_read_inode_bitmap
|
||||
* doesn't race with the ext4_claim_inode
|
||||
*/
|
||||
static int ext4_claim_inode(struct super_block *sb,
|
||||
@ -722,7 +719,7 @@ static int ext4_claim_inode(struct super_block *sb,
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
|
||||
|
||||
spin_lock(sb_bgl_lock(sbi, group));
|
||||
ext4_lock_group(sb, group);
|
||||
if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
|
||||
/* not a free inode */
|
||||
retval = 1;
|
||||
@ -731,7 +728,7 @@ static int ext4_claim_inode(struct super_block *sb,
|
||||
ino++;
|
||||
if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
|
||||
ino > EXT4_INODES_PER_GROUP(sb)) {
|
||||
spin_unlock(sb_bgl_lock(sbi, group));
|
||||
ext4_unlock_group(sb, group);
|
||||
ext4_error(sb, __func__,
|
||||
"reserved inode or inode > inodes count - "
|
||||
"block_group = %u, inode=%lu", group,
|
||||
@ -780,7 +777,7 @@ static int ext4_claim_inode(struct super_block *sb,
|
||||
}
|
||||
gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
|
||||
err_ret:
|
||||
spin_unlock(sb_bgl_lock(sbi, group));
|
||||
ext4_unlock_group(sb, group);
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -799,11 +796,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
|
||||
struct super_block *sb;
|
||||
struct buffer_head *inode_bitmap_bh = NULL;
|
||||
struct buffer_head *group_desc_bh;
|
||||
ext4_group_t group = 0;
|
||||
ext4_group_t ngroups, group = 0;
|
||||
unsigned long ino = 0;
|
||||
struct inode *inode;
|
||||
struct ext4_group_desc *gdp = NULL;
|
||||
struct ext4_super_block *es;
|
||||
struct ext4_inode_info *ei;
|
||||
struct ext4_sb_info *sbi;
|
||||
int ret2, err = 0;
|
||||
@ -818,15 +814,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode)
|
||||
return ERR_PTR(-EPERM);
|
||||
|
||||
sb = dir->i_sb;
|
||||
ngroups = ext4_get_groups_count(sb);
|
||||
trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id,
|
||||
dir->i_ino, mode);
|
||||
inode = new_inode(sb);
|
||||
if (!inode)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
ei = EXT4_I(inode);
|
||||
|
||||
sbi = EXT4_SB(sb);
|
||||
es = sbi->s_es;
|
||||
|
||||
if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) {
|
||||
ret2 = find_group_flex(sb, dir, &group);
|
||||
@ -856,7 +851,7 @@ got_group:
|
||||
if (ret2 == -1)
|
||||
goto out;
|
||||
|
||||
for (i = 0; i < sbi->s_groups_count; i++) {
|
||||
for (i = 0; i < ngroups; i++) {
|
||||
err = -EIO;
|
||||
|
||||
gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
|
||||
@ -917,7 +912,7 @@ repeat_in_this_group:
|
||||
* group descriptor metadata has not yet been updated.
|
||||
* So we just go onto the next blockgroup.
|
||||
*/
|
||||
if (++group == sbi->s_groups_count)
|
||||
if (++group == ngroups)
|
||||
group = 0;
|
||||
}
|
||||
err = -ENOSPC;
|
||||
@ -938,7 +933,7 @@ got:
|
||||
}
|
||||
|
||||
free = 0;
|
||||
spin_lock(sb_bgl_lock(sbi, group));
|
||||
ext4_lock_group(sb, group);
|
||||
/* recheck and clear flag under lock if we still need to */
|
||||
if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
|
||||
free = ext4_free_blocks_after_init(sb, group, gdp);
|
||||
@ -947,7 +942,7 @@ got:
|
||||
gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
|
||||
gdp);
|
||||
}
|
||||
spin_unlock(sb_bgl_lock(sbi, group));
|
||||
ext4_unlock_group(sb, group);
|
||||
|
||||
/* Don't need to dirty bitmap block if we didn't change it */
|
||||
if (free) {
|
||||
@ -1158,7 +1153,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
|
||||
{
|
||||
unsigned long desc_count;
|
||||
struct ext4_group_desc *gdp;
|
||||
ext4_group_t i;
|
||||
ext4_group_t i, ngroups = ext4_get_groups_count(sb);
|
||||
#ifdef EXT4FS_DEBUG
|
||||
struct ext4_super_block *es;
|
||||
unsigned long bitmap_count, x;
|
||||
@ -1168,7 +1163,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
|
||||
desc_count = 0;
|
||||
bitmap_count = 0;
|
||||
gdp = NULL;
|
||||
for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
|
||||
for (i = 0; i < ngroups; i++) {
|
||||
gdp = ext4_get_group_desc(sb, i, NULL);
|
||||
if (!gdp)
|
||||
continue;
|
||||
@ -1190,7 +1185,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
|
||||
return desc_count;
|
||||
#else
|
||||
desc_count = 0;
|
||||
for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
|
||||
for (i = 0; i < ngroups; i++) {
|
||||
gdp = ext4_get_group_desc(sb, i, NULL);
|
||||
if (!gdp)
|
||||
continue;
|
||||
@ -1205,9 +1200,9 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
|
||||
unsigned long ext4_count_dirs(struct super_block * sb)
|
||||
{
|
||||
unsigned long count = 0;
|
||||
ext4_group_t i;
|
||||
ext4_group_t i, ngroups = ext4_get_groups_count(sb);
|
||||
|
||||
for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
|
||||
for (i = 0; i < ngroups; i++) {
|
||||
struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
|
||||
if (!gdp)
|
||||
continue;
|
||||
|
599
fs/ext4/inode.c
599
fs/ext4/inode.c
@ -372,20 +372,21 @@ static int ext4_block_to_path(struct inode *inode,
|
||||
}
|
||||
|
||||
static int __ext4_check_blockref(const char *function, struct inode *inode,
|
||||
__le32 *p, unsigned int max) {
|
||||
|
||||
unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es);
|
||||
__le32 *p, unsigned int max)
|
||||
{
|
||||
__le32 *bref = p;
|
||||
unsigned int blk;
|
||||
|
||||
while (bref < p+max) {
|
||||
if (unlikely(le32_to_cpu(*bref) >= maxblocks)) {
|
||||
blk = le32_to_cpu(*bref++);
|
||||
if (blk &&
|
||||
unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
|
||||
blk, 1))) {
|
||||
ext4_error(inode->i_sb, function,
|
||||
"block reference %u >= max (%u) "
|
||||
"in inode #%lu, offset=%d",
|
||||
le32_to_cpu(*bref), maxblocks,
|
||||
inode->i_ino, (int)(bref-p));
|
||||
"invalid block reference %u "
|
||||
"in inode #%lu", blk, inode->i_ino);
|
||||
return -EIO;
|
||||
}
|
||||
bref++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -892,6 +893,10 @@ err_out:
|
||||
}
|
||||
|
||||
/*
|
||||
* The ext4_ind_get_blocks() function handles non-extents inodes
|
||||
* (i.e., using the traditional indirect/double-indirect i_blocks
|
||||
* scheme) for ext4_get_blocks().
|
||||
*
|
||||
* Allocation strategy is simple: if we have to allocate something, we will
|
||||
* have to go the whole way to leaf. So let's do it before attaching anything
|
||||
* to tree, set linkage between the newborn blocks, write them if sync is
|
||||
@ -909,15 +914,16 @@ err_out:
|
||||
* return = 0, if plain lookup failed.
|
||||
* return < 0, error case.
|
||||
*
|
||||
*
|
||||
* Need to be called with
|
||||
* down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
|
||||
* (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
|
||||
* The ext4_ind_get_blocks() function should be called with
|
||||
* down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem
|
||||
* blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or
|
||||
* down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system
|
||||
* blocks.
|
||||
*/
|
||||
static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
|
||||
static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
|
||||
ext4_lblk_t iblock, unsigned int maxblocks,
|
||||
struct buffer_head *bh_result,
|
||||
int create, int extend_disksize)
|
||||
int flags)
|
||||
{
|
||||
int err = -EIO;
|
||||
ext4_lblk_t offsets[4];
|
||||
@ -927,14 +933,11 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
|
||||
int indirect_blks;
|
||||
int blocks_to_boundary = 0;
|
||||
int depth;
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
int count = 0;
|
||||
ext4_fsblk_t first_block = 0;
|
||||
loff_t disksize;
|
||||
|
||||
|
||||
J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
|
||||
J_ASSERT(handle != NULL || create == 0);
|
||||
J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
|
||||
depth = ext4_block_to_path(inode, iblock, offsets,
|
||||
&blocks_to_boundary);
|
||||
|
||||
@ -963,7 +966,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
|
||||
}
|
||||
|
||||
/* Next simple case - plain lookup or failed read of indirect block */
|
||||
if (!create || err == -EIO)
|
||||
if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
|
||||
goto cleanup;
|
||||
|
||||
/*
|
||||
@ -997,19 +1000,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
|
||||
if (!err)
|
||||
err = ext4_splice_branch(handle, inode, iblock,
|
||||
partial, indirect_blks, count);
|
||||
/*
|
||||
* i_disksize growing is protected by i_data_sem. Don't forget to
|
||||
* protect it if you're about to implement concurrent
|
||||
* ext4_get_block() -bzzz
|
||||
*/
|
||||
if (!err && extend_disksize) {
|
||||
disksize = ((loff_t) iblock + count) << inode->i_blkbits;
|
||||
if (disksize > i_size_read(inode))
|
||||
disksize = i_size_read(inode);
|
||||
if (disksize > ei->i_disksize)
|
||||
ei->i_disksize = disksize;
|
||||
}
|
||||
if (err)
|
||||
else
|
||||
goto cleanup;
|
||||
|
||||
set_buffer_new(bh_result);
|
||||
@ -1120,8 +1111,23 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
|
||||
ext4_discard_preallocations(inode);
|
||||
}
|
||||
|
||||
/*
 * check_block_validity() - sanity-check a logical-to-physical block mapping.
 *
 * @inode:   inode the mapping belongs to
 * @logical: logical block number (only used in the error message)
 * @phys:    first physical block of the mapped range
 * @len:     number of blocks in the mapped range
 *
 * Hands the physical range to ext4_data_block_valid().  When that check
 * rejects the range, the mapping is reported through ext4_error(),
 * WARN_ON(1) is triggered, and -EIO is returned.  Returns 0 when the
 * range is accepted.
 */
static int check_block_validity(struct inode *inode, sector_t logical,
				sector_t phys, int len)
{
	if (ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len))
		return 0;

	/*
	 * Pass __func__ instead of a hand-typed copy of the function name
	 * so the message cannot go stale if this helper is ever renamed.
	 */
	ext4_error(inode->i_sb, __func__,
		   "inode #%lu logical block %llu mapped to %llu "
		   "(size %d)", inode->i_ino,
		   (unsigned long long) logical,
		   (unsigned long long) phys, len);
	WARN_ON(1);
	return -EIO;
}
|
||||
|
||||
/*
|
||||
* The ext4_get_blocks_wrap() function try to look up the requested blocks,
|
||||
* The ext4_get_blocks() function tries to look up the requested blocks,
|
||||
* and returns if the blocks are already mapped.
|
||||
*
|
||||
* Otherwise it takes the write lock of the i_data_sem and allocates blocks
|
||||
@ -1129,7 +1135,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
|
||||
* mapped.
|
||||
*
|
||||
* If file type is extents based, it will call ext4_ext_get_blocks(),
|
||||
* Otherwise, call with ext4_get_blocks_handle() to handle indirect mapping
|
||||
* Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping
|
||||
* based files
|
||||
*
|
||||
* On success, it returns the number of blocks being mapped or allocated.
|
||||
@ -1142,9 +1148,9 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
|
||||
*
|
||||
* It returns the error in case of allocation failure.
|
||||
*/
|
||||
int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
|
||||
unsigned int max_blocks, struct buffer_head *bh,
|
||||
int create, int extend_disksize, int flag)
|
||||
int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
|
||||
unsigned int max_blocks, struct buffer_head *bh,
|
||||
int flags)
|
||||
{
|
||||
int retval;
|
||||
|
||||
@ -1152,21 +1158,28 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
|
||||
clear_buffer_unwritten(bh);
|
||||
|
||||
/*
|
||||
* Try to see if we can get the block without requesting
|
||||
* for new file system block.
|
||||
* Try to see if we can get the block without requesting a new
|
||||
* file system block.
|
||||
*/
|
||||
down_read((&EXT4_I(inode)->i_data_sem));
|
||||
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
|
||||
retval = ext4_ext_get_blocks(handle, inode, block, max_blocks,
|
||||
bh, 0, 0);
|
||||
bh, 0);
|
||||
} else {
|
||||
retval = ext4_get_blocks_handle(handle,
|
||||
inode, block, max_blocks, bh, 0, 0);
|
||||
retval = ext4_ind_get_blocks(handle, inode, block, max_blocks,
|
||||
bh, 0);
|
||||
}
|
||||
up_read((&EXT4_I(inode)->i_data_sem));
|
||||
|
||||
if (retval > 0 && buffer_mapped(bh)) {
|
||||
int ret = check_block_validity(inode, block,
|
||||
bh->b_blocknr, retval);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* If it is only a block(s) look up */
|
||||
if (!create)
|
||||
if ((flags & EXT4_GET_BLOCKS_CREATE) == 0)
|
||||
return retval;
|
||||
|
||||
/*
|
||||
@ -1205,7 +1218,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
|
||||
* let the underlying get_block() function know to
|
||||
* avoid double accounting
|
||||
*/
|
||||
if (flag)
|
||||
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
|
||||
EXT4_I(inode)->i_delalloc_reserved_flag = 1;
|
||||
/*
|
||||
* We need to check for EXT4 here because migrate
|
||||
@ -1213,10 +1226,10 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
|
||||
*/
|
||||
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
|
||||
retval = ext4_ext_get_blocks(handle, inode, block, max_blocks,
|
||||
bh, create, extend_disksize);
|
||||
bh, flags);
|
||||
} else {
|
||||
retval = ext4_get_blocks_handle(handle, inode, block,
|
||||
max_blocks, bh, create, extend_disksize);
|
||||
retval = ext4_ind_get_blocks(handle, inode, block,
|
||||
max_blocks, bh, flags);
|
||||
|
||||
if (retval > 0 && buffer_new(bh)) {
|
||||
/*
|
||||
@ -1229,18 +1242,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
|
||||
}
|
||||
}
|
||||
|
||||
if (flag) {
|
||||
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
|
||||
EXT4_I(inode)->i_delalloc_reserved_flag = 0;
|
||||
/*
|
||||
* Update reserved blocks/metadata blocks
|
||||
* after successful block allocation
|
||||
* which were deferred till now
|
||||
*/
|
||||
if ((retval > 0) && buffer_delay(bh))
|
||||
ext4_da_update_reserve_space(inode, retval);
|
||||
}
|
||||
|
||||
/*
|
||||
* Update reserved blocks/metadata blocks after successful
|
||||
* block allocation which had been deferred till now.
|
||||
*/
|
||||
if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
|
||||
ext4_da_update_reserve_space(inode, retval);
|
||||
|
||||
up_write((&EXT4_I(inode)->i_data_sem));
|
||||
if (retval > 0 && buffer_mapped(bh)) {
|
||||
int ret = check_block_validity(inode, block,
|
||||
bh->b_blocknr, retval);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
@ -1268,8 +1286,8 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
|
||||
started = 1;
|
||||
}
|
||||
|
||||
ret = ext4_get_blocks_wrap(handle, inode, iblock,
|
||||
max_blocks, bh_result, create, 0, 0);
|
||||
ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result,
|
||||
create ? EXT4_GET_BLOCKS_CREATE : 0);
|
||||
if (ret > 0) {
|
||||
bh_result->b_size = (ret << inode->i_blkbits);
|
||||
ret = 0;
|
||||
@ -1288,17 +1306,19 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
|
||||
{
|
||||
struct buffer_head dummy;
|
||||
int fatal = 0, err;
|
||||
int flags = 0;
|
||||
|
||||
J_ASSERT(handle != NULL || create == 0);
|
||||
|
||||
dummy.b_state = 0;
|
||||
dummy.b_blocknr = -1000;
|
||||
buffer_trace_init(&dummy.b_history);
|
||||
err = ext4_get_blocks_wrap(handle, inode, block, 1,
|
||||
&dummy, create, 1, 0);
|
||||
if (create)
|
||||
flags |= EXT4_GET_BLOCKS_CREATE;
|
||||
err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags);
|
||||
/*
|
||||
* ext4_get_blocks_handle() returns number of blocks
|
||||
* mapped. 0 in case of a HOLE.
|
||||
* ext4_get_blocks() returns number of blocks mapped. 0 in
|
||||
* case of a HOLE.
|
||||
*/
|
||||
if (err > 0) {
|
||||
if (err > 1)
|
||||
@ -1439,7 +1459,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
|
||||
struct page **pagep, void **fsdata)
|
||||
{
|
||||
struct inode *inode = mapping->host;
|
||||
int ret, needed_blocks = ext4_writepage_trans_blocks(inode);
|
||||
int ret, needed_blocks;
|
||||
handle_t *handle;
|
||||
int retries = 0;
|
||||
struct page *page;
|
||||
@ -1450,6 +1470,11 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
|
||||
"dev %s ino %lu pos %llu len %u flags %u",
|
||||
inode->i_sb->s_id, inode->i_ino,
|
||||
(unsigned long long) pos, len, flags);
|
||||
/*
|
||||
* Reserve one block more for addition to orphan list in case
|
||||
* we allocate blocks but write fails for some reason
|
||||
*/
|
||||
needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
|
||||
index = pos >> PAGE_CACHE_SHIFT;
|
||||
from = pos & (PAGE_CACHE_SIZE - 1);
|
||||
to = from + len;
|
||||
@ -1483,15 +1508,30 @@ retry:
|
||||
|
||||
if (ret) {
|
||||
unlock_page(page);
|
||||
ext4_journal_stop(handle);
|
||||
page_cache_release(page);
|
||||
/*
|
||||
* block_write_begin may have instantiated a few blocks
|
||||
* outside i_size. Trim these off again. Don't need
|
||||
* i_size_read because we hold i_mutex.
|
||||
*
|
||||
* Add inode to orphan list in case we crash before
|
||||
* truncate finishes
|
||||
*/
|
||||
if (pos + len > inode->i_size)
|
||||
ext4_orphan_add(handle, inode);
|
||||
|
||||
ext4_journal_stop(handle);
|
||||
if (pos + len > inode->i_size) {
|
||||
vmtruncate(inode, inode->i_size);
|
||||
/*
|
||||
* If vmtruncate failed early the inode might
|
||||
* still be on the orphan list; we need to
|
||||
* make sure the inode is removed from the
|
||||
* orphan list in that case.
|
||||
*/
|
||||
if (inode->i_nlink)
|
||||
ext4_orphan_del(NULL, inode);
|
||||
}
|
||||
}
|
||||
|
||||
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
|
||||
@ -1509,6 +1549,52 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh)
|
||||
return ext4_handle_dirty_metadata(handle, NULL, bh);
|
||||
}
|
||||
|
||||
static int ext4_generic_write_end(struct file *file,
|
||||
struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned copied,
|
||||
struct page *page, void *fsdata)
|
||||
{
|
||||
int i_size_changed = 0;
|
||||
struct inode *inode = mapping->host;
|
||||
handle_t *handle = ext4_journal_current_handle();
|
||||
|
||||
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
|
||||
|
||||
/*
|
||||
* No need to use i_size_read() here, the i_size
|
||||
* cannot change under us because we hold i_mutex.
|
||||
*
|
||||
* But it's important to update i_size while still holding page lock:
|
||||
* page writeout could otherwise come in and zero beyond i_size.
|
||||
*/
|
||||
if (pos + copied > inode->i_size) {
|
||||
i_size_write(inode, pos + copied);
|
||||
i_size_changed = 1;
|
||||
}
|
||||
|
||||
if (pos + copied > EXT4_I(inode)->i_disksize) {
|
||||
/* We need to mark inode dirty even if
|
||||
* new_i_size is less that inode->i_size
|
||||
* bu greater than i_disksize.(hint delalloc)
|
||||
*/
|
||||
ext4_update_i_disksize(inode, (pos + copied));
|
||||
i_size_changed = 1;
|
||||
}
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
|
||||
/*
|
||||
* Don't mark the inode dirty under page lock. First, it unnecessarily
|
||||
* makes the holding time of page lock longer. Second, it forces lock
|
||||
* ordering of page lock and transaction start for journaling
|
||||
* filesystems.
|
||||
*/
|
||||
if (i_size_changed)
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
|
||||
return copied;
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to pick up the new inode size which generic_commit_write gave us
|
||||
* `file' can be NULL - eg, when called from page_symlink().
|
||||
@ -1532,21 +1618,15 @@ static int ext4_ordered_write_end(struct file *file,
|
||||
ret = ext4_jbd2_file_inode(handle, inode);
|
||||
|
||||
if (ret == 0) {
|
||||
loff_t new_i_size;
|
||||
|
||||
new_i_size = pos + copied;
|
||||
if (new_i_size > EXT4_I(inode)->i_disksize) {
|
||||
ext4_update_i_disksize(inode, new_i_size);
|
||||
/* We need to mark inode dirty even if
|
||||
* new_i_size is less that inode->i_size
|
||||
* bu greater than i_disksize.(hint delalloc)
|
||||
*/
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
}
|
||||
|
||||
ret2 = generic_write_end(file, mapping, pos, len, copied,
|
||||
ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
|
||||
page, fsdata);
|
||||
copied = ret2;
|
||||
if (pos + len > inode->i_size)
|
||||
/* if we have allocated more blocks and copied
|
||||
* less. We will have blocks allocated outside
|
||||
* inode->i_size. So truncate them
|
||||
*/
|
||||
ext4_orphan_add(handle, inode);
|
||||
if (ret2 < 0)
|
||||
ret = ret2;
|
||||
}
|
||||
@ -1554,6 +1634,18 @@ static int ext4_ordered_write_end(struct file *file,
|
||||
if (!ret)
|
||||
ret = ret2;
|
||||
|
||||
if (pos + len > inode->i_size) {
|
||||
vmtruncate(inode, inode->i_size);
|
||||
/*
|
||||
* If vmtruncate failed early the inode might still be
|
||||
* on the orphan list; we need to make sure the inode
|
||||
* is removed from the orphan list in that case.
|
||||
*/
|
||||
if (inode->i_nlink)
|
||||
ext4_orphan_del(NULL, inode);
|
||||
}
|
||||
|
||||
|
||||
return ret ? ret : copied;
|
||||
}
|
||||
|
||||
@ -1565,25 +1657,21 @@ static int ext4_writeback_write_end(struct file *file,
|
||||
handle_t *handle = ext4_journal_current_handle();
|
||||
struct inode *inode = mapping->host;
|
||||
int ret = 0, ret2;
|
||||
loff_t new_i_size;
|
||||
|
||||
trace_mark(ext4_writeback_write_end,
|
||||
"dev %s ino %lu pos %llu len %u copied %u",
|
||||
inode->i_sb->s_id, inode->i_ino,
|
||||
(unsigned long long) pos, len, copied);
|
||||
new_i_size = pos + copied;
|
||||
if (new_i_size > EXT4_I(inode)->i_disksize) {
|
||||
ext4_update_i_disksize(inode, new_i_size);
|
||||
/* We need to mark inode dirty even if
|
||||
* new_i_size is less that inode->i_size
|
||||
* bu greater than i_disksize.(hint delalloc)
|
||||
*/
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
}
|
||||
|
||||
ret2 = generic_write_end(file, mapping, pos, len, copied,
|
||||
ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
|
||||
page, fsdata);
|
||||
copied = ret2;
|
||||
if (pos + len > inode->i_size)
|
||||
/* if we have allocated more blocks and copied
|
||||
* less. We will have blocks allocated outside
|
||||
* inode->i_size. So truncate them
|
||||
*/
|
||||
ext4_orphan_add(handle, inode);
|
||||
|
||||
if (ret2 < 0)
|
||||
ret = ret2;
|
||||
|
||||
@ -1591,6 +1679,17 @@ static int ext4_writeback_write_end(struct file *file,
|
||||
if (!ret)
|
||||
ret = ret2;
|
||||
|
||||
if (pos + len > inode->i_size) {
|
||||
vmtruncate(inode, inode->i_size);
|
||||
/*
|
||||
* If vmtruncate failed early the inode might still be
|
||||
* on the orphan list; we need to make sure the inode
|
||||
* is removed from the orphan list in that case.
|
||||
*/
|
||||
if (inode->i_nlink)
|
||||
ext4_orphan_del(NULL, inode);
|
||||
}
|
||||
|
||||
return ret ? ret : copied;
|
||||
}
|
||||
|
||||
@ -1635,10 +1734,27 @@ static int ext4_journalled_write_end(struct file *file,
|
||||
}
|
||||
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
if (pos + len > inode->i_size)
|
||||
/* if we have allocated more blocks and copied
|
||||
* less. We will have blocks allocated outside
|
||||
* inode->i_size. So truncate them
|
||||
*/
|
||||
ext4_orphan_add(handle, inode);
|
||||
|
||||
ret2 = ext4_journal_stop(handle);
|
||||
if (!ret)
|
||||
ret = ret2;
|
||||
page_cache_release(page);
|
||||
if (pos + len > inode->i_size) {
|
||||
vmtruncate(inode, inode->i_size);
|
||||
/*
|
||||
* If vmtruncate failed early the inode might still be
|
||||
* on the orphan list; we need to make sure the inode
|
||||
* is removed from the orphan list in that case.
|
||||
*/
|
||||
if (inode->i_nlink)
|
||||
ext4_orphan_del(NULL, inode);
|
||||
}
|
||||
|
||||
return ret ? ret : copied;
|
||||
}
|
||||
@ -1852,7 +1968,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
|
||||
* @logical - first logical block to start assignment with
|
||||
*
|
||||
* the function goes through all passed space and put actual disk
|
||||
* block numbers into buffer heads, dropping BH_Delay
|
||||
* block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
|
||||
*/
|
||||
static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
|
||||
struct buffer_head *exbh)
|
||||
@ -1902,16 +2018,24 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical,
|
||||
do {
|
||||
if (cur_logical >= logical + blocks)
|
||||
break;
|
||||
if (buffer_delay(bh)) {
|
||||
bh->b_blocknr = pblock;
|
||||
clear_buffer_delay(bh);
|
||||
bh->b_bdev = inode->i_sb->s_bdev;
|
||||
} else if (buffer_unwritten(bh)) {
|
||||
bh->b_blocknr = pblock;
|
||||
clear_buffer_unwritten(bh);
|
||||
set_buffer_mapped(bh);
|
||||
set_buffer_new(bh);
|
||||
bh->b_bdev = inode->i_sb->s_bdev;
|
||||
|
||||
if (buffer_delay(bh) ||
|
||||
buffer_unwritten(bh)) {
|
||||
|
||||
BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
|
||||
|
||||
if (buffer_delay(bh)) {
|
||||
clear_buffer_delay(bh);
|
||||
bh->b_blocknr = pblock;
|
||||
} else {
|
||||
/*
|
||||
* unwritten already should have
|
||||
* blocknr assigned. Verify that
|
||||
*/
|
||||
clear_buffer_unwritten(bh);
|
||||
BUG_ON(bh->b_blocknr != pblock);
|
||||
}
|
||||
|
||||
} else if (buffer_mapped(bh))
|
||||
BUG_ON(bh->b_blocknr != pblock);
|
||||
|
||||
@ -1990,51 +2114,6 @@ static void ext4_print_free_blocks(struct inode *inode)
|
||||
return;
|
||||
}
|
||||
|
||||
#define EXT4_DELALLOC_RSVED 1
|
||||
static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
|
||||
struct buffer_head *bh_result, int create)
|
||||
{
|
||||
int ret;
|
||||
unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
|
||||
loff_t disksize = EXT4_I(inode)->i_disksize;
|
||||
handle_t *handle = NULL;
|
||||
|
||||
handle = ext4_journal_current_handle();
|
||||
BUG_ON(!handle);
|
||||
ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
|
||||
bh_result, create, 0, EXT4_DELALLOC_RSVED);
|
||||
if (ret <= 0)
|
||||
return ret;
|
||||
|
||||
bh_result->b_size = (ret << inode->i_blkbits);
|
||||
|
||||
if (ext4_should_order_data(inode)) {
|
||||
int retval;
|
||||
retval = ext4_jbd2_file_inode(handle, inode);
|
||||
if (retval)
|
||||
/*
|
||||
* Failed to add inode for ordered mode. Don't
|
||||
* update file size
|
||||
*/
|
||||
return retval;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update on-disk size along with block allocation we don't
|
||||
* use 'extend_disksize' as size may change within already
|
||||
* allocated block -bzzz
|
||||
*/
|
||||
disksize = ((loff_t) iblock + ret) << inode->i_blkbits;
|
||||
if (disksize > i_size_read(inode))
|
||||
disksize = i_size_read(inode);
|
||||
if (disksize > EXT4_I(inode)->i_disksize) {
|
||||
ext4_update_i_disksize(inode, disksize);
|
||||
ret = ext4_mark_inode_dirty(handle, inode);
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* mpage_da_map_blocks - go through given space
|
||||
*
|
||||
@ -2045,29 +2124,57 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
|
||||
*/
|
||||
static int mpage_da_map_blocks(struct mpage_da_data *mpd)
|
||||
{
|
||||
int err = 0;
|
||||
int err, blks, get_blocks_flags;
|
||||
struct buffer_head new;
|
||||
sector_t next;
|
||||
sector_t next = mpd->b_blocknr;
|
||||
unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
|
||||
loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
|
||||
handle_t *handle = NULL;
|
||||
|
||||
/*
|
||||
* We consider only non-mapped and non-allocated blocks
|
||||
*/
|
||||
if ((mpd->b_state & (1 << BH_Mapped)) &&
|
||||
!(mpd->b_state & (1 << BH_Delay)))
|
||||
return 0;
|
||||
new.b_state = mpd->b_state;
|
||||
new.b_blocknr = 0;
|
||||
new.b_size = mpd->b_size;
|
||||
next = mpd->b_blocknr;
|
||||
/*
|
||||
* If we didn't accumulate anything
|
||||
* to write simply return
|
||||
*/
|
||||
if (!new.b_size)
|
||||
!(mpd->b_state & (1 << BH_Delay)) &&
|
||||
!(mpd->b_state & (1 << BH_Unwritten)))
|
||||
return 0;
|
||||
|
||||
err = ext4_da_get_block_write(mpd->inode, next, &new, 1);
|
||||
if (err) {
|
||||
/*
|
||||
* If we didn't accumulate anything to write simply return
|
||||
*/
|
||||
if (!mpd->b_size)
|
||||
return 0;
|
||||
|
||||
handle = ext4_journal_current_handle();
|
||||
BUG_ON(!handle);
|
||||
|
||||
/*
|
||||
* Call ext4_get_blocks() to allocate any delayed allocation
|
||||
* blocks, or to convert an uninitialized extent to be
|
||||
* initialized (in the case where we have written into
|
||||
* one or more preallocated blocks).
|
||||
*
|
||||
* We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to
|
||||
* indicate that we are on the delayed allocation path. This
|
||||
* affects functions in many different parts of the allocation
|
||||
* call path. This flag exists primarily because we don't
|
||||
* want to change *many* call functions, so ext4_get_blocks()
|
||||
* will set the magic i_delalloc_reserved_flag once the
|
||||
* inode's allocation semaphore is taken.
|
||||
*
|
||||
* If the blocks in question were delalloc blocks, set
|
||||
* EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting
|
||||
* variables are updated after the blocks have been allocated.
|
||||
*/
|
||||
new.b_state = 0;
|
||||
get_blocks_flags = (EXT4_GET_BLOCKS_CREATE |
|
||||
EXT4_GET_BLOCKS_DELALLOC_RESERVE);
|
||||
if (mpd->b_state & (1 << BH_Delay))
|
||||
get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE;
|
||||
blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
|
||||
&new, get_blocks_flags);
|
||||
if (blks < 0) {
|
||||
err = blks;
|
||||
/*
|
||||
* If get block returns with error we simply
|
||||
* return. Later writepage will redirty the page and
|
||||
@ -2100,12 +2207,14 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
|
||||
if (err == -ENOSPC) {
|
||||
ext4_print_free_blocks(mpd->inode);
|
||||
}
|
||||
/* invlaidate all the pages */
|
||||
/* invalidate all the pages */
|
||||
ext4_da_block_invalidatepages(mpd, next,
|
||||
mpd->b_size >> mpd->inode->i_blkbits);
|
||||
return err;
|
||||
}
|
||||
BUG_ON(new.b_size == 0);
|
||||
BUG_ON(blks == 0);
|
||||
|
||||
new.b_size = (blks << mpd->inode->i_blkbits);
|
||||
|
||||
if (buffer_new(&new))
|
||||
__unmap_underlying_blocks(mpd->inode, &new);
|
||||
@ -2118,6 +2227,23 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
|
||||
(mpd->b_state & (1 << BH_Unwritten)))
|
||||
mpage_put_bnr_to_bhs(mpd, next, &new);
|
||||
|
||||
if (ext4_should_order_data(mpd->inode)) {
|
||||
err = ext4_jbd2_file_inode(handle, mpd->inode);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update on-disk size along with block allocation.
|
||||
*/
|
||||
disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits;
|
||||
if (disksize > i_size_read(mpd->inode))
|
||||
disksize = i_size_read(mpd->inode);
|
||||
if (disksize > EXT4_I(mpd->inode)->i_disksize) {
|
||||
ext4_update_i_disksize(mpd->inode, disksize);
|
||||
return ext4_mark_inode_dirty(handle, mpd->inode);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2192,6 +2318,17 @@ flush_it:
|
||||
return;
|
||||
}
|
||||
|
||||
/*
 * ext4_bh_unmapped_or_delay() - does this dirty buffer still need mapping?
 *
 * Returns non-zero when @bh is dirty and is either not mapped (possible
 * for holes), marked delay (possible with delayed allocation), or
 * marked unwritten (treated the same as unmapped).  Returns 0 otherwise.
 * @handle is not used.
 */
static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
{
	/* Clean buffers never qualify, whatever their mapping state. */
	if (!buffer_dirty(bh))
		return 0;

	if (!buffer_mapped(bh))
		return 1;

	return buffer_delay(bh) || buffer_unwritten(bh);
}
|
||||
|
||||
/*
|
||||
* __mpage_da_writepage - finds extent of pages and blocks
|
||||
*
|
||||
@ -2276,8 +2413,7 @@ static int __mpage_da_writepage(struct page *page,
|
||||
* Otherwise we won't make progress
|
||||
* with the page in ext4_da_writepage
|
||||
*/
|
||||
if (buffer_dirty(bh) &&
|
||||
(!buffer_mapped(bh) || buffer_delay(bh))) {
|
||||
if (ext4_bh_unmapped_or_delay(NULL, bh)) {
|
||||
mpage_add_bh_to_extent(mpd, logical,
|
||||
bh->b_size,
|
||||
bh->b_state);
|
||||
@ -2303,8 +2439,16 @@ static int __mpage_da_writepage(struct page *page,
|
||||
}
|
||||
|
||||
/*
|
||||
* this is a special callback for ->write_begin() only
|
||||
* it's intention is to return mapped block or reserve space
|
||||
* This is a special get_blocks_t callback which is used by
|
||||
* ext4_da_write_begin(). It will either return mapped block or
|
||||
* reserve space for a single block.
|
||||
*
|
||||
* For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set.
|
||||
* We also have b_blocknr = -1 and b_bdev initialized properly
|
||||
*
|
||||
* For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set.
|
||||
* We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev
|
||||
* initialized properly.
|
||||
*/
|
||||
static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
|
||||
struct buffer_head *bh_result, int create)
|
||||
@ -2323,7 +2467,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
|
||||
* preallocated blocks are unmapped but should be treated
|
||||
* the same as allocated blocks.
|
||||
*/
|
||||
ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0);
|
||||
ret = ext4_get_blocks(NULL, inode, iblock, 1, bh_result, 0);
|
||||
if ((ret == 0) && !buffer_delay(bh_result)) {
|
||||
/* the block isn't (pre)allocated yet, let's reserve space */
|
||||
/*
|
||||
@ -2340,40 +2484,53 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
|
||||
set_buffer_delay(bh_result);
|
||||
} else if (ret > 0) {
|
||||
bh_result->b_size = (ret << inode->i_blkbits);
|
||||
/*
|
||||
* With sub-block writes into unwritten extents
|
||||
* we also need to mark the buffer as new so that
|
||||
* the unwritten parts of the buffer gets correctly zeroed.
|
||||
*/
|
||||
if (buffer_unwritten(bh_result))
|
||||
if (buffer_unwritten(bh_result)) {
|
||||
/* A delayed write to unwritten bh should
|
||||
* be marked new and mapped. Mapped ensures
|
||||
* that we don't do get_block multiple times
|
||||
* when we write to the same offset and new
|
||||
* ensures that we do proper zero out for
|
||||
* partial write.
|
||||
*/
|
||||
set_buffer_new(bh_result);
|
||||
set_buffer_mapped(bh_result);
|
||||
}
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
|
||||
{
|
||||
/*
|
||||
* unmapped buffer is possible for holes.
|
||||
* delay buffer is possible with delayed allocation
|
||||
*/
|
||||
return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh));
|
||||
}
|
||||
|
||||
static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
|
||||
/*
|
||||
* This function is used as a standard get_block_t callback function
|
||||
* when there is no desire to allocate any blocks. It is used as a
|
||||
* callback function for block_prepare_write(), nobh_writepage(), and
|
||||
* block_write_full_page(). These functions should only try to map a
|
||||
* single block at a time.
|
||||
*
|
||||
* Since this function doesn't do block allocations even if the caller
|
||||
* requests it by passing in create=1, it is critically important that
|
||||
* any caller checks to make sure that any buffer heads returned
|
||||
* by this function are either all already mapped or marked for
|
||||
* delayed allocation before calling nobh_writepage() or
|
||||
* block_write_full_page(). Otherwise, b_blocknr could be left
|
||||
* uninitialized, and the page write functions will be taken by
|
||||
* surprise.
|
||||
*/
|
||||
static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
|
||||
struct buffer_head *bh_result, int create)
|
||||
{
|
||||
int ret = 0;
|
||||
unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
|
||||
|
||||
BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
|
||||
|
||||
/*
|
||||
* we don't want to do block allocation in writepage
|
||||
* so call get_block_wrap with create = 0
|
||||
*/
|
||||
ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks,
|
||||
bh_result, 0, 0, 0);
|
||||
ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
|
||||
BUG_ON(create && ret == 0);
|
||||
if (ret > 0) {
|
||||
bh_result->b_size = (ret << inode->i_blkbits);
|
||||
ret = 0;
|
||||
@ -2382,10 +2539,11 @@ static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock,
|
||||
}
|
||||
|
||||
/*
|
||||
* get called via ext4_da_writepages after taking page lock (have journal handle)
|
||||
* get called via journal_submit_inode_data_buffers (no journal handle)
|
||||
* get called via shrink_page_list via pdflush (no journal handle)
|
||||
* or grab_page_cache when doing write_begin (have journal handle)
|
||||
* This function can get called via...
|
||||
* - ext4_da_writepages after taking page lock (have journal handle)
|
||||
* - journal_submit_inode_data_buffers (no journal handle)
|
||||
* - shrink_page_list via pdflush (no journal handle)
|
||||
* - grab_page_cache when doing write_begin (have journal handle)
|
||||
*/
|
||||
static int ext4_da_writepage(struct page *page,
|
||||
struct writeback_control *wbc)
|
||||
@ -2436,7 +2594,7 @@ static int ext4_da_writepage(struct page *page,
|
||||
* do block allocation here.
|
||||
*/
|
||||
ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
|
||||
ext4_normal_get_block_write);
|
||||
noalloc_get_block_write);
|
||||
if (!ret) {
|
||||
page_bufs = page_buffers(page);
|
||||
/* check whether all are mapped and non delay */
|
||||
@ -2461,11 +2619,10 @@ static int ext4_da_writepage(struct page *page,
|
||||
}
|
||||
|
||||
if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
|
||||
ret = nobh_writepage(page, ext4_normal_get_block_write, wbc);
|
||||
ret = nobh_writepage(page, noalloc_get_block_write, wbc);
|
||||
else
|
||||
ret = block_write_full_page(page,
|
||||
ext4_normal_get_block_write,
|
||||
wbc);
|
||||
ret = block_write_full_page(page, noalloc_get_block_write,
|
||||
wbc);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -2777,7 +2934,7 @@ retry:
|
||||
*pagep = page;
|
||||
|
||||
ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
|
||||
ext4_da_get_block_prep);
|
||||
ext4_da_get_block_prep);
|
||||
if (ret < 0) {
|
||||
unlock_page(page);
|
||||
ext4_journal_stop(handle);
|
||||
@ -2815,7 +2972,7 @@ static int ext4_da_should_update_i_disksize(struct page *page,
|
||||
for (i = 0; i < idx; i++)
|
||||
bh = bh->b_this_page;
|
||||
|
||||
if (!buffer_mapped(bh) || (buffer_delay(bh)))
|
||||
if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
@ -3085,12 +3242,10 @@ static int __ext4_normal_writepage(struct page *page,
|
||||
struct inode *inode = page->mapping->host;
|
||||
|
||||
if (test_opt(inode->i_sb, NOBH))
|
||||
return nobh_writepage(page,
|
||||
ext4_normal_get_block_write, wbc);
|
||||
return nobh_writepage(page, noalloc_get_block_write, wbc);
|
||||
else
|
||||
return block_write_full_page(page,
|
||||
ext4_normal_get_block_write,
|
||||
wbc);
|
||||
return block_write_full_page(page, noalloc_get_block_write,
|
||||
wbc);
|
||||
}
|
||||
|
||||
static int ext4_normal_writepage(struct page *page,
|
||||
@ -3142,7 +3297,7 @@ static int __ext4_journalled_writepage(struct page *page,
|
||||
int err;
|
||||
|
||||
ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
|
||||
ext4_normal_get_block_write);
|
||||
noalloc_get_block_write);
|
||||
if (ret != 0)
|
||||
goto out_unlock;
|
||||
|
||||
@ -3227,9 +3382,8 @@ static int ext4_journalled_writepage(struct page *page,
|
||||
* really know unless we go poke around in the buffer_heads.
|
||||
* But block_write_full_page will do the right thing.
|
||||
*/
|
||||
return block_write_full_page(page,
|
||||
ext4_normal_get_block_write,
|
||||
wbc);
|
||||
return block_write_full_page(page, noalloc_get_block_write,
|
||||
wbc);
|
||||
}
|
||||
no_write:
|
||||
redirty_page_for_writepage(wbc, page);
|
||||
@ -3973,7 +4127,8 @@ void ext4_truncate(struct inode *inode)
|
||||
if (!ext4_can_truncate(inode))
|
||||
return;
|
||||
|
||||
if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
|
||||
if (ei->i_disksize && inode->i_size == 0 &&
|
||||
!test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
|
||||
ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
|
||||
|
||||
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
|
||||
@ -4715,25 +4870,6 @@ int ext4_write_inode(struct inode *inode, int wait)
|
||||
return ext4_force_commit(inode->i_sb);
|
||||
}
|
||||
|
||||
int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
mark_buffer_dirty(bh);
|
||||
if (inode && inode_needs_sync(inode)) {
|
||||
sync_dirty_buffer(bh);
|
||||
if (buffer_req(bh) && !buffer_uptodate(bh)) {
|
||||
ext4_error(inode->i_sb, __func__,
|
||||
"IO error syncing inode, "
|
||||
"inode=%lu, block=%llu",
|
||||
inode->i_ino,
|
||||
(unsigned long long)bh->b_blocknr);
|
||||
err = -EIO;
|
||||
}
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* ext4_setattr()
|
||||
*
|
||||
@ -4930,7 +5066,8 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
|
||||
*/
|
||||
int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
|
||||
{
|
||||
int groups, gdpblocks;
|
||||
ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
|
||||
int gdpblocks;
|
||||
int idxblocks;
|
||||
int ret = 0;
|
||||
|
||||
@ -4957,8 +5094,8 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
|
||||
groups += nrblocks;
|
||||
|
||||
gdpblocks = groups;
|
||||
if (groups > EXT4_SB(inode->i_sb)->s_groups_count)
|
||||
groups = EXT4_SB(inode->i_sb)->s_groups_count;
|
||||
if (groups > ngroups)
|
||||
groups = ngroups;
|
||||
if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
|
||||
gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
|
||||
|
||||
@ -4998,7 +5135,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
|
||||
* Calculate the journal credits for a chunk of data modification.
|
||||
*
|
||||
* This is called from DIO, fallocate or whoever calling
|
||||
* ext4_get_blocks_wrap() to map/allocate a chunk of contiguous disk blocks.
|
||||
* ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks.
|
||||
*
|
||||
* journal buffers for data blocks are not included here, as DIO
|
||||
* and fallocate do not need to journal data buffers.
|
||||
|
@ -372,24 +372,12 @@ static inline void mb_set_bit(int bit, void *addr)
|
||||
ext4_set_bit(bit, addr);
|
||||
}
|
||||
|
||||
static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr)
|
||||
{
|
||||
addr = mb_correct_addr_and_bit(&bit, addr);
|
||||
ext4_set_bit_atomic(lock, bit, addr);
|
||||
}
|
||||
|
||||
static inline void mb_clear_bit(int bit, void *addr)
|
||||
{
|
||||
addr = mb_correct_addr_and_bit(&bit, addr);
|
||||
ext4_clear_bit(bit, addr);
|
||||
}
|
||||
|
||||
static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
|
||||
{
|
||||
addr = mb_correct_addr_and_bit(&bit, addr);
|
||||
ext4_clear_bit_atomic(lock, bit, addr);
|
||||
}
|
||||
|
||||
static inline int mb_find_next_zero_bit(void *addr, int max, int start)
|
||||
{
|
||||
int fix = 0, ret, tmpmax;
|
||||
@ -448,7 +436,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
|
||||
|
||||
if (unlikely(e4b->bd_info->bb_bitmap == NULL))
|
||||
return;
|
||||
BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
|
||||
assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
|
||||
for (i = 0; i < count; i++) {
|
||||
if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) {
|
||||
ext4_fsblk_t blocknr;
|
||||
@ -472,7 +460,7 @@ static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count)
|
||||
|
||||
if (unlikely(e4b->bd_info->bb_bitmap == NULL))
|
||||
return;
|
||||
BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
|
||||
assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
|
||||
for (i = 0; i < count; i++) {
|
||||
BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap));
|
||||
mb_set_bit(first + i, e4b->bd_info->bb_bitmap);
|
||||
@ -739,6 +727,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
|
||||
|
||||
static int ext4_mb_init_cache(struct page *page, char *incore)
|
||||
{
|
||||
ext4_group_t ngroups;
|
||||
int blocksize;
|
||||
int blocks_per_page;
|
||||
int groups_per_page;
|
||||
@ -757,6 +746,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
|
||||
|
||||
inode = page->mapping->host;
|
||||
sb = inode->i_sb;
|
||||
ngroups = ext4_get_groups_count(sb);
|
||||
blocksize = 1 << inode->i_blkbits;
|
||||
blocks_per_page = PAGE_CACHE_SIZE / blocksize;
|
||||
|
||||
@ -780,7 +770,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
|
||||
for (i = 0; i < groups_per_page; i++) {
|
||||
struct ext4_group_desc *desc;
|
||||
|
||||
if (first_group + i >= EXT4_SB(sb)->s_groups_count)
|
||||
if (first_group + i >= ngroups)
|
||||
break;
|
||||
|
||||
err = -EIO;
|
||||
@ -801,17 +791,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
|
||||
unlock_buffer(bh[i]);
|
||||
continue;
|
||||
}
|
||||
spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
|
||||
ext4_lock_group(sb, first_group + i);
|
||||
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
|
||||
ext4_init_block_bitmap(sb, bh[i],
|
||||
first_group + i, desc);
|
||||
set_bitmap_uptodate(bh[i]);
|
||||
set_buffer_uptodate(bh[i]);
|
||||
spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
|
||||
ext4_unlock_group(sb, first_group + i);
|
||||
unlock_buffer(bh[i]);
|
||||
continue;
|
||||
}
|
||||
spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
|
||||
ext4_unlock_group(sb, first_group + i);
|
||||
if (buffer_uptodate(bh[i])) {
|
||||
/*
|
||||
* if not uninit if bh is uptodate,
|
||||
@ -852,7 +842,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
|
||||
struct ext4_group_info *grinfo;
|
||||
|
||||
group = (first_block + i) >> 1;
|
||||
if (group >= EXT4_SB(sb)->s_groups_count)
|
||||
if (group >= ngroups)
|
||||
break;
|
||||
|
||||
/*
|
||||
@ -1078,7 +1068,7 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
|
||||
static void mb_clear_bits(void *bm, int cur, int len)
|
||||
{
|
||||
__u32 *addr;
|
||||
|
||||
@ -1091,15 +1081,12 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len)
|
||||
cur += 32;
|
||||
continue;
|
||||
}
|
||||
if (lock)
|
||||
mb_clear_bit_atomic(lock, cur, bm);
|
||||
else
|
||||
mb_clear_bit(cur, bm);
|
||||
mb_clear_bit(cur, bm);
|
||||
cur++;
|
||||
}
|
||||
}
|
||||
|
||||
static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
|
||||
static void mb_set_bits(void *bm, int cur, int len)
|
||||
{
|
||||
__u32 *addr;
|
||||
|
||||
@ -1112,10 +1099,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
|
||||
cur += 32;
|
||||
continue;
|
||||
}
|
||||
if (lock)
|
||||
mb_set_bit_atomic(lock, cur, bm);
|
||||
else
|
||||
mb_set_bit(cur, bm);
|
||||
mb_set_bit(cur, bm);
|
||||
cur++;
|
||||
}
|
||||
}
|
||||
@ -1131,7 +1115,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
|
||||
struct super_block *sb = e4b->bd_sb;
|
||||
|
||||
BUG_ON(first + count > (sb->s_blocksize << 3));
|
||||
BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group));
|
||||
assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
|
||||
mb_check_buddy(e4b);
|
||||
mb_free_blocks_double(inode, e4b, first, count);
|
||||
|
||||
@ -1212,7 +1196,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
|
||||
int ord;
|
||||
void *buddy;
|
||||
|
||||
BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
|
||||
assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
|
||||
BUG_ON(ex == NULL);
|
||||
|
||||
buddy = mb_find_buddy(e4b, order, &max);
|
||||
@ -1276,7 +1260,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
|
||||
|
||||
BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
|
||||
BUG_ON(e4b->bd_group != ex->fe_group);
|
||||
BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group));
|
||||
assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
|
||||
mb_check_buddy(e4b);
|
||||
mb_mark_used_double(e4b, start, len);
|
||||
|
||||
@ -1330,8 +1314,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
|
||||
e4b->bd_info->bb_counters[ord]++;
|
||||
}
|
||||
|
||||
mb_set_bits(sb_bgl_lock(EXT4_SB(e4b->bd_sb), ex->fe_group),
|
||||
EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
|
||||
mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
|
||||
mb_check_buddy(e4b);
|
||||
|
||||
return ret;
|
||||
@ -1726,7 +1709,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
|
||||
unsigned free, fragments;
|
||||
unsigned i, bits;
|
||||
int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
|
||||
struct ext4_group_desc *desc;
|
||||
struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
|
||||
|
||||
BUG_ON(cr < 0 || cr >= 4);
|
||||
@ -1742,10 +1724,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
|
||||
switch (cr) {
|
||||
case 0:
|
||||
BUG_ON(ac->ac_2order == 0);
|
||||
/* If this group is uninitialized, skip it initially */
|
||||
desc = ext4_get_group_desc(ac->ac_sb, group, NULL);
|
||||
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
|
||||
return 0;
|
||||
|
||||
/* Avoid using the first bg of a flexgroup for data files */
|
||||
if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
|
||||
@ -1788,6 +1766,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
|
||||
int block, pnum;
|
||||
int blocks_per_page;
|
||||
int groups_per_page;
|
||||
ext4_group_t ngroups = ext4_get_groups_count(sb);
|
||||
ext4_group_t first_group;
|
||||
struct ext4_group_info *grp;
|
||||
|
||||
@ -1807,7 +1786,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
|
||||
/* read all groups the page covers into the cache */
|
||||
for (i = 0; i < groups_per_page; i++) {
|
||||
|
||||
if ((first_group + i) >= EXT4_SB(sb)->s_groups_count)
|
||||
if ((first_group + i) >= ngroups)
|
||||
break;
|
||||
grp = ext4_get_group_info(sb, first_group + i);
|
||||
/* take all groups write allocation
|
||||
@ -1945,8 +1924,7 @@ err:
|
||||
static noinline_for_stack int
|
||||
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
|
||||
{
|
||||
ext4_group_t group;
|
||||
ext4_group_t i;
|
||||
ext4_group_t ngroups, group, i;
|
||||
int cr;
|
||||
int err = 0;
|
||||
int bsbits;
|
||||
@ -1957,6 +1935,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
|
||||
|
||||
sb = ac->ac_sb;
|
||||
sbi = EXT4_SB(sb);
|
||||
ngroups = ext4_get_groups_count(sb);
|
||||
BUG_ON(ac->ac_status == AC_STATUS_FOUND);
|
||||
|
||||
/* first, try the goal */
|
||||
@ -2017,11 +1996,11 @@ repeat:
|
||||
*/
|
||||
group = ac->ac_g_ex.fe_group;
|
||||
|
||||
for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) {
|
||||
for (i = 0; i < ngroups; group++, i++) {
|
||||
struct ext4_group_info *grp;
|
||||
struct ext4_group_desc *desc;
|
||||
|
||||
if (group == EXT4_SB(sb)->s_groups_count)
|
||||
if (group == ngroups)
|
||||
group = 0;
|
||||
|
||||
/* quick check to skip empty groups */
|
||||
@ -2064,9 +2043,7 @@ repeat:
|
||||
|
||||
ac->ac_groups_scanned++;
|
||||
desc = ext4_get_group_desc(sb, group, NULL);
|
||||
if (cr == 0 || (desc->bg_flags &
|
||||
cpu_to_le16(EXT4_BG_BLOCK_UNINIT) &&
|
||||
ac->ac_2order != 0))
|
||||
if (cr == 0)
|
||||
ext4_mb_simple_scan_group(ac, &e4b);
|
||||
else if (cr == 1 &&
|
||||
ac->ac_g_ex.fe_len == sbi->s_stripe)
|
||||
@ -2315,12 +2292,10 @@ static struct file_operations ext4_mb_seq_history_fops = {
|
||||
static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
|
||||
{
|
||||
struct super_block *sb = seq->private;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
ext4_group_t group;
|
||||
|
||||
if (*pos < 0 || *pos >= sbi->s_groups_count)
|
||||
if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
|
||||
return NULL;
|
||||
|
||||
group = *pos + 1;
|
||||
return (void *) ((unsigned long) group);
|
||||
}
|
||||
@ -2328,11 +2303,10 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
|
||||
static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
|
||||
{
|
||||
struct super_block *sb = seq->private;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
ext4_group_t group;
|
||||
|
||||
++*pos;
|
||||
if (*pos < 0 || *pos >= sbi->s_groups_count)
|
||||
if (*pos < 0 || *pos >= ext4_get_groups_count(sb))
|
||||
return NULL;
|
||||
group = *pos + 1;
|
||||
return (void *) ((unsigned long) group);
|
||||
@ -2420,7 +2394,8 @@ static void ext4_mb_history_release(struct super_block *sb)
|
||||
|
||||
if (sbi->s_proc != NULL) {
|
||||
remove_proc_entry("mb_groups", sbi->s_proc);
|
||||
remove_proc_entry("mb_history", sbi->s_proc);
|
||||
if (sbi->s_mb_history_max)
|
||||
remove_proc_entry("mb_history", sbi->s_proc);
|
||||
}
|
||||
kfree(sbi->s_mb_history);
|
||||
}
|
||||
@ -2431,17 +2406,17 @@ static void ext4_mb_history_init(struct super_block *sb)
|
||||
int i;
|
||||
|
||||
if (sbi->s_proc != NULL) {
|
||||
proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
|
||||
&ext4_mb_seq_history_fops, sb);
|
||||
if (sbi->s_mb_history_max)
|
||||
proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
|
||||
&ext4_mb_seq_history_fops, sb);
|
||||
proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
|
||||
&ext4_mb_seq_groups_fops, sb);
|
||||
}
|
||||
|
||||
sbi->s_mb_history_max = 1000;
|
||||
sbi->s_mb_history_cur = 0;
|
||||
spin_lock_init(&sbi->s_mb_history_lock);
|
||||
i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
|
||||
sbi->s_mb_history = kzalloc(i, GFP_KERNEL);
|
||||
sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL;
|
||||
/* if we can't allocate history, then we simply won't use it */
|
||||
}
|
||||
|
||||
@ -2451,7 +2426,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac)
|
||||
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
|
||||
struct ext4_mb_history h;
|
||||
|
||||
if (unlikely(sbi->s_mb_history == NULL))
|
||||
if (sbi->s_mb_history == NULL)
|
||||
return;
|
||||
|
||||
if (!(ac->ac_op & sbi->s_mb_history_filter))
|
||||
@ -2587,6 +2562,7 @@ void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add)
|
||||
|
||||
static int ext4_mb_init_backend(struct super_block *sb)
|
||||
{
|
||||
ext4_group_t ngroups = ext4_get_groups_count(sb);
|
||||
ext4_group_t i;
|
||||
int metalen;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
@ -2598,7 +2574,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
|
||||
struct ext4_group_desc *desc;
|
||||
|
||||
/* This is the number of blocks used by GDT */
|
||||
num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) -
|
||||
num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
|
||||
1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
|
||||
|
||||
/*
|
||||
@ -2644,7 +2620,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
|
||||
for (i = 0; i < num_meta_group_infos; i++) {
|
||||
if ((i + 1) == num_meta_group_infos)
|
||||
metalen = sizeof(*meta_group_info) *
|
||||
(sbi->s_groups_count -
|
||||
(ngroups -
|
||||
(i << EXT4_DESC_PER_BLOCK_BITS(sb)));
|
||||
meta_group_info = kmalloc(metalen, GFP_KERNEL);
|
||||
if (meta_group_info == NULL) {
|
||||
@ -2655,7 +2631,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
|
||||
sbi->s_group_info[i] = meta_group_info;
|
||||
}
|
||||
|
||||
for (i = 0; i < sbi->s_groups_count; i++) {
|
||||
for (i = 0; i < ngroups; i++) {
|
||||
desc = ext4_get_group_desc(sb, i, NULL);
|
||||
if (desc == NULL) {
|
||||
printk(KERN_ERR
|
||||
@ -2761,7 +2737,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* need to called with ext4 group lock (ext4_lock_group) */
|
||||
/* needs to be called with the ext4 group lock held */
|
||||
static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
|
||||
{
|
||||
struct ext4_prealloc_space *pa;
|
||||
@ -2781,13 +2757,14 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
|
||||
|
||||
int ext4_mb_release(struct super_block *sb)
|
||||
{
|
||||
ext4_group_t ngroups = ext4_get_groups_count(sb);
|
||||
ext4_group_t i;
|
||||
int num_meta_group_infos;
|
||||
struct ext4_group_info *grinfo;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
|
||||
if (sbi->s_group_info) {
|
||||
for (i = 0; i < sbi->s_groups_count; i++) {
|
||||
for (i = 0; i < ngroups; i++) {
|
||||
grinfo = ext4_get_group_info(sb, i);
|
||||
#ifdef DOUBLE_CHECK
|
||||
kfree(grinfo->bb_bitmap);
|
||||
@ -2797,7 +2774,7 @@ int ext4_mb_release(struct super_block *sb)
|
||||
ext4_unlock_group(sb, i);
|
||||
kfree(grinfo);
|
||||
}
|
||||
num_meta_group_infos = (sbi->s_groups_count +
|
||||
num_meta_group_infos = (ngroups +
|
||||
EXT4_DESC_PER_BLOCK(sb) - 1) >>
|
||||
EXT4_DESC_PER_BLOCK_BITS(sb);
|
||||
for (i = 0; i < num_meta_group_infos; i++)
|
||||
@ -2984,27 +2961,25 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
|
||||
+ le32_to_cpu(es->s_first_data_block);
|
||||
|
||||
len = ac->ac_b_ex.fe_len;
|
||||
if (in_range(ext4_block_bitmap(sb, gdp), block, len) ||
|
||||
in_range(ext4_inode_bitmap(sb, gdp), block, len) ||
|
||||
in_range(block, ext4_inode_table(sb, gdp),
|
||||
EXT4_SB(sb)->s_itb_per_group) ||
|
||||
in_range(block + len - 1, ext4_inode_table(sb, gdp),
|
||||
EXT4_SB(sb)->s_itb_per_group)) {
|
||||
if (!ext4_data_block_valid(sbi, block, len)) {
|
||||
ext4_error(sb, __func__,
|
||||
"Allocating block %llu in system zone of %d group\n",
|
||||
block, ac->ac_b_ex.fe_group);
|
||||
"Allocating blocks %llu-%llu which overlap "
|
||||
"fs metadata\n", block, block+len);
|
||||
/* File system mounted not to panic on error
|
||||
* Fix the bitmap and repeat the block allocation
|
||||
* We leak some of the blocks here.
|
||||
*/
|
||||
mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group),
|
||||
bitmap_bh->b_data, ac->ac_b_ex.fe_start,
|
||||
ac->ac_b_ex.fe_len);
|
||||
ext4_lock_group(sb, ac->ac_b_ex.fe_group);
|
||||
mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
|
||||
ac->ac_b_ex.fe_len);
|
||||
ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
|
||||
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
|
||||
if (!err)
|
||||
err = -EAGAIN;
|
||||
goto out_err;
|
||||
}
|
||||
|
||||
ext4_lock_group(sb, ac->ac_b_ex.fe_group);
|
||||
#ifdef AGGRESSIVE_CHECK
|
||||
{
|
||||
int i;
|
||||
@ -3014,9 +2989,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
|
||||
mb_set_bits(NULL, bitmap_bh->b_data,
|
||||
ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
|
||||
mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len);
|
||||
if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
|
||||
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
|
||||
ext4_free_blks_set(sb, gdp,
|
||||
@ -3026,7 +2999,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
|
||||
len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len;
|
||||
ext4_free_blks_set(sb, gdp, len);
|
||||
gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
|
||||
spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
|
||||
|
||||
ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
|
||||
percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
|
||||
/*
|
||||
* Now reduce the dirty block count also. Should not go negative
|
||||
@ -3459,7 +3433,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
|
||||
* the function goes through all block freed in the group
|
||||
* but not yet committed and marks them used in in-core bitmap.
|
||||
* buddy must be generated from this bitmap
|
||||
* Need to be called with ext4 group lock (ext4_lock_group)
|
||||
* Need to be called with the ext4 group lock held
|
||||
*/
|
||||
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
|
||||
ext4_group_t group)
|
||||
@ -3473,9 +3447,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
|
||||
|
||||
while (n) {
|
||||
entry = rb_entry(n, struct ext4_free_data, node);
|
||||
mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
|
||||
bitmap, entry->start_blk,
|
||||
entry->count);
|
||||
mb_set_bits(bitmap, entry->start_blk, entry->count);
|
||||
n = rb_next(n);
|
||||
}
|
||||
return;
|
||||
@ -3484,7 +3456,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
|
||||
/*
|
||||
* the function goes through all preallocation in this group and marks them
|
||||
* used in in-core bitmap. buddy must be generated from this bitmap
|
||||
* Need to be called with ext4 group lock (ext4_lock_group)
|
||||
* Need to be called with ext4 group lock held
|
||||
*/
|
||||
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
|
||||
ext4_group_t group)
|
||||
@ -3516,8 +3488,7 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
|
||||
if (unlikely(len == 0))
|
||||
continue;
|
||||
BUG_ON(groupnr != group);
|
||||
mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
|
||||
bitmap, start, len);
|
||||
mb_set_bits(bitmap, start, len);
|
||||
preallocated += len;
|
||||
count++;
|
||||
}
|
||||
@ -4121,7 +4092,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode,
|
||||
static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
|
||||
{
|
||||
struct super_block *sb = ac->ac_sb;
|
||||
ext4_group_t i;
|
||||
ext4_group_t ngroups, i;
|
||||
|
||||
printk(KERN_ERR "EXT4-fs: Can't allocate:"
|
||||
" Allocation context details:\n");
|
||||
@ -4145,7 +4116,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
|
||||
printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
|
||||
ac->ac_found);
|
||||
printk(KERN_ERR "EXT4-fs: groups: \n");
|
||||
for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
|
||||
ngroups = ext4_get_groups_count(sb);
|
||||
for (i = 0; i < ngroups; i++) {
|
||||
struct ext4_group_info *grp = ext4_get_group_info(sb, i);
|
||||
struct ext4_prealloc_space *pa;
|
||||
ext4_grpblk_t start;
|
||||
@ -4469,13 +4441,13 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
|
||||
|
||||
static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
|
||||
{
|
||||
ext4_group_t i;
|
||||
ext4_group_t i, ngroups = ext4_get_groups_count(sb);
|
||||
int ret;
|
||||
int freed = 0;
|
||||
|
||||
trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d",
|
||||
sb->s_id, needed);
|
||||
for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) {
|
||||
for (i = 0; i < ngroups && needed > 0; i++) {
|
||||
ret = ext4_mb_discard_group_preallocations(sb, i, needed);
|
||||
freed += ret;
|
||||
needed -= ret;
|
||||
@ -4859,29 +4831,25 @@ do_more:
|
||||
new_entry->group = block_group;
|
||||
new_entry->count = count;
|
||||
new_entry->t_tid = handle->h_transaction->t_tid;
|
||||
|
||||
ext4_lock_group(sb, block_group);
|
||||
mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
|
||||
bit, count);
|
||||
mb_clear_bits(bitmap_bh->b_data, bit, count);
|
||||
ext4_mb_free_metadata(handle, &e4b, new_entry);
|
||||
ext4_unlock_group(sb, block_group);
|
||||
} else {
|
||||
ext4_lock_group(sb, block_group);
|
||||
/* need to update group_info->bb_free and bitmap
|
||||
* with group lock held. generate_buddy look at
|
||||
* them with group lock_held
|
||||
*/
|
||||
mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data,
|
||||
bit, count);
|
||||
ext4_lock_group(sb, block_group);
|
||||
mb_clear_bits(bitmap_bh->b_data, bit, count);
|
||||
mb_free_blocks(inode, &e4b, bit, count);
|
||||
ext4_mb_return_to_preallocation(inode, &e4b, block, count);
|
||||
ext4_unlock_group(sb, block_group);
|
||||
}
|
||||
|
||||
spin_lock(sb_bgl_lock(sbi, block_group));
|
||||
ret = ext4_free_blks_count(sb, gdp) + count;
|
||||
ext4_free_blks_set(sb, gdp, ret);
|
||||
gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
|
||||
spin_unlock(sb_bgl_lock(sbi, block_group));
|
||||
ext4_unlock_group(sb, block_group);
|
||||
percpu_counter_add(&sbi->s_freeblocks_counter, count);
|
||||
|
||||
if (sbi->s_log_groups_per_flex) {
|
||||
|
@ -23,7 +23,6 @@
|
||||
#include <linux/mutex.h>
|
||||
#include "ext4_jbd2.h"
|
||||
#include "ext4.h"
|
||||
#include "group.h"
|
||||
|
||||
/*
|
||||
* with AGGRESSIVE_CHECK allocator runs consistency checks over
|
||||
|
@ -37,7 +37,6 @@
|
||||
#include "ext4.h"
|
||||
#include "ext4_jbd2.h"
|
||||
|
||||
#include "namei.h"
|
||||
#include "xattr.h"
|
||||
#include "acl.h"
|
||||
|
||||
@ -750,7 +749,7 @@ static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
|
||||
ext4fs_dirhash(de->name, de->name_len, &h);
|
||||
map_tail--;
|
||||
map_tail->hash = h.hash;
|
||||
map_tail->offs = (u16) ((char *) de - base);
|
||||
map_tail->offs = ((char *) de - base)>>2;
|
||||
map_tail->size = le16_to_cpu(de->rec_len);
|
||||
count++;
|
||||
cond_resched();
|
||||
@ -1148,7 +1147,8 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
|
||||
unsigned rec_len = 0;
|
||||
|
||||
while (count--) {
|
||||
struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs);
|
||||
struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
|
||||
(from + (map->offs<<2));
|
||||
rec_len = EXT4_DIR_REC_LEN(de->name_len);
|
||||
memcpy (to, de, rec_len);
|
||||
((struct ext4_dir_entry_2 *) to)->rec_len =
|
||||
@ -1997,7 +1997,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
|
||||
if (!ext4_handle_valid(handle))
|
||||
return 0;
|
||||
|
||||
lock_super(sb);
|
||||
mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
|
||||
if (!list_empty(&EXT4_I(inode)->i_orphan))
|
||||
goto out_unlock;
|
||||
|
||||
@ -2006,9 +2006,13 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
|
||||
|
||||
/* @@@ FIXME: Observation from aviro:
|
||||
* I think I can trigger J_ASSERT in ext4_orphan_add(). We block
|
||||
* here (on lock_super()), so race with ext4_link() which might bump
|
||||
* here (on s_orphan_lock), so race with ext4_link() which might bump
|
||||
* ->i_nlink. For, say it, character device. Not a regular file,
|
||||
* not a directory, not a symlink and ->i_nlink > 0.
|
||||
*
|
||||
* tytso, 4/25/2009: I'm not sure how that could happen;
|
||||
* shouldn't the fs core protect us from these sort of
|
||||
* unlink()/link() races?
|
||||
*/
|
||||
J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
|
||||
S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
|
||||
@ -2045,7 +2049,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
|
||||
jbd_debug(4, "orphan inode %lu will point to %d\n",
|
||||
inode->i_ino, NEXT_ORPHAN(inode));
|
||||
out_unlock:
|
||||
unlock_super(sb);
|
||||
mutex_unlock(&EXT4_SB(sb)->s_orphan_lock);
|
||||
ext4_std_error(inode->i_sb, err);
|
||||
return err;
|
||||
}
|
||||
@ -2066,11 +2070,9 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
|
||||
if (!ext4_handle_valid(handle))
|
||||
return 0;
|
||||
|
||||
lock_super(inode->i_sb);
|
||||
if (list_empty(&ei->i_orphan)) {
|
||||
unlock_super(inode->i_sb);
|
||||
return 0;
|
||||
}
|
||||
mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
|
||||
if (list_empty(&ei->i_orphan))
|
||||
goto out;
|
||||
|
||||
ino_next = NEXT_ORPHAN(inode);
|
||||
prev = ei->i_orphan.prev;
|
||||
@ -2120,7 +2122,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
|
||||
out_err:
|
||||
ext4_std_error(inode->i_sb, err);
|
||||
out:
|
||||
unlock_super(inode->i_sb);
|
||||
mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
|
||||
return err;
|
||||
|
||||
out_brelse:
|
||||
@ -2533,6 +2535,7 @@ const struct inode_operations ext4_dir_inode_operations = {
|
||||
.removexattr = generic_removexattr,
|
||||
#endif
|
||||
.permission = ext4_permission,
|
||||
.fiemap = ext4_fiemap,
|
||||
};
|
||||
|
||||
const struct inode_operations ext4_special_inode_operations = {
|
||||
|
@ -1,8 +0,0 @@
|
||||
/* linux/fs/ext4/namei.h
|
||||
*
|
||||
* Copyright (C) 2005 Simtec Electronics
|
||||
* Ben Dooks <ben@simtec.co.uk>
|
||||
*
|
||||
*/
|
||||
|
||||
extern struct dentry *ext4_get_parent(struct dentry *child);
|
@ -15,7 +15,6 @@
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include "ext4_jbd2.h"
|
||||
#include "group.h"
|
||||
|
||||
#define outside(b, first, last) ((b) < (first) || (b) >= (last))
|
||||
#define inside(b, first, last) ((b) >= (first) && (b) < (last))
|
||||
@ -193,7 +192,7 @@ static int setup_new_group_blocks(struct super_block *sb,
|
||||
if (IS_ERR(handle))
|
||||
return PTR_ERR(handle);
|
||||
|
||||
lock_super(sb);
|
||||
mutex_lock(&sbi->s_resize_lock);
|
||||
if (input->group != sbi->s_groups_count) {
|
||||
err = -EBUSY;
|
||||
goto exit_journal;
|
||||
@ -302,7 +301,7 @@ exit_bh:
|
||||
brelse(bh);
|
||||
|
||||
exit_journal:
|
||||
unlock_super(sb);
|
||||
mutex_unlock(&sbi->s_resize_lock);
|
||||
if ((err2 = ext4_journal_stop(handle)) && !err)
|
||||
err = err2;
|
||||
|
||||
@ -643,11 +642,12 @@ exit_free:
|
||||
* important part is that the new block and inode counts are in the backup
|
||||
* superblocks, and the location of the new group metadata in the GDT backups.
|
||||
*
|
||||
* We do not need lock_super() for this, because these blocks are not
|
||||
* otherwise touched by the filesystem code when it is mounted. We don't
|
||||
* need to worry about last changing from sbi->s_groups_count, because the
|
||||
* worst that can happen is that we do not copy the full number of backups
|
||||
* at this time. The resize which changed s_groups_count will backup again.
|
||||
* We do not need to take the s_resize_lock for this, because these
|
||||
* blocks are not otherwise touched by the filesystem code when it is
|
||||
* mounted. We don't need to worry about last changing from
|
||||
* sbi->s_groups_count, because the worst that can happen is that we
|
||||
* do not copy the full number of backups at this time. The resize
|
||||
* which changed s_groups_count will backup again.
|
||||
*/
|
||||
static void update_backups(struct super_block *sb,
|
||||
int blk_off, char *data, int size)
|
||||
@ -809,7 +809,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
|
||||
goto exit_put;
|
||||
}
|
||||
|
||||
lock_super(sb);
|
||||
mutex_lock(&sbi->s_resize_lock);
|
||||
if (input->group != sbi->s_groups_count) {
|
||||
ext4_warning(sb, __func__,
|
||||
"multiple resizers run on filesystem!");
|
||||
@ -840,7 +840,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
|
||||
/*
|
||||
* OK, now we've set up the new group. Time to make it active.
|
||||
*
|
||||
* Current kernels don't lock all allocations via lock_super(),
|
||||
* We do not lock all allocations via s_resize_lock
|
||||
* so we have to be safe wrt. concurrent accesses to the group
|
||||
* data. So we need to be careful to set all of the relevant
|
||||
* group descriptor data etc. *before* we enable the group.
|
||||
@ -900,12 +900,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
|
||||
*
|
||||
* The precise rules we use are:
|
||||
*
|
||||
* * Writers of s_groups_count *must* hold lock_super
|
||||
* * Writers of s_groups_count *must* hold s_resize_lock
|
||||
* AND
|
||||
* * Writers must perform a smp_wmb() after updating all dependent
|
||||
* data and before modifying the groups count
|
||||
*
|
||||
* * Readers must hold lock_super() over the access
|
||||
* * Readers must hold s_resize_lock over the access
|
||||
* OR
|
||||
* * Readers must perform an smp_rmb() after reading the groups count
|
||||
* and before reading any dependent data.
|
||||
@ -948,7 +948,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
|
||||
sb->s_dirt = 1;
|
||||
|
||||
exit_journal:
|
||||
unlock_super(sb);
|
||||
mutex_unlock(&sbi->s_resize_lock);
|
||||
if ((err2 = ext4_journal_stop(handle)) && !err)
|
||||
err = err2;
|
||||
if (!err) {
|
||||
@ -986,7 +986,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
|
||||
|
||||
/* We don't need to worry about locking wrt other resizers just
|
||||
* yet: we're going to revalidate es->s_blocks_count after
|
||||
* taking lock_super() below. */
|
||||
* taking the s_resize_lock below. */
|
||||
o_blocks_count = ext4_blocks_count(es);
|
||||
o_groups_count = EXT4_SB(sb)->s_groups_count;
|
||||
|
||||
@ -1056,11 +1056,11 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
|
||||
goto exit_put;
|
||||
}
|
||||
|
||||
lock_super(sb);
|
||||
mutex_lock(&EXT4_SB(sb)->s_resize_lock);
|
||||
if (o_blocks_count != ext4_blocks_count(es)) {
|
||||
ext4_warning(sb, __func__,
|
||||
"multiple resizers run on filesystem!");
|
||||
unlock_super(sb);
|
||||
mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
|
||||
ext4_journal_stop(handle);
|
||||
err = -EBUSY;
|
||||
goto exit_put;
|
||||
@ -1070,14 +1070,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
|
||||
EXT4_SB(sb)->s_sbh))) {
|
||||
ext4_warning(sb, __func__,
|
||||
"error %d on journal write access", err);
|
||||
unlock_super(sb);
|
||||
mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
|
||||
ext4_journal_stop(handle);
|
||||
goto exit_put;
|
||||
}
|
||||
ext4_blocks_count_set(es, o_blocks_count + add);
|
||||
ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
|
||||
sb->s_dirt = 1;
|
||||
unlock_super(sb);
|
||||
mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
|
||||
ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
|
||||
o_blocks_count + add);
|
||||
/* We add the blocks to the bitmap and set the group need init bit */
|
||||
|
831
fs/ext4/super.c
831
fs/ext4/super.c
File diff suppressed because it is too large
Load Diff
14
fs/ioctl.c
14
fs/ioctl.c
@ -414,10 +414,6 @@ static int file_ioctl(struct file *filp, unsigned int cmd,
|
||||
switch (cmd) {
|
||||
case FIBMAP:
|
||||
return ioctl_fibmap(filp, p);
|
||||
case FS_IOC_FIEMAP:
|
||||
return ioctl_fiemap(filp, arg);
|
||||
case FIGETBSZ:
|
||||
return put_user(inode->i_sb->s_blocksize, p);
|
||||
case FIONREAD:
|
||||
return put_user(i_size_read(inode) - filp->f_pos, p);
|
||||
}
|
||||
@ -557,6 +553,16 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
|
||||
error = ioctl_fsthaw(filp);
|
||||
break;
|
||||
|
||||
case FS_IOC_FIEMAP:
|
||||
return ioctl_fiemap(filp, arg);
|
||||
|
||||
case FIGETBSZ:
|
||||
{
|
||||
struct inode *inode = filp->f_path.dentry->d_inode;
|
||||
int __user *p = (int __user *)arg;
|
||||
return put_user(inode->i_sb->s_blocksize, p);
|
||||
}
|
||||
|
||||
default:
|
||||
if (S_ISREG(filp->f_path.dentry->d_inode->i_mode))
|
||||
error = file_ioctl(filp, cmd, arg);
|
||||
|
@ -1781,7 +1781,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
|
||||
* Journal abort has very specific semantics, which we describe
|
||||
* for journal abort.
|
||||
*
|
||||
* Two internal function, which provide abort to te jbd layer
|
||||
* Two internal functions, which provide abort to the jbd layer
|
||||
* itself are here.
|
||||
*/
|
||||
|
||||
@ -1879,7 +1879,7 @@ void jbd2_journal_abort(journal_t *journal, int errno)
|
||||
* int jbd2_journal_errno () - returns the journal's error state.
|
||||
* @journal: journal to examine.
|
||||
*
|
||||
* This is the errno numbet set with jbd2_journal_abort(), the last
|
||||
* This is the errno number set with jbd2_journal_abort(), the last
|
||||
* time the journal was mounted - if the journal was stopped
|
||||
* without calling abort this will be 0.
|
||||
*
|
||||
@ -1903,7 +1903,7 @@ int jbd2_journal_errno(journal_t *journal)
|
||||
* int jbd2_journal_clear_err () - clears the journal's error state
|
||||
* @journal: journal to act on.
|
||||
*
|
||||
* An error must be cleared or Acked to take a FS out of readonly
|
||||
* An error must be cleared or acked to take a FS out of readonly
|
||||
* mode.
|
||||
*/
|
||||
int jbd2_journal_clear_err(journal_t *journal)
|
||||
@ -1923,7 +1923,7 @@ int jbd2_journal_clear_err(journal_t *journal)
|
||||
* void jbd2_journal_ack_err() - Ack journal err.
|
||||
* @journal: journal to act on.
|
||||
*
|
||||
* An error must be cleared or Acked to take a FS out of readonly
|
||||
* An error must be cleared or acked to take a FS out of readonly
|
||||
* mode.
|
||||
*/
|
||||
void jbd2_journal_ack_err(journal_t *journal)
|
||||
|
@ -379,7 +379,8 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
|
||||
struct buffer_head map_bh;
|
||||
unsigned long first_logical_block = 0;
|
||||
|
||||
clear_buffer_mapped(&map_bh);
|
||||
map_bh.b_state = 0;
|
||||
map_bh.b_size = 0;
|
||||
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
|
||||
struct page *page = list_entry(pages->prev, struct page, lru);
|
||||
|
||||
@ -412,7 +413,8 @@ int mpage_readpage(struct page *page, get_block_t get_block)
|
||||
struct buffer_head map_bh;
|
||||
unsigned long first_logical_block = 0;
|
||||
|
||||
clear_buffer_mapped(&map_bh);
|
||||
map_bh.b_state = 0;
|
||||
map_bh.b_size = 0;
|
||||
bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
|
||||
&map_bh, &first_logical_block, get_block);
|
||||
if (bio)
|
||||
|
Loading…
Reference in New Issue
Block a user