forked from Minki/linux
This merge window saw the following new features added to ext4:
* Direct I/O via iomap (required the iomap-for-next branch from Darrick as a prereq). * Support for using dioread-nolock where the block size < page size. * Support for encryption for file systems where the block size < page size. * Rework of journal credits handling so a revoke-heavy workload will not cause the journal to run out of space. * Replace bit-spinlocks with spinlocks in jbd2 Also included were some bug fixes and cleanups, mostly to clean up corner cases from fuzzed file systems and error path handling. -----BEGIN PGP SIGNATURE----- iQEzBAABCAAdFiEEK2m5VNv+CHkogTfJ8vlZVpUNgaMFAl3dHxoACgkQ8vlZVpUN gaMZswf5AbtQhTEJDXO7Pc1ull38GIGFgAv7uAth0TymLC3h1/FEYWW0crEPFsDr 1Eei55UUVOYrMMUKQ4P7wlLX0cIh3XDPMWnRFuqBoV5/ZOsH/ZSbkY//TG2Xze/v 9wXIH/RKQnzbRtXffJ1+DnvmXJk+HFm1R1gjl0nfyUXGrnlSfqJxhLSczyd6bJJq ehi/tso5UC/4EQsAIdWp7VWsAdaHcZ7ogHqDoy8dXpM1equ408iml7VlKr8R+Nr7 5ANpCISXChSlLLYm0NYN5vhO8upF5uDxWLdCtxVPL5kFdM2m/ELjXw9h9C+78l7C EWJGlGlxvx07Px+e+bfStEsoixpWBg== =0eko -----END PGP SIGNATURE----- Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4 Pull ext4 updates from Ted Ts'o: "This merge window saw the the following new featuers added to ext4: - Direct I/O via iomap (required the iomap-for-next branch from Darrick as a prereq). - Support for using dioread-nolock where the block size < page size. - Support for encryption for file systems where the block size < page size. - Rework of journal credits handling so a revoke-heavy workload will not cause the journal to run out of space. 
- Replace bit-spinlocks with spinlocks in jbd2 Also included were some bug fixes and cleanups, mostly to clean up corner cases from fuzzed file systems and error path handling" * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (59 commits) ext4: work around deleting a file with i_nlink == 0 safely ext4: add more paranoia checking in ext4_expand_extra_isize handling jbd2: make jbd2_handle_buffer_credits() handle reserved handles ext4: fix a bug in ext4_wait_for_tail_page_commit ext4: bio_alloc with __GFP_DIRECT_RECLAIM never fails ext4: code cleanup for get_next_id ext4: fix leak of quota reservations ext4: remove unused variable warning in parse_options() ext4: Enable encryption for subpage-sized blocks fs/buffer.c: support fscrypt in block_read_full_page() ext4: Add error handling for io_end_vec struct allocation jbd2: Fine tune estimate of necessary descriptor blocks jbd2: Provide trace event for handle restarts ext4: Reserve revoke credits for freed blocks jbd2: Make credit checking more strict jbd2: Rename h_buffer_credits to h_total_credits jbd2: Reserve space for revoke descriptor blocks jbd2: Drop jbd2_space_needed() jbd2: Account descriptor blocks into t_outstanding_credits jbd2: Factor out common parts of stopping and restarting a handle ...
This commit is contained in:
commit
50b8b3f85a
@ -342,8 +342,8 @@ Contents encryption
|
||||
-------------------
|
||||
|
||||
For file contents, each filesystem block is encrypted independently.
|
||||
Currently, only the case where the filesystem block size is equal to
|
||||
the system's page size (usually 4096 bytes) is supported.
|
||||
Starting from Linux kernel 5.5, encryption of filesystems with block
|
||||
size less than system's page size is supported.
|
||||
|
||||
Each block's IV is set to the logical block number within the file as
|
||||
a little endian number, except that:
|
||||
|
48
fs/buffer.c
48
fs/buffer.c
@ -47,6 +47,7 @@
|
||||
#include <linux/pagevec.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <trace/events/block.h>
|
||||
#include <linux/fscrypt.h>
|
||||
|
||||
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
|
||||
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
|
||||
@ -246,10 +247,6 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* I/O completion handler for block_read_full_page() - pages
|
||||
* which come unlocked at the end of I/O.
|
||||
*/
|
||||
static void end_buffer_async_read(struct buffer_head *bh, int uptodate)
|
||||
{
|
||||
unsigned long flags;
|
||||
@ -307,6 +304,47 @@ still_busy:
|
||||
return;
|
||||
}
|
||||
|
||||
struct decrypt_bh_ctx {
|
||||
struct work_struct work;
|
||||
struct buffer_head *bh;
|
||||
};
|
||||
|
||||
static void decrypt_bh(struct work_struct *work)
|
||||
{
|
||||
struct decrypt_bh_ctx *ctx =
|
||||
container_of(work, struct decrypt_bh_ctx, work);
|
||||
struct buffer_head *bh = ctx->bh;
|
||||
int err;
|
||||
|
||||
err = fscrypt_decrypt_pagecache_blocks(bh->b_page, bh->b_size,
|
||||
bh_offset(bh));
|
||||
end_buffer_async_read(bh, err == 0);
|
||||
kfree(ctx);
|
||||
}
|
||||
|
||||
/*
|
||||
* I/O completion handler for block_read_full_page() - pages
|
||||
* which come unlocked at the end of I/O.
|
||||
*/
|
||||
static void end_buffer_async_read_io(struct buffer_head *bh, int uptodate)
|
||||
{
|
||||
/* Decrypt if needed */
|
||||
if (uptodate && IS_ENABLED(CONFIG_FS_ENCRYPTION) &&
|
||||
IS_ENCRYPTED(bh->b_page->mapping->host) &&
|
||||
S_ISREG(bh->b_page->mapping->host->i_mode)) {
|
||||
struct decrypt_bh_ctx *ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
|
||||
|
||||
if (ctx) {
|
||||
INIT_WORK(&ctx->work, decrypt_bh);
|
||||
ctx->bh = bh;
|
||||
fscrypt_enqueue_decrypt_work(&ctx->work);
|
||||
return;
|
||||
}
|
||||
uptodate = 0;
|
||||
}
|
||||
end_buffer_async_read(bh, uptodate);
|
||||
}
|
||||
|
||||
/*
|
||||
* Completion handler for block_write_full_page() - pages which are unlocked
|
||||
* during I/O, and which have PageWriteback cleared upon I/O completion.
|
||||
@ -379,7 +417,7 @@ EXPORT_SYMBOL(end_buffer_async_write);
|
||||
*/
|
||||
static void mark_buffer_async_read(struct buffer_head *bh)
|
||||
{
|
||||
bh->b_end_io = end_buffer_async_read;
|
||||
bh->b_end_io = end_buffer_async_read_io;
|
||||
set_buffer_async_read(bh);
|
||||
}
|
||||
|
||||
|
@ -198,6 +198,12 @@ struct ext4_system_blocks {
|
||||
*/
|
||||
#define EXT4_IO_END_UNWRITTEN 0x0001
|
||||
|
||||
struct ext4_io_end_vec {
|
||||
struct list_head list; /* list of io_end_vec */
|
||||
loff_t offset; /* offset in the file */
|
||||
ssize_t size; /* size of the extent */
|
||||
};
|
||||
|
||||
/*
|
||||
* For converting unwritten extents on a work queue. 'handle' is used for
|
||||
* buffered writeback.
|
||||
@ -211,8 +217,7 @@ typedef struct ext4_io_end {
|
||||
* bios covering the extent */
|
||||
unsigned int flag; /* unwritten or not */
|
||||
atomic_t count; /* reference counter */
|
||||
loff_t offset; /* offset in the file */
|
||||
ssize_t size; /* size of the extent */
|
||||
struct list_head list_vec; /* list of ext4_io_end_vec */
|
||||
} ext4_io_end_t;
|
||||
|
||||
struct ext4_io_submit {
|
||||
@ -1579,7 +1584,6 @@ enum {
|
||||
EXT4_STATE_NO_EXPAND, /* No space for expansion */
|
||||
EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */
|
||||
EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */
|
||||
EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
|
||||
EXT4_STATE_NEWENTRY, /* File just added to dir */
|
||||
EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
|
||||
EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
|
||||
@ -2562,8 +2566,6 @@ int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
|
||||
struct buffer_head *bh_result, int create);
|
||||
int ext4_get_block(struct inode *inode, sector_t iblock,
|
||||
struct buffer_head *bh_result, int create);
|
||||
int ext4_dio_get_block(struct inode *inode, sector_t iblock,
|
||||
struct buffer_head *bh_result, int create);
|
||||
int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
|
||||
struct buffer_head *bh, int create);
|
||||
int ext4_walk_page_buffers(handle_t *handle,
|
||||
@ -2606,7 +2608,6 @@ extern int ext4_can_truncate(struct inode *inode);
|
||||
extern int ext4_truncate(struct inode *);
|
||||
extern int ext4_break_layouts(struct inode *);
|
||||
extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
|
||||
extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);
|
||||
extern void ext4_set_inode_flags(struct inode *);
|
||||
extern int ext4_alloc_da_blocks(struct inode *inode);
|
||||
extern void ext4_set_aops(struct inode *inode);
|
||||
@ -3266,6 +3267,8 @@ extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
|
||||
loff_t len);
|
||||
extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
|
||||
loff_t offset, ssize_t len);
|
||||
extern int ext4_convert_unwritten_io_end_vec(handle_t *handle,
|
||||
ext4_io_end_t *io_end);
|
||||
extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
|
||||
struct ext4_map_blocks *map, int flags);
|
||||
extern int ext4_ext_calc_metadata_amount(struct inode *inode,
|
||||
@ -3298,6 +3301,10 @@ extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
|
||||
ext4_lblk_t lblk2, ext4_lblk_t count,
|
||||
int mark_unwritten,int *err);
|
||||
extern int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu);
|
||||
extern int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
|
||||
int check_cred, int restart_cred,
|
||||
int revoke_cred);
|
||||
|
||||
|
||||
/* move_extent.c */
|
||||
extern void ext4_double_down_write_data_sem(struct inode *first,
|
||||
@ -3324,6 +3331,8 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
|
||||
int len,
|
||||
struct writeback_control *wbc,
|
||||
bool keep_towrite);
|
||||
extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end);
|
||||
extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end);
|
||||
|
||||
/* mmp.c */
|
||||
extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
|
||||
@ -3381,6 +3390,7 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
|
||||
}
|
||||
|
||||
extern const struct iomap_ops ext4_iomap_ops;
|
||||
extern const struct iomap_ops ext4_iomap_report_ops;
|
||||
|
||||
static inline int ext4_buffer_uptodate(struct buffer_head *bh)
|
||||
{
|
||||
|
@ -65,12 +65,14 @@ static int ext4_journal_check_start(struct super_block *sb)
|
||||
}
|
||||
|
||||
handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
|
||||
int type, int blocks, int rsv_blocks)
|
||||
int type, int blocks, int rsv_blocks,
|
||||
int revoke_creds)
|
||||
{
|
||||
journal_t *journal;
|
||||
int err;
|
||||
|
||||
trace_ext4_journal_start(sb, blocks, rsv_blocks, _RET_IP_);
|
||||
trace_ext4_journal_start(sb, blocks, rsv_blocks, revoke_creds,
|
||||
_RET_IP_);
|
||||
err = ext4_journal_check_start(sb);
|
||||
if (err < 0)
|
||||
return ERR_PTR(err);
|
||||
@ -78,8 +80,8 @@ handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
|
||||
journal = EXT4_SB(sb)->s_journal;
|
||||
if (!journal)
|
||||
return ext4_get_nojournal();
|
||||
return jbd2__journal_start(journal, blocks, rsv_blocks, GFP_NOFS,
|
||||
type, line);
|
||||
return jbd2__journal_start(journal, blocks, rsv_blocks, revoke_creds,
|
||||
GFP_NOFS, type, line);
|
||||
}
|
||||
|
||||
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
|
||||
@ -119,8 +121,8 @@ handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
|
||||
return ext4_get_nojournal();
|
||||
|
||||
sb = handle->h_journal->j_private;
|
||||
trace_ext4_journal_start_reserved(sb, handle->h_buffer_credits,
|
||||
_RET_IP_);
|
||||
trace_ext4_journal_start_reserved(sb,
|
||||
jbd2_handle_buffer_credits(handle), _RET_IP_);
|
||||
err = ext4_journal_check_start(sb);
|
||||
if (err < 0) {
|
||||
jbd2_journal_free_reserved(handle);
|
||||
@ -133,6 +135,19 @@ handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
|
||||
return handle;
|
||||
}
|
||||
|
||||
/*
 * Check whether @handle already holds @check_cred buffer credits and
 * @revoke_cred revoke credits and, if it does not, try to extend it.
 *
 * Returns 0 when the handle is not valid or already has enough credits;
 * otherwise returns the result of ext4_journal_extend() for the credits
 * still missing to reach @extend_cred / @revoke_cred.
 */
int __ext4_journal_ensure_credits(handle_t *handle, int check_cred,
				  int extend_cred, int revoke_cred)
{
	int buf_creds;

	if (!ext4_handle_valid(handle))
		return 0;

	buf_creds = jbd2_handle_buffer_credits(handle);
	if (buf_creds >= check_cred &&
	    handle->h_revoke_credits >= revoke_cred)
		return 0;

	/* Only ask for what the handle does not already have */
	extend_cred = max(0, extend_cred - buf_creds);
	revoke_cred = max(0, revoke_cred - handle->h_revoke_credits);

	return ext4_journal_extend(handle, extend_cred, revoke_cred);
}
|
||||
|
||||
static void ext4_journal_abort_handle(const char *caller, unsigned int line,
|
||||
const char *err_fn,
|
||||
struct buffer_head *bh,
|
||||
@ -278,7 +293,7 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
|
||||
handle->h_type,
|
||||
handle->h_line_no,
|
||||
handle->h_requested_credits,
|
||||
handle->h_buffer_credits, err);
|
||||
jbd2_handle_buffer_credits(handle), err);
|
||||
return err;
|
||||
}
|
||||
ext4_error_inode(inode, where, line,
|
||||
@ -289,7 +304,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
|
||||
handle->h_type,
|
||||
handle->h_line_no,
|
||||
handle->h_requested_credits,
|
||||
handle->h_buffer_credits, err);
|
||||
jbd2_handle_buffer_credits(handle),
|
||||
err);
|
||||
}
|
||||
} else {
|
||||
if (inode)
|
||||
|
@ -261,7 +261,8 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line,
|
||||
__ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
|
||||
|
||||
handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line,
|
||||
int type, int blocks, int rsv_blocks);
|
||||
int type, int blocks, int rsv_blocks,
|
||||
int revoke_creds);
|
||||
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
|
||||
|
||||
#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
|
||||
@ -288,28 +289,41 @@ static inline int ext4_handle_is_aborted(handle_t *handle)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
|
||||
static inline int ext4_free_metadata_revoke_credits(struct super_block *sb,
|
||||
int blocks)
|
||||
{
|
||||
if (ext4_handle_valid(handle) && handle->h_buffer_credits < needed)
|
||||
return 0;
|
||||
return 1;
|
||||
/* Freeing each metadata block can result in freeing one cluster */
|
||||
return blocks * EXT4_SB(sb)->s_cluster_ratio;
|
||||
}
|
||||
|
||||
/*
 * Default number of revoke credits for a transaction: what
 * ext4_free_metadata_revoke_credits() reports for 8 metadata blocks.
 */
static inline int ext4_trans_default_revoke_credits(struct super_block *sb)
{
	const int nr_meta_blocks = 8;

	return ext4_free_metadata_revoke_credits(sb, nr_meta_blocks);
}
|
||||
|
||||
#define ext4_journal_start_sb(sb, type, nblocks) \
|
||||
__ext4_journal_start_sb((sb), __LINE__, (type), (nblocks), 0)
|
||||
__ext4_journal_start_sb((sb), __LINE__, (type), (nblocks), 0, \
|
||||
ext4_trans_default_revoke_credits(sb))
|
||||
|
||||
#define ext4_journal_start(inode, type, nblocks) \
|
||||
__ext4_journal_start((inode), __LINE__, (type), (nblocks), 0)
|
||||
__ext4_journal_start((inode), __LINE__, (type), (nblocks), 0, \
|
||||
ext4_trans_default_revoke_credits((inode)->i_sb))
|
||||
|
||||
#define ext4_journal_start_with_reserve(inode, type, blocks, rsv_blocks) \
|
||||
__ext4_journal_start((inode), __LINE__, (type), (blocks), (rsv_blocks))
|
||||
#define ext4_journal_start_with_reserve(inode, type, blocks, rsv_blocks)\
|
||||
__ext4_journal_start((inode), __LINE__, (type), (blocks), (rsv_blocks),\
|
||||
ext4_trans_default_revoke_credits((inode)->i_sb))
|
||||
|
||||
#define ext4_journal_start_with_revoke(inode, type, blocks, revoke_creds) \
|
||||
__ext4_journal_start((inode), __LINE__, (type), (blocks), 0, \
|
||||
(revoke_creds))
|
||||
|
||||
static inline handle_t *__ext4_journal_start(struct inode *inode,
|
||||
unsigned int line, int type,
|
||||
int blocks, int rsv_blocks)
|
||||
int blocks, int rsv_blocks,
|
||||
int revoke_creds)
|
||||
{
|
||||
return __ext4_journal_start_sb(inode->i_sb, line, type, blocks,
|
||||
rsv_blocks);
|
||||
rsv_blocks, revoke_creds);
|
||||
}
|
||||
|
||||
#define ext4_journal_stop(handle) \
|
||||
@ -332,20 +346,68 @@ static inline handle_t *ext4_journal_current_handle(void)
|
||||
return journal_current_handle();
|
||||
}
|
||||
|
||||
static inline int ext4_journal_extend(handle_t *handle, int nblocks)
|
||||
static inline int ext4_journal_extend(handle_t *handle, int nblocks, int revoke)
|
||||
{
|
||||
if (ext4_handle_valid(handle))
|
||||
return jbd2_journal_extend(handle, nblocks);
|
||||
return jbd2_journal_extend(handle, nblocks, revoke);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int ext4_journal_restart(handle_t *handle, int nblocks)
|
||||
static inline int ext4_journal_restart(handle_t *handle, int nblocks,
|
||||
int revoke)
|
||||
{
|
||||
if (ext4_handle_valid(handle))
|
||||
return jbd2_journal_restart(handle, nblocks);
|
||||
return jbd2__journal_restart(handle, nblocks, revoke, GFP_NOFS);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __ext4_journal_ensure_credits(handle_t *handle, int check_cred,
|
||||
int extend_cred, int revoke_cred);
|
||||
|
||||
|
||||
/*
|
||||
* Ensure @handle has at least @check_creds credits available. If not,
|
||||
* transaction will be extended or restarted to contain at least @extend_cred
|
||||
* credits. Before restarting transaction @fn is executed to allow for cleanup
|
||||
* before the transaction is restarted.
|
||||
*
|
||||
* The return value is < 0 in case of error, 0 in case the handle has enough
|
||||
* credits or transaction extension succeeded, 1 in case transaction had to be
|
||||
* restarted.
|
||||
*/
|
||||
#define ext4_journal_ensure_credits_fn(handle, check_cred, extend_cred,	\
				       revoke_cred, fn)			\
({									\
	int err = __ext4_journal_ensure_credits((handle), (check_cred),	\
					(extend_cred), (revoke_cred));	\
									\
	/* Positive result: run @fn, then restart the handle */		\
	if (err > 0) {							\
		err = (fn);						\
		if (err >= 0) {						\
			err = ext4_journal_restart((handle),		\
					(extend_cred), (revoke_cred));	\
			/* A successful restart is reported as 1 */	\
			if (err == 0)					\
				err = 1;				\
		}							\
	}								\
	err;								\
})
|
||||
|
||||
/*
|
||||
* Ensure given handle has at least requested amount of credits available,
|
||||
* possibly restarting transaction if needed. We also make sure the transaction
|
||||
* has space for at least ext4_trans_default_revoke_credits(sb) revoke records
|
||||
* as freeing one or two blocks is very common pattern and requesting this is
|
||||
* very cheap.
|
||||
*/
|
||||
static inline int ext4_journal_ensure_credits(handle_t *handle, int credits,
					      int revoke_creds)
{
	int ret;

	/*
	 * The same credit count is used for both the check and the
	 * extend/restart request; no cleanup is needed before a restart,
	 * hence the constant 0 in place of a callback.
	 */
	ret = ext4_journal_ensure_credits_fn(handle, credits, credits,
					     revoke_creds, 0);
	return ret;
}
|
||||
|
||||
static inline int ext4_journal_blocks_per_page(struct inode *inode)
|
||||
{
|
||||
if (EXT4_JOURNAL(inode) != NULL)
|
||||
@ -407,6 +469,7 @@ static inline int ext4_inode_journal_mode(struct inode *inode)
|
||||
return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
|
||||
/* We do not support data journalling with delayed allocation */
|
||||
if (!S_ISREG(inode->i_mode) ||
|
||||
ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE) ||
|
||||
test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
|
||||
(ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
|
||||
!test_opt(inode->i_sb, DELALLOC))) {
|
||||
@ -437,6 +500,19 @@ static inline int ext4_should_writeback_data(struct inode *inode)
|
||||
return ext4_inode_journal_mode(inode) & EXT4_INODE_WRITEBACK_DATA_MODE;
|
||||
}
|
||||
|
||||
static inline int ext4_free_data_revoke_credits(struct inode *inode, int blocks)
|
||||
{
|
||||
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
|
||||
return 0;
|
||||
if (!ext4_should_journal_data(inode))
|
||||
return 0;
|
||||
/*
|
||||
* Data blocks in one extent are contiguous, just account for partial
|
||||
* clusters at extent boundaries
|
||||
*/
|
||||
return blocks + 2*(EXT4_SB(inode->i_sb)->s_cluster_ratio - 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function controls whether or not we should try to go down the
|
||||
* dioread_nolock code paths, which makes it safe to avoid taking
|
||||
|
@ -100,29 +100,41 @@ static int ext4_split_extent_at(handle_t *handle,
|
||||
static int ext4_find_delayed_extent(struct inode *inode,
|
||||
struct extent_status *newes);
|
||||
|
||||
static int ext4_ext_truncate_extend_restart(handle_t *handle,
|
||||
struct inode *inode,
|
||||
int needed)
|
||||
static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (!ext4_handle_valid(handle))
|
||||
return 0;
|
||||
if (handle->h_buffer_credits >= needed)
|
||||
return 0;
|
||||
/*
|
||||
* If we need to extend the journal get a few extra blocks
|
||||
* while we're at it for efficiency's sake.
|
||||
* Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
|
||||
* moment, get_block can be called only for blocks inside i_size since
|
||||
* page cache has been already dropped and writes are blocked by
|
||||
* i_mutex. So we can safely drop the i_data_sem here.
|
||||
*/
|
||||
needed += 3;
|
||||
err = ext4_journal_extend(handle, needed - handle->h_buffer_credits);
|
||||
if (err <= 0)
|
||||
return err;
|
||||
err = ext4_truncate_restart_trans(handle, inode, needed);
|
||||
if (err == 0)
|
||||
err = -EAGAIN;
|
||||
BUG_ON(EXT4_JOURNAL(inode) == NULL);
|
||||
ext4_discard_preallocations(inode);
|
||||
up_write(&EXT4_I(inode)->i_data_sem);
|
||||
*dropped = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return err;
|
||||
/*
|
||||
* Make sure 'handle' has at least 'check_cred' credits. If not, restart
|
||||
* transaction with 'restart_cred' credits. The function drops i_data_sem
|
||||
* when restarting transaction and gets it after transaction is restarted.
|
||||
*
|
||||
* The function returns 0 on success, 1 if transaction had to be restarted,
|
||||
* and < 0 in case of fatal error.
|
||||
*/
|
||||
int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
				int check_cred, int restart_cred,
				int revoke_cred)
{
	int sem_dropped = 0;
	int rc;

	/*
	 * ext4_ext_trunc_restart_fn() only runs if the handle has to be
	 * restarted; it releases i_data_sem and records that in sem_dropped.
	 */
	rc = ext4_journal_ensure_credits_fn(handle, check_cred, restart_cred,
			revoke_cred,
			ext4_ext_trunc_restart_fn(inode, &sem_dropped));

	/* Retake i_data_sem if the restart callback released it */
	if (sem_dropped)
		down_write(&EXT4_I(inode)->i_data_sem);

	return rc;
}
|
||||
|
||||
/*
|
||||
@ -1753,16 +1765,9 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
|
||||
*/
|
||||
if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
|
||||
return 0;
|
||||
/*
|
||||
* The check for IO to unwritten extent is somewhat racy as we
|
||||
* increment i_unwritten / set EXT4_STATE_DIO_UNWRITTEN only after
|
||||
* dropping i_data_sem. But reserved blocks should save us in that
|
||||
* case.
|
||||
*/
|
||||
|
||||
if (ext4_ext_is_unwritten(ex1) &&
|
||||
(ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) ||
|
||||
atomic_read(&EXT4_I(inode)->i_unwritten) ||
|
||||
(ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)))
|
||||
ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)
|
||||
return 0;
|
||||
#ifdef AGGRESSIVE_TEST
|
||||
if (ext1_ee_len >= 4)
|
||||
@ -1840,7 +1845,8 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
|
||||
* group descriptor to release the extent tree block. If we
|
||||
* can't get the journal credits, give up.
|
||||
*/
|
||||
if (ext4_journal_extend(handle, 2))
|
||||
if (ext4_journal_extend(handle, 2,
|
||||
ext4_free_metadata_revoke_credits(inode->i_sb, 1)))
|
||||
return;
|
||||
|
||||
/*
|
||||
@ -2727,7 +2733,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
|
||||
{
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
int err = 0, correct_index = 0;
|
||||
int depth = ext_depth(inode), credits;
|
||||
int depth = ext_depth(inode), credits, revoke_credits;
|
||||
struct ext4_extent_header *eh;
|
||||
ext4_lblk_t a, b;
|
||||
unsigned num;
|
||||
@ -2819,10 +2825,23 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
|
||||
credits += (ext_depth(inode)) + 1;
|
||||
}
|
||||
credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
|
||||
/*
|
||||
* We may end up freeing some index blocks and data from the
|
||||
* punched range. Note that partial clusters are accounted for
|
||||
* by ext4_free_data_revoke_credits().
|
||||
*/
|
||||
revoke_credits =
|
||||
ext4_free_metadata_revoke_credits(inode->i_sb,
|
||||
ext_depth(inode)) +
|
||||
ext4_free_data_revoke_credits(inode, b - a + 1);
|
||||
|
||||
err = ext4_ext_truncate_extend_restart(handle, inode, credits);
|
||||
if (err)
|
||||
err = ext4_datasem_ensure_credits(handle, inode, credits,
|
||||
credits, revoke_credits);
|
||||
if (err) {
|
||||
if (err > 0)
|
||||
err = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = ext4_ext_get_access(handle, inode, path + depth);
|
||||
if (err)
|
||||
@ -2948,7 +2967,9 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
|
||||
ext_debug("truncate since %u to %u\n", start, end);
|
||||
|
||||
/* probably first extent we're gonna free will be last in block */
|
||||
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, depth + 1);
|
||||
handle = ext4_journal_start_with_revoke(inode, EXT4_HT_TRUNCATE,
|
||||
depth + 1,
|
||||
ext4_free_metadata_revoke_credits(inode->i_sb, depth));
|
||||
if (IS_ERR(handle))
|
||||
return PTR_ERR(handle);
|
||||
|
||||
@ -4962,23 +4983,13 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
|
||||
int ret = 0;
|
||||
int ret2 = 0;
|
||||
struct ext4_map_blocks map;
|
||||
unsigned int credits, blkbits = inode->i_blkbits;
|
||||
unsigned int blkbits = inode->i_blkbits;
|
||||
unsigned int credits = 0;
|
||||
|
||||
map.m_lblk = offset >> blkbits;
|
||||
max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
|
||||
|
||||
/*
|
||||
* This is somewhat ugly but the idea is clear: When transaction is
|
||||
* reserved, everything goes into it. Otherwise we rather start several
|
||||
* smaller transactions for conversion of each extent separately.
|
||||
*/
|
||||
if (handle) {
|
||||
handle = ext4_journal_start_reserved(handle,
|
||||
EXT4_HT_EXT_CONVERT);
|
||||
if (IS_ERR(handle))
|
||||
return PTR_ERR(handle);
|
||||
credits = 0;
|
||||
} else {
|
||||
if (!handle) {
|
||||
/*
|
||||
* credits to insert 1 extent into extent tree
|
||||
*/
|
||||
@ -5009,11 +5020,40 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
|
||||
if (ret <= 0 || ret2)
|
||||
break;
|
||||
}
|
||||
if (!credits)
|
||||
ret2 = ext4_journal_stop(handle);
|
||||
return ret > 0 ? ret2 : ret;
|
||||
}
|
||||
|
||||
/*
 * Convert every range recorded on @io_end->list_vec by calling
 * ext4_convert_unwritten_extents() for each entry, stopping at the first
 * non-zero result.
 *
 * @handle: reserved handle to start and use for all conversions, or NULL to
 *          let ext4_convert_unwritten_extents() manage its own transactions.
 * @io_end: io_end whose list of ext4_io_end_vec entries is converted.
 *
 * Returns PTR_ERR() of the handle if starting the reserved handle fails, a
 * negative conversion error if one occurred, and otherwise the result of
 * ext4_journal_stop() (0 when no handle was passed in).
 */
int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
{
	/*
	 * ret must start at 0: with an empty list_vec the loop body never
	 * runs, and the return statement below would otherwise read an
	 * uninitialized value.
	 */
	int ret = 0, err = 0;
	struct ext4_io_end_vec *io_end_vec;

	/*
	 * This is somewhat ugly but the idea is clear: When transaction is
	 * reserved, everything goes into it. Otherwise we rather start several
	 * smaller transactions for conversion of each extent separately.
	 */
	if (handle) {
		handle = ext4_journal_start_reserved(handle,
						     EXT4_HT_EXT_CONVERT);
		if (IS_ERR(handle))
			return PTR_ERR(handle);
	}

	list_for_each_entry(io_end_vec, &io_end->list_vec, list) {
		ret = ext4_convert_unwritten_extents(handle, io_end->inode,
						     io_end_vec->offset,
						     io_end_vec->size);
		if (ret)
			break;
	}

	/* Stop only the handle started above */
	if (handle)
		err = ext4_journal_stop(handle);

	return ret < 0 ? ret : err;
}
|
||||
|
||||
/*
|
||||
* If newes is not existing extent (newes->ec_pblk equals zero) find
|
||||
* delayed extent at start of newes and update newes accordingly and
|
||||
@ -5206,13 +5246,10 @@ ext4_access_path(handle_t *handle, struct inode *inode,
|
||||
* descriptor) for each block group; assume two block
|
||||
* groups
|
||||
*/
|
||||
if (handle->h_buffer_credits < 7) {
|
||||
credits = ext4_writepage_trans_blocks(inode);
|
||||
err = ext4_ext_truncate_extend_restart(handle, inode, credits);
|
||||
/* EAGAIN is success */
|
||||
if (err && err != -EAGAIN)
|
||||
return err;
|
||||
}
|
||||
credits = ext4_writepage_trans_blocks(inode);
|
||||
err = ext4_datasem_ensure_credits(handle, inode, 7, credits, 0);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
err = ext4_ext_get_access(handle, inode, path);
|
||||
return err;
|
||||
|
418
fs/ext4/file.c
418
fs/ext4/file.c
@ -29,10 +29,58 @@
|
||||
#include <linux/pagevec.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include "ext4.h"
|
||||
#include "ext4_jbd2.h"
|
||||
#include "xattr.h"
|
||||
#include "acl.h"
|
||||
#include "truncate.h"
|
||||
|
||||
static bool ext4_dio_supported(struct inode *inode)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENCRYPTED(inode))
|
||||
return false;
|
||||
if (fsverity_active(inode))
|
||||
return false;
|
||||
if (ext4_should_journal_data(inode))
|
||||
return false;
|
||||
if (ext4_has_inline_data(inode))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
||||
{
|
||||
ssize_t ret;
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
|
||||
if (iocb->ki_flags & IOCB_NOWAIT) {
|
||||
if (!inode_trylock_shared(inode))
|
||||
return -EAGAIN;
|
||||
} else {
|
||||
inode_lock_shared(inode);
|
||||
}
|
||||
|
||||
if (!ext4_dio_supported(inode)) {
|
||||
inode_unlock_shared(inode);
|
||||
/*
|
||||
* Fallback to buffered I/O if the operation being performed on
|
||||
* the inode is not supported by direct I/O. The IOCB_DIRECT
|
||||
* flag needs to be cleared here in order to ensure that the
|
||||
* direct I/O path within generic_file_read_iter() is not
|
||||
* taken.
|
||||
*/
|
||||
iocb->ki_flags &= ~IOCB_DIRECT;
|
||||
return generic_file_read_iter(iocb, to);
|
||||
}
|
||||
|
||||
ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL,
|
||||
is_sync_kiocb(iocb));
|
||||
inode_unlock_shared(inode);
|
||||
|
||||
file_accessed(iocb->ki_filp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FS_DAX
|
||||
static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
||||
@ -64,16 +112,21 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
||||
|
||||
static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
||||
{
|
||||
if (unlikely(ext4_forced_shutdown(EXT4_SB(file_inode(iocb->ki_filp)->i_sb))))
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
|
||||
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
|
||||
return -EIO;
|
||||
|
||||
if (!iov_iter_count(to))
|
||||
return 0; /* skip atime */
|
||||
|
||||
#ifdef CONFIG_FS_DAX
|
||||
if (IS_DAX(file_inode(iocb->ki_filp)))
|
||||
if (IS_DAX(inode))
|
||||
return ext4_dax_read_iter(iocb, to);
|
||||
#endif
|
||||
if (iocb->ki_flags & IOCB_DIRECT)
|
||||
return ext4_dio_read_iter(iocb, to);
|
||||
|
||||
return generic_file_read_iter(iocb, to);
|
||||
}
|
||||
|
||||
@ -103,13 +156,6 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ext4_unwritten_wait(struct inode *inode)
|
||||
{
|
||||
wait_queue_head_t *wq = ext4_ioend_wq(inode);
|
||||
|
||||
wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0));
|
||||
}
|
||||
|
||||
/*
|
||||
* This tests whether the IO in question is block-aligned or not.
|
||||
* Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
|
||||
@ -162,13 +208,13 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
ssize_t ret;
|
||||
|
||||
if (unlikely(IS_IMMUTABLE(inode)))
|
||||
return -EPERM;
|
||||
|
||||
ret = generic_write_checks(iocb, from);
|
||||
if (ret <= 0)
|
||||
return ret;
|
||||
|
||||
if (unlikely(IS_IMMUTABLE(inode)))
|
||||
return -EPERM;
|
||||
|
||||
/*
|
||||
* If we have encountered a bitmap-format file, the size limit
|
||||
* is smaller than s_maxbytes, which is for extent-mapped files.
|
||||
@ -180,32 +226,301 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
|
||||
return -EFBIG;
|
||||
iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
|
||||
}
|
||||
|
||||
ret = file_modified(iocb->ki_filp);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return iov_iter_count(from);
|
||||
}
|
||||
|
||||
static ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
|
||||
struct iov_iter *from)
|
||||
{
|
||||
ssize_t ret;
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
|
||||
if (iocb->ki_flags & IOCB_NOWAIT)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
inode_lock(inode);
|
||||
ret = ext4_write_checks(iocb, from);
|
||||
if (ret <= 0)
|
||||
goto out;
|
||||
|
||||
current->backing_dev_info = inode_to_bdi(inode);
|
||||
ret = generic_perform_write(iocb->ki_filp, from, iocb->ki_pos);
|
||||
current->backing_dev_info = NULL;
|
||||
|
||||
out:
|
||||
inode_unlock(inode);
|
||||
if (likely(ret > 0)) {
|
||||
iocb->ki_pos += ret;
|
||||
ret = generic_write_sync(iocb, ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Finish a write that was set up as possibly extending the file.
 *
 * @inode:   inode that was written
 * @offset:  file position at which the write started
 * @written: number of bytes written, or a negative errno from the I/O
 * @count:   number of bytes the write was asked to transfer
 *
 * If the requested range ends within i_disksize there is nothing to
 * extend; the inode is only taken off the orphan list if it is still on
 * it.  Otherwise the inode size is pushed out to @offset + @written, and
 * blocks that lie beyond what was actually written are truncated away.
 *
 * Returns @written, or a negative errno if a journal handle could not be
 * started.
 */
static ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
					   ssize_t written, size_t count)
{
	handle_t *handle;
	bool truncate = false;
	u8 blkbits = inode->i_blkbits;
	/*
	 * These are block-aligned *byte* positions, not logical block
	 * numbers, so they must be as wide as a file offset.
	 */
	loff_t written_end, io_end;

	/*
	 * Note that EXT4_I(inode)->i_disksize can get extended up to
	 * inode->i_size while the I/O was running due to writeback of delalloc
	 * blocks. But, the code in ext4_iomap_alloc() is careful to use
	 * zeroed/unwritten extents if this is possible; thus we won't leave
	 * uninitialized blocks in a file even if we didn't succeed in writing
	 * as much as we intended.
	 */
	WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
	if (offset + count <= EXT4_I(inode)->i_disksize) {
		/*
		 * We need to ensure that the inode is removed from the orphan
		 * list if it has been added prematurely, due to writeback of
		 * delalloc blocks.
		 */
		if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
			handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);

			if (IS_ERR(handle)) {
				/* No handle: drop the in-memory entry only. */
				ext4_orphan_del(NULL, inode);
				return PTR_ERR(handle);
			}

			ext4_orphan_del(handle, inode);
			ext4_journal_stop(handle);
		}

		return written;
	}

	/* The I/O itself failed: nothing to account, just clean up. */
	if (written < 0)
		goto truncate;

	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
	if (IS_ERR(handle)) {
		written = PTR_ERR(handle);
		goto truncate;
	}

	if (ext4_update_inode_size(inode, offset + written))
		ext4_mark_inode_dirty(handle, inode);

	/*
	 * We may need to truncate allocated but not written blocks beyond EOF.
	 */
	written_end = ALIGN(offset + written, 1 << blkbits);
	io_end = ALIGN(offset + count, 1 << blkbits);
	if (written_end < io_end && ext4_can_truncate(inode))
		truncate = true;

	/*
	 * Remove the inode from the orphan list if it has been extended and
	 * everything went OK.
	 */
	if (!truncate && inode->i_nlink)
		ext4_orphan_del(handle, inode);
	ext4_journal_stop(handle);

	if (!truncate)
		return written;

truncate:
	ext4_truncate_failed_write(inode);
	/*
	 * If the truncate operation failed early, then the inode may
	 * still be on the orphan list. In that case, we need to try to
	 * remove the inode from the in-memory linked list.
	 */
	if (inode->i_nlink)
		ext4_orphan_del(NULL, inode);

	return written;
}
|
||||
|
||||
static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
|
||||
int error, unsigned int flags)
|
||||
{
|
||||
loff_t offset = iocb->ki_pos;
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
if (size && flags & IOMAP_DIO_UNWRITTEN)
|
||||
return ext4_convert_unwritten_extents(NULL, inode,
|
||||
offset, size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct iomap_dio_ops ext4_dio_write_ops = {
|
||||
.end_io = ext4_dio_write_end_io,
|
||||
};
|
||||
|
||||
static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
{
|
||||
ssize_t ret;
|
||||
size_t count;
|
||||
loff_t offset;
|
||||
handle_t *handle;
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
bool extend = false, overwrite = false, unaligned_aio = false;
|
||||
|
||||
if (iocb->ki_flags & IOCB_NOWAIT) {
|
||||
if (!inode_trylock(inode))
|
||||
return -EAGAIN;
|
||||
} else {
|
||||
inode_lock(inode);
|
||||
}
|
||||
|
||||
if (!ext4_dio_supported(inode)) {
|
||||
inode_unlock(inode);
|
||||
/*
|
||||
* Fallback to buffered I/O if the inode does not support
|
||||
* direct I/O.
|
||||
*/
|
||||
return ext4_buffered_write_iter(iocb, from);
|
||||
}
|
||||
|
||||
ret = ext4_write_checks(iocb, from);
|
||||
if (ret <= 0) {
|
||||
inode_unlock(inode);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unaligned asynchronous direct I/O must be serialized among each
|
||||
* other as the zeroing of partial blocks of two competing unaligned
|
||||
* asynchronous direct I/O writes can result in data corruption.
|
||||
*/
|
||||
offset = iocb->ki_pos;
|
||||
count = iov_iter_count(from);
|
||||
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
|
||||
!is_sync_kiocb(iocb) && ext4_unaligned_aio(inode, from, offset)) {
|
||||
unaligned_aio = true;
|
||||
inode_dio_wait(inode);
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine whether the I/O will overwrite allocated and initialized
|
||||
* blocks. If so, check to see whether it is possible to take the
|
||||
* dioread_nolock path.
|
||||
*/
|
||||
if (!unaligned_aio && ext4_overwrite_io(inode, offset, count) &&
|
||||
ext4_should_dioread_nolock(inode)) {
|
||||
overwrite = true;
|
||||
downgrade_write(&inode->i_rwsem);
|
||||
}
|
||||
|
||||
if (offset + count > EXT4_I(inode)->i_disksize) {
|
||||
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
|
||||
if (IS_ERR(handle)) {
|
||||
ret = PTR_ERR(handle);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ext4_orphan_add(handle, inode);
|
||||
if (ret) {
|
||||
ext4_journal_stop(handle);
|
||||
goto out;
|
||||
}
|
||||
|
||||
extend = true;
|
||||
ext4_journal_stop(handle);
|
||||
}
|
||||
|
||||
ret = iomap_dio_rw(iocb, from, &ext4_iomap_ops, &ext4_dio_write_ops,
|
||||
is_sync_kiocb(iocb) || unaligned_aio || extend);
|
||||
|
||||
if (extend)
|
||||
ret = ext4_handle_inode_extension(inode, offset, ret, count);
|
||||
|
||||
out:
|
||||
if (overwrite)
|
||||
inode_unlock_shared(inode);
|
||||
else
|
||||
inode_unlock(inode);
|
||||
|
||||
if (ret >= 0 && iov_iter_count(from)) {
|
||||
ssize_t err;
|
||||
loff_t endbyte;
|
||||
|
||||
offset = iocb->ki_pos;
|
||||
err = ext4_buffered_write_iter(iocb, from);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
/*
|
||||
* We need to ensure that the pages within the page cache for
|
||||
* the range covered by this I/O are written to disk and
|
||||
* invalidated. This is in attempt to preserve the expected
|
||||
* direct I/O semantics in the case we fallback to buffered I/O
|
||||
* to complete off the I/O request.
|
||||
*/
|
||||
ret += err;
|
||||
endbyte = offset + err - 1;
|
||||
err = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
|
||||
offset, endbyte);
|
||||
if (!err)
|
||||
invalidate_mapping_pages(iocb->ki_filp->f_mapping,
|
||||
offset >> PAGE_SHIFT,
|
||||
endbyte >> PAGE_SHIFT);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FS_DAX
|
||||
static ssize_t
|
||||
ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
{
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
ssize_t ret;
|
||||
size_t count;
|
||||
loff_t offset;
|
||||
handle_t *handle;
|
||||
bool extend = false;
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
|
||||
if (!inode_trylock(inode)) {
|
||||
if (iocb->ki_flags & IOCB_NOWAIT)
|
||||
return -EAGAIN;
|
||||
inode_lock(inode);
|
||||
}
|
||||
|
||||
ret = ext4_write_checks(iocb, from);
|
||||
if (ret <= 0)
|
||||
goto out;
|
||||
ret = file_remove_privs(iocb->ki_filp);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = file_update_time(iocb->ki_filp);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
offset = iocb->ki_pos;
|
||||
count = iov_iter_count(from);
|
||||
|
||||
if (offset + count > EXT4_I(inode)->i_disksize) {
|
||||
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
|
||||
if (IS_ERR(handle)) {
|
||||
ret = PTR_ERR(handle);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ext4_orphan_add(handle, inode);
|
||||
if (ret) {
|
||||
ext4_journal_stop(handle);
|
||||
goto out;
|
||||
}
|
||||
|
||||
extend = true;
|
||||
ext4_journal_stop(handle);
|
||||
}
|
||||
|
||||
ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
|
||||
|
||||
if (extend)
|
||||
ret = ext4_handle_inode_extension(inode, offset, ret, count);
|
||||
out:
|
||||
inode_unlock(inode);
|
||||
if (ret > 0)
|
||||
@ -218,10 +533,6 @@ static ssize_t
|
||||
ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
{
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
int o_direct = iocb->ki_flags & IOCB_DIRECT;
|
||||
int unaligned_aio = 0;
|
||||
int overwrite = 0;
|
||||
ssize_t ret;
|
||||
|
||||
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
|
||||
return -EIO;
|
||||
@ -230,59 +541,10 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
if (IS_DAX(inode))
|
||||
return ext4_dax_write_iter(iocb, from);
|
||||
#endif
|
||||
if (iocb->ki_flags & IOCB_DIRECT)
|
||||
return ext4_dio_write_iter(iocb, from);
|
||||
|
||||
if (!inode_trylock(inode)) {
|
||||
if (iocb->ki_flags & IOCB_NOWAIT)
|
||||
return -EAGAIN;
|
||||
inode_lock(inode);
|
||||
}
|
||||
|
||||
ret = ext4_write_checks(iocb, from);
|
||||
if (ret <= 0)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Unaligned direct AIO must be serialized among each other as zeroing
|
||||
* of partial blocks of two competing unaligned AIOs can result in data
|
||||
* corruption.
|
||||
*/
|
||||
if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
|
||||
!is_sync_kiocb(iocb) &&
|
||||
ext4_unaligned_aio(inode, from, iocb->ki_pos)) {
|
||||
unaligned_aio = 1;
|
||||
ext4_unwritten_wait(inode);
|
||||
}
|
||||
|
||||
iocb->private = &overwrite;
|
||||
/* Check whether we do a DIO overwrite or not */
|
||||
if (o_direct && !unaligned_aio) {
|
||||
if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
|
||||
if (ext4_should_dioread_nolock(inode))
|
||||
overwrite = 1;
|
||||
} else if (iocb->ki_flags & IOCB_NOWAIT) {
|
||||
ret = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = __generic_file_write_iter(iocb, from);
|
||||
/*
|
||||
* Unaligned direct AIO must be the only IO in flight. Otherwise
|
||||
* overlapping aligned IO after unaligned might result in data
|
||||
* corruption.
|
||||
*/
|
||||
if (ret == -EIOCBQUEUED && unaligned_aio)
|
||||
ext4_unwritten_wait(inode);
|
||||
inode_unlock(inode);
|
||||
|
||||
if (ret > 0)
|
||||
ret = generic_write_sync(iocb, ret);
|
||||
|
||||
return ret;
|
||||
|
||||
out:
|
||||
inode_unlock(inode);
|
||||
return ret;
|
||||
return ext4_buffered_write_iter(iocb, from);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FS_DAX
|
||||
@ -494,12 +756,14 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
|
||||
maxbytes, i_size_read(inode));
|
||||
case SEEK_HOLE:
|
||||
inode_lock_shared(inode);
|
||||
offset = iomap_seek_hole(inode, offset, &ext4_iomap_ops);
|
||||
offset = iomap_seek_hole(inode, offset,
|
||||
&ext4_iomap_report_ops);
|
||||
inode_unlock_shared(inode);
|
||||
break;
|
||||
case SEEK_DATA:
|
||||
inode_lock_shared(inode);
|
||||
offset = iomap_seek_data(inode, offset, &ext4_iomap_ops);
|
||||
offset = iomap_seek_data(inode, offset,
|
||||
&ext4_iomap_report_ops);
|
||||
inode_unlock_shared(inode);
|
||||
break;
|
||||
}
|
||||
|
@ -80,6 +80,43 @@ static int ext4_sync_parent(struct inode *inode)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * fsync helper for a filesystem without a journal.
 *
 * @inode:         inode being synced
 * @datasync:      true when only data-integrity metadata has to reach disk
 * @needs_barrier: set to true when the BARRIER mount option is enabled, so
 *                 that the caller issues a cache flush
 *
 * Writes out the mapping's buffers, then the inode itself when it is dirty
 * (for @datasync, only when I_DIRTY_DATASYNC is set), and finally syncs
 * the parent via ext4_sync_parent().
 *
 * A clean inode only skips the inode write-out.  The parent sync and the
 * barrier request must still happen: data written by this fsync may sit in
 * the device cache even when the inode has no dirty state left.
 *
 * Returns 0 or the first error encountered.
 */
static int ext4_fsync_nojournal(struct inode *inode, bool datasync,
				bool *needs_barrier)
{
	int ret, err;

	ret = sync_mapping_buffers(inode->i_mapping);

	if ((inode->i_state & I_DIRTY_ALL) &&
	    (!datasync || (inode->i_state & I_DIRTY_DATASYNC))) {
		err = sync_inode_metadata(inode, 1);
		/* Keep the earlier error if there was one. */
		if (!ret)
			ret = err;
	}

	if (!ret)
		ret = ext4_sync_parent(inode);
	if (test_opt(inode->i_sb, BARRIER))
		*needs_barrier = true;

	return ret;
}
|
||||
|
||||
/*
 * fsync helper for a journalled filesystem.
 *
 * Picks the transaction to wait for (i_datasync_tid for @datasync,
 * i_sync_tid otherwise), sets *@needs_barrier when the journal uses
 * barriers but jbd2_trans_will_send_data_barrier() reports that this
 * transaction will not send one, and returns the result of
 * jbd2_complete_transaction() for that transaction.
 */
static int ext4_fsync_journal(struct inode *inode, bool datasync,
			     bool *needs_barrier)
{
	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
	struct ext4_inode_info *ei = EXT4_I(inode);
	tid_t commit_tid;

	if (datasync)
		commit_tid = ei->i_datasync_tid;
	else
		commit_tid = ei->i_sync_tid;

	if (journal->j_flags & JBD2_BARRIER) {
		if (!jbd2_trans_will_send_data_barrier(journal, commit_tid))
			*needs_barrier = true;
	}

	return jbd2_complete_transaction(journal, commit_tid);
}
|
||||
|
||||
/*
|
||||
* akpm: A new design for ext4_sync_file().
|
||||
*
|
||||
@ -91,17 +128,14 @@ static int ext4_sync_parent(struct inode *inode)
|
||||
* What we do is just kick off a commit and wait on it. This will snapshot the
|
||||
* inode to disk.
|
||||
*/
|
||||
|
||||
int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
{
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
|
||||
int ret = 0, err;
|
||||
tid_t commit_tid;
|
||||
bool needs_barrier = false;
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
|
||||
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
|
||||
if (unlikely(ext4_forced_shutdown(sbi)))
|
||||
return -EIO;
|
||||
|
||||
J_ASSERT(ext4_journal_current_handle() == NULL);
|
||||
@ -111,23 +145,15 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
if (sb_rdonly(inode->i_sb)) {
|
||||
/* Make sure that we read updated s_mount_flags value */
|
||||
smp_rmb();
|
||||
if (EXT4_SB(inode->i_sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
|
||||
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
|
||||
ret = -EROFS;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!journal) {
|
||||
ret = __generic_file_fsync(file, start, end, datasync);
|
||||
if (!ret)
|
||||
ret = ext4_sync_parent(inode);
|
||||
if (test_opt(inode->i_sb, BARRIER))
|
||||
goto issue_flush;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = file_write_and_wait_range(file, start, end);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* data=writeback,ordered:
|
||||
* The caller's filemap_fdatawrite()/wait will sync the data.
|
||||
@ -142,18 +168,14 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
* (they were dirtied by commit). But that's OK - the blocks are
|
||||
* safe in-journal, which is all fsync() needs to ensure.
|
||||
*/
|
||||
if (ext4_should_journal_data(inode)) {
|
||||
if (!sbi->s_journal)
|
||||
ret = ext4_fsync_nojournal(inode, datasync, &needs_barrier);
|
||||
else if (ext4_should_journal_data(inode))
|
||||
ret = ext4_force_commit(inode->i_sb);
|
||||
goto out;
|
||||
}
|
||||
else
|
||||
ret = ext4_fsync_journal(inode, datasync, &needs_barrier);
|
||||
|
||||
commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
|
||||
if (journal->j_flags & JBD2_BARRIER &&
|
||||
!jbd2_trans_will_send_data_barrier(journal, commit_tid))
|
||||
needs_barrier = true;
|
||||
ret = jbd2_complete_transaction(journal, commit_tid);
|
||||
if (needs_barrier) {
|
||||
issue_flush:
|
||||
err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
|
||||
if (!ret)
|
||||
ret = err;
|
||||
|
@ -265,13 +265,8 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
|
||||
ext4_debug("freeing inode %lu\n", ino);
|
||||
trace_ext4_free_inode(inode);
|
||||
|
||||
/*
|
||||
* Note: we must free any quota before locking the superblock,
|
||||
* as writing the quota to disk may need the lock as well.
|
||||
*/
|
||||
dquot_initialize(inode);
|
||||
dquot_free_inode(inode);
|
||||
dquot_drop(inode);
|
||||
|
||||
is_directory = S_ISDIR(inode->i_mode);
|
||||
|
||||
@ -927,7 +922,7 @@ repeat_in_this_group:
|
||||
BUG_ON(nblocks <= 0);
|
||||
handle = __ext4_journal_start_sb(dir->i_sb, line_no,
|
||||
handle_type, nblocks,
|
||||
0);
|
||||
0, 0);
|
||||
if (IS_ERR(handle)) {
|
||||
err = PTR_ERR(handle);
|
||||
ext4_std_error(sb, err);
|
||||
|
@ -331,11 +331,14 @@ static int ext4_alloc_branch(handle_t *handle,
|
||||
for (i = 0; i <= indirect_blks; i++) {
|
||||
if (i == indirect_blks) {
|
||||
new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err);
|
||||
} else
|
||||
} else {
|
||||
ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle,
|
||||
ar->inode, ar->goal,
|
||||
ar->flags & EXT4_MB_DELALLOC_RESERVED,
|
||||
NULL, &err);
|
||||
/* Simplify error cleanup... */
|
||||
branch[i+1].bh = NULL;
|
||||
}
|
||||
if (err) {
|
||||
i--;
|
||||
goto failed;
|
||||
@ -377,18 +380,25 @@ static int ext4_alloc_branch(handle_t *handle,
|
||||
}
|
||||
return 0;
|
||||
failed:
|
||||
if (i == indirect_blks) {
|
||||
/* Free data blocks */
|
||||
ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
|
||||
ar->len, 0);
|
||||
i--;
|
||||
}
|
||||
for (; i >= 0; i--) {
|
||||
/*
|
||||
* We want to ext4_forget() only freshly allocated indirect
|
||||
* blocks. Buffer for new_blocks[i-1] is at branch[i].bh and
|
||||
* buffer at branch[0].bh is indirect block / inode already
|
||||
* existing before ext4_alloc_branch() was called.
|
||||
* blocks. Buffer for new_blocks[i] is at branch[i+1].bh
|
||||
* (buffer at branch[0].bh is indirect block / inode already
|
||||
* existing before ext4_alloc_branch() was called). Also
|
||||
* because blocks are freshly allocated, we don't need to
|
||||
* revoke them which is why we don't set
|
||||
* EXT4_FREE_BLOCKS_METADATA.
|
||||
*/
|
||||
if (i > 0 && i != indirect_blks && branch[i].bh)
|
||||
ext4_forget(handle, 1, ar->inode, branch[i].bh,
|
||||
branch[i].bh->b_blocknr);
|
||||
ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
|
||||
(i == indirect_blks) ? ar->len : 1, 0);
|
||||
ext4_free_blocks(handle, ar->inode, branch[i+1].bh,
|
||||
new_blocks[i], 1,
|
||||
branch[i+1].bh ? EXT4_FREE_BLOCKS_FORGET : 0);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
@ -689,27 +699,63 @@ int ext4_ind_trans_blocks(struct inode *inode, int nrblocks)
|
||||
return DIV_ROUND_UP(nrblocks, EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
|
||||
}
|
||||
|
||||
/*
 * Preparation step run before a truncate transaction is restarted.
 *
 * Marks @bh (when given) and the inode dirty in the current handle, then
 * discards preallocations and releases i_data_sem.  *@dropped is set to 1
 * once the semaphore has been released so the caller knows it has to take
 * it again.
 *
 * Returns 0 on success, or the error from dirtying the buffer or inode;
 * in the error case i_data_sem is still held and *@dropped is untouched.
 */
static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode,
				     struct buffer_head *bh, int *dropped)
{
	int err = 0;

	if (bh) {
		BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
		err = ext4_handle_dirty_metadata(handle, inode, bh);
	}
	if (likely(!err))
		err = ext4_mark_inode_dirty(handle, inode);
	if (unlikely(err))
		return err;

	/*
	 * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
	 * moment, get_block can be called only for blocks inside i_size since
	 * page cache has been already dropped and writes are blocked by
	 * i_mutex. So we can safely drop the i_data_sem here.
	 */
	BUG_ON(EXT4_JOURNAL(inode) == NULL);
	ext4_discard_preallocations(inode);
	up_write(&EXT4_I(inode)->i_data_sem);
	*dropped = 1;

	return 0;
}
|
||||
|
||||
/*
|
||||
* Truncate transactions can be complex and absolutely huge. So we need to
|
||||
* be able to restart the transaction at a convenient checkpoint to make
|
||||
* sure we don't overflow the journal.
|
||||
*
|
||||
* Try to extend this transaction for the purposes of truncation. If
|
||||
* extend fails, we need to propagate the failure up and restart the
|
||||
* transaction in the top-level truncate loop. --sct
|
||||
*
|
||||
* Returns 0 if we managed to create more room. If we can't create more
|
||||
* room, and the transaction must be restarted we return 1.
|
||||
* extend fails, we restart transaction.
|
||||
*/
|
||||
static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
|
||||
static int ext4_ind_truncate_ensure_credits(handle_t *handle,
|
||||
struct inode *inode,
|
||||
struct buffer_head *bh,
|
||||
int revoke_creds)
|
||||
{
|
||||
if (!ext4_handle_valid(handle))
|
||||
return 0;
|
||||
if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
|
||||
return 0;
|
||||
if (!ext4_journal_extend(handle, ext4_blocks_for_truncate(inode)))
|
||||
return 0;
|
||||
return 1;
|
||||
int ret;
|
||||
int dropped = 0;
|
||||
|
||||
ret = ext4_journal_ensure_credits_fn(handle, EXT4_RESERVE_TRANS_BLOCKS,
|
||||
ext4_blocks_for_truncate(inode), revoke_creds,
|
||||
ext4_ind_trunc_restart_fn(handle, inode, bh, &dropped));
|
||||
if (dropped)
|
||||
down_write(&EXT4_I(inode)->i_data_sem);
|
||||
if (ret <= 0)
|
||||
return ret;
|
||||
if (bh) {
|
||||
BUFFER_TRACE(bh, "retaking write access");
|
||||
ret = ext4_journal_get_write_access(handle, bh);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -844,27 +890,10 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (try_to_extend_transaction(handle, inode)) {
|
||||
if (bh) {
|
||||
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
|
||||
err = ext4_handle_dirty_metadata(handle, inode, bh);
|
||||
if (unlikely(err))
|
||||
goto out_err;
|
||||
}
|
||||
err = ext4_mark_inode_dirty(handle, inode);
|
||||
if (unlikely(err))
|
||||
goto out_err;
|
||||
err = ext4_truncate_restart_trans(handle, inode,
|
||||
ext4_blocks_for_truncate(inode));
|
||||
if (unlikely(err))
|
||||
goto out_err;
|
||||
if (bh) {
|
||||
BUFFER_TRACE(bh, "retaking write access");
|
||||
err = ext4_journal_get_write_access(handle, bh);
|
||||
if (unlikely(err))
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
err = ext4_ind_truncate_ensure_credits(handle, inode, bh,
|
||||
ext4_free_data_revoke_credits(inode, count));
|
||||
if (err < 0)
|
||||
goto out_err;
|
||||
|
||||
for (p = first; p < last; p++)
|
||||
*p = 0;
|
||||
@ -1047,11 +1076,11 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
|
||||
*/
|
||||
if (ext4_handle_is_aborted(handle))
|
||||
return;
|
||||
if (try_to_extend_transaction(handle, inode)) {
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
ext4_truncate_restart_trans(handle, inode,
|
||||
ext4_blocks_for_truncate(inode));
|
||||
}
|
||||
if (ext4_ind_truncate_ensure_credits(handle, inode,
|
||||
NULL,
|
||||
ext4_free_metadata_revoke_credits(
|
||||
inode->i_sb, 1)) < 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* The forget flag here is critical because if
|
||||
|
958
fs/ext4/inode.c
958
fs/ext4/inode.c
File diff suppressed because it is too large
Load Diff
@ -50,29 +50,9 @@ static int finish_range(handle_t *handle, struct inode *inode,
|
||||
needed = ext4_ext_calc_credits_for_single_extent(inode,
|
||||
lb->last_block - lb->first_block + 1, path);
|
||||
|
||||
/*
|
||||
* Make sure the credit we accumalated is not really high
|
||||
*/
|
||||
if (needed && ext4_handle_has_enough_credits(handle,
|
||||
EXT4_RESERVE_TRANS_BLOCKS)) {
|
||||
up_write((&EXT4_I(inode)->i_data_sem));
|
||||
retval = ext4_journal_restart(handle, needed);
|
||||
down_write((&EXT4_I(inode)->i_data_sem));
|
||||
if (retval)
|
||||
goto err_out;
|
||||
} else if (needed) {
|
||||
retval = ext4_journal_extend(handle, needed);
|
||||
if (retval) {
|
||||
/*
|
||||
* IF not able to extend the journal restart the journal
|
||||
*/
|
||||
up_write((&EXT4_I(inode)->i_data_sem));
|
||||
retval = ext4_journal_restart(handle, needed);
|
||||
down_write((&EXT4_I(inode)->i_data_sem));
|
||||
if (retval)
|
||||
goto err_out;
|
||||
}
|
||||
}
|
||||
retval = ext4_datasem_ensure_credits(handle, inode, needed, needed, 0);
|
||||
if (retval < 0)
|
||||
goto err_out;
|
||||
retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0);
|
||||
err_out:
|
||||
up_write((&EXT4_I(inode)->i_data_sem));
|
||||
@ -196,42 +176,30 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
|
||||
|
||||
}
|
||||
|
||||
/*
 * Make sure @handle has credits for freeing one more block.
 *
 * Returns 0 when the handle already has enough credits or could be
 * extended; otherwise returns the result of restarting the handle.
 */
static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
{
	int needed;

	if (ext4_handle_has_enough_credits(handle, EXT4_RESERVE_TRANS_BLOCKS+1))
		return 0;

	/*
	 * We are freeing a block. During this we touch the superblock,
	 * group descriptor and block bitmap, so allocate a credit of 3.
	 * We may also update quota (user and group).
	 */
	needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);

	if (ext4_journal_extend(handle, needed) == 0)
		return 0;

	return ext4_journal_restart(handle, needed);
}
|
||||
|
||||
static int free_dind_blocks(handle_t *handle,
|
||||
struct inode *inode, __le32 i_data)
|
||||
{
|
||||
int i;
|
||||
__le32 *tmp_idata;
|
||||
struct buffer_head *bh;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
|
||||
int err;
|
||||
|
||||
bh = ext4_sb_bread(inode->i_sb, le32_to_cpu(i_data), 0);
|
||||
bh = ext4_sb_bread(sb, le32_to_cpu(i_data), 0);
|
||||
if (IS_ERR(bh))
|
||||
return PTR_ERR(bh);
|
||||
|
||||
tmp_idata = (__le32 *)bh->b_data;
|
||||
for (i = 0; i < max_entries; i++) {
|
||||
if (tmp_idata[i]) {
|
||||
extend_credit_for_blkdel(handle, inode);
|
||||
err = ext4_journal_ensure_credits(handle,
|
||||
EXT4_RESERVE_TRANS_BLOCKS,
|
||||
ext4_free_metadata_revoke_credits(sb, 1));
|
||||
if (err < 0) {
|
||||
put_bh(bh);
|
||||
return err;
|
||||
}
|
||||
ext4_free_blocks(handle, inode, NULL,
|
||||
le32_to_cpu(tmp_idata[i]), 1,
|
||||
EXT4_FREE_BLOCKS_METADATA |
|
||||
@ -239,7 +207,10 @@ static int free_dind_blocks(handle_t *handle,
|
||||
}
|
||||
}
|
||||
put_bh(bh);
|
||||
extend_credit_for_blkdel(handle, inode);
|
||||
err = ext4_journal_ensure_credits(handle, EXT4_RESERVE_TRANS_BLOCKS,
|
||||
ext4_free_metadata_revoke_credits(sb, 1));
|
||||
if (err < 0)
|
||||
return err;
|
||||
ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
|
||||
EXT4_FREE_BLOCKS_METADATA |
|
||||
EXT4_FREE_BLOCKS_FORGET);
|
||||
@ -270,7 +241,10 @@ static int free_tind_blocks(handle_t *handle,
|
||||
}
|
||||
}
|
||||
put_bh(bh);
|
||||
extend_credit_for_blkdel(handle, inode);
|
||||
retval = ext4_journal_ensure_credits(handle, EXT4_RESERVE_TRANS_BLOCKS,
|
||||
ext4_free_metadata_revoke_credits(inode->i_sb, 1));
|
||||
if (retval < 0)
|
||||
return retval;
|
||||
ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1,
|
||||
EXT4_FREE_BLOCKS_METADATA |
|
||||
EXT4_FREE_BLOCKS_FORGET);
|
||||
@ -283,7 +257,11 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
|
||||
|
||||
/* ei->i_data[EXT4_IND_BLOCK] */
|
||||
if (i_data[0]) {
|
||||
extend_credit_for_blkdel(handle, inode);
|
||||
retval = ext4_journal_ensure_credits(handle,
|
||||
EXT4_RESERVE_TRANS_BLOCKS,
|
||||
ext4_free_metadata_revoke_credits(inode->i_sb, 1));
|
||||
if (retval < 0)
|
||||
return retval;
|
||||
ext4_free_blocks(handle, inode, NULL,
|
||||
le32_to_cpu(i_data[0]), 1,
|
||||
EXT4_FREE_BLOCKS_METADATA |
|
||||
@ -318,12 +296,9 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
|
||||
* One credit accounted for writing the
|
||||
* i_data field of the original inode
|
||||
*/
|
||||
retval = ext4_journal_extend(handle, 1);
|
||||
if (retval) {
|
||||
retval = ext4_journal_restart(handle, 1);
|
||||
if (retval)
|
||||
goto err_out;
|
||||
}
|
||||
retval = ext4_journal_ensure_credits(handle, 1, 0);
|
||||
if (retval < 0)
|
||||
goto err_out;
|
||||
|
||||
i_data[0] = ei->i_data[EXT4_IND_BLOCK];
|
||||
i_data[1] = ei->i_data[EXT4_DIND_BLOCK];
|
||||
@ -391,15 +366,20 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
|
||||
ix = EXT_FIRST_INDEX(eh);
|
||||
for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ix++) {
|
||||
retval = free_ext_idx(handle, inode, ix);
|
||||
if (retval)
|
||||
break;
|
||||
if (retval) {
|
||||
put_bh(bh);
|
||||
return retval;
|
||||
}
|
||||
}
|
||||
}
|
||||
put_bh(bh);
|
||||
extend_credit_for_blkdel(handle, inode);
|
||||
retval = ext4_journal_ensure_credits(handle, EXT4_RESERVE_TRANS_BLOCKS,
|
||||
ext4_free_metadata_revoke_credits(inode->i_sb, 1));
|
||||
if (retval < 0)
|
||||
return retval;
|
||||
ext4_free_blocks(handle, inode, NULL, block, 1,
|
||||
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
|
||||
return retval;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -574,9 +554,9 @@ err_out:
|
||||
}
|
||||
|
||||
/* We mark the tmp_inode dirty via ext4_ext_tree_init. */
|
||||
if (ext4_journal_extend(handle, 1) != 0)
|
||||
ext4_journal_restart(handle, 1);
|
||||
|
||||
retval = ext4_journal_ensure_credits(handle, 1, 0);
|
||||
if (retval < 0)
|
||||
goto out_stop;
|
||||
/*
|
||||
* Mark the tmp_inode as of size zero
|
||||
*/
|
||||
@ -594,6 +574,7 @@ err_out:
|
||||
|
||||
/* Reset the extent details */
|
||||
ext4_ext_tree_init(handle, tmp_inode);
|
||||
out_stop:
|
||||
ext4_journal_stop(handle);
|
||||
out:
|
||||
unlock_new_inode(tmp_inode);
|
||||
|
@ -2547,18 +2547,29 @@ static void ext4_dec_count(handle_t *handle, struct inode *inode)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Add non-directory inode to a directory. On success, the inode reference is
|
||||
* consumed by dentry instantiation. This is also indicated by clearing of
|
||||
* *inodep pointer. On failure, the caller is responsible for dropping the
|
||||
* inode reference in the safe context.
|
||||
*/
|
||||
static int ext4_add_nondir(handle_t *handle,
|
||||
struct dentry *dentry, struct inode *inode)
|
||||
struct dentry *dentry, struct inode **inodep)
|
||||
{
|
||||
struct inode *dir = d_inode(dentry->d_parent);
|
||||
struct inode *inode = *inodep;
|
||||
int err = ext4_add_entry(handle, dentry, inode);
|
||||
if (!err) {
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
if (IS_DIRSYNC(dir))
|
||||
ext4_handle_sync(handle);
|
||||
d_instantiate_new(dentry, inode);
|
||||
*inodep = NULL;
|
||||
return 0;
|
||||
}
|
||||
drop_nlink(inode);
|
||||
ext4_orphan_add(handle, inode);
|
||||
unlock_new_inode(inode);
|
||||
iput(inode);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -2592,12 +2603,12 @@ retry:
|
||||
inode->i_op = &ext4_file_inode_operations;
|
||||
inode->i_fop = &ext4_file_operations;
|
||||
ext4_set_aops(inode);
|
||||
err = ext4_add_nondir(handle, dentry, inode);
|
||||
if (!err && IS_DIRSYNC(dir))
|
||||
ext4_handle_sync(handle);
|
||||
err = ext4_add_nondir(handle, dentry, &inode);
|
||||
}
|
||||
if (handle)
|
||||
ext4_journal_stop(handle);
|
||||
if (!IS_ERR_OR_NULL(inode))
|
||||
iput(inode);
|
||||
if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
|
||||
goto retry;
|
||||
return err;
|
||||
@ -2624,12 +2635,12 @@ retry:
|
||||
if (!IS_ERR(inode)) {
|
||||
init_special_inode(inode, inode->i_mode, rdev);
|
||||
inode->i_op = &ext4_special_inode_operations;
|
||||
err = ext4_add_nondir(handle, dentry, inode);
|
||||
if (!err && IS_DIRSYNC(dir))
|
||||
ext4_handle_sync(handle);
|
||||
err = ext4_add_nondir(handle, dentry, &inode);
|
||||
}
|
||||
if (handle)
|
||||
ext4_journal_stop(handle);
|
||||
if (!IS_ERR_OR_NULL(inode))
|
||||
iput(inode);
|
||||
if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
|
||||
goto retry;
|
||||
return err;
|
||||
@ -2779,10 +2790,12 @@ retry:
|
||||
if (err) {
|
||||
out_clear_inode:
|
||||
clear_nlink(inode);
|
||||
ext4_orphan_add(handle, inode);
|
||||
unlock_new_inode(inode);
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
ext4_journal_stop(handle);
|
||||
iput(inode);
|
||||
goto out_stop;
|
||||
goto out_retry;
|
||||
}
|
||||
ext4_inc_count(handle, dir);
|
||||
ext4_update_dx_flag(dir);
|
||||
@ -2796,6 +2809,7 @@ out_clear_inode:
|
||||
out_stop:
|
||||
if (handle)
|
||||
ext4_journal_stop(handle);
|
||||
out_retry:
|
||||
if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
|
||||
goto retry;
|
||||
return err;
|
||||
@ -3182,18 +3196,17 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
|
||||
if (IS_DIRSYNC(dir))
|
||||
ext4_handle_sync(handle);
|
||||
|
||||
if (inode->i_nlink == 0) {
|
||||
ext4_warning_inode(inode, "Deleting file '%.*s' with no links",
|
||||
dentry->d_name.len, dentry->d_name.name);
|
||||
set_nlink(inode, 1);
|
||||
}
|
||||
retval = ext4_delete_entry(handle, dir, de, bh);
|
||||
if (retval)
|
||||
goto end_unlink;
|
||||
dir->i_ctime = dir->i_mtime = current_time(dir);
|
||||
ext4_update_dx_flag(dir);
|
||||
ext4_mark_inode_dirty(handle, dir);
|
||||
drop_nlink(inode);
|
||||
if (inode->i_nlink == 0)
|
||||
ext4_warning_inode(inode, "Deleting file '%.*s' with no links",
|
||||
dentry->d_name.len, dentry->d_name.name);
|
||||
else
|
||||
drop_nlink(inode);
|
||||
if (!inode->i_nlink)
|
||||
ext4_orphan_add(handle, inode);
|
||||
inode->i_ctime = current_time(inode);
|
||||
@ -3328,12 +3341,11 @@ static int ext4_symlink(struct inode *dir,
|
||||
inode->i_size = disk_link.len - 1;
|
||||
}
|
||||
EXT4_I(inode)->i_disksize = inode->i_size;
|
||||
err = ext4_add_nondir(handle, dentry, inode);
|
||||
if (!err && IS_DIRSYNC(dir))
|
||||
ext4_handle_sync(handle);
|
||||
|
||||
err = ext4_add_nondir(handle, dentry, &inode);
|
||||
if (handle)
|
||||
ext4_journal_stop(handle);
|
||||
if (inode)
|
||||
iput(inode);
|
||||
goto out_free_encrypted_link;
|
||||
|
||||
err_drop_inode:
|
||||
|
@ -31,18 +31,56 @@
|
||||
#include "acl.h"
|
||||
|
||||
static struct kmem_cache *io_end_cachep;
|
||||
static struct kmem_cache *io_end_vec_cachep;
|
||||
|
||||
int __init ext4_init_pageio(void)
|
||||
{
|
||||
io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
|
||||
if (io_end_cachep == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
io_end_vec_cachep = KMEM_CACHE(ext4_io_end_vec, 0);
|
||||
if (io_end_vec_cachep == NULL) {
|
||||
kmem_cache_destroy(io_end_cachep);
|
||||
return -ENOMEM;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ext4_exit_pageio(void)
|
||||
{
|
||||
kmem_cache_destroy(io_end_cachep);
|
||||
kmem_cache_destroy(io_end_vec_cachep);
|
||||
}
|
||||
|
||||
struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end)
|
||||
{
|
||||
struct ext4_io_end_vec *io_end_vec;
|
||||
|
||||
io_end_vec = kmem_cache_zalloc(io_end_vec_cachep, GFP_NOFS);
|
||||
if (!io_end_vec)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
INIT_LIST_HEAD(&io_end_vec->list);
|
||||
list_add_tail(&io_end_vec->list, &io_end->list_vec);
|
||||
return io_end_vec;
|
||||
}
|
||||
|
||||
static void ext4_free_io_end_vec(ext4_io_end_t *io_end)
|
||||
{
|
||||
struct ext4_io_end_vec *io_end_vec, *tmp;
|
||||
|
||||
if (list_empty(&io_end->list_vec))
|
||||
return;
|
||||
list_for_each_entry_safe(io_end_vec, tmp, &io_end->list_vec, list) {
|
||||
list_del(&io_end_vec->list);
|
||||
kmem_cache_free(io_end_vec_cachep, io_end_vec);
|
||||
}
|
||||
}
|
||||
|
||||
struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end)
|
||||
{
|
||||
BUG_ON(list_empty(&io_end->list_vec));
|
||||
return list_last_entry(&io_end->list_vec, struct ext4_io_end_vec, list);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -125,6 +163,7 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
|
||||
ext4_finish_bio(bio);
|
||||
bio_put(bio);
|
||||
}
|
||||
ext4_free_io_end_vec(io_end);
|
||||
kmem_cache_free(io_end_cachep, io_end);
|
||||
}
|
||||
|
||||
@ -136,29 +175,26 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
|
||||
* cannot get to ext4_ext_truncate() before all IOs overlapping that range are
|
||||
* completed (happens from ext4_free_ioend()).
|
||||
*/
|
||||
static int ext4_end_io(ext4_io_end_t *io)
|
||||
static int ext4_end_io_end(ext4_io_end_t *io_end)
|
||||
{
|
||||
struct inode *inode = io->inode;
|
||||
loff_t offset = io->offset;
|
||||
ssize_t size = io->size;
|
||||
handle_t *handle = io->handle;
|
||||
struct inode *inode = io_end->inode;
|
||||
handle_t *handle = io_end->handle;
|
||||
int ret = 0;
|
||||
|
||||
ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
|
||||
ext4_debug("ext4_end_io_nolock: io_end 0x%p from inode %lu,list->next 0x%p,"
|
||||
"list->prev 0x%p\n",
|
||||
io, inode->i_ino, io->list.next, io->list.prev);
|
||||
io_end, inode->i_ino, io_end->list.next, io_end->list.prev);
|
||||
|
||||
io->handle = NULL; /* Following call will use up the handle */
|
||||
ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
|
||||
io_end->handle = NULL; /* Following call will use up the handle */
|
||||
ret = ext4_convert_unwritten_io_end_vec(handle, io_end);
|
||||
if (ret < 0 && !ext4_forced_shutdown(EXT4_SB(inode->i_sb))) {
|
||||
ext4_msg(inode->i_sb, KERN_EMERG,
|
||||
"failed to convert unwritten extents to written "
|
||||
"extents -- potential data loss! "
|
||||
"(inode %lu, offset %llu, size %zd, error %d)",
|
||||
inode->i_ino, offset, size, ret);
|
||||
"(inode %lu, error %d)", inode->i_ino, ret);
|
||||
}
|
||||
ext4_clear_io_unwritten_flag(io);
|
||||
ext4_release_io_end(io);
|
||||
ext4_clear_io_unwritten_flag(io_end);
|
||||
ext4_release_io_end(io_end);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -166,21 +202,21 @@ static void dump_completed_IO(struct inode *inode, struct list_head *head)
|
||||
{
|
||||
#ifdef EXT4FS_DEBUG
|
||||
struct list_head *cur, *before, *after;
|
||||
ext4_io_end_t *io, *io0, *io1;
|
||||
ext4_io_end_t *io_end, *io_end0, *io_end1;
|
||||
|
||||
if (list_empty(head))
|
||||
return;
|
||||
|
||||
ext4_debug("Dump inode %lu completed io list\n", inode->i_ino);
|
||||
list_for_each_entry(io, head, list) {
|
||||
cur = &io->list;
|
||||
list_for_each_entry(io_end, head, list) {
|
||||
cur = &io_end->list;
|
||||
before = cur->prev;
|
||||
io0 = container_of(before, ext4_io_end_t, list);
|
||||
io_end0 = container_of(before, ext4_io_end_t, list);
|
||||
after = cur->next;
|
||||
io1 = container_of(after, ext4_io_end_t, list);
|
||||
io_end1 = container_of(after, ext4_io_end_t, list);
|
||||
|
||||
ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
|
||||
io, inode->i_ino, io0, io1);
|
||||
io_end, inode->i_ino, io_end0, io_end1);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
@ -207,7 +243,7 @@ static void ext4_add_complete_io(ext4_io_end_t *io_end)
|
||||
static int ext4_do_flush_completed_IO(struct inode *inode,
|
||||
struct list_head *head)
|
||||
{
|
||||
ext4_io_end_t *io;
|
||||
ext4_io_end_t *io_end;
|
||||
struct list_head unwritten;
|
||||
unsigned long flags;
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
@ -219,11 +255,11 @@ static int ext4_do_flush_completed_IO(struct inode *inode,
|
||||
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
||||
|
||||
while (!list_empty(&unwritten)) {
|
||||
io = list_entry(unwritten.next, ext4_io_end_t, list);
|
||||
BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
|
||||
list_del_init(&io->list);
|
||||
io_end = list_entry(unwritten.next, ext4_io_end_t, list);
|
||||
BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
|
||||
list_del_init(&io_end->list);
|
||||
|
||||
err = ext4_end_io(io);
|
||||
err = ext4_end_io_end(io_end);
|
||||
if (unlikely(!ret && err))
|
||||
ret = err;
|
||||
}
|
||||
@ -242,19 +278,22 @@ void ext4_end_io_rsv_work(struct work_struct *work)
|
||||
|
||||
ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
|
||||
{
|
||||
ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
|
||||
if (io) {
|
||||
io->inode = inode;
|
||||
INIT_LIST_HEAD(&io->list);
|
||||
atomic_set(&io->count, 1);
|
||||
ext4_io_end_t *io_end = kmem_cache_zalloc(io_end_cachep, flags);
|
||||
|
||||
if (io_end) {
|
||||
io_end->inode = inode;
|
||||
INIT_LIST_HEAD(&io_end->list);
|
||||
INIT_LIST_HEAD(&io_end->list_vec);
|
||||
atomic_set(&io_end->count, 1);
|
||||
}
|
||||
return io;
|
||||
return io_end;
|
||||
}
|
||||
|
||||
void ext4_put_io_end_defer(ext4_io_end_t *io_end)
|
||||
{
|
||||
if (atomic_dec_and_test(&io_end->count)) {
|
||||
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
|
||||
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) ||
|
||||
list_empty(&io_end->list_vec)) {
|
||||
ext4_release_io_end(io_end);
|
||||
return;
|
||||
}
|
||||
@ -268,9 +307,8 @@ int ext4_put_io_end(ext4_io_end_t *io_end)
|
||||
|
||||
if (atomic_dec_and_test(&io_end->count)) {
|
||||
if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
|
||||
err = ext4_convert_unwritten_extents(io_end->handle,
|
||||
io_end->inode, io_end->offset,
|
||||
io_end->size);
|
||||
err = ext4_convert_unwritten_io_end_vec(io_end->handle,
|
||||
io_end);
|
||||
io_end->handle = NULL;
|
||||
ext4_clear_io_unwritten_flag(io_end);
|
||||
}
|
||||
@ -307,10 +345,8 @@ static void ext4_end_bio(struct bio *bio)
|
||||
struct inode *inode = io_end->inode;
|
||||
|
||||
ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
|
||||
"(offset %llu size %ld starting block %llu)",
|
||||
"starting block %llu)",
|
||||
bio->bi_status, inode->i_ino,
|
||||
(unsigned long long) io_end->offset,
|
||||
(long) io_end->size,
|
||||
(unsigned long long)
|
||||
bi_sector >> (inode->i_blkbits - 9));
|
||||
mapping_set_error(inode->i_mapping,
|
||||
@ -358,14 +394,16 @@ void ext4_io_submit_init(struct ext4_io_submit *io,
|
||||
io->io_end = NULL;
|
||||
}
|
||||
|
||||
static int io_submit_init_bio(struct ext4_io_submit *io,
|
||||
struct buffer_head *bh)
|
||||
static void io_submit_init_bio(struct ext4_io_submit *io,
|
||||
struct buffer_head *bh)
|
||||
{
|
||||
struct bio *bio;
|
||||
|
||||
/*
|
||||
* bio_alloc will _always_ be able to allocate a bio if
|
||||
* __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset().
|
||||
*/
|
||||
bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
|
||||
if (!bio)
|
||||
return -ENOMEM;
|
||||
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
|
||||
bio_set_dev(bio, bh->b_bdev);
|
||||
bio->bi_end_io = ext4_end_bio;
|
||||
@ -373,13 +411,12 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
|
||||
io->io_bio = bio;
|
||||
io->io_next_block = bh->b_blocknr;
|
||||
wbc_init_bio(io->io_wbc, bio);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int io_submit_add_bh(struct ext4_io_submit *io,
|
||||
struct inode *inode,
|
||||
struct page *page,
|
||||
struct buffer_head *bh)
|
||||
static void io_submit_add_bh(struct ext4_io_submit *io,
|
||||
struct inode *inode,
|
||||
struct page *page,
|
||||
struct buffer_head *bh)
|
||||
{
|
||||
int ret;
|
||||
|
||||
@ -388,9 +425,7 @@ submit_and_retry:
|
||||
ext4_io_submit(io);
|
||||
}
|
||||
if (io->io_bio == NULL) {
|
||||
ret = io_submit_init_bio(io, bh);
|
||||
if (ret)
|
||||
return ret;
|
||||
io_submit_init_bio(io, bh);
|
||||
io->io_bio->bi_write_hint = inode->i_write_hint;
|
||||
}
|
||||
ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
|
||||
@ -398,7 +433,6 @@ submit_and_retry:
|
||||
goto submit_and_retry;
|
||||
wbc_account_cgroup_owner(io->io_wbc, page, bh->b_size);
|
||||
io->io_next_block++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ext4_bio_write_page(struct ext4_io_submit *io,
|
||||
@ -491,8 +525,14 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
|
||||
gfp_flags |= __GFP_NOFAIL;
|
||||
goto retry_encrypt;
|
||||
}
|
||||
bounce_page = NULL;
|
||||
goto out;
|
||||
|
||||
printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
|
||||
redirty_page_for_writepage(wbc, page);
|
||||
do {
|
||||
clear_buffer_async_write(bh);
|
||||
bh = bh->b_this_page;
|
||||
} while (bh != head);
|
||||
goto unlock;
|
||||
}
|
||||
}
|
||||
|
||||
@ -500,30 +540,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
|
||||
do {
|
||||
if (!buffer_async_write(bh))
|
||||
continue;
|
||||
ret = io_submit_add_bh(io, inode, bounce_page ?: page, bh);
|
||||
if (ret) {
|
||||
/*
|
||||
* We only get here on ENOMEM. Not much else
|
||||
* we can do but mark the page as dirty, and
|
||||
* better luck next time.
|
||||
*/
|
||||
break;
|
||||
}
|
||||
io_submit_add_bh(io, inode,
|
||||
bounce_page ? bounce_page : page, bh);
|
||||
nr_submitted++;
|
||||
clear_buffer_dirty(bh);
|
||||
} while ((bh = bh->b_this_page) != head);
|
||||
|
||||
/* Error stopped previous loop? Clean up buffers... */
|
||||
if (ret) {
|
||||
out:
|
||||
fscrypt_free_bounce_page(bounce_page);
|
||||
printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
|
||||
redirty_page_for_writepage(wbc, page);
|
||||
do {
|
||||
clear_buffer_async_write(bh);
|
||||
bh = bh->b_this_page;
|
||||
} while (bh != head);
|
||||
}
|
||||
unlock:
|
||||
unlock_page(page);
|
||||
/* Nothing submitted - we have to end page writeback */
|
||||
if (!nr_submitted)
|
||||
|
@ -360,10 +360,12 @@ int ext4_mpage_readpages(struct address_space *mapping,
|
||||
if (bio == NULL) {
|
||||
struct bio_post_read_ctx *ctx;
|
||||
|
||||
/*
|
||||
* bio_alloc will _always_ be able to allocate a bio if
|
||||
* __GFP_DIRECT_RECLAIM is set, see bio_alloc_bioset().
|
||||
*/
|
||||
bio = bio_alloc(GFP_KERNEL,
|
||||
min_t(int, nr_pages, BIO_MAX_PAGES));
|
||||
if (!bio)
|
||||
goto set_error_page;
|
||||
ctx = get_bio_post_read_ctx(inode, bio, page->index);
|
||||
if (IS_ERR(ctx)) {
|
||||
bio_put(bio);
|
||||
|
@ -388,28 +388,10 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
|
||||
return bh;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we have fewer than thresh credits, extend by EXT4_MAX_TRANS_DATA.
|
||||
* If that fails, restart the transaction & regain write access for the
|
||||
* buffer head which is used for block_bitmap modifications.
|
||||
*/
|
||||
static int extend_or_restart_transaction(handle_t *handle, int thresh)
|
||||
static int ext4_resize_ensure_credits_batch(handle_t *handle, int credits)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (ext4_handle_has_enough_credits(handle, thresh))
|
||||
return 0;
|
||||
|
||||
err = ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA);
|
||||
if (err < 0)
|
||||
return err;
|
||||
if (err) {
|
||||
err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return ext4_journal_ensure_credits_fn(handle, credits,
|
||||
EXT4_MAX_TRANS_DATA, 0, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -451,8 +433,8 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
|
||||
continue;
|
||||
}
|
||||
|
||||
err = extend_or_restart_transaction(handle, 1);
|
||||
if (err)
|
||||
err = ext4_resize_ensure_credits_batch(handle, 1);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
|
||||
@ -544,8 +526,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
|
||||
struct buffer_head *gdb;
|
||||
|
||||
ext4_debug("update backup group %#04llx\n", block);
|
||||
err = extend_or_restart_transaction(handle, 1);
|
||||
if (err)
|
||||
err = ext4_resize_ensure_credits_batch(handle, 1);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
|
||||
gdb = sb_getblk(sb, block);
|
||||
@ -602,8 +584,8 @@ handle_bb:
|
||||
|
||||
/* Initialize block bitmap of the @group */
|
||||
block = group_data[i].block_bitmap;
|
||||
err = extend_or_restart_transaction(handle, 1);
|
||||
if (err)
|
||||
err = ext4_resize_ensure_credits_batch(handle, 1);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
|
||||
bh = bclean(handle, sb, block);
|
||||
@ -631,8 +613,8 @@ handle_ib:
|
||||
|
||||
/* Initialize inode bitmap of the @group */
|
||||
block = group_data[i].inode_bitmap;
|
||||
err = extend_or_restart_transaction(handle, 1);
|
||||
if (err)
|
||||
err = ext4_resize_ensure_credits_batch(handle, 1);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
/* Mark unused entries in inode bitmap used */
|
||||
bh = bclean(handle, sb, block);
|
||||
@ -1109,10 +1091,8 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
|
||||
ext4_fsblk_t backup_block;
|
||||
|
||||
/* Out of journal space, and can't get more - abort - so sad */
|
||||
if (ext4_handle_valid(handle) &&
|
||||
handle->h_buffer_credits == 0 &&
|
||||
ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) &&
|
||||
(err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
|
||||
err = ext4_resize_ensure_credits_batch(handle, 1);
|
||||
if (err < 0)
|
||||
break;
|
||||
|
||||
if (meta_bg == 0)
|
||||
|
@ -1172,9 +1172,9 @@ void ext4_clear_inode(struct inode *inode)
|
||||
{
|
||||
invalidate_inode_buffers(inode);
|
||||
clear_inode(inode);
|
||||
dquot_drop(inode);
|
||||
ext4_discard_preallocations(inode);
|
||||
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
|
||||
dquot_drop(inode);
|
||||
if (EXT4_I(inode)->jinode) {
|
||||
jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
|
||||
EXT4_I(inode)->jinode);
|
||||
@ -1388,7 +1388,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
|
||||
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
|
||||
unsigned int flags);
|
||||
static int ext4_enable_quotas(struct super_block *sb);
|
||||
static int ext4_get_next_id(struct super_block *sb, struct kqid *qid);
|
||||
|
||||
static struct dquot **ext4_get_dquots(struct inode *inode)
|
||||
{
|
||||
@ -1406,7 +1405,7 @@ static const struct dquot_operations ext4_quota_operations = {
|
||||
.destroy_dquot = dquot_destroy,
|
||||
.get_projid = ext4_get_projid,
|
||||
.get_inode_usage = ext4_get_inode_usage,
|
||||
.get_next_id = ext4_get_next_id,
|
||||
.get_next_id = dquot_get_next_id,
|
||||
};
|
||||
|
||||
static const struct quotactl_ops ext4_qctl_operations = {
|
||||
@ -2065,7 +2064,7 @@ static int parse_options(char *options, struct super_block *sb,
|
||||
unsigned int *journal_ioprio,
|
||||
int is_remount)
|
||||
{
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
struct ext4_sb_info __maybe_unused *sbi = EXT4_SB(sb);
|
||||
char *p, __maybe_unused *usr_qf_name, __maybe_unused *grp_qf_name;
|
||||
substring_t args[MAX_OPT_ARGS];
|
||||
int token;
|
||||
@ -2119,16 +2118,6 @@ static int parse_options(char *options, struct super_block *sb,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (test_opt(sb, DIOREAD_NOLOCK)) {
|
||||
int blocksize =
|
||||
BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
|
||||
|
||||
if (blocksize < PAGE_SIZE) {
|
||||
ext4_msg(sb, KERN_ERR, "can't mount with "
|
||||
"dioread_nolock if block size != PAGE_SIZE");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -3569,12 +3558,15 @@ static void ext4_clamp_want_extra_isize(struct super_block *sb)
|
||||
{
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
struct ext4_super_block *es = sbi->s_es;
|
||||
unsigned def_extra_isize = sizeof(struct ext4_inode) -
|
||||
EXT4_GOOD_OLD_INODE_SIZE;
|
||||
|
||||
/* determine the minimum size of new large inodes, if present */
|
||||
if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE &&
|
||||
sbi->s_want_extra_isize == 0) {
|
||||
sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
|
||||
EXT4_GOOD_OLD_INODE_SIZE;
|
||||
if (sbi->s_inode_size == EXT4_GOOD_OLD_INODE_SIZE) {
|
||||
sbi->s_want_extra_isize = 0;
|
||||
return;
|
||||
}
|
||||
if (sbi->s_want_extra_isize < 4) {
|
||||
sbi->s_want_extra_isize = def_extra_isize;
|
||||
if (ext4_has_feature_extra_isize(sb)) {
|
||||
if (sbi->s_want_extra_isize <
|
||||
le16_to_cpu(es->s_want_extra_isize))
|
||||
@ -3587,10 +3579,10 @@ static void ext4_clamp_want_extra_isize(struct super_block *sb)
|
||||
}
|
||||
}
|
||||
/* Check if enough inode space is available */
|
||||
if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
|
||||
sbi->s_inode_size) {
|
||||
sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
|
||||
EXT4_GOOD_OLD_INODE_SIZE;
|
||||
if ((sbi->s_want_extra_isize > sbi->s_inode_size) ||
|
||||
(EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
|
||||
sbi->s_inode_size)) {
|
||||
sbi->s_want_extra_isize = def_extra_isize;
|
||||
ext4_msg(sb, KERN_INFO,
|
||||
"required extra inode space not available");
|
||||
}
|
||||
@ -4453,13 +4445,6 @@ no_journal:
|
||||
}
|
||||
}
|
||||
|
||||
if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
|
||||
(blocksize != PAGE_SIZE)) {
|
||||
ext4_msg(sb, KERN_ERR,
|
||||
"Unsupported blocksize for fs encryption");
|
||||
goto failed_mount_wq;
|
||||
}
|
||||
|
||||
if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) {
|
||||
ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity");
|
||||
goto failed_mount_wq;
|
||||
@ -6033,18 +6018,6 @@ out:
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
static int ext4_get_next_id(struct super_block *sb, struct kqid *qid)
|
||||
{
|
||||
const struct quota_format_ops *ops;
|
||||
|
||||
if (!sb_has_quota_loaded(sb, qid->type))
|
||||
return -ESRCH;
|
||||
ops = sb_dqopt(sb)->ops[qid->type];
|
||||
if (!ops || !ops->get_next_id)
|
||||
return -ENOSYS;
|
||||
return dquot_get_next_id(sb, qid);
|
||||
}
|
||||
#endif
|
||||
|
||||
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
|
||||
|
@ -967,55 +967,6 @@ int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
|
||||
return credits;
|
||||
}
|
||||
|
||||
static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode,
|
||||
int credits, struct buffer_head *bh,
|
||||
bool dirty, bool block_csum)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (!ext4_handle_valid(handle))
|
||||
return 0;
|
||||
|
||||
if (handle->h_buffer_credits >= credits)
|
||||
return 0;
|
||||
|
||||
error = ext4_journal_extend(handle, credits - handle->h_buffer_credits);
|
||||
if (!error)
|
||||
return 0;
|
||||
if (error < 0) {
|
||||
ext4_warning(inode->i_sb, "Extend journal (error %d)", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
if (bh && dirty) {
|
||||
if (block_csum)
|
||||
ext4_xattr_block_csum_set(inode, bh);
|
||||
error = ext4_handle_dirty_metadata(handle, NULL, bh);
|
||||
if (error) {
|
||||
ext4_warning(inode->i_sb, "Handle metadata (error %d)",
|
||||
error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
|
||||
error = ext4_journal_restart(handle, credits);
|
||||
if (error) {
|
||||
ext4_warning(inode->i_sb, "Restart journal (error %d)", error);
|
||||
return error;
|
||||
}
|
||||
|
||||
if (bh) {
|
||||
error = ext4_journal_get_write_access(handle, bh);
|
||||
if (error) {
|
||||
ext4_warning(inode->i_sb,
|
||||
"Get write access failed (error %d)",
|
||||
error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
|
||||
int ref_change)
|
||||
{
|
||||
@ -1149,6 +1100,24 @@ cleanup:
|
||||
return saved_err;
|
||||
}
|
||||
|
||||
static int ext4_xattr_restart_fn(handle_t *handle, struct inode *inode,
|
||||
struct buffer_head *bh, bool block_csum, bool dirty)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (bh && dirty) {
|
||||
if (block_csum)
|
||||
ext4_xattr_block_csum_set(inode, bh);
|
||||
error = ext4_handle_dirty_metadata(handle, NULL, bh);
|
||||
if (error) {
|
||||
ext4_warning(inode->i_sb, "Handle metadata (error %d)",
|
||||
error);
|
||||
return error;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
|
||||
struct buffer_head *bh,
|
||||
@ -1185,13 +1154,24 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
|
||||
continue;
|
||||
}
|
||||
|
||||
err = ext4_xattr_ensure_credits(handle, parent, credits, bh,
|
||||
dirty, block_csum);
|
||||
if (err) {
|
||||
err = ext4_journal_ensure_credits_fn(handle, credits, credits,
|
||||
ext4_free_metadata_revoke_credits(parent->i_sb, 1),
|
||||
ext4_xattr_restart_fn(handle, parent, bh, block_csum,
|
||||
dirty));
|
||||
if (err < 0) {
|
||||
ext4_warning_inode(ea_inode, "Ensure credits err=%d",
|
||||
err);
|
||||
continue;
|
||||
}
|
||||
if (err > 0) {
|
||||
err = ext4_journal_get_write_access(handle, bh);
|
||||
if (err) {
|
||||
ext4_warning_inode(ea_inode,
|
||||
"Re-get write access err=%d",
|
||||
err);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
err = ext4_xattr_inode_dec_ref(handle, ea_inode);
|
||||
if (err) {
|
||||
@ -2335,7 +2315,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
|
||||
flags & XATTR_CREATE);
|
||||
brelse(bh);
|
||||
|
||||
if (!ext4_handle_has_enough_credits(handle, credits)) {
|
||||
if (jbd2_handle_buffer_credits(handle) < credits) {
|
||||
error = -ENOSPC;
|
||||
goto cleanup;
|
||||
}
|
||||
@ -2862,11 +2842,9 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
|
||||
struct inode *ea_inode;
|
||||
int error;
|
||||
|
||||
error = ext4_xattr_ensure_credits(handle, inode, extra_credits,
|
||||
NULL /* bh */,
|
||||
false /* dirty */,
|
||||
false /* block_csum */);
|
||||
if (error) {
|
||||
error = ext4_journal_ensure_credits(handle, extra_credits,
|
||||
ext4_free_metadata_revoke_credits(inode->i_sb, 1));
|
||||
if (error < 0) {
|
||||
EXT4_ERROR_INODE(inode, "ensure credits (error %d)", error);
|
||||
goto cleanup;
|
||||
}
|
||||
|
@ -110,7 +110,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
|
||||
int nblocks, space_left;
|
||||
/* assert_spin_locked(&journal->j_state_lock); */
|
||||
|
||||
nblocks = jbd2_space_needed(journal);
|
||||
nblocks = journal->j_max_transaction_buffers;
|
||||
while (jbd2_log_space_left(journal) < nblocks) {
|
||||
write_unlock(&journal->j_state_lock);
|
||||
mutex_lock_io(&journal->j_checkpoint_mutex);
|
||||
|
@ -482,10 +482,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
|
||||
if (jh->b_committed_data) {
|
||||
struct buffer_head *bh = jh2bh(jh);
|
||||
|
||||
jbd_lock_bh_state(bh);
|
||||
spin_lock(&jh->b_state_lock);
|
||||
jbd2_free(jh->b_committed_data, bh->b_size);
|
||||
jh->b_committed_data = NULL;
|
||||
jbd_unlock_bh_state(bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
}
|
||||
jbd2_journal_refile_buffer(journal, jh);
|
||||
}
|
||||
@ -560,8 +560,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
|
||||
stats.run.rs_logging = jiffies;
|
||||
stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing,
|
||||
stats.run.rs_logging);
|
||||
stats.run.rs_blocks =
|
||||
atomic_read(&commit_transaction->t_outstanding_credits);
|
||||
stats.run.rs_blocks = commit_transaction->t_nr_buffers;
|
||||
stats.run.rs_blocks_logged = 0;
|
||||
|
||||
J_ASSERT(commit_transaction->t_nr_buffers <=
|
||||
@ -642,8 +641,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
|
||||
|
||||
/*
|
||||
* start_this_handle() uses t_outstanding_credits to determine
|
||||
* the free space in the log, but this counter is changed
|
||||
* by jbd2_journal_next_log_block() also.
|
||||
* the free space in the log.
|
||||
*/
|
||||
atomic_dec(&commit_transaction->t_outstanding_credits);
|
||||
|
||||
@ -727,7 +725,6 @@ start_journal_io:
|
||||
submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
|
||||
}
|
||||
cond_resched();
|
||||
stats.run.rs_blocks_logged += bufs;
|
||||
|
||||
/* Force a new descriptor to be generated next
|
||||
time round the loop. */
|
||||
@ -814,6 +811,7 @@ start_journal_io:
|
||||
if (unlikely(!buffer_uptodate(bh)))
|
||||
err = -EIO;
|
||||
jbd2_unfile_log_bh(bh);
|
||||
stats.run.rs_blocks_logged++;
|
||||
|
||||
/*
|
||||
* The list contains temporary buffer heads created by
|
||||
@ -859,6 +857,7 @@ start_journal_io:
|
||||
BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
|
||||
clear_buffer_jwrite(bh);
|
||||
jbd2_unfile_log_bh(bh);
|
||||
stats.run.rs_blocks_logged++;
|
||||
__brelse(bh); /* One for getblk */
|
||||
/* AKPM: bforget here */
|
||||
}
|
||||
@ -880,6 +879,7 @@ start_journal_io:
|
||||
}
|
||||
if (cbh)
|
||||
err = journal_wait_on_commit_record(journal, cbh);
|
||||
stats.run.rs_blocks_logged++;
|
||||
if (jbd2_has_feature_async_commit(journal) &&
|
||||
journal->j_flags & JBD2_BARRIER) {
|
||||
blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL);
|
||||
@ -888,6 +888,9 @@ start_journal_io:
|
||||
if (err)
|
||||
jbd2_journal_abort(journal, err);
|
||||
|
||||
WARN_ON_ONCE(
|
||||
atomic_read(&commit_transaction->t_outstanding_credits) < 0);
|
||||
|
||||
/*
|
||||
* Now disk caches for filesystem device are flushed so we are safe to
|
||||
* erase checkpointed transactions from the log by updating journal
|
||||
@ -918,6 +921,7 @@ restart_loop:
|
||||
transaction_t *cp_transaction;
|
||||
struct buffer_head *bh;
|
||||
int try_to_free = 0;
|
||||
bool drop_ref;
|
||||
|
||||
jh = commit_transaction->t_forget;
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
@ -927,7 +931,7 @@ restart_loop:
|
||||
* done with it.
|
||||
*/
|
||||
get_bh(bh);
|
||||
jbd_lock_bh_state(bh);
|
||||
spin_lock(&jh->b_state_lock);
|
||||
J_ASSERT_JH(jh, jh->b_transaction == commit_transaction);
|
||||
|
||||
/*
|
||||
@ -1022,8 +1026,10 @@ restart_loop:
|
||||
try_to_free = 1;
|
||||
}
|
||||
JBUFFER_TRACE(jh, "refile or unfile buffer");
|
||||
__jbd2_journal_refile_buffer(jh);
|
||||
jbd_unlock_bh_state(bh);
|
||||
drop_ref = __jbd2_journal_refile_buffer(jh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
if (drop_ref)
|
||||
jbd2_journal_put_journal_head(jh);
|
||||
if (try_to_free)
|
||||
release_buffer_page(bh); /* Drops bh reference */
|
||||
else
|
||||
|
@ -363,7 +363,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
|
||||
/* keep subsequent assertions sane */
|
||||
atomic_set(&new_bh->b_count, 1);
|
||||
|
||||
jbd_lock_bh_state(bh_in);
|
||||
spin_lock(&jh_in->b_state_lock);
|
||||
repeat:
|
||||
/*
|
||||
* If a new transaction has already done a buffer copy-out, then
|
||||
@ -405,13 +405,13 @@ repeat:
|
||||
if (need_copy_out && !done_copy_out) {
|
||||
char *tmp;
|
||||
|
||||
jbd_unlock_bh_state(bh_in);
|
||||
spin_unlock(&jh_in->b_state_lock);
|
||||
tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
|
||||
if (!tmp) {
|
||||
brelse(new_bh);
|
||||
return -ENOMEM;
|
||||
}
|
||||
jbd_lock_bh_state(bh_in);
|
||||
spin_lock(&jh_in->b_state_lock);
|
||||
if (jh_in->b_frozen_data) {
|
||||
jbd2_free(tmp, bh_in->b_size);
|
||||
goto repeat;
|
||||
@ -464,7 +464,7 @@ repeat:
|
||||
__jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
set_buffer_shadow(bh_in);
|
||||
jbd_unlock_bh_state(bh_in);
|
||||
spin_unlock(&jh_in->b_state_lock);
|
||||
|
||||
return do_escape | (done_copy_out << 1);
|
||||
}
|
||||
@ -840,6 +840,7 @@ jbd2_journal_get_descriptor_buffer(transaction_t *transaction, int type)
|
||||
bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
|
||||
if (!bh)
|
||||
return NULL;
|
||||
atomic_dec(&transaction->t_outstanding_credits);
|
||||
lock_buffer(bh);
|
||||
memset(bh->b_data, 0, journal->j_blocksize);
|
||||
header = (journal_header_t *)bh->b_data;
|
||||
@ -1098,6 +1099,16 @@ static void jbd2_stats_proc_exit(journal_t *journal)
|
||||
remove_proc_entry(journal->j_devname, proc_jbd2_stats);
|
||||
}
|
||||
|
||||
/* Minimum size of descriptor tag */
|
||||
static int jbd2_min_tag_size(void)
|
||||
{
|
||||
/*
|
||||
* Tag with 32-bit block numbers does not use last four bytes of the
|
||||
* structure
|
||||
*/
|
||||
return sizeof(journal_block_tag_t) - 4;
|
||||
}
|
||||
|
||||
/*
|
||||
* Management for journal control blocks: functions to create and
|
||||
* destroy journal_t structures, and to initialise and read existing
|
||||
@ -1156,7 +1167,8 @@ static journal_t *journal_init_common(struct block_device *bdev,
|
||||
journal->j_fs_dev = fs_dev;
|
||||
journal->j_blk_offset = start;
|
||||
journal->j_maxlen = len;
|
||||
n = journal->j_blocksize / sizeof(journal_block_tag_t);
|
||||
/* We need enough buffers to write out full descriptor block. */
|
||||
n = journal->j_blocksize / jbd2_min_tag_size();
|
||||
journal->j_wbufsize = n;
|
||||
journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
|
||||
GFP_KERNEL);
|
||||
@ -1488,6 +1500,21 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
|
||||
}
|
||||
EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
|
||||
|
||||
/*
 * Number of revoke records that fit into a single revoke block.
 *
 * The usable area is the journal block minus the revoke header and, when
 * v2/v3 checksums are enabled, minus the block tail. Each record takes
 * 8 bytes with the 64-bit feature and 4 bytes otherwise.
 */
static int journal_revoke_records_per_block(journal_t *journal)
{
	int payload = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t);
	int entry_bytes = jbd2_has_feature_64bit(journal) ? 8 : 4;

	if (jbd2_journal_has_csum_v2or3(journal))
		payload -= sizeof(struct jbd2_journal_block_tail);

	return payload / entry_bytes;
}
|
||||
|
||||
/*
|
||||
* Read the superblock for a given journal, performing initial
|
||||
* validation of the format.
|
||||
@ -1596,6 +1623,8 @@ static int journal_get_superblock(journal_t *journal)
|
||||
sizeof(sb->s_uuid));
|
||||
}
|
||||
|
||||
journal->j_revoke_records_per_block =
|
||||
journal_revoke_records_per_block(journal);
|
||||
set_buffer_verified(bh);
|
||||
|
||||
return 0;
|
||||
@ -1916,6 +1945,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
|
||||
sb->s_feature_ro_compat |= cpu_to_be32(ro);
|
||||
sb->s_feature_incompat |= cpu_to_be32(incompat);
|
||||
unlock_buffer(journal->j_sb_buffer);
|
||||
journal->j_revoke_records_per_block =
|
||||
journal_revoke_records_per_block(journal);
|
||||
|
||||
return 1;
|
||||
#undef COMPAT_FEATURE_ON
|
||||
@ -1946,6 +1977,8 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
|
||||
sb->s_feature_compat &= ~cpu_to_be32(compat);
|
||||
sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
|
||||
sb->s_feature_incompat &= ~cpu_to_be32(incompat);
|
||||
journal->j_revoke_records_per_block =
|
||||
journal_revoke_records_per_block(journal);
|
||||
}
|
||||
EXPORT_SYMBOL(jbd2_journal_clear_features);
|
||||
|
||||
@ -2410,6 +2443,8 @@ static struct journal_head *journal_alloc_journal_head(void)
|
||||
ret = kmem_cache_zalloc(jbd2_journal_head_cache,
|
||||
GFP_NOFS | __GFP_NOFAIL);
|
||||
}
|
||||
if (ret)
|
||||
spin_lock_init(&ret->b_state_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2529,17 +2564,23 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
|
||||
J_ASSERT_BH(bh, buffer_jbd(bh));
|
||||
J_ASSERT_BH(bh, jh2bh(jh) == bh);
|
||||
BUFFER_TRACE(bh, "remove journal_head");
|
||||
if (jh->b_frozen_data) {
|
||||
printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
|
||||
jbd2_free(jh->b_frozen_data, bh->b_size);
|
||||
}
|
||||
if (jh->b_committed_data) {
|
||||
printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
|
||||
jbd2_free(jh->b_committed_data, bh->b_size);
|
||||
}
|
||||
|
||||
/* Unlink before dropping the lock */
|
||||
bh->b_private = NULL;
|
||||
jh->b_bh = NULL; /* debug, really */
|
||||
clear_buffer_jbd(bh);
|
||||
}
|
||||
|
||||
static void journal_release_journal_head(struct journal_head *jh, size_t b_size)
|
||||
{
|
||||
if (jh->b_frozen_data) {
|
||||
printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
|
||||
jbd2_free(jh->b_frozen_data, b_size);
|
||||
}
|
||||
if (jh->b_committed_data) {
|
||||
printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
|
||||
jbd2_free(jh->b_committed_data, b_size);
|
||||
}
|
||||
journal_free_journal_head(jh);
|
||||
}
|
||||
|
||||
@ -2557,9 +2598,11 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
|
||||
if (!jh->b_jcount) {
|
||||
__journal_remove_journal_head(bh);
|
||||
jbd_unlock_bh_journal_head(bh);
|
||||
journal_release_journal_head(jh, bh->b_size);
|
||||
__brelse(bh);
|
||||
} else
|
||||
} else {
|
||||
jbd_unlock_bh_journal_head(bh);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -371,6 +371,11 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
|
||||
}
|
||||
#endif
|
||||
|
||||
if (WARN_ON_ONCE(handle->h_revoke_credits <= 0)) {
|
||||
if (!bh_in)
|
||||
brelse(bh);
|
||||
return -EIO;
|
||||
}
|
||||
/* We really ought not ever to revoke twice in a row without
|
||||
first having the revoke cancelled: it's illegal to free a
|
||||
block twice without allocating it in between! */
|
||||
@ -391,6 +396,7 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr,
|
||||
__brelse(bh);
|
||||
}
|
||||
}
|
||||
handle->h_revoke_credits--;
|
||||
|
||||
jbd_debug(2, "insert revoke for block %llu, bh_in=%p\n",blocknr, bh_in);
|
||||
err = insert_revoke_hash(journal, blocknr,
|
||||
|
@ -62,6 +62,28 @@ void jbd2_journal_free_transaction(transaction_t *transaction)
|
||||
kmem_cache_free(transaction_cache, transaction);
|
||||
}
|
||||
|
||||
/*
|
||||
* Base amount of descriptor blocks we reserve for each transaction.
|
||||
*/
|
||||
static int jbd2_descriptor_blocks_per_trans(journal_t *journal)
|
||||
{
|
||||
int tag_space = journal->j_blocksize - sizeof(journal_header_t);
|
||||
int tags_per_block;
|
||||
|
||||
/* Subtract UUID */
|
||||
tag_space -= 16;
|
||||
if (jbd2_journal_has_csum_v2or3(journal))
|
||||
tag_space -= sizeof(struct jbd2_journal_block_tail);
|
||||
/* Commit code leaves a slack space of 16 bytes at the end of block */
|
||||
tags_per_block = (tag_space - 16) / journal_tag_bytes(journal);
|
||||
/*
|
||||
* Revoke descriptors are accounted separately so we need to reserve
|
||||
* space for commit block and normal transaction descriptor blocks.
|
||||
*/
|
||||
return 1 + DIV_ROUND_UP(journal->j_max_transaction_buffers,
|
||||
tags_per_block);
|
||||
}
|
||||
|
||||
/*
|
||||
* jbd2_get_transaction: obtain a new transaction_t object.
|
||||
*
|
||||
@ -88,7 +110,9 @@ static void jbd2_get_transaction(journal_t *journal,
|
||||
spin_lock_init(&transaction->t_handle_lock);
|
||||
atomic_set(&transaction->t_updates, 0);
|
||||
atomic_set(&transaction->t_outstanding_credits,
|
||||
jbd2_descriptor_blocks_per_trans(journal) +
|
||||
atomic_read(&journal->j_reserved_credits));
|
||||
atomic_set(&transaction->t_outstanding_revokes, 0);
|
||||
atomic_set(&transaction->t_handle_count, 0);
|
||||
INIT_LIST_HEAD(&transaction->t_inode_list);
|
||||
INIT_LIST_HEAD(&transaction->t_private_list);
|
||||
@ -258,12 +282,13 @@ static int add_transaction_credits(journal_t *journal, int blocks,
|
||||
* *before* starting to dirty potentially checkpointed buffers
|
||||
* in the new transaction.
|
||||
*/
|
||||
if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) {
|
||||
if (jbd2_log_space_left(journal) < journal->j_max_transaction_buffers) {
|
||||
atomic_sub(total, &t->t_outstanding_credits);
|
||||
read_unlock(&journal->j_state_lock);
|
||||
jbd2_might_wait_for_commit(journal);
|
||||
write_lock(&journal->j_state_lock);
|
||||
if (jbd2_log_space_left(journal) < jbd2_space_needed(journal))
|
||||
if (jbd2_log_space_left(journal) <
|
||||
journal->j_max_transaction_buffers)
|
||||
__jbd2_log_wait_for_space(journal);
|
||||
write_unlock(&journal->j_state_lock);
|
||||
return 1;
|
||||
@ -299,12 +324,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
transaction_t *transaction, *new_transaction = NULL;
|
||||
int blocks = handle->h_buffer_credits;
|
||||
int blocks = handle->h_total_credits;
|
||||
int rsv_blocks = 0;
|
||||
unsigned long ts = jiffies;
|
||||
|
||||
if (handle->h_rsv_handle)
|
||||
rsv_blocks = handle->h_rsv_handle->h_buffer_credits;
|
||||
rsv_blocks = handle->h_rsv_handle->h_total_credits;
|
||||
|
||||
/*
|
||||
* Limit the number of reserved credits to 1/2 of maximum transaction
|
||||
@ -405,6 +430,7 @@ repeat:
|
||||
update_t_max_wait(transaction, ts);
|
||||
handle->h_transaction = transaction;
|
||||
handle->h_requested_credits = blocks;
|
||||
handle->h_revoke_credits_requested = handle->h_revoke_credits;
|
||||
handle->h_start_jiffies = jiffies;
|
||||
atomic_inc(&transaction->t_updates);
|
||||
atomic_inc(&transaction->t_handle_count);
|
||||
@ -431,15 +457,15 @@ static handle_t *new_handle(int nblocks)
|
||||
handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
|
||||
if (!handle)
|
||||
return NULL;
|
||||
handle->h_buffer_credits = nblocks;
|
||||
handle->h_total_credits = nblocks;
|
||||
handle->h_ref = 1;
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
|
||||
gfp_t gfp_mask, unsigned int type,
|
||||
unsigned int line_no)
|
||||
int revoke_records, gfp_t gfp_mask,
|
||||
unsigned int type, unsigned int line_no)
|
||||
{
|
||||
handle_t *handle = journal_current_handle();
|
||||
int err;
|
||||
@ -453,6 +479,8 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
|
||||
return handle;
|
||||
}
|
||||
|
||||
nblocks += DIV_ROUND_UP(revoke_records,
|
||||
journal->j_revoke_records_per_block);
|
||||
handle = new_handle(nblocks);
|
||||
if (!handle)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
@ -468,6 +496,7 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
|
||||
rsv_handle->h_journal = journal;
|
||||
handle->h_rsv_handle = rsv_handle;
|
||||
}
|
||||
handle->h_revoke_credits = revoke_records;
|
||||
|
||||
err = start_this_handle(journal, handle, gfp_mask);
|
||||
if (err < 0) {
|
||||
@ -508,16 +537,21 @@ EXPORT_SYMBOL(jbd2__journal_start);
|
||||
*/
|
||||
handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
|
||||
{
|
||||
return jbd2__journal_start(journal, nblocks, 0, GFP_NOFS, 0, 0);
|
||||
return jbd2__journal_start(journal, nblocks, 0, 0, GFP_NOFS, 0, 0);
|
||||
}
|
||||
EXPORT_SYMBOL(jbd2_journal_start);
|
||||
|
||||
void jbd2_journal_free_reserved(handle_t *handle)
|
||||
static void __jbd2_journal_unreserve_handle(handle_t *handle)
|
||||
{
|
||||
journal_t *journal = handle->h_journal;
|
||||
|
||||
WARN_ON(!handle->h_reserved);
|
||||
sub_reserved_credits(journal, handle->h_buffer_credits);
|
||||
sub_reserved_credits(journal, handle->h_total_credits);
|
||||
}
|
||||
|
||||
/*
 * Release a reserved handle that is no longer needed.
 *
 * First hands @handle to __jbd2_journal_unreserve_handle() and then frees
 * the handle structure with jbd2_free_handle(); @handle must not be used
 * after this call.
 */
void jbd2_journal_free_reserved(handle_t *handle)
|
||||
{
|
||||
__jbd2_journal_unreserve_handle(handle);
|
||||
jbd2_free_handle(handle);
|
||||
}
|
||||
EXPORT_SYMBOL(jbd2_journal_free_reserved);
|
||||
@ -571,7 +605,7 @@ int jbd2_journal_start_reserved(handle_t *handle, unsigned int type,
|
||||
handle->h_line_no = line_no;
|
||||
trace_jbd2_handle_start(journal->j_fs_dev->bd_dev,
|
||||
handle->h_transaction->t_tid, type,
|
||||
line_no, handle->h_buffer_credits);
|
||||
line_no, handle->h_total_credits);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(jbd2_journal_start_reserved);
|
||||
@ -580,6 +614,7 @@ EXPORT_SYMBOL(jbd2_journal_start_reserved);
|
||||
* int jbd2_journal_extend() - extend buffer credits.
|
||||
* @handle: handle to 'extend'
|
||||
* @nblocks: nr blocks to try to extend by.
|
||||
* @revoke_records: number of revoke records to try to extend by.
|
||||
*
|
||||
* Some transactions, such as large extends and truncates, can be done
|
||||
* atomically all at once or in several stages. The operation requests
|
||||
@ -596,7 +631,7 @@ EXPORT_SYMBOL(jbd2_journal_start_reserved);
|
||||
* return code < 0 implies an error
|
||||
* return code > 0 implies normal transaction-full status.
|
||||
*/
|
||||
int jbd2_journal_extend(handle_t *handle, int nblocks)
|
||||
int jbd2_journal_extend(handle_t *handle, int nblocks, int revoke_records)
|
||||
{
|
||||
transaction_t *transaction = handle->h_transaction;
|
||||
journal_t *journal;
|
||||
@ -618,6 +653,12 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
|
||||
goto error_out;
|
||||
}
|
||||
|
||||
nblocks += DIV_ROUND_UP(
|
||||
handle->h_revoke_credits_requested + revoke_records,
|
||||
journal->j_revoke_records_per_block) -
|
||||
DIV_ROUND_UP(
|
||||
handle->h_revoke_credits_requested,
|
||||
journal->j_revoke_records_per_block);
|
||||
spin_lock(&transaction->t_handle_lock);
|
||||
wanted = atomic_add_return(nblocks,
|
||||
&transaction->t_outstanding_credits);
|
||||
@ -629,22 +670,16 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (wanted + (wanted >> JBD2_CONTROL_BLOCKS_SHIFT) >
|
||||
jbd2_log_space_left(journal)) {
|
||||
jbd_debug(3, "denied handle %p %d blocks: "
|
||||
"insufficient log space\n", handle, nblocks);
|
||||
atomic_sub(nblocks, &transaction->t_outstanding_credits);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev,
|
||||
transaction->t_tid,
|
||||
handle->h_type, handle->h_line_no,
|
||||
handle->h_buffer_credits,
|
||||
handle->h_total_credits,
|
||||
nblocks);
|
||||
|
||||
handle->h_buffer_credits += nblocks;
|
||||
handle->h_total_credits += nblocks;
|
||||
handle->h_requested_credits += nblocks;
|
||||
handle->h_revoke_credits += revoke_records;
|
||||
handle->h_revoke_credits_requested += revoke_records;
|
||||
result = 0;
|
||||
|
||||
jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
|
||||
@ -655,11 +690,55 @@ error_out:
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
 * Detach @handle from its running transaction.
 *
 * Clears current->journal_info, settles the handle's credit accounting
 * against the transaction, unreserves an attached reserved handle if there
 * is one, drops the handle's t_updates reference (waking j_wait_updates
 * waiters when it was the last one), releases the j_trans_commit_map
 * lockdep annotation and restores the saved allocation context.
 */
static void stop_this_handle(handle_t *handle)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	int used_revokes;

	J_ASSERT(journal_current_handle() == handle);
	J_ASSERT(atomic_read(&transaction->t_updates) > 0);
	current->journal_info = NULL;

	/*
	 * Subtract the revoke descriptor blocks this handle really made
	 * necessary from its credits. Only the descriptor blocks the
	 * transaction will actually need are charged, since long runs of
	 * transactions with few revokes are relatively common.
	 */
	used_revokes = handle->h_revoke_credits_requested -
		       handle->h_revoke_credits;
	if (used_revokes != 0) {
		int per_block = journal->j_revoke_records_per_block;
		int total, blocks_after, blocks_before;

		WARN_ON_ONCE(DIV_ROUND_UP(used_revokes, per_block) >
			     handle->h_total_credits);
		total = atomic_add_return(used_revokes,
					  &transaction->t_outstanding_revokes);
		/* Descriptor blocks needed with and without our revokes. */
		blocks_after = DIV_ROUND_UP(total, per_block);
		blocks_before = DIV_ROUND_UP(total - used_revokes, per_block);
		handle->h_total_credits -= blocks_after - blocks_before;
	}

	/* Return whatever credits remain on the handle to the transaction. */
	atomic_sub(handle->h_total_credits,
		   &transaction->t_outstanding_credits);

	if (handle->h_rsv_handle)
		__jbd2_journal_unreserve_handle(handle->h_rsv_handle);

	if (atomic_dec_and_test(&transaction->t_updates))
		wake_up(&journal->j_wait_updates);

	rwsem_release(&journal->j_trans_commit_map, _THIS_IP_);

	/*
	 * The GFP_NOFS scope ends here, so the original allocation context
	 * can be restored.
	 */
	memalloc_nofs_restore(handle->saved_alloc_context);
}
|
||||
|
||||
/**
|
||||
* int jbd2_journal_restart() - restart a handle.
|
||||
* @handle: handle to restart
|
||||
* @nblocks: nr credits requested
|
||||
* @revoke_records: number of revoke record credits requested
|
||||
* @gfp_mask: memory allocation flags (for start_this_handle)
|
||||
*
|
||||
* Restart a handle for a multi-transaction filesystem
|
||||
@ -672,56 +751,48 @@ error_out:
|
||||
* credits. We preserve reserved handle if there's any attached to the
|
||||
* passed in handle.
|
||||
*/
|
||||
int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
|
||||
int jbd2__journal_restart(handle_t *handle, int nblocks, int revoke_records,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
transaction_t *transaction = handle->h_transaction;
|
||||
journal_t *journal;
|
||||
tid_t tid;
|
||||
int need_to_start, ret;
|
||||
int need_to_start;
|
||||
int ret;
|
||||
|
||||
/* If we've had an abort of any type, don't even think about
|
||||
* actually doing the restart! */
|
||||
if (is_handle_aborted(handle))
|
||||
return 0;
|
||||
journal = transaction->t_journal;
|
||||
tid = transaction->t_tid;
|
||||
|
||||
/*
|
||||
* First unlink the handle from its current transaction, and start the
|
||||
* commit on that.
|
||||
*/
|
||||
J_ASSERT(atomic_read(&transaction->t_updates) > 0);
|
||||
J_ASSERT(journal_current_handle() == handle);
|
||||
|
||||
read_lock(&journal->j_state_lock);
|
||||
spin_lock(&transaction->t_handle_lock);
|
||||
atomic_sub(handle->h_buffer_credits,
|
||||
&transaction->t_outstanding_credits);
|
||||
if (handle->h_rsv_handle) {
|
||||
sub_reserved_credits(journal,
|
||||
handle->h_rsv_handle->h_buffer_credits);
|
||||
}
|
||||
if (atomic_dec_and_test(&transaction->t_updates))
|
||||
wake_up(&journal->j_wait_updates);
|
||||
tid = transaction->t_tid;
|
||||
spin_unlock(&transaction->t_handle_lock);
|
||||
handle->h_transaction = NULL;
|
||||
current->journal_info = NULL;
|
||||
|
||||
jbd_debug(2, "restarting handle %p\n", handle);
|
||||
stop_this_handle(handle);
|
||||
handle->h_transaction = NULL;
|
||||
|
||||
/*
|
||||
* TODO: If we use READ_ONCE / WRITE_ONCE for j_commit_request we can
|
||||
* get rid of pointless j_state_lock traffic like this.
|
||||
*/
|
||||
read_lock(&journal->j_state_lock);
|
||||
need_to_start = !tid_geq(journal->j_commit_request, tid);
|
||||
read_unlock(&journal->j_state_lock);
|
||||
if (need_to_start)
|
||||
jbd2_log_start_commit(journal, tid);
|
||||
|
||||
rwsem_release(&journal->j_trans_commit_map, _THIS_IP_);
|
||||
handle->h_buffer_credits = nblocks;
|
||||
/*
|
||||
* Restore the original nofs context because the journal restart
|
||||
* is basically the same thing as journal stop and start.
|
||||
* start_this_handle will start a new nofs context.
|
||||
*/
|
||||
memalloc_nofs_restore(handle->saved_alloc_context);
|
||||
handle->h_total_credits = nblocks +
|
||||
DIV_ROUND_UP(revoke_records,
|
||||
journal->j_revoke_records_per_block);
|
||||
handle->h_revoke_credits = revoke_records;
|
||||
ret = start_this_handle(journal, handle, gfp_mask);
|
||||
trace_jbd2_handle_restart(journal->j_fs_dev->bd_dev,
|
||||
ret ? 0 : handle->h_transaction->t_tid,
|
||||
handle->h_type, handle->h_line_no,
|
||||
handle->h_total_credits);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(jbd2__journal_restart);
|
||||
@ -729,7 +800,7 @@ EXPORT_SYMBOL(jbd2__journal_restart);
|
||||
|
||||
int jbd2_journal_restart(handle_t *handle, int nblocks)
|
||||
{
|
||||
return jbd2__journal_restart(handle, nblocks, GFP_NOFS);
|
||||
return jbd2__journal_restart(handle, nblocks, 0, GFP_NOFS);
|
||||
}
|
||||
EXPORT_SYMBOL(jbd2_journal_restart);
|
||||
|
||||
@ -879,7 +950,7 @@ repeat:
|
||||
|
||||
start_lock = jiffies;
|
||||
lock_buffer(bh);
|
||||
jbd_lock_bh_state(bh);
|
||||
spin_lock(&jh->b_state_lock);
|
||||
|
||||
/* If it takes too long to lock the buffer, trace it */
|
||||
time_lock = jbd2_time_diff(start_lock, jiffies);
|
||||
@ -929,7 +1000,7 @@ repeat:
|
||||
|
||||
error = -EROFS;
|
||||
if (is_handle_aborted(handle)) {
|
||||
jbd_unlock_bh_state(bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
goto out;
|
||||
}
|
||||
error = 0;
|
||||
@ -993,7 +1064,7 @@ repeat:
|
||||
*/
|
||||
if (buffer_shadow(bh)) {
|
||||
JBUFFER_TRACE(jh, "on shadow: sleep");
|
||||
jbd_unlock_bh_state(bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE);
|
||||
goto repeat;
|
||||
}
|
||||
@ -1014,7 +1085,7 @@ repeat:
|
||||
JBUFFER_TRACE(jh, "generate frozen data");
|
||||
if (!frozen_buffer) {
|
||||
JBUFFER_TRACE(jh, "allocate memory for buffer");
|
||||
jbd_unlock_bh_state(bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size,
|
||||
GFP_NOFS | __GFP_NOFAIL);
|
||||
goto repeat;
|
||||
@ -1033,7 +1104,7 @@ attach_next:
|
||||
jh->b_next_transaction = transaction;
|
||||
|
||||
done:
|
||||
jbd_unlock_bh_state(bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
|
||||
/*
|
||||
* If we are about to journal a buffer, then any revoke pending on it is
|
||||
@ -1172,7 +1243,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
|
||||
* that case: the transaction must have deleted the buffer for it to be
|
||||
* reused here.
|
||||
*/
|
||||
jbd_lock_bh_state(bh);
|
||||
spin_lock(&jh->b_state_lock);
|
||||
J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
|
||||
jh->b_transaction == NULL ||
|
||||
(jh->b_transaction == journal->j_committing_transaction &&
|
||||
@ -1207,7 +1278,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
|
||||
jh->b_next_transaction = transaction;
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
}
|
||||
jbd_unlock_bh_state(bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
|
||||
/*
|
||||
* akpm: I added this. ext3_alloc_branch can pick up new indirect
|
||||
@ -1275,13 +1346,13 @@ repeat:
|
||||
committed_data = jbd2_alloc(jh2bh(jh)->b_size,
|
||||
GFP_NOFS|__GFP_NOFAIL);
|
||||
|
||||
jbd_lock_bh_state(bh);
|
||||
spin_lock(&jh->b_state_lock);
|
||||
if (!jh->b_committed_data) {
|
||||
/* Copy out the current buffer contents into the
|
||||
* preserved, committed copy. */
|
||||
JBUFFER_TRACE(jh, "generate b_committed data");
|
||||
if (!committed_data) {
|
||||
jbd_unlock_bh_state(bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
goto repeat;
|
||||
}
|
||||
|
||||
@ -1289,7 +1360,7 @@ repeat:
|
||||
committed_data = NULL;
|
||||
memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
|
||||
}
|
||||
jbd_unlock_bh_state(bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
out:
|
||||
jbd2_journal_put_journal_head(jh);
|
||||
if (unlikely(committed_data))
|
||||
@ -1390,16 +1461,16 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
|
||||
*/
|
||||
if (jh->b_transaction != transaction &&
|
||||
jh->b_next_transaction != transaction) {
|
||||
jbd_lock_bh_state(bh);
|
||||
spin_lock(&jh->b_state_lock);
|
||||
J_ASSERT_JH(jh, jh->b_transaction == transaction ||
|
||||
jh->b_next_transaction == transaction);
|
||||
jbd_unlock_bh_state(bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
}
|
||||
if (jh->b_modified == 1) {
|
||||
/* If it's in our transaction it must be in BJ_Metadata list. */
|
||||
if (jh->b_transaction == transaction &&
|
||||
jh->b_jlist != BJ_Metadata) {
|
||||
jbd_lock_bh_state(bh);
|
||||
spin_lock(&jh->b_state_lock);
|
||||
if (jh->b_transaction == transaction &&
|
||||
jh->b_jlist != BJ_Metadata)
|
||||
pr_err("JBD2: assertion failure: h_type=%u "
|
||||
@ -1409,13 +1480,13 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
|
||||
jh->b_jlist);
|
||||
J_ASSERT_JH(jh, jh->b_transaction != transaction ||
|
||||
jh->b_jlist == BJ_Metadata);
|
||||
jbd_unlock_bh_state(bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
journal = transaction->t_journal;
|
||||
jbd_lock_bh_state(bh);
|
||||
spin_lock(&jh->b_state_lock);
|
||||
|
||||
if (jh->b_modified == 0) {
|
||||
/*
|
||||
@ -1423,12 +1494,12 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
|
||||
* of the transaction. This needs to be done
|
||||
* once a transaction -bzzz
|
||||
*/
|
||||
if (handle->h_buffer_credits <= 0) {
|
||||
if (WARN_ON_ONCE(jbd2_handle_buffer_credits(handle) <= 0)) {
|
||||
ret = -ENOSPC;
|
||||
goto out_unlock_bh;
|
||||
}
|
||||
jh->b_modified = 1;
|
||||
handle->h_buffer_credits--;
|
||||
handle->h_total_credits--;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1501,7 +1572,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
|
||||
__jbd2_journal_file_buffer(jh, transaction, BJ_Metadata);
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
out_unlock_bh:
|
||||
jbd_unlock_bh_state(bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
out:
|
||||
JBUFFER_TRACE(jh, "exit");
|
||||
return ret;
|
||||
@ -1539,18 +1610,20 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
|
||||
|
||||
BUFFER_TRACE(bh, "entry");
|
||||
|
||||
jbd_lock_bh_state(bh);
|
||||
jh = jbd2_journal_grab_journal_head(bh);
|
||||
if (!jh) {
|
||||
__bforget(bh);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!buffer_jbd(bh))
|
||||
goto not_jbd;
|
||||
jh = bh2jh(bh);
|
||||
spin_lock(&jh->b_state_lock);
|
||||
|
||||
/* Critical error: attempting to delete a bitmap buffer, maybe?
|
||||
* Don't do any jbd operations, and return an error. */
|
||||
if (!J_EXPECT_JH(jh, !jh->b_committed_data,
|
||||
"inconsistent data on disk")) {
|
||||
err = -EIO;
|
||||
goto not_jbd;
|
||||
goto drop;
|
||||
}
|
||||
|
||||
/* keep track of whether or not this transaction modified us */
|
||||
@ -1598,10 +1671,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
|
||||
__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
|
||||
} else {
|
||||
__jbd2_journal_unfile_buffer(jh);
|
||||
if (!buffer_jbd(bh)) {
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
goto not_jbd;
|
||||
}
|
||||
jbd2_journal_put_journal_head(jh);
|
||||
}
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
} else if (jh->b_transaction) {
|
||||
@ -1643,7 +1713,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
|
||||
if (!jh->b_cp_transaction) {
|
||||
JBUFFER_TRACE(jh, "belongs to none transaction");
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
goto not_jbd;
|
||||
goto drop;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1653,7 +1723,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
|
||||
if (!buffer_dirty(bh)) {
|
||||
__jbd2_journal_remove_checkpoint(jh);
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
goto not_jbd;
|
||||
goto drop;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1666,20 +1736,15 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
|
||||
__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
}
|
||||
|
||||
jbd_unlock_bh_state(bh);
|
||||
__brelse(bh);
|
||||
drop:
|
||||
__brelse(bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
jbd2_journal_put_journal_head(jh);
|
||||
if (drop_reserve) {
|
||||
/* no need to reserve log space for this block -bzzz */
|
||||
handle->h_buffer_credits++;
|
||||
handle->h_total_credits++;
|
||||
}
|
||||
return err;
|
||||
|
||||
not_jbd:
|
||||
jbd_unlock_bh_state(bh);
|
||||
__bforget(bh);
|
||||
goto drop;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1706,45 +1771,34 @@ int jbd2_journal_stop(handle_t *handle)
|
||||
tid_t tid;
|
||||
pid_t pid;
|
||||
|
||||
if (--handle->h_ref > 0) {
|
||||
jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
|
||||
handle->h_ref);
|
||||
if (is_handle_aborted(handle))
|
||||
return -EIO;
|
||||
return 0;
|
||||
}
|
||||
if (!transaction) {
|
||||
/*
|
||||
* Handle is already detached from the transaction so
|
||||
* there is nothing to do other than decrease a refcount,
|
||||
* or free the handle if refcount drops to zero
|
||||
* Handle is already detached from the transaction so there is
|
||||
* nothing to do other than free the handle.
|
||||
*/
|
||||
if (--handle->h_ref > 0) {
|
||||
jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
|
||||
handle->h_ref);
|
||||
return err;
|
||||
} else {
|
||||
if (handle->h_rsv_handle)
|
||||
jbd2_free_handle(handle->h_rsv_handle);
|
||||
goto free_and_exit;
|
||||
}
|
||||
memalloc_nofs_restore(handle->saved_alloc_context);
|
||||
goto free_and_exit;
|
||||
}
|
||||
journal = transaction->t_journal;
|
||||
|
||||
J_ASSERT(journal_current_handle() == handle);
|
||||
tid = transaction->t_tid;
|
||||
|
||||
if (is_handle_aborted(handle))
|
||||
err = -EIO;
|
||||
else
|
||||
J_ASSERT(atomic_read(&transaction->t_updates) > 0);
|
||||
|
||||
if (--handle->h_ref > 0) {
|
||||
jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
|
||||
handle->h_ref);
|
||||
return err;
|
||||
}
|
||||
|
||||
jbd_debug(4, "Handle %p going down\n", handle);
|
||||
trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev,
|
||||
transaction->t_tid,
|
||||
handle->h_type, handle->h_line_no,
|
||||
tid, handle->h_type, handle->h_line_no,
|
||||
jiffies - handle->h_start_jiffies,
|
||||
handle->h_sync, handle->h_requested_credits,
|
||||
(handle->h_requested_credits -
|
||||
handle->h_buffer_credits));
|
||||
handle->h_total_credits));
|
||||
|
||||
/*
|
||||
* Implement synchronous transaction batching. If the handle
|
||||
@ -1804,19 +1858,13 @@ int jbd2_journal_stop(handle_t *handle)
|
||||
|
||||
if (handle->h_sync)
|
||||
transaction->t_synchronous_commit = 1;
|
||||
current->journal_info = NULL;
|
||||
atomic_sub(handle->h_buffer_credits,
|
||||
&transaction->t_outstanding_credits);
|
||||
|
||||
/*
|
||||
* If the handle is marked SYNC, we need to set another commit
|
||||
* going! We also want to force a commit if the current
|
||||
* transaction is occupying too much of the log, or if the
|
||||
* transaction is too old now.
|
||||
* going! We also want to force a commit if the transaction is too
|
||||
* old now.
|
||||
*/
|
||||
if (handle->h_sync ||
|
||||
(atomic_read(&transaction->t_outstanding_credits) >
|
||||
journal->j_max_transaction_buffers) ||
|
||||
time_after_eq(jiffies, transaction->t_expires)) {
|
||||
/* Do this even for aborted journals: an abort still
|
||||
* completes the commit thread, it just doesn't write
|
||||
@ -1825,7 +1873,7 @@ int jbd2_journal_stop(handle_t *handle)
|
||||
jbd_debug(2, "transaction too old, requesting commit for "
|
||||
"handle %p\n", handle);
|
||||
/* This is non-blocking */
|
||||
jbd2_log_start_commit(journal, transaction->t_tid);
|
||||
jbd2_log_start_commit(journal, tid);
|
||||
|
||||
/*
|
||||
* Special case: JBD2_SYNC synchronous updates require us
|
||||
@ -1836,31 +1884,19 @@ int jbd2_journal_stop(handle_t *handle)
|
||||
}
|
||||
|
||||
/*
|
||||
* Once we drop t_updates, if it goes to zero the transaction
|
||||
* could start committing on us and eventually disappear. So
|
||||
* once we do this, we must not dereference transaction
|
||||
* pointer again.
|
||||
* Once stop_this_handle() drops t_updates, the transaction could start
|
||||
* committing on us and eventually disappear. So we must not
|
||||
* dereference transaction pointer again after calling
|
||||
* stop_this_handle().
|
||||
*/
|
||||
tid = transaction->t_tid;
|
||||
if (atomic_dec_and_test(&transaction->t_updates)) {
|
||||
wake_up(&journal->j_wait_updates);
|
||||
if (journal->j_barrier_count)
|
||||
wake_up(&journal->j_wait_transaction_locked);
|
||||
}
|
||||
|
||||
rwsem_release(&journal->j_trans_commit_map, _THIS_IP_);
|
||||
stop_this_handle(handle);
|
||||
|
||||
if (wait_for_commit)
|
||||
err = jbd2_log_wait_commit(journal, tid);
|
||||
|
||||
if (handle->h_rsv_handle)
|
||||
jbd2_journal_free_reserved(handle->h_rsv_handle);
|
||||
free_and_exit:
|
||||
/*
|
||||
* Scope of the GFP_NOFS context is over here and so we can restore the
|
||||
* original alloc context.
|
||||
*/
|
||||
memalloc_nofs_restore(handle->saved_alloc_context);
|
||||
if (handle->h_rsv_handle)
|
||||
jbd2_free_handle(handle->h_rsv_handle);
|
||||
jbd2_free_handle(handle);
|
||||
return err;
|
||||
}
|
||||
@ -1878,7 +1914,7 @@ free_and_exit:
|
||||
*
|
||||
* j_list_lock is held.
|
||||
*
|
||||
* jbd_lock_bh_state(jh2bh(jh)) is held.
|
||||
* jh->b_state_lock is held.
|
||||
*/
|
||||
|
||||
static inline void
|
||||
@ -1902,7 +1938,7 @@ __blist_add_buffer(struct journal_head **list, struct journal_head *jh)
|
||||
*
|
||||
* Called with j_list_lock held, and the journal may not be locked.
|
||||
*
|
||||
* jbd_lock_bh_state(jh2bh(jh)) is held.
|
||||
* jh->b_state_lock is held.
|
||||
*/
|
||||
|
||||
static inline void
|
||||
@ -1934,7 +1970,7 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
|
||||
transaction_t *transaction;
|
||||
struct buffer_head *bh = jh2bh(jh);
|
||||
|
||||
J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
|
||||
lockdep_assert_held(&jh->b_state_lock);
|
||||
transaction = jh->b_transaction;
|
||||
if (transaction)
|
||||
assert_spin_locked(&transaction->t_journal->j_list_lock);
|
||||
@ -1971,17 +2007,15 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove buffer from all transactions.
|
||||
* Remove buffer from all transactions. The caller is responsible for dropping
|
||||
* the jh reference that belonged to the transaction.
|
||||
*
|
||||
* Called with bh_state lock and j_list_lock
|
||||
*
|
||||
* jh and bh may be already freed when this function returns.
|
||||
*/
|
||||
static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
|
||||
{
|
||||
__jbd2_journal_temp_unlink_buffer(jh);
|
||||
jh->b_transaction = NULL;
|
||||
jbd2_journal_put_journal_head(jh);
|
||||
}
|
||||
|
||||
void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
|
||||
@ -1990,18 +2024,19 @@ void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
|
||||
|
||||
/* Get reference so that buffer cannot be freed before we unlock it */
|
||||
get_bh(bh);
|
||||
jbd_lock_bh_state(bh);
|
||||
spin_lock(&jh->b_state_lock);
|
||||
spin_lock(&journal->j_list_lock);
|
||||
__jbd2_journal_unfile_buffer(jh);
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
jbd_unlock_bh_state(bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
jbd2_journal_put_journal_head(jh);
|
||||
__brelse(bh);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from jbd2_journal_try_to_free_buffers().
|
||||
*
|
||||
* Called under jbd_lock_bh_state(bh)
|
||||
* Called under jh->b_state_lock
|
||||
*/
|
||||
static void
|
||||
__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
|
||||
@ -2088,10 +2123,10 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
|
||||
if (!jh)
|
||||
continue;
|
||||
|
||||
jbd_lock_bh_state(bh);
|
||||
spin_lock(&jh->b_state_lock);
|
||||
__journal_try_to_free_buffer(journal, bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
jbd2_journal_put_journal_head(jh);
|
||||
jbd_unlock_bh_state(bh);
|
||||
if (buffer_jbd(bh))
|
||||
goto busy;
|
||||
} while ((bh = bh->b_this_page) != head);
|
||||
@ -2112,7 +2147,7 @@ busy:
|
||||
*
|
||||
* Called under j_list_lock.
|
||||
*
|
||||
* Called under jbd_lock_bh_state(bh).
|
||||
* Called under jh->b_state_lock.
|
||||
*/
|
||||
static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
|
||||
{
|
||||
@ -2133,6 +2168,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
|
||||
} else {
|
||||
JBUFFER_TRACE(jh, "on running transaction");
|
||||
__jbd2_journal_unfile_buffer(jh);
|
||||
jbd2_journal_put_journal_head(jh);
|
||||
}
|
||||
return may_free;
|
||||
}
|
||||
@ -2199,18 +2235,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
|
||||
* holding the page lock. --sct
|
||||
*/
|
||||
|
||||
if (!buffer_jbd(bh))
|
||||
jh = jbd2_journal_grab_journal_head(bh);
|
||||
if (!jh)
|
||||
goto zap_buffer_unlocked;
|
||||
|
||||
/* OK, we have data buffer in journaled mode */
|
||||
write_lock(&journal->j_state_lock);
|
||||
jbd_lock_bh_state(bh);
|
||||
spin_lock(&jh->b_state_lock);
|
||||
spin_lock(&journal->j_list_lock);
|
||||
|
||||
jh = jbd2_journal_grab_journal_head(bh);
|
||||
if (!jh)
|
||||
goto zap_buffer_no_jh;
|
||||
|
||||
/*
|
||||
* We cannot remove the buffer from checkpoint lists until the
|
||||
* transaction adding inode to orphan list (let's call it T)
|
||||
@ -2289,10 +2322,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
|
||||
* for commit and try again.
|
||||
*/
|
||||
if (partial_page) {
|
||||
jbd2_journal_put_journal_head(jh);
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
jbd_unlock_bh_state(bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
write_unlock(&journal->j_state_lock);
|
||||
jbd2_journal_put_journal_head(jh);
|
||||
return -EBUSY;
|
||||
}
|
||||
/*
|
||||
@ -2304,10 +2337,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
|
||||
set_buffer_freed(bh);
|
||||
if (journal->j_running_transaction && buffer_jbddirty(bh))
|
||||
jh->b_next_transaction = journal->j_running_transaction;
|
||||
jbd2_journal_put_journal_head(jh);
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
jbd_unlock_bh_state(bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
write_unlock(&journal->j_state_lock);
|
||||
jbd2_journal_put_journal_head(jh);
|
||||
return 0;
|
||||
} else {
|
||||
/* Good, the buffer belongs to the running transaction.
|
||||
@ -2331,11 +2364,10 @@ zap_buffer:
|
||||
* here.
|
||||
*/
|
||||
jh->b_modified = 0;
|
||||
jbd2_journal_put_journal_head(jh);
|
||||
zap_buffer_no_jh:
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
jbd_unlock_bh_state(bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
write_unlock(&journal->j_state_lock);
|
||||
jbd2_journal_put_journal_head(jh);
|
||||
zap_buffer_unlocked:
|
||||
clear_buffer_dirty(bh);
|
||||
J_ASSERT_BH(bh, !buffer_jbddirty(bh));
|
||||
@ -2422,7 +2454,7 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
|
||||
int was_dirty = 0;
|
||||
struct buffer_head *bh = jh2bh(jh);
|
||||
|
||||
J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
|
||||
lockdep_assert_held(&jh->b_state_lock);
|
||||
assert_spin_locked(&transaction->t_journal->j_list_lock);
|
||||
|
||||
J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
|
||||
@ -2484,11 +2516,11 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
|
||||
void jbd2_journal_file_buffer(struct journal_head *jh,
|
||||
transaction_t *transaction, int jlist)
|
||||
{
|
||||
jbd_lock_bh_state(jh2bh(jh));
|
||||
spin_lock(&jh->b_state_lock);
|
||||
spin_lock(&transaction->t_journal->j_list_lock);
|
||||
__jbd2_journal_file_buffer(jh, transaction, jlist);
|
||||
spin_unlock(&transaction->t_journal->j_list_lock);
|
||||
jbd_unlock_bh_state(jh2bh(jh));
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2498,23 +2530,25 @@ void jbd2_journal_file_buffer(struct journal_head *jh,
|
||||
* buffer on that transaction's metadata list.
|
||||
*
|
||||
* Called under j_list_lock
|
||||
* Called under jbd_lock_bh_state(jh2bh(jh))
|
||||
* Called under jh->b_state_lock
|
||||
*
|
||||
* jh and bh may be already free when this function returns
|
||||
* When this function returns true, there's no next transaction to refile to
|
||||
* and the caller has to drop jh reference through
|
||||
* jbd2_journal_put_journal_head().
|
||||
*/
|
||||
void __jbd2_journal_refile_buffer(struct journal_head *jh)
|
||||
bool __jbd2_journal_refile_buffer(struct journal_head *jh)
|
||||
{
|
||||
int was_dirty, jlist;
|
||||
struct buffer_head *bh = jh2bh(jh);
|
||||
|
||||
J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
|
||||
lockdep_assert_held(&jh->b_state_lock);
|
||||
if (jh->b_transaction)
|
||||
assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);
|
||||
|
||||
/* If the buffer is now unused, just drop it. */
|
||||
if (jh->b_next_transaction == NULL) {
|
||||
__jbd2_journal_unfile_buffer(jh);
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2542,6 +2576,7 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
|
||||
|
||||
if (was_dirty)
|
||||
set_buffer_jbddirty(bh);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2552,16 +2587,15 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
|
||||
*/
|
||||
void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
|
||||
{
|
||||
struct buffer_head *bh = jh2bh(jh);
|
||||
bool drop;
|
||||
|
||||
/* Get reference so that buffer cannot be freed before we unlock it */
|
||||
get_bh(bh);
|
||||
jbd_lock_bh_state(bh);
|
||||
spin_lock(&jh->b_state_lock);
|
||||
spin_lock(&journal->j_list_lock);
|
||||
__jbd2_journal_refile_buffer(jh);
|
||||
jbd_unlock_bh_state(bh);
|
||||
drop = __jbd2_journal_refile_buffer(jh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
__brelse(bh);
|
||||
if (drop)
|
||||
jbd2_journal_put_journal_head(jh);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -2288,9 +2288,9 @@ static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth,
|
||||
int ret = 0;
|
||||
int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
|
||||
|
||||
if (handle->h_buffer_credits < credits)
|
||||
if (jbd2_handle_buffer_credits(handle) < credits)
|
||||
ret = ocfs2_extend_trans(handle,
|
||||
credits - handle->h_buffer_credits);
|
||||
credits - jbd2_handle_buffer_credits(handle));
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -2367,7 +2367,7 @@ static int ocfs2_rotate_tree_right(handle_t *handle,
|
||||
struct ocfs2_path *right_path,
|
||||
struct ocfs2_path **ret_left_path)
|
||||
{
|
||||
int ret, start, orig_credits = handle->h_buffer_credits;
|
||||
int ret, start, orig_credits = jbd2_handle_buffer_credits(handle);
|
||||
u32 cpos;
|
||||
struct ocfs2_path *left_path = NULL;
|
||||
struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
|
||||
@ -3148,7 +3148,7 @@ static int ocfs2_rotate_tree_left(handle_t *handle,
|
||||
struct ocfs2_path *path,
|
||||
struct ocfs2_cached_dealloc_ctxt *dealloc)
|
||||
{
|
||||
int ret, orig_credits = handle->h_buffer_credits;
|
||||
int ret, orig_credits = jbd2_handle_buffer_credits(handle);
|
||||
struct ocfs2_path *tmp_path = NULL, *restart_path = NULL;
|
||||
struct ocfs2_extent_block *eb;
|
||||
struct ocfs2_extent_list *el;
|
||||
@ -3386,8 +3386,8 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
|
||||
right_path);
|
||||
|
||||
ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
|
||||
handle->h_buffer_credits,
|
||||
right_path);
|
||||
jbd2_handle_buffer_credits(handle),
|
||||
right_path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
@ -3548,8 +3548,8 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
|
||||
right_path);
|
||||
|
||||
ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
|
||||
handle->h_buffer_credits,
|
||||
left_path);
|
||||
jbd2_handle_buffer_credits(handle),
|
||||
left_path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
@ -3623,7 +3623,7 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
|
||||
le16_to_cpu(el->l_next_free_rec) == 1) {
|
||||
/* extend credit for ocfs2_remove_rightmost_path */
|
||||
ret = ocfs2_extend_rotate_transaction(handle, 0,
|
||||
handle->h_buffer_credits,
|
||||
jbd2_handle_buffer_credits(handle),
|
||||
right_path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
@ -3669,7 +3669,7 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
|
||||
if (ctxt->c_split_covers_rec && ctxt->c_has_empty_extent) {
|
||||
/* extend credit for ocfs2_remove_rightmost_path */
|
||||
ret = ocfs2_extend_rotate_transaction(handle, 0,
|
||||
handle->h_buffer_credits,
|
||||
jbd2_handle_buffer_credits(handle),
|
||||
path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
@ -3725,7 +3725,7 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
|
||||
|
||||
/* extend credit for ocfs2_remove_rightmost_path */
|
||||
ret = ocfs2_extend_rotate_transaction(handle, 0,
|
||||
handle->h_buffer_credits,
|
||||
jbd2_handle_buffer_credits(handle),
|
||||
path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
@ -3755,7 +3755,7 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
|
||||
|
||||
/* extend credit for ocfs2_remove_rightmost_path */
|
||||
ret = ocfs2_extend_rotate_transaction(handle, 0,
|
||||
handle->h_buffer_credits,
|
||||
jbd2_handle_buffer_credits(handle),
|
||||
path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
@ -3799,7 +3799,7 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
|
||||
if (ctxt->c_split_covers_rec) {
|
||||
/* extend credit for ocfs2_remove_rightmost_path */
|
||||
ret = ocfs2_extend_rotate_transaction(handle, 0,
|
||||
handle->h_buffer_credits,
|
||||
jbd2_handle_buffer_credits(handle),
|
||||
path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
@ -5358,7 +5358,7 @@ static int ocfs2_truncate_rec(handle_t *handle,
|
||||
if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
|
||||
/* extend credit for ocfs2_remove_rightmost_path */
|
||||
ret = ocfs2_extend_rotate_transaction(handle, 0,
|
||||
handle->h_buffer_credits,
|
||||
jbd2_handle_buffer_credits(handle),
|
||||
path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
@ -5427,8 +5427,8 @@ static int ocfs2_truncate_rec(handle_t *handle,
|
||||
}
|
||||
|
||||
ret = ocfs2_extend_rotate_transaction(handle, 0,
|
||||
handle->h_buffer_credits,
|
||||
path);
|
||||
jbd2_handle_buffer_credits(handle),
|
||||
path);
|
||||
if (ret) {
|
||||
mlog_errno(ret);
|
||||
goto out;
|
||||
|
@ -420,14 +420,14 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
|
||||
if (!nblocks)
|
||||
return 0;
|
||||
|
||||
old_nblocks = handle->h_buffer_credits;
|
||||
old_nblocks = jbd2_handle_buffer_credits(handle);
|
||||
|
||||
trace_ocfs2_extend_trans(old_nblocks, nblocks);
|
||||
|
||||
#ifdef CONFIG_OCFS2_DEBUG_FS
|
||||
status = 1;
|
||||
#else
|
||||
status = jbd2_journal_extend(handle, nblocks);
|
||||
status = jbd2_journal_extend(handle, nblocks, 0);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
@ -461,13 +461,13 @@ int ocfs2_allocate_extend_trans(handle_t *handle, int thresh)
|
||||
|
||||
BUG_ON(!handle);
|
||||
|
||||
old_nblks = handle->h_buffer_credits;
|
||||
old_nblks = jbd2_handle_buffer_credits(handle);
|
||||
trace_ocfs2_allocate_extend_trans(old_nblks, thresh);
|
||||
|
||||
if (old_nblks < thresh)
|
||||
return 0;
|
||||
|
||||
status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA);
|
||||
status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA, 0);
|
||||
if (status < 0) {
|
||||
mlog_errno(status);
|
||||
goto bail;
|
||||
|
@ -1252,6 +1252,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
|
||||
int nr)
|
||||
{
|
||||
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
|
||||
struct journal_head *jh;
|
||||
int ret;
|
||||
|
||||
if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
|
||||
@ -1260,13 +1261,14 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
|
||||
if (!buffer_jbd(bg_bh))
|
||||
return 1;
|
||||
|
||||
jbd_lock_bh_state(bg_bh);
|
||||
bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
|
||||
jh = bh2jh(bg_bh);
|
||||
spin_lock(&jh->b_state_lock);
|
||||
bg = (struct ocfs2_group_desc *) jh->b_committed_data;
|
||||
if (bg)
|
||||
ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
|
||||
else
|
||||
ret = 1;
|
||||
jbd_unlock_bh_state(bg_bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -2387,6 +2389,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
|
||||
int status;
|
||||
unsigned int tmp;
|
||||
struct ocfs2_group_desc *undo_bg = NULL;
|
||||
struct journal_head *jh;
|
||||
|
||||
/* The caller got this descriptor from
|
||||
* ocfs2_read_group_descriptor(). Any corruption is a code bug. */
|
||||
@ -2405,10 +2408,10 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
|
||||
goto bail;
|
||||
}
|
||||
|
||||
jh = bh2jh(group_bh);
|
||||
if (undo_fn) {
|
||||
jbd_lock_bh_state(group_bh);
|
||||
undo_bg = (struct ocfs2_group_desc *)
|
||||
bh2jh(group_bh)->b_committed_data;
|
||||
spin_lock(&jh->b_state_lock);
|
||||
undo_bg = (struct ocfs2_group_desc *) jh->b_committed_data;
|
||||
BUG_ON(!undo_bg);
|
||||
}
|
||||
|
||||
@ -2423,7 +2426,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
|
||||
le16_add_cpu(&bg->bg_free_bits_count, num_bits);
|
||||
if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
|
||||
if (undo_fn)
|
||||
jbd_unlock_bh_state(group_bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n",
|
||||
(unsigned long long)le64_to_cpu(bg->bg_blkno),
|
||||
le16_to_cpu(bg->bg_bits),
|
||||
@ -2432,7 +2435,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
|
||||
}
|
||||
|
||||
if (undo_fn)
|
||||
jbd_unlock_bh_state(group_bh);
|
||||
spin_unlock(&jh->b_state_lock);
|
||||
|
||||
ocfs2_journal_dirty(handle, group_bh);
|
||||
bail:
|
||||
|
@ -313,7 +313,6 @@ enum jbd_state_bits {
|
||||
BH_Revoked, /* Has been revoked from the log */
|
||||
BH_RevokeValid, /* Revoked flag is valid */
|
||||
BH_JBDDirty, /* Is dirty but journaled */
|
||||
BH_State, /* Pins most journal_head state */
|
||||
BH_JournalHead, /* Pins bh->b_private and jh->b_bh */
|
||||
BH_Shadow, /* IO on shadow buffer is running */
|
||||
BH_Verified, /* Metadata block has been verified ok */
|
||||
@ -342,26 +341,6 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh)
|
||||
return bh->b_private;
|
||||
}
|
||||
|
||||
static inline void jbd_lock_bh_state(struct buffer_head *bh)
|
||||
{
|
||||
bit_spin_lock(BH_State, &bh->b_state);
|
||||
}
|
||||
|
||||
static inline int jbd_trylock_bh_state(struct buffer_head *bh)
|
||||
{
|
||||
return bit_spin_trylock(BH_State, &bh->b_state);
|
||||
}
|
||||
|
||||
static inline int jbd_is_locked_bh_state(struct buffer_head *bh)
|
||||
{
|
||||
return bit_spin_is_locked(BH_State, &bh->b_state);
|
||||
}
|
||||
|
||||
static inline void jbd_unlock_bh_state(struct buffer_head *bh)
|
||||
{
|
||||
bit_spin_unlock(BH_State, &bh->b_state);
|
||||
}
|
||||
|
||||
static inline void jbd_lock_bh_journal_head(struct buffer_head *bh)
|
||||
{
|
||||
bit_spin_lock(BH_JournalHead, &bh->b_state);
|
||||
@ -477,7 +456,9 @@ struct jbd2_revoke_table_s;
|
||||
* @h_transaction: Which compound transaction is this update a part of?
|
||||
* @h_journal: Which journal handle belongs to - used iff h_reserved set.
|
||||
* @h_rsv_handle: Handle reserved for finishing the logical operation.
|
||||
* @h_buffer_credits: Number of remaining buffers we are allowed to dirty.
|
||||
* @h_total_credits: Number of remaining buffers we are allowed to add to
|
||||
journal. These are dirty buffers and revoke descriptor blocks.
|
||||
* @h_revoke_credits: Number of remaining revoke records available for handle
|
||||
* @h_ref: Reference count on this handle.
|
||||
* @h_err: Field for caller's use to track errors through large fs operations.
|
||||
* @h_sync: Flag for sync-on-close.
|
||||
@ -487,7 +468,8 @@ struct jbd2_revoke_table_s;
|
||||
* @h_type: For handle statistics.
|
||||
* @h_line_no: For handle statistics.
|
||||
* @h_start_jiffies: Handle Start time.
|
||||
* @h_requested_credits: Holds @h_buffer_credits after handle is started.
|
||||
* @h_requested_credits: Holds @h_total_credits after handle is started.
|
||||
* @h_revoke_credits_requested: Holds @h_revoke_credits after handle is started.
|
||||
* @saved_alloc_context: Saved context while transaction is open.
|
||||
**/
|
||||
|
||||
@ -504,7 +486,9 @@ struct jbd2_journal_handle
|
||||
};
|
||||
|
||||
handle_t *h_rsv_handle;
|
||||
int h_buffer_credits;
|
||||
int h_total_credits;
|
||||
int h_revoke_credits;
|
||||
int h_revoke_credits_requested;
|
||||
int h_ref;
|
||||
int h_err;
|
||||
|
||||
@ -556,9 +540,9 @@ struct transaction_chp_stats_s {
|
||||
* ->jbd_lock_bh_journal_head() (This is "innermost")
|
||||
*
|
||||
* j_state_lock
|
||||
* ->jbd_lock_bh_state()
|
||||
* ->b_state_lock
|
||||
*
|
||||
* jbd_lock_bh_state()
|
||||
* b_state_lock
|
||||
* ->j_list_lock
|
||||
*
|
||||
* j_state_lock
|
||||
@ -681,11 +665,24 @@ struct transaction_s
|
||||
atomic_t t_updates;
|
||||
|
||||
/*
|
||||
* Number of buffers reserved for use by all handles in this transaction
|
||||
* handle but not yet modified. [none]
|
||||
* Number of blocks reserved for this transaction in the journal.
|
||||
* This is including all credits reserved when starting transaction
|
||||
* handles as well as all journal descriptor blocks needed for this
|
||||
* transaction. [none]
|
||||
*/
|
||||
atomic_t t_outstanding_credits;
|
||||
|
||||
/*
|
||||
* Number of revoke records for this transaction added by already
|
||||
* stopped handles. [none]
|
||||
*/
|
||||
atomic_t t_outstanding_revokes;
|
||||
|
||||
/*
|
||||
* How many handles used this transaction? [none]
|
||||
*/
|
||||
atomic_t t_handle_count;
|
||||
|
||||
/*
|
||||
* Forward and backward links for the circular list of all transactions
|
||||
* awaiting checkpoint. [j_list_lock]
|
||||
@ -703,11 +700,6 @@ struct transaction_s
|
||||
*/
|
||||
ktime_t t_start_time;
|
||||
|
||||
/*
|
||||
* How many handles used this transaction? [none]
|
||||
*/
|
||||
atomic_t t_handle_count;
|
||||
|
||||
/*
|
||||
* This transaction is being forced and some process is
|
||||
* waiting for it to finish.
|
||||
@ -1024,6 +1016,13 @@ struct journal_s
|
||||
*/
|
||||
int j_max_transaction_buffers;
|
||||
|
||||
/**
|
||||
* @j_revoke_records_per_block:
|
||||
*
|
||||
* Number of revoke records that fit in one descriptor block.
|
||||
*/
|
||||
int j_revoke_records_per_block;
|
||||
|
||||
/**
|
||||
* @j_commit_interval:
|
||||
*
|
||||
@ -1257,7 +1256,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3)
|
||||
|
||||
/* Filing buffers */
|
||||
extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *);
|
||||
extern void __jbd2_journal_refile_buffer(struct journal_head *);
|
||||
extern bool __jbd2_journal_refile_buffer(struct journal_head *);
|
||||
extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *);
|
||||
extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int);
|
||||
extern void __journal_free_buffer(struct journal_head *bh);
|
||||
@ -1358,14 +1357,16 @@ static inline handle_t *journal_current_handle(void)
|
||||
|
||||
extern handle_t *jbd2_journal_start(journal_t *, int nblocks);
|
||||
extern handle_t *jbd2__journal_start(journal_t *, int blocks, int rsv_blocks,
|
||||
gfp_t gfp_mask, unsigned int type,
|
||||
unsigned int line_no);
|
||||
int revoke_records, gfp_t gfp_mask,
|
||||
unsigned int type, unsigned int line_no);
|
||||
extern int jbd2_journal_restart(handle_t *, int nblocks);
|
||||
extern int jbd2__journal_restart(handle_t *, int nblocks, gfp_t gfp_mask);
|
||||
extern int jbd2__journal_restart(handle_t *, int nblocks,
|
||||
int revoke_records, gfp_t gfp_mask);
|
||||
extern int jbd2_journal_start_reserved(handle_t *handle,
|
||||
unsigned int type, unsigned int line_no);
|
||||
extern void jbd2_journal_free_reserved(handle_t *handle);
|
||||
extern int jbd2_journal_extend (handle_t *, int nblocks);
|
||||
extern int jbd2_journal_extend(handle_t *handle, int nblocks,
|
||||
int revoke_records);
|
||||
extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
|
||||
extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
|
||||
extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *);
|
||||
@ -1560,38 +1561,19 @@ static inline int jbd2_journal_has_csum_v2or3(journal_t *journal)
|
||||
return journal->j_chksum_driver != NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* We reserve t_outstanding_credits >> JBD2_CONTROL_BLOCKS_SHIFT for
|
||||
* transaction control blocks.
|
||||
*/
|
||||
#define JBD2_CONTROL_BLOCKS_SHIFT 5
|
||||
|
||||
/*
|
||||
* Return the minimum number of blocks which must be free in the journal
|
||||
* before a new transaction may be started. Must be called under j_state_lock.
|
||||
*/
|
||||
static inline int jbd2_space_needed(journal_t *journal)
|
||||
{
|
||||
int nblocks = journal->j_max_transaction_buffers;
|
||||
return nblocks + (nblocks >> JBD2_CONTROL_BLOCKS_SHIFT);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return number of free blocks in the log. Must be called under j_state_lock.
|
||||
*/
|
||||
static inline unsigned long jbd2_log_space_left(journal_t *journal)
|
||||
{
|
||||
/* Allow for rounding errors */
|
||||
unsigned long free = journal->j_free - 32;
|
||||
long free = journal->j_free - 32;
|
||||
|
||||
if (journal->j_committing_transaction) {
|
||||
unsigned long committing = atomic_read(&journal->
|
||||
j_committing_transaction->t_outstanding_credits);
|
||||
|
||||
/* Transaction + control blocks */
|
||||
free -= committing + (committing >> JBD2_CONTROL_BLOCKS_SHIFT);
|
||||
free -= atomic_read(&journal->
|
||||
j_committing_transaction->t_outstanding_credits);
|
||||
}
|
||||
return free;
|
||||
return max_t(long, free, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1645,6 +1627,20 @@ static inline tid_t jbd2_get_latest_transaction(journal_t *journal)
|
||||
return tid;
|
||||
}
|
||||
|
||||
static inline int jbd2_handle_buffer_credits(handle_t *handle)
|
||||
{
|
||||
journal_t *journal;
|
||||
|
||||
if (!handle->h_reserved)
|
||||
journal = handle->h_transaction->t_journal;
|
||||
else
|
||||
journal = handle->h_journal;
|
||||
|
||||
return handle->h_total_credits -
|
||||
DIV_ROUND_UP(handle->h_revoke_credits_requested,
|
||||
journal->j_revoke_records_per_block);
|
||||
}
|
||||
|
||||
#ifdef __KERNEL__
|
||||
|
||||
#define buffer_trace_init(bh) do {} while (0)
|
||||
|
@ -11,6 +11,8 @@
|
||||
#ifndef JOURNAL_HEAD_H_INCLUDED
|
||||
#define JOURNAL_HEAD_H_INCLUDED
|
||||
|
||||
#include <linux/spinlock.h>
|
||||
|
||||
typedef unsigned int tid_t; /* Unique transaction ID */
|
||||
typedef struct transaction_s transaction_t; /* Compound transaction type */
|
||||
|
||||
@ -23,6 +25,11 @@ struct journal_head {
|
||||
*/
|
||||
struct buffer_head *b_bh;
|
||||
|
||||
/*
|
||||
* Protect the buffer head state
|
||||
*/
|
||||
spinlock_t b_state_lock;
|
||||
|
||||
/*
|
||||
* Reference count - see description in journal.c
|
||||
* [jbd_lock_bh_journal_head()]
|
||||
@ -30,7 +37,7 @@ struct journal_head {
|
||||
int b_jcount;
|
||||
|
||||
/*
|
||||
* Journalling list for this buffer [jbd_lock_bh_state()]
|
||||
* Journalling list for this buffer [b_state_lock]
|
||||
* NOTE: We *cannot* combine this with b_modified into a bitfield
|
||||
* as gcc would then (which the C standard allows but which is
|
||||
* very unuseful) make 64-bit accesses to the bitfield and clobber
|
||||
@ -41,20 +48,20 @@ struct journal_head {
|
||||
/*
|
||||
* This flag signals the buffer has been modified by
|
||||
* the currently running transaction
|
||||
* [jbd_lock_bh_state()]
|
||||
* [b_state_lock]
|
||||
*/
|
||||
unsigned b_modified;
|
||||
|
||||
/*
|
||||
* Copy of the buffer data frozen for writing to the log.
|
||||
* [jbd_lock_bh_state()]
|
||||
* [b_state_lock]
|
||||
*/
|
||||
char *b_frozen_data;
|
||||
|
||||
/*
|
||||
* Pointer to a saved copy of the buffer containing no uncommitted
|
||||
* deallocation references, so that allocations can avoid overwriting
|
||||
* uncommitted deletes. [jbd_lock_bh_state()]
|
||||
* uncommitted deletes. [b_state_lock]
|
||||
*/
|
||||
char *b_committed_data;
|
||||
|
||||
@ -63,7 +70,7 @@ struct journal_head {
|
||||
* metadata: either the running transaction or the committing
|
||||
* transaction (if there is one). Only applies to buffers on a
|
||||
* transaction's data or metadata journaling list.
|
||||
* [j_list_lock] [jbd_lock_bh_state()]
|
||||
* [j_list_lock] [b_state_lock]
|
||||
* Either of these locks is enough for reading, both are needed for
|
||||
* changes.
|
||||
*/
|
||||
@ -73,13 +80,13 @@ struct journal_head {
|
||||
* Pointer to the running compound transaction which is currently
|
||||
* modifying the buffer's metadata, if there was already a transaction
|
||||
* committing it when the new transaction touched it.
|
||||
* [t_list_lock] [jbd_lock_bh_state()]
|
||||
* [t_list_lock] [b_state_lock]
|
||||
*/
|
||||
transaction_t *b_next_transaction;
|
||||
|
||||
/*
|
||||
* Doubly-linked list of buffers on a transaction's data, metadata or
|
||||
* forget queue. [t_list_lock] [jbd_lock_bh_state()]
|
||||
* forget queue. [t_list_lock] [b_state_lock]
|
||||
*/
|
||||
struct journal_head *b_tnext, *b_tprev;
|
||||
|
||||
|
@ -1746,15 +1746,16 @@ TRACE_EVENT(ext4_load_inode,
|
||||
|
||||
TRACE_EVENT(ext4_journal_start,
|
||||
TP_PROTO(struct super_block *sb, int blocks, int rsv_blocks,
|
||||
unsigned long IP),
|
||||
int revoke_creds, unsigned long IP),
|
||||
|
||||
TP_ARGS(sb, blocks, rsv_blocks, IP),
|
||||
TP_ARGS(sb, blocks, rsv_blocks, revoke_creds, IP),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( dev_t, dev )
|
||||
__field(unsigned long, ip )
|
||||
__field( int, blocks )
|
||||
__field( int, rsv_blocks )
|
||||
__field( int, revoke_creds )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
@ -1762,11 +1763,13 @@ TRACE_EVENT(ext4_journal_start,
|
||||
__entry->ip = IP;
|
||||
__entry->blocks = blocks;
|
||||
__entry->rsv_blocks = rsv_blocks;
|
||||
__entry->revoke_creds = revoke_creds;
|
||||
),
|
||||
|
||||
TP_printk("dev %d,%d blocks, %d rsv_blocks, %d caller %pS",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->blocks, __entry->rsv_blocks, (void *)__entry->ip)
|
||||
TP_printk("dev %d,%d blocks %d, rsv_blocks %d, revoke_creds %d, "
|
||||
"caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->blocks, __entry->rsv_blocks, __entry->revoke_creds,
|
||||
(void *)__entry->ip)
|
||||
);
|
||||
|
||||
TRACE_EVENT(ext4_journal_start_reserved,
|
||||
|
@ -133,7 +133,7 @@ TRACE_EVENT(jbd2_submit_inode_data,
|
||||
(unsigned long) __entry->ino)
|
||||
);
|
||||
|
||||
TRACE_EVENT(jbd2_handle_start,
|
||||
DECLARE_EVENT_CLASS(jbd2_handle_start_class,
|
||||
TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
|
||||
unsigned int line_no, int requested_blocks),
|
||||
|
||||
@ -161,6 +161,20 @@ TRACE_EVENT(jbd2_handle_start,
|
||||
__entry->type, __entry->line_no, __entry->requested_blocks)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(jbd2_handle_start_class, jbd2_handle_start,
|
||||
TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
|
||||
unsigned int line_no, int requested_blocks),
|
||||
|
||||
TP_ARGS(dev, tid, type, line_no, requested_blocks)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(jbd2_handle_start_class, jbd2_handle_restart,
|
||||
TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
|
||||
unsigned int line_no, int requested_blocks),
|
||||
|
||||
TP_ARGS(dev, tid, type, line_no, requested_blocks)
|
||||
);
|
||||
|
||||
TRACE_EVENT(jbd2_handle_extend,
|
||||
TP_PROTO(dev_t dev, unsigned long tid, unsigned int type,
|
||||
unsigned int line_no, int buffer_credits,
|
||||
|
Loading…
Reference in New Issue
Block a user