ext4: restructure writeback path

There are two issues with the current writeback path in ext4.  First, we
don't necessarily map complete pages when blocksize < pagesize and
thus may not do any writeback in one iteration.  We always map some
blocks though, so we will eventually finish mapping the page.  However,
if writeback races with other operations on the file, forward progress
is not really guaranteed. The second problem is that the current code
structure makes it hard to associate all the bios for some range of
pages with one io_end structure so that unwritten extents can be
converted after all the bios are finished.  This will be especially
difficult later when io_end is associated with a reserved
transaction handle.

We restructure the writeback path into a relatively simple loop which
first prepares an extent of pages, then maps one or more extents so that
no page is partially mapped, and once a page is fully mapped it is
submitted for IO. We keep all the mapping and IO submission
information in the mpage_da_data structure to somewhat reduce stack usage.
The resulting code is somewhat shorter than the old one and hopefully also
easier to read.

Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
Jan Kara 2013-06-04 13:17:40 -04:00 committed by Theodore Ts'o
parent fffb273997
commit 4e7ea81db5
4 changed files with 526 additions and 566 deletions
fs/ext4
include/trace/events

View File

@ -176,21 +176,6 @@ struct ext4_map_blocks {
unsigned int m_flags;
};
/*
* For delayed allocation tracking
*/
struct mpage_da_data {
struct inode *inode;
sector_t b_blocknr; /* start block number of extent */
size_t b_size; /* size of extent */
unsigned long b_state; /* state of the extent */
unsigned long first_page, next_page; /* extent of pages */
struct writeback_control *wbc; /* its sync_mode is reported by the ext4_da_write_pages tracepoint */
int io_done; /* reported by the ext4_da_write_pages tracepoint */
int pages_written; /* reported by the ext4_da_write_pages tracepoint */
int retval; /* NOTE(review): presumably the result of the writeback attempt - confirm */
};
/*
* Flags for ext4_io_end->flags
*/

File diff suppressed because it is too large Load Diff

View File

@ -360,9 +360,6 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
bio->bi_bdev = bh->b_bdev;
bio->bi_end_io = ext4_end_bio;
bio->bi_private = ext4_get_io_end(io->io_end);
if (!io->io_end->size)
io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT)
+ bh_offset(bh);
io->io_bio = bio;
io->io_next_block = bh->b_blocknr;
return 0;
@ -390,7 +387,6 @@ submit_and_retry:
io_end = io->io_end;
if (test_clear_buffer_uninit(bh))
ext4_set_io_unwritten_flag(inode, io_end);
io_end->size += bh->b_size;
io->io_next_block++;
return 0;
}

View File

@ -324,43 +324,59 @@ TRACE_EVENT(ext4_da_writepages,
);
TRACE_EVENT(ext4_da_write_pages,
TP_PROTO(struct inode *inode, struct mpage_da_data *mpd),
TP_PROTO(struct inode *inode, pgoff_t first_page,
struct writeback_control *wbc),
TP_ARGS(inode, mpd),
TP_ARGS(inode, first_page, wbc),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, b_blocknr )
__field( __u32, b_size )
__field( __u32, b_state )
__field( unsigned long, first_page )
__field( int, io_done )
__field( int, pages_written )
__field( pgoff_t, first_page )
__field( long, nr_to_write )
__field( int, sync_mode )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->b_blocknr = mpd->b_blocknr;
__entry->b_size = mpd->b_size;
__entry->b_state = mpd->b_state;
__entry->first_page = mpd->first_page;
__entry->io_done = mpd->io_done;
__entry->pages_written = mpd->pages_written;
__entry->sync_mode = mpd->wbc->sync_mode;
__entry->first_page = first_page;
__entry->nr_to_write = wbc->nr_to_write;
__entry->sync_mode = wbc->sync_mode;
),
TP_printk("dev %d,%d ino %lu b_blocknr %llu b_size %u b_state 0x%04x "
"first_page %lu io_done %d pages_written %d sync_mode %d",
TP_printk("dev %d,%d ino %lu first_page %lu nr_to_write %ld "
"sync_mode %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino,
__entry->b_blocknr, __entry->b_size,
__entry->b_state, __entry->first_page,
__entry->io_done, __entry->pages_written,
__entry->sync_mode
)
(unsigned long) __entry->ino, __entry->first_page,
__entry->nr_to_write, __entry->sync_mode)
);
/*
 * Tracepoint ext4_da_write_pages_extent.
 *
 * Takes an inode and an ext4_map_blocks descriptor and records the device
 * and inode number of @inode together with the m_lblk, m_len and m_flags
 * fields of @map.
 */
TRACE_EVENT(ext4_da_write_pages_extent,
TP_PROTO(struct inode *inode, struct ext4_map_blocks *map),
TP_ARGS(inode, map),
/* Layout of the recorded trace entry. */
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, lblk )
__field( __u32, len )
__field( __u32, flags )
),
/* Copy the values out of @inode and @map into the trace entry. */
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->lblk = map->m_lblk;
__entry->len = map->m_len;
__entry->flags = map->m_flags;
),
/* Output format: device as major,minor; flags in hex. */
TP_printk("dev %d,%d ino %lu lblk %llu len %u flags 0x%04x",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, __entry->lblk, __entry->len,
__entry->flags)
);
TRACE_EVENT(ext4_da_writepages_result,