f2fs: support in batch multi blocks preallocation

This patch introduces reserve_new_blocks() to preallocate multiple blocks
in a single batch operation, which avoids a lot of redundant work and
results in better performance.

In a virtual machine, with a rotational device:

time fallocate -l 32G /mnt/f2fs/file

Before:
real	0m4.584s
user	0m0.000s
sys	0m4.580s

After:
real	0m0.292s
user	0m0.000s
sys	0m0.272s

On x86, with an SSD:

time fallocate -l 500G $MNT/testfile

Before : 24.758 s
After  :  1.604 s

Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: fix bugs and add performance numbers measured in x86.]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
This commit is contained in:
Chao Yu 2016-05-09 19:56:30 +08:00 committed by Jaegeuk Kim
parent 0fac558b96
commit 46008c6d42
3 changed files with 117 additions and 49 deletions

View File

@ -278,6 +278,16 @@ alloc_new:
trace_f2fs_submit_page_mbio(fio->page, fio); trace_f2fs_submit_page_mbio(fio->page, fio);
} }
/*
 * Store dn->data_blkaddr (converted with cpu_to_le32()) into the slot
 * dn->ofs_in_node of the block address array found in dn->node_page.
 *
 * This helper only writes the slot: it does not wait for page writeback
 * and does not mark the node page dirty, so callers handle both.
 */
static void __set_data_blkaddr(struct dnode_of_data *dn)
{
	__le32 *slots = blkaddr_in_node(F2FS_NODE(dn->node_page));

	slots[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
}
/* /*
* Lock ordering for the change of data block address: * Lock ordering for the change of data block address:
* ->data_page * ->data_page
@ -286,19 +296,9 @@ alloc_new:
*/ */
void set_data_blkaddr(struct dnode_of_data *dn) void set_data_blkaddr(struct dnode_of_data *dn)
{ {
struct f2fs_node *rn; f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
__le32 *addr_array; __set_data_blkaddr(dn);
struct page *node_page = dn->node_page; if (set_page_dirty(dn->node_page))
unsigned int ofs_in_node = dn->ofs_in_node;
f2fs_wait_on_page_writeback(node_page, NODE, true);
rn = F2FS_NODE(node_page);
/* Get physical address of data block */
addr_array = blkaddr_in_node(rn);
addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
if (set_page_dirty(node_page))
dn->node_changed = true; dn->node_changed = true;
} }
@ -309,24 +309,53 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
f2fs_update_extent_cache(dn); f2fs_update_extent_cache(dn);
} }
int reserve_new_block(struct dnode_of_data *dn) /* dn->ofs_in_node will be returned with up-to-date last block pointer */
int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
{ {
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
if (!count)
return 0;
if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
return -EPERM; return -EPERM;
if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count)))
return -ENOSPC; return -ENOSPC;
trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node); trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
dn->ofs_in_node, count);
f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
for (; count > 0; dn->ofs_in_node++) {
block_t blkaddr =
datablock_addr(dn->node_page, dn->ofs_in_node);
if (blkaddr == NULL_ADDR) {
dn->data_blkaddr = NEW_ADDR;
__set_data_blkaddr(dn);
count--;
}
}
if (set_page_dirty(dn->node_page))
dn->node_changed = true;
dn->data_blkaddr = NEW_ADDR;
set_data_blkaddr(dn);
mark_inode_dirty(dn->inode); mark_inode_dirty(dn->inode);
sync_inode_page(dn); sync_inode_page(dn);
return 0; return 0;
} }
/*
 * Reserve a single block through reserve_new_blocks().
 *
 * dn->ofs_in_node is saved before the call and restored afterwards, so the
 * caller sees it unchanged whatever the outcome.  Returns the result of
 * reserve_new_blocks(dn, 1) as-is.
 */
int reserve_new_block(struct dnode_of_data *dn)
{
	const unsigned int saved_ofs = dn->ofs_in_node;
	int err = reserve_new_blocks(dn, 1);

	/* Put the offset back where the caller left it. */
	dn->ofs_in_node = saved_ofs;
	return err;
}
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{ {
bool need_put = dn->inode_page ? false : true; bool need_put = dn->inode_page ? false : true;
@ -545,6 +574,7 @@ static int __allocate_data_block(struct dnode_of_data *dn)
struct node_info ni; struct node_info ni;
int seg = CURSEG_WARM_DATA; int seg = CURSEG_WARM_DATA;
pgoff_t fofs; pgoff_t fofs;
blkcnt_t count = 1;
if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
return -EPERM; return -EPERM;
@ -553,7 +583,7 @@ static int __allocate_data_block(struct dnode_of_data *dn)
if (dn->data_blkaddr == NEW_ADDR) if (dn->data_blkaddr == NEW_ADDR)
goto alloc; goto alloc;
if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1))) if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count)))
return -ENOSPC; return -ENOSPC;
alloc: alloc:
@ -621,8 +651,10 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
struct dnode_of_data dn; struct dnode_of_data dn;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
pgoff_t pgofs, end_offset; pgoff_t pgofs, end_offset, end;
int err = 0, ofs = 1; int err = 0, ofs = 1;
unsigned int ofs_in_node, last_ofs_in_node;
blkcnt_t prealloc;
struct extent_info ei; struct extent_info ei;
bool allocated = false; bool allocated = false;
block_t blkaddr; block_t blkaddr;
@ -632,6 +664,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
/* it only supports block size == page size */ /* it only supports block size == page size */
pgofs = (pgoff_t)map->m_lblk; pgofs = (pgoff_t)map->m_lblk;
end = pgofs + maxblocks;
if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) { if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
map->m_pblk = ei.blk + pgofs - ei.fofs; map->m_pblk = ei.blk + pgofs - ei.fofs;
@ -659,6 +692,8 @@ next_dnode:
goto unlock_out; goto unlock_out;
} }
prealloc = 0;
ofs_in_node = dn.ofs_in_node;
end_offset = ADDRS_PER_PAGE(dn.node_page, inode); end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
next_block: next_block:
@ -671,17 +706,20 @@ next_block:
goto sync_out; goto sync_out;
} }
if (flag == F2FS_GET_BLOCK_PRE_AIO) { if (flag == F2FS_GET_BLOCK_PRE_AIO) {
if (blkaddr == NULL_ADDR) if (blkaddr == NULL_ADDR) {
err = reserve_new_block(&dn); prealloc++;
last_ofs_in_node = dn.ofs_in_node;
}
} else { } else {
err = __allocate_data_block(&dn); err = __allocate_data_block(&dn);
if (!err) if (!err) {
set_inode_flag(F2FS_I(inode), set_inode_flag(F2FS_I(inode),
FI_APPEND_WRITE); FI_APPEND_WRITE);
allocated = true;
}
} }
if (err) if (err)
goto sync_out; goto sync_out;
allocated = true;
map->m_flags = F2FS_MAP_NEW; map->m_flags = F2FS_MAP_NEW;
blkaddr = dn.data_blkaddr; blkaddr = dn.data_blkaddr;
} else { } else {
@ -700,6 +738,9 @@ next_block:
} }
} }
if (flag == F2FS_GET_BLOCK_PRE_AIO)
goto skip;
if (map->m_len == 0) { if (map->m_len == 0) {
/* preallocated unwritten block should be mapped for fiemap. */ /* preallocated unwritten block should be mapped for fiemap. */
if (blkaddr == NEW_ADDR) if (blkaddr == NEW_ADDR)
@ -711,33 +752,50 @@ next_block:
} else if ((map->m_pblk != NEW_ADDR && } else if ((map->m_pblk != NEW_ADDR &&
blkaddr == (map->m_pblk + ofs)) || blkaddr == (map->m_pblk + ofs)) ||
(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) || (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
flag == F2FS_GET_BLOCK_PRE_DIO || flag == F2FS_GET_BLOCK_PRE_DIO) {
flag == F2FS_GET_BLOCK_PRE_AIO) {
ofs++; ofs++;
map->m_len++; map->m_len++;
} else { } else {
goto sync_out; goto sync_out;
} }
skip:
dn.ofs_in_node++; dn.ofs_in_node++;
pgofs++; pgofs++;
if (map->m_len < maxblocks) { /* preallocate blocks in batch for one dnode page */
if (dn.ofs_in_node < end_offset) if (flag == F2FS_GET_BLOCK_PRE_AIO &&
goto next_block; (pgofs == end || dn.ofs_in_node == end_offset)) {
if (allocated) dn.ofs_in_node = ofs_in_node;
sync_inode_page(&dn); err = reserve_new_blocks(&dn, prealloc);
f2fs_put_dnode(&dn); if (err)
goto sync_out;
if (create) { map->m_len += dn.ofs_in_node - ofs_in_node;
f2fs_unlock_op(sbi); if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
f2fs_balance_fs(sbi, allocated); err = -ENOSPC;
goto sync_out;
} }
allocated = false; dn.ofs_in_node = end_offset;
goto next_dnode;
} }
if (pgofs >= end)
goto sync_out;
else if (dn.ofs_in_node < end_offset)
goto next_block;
if (allocated)
sync_inode_page(&dn);
f2fs_put_dnode(&dn);
if (create) {
f2fs_unlock_op(sbi);
f2fs_balance_fs(sbi, allocated);
}
allocated = false;
goto next_dnode;
sync_out: sync_out:
if (allocated) if (allocated)
sync_inode_page(&dn); sync_inode_page(&dn);

View File

@ -1094,7 +1094,7 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs)
} }
static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
struct inode *inode, blkcnt_t count) struct inode *inode, blkcnt_t *count)
{ {
block_t valid_block_count; block_t valid_block_count;
@ -1106,14 +1106,19 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi,
} }
#endif #endif
valid_block_count = valid_block_count =
sbi->total_valid_block_count + (block_t)count; sbi->total_valid_block_count + (block_t)(*count);
if (unlikely(valid_block_count > sbi->user_block_count)) { if (unlikely(valid_block_count > sbi->user_block_count)) {
spin_unlock(&sbi->stat_lock); *count = sbi->user_block_count - sbi->total_valid_block_count;
return false; if (!*count) {
spin_unlock(&sbi->stat_lock);
return false;
}
} }
inode->i_blocks += count; /* *count can be recalculated */
sbi->total_valid_block_count = valid_block_count; inode->i_blocks += *count;
sbi->alloc_valid_block_count += (block_t)count; sbi->total_valid_block_count =
sbi->total_valid_block_count + (block_t)(*count);
sbi->alloc_valid_block_count += (block_t)(*count);
spin_unlock(&sbi->stat_lock); spin_unlock(&sbi->stat_lock);
return true; return true;
} }
@ -1945,6 +1950,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *);
void f2fs_submit_page_mbio(struct f2fs_io_info *); void f2fs_submit_page_mbio(struct f2fs_io_info *);
void set_data_blkaddr(struct dnode_of_data *); void set_data_blkaddr(struct dnode_of_data *);
void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t); void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t);
int reserve_new_blocks(struct dnode_of_data *, blkcnt_t);
int reserve_new_block(struct dnode_of_data *); int reserve_new_block(struct dnode_of_data *);
int f2fs_get_block(struct dnode_of_data *, pgoff_t); int f2fs_get_block(struct dnode_of_data *, pgoff_t);
ssize_t f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *); ssize_t f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *);

View File

@ -694,28 +694,32 @@ TRACE_EVENT(f2fs_direct_IO_exit,
__entry->ret) __entry->ret)
); );
TRACE_EVENT(f2fs_reserve_new_block, TRACE_EVENT(f2fs_reserve_new_blocks,
TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node), TP_PROTO(struct inode *inode, nid_t nid, unsigned int ofs_in_node,
blkcnt_t count),
TP_ARGS(inode, nid, ofs_in_node), TP_ARGS(inode, nid, ofs_in_node, count),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(dev_t, dev) __field(dev_t, dev)
__field(nid_t, nid) __field(nid_t, nid)
__field(unsigned int, ofs_in_node) __field(unsigned int, ofs_in_node)
__field(blkcnt_t, count)
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = inode->i_sb->s_dev; __entry->dev = inode->i_sb->s_dev;
__entry->nid = nid; __entry->nid = nid;
__entry->ofs_in_node = ofs_in_node; __entry->ofs_in_node = ofs_in_node;
__entry->count = count;
), ),
TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u", TP_printk("dev = (%d,%d), nid = %u, ofs_in_node = %u, count = %llu",
show_dev(__entry), show_dev(__entry),
(unsigned int)__entry->nid, (unsigned int)__entry->nid,
__entry->ofs_in_node) __entry->ofs_in_node,
(unsigned long long)__entry->count)
); );
DECLARE_EVENT_CLASS(f2fs__submit_page_bio, DECLARE_EVENT_CLASS(f2fs__submit_page_bio,