diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3c51e243450d..6b678b968d84 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1536,6 +1536,8 @@ struct ext4_sb_info { unsigned int s_mb_free_pending; struct list_head s_freed_data_list; /* List of blocks to be freed after commit completed */ + struct list_head s_discard_list; + struct work_struct s_discard_work; struct rb_root s_mb_avg_fragment_size_root; rwlock_t s_mb_rb_lock; struct list_head *s_mb_largest_free_orders; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 34be2f07449d..907b3577988c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -408,6 +408,10 @@ static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac); static bool ext4_mb_good_group(struct ext4_allocation_context *ac, ext4_group_t group, int cr); +static int ext4_try_to_trim_range(struct super_block *sb, + struct ext4_buddy *e4b, ext4_grpblk_t start, + ext4_grpblk_t max, ext4_grpblk_t minblocks); + /* * The algorithm using this percpu seq counter goes below: * 1. We sample the percpu discard_pa_seq counter before trying for block @@ -3308,6 +3312,55 @@ static int ext4_groupinfo_create_slab(size_t size) return 0; } +static void ext4_discard_work(struct work_struct *work) +{ + struct ext4_sb_info *sbi = container_of(work, + struct ext4_sb_info, s_discard_work); + struct super_block *sb = sbi->s_sb; + struct ext4_free_data *fd, *nfd; + struct ext4_buddy e4b; + struct list_head discard_list; + ext4_group_t grp, load_grp; + int err = 0; + + INIT_LIST_HEAD(&discard_list); + spin_lock(&sbi->s_md_lock); + list_splice_init(&sbi->s_discard_list, &discard_list); + spin_unlock(&sbi->s_md_lock); + + load_grp = UINT_MAX; + list_for_each_entry_safe(fd, nfd, &discard_list, efd_list) { + /* + * If filesystem is umounting or no memory, give up the discard + */ + if ((sb->s_flags & SB_ACTIVE) && !err) { + grp = fd->efd_group; + if (grp != load_grp) { + if (load_grp != UINT_MAX) + ext4_mb_unload_buddy(&e4b); + + err = ext4_mb_load_buddy(sb, grp, &e4b); + if (err) { + kmem_cache_free(ext4_free_data_cachep, fd); + load_grp = UINT_MAX; + continue; + } else { + load_grp = grp; + } + } + + ext4_lock_group(sb, grp); + ext4_try_to_trim_range(sb, &e4b, fd->efd_start_cluster, + fd->efd_start_cluster + fd->efd_count - 1, 1); + ext4_unlock_group(sb, grp); + } + kmem_cache_free(ext4_free_data_cachep, fd); + } + + if (load_grp != UINT_MAX) + ext4_mb_unload_buddy(&e4b); +} + int ext4_mb_init(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -3376,6 +3429,8 @@ int ext4_mb_init(struct super_block *sb) spin_lock_init(&sbi->s_md_lock); sbi->s_mb_free_pending = 0; INIT_LIST_HEAD(&sbi->s_freed_data_list); + INIT_LIST_HEAD(&sbi->s_discard_list); + INIT_WORK(&sbi->s_discard_work, ext4_discard_work); sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN; @@ -3474,6 +3529,14 @@ int ext4_mb_release(struct super_block *sb) struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); int count; + if (test_opt(sb, DISCARD)) { + /* + * wait the discard work to drain all of ext4_free_data + */ + flush_work(&sbi->s_discard_work); + WARN_ON_ONCE(!list_empty(&sbi->s_discard_list)); + } + if (sbi->s_group_info) { for (i = 0; i < ngroups; i++) { cond_resched(); @@ -3596,7 +3659,6 @@ static void ext4_free_data_in_buddy(struct super_block *sb, put_page(e4b.bd_bitmap_page); } ext4_unlock_group(sb, entry->efd_group); - kmem_cache_free(ext4_free_data_cachep, entry); ext4_mb_unload_buddy(&e4b); mb_debug(sb, "freed %d blocks in %d structures\n", count, @@ -3611,10 +3673,9 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_free_data *entry, *tmp; - struct bio *discard_bio = NULL; struct list_head freed_data_list; struct list_head *cut_pos = NULL; - int err; + bool wake; INIT_LIST_HEAD(&freed_data_list); @@ -3629,30 +3690,20 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid) cut_pos); spin_unlock(&sbi->s_md_lock); - if (test_opt(sb, DISCARD)) { - list_for_each_entry(entry, &freed_data_list, efd_list) { - err = ext4_issue_discard(sb, entry->efd_group, - entry->efd_start_cluster, - entry->efd_count, - &discard_bio); - if (err && err != -EOPNOTSUPP) { - ext4_msg(sb, KERN_WARNING, "discard request in" - " group:%d block:%d count:%d failed" - " with %d", entry->efd_group, - entry->efd_start_cluster, - entry->efd_count, err); - } else if (err == -EOPNOTSUPP) - break; - } - - if (discard_bio) { - submit_bio_wait(discard_bio); - bio_put(discard_bio); - } - } - - list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list) + list_for_each_entry(entry, &freed_data_list, efd_list) ext4_free_data_in_buddy(sb, entry); + + if (test_opt(sb, DISCARD)) { + spin_lock(&sbi->s_md_lock); + wake = list_empty(&sbi->s_discard_list); + list_splice_tail(&freed_data_list, &sbi->s_discard_list); + spin_unlock(&sbi->s_md_lock); + if (wake) + queue_work(system_unbound_wq, &sbi->s_discard_work); + } else { + list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list) + kmem_cache_free(ext4_free_data_cachep, entry); + } } int __init ext4_init_mballoc(void)