writeback: flush inode cgroup wb switches instead of pinning super_block
If cgroup writeback is in use, inodes can be scheduled for asynchronous wb switching. Before 5ff8eaac16
("writeback: keep superblock pinned during cgroup writeback association switches"), this could race with umount leading to super_block being destroyed while inodes are pinned for wb switching. 5ff8eaac16
fixed it by bumping s_active while wb switches are in flight; however, this allowed in-flight wb switches to make umounts asynchronous when the userland expected synchronicity - e.g. fsck immediately following umount may fail because the device is still busy. This patch removes the problematic super_block pinning and instead makes generic_shutdown_super() flush in-flight wb switches. wb switches are now executed on a dedicated isw_wq so that they can be flushed and isw_nr_in_flight keeps track of the number of in-flight wb switches so that flushing can be avoided in most cases. v2: Move cgroup_writeback_umount() further below and add MS_ACTIVE check in inode_switch_wbs() as Jan and Al suggested. Signed-off-by: Tejun Heo <tj@kernel.org> Reported-by: Tahsin Erdogan <tahsin@google.com> Cc: Jan Kara <jack@suse.cz> Cc: Al Viro <viro@ZenIV.linux.org.uk> Link: http://lkml.kernel.org/g/CAAeU0aNCq7LGODvVGRU-oU_o-6enii5ey0p1c26D1ZzYwkDc5A@mail.gmail.com Fixes: 5ff8eaac16
("writeback: keep superblock pinned during cgroup writeback association switches") Cc: stable@vger.kernel.org #v4.5 Reviewed-by: Jan Kara <jack@suse.cz> Tested-by: Tahsin Erdogan <tahsin@google.com> Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
parent
e9fc63d682
commit
a1a0e23e49
@ -223,6 +223,9 @@ static void wb_wait_for_completion(struct backing_dev_info *bdi,
|
||||
#define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1)
|
||||
/* one round can affect up to 5 slots */
|
||||
|
||||
static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
|
||||
static struct workqueue_struct *isw_wq;
|
||||
|
||||
void __inode_attach_wb(struct inode *inode, struct page *page)
|
||||
{
|
||||
struct backing_dev_info *bdi = inode_to_bdi(inode);
|
||||
@ -317,7 +320,6 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
|
||||
struct inode_switch_wbs_context *isw =
|
||||
container_of(work, struct inode_switch_wbs_context, work);
|
||||
struct inode *inode = isw->inode;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
struct bdi_writeback *old_wb = inode->i_wb;
|
||||
struct bdi_writeback *new_wb = isw->new_wb;
|
||||
@ -424,8 +426,9 @@ skip_switch:
|
||||
wb_put(new_wb);
|
||||
|
||||
iput(inode);
|
||||
deactivate_super(sb);
|
||||
kfree(isw);
|
||||
|
||||
atomic_dec(&isw_nr_in_flight);
|
||||
}
|
||||
|
||||
static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
|
||||
@ -435,7 +438,7 @@ static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
|
||||
|
||||
/* needs to grab bh-unsafe locks, bounce to work item */
|
||||
INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
|
||||
schedule_work(&isw->work);
|
||||
queue_work(isw_wq, &isw->work);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -471,20 +474,20 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
|
||||
|
||||
/* while holding I_WB_SWITCH, no one else can update the association */
|
||||
spin_lock(&inode->i_lock);
|
||||
|
||||
if (inode->i_state & (I_WB_SWITCH | I_FREEING) ||
|
||||
inode_to_wb(inode) == isw->new_wb)
|
||||
goto out_unlock;
|
||||
|
||||
if (!atomic_inc_not_zero(&inode->i_sb->s_active))
|
||||
goto out_unlock;
|
||||
|
||||
if (!(inode->i_sb->s_flags & MS_ACTIVE) ||
|
||||
inode->i_state & (I_WB_SWITCH | I_FREEING) ||
|
||||
inode_to_wb(inode) == isw->new_wb) {
|
||||
spin_unlock(&inode->i_lock);
|
||||
goto out_free;
|
||||
}
|
||||
inode->i_state |= I_WB_SWITCH;
|
||||
spin_unlock(&inode->i_lock);
|
||||
|
||||
ihold(inode);
|
||||
isw->inode = inode;
|
||||
|
||||
atomic_inc(&isw_nr_in_flight);
|
||||
|
||||
/*
|
||||
* In addition to synchronizing among switchers, I_WB_SWITCH tells
|
||||
* the RCU protected stat update paths to grab the mapping's
|
||||
@ -494,8 +497,6 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
|
||||
call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
|
||||
return;
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&inode->i_lock);
|
||||
out_free:
|
||||
if (isw->new_wb)
|
||||
wb_put(isw->new_wb);
|
||||
@ -847,6 +848,33 @@ restart:
|
||||
wb_put(last_wb);
|
||||
}
|
||||
|
||||
/**
|
||||
* cgroup_writeback_umount - flush inode wb switches for umount
|
||||
*
|
||||
* This function is called when a super_block is about to be destroyed and
|
||||
* flushes in-flight inode wb switches. An inode wb switch goes through
|
||||
* RCU and then workqueue, so the two need to be flushed in order to ensure
|
||||
* that all previously scheduled switches are finished. As wb switches are
|
||||
* rare occurrences and synchronize_rcu() can take a while, perform
|
||||
* flushing iff wb switches are in flight.
|
||||
*/
|
||||
void cgroup_writeback_umount(void)
|
||||
{
|
||||
if (atomic_read(&isw_nr_in_flight)) {
|
||||
synchronize_rcu();
|
||||
flush_workqueue(isw_wq);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Create the dedicated "inode_switch_wbs" workqueue and publish it in
 * isw_wq; inode wb switch work items are queued there so that they can be
 * flushed as a group.
 *
 * Returns 0 on success, -ENOMEM if the workqueue could not be allocated.
 */
static int __init cgroup_writeback_init(void)
{
	isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);

	return isw_wq ? 0 : -ENOMEM;
}
fs_initcall(cgroup_writeback_init);
|
||||
|
||||
#else /* CONFIG_CGROUP_WRITEBACK */
|
||||
|
||||
static struct bdi_writeback *
|
||||
|
@ -415,6 +415,7 @@ void generic_shutdown_super(struct super_block *sb)
|
||||
sb->s_flags &= ~MS_ACTIVE;
|
||||
|
||||
fsnotify_unmount_inodes(sb);
|
||||
cgroup_writeback_umount();
|
||||
|
||||
evict_inodes(sb);
|
||||
|
||||
|
@ -198,6 +198,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
|
||||
void wbc_detach_inode(struct writeback_control *wbc);
|
||||
void wbc_account_io(struct writeback_control *wbc, struct page *page,
|
||||
size_t bytes);
|
||||
void cgroup_writeback_umount(void);
|
||||
|
||||
/**
|
||||
* inode_attach_wb - associate an inode with its wb
|
||||
@ -301,6 +302,10 @@ static inline void wbc_account_io(struct writeback_control *wbc,
|
||||
{
|
||||
}
|
||||
|
||||
/*
 * No-op variant of cgroup_writeback_umount(): there is nothing to flush.
 * NOTE(review): presumably this is the !CONFIG_CGROUP_WRITEBACK stub (the
 * matching #endif follows) -- confirm against the full header.
 */
static inline void cgroup_writeback_umount(void) { }
|
||||
|
||||
#endif /* CONFIG_CGROUP_WRITEBACK */
|
||||
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user