forked from Minki/linux
\n
-----BEGIN PGP SIGNATURE----- iQEzBAABCAAdFiEEq1nRK9aeMoq1VSgcnJ2qBz9kQNkFAl9JG9wACgkQnJ2qBz9k QNlp3ggA3B/Xopb2X3cCpf2fFw63YGJU4i0XJxi+3fC/v6m8U+D4XbqJUjaM5TZz +4XABQf7OHvSwDezc3n6KXXD/zbkZCeVm9aohEXvfMYLyKbs+S7QNQALHEtpfBUU 3IY2pQ90K7JT9cD9pJls/Y/EaA1ObWP7+3F1zpw8OutGchKcE8SvVjzL3SSJaj7k d8OTtMosAFuTe4saFWfsf9CmZzbx4sZw3VAzXEXAArrxsmqFKIcY8dI8TQ0WaYNh C3wQFvW+n9wHapylyi7RhGl2QH9Tj8POfnCTahNFFJbsmJBx0Z3r42mCBAk4janG FW+uDdH5V780bTNNVUKz0v4C/YDiKg== =jQnW -----END PGP SIGNATURE----- Merge tag 'writeback_for_v5.9-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs Pull writeback fixes from Jan Kara: "Fixes for writeback code occasionally skipping writeback of some inodes or livelocking sync(2)" * tag 'writeback_for_v5.9-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs: writeback: Drop I_DIRTY_TIME_EXPIRE writeback: Fix sync livelock due to b_dirty_time processing writeback: Avoid skipping inode writeback writeback: Protect inode->i_io_list with inode->i_lock
This commit is contained in:
commit
e309428590
@ -4901,7 +4901,7 @@ static void __ext4_update_other_inode_time(struct super_block *sb,
|
||||
(inode->i_state & I_DIRTY_TIME)) {
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
|
||||
inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
|
||||
inode->i_state &= ~I_DIRTY_TIME;
|
||||
spin_unlock(&inode->i_lock);
|
||||
|
||||
spin_lock(&ei->i_raw_lock);
|
||||
|
@ -42,7 +42,6 @@
|
||||
struct wb_writeback_work {
|
||||
long nr_pages;
|
||||
struct super_block *sb;
|
||||
unsigned long *older_than_this;
|
||||
enum writeback_sync_modes sync_mode;
|
||||
unsigned int tagged_writepages:1;
|
||||
unsigned int for_kupdate:1;
|
||||
@ -144,7 +143,9 @@ static void inode_io_list_del_locked(struct inode *inode,
|
||||
struct bdi_writeback *wb)
|
||||
{
|
||||
assert_spin_locked(&wb->list_lock);
|
||||
assert_spin_locked(&inode->i_lock);
|
||||
|
||||
inode->i_state &= ~I_SYNC_QUEUED;
|
||||
list_del_init(&inode->i_io_list);
|
||||
wb_io_lists_depopulated(wb);
|
||||
}
|
||||
@ -1122,7 +1123,9 @@ void inode_io_list_del(struct inode *inode)
|
||||
struct bdi_writeback *wb;
|
||||
|
||||
wb = inode_to_wb_and_lock_list(inode);
|
||||
spin_lock(&inode->i_lock);
|
||||
inode_io_list_del_locked(inode, wb);
|
||||
spin_unlock(&inode->i_lock);
|
||||
spin_unlock(&wb->list_lock);
|
||||
}
|
||||
EXPORT_SYMBOL(inode_io_list_del);
|
||||
@ -1172,8 +1175,10 @@ void sb_clear_inode_writeback(struct inode *inode)
|
||||
* the case then the inode must have been redirtied while it was being written
|
||||
* out and we don't reset its dirtied_when.
|
||||
*/
|
||||
static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
|
||||
static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb)
|
||||
{
|
||||
assert_spin_locked(&inode->i_lock);
|
||||
|
||||
if (!list_empty(&wb->b_dirty)) {
|
||||
struct inode *tail;
|
||||
|
||||
@ -1182,6 +1187,14 @@ static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
|
||||
inode->dirtied_when = jiffies;
|
||||
}
|
||||
inode_io_list_move_locked(inode, wb, &wb->b_dirty);
|
||||
inode->i_state &= ~I_SYNC_QUEUED;
|
||||
}
|
||||
|
||||
static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
|
||||
{
|
||||
spin_lock(&inode->i_lock);
|
||||
redirty_tail_locked(inode, wb);
|
||||
spin_unlock(&inode->i_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1220,16 +1233,13 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
|
||||
#define EXPIRE_DIRTY_ATIME 0x0001
|
||||
|
||||
/*
|
||||
* Move expired (dirtied before work->older_than_this) dirty inodes from
|
||||
* Move expired (dirtied before dirtied_before) dirty inodes from
|
||||
* @delaying_queue to @dispatch_queue.
|
||||
*/
|
||||
static int move_expired_inodes(struct list_head *delaying_queue,
|
||||
struct list_head *dispatch_queue,
|
||||
int flags,
|
||||
struct wb_writeback_work *work)
|
||||
unsigned long dirtied_before)
|
||||
{
|
||||
unsigned long *older_than_this = NULL;
|
||||
unsigned long expire_time;
|
||||
LIST_HEAD(tmp);
|
||||
struct list_head *pos, *node;
|
||||
struct super_block *sb = NULL;
|
||||
@ -1237,21 +1247,15 @@ static int move_expired_inodes(struct list_head *delaying_queue,
|
||||
int do_sb_sort = 0;
|
||||
int moved = 0;
|
||||
|
||||
if ((flags & EXPIRE_DIRTY_ATIME) == 0)
|
||||
older_than_this = work->older_than_this;
|
||||
else if (!work->for_sync) {
|
||||
expire_time = jiffies - (dirtytime_expire_interval * HZ);
|
||||
older_than_this = &expire_time;
|
||||
}
|
||||
while (!list_empty(delaying_queue)) {
|
||||
inode = wb_inode(delaying_queue->prev);
|
||||
if (older_than_this &&
|
||||
inode_dirtied_after(inode, *older_than_this))
|
||||
if (inode_dirtied_after(inode, dirtied_before))
|
||||
break;
|
||||
list_move(&inode->i_io_list, &tmp);
|
||||
moved++;
|
||||
if (flags & EXPIRE_DIRTY_ATIME)
|
||||
set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
|
||||
spin_lock(&inode->i_lock);
|
||||
inode->i_state |= I_SYNC_QUEUED;
|
||||
spin_unlock(&inode->i_lock);
|
||||
if (sb_is_blkdev_sb(inode->i_sb))
|
||||
continue;
|
||||
if (sb && sb != inode->i_sb)
|
||||
@ -1289,18 +1293,22 @@ out:
|
||||
* |
|
||||
* +--> dequeue for IO
|
||||
*/
|
||||
static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
|
||||
static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work,
|
||||
unsigned long dirtied_before)
|
||||
{
|
||||
int moved;
|
||||
unsigned long time_expire_jif = dirtied_before;
|
||||
|
||||
assert_spin_locked(&wb->list_lock);
|
||||
list_splice_init(&wb->b_more_io, &wb->b_io);
|
||||
moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work);
|
||||
moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, dirtied_before);
|
||||
if (!work->for_sync)
|
||||
time_expire_jif = jiffies - dirtytime_expire_interval * HZ;
|
||||
moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
|
||||
EXPIRE_DIRTY_ATIME, work);
|
||||
time_expire_jif);
|
||||
if (moved)
|
||||
wb_io_lists_populated(wb);
|
||||
trace_writeback_queue_io(wb, work, moved);
|
||||
trace_writeback_queue_io(wb, work, dirtied_before, moved);
|
||||
}
|
||||
|
||||
static int write_inode(struct inode *inode, struct writeback_control *wbc)
|
||||
@ -1394,7 +1402,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
|
||||
* writeback is not making progress due to locked
|
||||
* buffers. Skip this inode for now.
|
||||
*/
|
||||
redirty_tail(inode, wb);
|
||||
redirty_tail_locked(inode, wb);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1414,7 +1422,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
|
||||
* retrying writeback of the dirty page/inode
|
||||
* that cannot be performed immediately.
|
||||
*/
|
||||
redirty_tail(inode, wb);
|
||||
redirty_tail_locked(inode, wb);
|
||||
}
|
||||
} else if (inode->i_state & I_DIRTY) {
|
||||
/*
|
||||
@ -1422,10 +1430,11 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
|
||||
* such as delayed allocation during submission or metadata
|
||||
* updates after data IO completion.
|
||||
*/
|
||||
redirty_tail(inode, wb);
|
||||
redirty_tail_locked(inode, wb);
|
||||
} else if (inode->i_state & I_DIRTY_TIME) {
|
||||
inode->dirtied_when = jiffies;
|
||||
inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
|
||||
inode->i_state &= ~I_SYNC_QUEUED;
|
||||
} else {
|
||||
/* The inode is clean. Remove from writeback lists. */
|
||||
inode_io_list_del_locked(inode, wb);
|
||||
@ -1472,18 +1481,14 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
|
||||
spin_lock(&inode->i_lock);
|
||||
|
||||
dirty = inode->i_state & I_DIRTY;
|
||||
if (inode->i_state & I_DIRTY_TIME) {
|
||||
if ((dirty & I_DIRTY_INODE) ||
|
||||
wbc->sync_mode == WB_SYNC_ALL ||
|
||||
unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
|
||||
unlikely(time_after(jiffies,
|
||||
(inode->dirtied_time_when +
|
||||
dirtytime_expire_interval * HZ)))) {
|
||||
dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
|
||||
trace_writeback_lazytime(inode);
|
||||
}
|
||||
} else
|
||||
inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
|
||||
if ((inode->i_state & I_DIRTY_TIME) &&
|
||||
((dirty & I_DIRTY_INODE) ||
|
||||
wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync ||
|
||||
time_after(jiffies, inode->dirtied_time_when +
|
||||
dirtytime_expire_interval * HZ))) {
|
||||
dirty |= I_DIRTY_TIME;
|
||||
trace_writeback_lazytime(inode);
|
||||
}
|
||||
inode->i_state &= ~dirty;
|
||||
|
||||
/*
|
||||
@ -1669,8 +1674,8 @@ static long writeback_sb_inodes(struct super_block *sb,
|
||||
*/
|
||||
spin_lock(&inode->i_lock);
|
||||
if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
|
||||
redirty_tail_locked(inode, wb);
|
||||
spin_unlock(&inode->i_lock);
|
||||
redirty_tail(inode, wb);
|
||||
continue;
|
||||
}
|
||||
if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
|
||||
@ -1811,7 +1816,7 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
|
||||
blk_start_plug(&plug);
|
||||
spin_lock(&wb->list_lock);
|
||||
if (list_empty(&wb->b_io))
|
||||
queue_io(wb, &work);
|
||||
queue_io(wb, &work, jiffies);
|
||||
__writeback_inodes_wb(wb, &work);
|
||||
spin_unlock(&wb->list_lock);
|
||||
blk_finish_plug(&plug);
|
||||
@ -1831,7 +1836,7 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
|
||||
* takes longer than a dirty_writeback_interval interval, then leave a
|
||||
* one-second gap.
|
||||
*
|
||||
* older_than_this takes precedence over nr_to_write. So we'll only write back
|
||||
* dirtied_before takes precedence over nr_to_write. So we'll only write back
|
||||
* all dirty pages if they are all attached to "old" mappings.
|
||||
*/
|
||||
static long wb_writeback(struct bdi_writeback *wb,
|
||||
@ -1839,14 +1844,11 @@ static long wb_writeback(struct bdi_writeback *wb,
|
||||
{
|
||||
unsigned long wb_start = jiffies;
|
||||
long nr_pages = work->nr_pages;
|
||||
unsigned long oldest_jif;
|
||||
unsigned long dirtied_before = jiffies;
|
||||
struct inode *inode;
|
||||
long progress;
|
||||
struct blk_plug plug;
|
||||
|
||||
oldest_jif = jiffies;
|
||||
work->older_than_this = &oldest_jif;
|
||||
|
||||
blk_start_plug(&plug);
|
||||
spin_lock(&wb->list_lock);
|
||||
for (;;) {
|
||||
@ -1880,14 +1882,14 @@ static long wb_writeback(struct bdi_writeback *wb,
|
||||
* safe.
|
||||
*/
|
||||
if (work->for_kupdate) {
|
||||
oldest_jif = jiffies -
|
||||
dirtied_before = jiffies -
|
||||
msecs_to_jiffies(dirty_expire_interval * 10);
|
||||
} else if (work->for_background)
|
||||
oldest_jif = jiffies;
|
||||
dirtied_before = jiffies;
|
||||
|
||||
trace_writeback_start(wb, work);
|
||||
if (list_empty(&wb->b_io))
|
||||
queue_io(wb, work);
|
||||
queue_io(wb, work, dirtied_before);
|
||||
if (work->sb)
|
||||
progress = writeback_sb_inodes(work->sb, wb, work);
|
||||
else
|
||||
@ -2289,11 +2291,12 @@ void __mark_inode_dirty(struct inode *inode, int flags)
|
||||
inode->i_state |= flags;
|
||||
|
||||
/*
|
||||
* If the inode is being synced, just update its dirty state.
|
||||
* The unlocker will place the inode on the appropriate
|
||||
* superblock list, based upon its state.
|
||||
* If the inode is queued for writeback by flush worker, just
|
||||
* update its dirty state. Once the flush worker is done with
|
||||
* the inode it will place it on the appropriate superblock
|
||||
* list, based upon its state.
|
||||
*/
|
||||
if (inode->i_state & I_SYNC)
|
||||
if (inode->i_state & I_SYNC_QUEUED)
|
||||
goto out_unlock_inode;
|
||||
|
||||
/*
|
||||
|
@ -110,9 +110,9 @@ xfs_trans_log_inode(
|
||||
* to log the timestamps, or will clear already cleared fields in the
|
||||
* worst case.
|
||||
*/
|
||||
if (inode->i_state & (I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED)) {
|
||||
if (inode->i_state & I_DIRTY_TIME) {
|
||||
spin_lock(&inode->i_lock);
|
||||
inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
|
||||
inode->i_state &= ~I_DIRTY_TIME;
|
||||
spin_unlock(&inode->i_lock);
|
||||
}
|
||||
|
||||
|
@ -2132,6 +2132,10 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
|
||||
*
|
||||
* I_DONTCACHE Evict inode as soon as it is not used anymore.
|
||||
*
|
||||
* I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists.
|
||||
* Used to detect that mark_inode_dirty() should not move
|
||||
* inode between dirty lists.
|
||||
*
|
||||
* Q: What is the difference between I_WILL_FREE and I_FREEING?
|
||||
*/
|
||||
#define I_DIRTY_SYNC (1 << 0)
|
||||
@ -2149,12 +2153,11 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
|
||||
#define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP)
|
||||
#define I_LINKABLE (1 << 10)
|
||||
#define I_DIRTY_TIME (1 << 11)
|
||||
#define __I_DIRTY_TIME_EXPIRED 12
|
||||
#define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED)
|
||||
#define I_WB_SWITCH (1 << 13)
|
||||
#define I_OVL_INUSE (1 << 14)
|
||||
#define I_CREATING (1 << 15)
|
||||
#define I_DONTCACHE (1 << 16)
|
||||
#define I_SYNC_QUEUED (1 << 17)
|
||||
|
||||
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
|
||||
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
|
||||
|
@ -20,7 +20,6 @@
|
||||
{I_CLEAR, "I_CLEAR"}, \
|
||||
{I_SYNC, "I_SYNC"}, \
|
||||
{I_DIRTY_TIME, "I_DIRTY_TIME"}, \
|
||||
{I_DIRTY_TIME_EXPIRED, "I_DIRTY_TIME_EXPIRED"}, \
|
||||
{I_REFERENCED, "I_REFERENCED"} \
|
||||
)
|
||||
|
||||
@ -498,8 +497,9 @@ DEFINE_WBC_EVENT(wbc_writepage);
|
||||
TRACE_EVENT(writeback_queue_io,
|
||||
TP_PROTO(struct bdi_writeback *wb,
|
||||
struct wb_writeback_work *work,
|
||||
unsigned long dirtied_before,
|
||||
int moved),
|
||||
TP_ARGS(wb, work, moved),
|
||||
TP_ARGS(wb, work, dirtied_before, moved),
|
||||
TP_STRUCT__entry(
|
||||
__array(char, name, 32)
|
||||
__field(unsigned long, older)
|
||||
@ -509,19 +509,17 @@ TRACE_EVENT(writeback_queue_io,
|
||||
__field(ino_t, cgroup_ino)
|
||||
),
|
||||
TP_fast_assign(
|
||||
unsigned long *older_than_this = work->older_than_this;
|
||||
strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
|
||||
__entry->older = older_than_this ? *older_than_this : 0;
|
||||
__entry->age = older_than_this ?
|
||||
(jiffies - *older_than_this) * 1000 / HZ : -1;
|
||||
__entry->older = dirtied_before;
|
||||
__entry->age = (jiffies - dirtied_before) * 1000 / HZ;
|
||||
__entry->moved = moved;
|
||||
__entry->reason = work->reason;
|
||||
__entry->cgroup_ino = __trace_wb_assign_cgroup(wb);
|
||||
),
|
||||
TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup_ino=%lu",
|
||||
__entry->name,
|
||||
__entry->older, /* older_than_this in jiffies */
|
||||
__entry->age, /* older_than_this in relative milliseconds */
|
||||
__entry->older, /* dirtied_before in jiffies */
|
||||
__entry->age, /* dirtied_before in relative milliseconds */
|
||||
__entry->moved,
|
||||
__print_symbolic(__entry->reason, WB_WORK_REASON),
|
||||
(unsigned long)__entry->cgroup_ino
|
||||
|
Loading…
Reference in New Issue
Block a user