mirror of
https://github.com/torvalds/linux.git
synced 2024-11-23 04:31:50 +00:00
Merge branch 'writeback-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/linux
* 'writeback-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/wfg/linux:
  writeback: move MIN_WRITEBACK_PAGES to fs-writeback.c
  writeback: balanced_rate cannot exceed write bandwidth
  writeback: do strict bdi dirty_exceeded
  writeback: avoid tiny dirty poll intervals
  writeback: max, min and target dirty pause time
  writeback: dirty ratelimit - think time compensation
  btrfs: fix dirtied pages accounting on sub-page writes
  writeback: fix dirtied pages accounting on redirty
  writeback: fix dirtied pages accounting on sub-page writes
  writeback: charge leaked page dirties to active tasks
  writeback: Include all dirty inodes in background writeback
This commit is contained in:
commit
001a541ea9
@ -1136,7 +1136,8 @@ again:
|
|||||||
GFP_NOFS);
|
GFP_NOFS);
|
||||||
}
|
}
|
||||||
for (i = 0; i < num_pages; i++) {
|
for (i = 0; i < num_pages; i++) {
|
||||||
clear_page_dirty_for_io(pages[i]);
|
if (clear_page_dirty_for_io(pages[i]))
|
||||||
|
account_page_redirty(pages[i]);
|
||||||
set_page_extent_mapped(pages[i]);
|
set_page_extent_mapped(pages[i]);
|
||||||
WARN_ON(!PageLocked(pages[i]));
|
WARN_ON(!PageLocked(pages[i]));
|
||||||
}
|
}
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
#include <linux/sched.h>
|
#include <linux/sched.h>
|
||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
|
#include <linux/pagemap.h>
|
||||||
#include <linux/kthread.h>
|
#include <linux/kthread.h>
|
||||||
#include <linux/freezer.h>
|
#include <linux/freezer.h>
|
||||||
#include <linux/writeback.h>
|
#include <linux/writeback.h>
|
||||||
@ -28,6 +29,11 @@
|
|||||||
#include <linux/tracepoint.h>
|
#include <linux/tracepoint.h>
|
||||||
#include "internal.h"
|
#include "internal.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* 4MB minimal write chunk size
|
||||||
|
*/
|
||||||
|
#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_CACHE_SHIFT - 10))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Passed into wb_writeback(), essentially a subset of writeback_control
|
* Passed into wb_writeback(), essentially a subset of writeback_control
|
||||||
*/
|
*/
|
||||||
@ -742,11 +748,17 @@ static long wb_writeback(struct bdi_writeback *wb,
|
|||||||
if (work->for_background && !over_bground_thresh(wb->bdi))
|
if (work->for_background && !over_bground_thresh(wb->bdi))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Kupdate and background works are special and we want to
|
||||||
|
* include all inodes that need writing. Livelock avoidance is
|
||||||
|
* handled by these works yielding to any other work so we are
|
||||||
|
* safe.
|
||||||
|
*/
|
||||||
if (work->for_kupdate) {
|
if (work->for_kupdate) {
|
||||||
oldest_jif = jiffies -
|
oldest_jif = jiffies -
|
||||||
msecs_to_jiffies(dirty_expire_interval * 10);
|
msecs_to_jiffies(dirty_expire_interval * 10);
|
||||||
work->older_than_this = &oldest_jif;
|
} else if (work->for_background)
|
||||||
}
|
oldest_jif = jiffies;
|
||||||
|
|
||||||
trace_writeback_start(wb->bdi, work);
|
trace_writeback_start(wb->bdi, work);
|
||||||
if (list_empty(&wb->b_io))
|
if (list_empty(&wb->b_io))
|
||||||
|
@ -1544,6 +1544,7 @@ struct task_struct {
|
|||||||
*/
|
*/
|
||||||
int nr_dirtied;
|
int nr_dirtied;
|
||||||
int nr_dirtied_pause;
|
int nr_dirtied_pause;
|
||||||
|
unsigned long dirty_paused_when; /* start of a write-and-pause period */
|
||||||
|
|
||||||
#ifdef CONFIG_LATENCYTOP
|
#ifdef CONFIG_LATENCYTOP
|
||||||
int latency_record_count;
|
int latency_record_count;
|
||||||
|
@ -7,6 +7,8 @@
|
|||||||
#include <linux/sched.h>
|
#include <linux/sched.h>
|
||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
|
|
||||||
|
DECLARE_PER_CPU(int, dirty_throttle_leaks);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The 1/4 region under the global dirty thresh is for smooth dirty throttling:
|
* The 1/4 region under the global dirty thresh is for smooth dirty throttling:
|
||||||
*
|
*
|
||||||
@ -23,11 +25,6 @@
|
|||||||
#define DIRTY_SCOPE 8
|
#define DIRTY_SCOPE 8
|
||||||
#define DIRTY_FULL_SCOPE (DIRTY_SCOPE / 2)
|
#define DIRTY_FULL_SCOPE (DIRTY_SCOPE / 2)
|
||||||
|
|
||||||
/*
|
|
||||||
* 4MB minimal write chunk size
|
|
||||||
*/
|
|
||||||
#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_CACHE_SHIFT - 10))
|
|
||||||
|
|
||||||
struct backing_dev_info;
|
struct backing_dev_info;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -194,6 +191,8 @@ void writeback_set_ratelimit(void);
|
|||||||
void tag_pages_for_writeback(struct address_space *mapping,
|
void tag_pages_for_writeback(struct address_space *mapping,
|
||||||
pgoff_t start, pgoff_t end);
|
pgoff_t start, pgoff_t end);
|
||||||
|
|
||||||
|
void account_page_redirty(struct page *page);
|
||||||
|
|
||||||
/* pdflush.c */
|
/* pdflush.c */
|
||||||
extern int nr_pdflush_threads; /* Global so it can be exported to sysctl
|
extern int nr_pdflush_threads; /* Global so it can be exported to sysctl
|
||||||
read-only. */
|
read-only. */
|
||||||
|
@ -300,12 +300,13 @@ TRACE_EVENT(balance_dirty_pages,
|
|||||||
unsigned long dirty_ratelimit,
|
unsigned long dirty_ratelimit,
|
||||||
unsigned long task_ratelimit,
|
unsigned long task_ratelimit,
|
||||||
unsigned long dirtied,
|
unsigned long dirtied,
|
||||||
|
unsigned long period,
|
||||||
long pause,
|
long pause,
|
||||||
unsigned long start_time),
|
unsigned long start_time),
|
||||||
|
|
||||||
TP_ARGS(bdi, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty,
|
TP_ARGS(bdi, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty,
|
||||||
dirty_ratelimit, task_ratelimit,
|
dirty_ratelimit, task_ratelimit,
|
||||||
dirtied, pause, start_time),
|
dirtied, period, pause, start_time),
|
||||||
|
|
||||||
TP_STRUCT__entry(
|
TP_STRUCT__entry(
|
||||||
__array( char, bdi, 32)
|
__array( char, bdi, 32)
|
||||||
@ -320,6 +321,8 @@ TRACE_EVENT(balance_dirty_pages,
|
|||||||
__field(unsigned int, dirtied_pause)
|
__field(unsigned int, dirtied_pause)
|
||||||
__field(unsigned long, paused)
|
__field(unsigned long, paused)
|
||||||
__field( long, pause)
|
__field( long, pause)
|
||||||
|
__field(unsigned long, period)
|
||||||
|
__field( long, think)
|
||||||
),
|
),
|
||||||
|
|
||||||
TP_fast_assign(
|
TP_fast_assign(
|
||||||
@ -336,6 +339,9 @@ TRACE_EVENT(balance_dirty_pages,
|
|||||||
__entry->task_ratelimit = KBps(task_ratelimit);
|
__entry->task_ratelimit = KBps(task_ratelimit);
|
||||||
__entry->dirtied = dirtied;
|
__entry->dirtied = dirtied;
|
||||||
__entry->dirtied_pause = current->nr_dirtied_pause;
|
__entry->dirtied_pause = current->nr_dirtied_pause;
|
||||||
|
__entry->think = current->dirty_paused_when == 0 ? 0 :
|
||||||
|
(long)(jiffies - current->dirty_paused_when) * 1000/HZ;
|
||||||
|
__entry->period = period * 1000 / HZ;
|
||||||
__entry->pause = pause * 1000 / HZ;
|
__entry->pause = pause * 1000 / HZ;
|
||||||
__entry->paused = (jiffies - start_time) * 1000 / HZ;
|
__entry->paused = (jiffies - start_time) * 1000 / HZ;
|
||||||
),
|
),
|
||||||
@ -346,7 +352,7 @@ TRACE_EVENT(balance_dirty_pages,
|
|||||||
"bdi_setpoint=%lu bdi_dirty=%lu "
|
"bdi_setpoint=%lu bdi_dirty=%lu "
|
||||||
"dirty_ratelimit=%lu task_ratelimit=%lu "
|
"dirty_ratelimit=%lu task_ratelimit=%lu "
|
||||||
"dirtied=%u dirtied_pause=%u "
|
"dirtied=%u dirtied_pause=%u "
|
||||||
"paused=%lu pause=%ld",
|
"paused=%lu pause=%ld period=%lu think=%ld",
|
||||||
__entry->bdi,
|
__entry->bdi,
|
||||||
__entry->limit,
|
__entry->limit,
|
||||||
__entry->setpoint,
|
__entry->setpoint,
|
||||||
@ -358,7 +364,9 @@ TRACE_EVENT(balance_dirty_pages,
|
|||||||
__entry->dirtied,
|
__entry->dirtied,
|
||||||
__entry->dirtied_pause,
|
__entry->dirtied_pause,
|
||||||
__entry->paused, /* ms */
|
__entry->paused, /* ms */
|
||||||
__entry->pause /* ms */
|
__entry->pause, /* ms */
|
||||||
|
__entry->period, /* ms */
|
||||||
|
__entry->think /* ms */
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -51,6 +51,7 @@
|
|||||||
#include <trace/events/sched.h>
|
#include <trace/events/sched.h>
|
||||||
#include <linux/hw_breakpoint.h>
|
#include <linux/hw_breakpoint.h>
|
||||||
#include <linux/oom.h>
|
#include <linux/oom.h>
|
||||||
|
#include <linux/writeback.h>
|
||||||
|
|
||||||
#include <asm/uaccess.h>
|
#include <asm/uaccess.h>
|
||||||
#include <asm/unistd.h>
|
#include <asm/unistd.h>
|
||||||
@ -1035,6 +1036,8 @@ NORET_TYPE void do_exit(long code)
|
|||||||
validate_creds_for_do_exit(tsk);
|
validate_creds_for_do_exit(tsk);
|
||||||
|
|
||||||
preempt_disable();
|
preempt_disable();
|
||||||
|
if (tsk->nr_dirtied)
|
||||||
|
__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
|
||||||
exit_rcu();
|
exit_rcu();
|
||||||
/* causes final put_task_struct in finish_task_switch(). */
|
/* causes final put_task_struct in finish_task_switch(). */
|
||||||
tsk->state = TASK_DEAD;
|
tsk->state = TASK_DEAD;
|
||||||
|
@ -1294,6 +1294,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
|||||||
|
|
||||||
p->nr_dirtied = 0;
|
p->nr_dirtied = 0;
|
||||||
p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
|
p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
|
||||||
|
p->dirty_paused_when = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Ok, make it visible to the rest of the system.
|
* Ok, make it visible to the rest of the system.
|
||||||
|
@ -41,6 +41,12 @@
|
|||||||
*/
|
*/
|
||||||
#define MAX_PAUSE max(HZ/5, 1)
|
#define MAX_PAUSE max(HZ/5, 1)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Try to keep balance_dirty_pages() call intervals higher than this many pages
|
||||||
|
* by raising the pause time to max_pause when the interval falls below it.
|
||||||
|
*/
|
||||||
|
#define DIRTY_POLL_THRESH (128 >> (PAGE_SHIFT - 10))
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Estimate write bandwidth at 200ms intervals.
|
* Estimate write bandwidth at 200ms intervals.
|
||||||
*/
|
*/
|
||||||
@ -898,6 +904,11 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
|
|||||||
*/
|
*/
|
||||||
balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
|
balanced_dirty_ratelimit = div_u64((u64)task_ratelimit * write_bw,
|
||||||
dirty_rate | 1);
|
dirty_rate | 1);
|
||||||
|
/*
|
||||||
|
* balanced_dirty_ratelimit ~= (write_bw / N) <= write_bw
|
||||||
|
*/
|
||||||
|
if (unlikely(balanced_dirty_ratelimit > write_bw))
|
||||||
|
balanced_dirty_ratelimit = write_bw;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We could safely do this and return immediately:
|
* We could safely do this and return immediately:
|
||||||
@ -1044,25 +1055,11 @@ static unsigned long dirty_poll_interval(unsigned long dirty,
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
|
static long bdi_max_pause(struct backing_dev_info *bdi,
|
||||||
unsigned long bdi_dirty)
|
unsigned long bdi_dirty)
|
||||||
{
|
{
|
||||||
unsigned long bw = bdi->avg_write_bandwidth;
|
long bw = bdi->avg_write_bandwidth;
|
||||||
unsigned long hi = ilog2(bw);
|
long t;
|
||||||
unsigned long lo = ilog2(bdi->dirty_ratelimit);
|
|
||||||
unsigned long t;
|
|
||||||
|
|
||||||
/* target for 20ms max pause on 1-dd case */
|
|
||||||
t = HZ / 50;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Scale up pause time for concurrent dirtiers in order to reduce CPU
|
|
||||||
* overheads.
|
|
||||||
*
|
|
||||||
* (N * 20ms) on 2^N concurrent tasks.
|
|
||||||
*/
|
|
||||||
if (hi > lo)
|
|
||||||
t += (hi - lo) * (20 * HZ) / 1024;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Limit pause time for small memory systems. If sleeping for too long
|
* Limit pause time for small memory systems. If sleeping for too long
|
||||||
@ -1071,13 +1068,85 @@ static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
|
|||||||
*
|
*
|
||||||
* 8 serves as the safety ratio.
|
* 8 serves as the safety ratio.
|
||||||
*/
|
*/
|
||||||
t = min(t, bdi_dirty * HZ / (8 * bw + 1));
|
t = bdi_dirty / (1 + bw / roundup_pow_of_two(1 + HZ / 8));
|
||||||
|
t++;
|
||||||
|
|
||||||
|
return min_t(long, t, MAX_PAUSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
static long bdi_min_pause(struct backing_dev_info *bdi,
|
||||||
|
long max_pause,
|
||||||
|
unsigned long task_ratelimit,
|
||||||
|
unsigned long dirty_ratelimit,
|
||||||
|
int *nr_dirtied_pause)
|
||||||
|
{
|
||||||
|
long hi = ilog2(bdi->avg_write_bandwidth);
|
||||||
|
long lo = ilog2(bdi->dirty_ratelimit);
|
||||||
|
long t; /* target pause */
|
||||||
|
long pause; /* estimated next pause */
|
||||||
|
int pages; /* target nr_dirtied_pause */
|
||||||
|
|
||||||
|
/* target for 10ms pause on 1-dd case */
|
||||||
|
t = max(1, HZ / 100);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The pause time will be settled within range (max_pause/4, max_pause).
|
* Scale up pause time for concurrent dirtiers in order to reduce CPU
|
||||||
* Apply a minimal value of 4 to get a non-zero max_pause/4.
|
* overheads.
|
||||||
|
*
|
||||||
|
* (N * 10ms) on 2^N concurrent tasks.
|
||||||
*/
|
*/
|
||||||
return clamp_val(t, 4, MAX_PAUSE);
|
if (hi > lo)
|
||||||
|
t += (hi - lo) * (10 * HZ) / 1024;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is a bit convoluted. We try to base the next nr_dirtied_pause
|
||||||
|
* on the much more stable dirty_ratelimit. However the next pause time
|
||||||
|
* will be computed based on task_ratelimit and the two rate limits may
|
||||||
|
* depart considerably at some time. Especially if task_ratelimit goes
|
||||||
|
* below dirty_ratelimit/2 and the target pause is max_pause, the next
|
||||||
|
* pause time will be max_pause*2 _trimmed down_ to max_pause. As a
|
||||||
|
* result task_ratelimit won't be executed faithfully, which could
|
||||||
|
* eventually bring down dirty_ratelimit.
|
||||||
|
*
|
||||||
|
* We apply two rules to fix it up:
|
||||||
|
* 1) try to estimate the next pause time and if necessary, use a lower
|
||||||
|
* nr_dirtied_pause so as not to exceed max_pause. When this happens,
|
||||||
|
* nr_dirtied_pause will be "dancing" with task_ratelimit.
|
||||||
|
* 2) limit the target pause time to max_pause/2, so that the normal
|
||||||
|
* small fluctuations of task_ratelimit won't trigger rule (1) and
|
||||||
|
* nr_dirtied_pause will remain as stable as dirty_ratelimit.
|
||||||
|
*/
|
||||||
|
t = min(t, 1 + max_pause / 2);
|
||||||
|
pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Tiny nr_dirtied_pause is found to hurt I/O performance in the test
|
||||||
|
* case fio-mmap-randwrite-64k, which does 16*{sync read, async write}.
|
||||||
|
* When the 16 consecutive reads are often interrupted by some dirty
|
||||||
|
* throttling pause during the async writes, cfq will go idle
|
||||||
|
* (deadline is fine). So push nr_dirtied_pause as high as possible
|
||||||
|
* until it reaches DIRTY_POLL_THRESH=32 pages.
|
||||||
|
*/
|
||||||
|
if (pages < DIRTY_POLL_THRESH) {
|
||||||
|
t = max_pause;
|
||||||
|
pages = dirty_ratelimit * t / roundup_pow_of_two(HZ);
|
||||||
|
if (pages > DIRTY_POLL_THRESH) {
|
||||||
|
pages = DIRTY_POLL_THRESH;
|
||||||
|
t = HZ * DIRTY_POLL_THRESH / dirty_ratelimit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pause = HZ * pages / (task_ratelimit + 1);
|
||||||
|
if (pause > max_pause) {
|
||||||
|
t = max_pause;
|
||||||
|
pages = task_ratelimit * t / roundup_pow_of_two(HZ);
|
||||||
|
}
|
||||||
|
|
||||||
|
*nr_dirtied_pause = pages;
|
||||||
|
/*
|
||||||
|
* The minimal pause time will normally be half the target pause time.
|
||||||
|
*/
|
||||||
|
return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1098,16 +1167,21 @@ static void balance_dirty_pages(struct address_space *mapping,
|
|||||||
unsigned long background_thresh;
|
unsigned long background_thresh;
|
||||||
unsigned long dirty_thresh;
|
unsigned long dirty_thresh;
|
||||||
unsigned long bdi_thresh;
|
unsigned long bdi_thresh;
|
||||||
long pause = 0;
|
long period;
|
||||||
long uninitialized_var(max_pause);
|
long pause;
|
||||||
|
long max_pause;
|
||||||
|
long min_pause;
|
||||||
|
int nr_dirtied_pause;
|
||||||
bool dirty_exceeded = false;
|
bool dirty_exceeded = false;
|
||||||
unsigned long task_ratelimit;
|
unsigned long task_ratelimit;
|
||||||
unsigned long uninitialized_var(dirty_ratelimit);
|
unsigned long dirty_ratelimit;
|
||||||
unsigned long pos_ratio;
|
unsigned long pos_ratio;
|
||||||
struct backing_dev_info *bdi = mapping->backing_dev_info;
|
struct backing_dev_info *bdi = mapping->backing_dev_info;
|
||||||
unsigned long start_time = jiffies;
|
unsigned long start_time = jiffies;
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
unsigned long now = jiffies;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Unstable writes are a feature of certain networked
|
* Unstable writes are a feature of certain networked
|
||||||
* filesystems (i.e. NFS) in which data may have been
|
* filesystems (i.e. NFS) in which data may have been
|
||||||
@ -1127,8 +1201,13 @@ static void balance_dirty_pages(struct address_space *mapping,
|
|||||||
*/
|
*/
|
||||||
freerun = dirty_freerun_ceiling(dirty_thresh,
|
freerun = dirty_freerun_ceiling(dirty_thresh,
|
||||||
background_thresh);
|
background_thresh);
|
||||||
if (nr_dirty <= freerun)
|
if (nr_dirty <= freerun) {
|
||||||
|
current->dirty_paused_when = now;
|
||||||
|
current->nr_dirtied = 0;
|
||||||
|
current->nr_dirtied_pause =
|
||||||
|
dirty_poll_interval(nr_dirty, dirty_thresh);
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (unlikely(!writeback_in_progress(bdi)))
|
if (unlikely(!writeback_in_progress(bdi)))
|
||||||
bdi_start_background_writeback(bdi);
|
bdi_start_background_writeback(bdi);
|
||||||
@ -1168,7 +1247,7 @@ static void balance_dirty_pages(struct address_space *mapping,
|
|||||||
bdi_stat(bdi, BDI_WRITEBACK);
|
bdi_stat(bdi, BDI_WRITEBACK);
|
||||||
}
|
}
|
||||||
|
|
||||||
dirty_exceeded = (bdi_dirty > bdi_thresh) ||
|
dirty_exceeded = (bdi_dirty > bdi_thresh) &&
|
||||||
(nr_dirty > dirty_thresh);
|
(nr_dirty > dirty_thresh);
|
||||||
if (dirty_exceeded && !bdi->dirty_exceeded)
|
if (dirty_exceeded && !bdi->dirty_exceeded)
|
||||||
bdi->dirty_exceeded = 1;
|
bdi->dirty_exceeded = 1;
|
||||||
@ -1177,20 +1256,34 @@ static void balance_dirty_pages(struct address_space *mapping,
|
|||||||
nr_dirty, bdi_thresh, bdi_dirty,
|
nr_dirty, bdi_thresh, bdi_dirty,
|
||||||
start_time);
|
start_time);
|
||||||
|
|
||||||
max_pause = bdi_max_pause(bdi, bdi_dirty);
|
|
||||||
|
|
||||||
dirty_ratelimit = bdi->dirty_ratelimit;
|
dirty_ratelimit = bdi->dirty_ratelimit;
|
||||||
pos_ratio = bdi_position_ratio(bdi, dirty_thresh,
|
pos_ratio = bdi_position_ratio(bdi, dirty_thresh,
|
||||||
background_thresh, nr_dirty,
|
background_thresh, nr_dirty,
|
||||||
bdi_thresh, bdi_dirty);
|
bdi_thresh, bdi_dirty);
|
||||||
task_ratelimit = ((u64)dirty_ratelimit * pos_ratio) >>
|
task_ratelimit = ((u64)dirty_ratelimit * pos_ratio) >>
|
||||||
RATELIMIT_CALC_SHIFT;
|
RATELIMIT_CALC_SHIFT;
|
||||||
|
max_pause = bdi_max_pause(bdi, bdi_dirty);
|
||||||
|
min_pause = bdi_min_pause(bdi, max_pause,
|
||||||
|
task_ratelimit, dirty_ratelimit,
|
||||||
|
&nr_dirtied_pause);
|
||||||
|
|
||||||
if (unlikely(task_ratelimit == 0)) {
|
if (unlikely(task_ratelimit == 0)) {
|
||||||
|
period = max_pause;
|
||||||
pause = max_pause;
|
pause = max_pause;
|
||||||
goto pause;
|
goto pause;
|
||||||
}
|
}
|
||||||
pause = HZ * pages_dirtied / task_ratelimit;
|
period = HZ * pages_dirtied / task_ratelimit;
|
||||||
if (unlikely(pause <= 0)) {
|
pause = period;
|
||||||
|
if (current->dirty_paused_when)
|
||||||
|
pause -= now - current->dirty_paused_when;
|
||||||
|
/*
|
||||||
|
* For less than 1s think time (ext3/4 may block the dirtier
|
||||||
|
* for up to 800ms from time to time on 1-HDD; so does xfs,
|
||||||
|
* however at much less frequency), try to compensate it in
|
||||||
|
* future periods by updating the virtual time; otherwise just
|
||||||
|
* do a reset, as it may be a light dirtier.
|
||||||
|
*/
|
||||||
|
if (pause < min_pause) {
|
||||||
trace_balance_dirty_pages(bdi,
|
trace_balance_dirty_pages(bdi,
|
||||||
dirty_thresh,
|
dirty_thresh,
|
||||||
background_thresh,
|
background_thresh,
|
||||||
@ -1200,12 +1293,24 @@ static void balance_dirty_pages(struct address_space *mapping,
|
|||||||
dirty_ratelimit,
|
dirty_ratelimit,
|
||||||
task_ratelimit,
|
task_ratelimit,
|
||||||
pages_dirtied,
|
pages_dirtied,
|
||||||
pause,
|
period,
|
||||||
|
min(pause, 0L),
|
||||||
start_time);
|
start_time);
|
||||||
pause = 1; /* avoid resetting nr_dirtied_pause below */
|
if (pause < -HZ) {
|
||||||
|
current->dirty_paused_when = now;
|
||||||
|
current->nr_dirtied = 0;
|
||||||
|
} else if (period) {
|
||||||
|
current->dirty_paused_when += period;
|
||||||
|
current->nr_dirtied = 0;
|
||||||
|
} else if (current->nr_dirtied_pause <= pages_dirtied)
|
||||||
|
current->nr_dirtied_pause += pages_dirtied;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
pause = min(pause, max_pause);
|
if (unlikely(pause > max_pause)) {
|
||||||
|
/* for occasional dropped task_ratelimit */
|
||||||
|
now += min(pause - max_pause, max_pause);
|
||||||
|
pause = max_pause;
|
||||||
|
}
|
||||||
|
|
||||||
pause:
|
pause:
|
||||||
trace_balance_dirty_pages(bdi,
|
trace_balance_dirty_pages(bdi,
|
||||||
@ -1217,11 +1322,16 @@ pause:
|
|||||||
dirty_ratelimit,
|
dirty_ratelimit,
|
||||||
task_ratelimit,
|
task_ratelimit,
|
||||||
pages_dirtied,
|
pages_dirtied,
|
||||||
|
period,
|
||||||
pause,
|
pause,
|
||||||
start_time);
|
start_time);
|
||||||
__set_current_state(TASK_KILLABLE);
|
__set_current_state(TASK_KILLABLE);
|
||||||
io_schedule_timeout(pause);
|
io_schedule_timeout(pause);
|
||||||
|
|
||||||
|
current->dirty_paused_when = now + pause;
|
||||||
|
current->nr_dirtied = 0;
|
||||||
|
current->nr_dirtied_pause = nr_dirtied_pause;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is typically equal to (nr_dirty < dirty_thresh) and can
|
* This is typically equal to (nr_dirty < dirty_thresh) and can
|
||||||
* also keep "1000+ dd on a slow USB stick" under control.
|
* also keep "1000+ dd on a slow USB stick" under control.
|
||||||
@ -1249,23 +1359,6 @@ pause:
|
|||||||
if (!dirty_exceeded && bdi->dirty_exceeded)
|
if (!dirty_exceeded && bdi->dirty_exceeded)
|
||||||
bdi->dirty_exceeded = 0;
|
bdi->dirty_exceeded = 0;
|
||||||
|
|
||||||
current->nr_dirtied = 0;
|
|
||||||
if (pause == 0) { /* in freerun area */
|
|
||||||
current->nr_dirtied_pause =
|
|
||||||
dirty_poll_interval(nr_dirty, dirty_thresh);
|
|
||||||
} else if (pause <= max_pause / 4 &&
|
|
||||||
pages_dirtied >= current->nr_dirtied_pause) {
|
|
||||||
current->nr_dirtied_pause = clamp_val(
|
|
||||||
dirty_ratelimit * (max_pause / 2) / HZ,
|
|
||||||
pages_dirtied + pages_dirtied / 8,
|
|
||||||
pages_dirtied * 4);
|
|
||||||
} else if (pause >= max_pause) {
|
|
||||||
current->nr_dirtied_pause = 1 | clamp_val(
|
|
||||||
dirty_ratelimit * (max_pause / 2) / HZ,
|
|
||||||
pages_dirtied / 4,
|
|
||||||
pages_dirtied - pages_dirtied / 8);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (writeback_in_progress(bdi))
|
if (writeback_in_progress(bdi))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -1296,6 +1389,22 @@ void set_page_dirty_balance(struct page *page, int page_mkwrite)
|
|||||||
|
|
||||||
static DEFINE_PER_CPU(int, bdp_ratelimits);
|
static DEFINE_PER_CPU(int, bdp_ratelimits);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Normal tasks are throttled by
|
||||||
|
* loop {
|
||||||
|
* dirty tsk->nr_dirtied_pause pages;
|
||||||
|
* take a snap in balance_dirty_pages();
|
||||||
|
* }
|
||||||
|
* However there is a worst case. If every task exits immediately after dirtying
|
||||||
|
* (tsk->nr_dirtied_pause - 1) pages, balance_dirty_pages() will never be
|
||||||
|
* called to throttle the page dirties. The solution is to save the not yet
|
||||||
|
* throttled page dirties in dirty_throttle_leaks on task exit and charge them
|
||||||
|
* randomly into the running tasks. This works well for the above worst case,
|
||||||
|
* as the new task will pick up and accumulate the old task's leaked dirty
|
||||||
|
* count and eventually get throttled.
|
||||||
|
*/
|
||||||
|
DEFINE_PER_CPU(int, dirty_throttle_leaks) = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* balance_dirty_pages_ratelimited_nr - balance dirty memory state
|
* balance_dirty_pages_ratelimited_nr - balance dirty memory state
|
||||||
* @mapping: address_space which was dirtied
|
* @mapping: address_space which was dirtied
|
||||||
@ -1324,8 +1433,6 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
|
|||||||
if (bdi->dirty_exceeded)
|
if (bdi->dirty_exceeded)
|
||||||
ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));
|
ratelimit = min(ratelimit, 32 >> (PAGE_SHIFT - 10));
|
||||||
|
|
||||||
current->nr_dirtied += nr_pages_dirtied;
|
|
||||||
|
|
||||||
preempt_disable();
|
preempt_disable();
|
||||||
/*
|
/*
|
||||||
* This prevents one CPU to accumulate too many dirtied pages without
|
* This prevents one CPU to accumulate too many dirtied pages without
|
||||||
@ -1336,12 +1443,20 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
|
|||||||
p = &__get_cpu_var(bdp_ratelimits);
|
p = &__get_cpu_var(bdp_ratelimits);
|
||||||
if (unlikely(current->nr_dirtied >= ratelimit))
|
if (unlikely(current->nr_dirtied >= ratelimit))
|
||||||
*p = 0;
|
*p = 0;
|
||||||
else {
|
else if (unlikely(*p >= ratelimit_pages)) {
|
||||||
*p += nr_pages_dirtied;
|
*p = 0;
|
||||||
if (unlikely(*p >= ratelimit_pages)) {
|
ratelimit = 0;
|
||||||
*p = 0;
|
}
|
||||||
ratelimit = 0;
|
/*
|
||||||
}
|
* Pick up the dirtied pages by the exited tasks. This avoids lots of
|
||||||
|
* short-lived tasks (eg. gcc invocations in a kernel build) escaping
|
||||||
|
* the dirty throttling and livelocking other long-running dirtiers.
|
||||||
|
*/
|
||||||
|
p = &__get_cpu_var(dirty_throttle_leaks);
|
||||||
|
if (*p > 0 && current->nr_dirtied < ratelimit) {
|
||||||
|
nr_pages_dirtied = min(*p, ratelimit - current->nr_dirtied);
|
||||||
|
*p -= nr_pages_dirtied;
|
||||||
|
current->nr_dirtied += nr_pages_dirtied;
|
||||||
}
|
}
|
||||||
preempt_enable();
|
preempt_enable();
|
||||||
|
|
||||||
@ -1823,6 +1938,8 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
|
|||||||
__inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
|
__inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
|
||||||
__inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED);
|
__inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED);
|
||||||
task_io_account_write(PAGE_CACHE_SIZE);
|
task_io_account_write(PAGE_CACHE_SIZE);
|
||||||
|
current->nr_dirtied++;
|
||||||
|
this_cpu_inc(bdp_ratelimits);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(account_page_dirtied);
|
EXPORT_SYMBOL(account_page_dirtied);
|
||||||
@ -1882,6 +1999,24 @@ int __set_page_dirty_nobuffers(struct page *page)
|
|||||||
}
|
}
|
||||||
EXPORT_SYMBOL(__set_page_dirty_nobuffers);
|
EXPORT_SYMBOL(__set_page_dirty_nobuffers);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Call this whenever redirtying a page, to de-account the dirty counters
|
||||||
|
* (NR_DIRTIED, BDI_DIRTIED, tsk->nr_dirtied), so that they match the written
|
||||||
|
* counters (NR_WRITTEN, BDI_WRITTEN) in long term. The mismatches will lead to
|
||||||
|
* systematic errors in balanced_dirty_ratelimit and the dirty pages position
|
||||||
|
* control.
|
||||||
|
*/
|
||||||
|
void account_page_redirty(struct page *page)
|
||||||
|
{
|
||||||
|
struct address_space *mapping = page->mapping;
|
||||||
|
if (mapping && mapping_cap_account_dirty(mapping)) {
|
||||||
|
current->nr_dirtied--;
|
||||||
|
dec_zone_page_state(page, NR_DIRTIED);
|
||||||
|
dec_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(account_page_redirty);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When a writepage implementation decides that it doesn't want to write this
|
* When a writepage implementation decides that it doesn't want to write this
|
||||||
* page for some reason, it should redirty the locked page via
|
* page for some reason, it should redirty the locked page via
|
||||||
@ -1890,6 +2025,7 @@ EXPORT_SYMBOL(__set_page_dirty_nobuffers);
|
|||||||
int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
|
int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
|
||||||
{
|
{
|
||||||
wbc->pages_skipped++;
|
wbc->pages_skipped++;
|
||||||
|
account_page_redirty(page);
|
||||||
return __set_page_dirty_nobuffers(page);
|
return __set_page_dirty_nobuffers(page);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(redirty_page_for_writepage);
|
EXPORT_SYMBOL(redirty_page_for_writepage);
|
||||||
|
Loading…
Reference in New Issue
Block a user