mm: add overcommit_kbytes sysctl variable

Some applications that run on HPC clusters are designed around the
availability of RAM and the overcommit ratio is fine tuned to get the
maximum usage of memory without swapping.  With growing memory, the
1%-of-all-RAM grain provided by overcommit_ratio has become too coarse
for these workloads (on a 2TB machine it represents no less than 20GB).

This patch adds the new overcommit_kbytes sysctl variable that allows a
much finer granularity.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: fix nommu build]
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Jerome Marchand 2014-01-21 15:49:14 -08:00 committed by Linus Torvalds
parent aec6a8889a
commit 49f0ce5f92
8 changed files with 70 additions and 8 deletions

View File

@ -47,6 +47,7 @@ Currently, these files are in /proc/sys/vm:
- numa_zonelist_order - numa_zonelist_order
- oom_dump_tasks - oom_dump_tasks
- oom_kill_allocating_task - oom_kill_allocating_task
- overcommit_kbytes
- overcommit_memory - overcommit_memory
- overcommit_ratio - overcommit_ratio
- page-cluster - page-cluster
@ -574,6 +575,17 @@ The default value is 0.
============================================================== ==============================================================
overcommit_kbytes:
When overcommit_memory is set to 2, the committed address space is not
permitted to exceed swap plus this amount of physical RAM. See below.
Note: overcommit_kbytes is the counterpart of overcommit_ratio. Only one
of them may be specified at a time. Setting one disables the other (which
then appears as 0 when read).
==============================================================
overcommit_memory: overcommit_memory:
This value contains a flag that enables memory overcommitment. This value contains a flag that enables memory overcommitment.

View File

@ -14,8 +14,8 @@ The Linux kernel supports the following overcommit handling modes
2 - Don't overcommit. The total address space commit 2 - Don't overcommit. The total address space commit
for the system is not permitted to exceed swap + a for the system is not permitted to exceed swap + a
configurable percentage (default is 50) of physical RAM. configurable amount (default is 50%) of physical RAM.
Depending on the percentage you use, in most situations Depending on the amount you use, in most situations
this means a process will not be killed while accessing this means a process will not be killed while accessing
pages but will receive errors on memory allocation as pages but will receive errors on memory allocation as
appropriate. appropriate.
@ -26,7 +26,8 @@ The Linux kernel supports the following overcommit handling modes
The overcommit policy is set via the sysctl `vm.overcommit_memory'. The overcommit policy is set via the sysctl `vm.overcommit_memory'.
The overcommit percentage is set via `vm.overcommit_ratio'. The overcommit amount can be set via `vm.overcommit_ratio' (percentage)
or `vm.overcommit_kbytes' (absolute value).
The current overcommit limit and amount committed are viewable in The current overcommit limit and amount committed are viewable in
/proc/meminfo as CommitLimit and Committed_AS respectively. /proc/meminfo as CommitLimit and Committed_AS respectively.

View File

@ -57,6 +57,15 @@ extern int sysctl_legacy_va_layout;
extern unsigned long sysctl_user_reserve_kbytes; extern unsigned long sysctl_user_reserve_kbytes;
extern unsigned long sysctl_admin_reserve_kbytes; extern unsigned long sysctl_admin_reserve_kbytes;
/* Overcommit tunables exposed under /proc/sys/vm. */
extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
extern unsigned long sysctl_overcommit_kbytes;
/*
 * sysctl handlers for overcommit_ratio and overcommit_kbytes: a successful
 * write to one of them resets the other to 0.
 */
extern int overcommit_ratio_handler(struct ctl_table *, int, void __user *,
size_t *, loff_t *);
extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *,
size_t *, loff_t *);
#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
/* to align the pointer to the (next) page boundary */ /* to align the pointer to the (next) page boundary */

View File

@ -9,6 +9,7 @@
extern int sysctl_overcommit_memory; extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio; extern int sysctl_overcommit_ratio;
extern unsigned long sysctl_overcommit_kbytes;
extern struct percpu_counter vm_committed_as; extern struct percpu_counter vm_committed_as;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP

View File

@ -95,8 +95,6 @@
#if defined(CONFIG_SYSCTL) #if defined(CONFIG_SYSCTL)
/* External variables not in a header file. */ /* External variables not in a header file. */
extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
extern int max_threads; extern int max_threads;
extern int suid_dumpable; extern int suid_dumpable;
#ifdef CONFIG_COREDUMP #ifdef CONFIG_COREDUMP
@ -1121,7 +1119,14 @@ static struct ctl_table vm_table[] = {
.data = &sysctl_overcommit_ratio, .data = &sysctl_overcommit_ratio,
.maxlen = sizeof(sysctl_overcommit_ratio), .maxlen = sizeof(sysctl_overcommit_ratio),
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec, .proc_handler = overcommit_ratio_handler,
},
{
.procname = "overcommit_kbytes",
.data = &sysctl_overcommit_kbytes,
.maxlen = sizeof(sysctl_overcommit_kbytes),
.mode = 0644,
.proc_handler = overcommit_kbytes_handler,
}, },
{ {
.procname = "page-cluster", .procname = "page-cluster",

View File

@ -86,6 +86,7 @@ EXPORT_SYMBOL(vm_get_page_prot);
int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; /* heuristic overcommit */
int sysctl_overcommit_ratio __read_mostly = 50; /* default is 50% */ int sysctl_overcommit_ratio __read_mostly = 50; /* default is 50% */
unsigned long sysctl_overcommit_kbytes __read_mostly;
int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */ unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */

View File

@ -60,6 +60,7 @@ unsigned long highest_memmap_pfn;
struct percpu_counter vm_committed_as; struct percpu_counter vm_committed_as;
int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_overcommit_ratio = 50; /* default is 50% */
unsigned long sysctl_overcommit_kbytes __read_mostly;
int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS;
unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */

View File

@ -404,13 +404,45 @@ struct address_space *page_mapping(struct page *page)
return mapping; return mapping;
} }
/*
 * sysctl handler for vm.overcommit_ratio.
 *
 * The read/write itself is delegated to proc_dointvec(). When a write
 * completes successfully, sysctl_overcommit_kbytes is reset to 0 so that
 * vm_commit_limit() uses the percentage instead of the absolute value.
 *
 * Returns whatever proc_dointvec() returned.
 */
int overcommit_ratio_handler(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp,
			     loff_t *ppos)
{
	int err = proc_dointvec(table, write, buffer, lenp, ppos);

	/* Reads and failed writes leave the kbytes setting untouched. */
	if (err || !write)
		return err;

	sysctl_overcommit_kbytes = 0;
	return 0;
}
/*
 * sysctl handler for vm.overcommit_kbytes.
 *
 * The read/write itself is delegated to proc_doulongvec_minmax(). When a
 * write completes successfully, sysctl_overcommit_ratio is reset to 0,
 * mirroring what overcommit_ratio_handler() does for the kbytes value.
 *
 * Returns whatever proc_doulongvec_minmax() returned.
 */
int overcommit_kbytes_handler(struct ctl_table *table, int write,
			      void __user *buffer, size_t *lenp,
			      loff_t *ppos)
{
	const int status = proc_doulongvec_minmax(table, write, buffer,
						  lenp, ppos);

	/* Only a successful write disables the percentage-based setting. */
	if (write && status == 0)
		sysctl_overcommit_ratio = 0;

	return status;
}
/* /*
* Committed memory limit enforced when OVERCOMMIT_NEVER policy is used * Committed memory limit enforced when OVERCOMMIT_NEVER policy is used
*/ */
unsigned long vm_commit_limit(void) unsigned long vm_commit_limit(void)
{ {
return ((totalram_pages - hugetlb_total_pages()) unsigned long allowed;
* sysctl_overcommit_ratio / 100) + total_swap_pages;
/* A non-zero overcommit_kbytes takes precedence over overcommit_ratio. */
if (sysctl_overcommit_kbytes)
/* kB -> pages; assumes PAGE_SHIFT is log2 of the page size in bytes. */
allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10);
else
/* Percentage of RAM, with hugetlb pages excluded from the base. */
allowed = ((totalram_pages - hugetlb_total_pages())
* sysctl_overcommit_ratio / 100);
/* Swap is added on top of the RAM allowance in both cases. */
allowed += total_swap_pages;
return allowed;
} }