memcg: add memory.numastat api for numa statistics

The new API exports numa_maps per-memcg basis.  This is a piece of useful
information where it exports per-memcg page distribution across real numa
nodes.

One of the usecases is evaluating application performance by combining
this information w/ the cpu allocation to the application.

The output of the memory.numastat tries to follow w/ simiar format of
numa_maps like:

  total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
  file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
  anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
  unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...

And we have per-node:

  total = file + anon + unevictable

  $ cat /dev/cgroup/memory/memory.numa_stat
  total=250020 N0=87620 N1=52367 N2=45298 N3=64735
  file=225232 N0=83402 N1=46160 N2=40522 N3=55148
  anon=21053 N0=3424 N1=6207 N2=4776 N3=6646
  unevictable=3735 N0=794 N1=0 N2=0 N3=2941

Signed-off-by: Ying Han <yinghan@google.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Ying Han 2011-05-26 16:25:37 -07:00 committed by Linus Torvalds
parent 1bac180bd2
commit 406eb0c9ba

View File

@ -1089,6 +1089,93 @@ unsigned long mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg,
return MEM_CGROUP_ZSTAT(mz, lru); return MEM_CGROUP_ZSTAT(mz, lru);
} }
#ifdef CONFIG_NUMA
static unsigned long mem_cgroup_node_nr_file_lru_pages(struct mem_cgroup *memcg,
int nid)
{
unsigned long ret;
ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_FILE) +
mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_FILE);
return ret;
}
static unsigned long mem_cgroup_nr_file_lru_pages(struct mem_cgroup *memcg)
{
u64 total = 0;
int nid;
for_each_node_state(nid, N_HIGH_MEMORY)
total += mem_cgroup_node_nr_file_lru_pages(memcg, nid);
return total;
}
static unsigned long mem_cgroup_node_nr_anon_lru_pages(struct mem_cgroup *memcg,
int nid)
{
unsigned long ret;
ret = mem_cgroup_get_zonestat_node(memcg, nid, LRU_INACTIVE_ANON) +
mem_cgroup_get_zonestat_node(memcg, nid, LRU_ACTIVE_ANON);
return ret;
}
static unsigned long mem_cgroup_nr_anon_lru_pages(struct mem_cgroup *memcg)
{
u64 total = 0;
int nid;
for_each_node_state(nid, N_HIGH_MEMORY)
total += mem_cgroup_node_nr_anon_lru_pages(memcg, nid);
return total;
}
static unsigned long
mem_cgroup_node_nr_unevictable_lru_pages(struct mem_cgroup *memcg, int nid)
{
return mem_cgroup_get_zonestat_node(memcg, nid, LRU_UNEVICTABLE);
}
static unsigned long
mem_cgroup_nr_unevictable_lru_pages(struct mem_cgroup *memcg)
{
u64 total = 0;
int nid;
for_each_node_state(nid, N_HIGH_MEMORY)
total += mem_cgroup_node_nr_unevictable_lru_pages(memcg, nid);
return total;
}
static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
int nid)
{
enum lru_list l;
u64 total = 0;
for_each_lru(l)
total += mem_cgroup_get_zonestat_node(memcg, nid, l);
return total;
}
static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg)
{
u64 total = 0;
int nid;
for_each_node_state(nid, N_HIGH_MEMORY)
total += mem_cgroup_node_nr_lru_pages(memcg, nid);
return total;
}
#endif /* CONFIG_NUMA */
struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg, struct zone_reclaim_stat *mem_cgroup_get_reclaim_stat(struct mem_cgroup *memcg,
struct zone *zone) struct zone *zone)
{ {
@ -3944,6 +4031,51 @@ mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s)
mem_cgroup_get_local_stat(iter, s); mem_cgroup_get_local_stat(iter, s);
} }
#ifdef CONFIG_NUMA
static int mem_control_numa_stat_show(struct seq_file *m, void *arg)
{
int nid;
unsigned long total_nr, file_nr, anon_nr, unevictable_nr;
unsigned long node_nr;
struct cgroup *cont = m->private;
struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont);
total_nr = mem_cgroup_nr_lru_pages(mem_cont);
seq_printf(m, "total=%lu", total_nr);
for_each_node_state(nid, N_HIGH_MEMORY) {
node_nr = mem_cgroup_node_nr_lru_pages(mem_cont, nid);
seq_printf(m, " N%d=%lu", nid, node_nr);
}
seq_putc(m, '\n');
file_nr = mem_cgroup_nr_file_lru_pages(mem_cont);
seq_printf(m, "file=%lu", file_nr);
for_each_node_state(nid, N_HIGH_MEMORY) {
node_nr = mem_cgroup_node_nr_file_lru_pages(mem_cont, nid);
seq_printf(m, " N%d=%lu", nid, node_nr);
}
seq_putc(m, '\n');
anon_nr = mem_cgroup_nr_anon_lru_pages(mem_cont);
seq_printf(m, "anon=%lu", anon_nr);
for_each_node_state(nid, N_HIGH_MEMORY) {
node_nr = mem_cgroup_node_nr_anon_lru_pages(mem_cont, nid);
seq_printf(m, " N%d=%lu", nid, node_nr);
}
seq_putc(m, '\n');
unevictable_nr = mem_cgroup_nr_unevictable_lru_pages(mem_cont);
seq_printf(m, "unevictable=%lu", unevictable_nr);
for_each_node_state(nid, N_HIGH_MEMORY) {
node_nr = mem_cgroup_node_nr_unevictable_lru_pages(mem_cont,
nid);
seq_printf(m, " N%d=%lu", nid, node_nr);
}
seq_putc(m, '\n');
return 0;
}
#endif /* CONFIG_NUMA */
static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
struct cgroup_map_cb *cb) struct cgroup_map_cb *cb)
{ {
@ -3954,6 +4086,7 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
memset(&mystat, 0, sizeof(mystat)); memset(&mystat, 0, sizeof(mystat));
mem_cgroup_get_local_stat(mem_cont, &mystat); mem_cgroup_get_local_stat(mem_cont, &mystat);
for (i = 0; i < NR_MCS_STAT; i++) { for (i = 0; i < NR_MCS_STAT; i++) {
if (i == MCS_SWAP && !do_swap_account) if (i == MCS_SWAP && !do_swap_account)
continue; continue;
@ -4377,6 +4510,22 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
return 0; return 0;
} }
#ifdef CONFIG_NUMA
static const struct file_operations mem_control_numa_stat_file_operations = {
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
{
struct cgroup *cont = file->f_dentry->d_parent->d_fsdata;
file->f_op = &mem_control_numa_stat_file_operations;
return single_open(file, mem_control_numa_stat_show, cont);
}
#endif /* CONFIG_NUMA */
static struct cftype mem_cgroup_files[] = { static struct cftype mem_cgroup_files[] = {
{ {
.name = "usage_in_bytes", .name = "usage_in_bytes",
@ -4440,6 +4589,12 @@ static struct cftype mem_cgroup_files[] = {
.unregister_event = mem_cgroup_oom_unregister_event, .unregister_event = mem_cgroup_oom_unregister_event,
.private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL), .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),
}, },
#ifdef CONFIG_NUMA
{
.name = "numa_stat",
.open = mem_control_numa_stat_open,
},
#endif
}; };
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP