memcg: add per-memcg total kernel memory stat

Currently memcg stats show several types of kernel memory: kernel stack,
page tables, sock, vmalloc, and slab.  However, there are other
allocations with __GFP_ACCOUNT (or supersets such as GFP_KERNEL_ACCOUNT)
that are not accounted in any of those stats.  A few examples are:

 - various kvm allocations (e.g. allocated pages to create vcpus)
 - io_uring
 - tmp_page in pipes during pipe_write()
 - bpf ringbuffers
 - unix sockets

Keeping track of the total kernel memory is essential for the ease of
migration from cgroup v1 to v2 as there are large discrepancies between
v1's kmem.usage_in_bytes and the sum of the available kernel memory
stats in v2.  Adding separate memcg stats for all __GFP_ACCOUNT kernel
allocations is an impractical maintenance burden as there are a lot of those
all over the kernel code, with more use cases likely to show up in the
future.

Therefore, add a "kernel" memcg stat that is analogous to kmem page
counter, with added benefits such as using rstat infrastructure which
aggregates stats more efficiently.  Additionally, this provides a
lighter alternative in case the legacy kmem is deprecated in the future.

[yosryahmed@google.com: v2]
  Link: https://lkml.kernel.org/r/20220203193856.972500-1-yosryahmed@google.com

Link: https://lkml.kernel.org/r/20220201200823.3283171-1-yosryahmed@google.com
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Yosry Ahmed 2022-03-22 14:40:10 -07:00 committed by Linus Torvalds
parent 086f694a75
commit a8c49af3be
3 changed files with 27 additions and 6 deletions

View File

@ -1301,6 +1301,11 @@ PAGE_SIZE multiple when read back.
Amount of memory used to cache filesystem data, Amount of memory used to cache filesystem data,
including tmpfs and shared memory. including tmpfs and shared memory.
kernel (npn)
Amount of total kernel memory, including
(kernel_stack, pagetables, percpu, vmalloc, slab) in
addition to other kernel memory use cases.
kernel_stack kernel_stack
Amount of memory allocated to kernel stacks. Amount of memory allocated to kernel stacks.

View File

@ -34,6 +34,7 @@ enum memcg_stat_item {
MEMCG_SOCK, MEMCG_SOCK,
MEMCG_PERCPU_B, MEMCG_PERCPU_B,
MEMCG_VMALLOC, MEMCG_VMALLOC,
MEMCG_KMEM,
MEMCG_NR_STAT, MEMCG_NR_STAT,
}; };

View File

@ -1371,6 +1371,7 @@ struct memory_stat {
static const struct memory_stat memory_stats[] = { static const struct memory_stat memory_stats[] = {
{ "anon", NR_ANON_MAPPED }, { "anon", NR_ANON_MAPPED },
{ "file", NR_FILE_PAGES }, { "file", NR_FILE_PAGES },
{ "kernel", MEMCG_KMEM },
{ "kernel_stack", NR_KERNEL_STACK_KB }, { "kernel_stack", NR_KERNEL_STACK_KB },
{ "pagetables", NR_PAGETABLE }, { "pagetables", NR_PAGETABLE },
{ "percpu", MEMCG_PERCPU_B }, { "percpu", MEMCG_PERCPU_B },
@ -2114,6 +2115,7 @@ static DEFINE_MUTEX(percpu_charge_mutex);
static void drain_obj_stock(struct obj_stock *stock); static void drain_obj_stock(struct obj_stock *stock);
static bool obj_stock_flush_required(struct memcg_stock_pcp *stock, static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
struct mem_cgroup *root_memcg); struct mem_cgroup *root_memcg);
static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages);
#else #else
static inline void drain_obj_stock(struct obj_stock *stock) static inline void drain_obj_stock(struct obj_stock *stock)
@ -2124,6 +2126,9 @@ static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
{ {
return false; return false;
} }
/*
 * memcg_account_kmem: no-op variant of the kernel memory accounting helper
 * @memcg: memory cgroup (unused)
 * @nr_pages: signed page delta (unused)
 *
 * Defined in the #else branch of the surrounding conditional so that callers
 * compile unchanged; it performs no accounting.
 * NOTE(review): presumably selected when kernel memory accounting is not
 * configured -- confirm against the enclosing #ifdef.
 */
static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages)
{
}
#endif #endif
/** /**
@ -2979,6 +2984,18 @@ static void memcg_free_cache_id(int id)
ida_simple_remove(&memcg_cache_ida, id); ida_simple_remove(&memcg_cache_ida, id);
} }
/*
 * memcg_account_kmem: apply a kernel memory page delta to a memcg
 * @memcg: memory cgroup to account against
 * @nr_pages: signed number of pages; a positive value charges, any other
 *            value uncharges its negation
 *
 * The MEMCG_KMEM stat is always updated.  The kmem page counter is adjusted
 * as well, but only when the memory controller is not on the default
 * hierarchy.
 */
static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages)
{
	mod_memcg_state(memcg, MEMCG_KMEM, nr_pages);

	/* On the default hierarchy the kmem page counter is left untouched. */
	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
		return;

	if (nr_pages <= 0) {
		/* page_counter_uncharge() takes a magnitude, so flip the sign. */
		page_counter_uncharge(&memcg->kmem, -nr_pages);
		return;
	}

	page_counter_charge(&memcg->kmem, nr_pages);
}
/* /*
* obj_cgroup_uncharge_pages: uncharge a number of kernel pages from a objcg * obj_cgroup_uncharge_pages: uncharge a number of kernel pages from a objcg
* @objcg: object cgroup to uncharge * @objcg: object cgroup to uncharge
@ -2991,8 +3008,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
memcg = get_mem_cgroup_from_objcg(objcg); memcg = get_mem_cgroup_from_objcg(objcg);
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) memcg_account_kmem(memcg, -nr_pages);
page_counter_uncharge(&memcg->kmem, nr_pages);
refill_stock(memcg, nr_pages); refill_stock(memcg, nr_pages);
css_put(&memcg->css); css_put(&memcg->css);
@ -3018,8 +3034,7 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
if (ret) if (ret)
goto out; goto out;
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) memcg_account_kmem(memcg, nr_pages);
page_counter_charge(&memcg->kmem, nr_pages);
out: out:
css_put(&memcg->css); css_put(&memcg->css);
@ -6801,8 +6816,8 @@ static void uncharge_batch(const struct uncharge_gather *ug)
page_counter_uncharge(&ug->memcg->memory, ug->nr_memory); page_counter_uncharge(&ug->memcg->memory, ug->nr_memory);
if (do_memsw_account()) if (do_memsw_account())
page_counter_uncharge(&ug->memcg->memsw, ug->nr_memory); page_counter_uncharge(&ug->memcg->memsw, ug->nr_memory);
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && ug->nr_kmem) if (ug->nr_kmem)
page_counter_uncharge(&ug->memcg->kmem, ug->nr_kmem); memcg_account_kmem(ug->memcg, -ug->nr_kmem);
memcg_oom_recover(ug->memcg); memcg_oom_recover(ug->memcg);
} }