linux/kernel/res_counter.c
Balbir Singh 28dbc4b6a0 memcg: memory cgroup resource counters for hierarchy
Add support for building hierarchies in resource counters.  Cgroups allows
us to build a deep hierarchy, but we currently don't link the resource
counters belonging to the memory controller control groups, in the same
fashion as the corresponding cgroup entries in the cgroup hierarchy.  This
patch provides the infrastructure for resource counters that have the same
hiearchy as their cgroup counter parts.

These set of patches are based on the resource counter hiearchy patches
posted by Pavel Emelianov.

NOTE: Building hiearchies is expensive, deeper hierarchies imply charging
the all the way up to the root.  It is known that hiearchies are
expensive, so the user needs to be careful and aware of the trade-offs
before creating very deep ones.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-01-08 08:31:05 -08:00

166 lines
3.5 KiB
C

/*
* resource cgroups
*
* Copyright 2007 OpenVZ SWsoft Inc
*
* Author: Pavel Emelianov <xemul@openvz.org>
*
*/
#include <linux/types.h>
#include <linux/parser.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/res_counter.h>
#include <linux/uaccess.h>
#include <linux/mm.h>
void res_counter_init(struct res_counter *counter, struct res_counter *parent)
{
spin_lock_init(&counter->lock);
counter->limit = (unsigned long long)LLONG_MAX;
counter->parent = parent;
}
int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
{
if (counter->usage + val > counter->limit) {
counter->failcnt++;
return -ENOMEM;
}
counter->usage += val;
if (counter->usage > counter->max_usage)
counter->max_usage = counter->usage;
return 0;
}
int res_counter_charge(struct res_counter *counter, unsigned long val,
struct res_counter **limit_fail_at)
{
int ret;
unsigned long flags;
struct res_counter *c, *u;
*limit_fail_at = NULL;
local_irq_save(flags);
for (c = counter; c != NULL; c = c->parent) {
spin_lock(&c->lock);
ret = res_counter_charge_locked(c, val);
spin_unlock(&c->lock);
if (ret < 0) {
*limit_fail_at = c;
goto undo;
}
}
ret = 0;
goto done;
undo:
for (u = counter; u != c; u = u->parent) {
spin_lock(&u->lock);
res_counter_uncharge_locked(u, val);
spin_unlock(&u->lock);
}
done:
local_irq_restore(flags);
return ret;
}
void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
{
if (WARN_ON(counter->usage < val))
val = counter->usage;
counter->usage -= val;
}
void res_counter_uncharge(struct res_counter *counter, unsigned long val)
{
unsigned long flags;
struct res_counter *c;
local_irq_save(flags);
for (c = counter; c != NULL; c = c->parent) {
spin_lock(&c->lock);
res_counter_uncharge_locked(c, val);
spin_unlock(&c->lock);
}
local_irq_restore(flags);
}
static inline unsigned long long *
res_counter_member(struct res_counter *counter, int member)
{
switch (member) {
case RES_USAGE:
return &counter->usage;
case RES_MAX_USAGE:
return &counter->max_usage;
case RES_LIMIT:
return &counter->limit;
case RES_FAILCNT:
return &counter->failcnt;
};
BUG();
return NULL;
}
ssize_t res_counter_read(struct res_counter *counter, int member,
const char __user *userbuf, size_t nbytes, loff_t *pos,
int (*read_strategy)(unsigned long long val, char *st_buf))
{
unsigned long long *val;
char buf[64], *s;
s = buf;
val = res_counter_member(counter, member);
if (read_strategy)
s += read_strategy(*val, s);
else
s += sprintf(s, "%llu\n", *val);
return simple_read_from_buffer((void __user *)userbuf, nbytes,
pos, buf, s - buf);
}
u64 res_counter_read_u64(struct res_counter *counter, int member)
{
return *res_counter_member(counter, member);
}
int res_counter_memparse_write_strategy(const char *buf,
unsigned long long *res)
{
char *end;
/* FIXME - make memparse() take const char* args */
*res = memparse((char *)buf, &end);
if (*end != '\0')
return -EINVAL;
*res = PAGE_ALIGN(*res);
return 0;
}
int res_counter_write(struct res_counter *counter, int member,
const char *buf, write_strategy_fn write_strategy)
{
char *end;
unsigned long flags;
unsigned long long tmp, *val;
if (write_strategy) {
if (write_strategy(buf, &tmp))
return -EINVAL;
} else {
tmp = simple_strtoull(buf, &end, 10);
if (*end != '\0')
return -EINVAL;
}
spin_lock_irqsave(&counter->lock, flags);
val = res_counter_member(counter, member);
*val = tmp;
spin_unlock_irqrestore(&counter->lock, flags);
return 0;
}