Memory controller improve user interface
Change the interface to use bytes instead of pages. Page sizes can vary across platforms and configurations. A new strategy routine has been added to the resource counters infrastructure to format the data as desired. Suggested by David Rientjes, Andrew Morton and Herbert Poetzl. Tested on a UML setup with the config for memory control enabled. [kamezawa.hiroyu@jp.fujitsu.com: possible race fix in res_counter] Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com> Signed-off-by: Pavel Emelianov <xemul@openvz.org> Cc: Paul Menage <menage@google.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Kirill Korotaev <dev@sw.ru> Cc: Herbert Poetzl <herbert@13thfloor.at> Cc: David Rientjes <rientjes@google.com> Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
		
							parent
							
								
									66e1707bc3
								
							
						
					
					
						commit
						0eea103017
					
				| @ -165,11 +165,30 @@ c. Enable CONFIG_CGROUP_MEM_CONT | ||||
| 
 | ||||
| Since now we're in the 0 cgroup, | ||||
| We can alter the memory limit: | ||||
| # echo -n 6000 > /cgroups/0/memory.limit | ||||
| # echo -n 4M > /cgroups/0/memory.limit_in_bytes | ||||
| 
 | ||||
| NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, | ||||
| mega or gigabytes. | ||||
| 
 | ||||
| # cat /cgroups/0/memory.limit_in_bytes | ||||
| 4194304 Bytes | ||||
| 
 | ||||
| NOTE: The interface has now changed to display the usage in bytes | ||||
| instead of pages. | ||||
| 
 | ||||
| We can check the usage: | ||||
| # cat /cgroups/0/memory.usage | ||||
| 25 | ||||
| # cat /cgroups/0/memory.usage_in_bytes | ||||
| 1216512 Bytes | ||||
| 
 | ||||
| A successful write to this file does not guarantee that the limit is set | ||||
| to exactly the value written into the file.  This can be due to a | ||||
| number of factors, such as rounding up to page boundaries or the total | ||||
| availability of memory on the system.  The user is required to re-read | ||||
| this file after a write to learn the value committed by the kernel. | ||||
| 
 | ||||
| # echo -n 1 > memory.limit_in_bytes | ||||
| # cat memory.limit_in_bytes | ||||
| 4096 Bytes | ||||
| 
 | ||||
| The memory.failcnt field gives the number of times that the cgroup limit was | ||||
| exceeded. | ||||
| @ -206,8 +225,8 @@ cgroup might have some charge associated with it, even though all | ||||
| tasks have migrated away from it. If some pages are still left, after following | ||||
| the steps listed in sections 4.1 and 4.2, check the Swap Cache usage in | ||||
| /proc/meminfo to see if the Swap Cache usage is showing up in the | ||||
| cgroups memory.usage counter. A simple test of swapoff -a and swapon -a | ||||
| should free any pending Swap Cache usage. | ||||
| cgroups memory.usage_in_bytes counter. A simple test of swapoff -a and | ||||
| swapon -a should free any pending Swap Cache usage. | ||||
| 
 | ||||
| 4.4 Choosing what to account  -- Page Cache (unmapped) vs RSS (mapped)? | ||||
| 
 | ||||
|  | ||||
| @ -23,15 +23,15 @@ struct res_counter { | ||||
| 	/*
 | ||||
| 	 * the current resource consumption level | ||||
| 	 */ | ||||
| 	unsigned long usage; | ||||
| 	unsigned long long usage; | ||||
| 	/*
 | ||||
| 	 * the limit that usage cannot exceed | ||||
| 	 */ | ||||
| 	unsigned long limit; | ||||
| 	unsigned long long limit; | ||||
| 	/*
 | ||||
| 	 * the number of unsuccessful attempts to consume the resource | ||||
| 	 */ | ||||
| 	unsigned long failcnt; | ||||
| 	unsigned long long failcnt; | ||||
| 	/*
 | ||||
| 	 * the lock to protect all of the above. | ||||
| 	 * the routines below consider this to be IRQ-safe | ||||
| @ -52,9 +52,11 @@ struct res_counter { | ||||
|  */ | ||||
| 
 | ||||
| ssize_t res_counter_read(struct res_counter *counter, int member, | ||||
| 		const char __user *buf, size_t nbytes, loff_t *pos); | ||||
| 		const char __user *buf, size_t nbytes, loff_t *pos, | ||||
| 		int (*read_strategy)(unsigned long long val, char *s)); | ||||
| ssize_t res_counter_write(struct res_counter *counter, int member, | ||||
| 		const char __user *buf, size_t nbytes, loff_t *pos); | ||||
| 		const char __user *buf, size_t nbytes, loff_t *pos, | ||||
| 		int (*write_strategy)(char *buf, unsigned long long *val)); | ||||
| 
 | ||||
| /*
 | ||||
|  * the field descriptors. one for each member of res_counter | ||||
|  | ||||
| @ -16,7 +16,7 @@ | ||||
| void res_counter_init(struct res_counter *counter) | ||||
| { | ||||
| 	spin_lock_init(&counter->lock); | ||||
| 	counter->limit = (unsigned long)LONG_MAX; | ||||
| 	counter->limit = (unsigned long long)LLONG_MAX; | ||||
| } | ||||
| 
 | ||||
| int res_counter_charge_locked(struct res_counter *counter, unsigned long val) | ||||
| @ -59,8 +59,8 @@ void res_counter_uncharge(struct res_counter *counter, unsigned long val) | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| static inline unsigned long *res_counter_member(struct res_counter *counter, | ||||
| 						int member) | ||||
| static inline unsigned long long * | ||||
| res_counter_member(struct res_counter *counter, int member) | ||||
| { | ||||
| 	switch (member) { | ||||
| 	case RES_USAGE: | ||||
| @ -76,24 +76,30 @@ static inline unsigned long *res_counter_member(struct res_counter *counter, | ||||
| } | ||||
| 
 | ||||
| ssize_t res_counter_read(struct res_counter *counter, int member, | ||||
| 		const char __user *userbuf, size_t nbytes, loff_t *pos) | ||||
| 		const char __user *userbuf, size_t nbytes, loff_t *pos, | ||||
| 		int (*read_strategy)(unsigned long long val, char *st_buf)) | ||||
| { | ||||
| 	unsigned long *val; | ||||
| 	unsigned long long *val; | ||||
| 	char buf[64], *s; | ||||
| 
 | ||||
| 	s = buf; | ||||
| 	val = res_counter_member(counter, member); | ||||
| 	s += sprintf(s, "%lu\n", *val); | ||||
| 	if (read_strategy) | ||||
| 		s += read_strategy(*val, s); | ||||
| 	else | ||||
| 		s += sprintf(s, "%llu\n", *val); | ||||
| 	return simple_read_from_buffer((void __user *)userbuf, nbytes, | ||||
| 			pos, buf, s - buf); | ||||
| } | ||||
| 
 | ||||
| ssize_t res_counter_write(struct res_counter *counter, int member, | ||||
| 		const char __user *userbuf, size_t nbytes, loff_t *pos) | ||||
| 		const char __user *userbuf, size_t nbytes, loff_t *pos, | ||||
| 		int (*write_strategy)(char *st_buf, unsigned long long *val)) | ||||
| { | ||||
| 	int ret; | ||||
| 	char *buf, *end; | ||||
| 	unsigned long tmp, *val; | ||||
| 	unsigned long flags; | ||||
| 	unsigned long long tmp, *val; | ||||
| 
 | ||||
| 	buf = kmalloc(nbytes + 1, GFP_KERNEL); | ||||
| 	ret = -ENOMEM; | ||||
| @ -106,12 +112,20 @@ ssize_t res_counter_write(struct res_counter *counter, int member, | ||||
| 		goto out_free; | ||||
| 
 | ||||
| 	ret = -EINVAL; | ||||
| 	tmp = simple_strtoul(buf, &end, 10); | ||||
| 	if (*end != '\0') | ||||
| 		goto out_free; | ||||
| 
 | ||||
| 	if (write_strategy) { | ||||
| 		if (write_strategy(buf, &tmp)) { | ||||
| 			goto out_free; | ||||
| 		} | ||||
| 	} else { | ||||
| 		tmp = simple_strtoull(buf, &end, 10); | ||||
| 		if (*end != '\0') | ||||
| 			goto out_free; | ||||
| 	} | ||||
| 	spin_lock_irqsave(&counter->lock, flags); | ||||
| 	val = res_counter_member(counter, member); | ||||
| 	*val = tmp; | ||||
| 	spin_unlock_irqrestore(&counter->lock, flags); | ||||
| 	ret = nbytes; | ||||
| out_free: | ||||
| 	kfree(buf); | ||||
|  | ||||
| @ -302,7 +302,7 @@ retry: | ||||
| 	 * If we created the page_cgroup, we should free it on exceeding | ||||
| 	 * the cgroup limit. | ||||
| 	 */ | ||||
| 	while (res_counter_charge(&mem->res, 1)) { | ||||
| 	while (res_counter_charge(&mem->res, PAGE_SIZE)) { | ||||
| 		if (try_to_free_mem_cgroup_pages(mem)) | ||||
| 			continue; | ||||
| 
 | ||||
| @ -341,7 +341,7 @@ retry: | ||||
| 		kfree(pc); | ||||
| 		pc = race_pc; | ||||
| 		atomic_inc(&pc->ref_cnt); | ||||
| 		res_counter_uncharge(&mem->res, 1); | ||||
| 		res_counter_uncharge(&mem->res, PAGE_SIZE); | ||||
| 		css_put(&mem->css); | ||||
| 		goto done; | ||||
| 	} | ||||
| @ -384,7 +384,7 @@ void mem_cgroup_uncharge(struct page_cgroup *pc) | ||||
| 		css_put(&mem->css); | ||||
| 		page_assign_page_cgroup(page, NULL); | ||||
| 		unlock_page_cgroup(page); | ||||
| 		res_counter_uncharge(&mem->res, 1); | ||||
| 		res_counter_uncharge(&mem->res, PAGE_SIZE); | ||||
| 
 | ||||
|  		spin_lock_irqsave(&mem->lru_lock, flags); | ||||
|  		list_del_init(&pc->lru); | ||||
| @ -393,12 +393,26 @@ void mem_cgroup_uncharge(struct page_cgroup *pc) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft, | ||||
| 			struct file *file, char __user *userbuf, size_t nbytes, | ||||
| 			loff_t *ppos) | ||||
| int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp) | ||||
| { | ||||
| 	*tmp = memparse(buf, &buf); | ||||
| 	if (*buf != '\0') | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Round up the value to the closest page size | ||||
| 	 */ | ||||
| 	*tmp = ((*tmp + PAGE_SIZE - 1) >> PAGE_SHIFT) << PAGE_SHIFT; | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static ssize_t mem_cgroup_read(struct cgroup *cont, | ||||
| 			struct cftype *cft, struct file *file, | ||||
| 			char __user *userbuf, size_t nbytes, loff_t *ppos) | ||||
| { | ||||
| 	return res_counter_read(&mem_cgroup_from_cont(cont)->res, | ||||
| 				cft->private, userbuf, nbytes, ppos); | ||||
| 				cft->private, userbuf, nbytes, ppos, | ||||
| 				NULL); | ||||
| } | ||||
| 
 | ||||
| static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft, | ||||
| @ -406,17 +420,18 @@ static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft, | ||||
| 				size_t nbytes, loff_t *ppos) | ||||
| { | ||||
| 	return res_counter_write(&mem_cgroup_from_cont(cont)->res, | ||||
| 				cft->private, userbuf, nbytes, ppos); | ||||
| 				cft->private, userbuf, nbytes, ppos, | ||||
| 				mem_cgroup_write_strategy); | ||||
| } | ||||
| 
 | ||||
| static struct cftype mem_cgroup_files[] = { | ||||
| 	{ | ||||
| 		.name = "usage", | ||||
| 		.name = "usage_in_bytes", | ||||
| 		.private = RES_USAGE, | ||||
| 		.read = mem_cgroup_read, | ||||
| 	}, | ||||
| 	{ | ||||
| 		.name = "limit", | ||||
| 		.name = "limit_in_bytes", | ||||
| 		.private = RES_LIMIT, | ||||
| 		.write = mem_cgroup_write, | ||||
| 		.read = mem_cgroup_read, | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user