memcg: use css_tryget in memcg
From:KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
css_tryget() newly is added and we can know css is alive or not and get
refcnt of css in very safe way.  ("alive" here means "rmdir/destroy" is
not called.)
This patch replaces css_get() to css_tryget(), where I cannot explain
why css_get() is safe. And removes memcg->obsolete flag.
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
			
			
This commit is contained in:
		
							parent
							
								
									a7ba0eef3a
								
							
						
					
					
						commit
						54595fe265
					
				| @ -162,7 +162,6 @@ struct mem_cgroup { | ||||
| 	 */ | ||||
| 	bool use_hierarchy; | ||||
| 	unsigned long	last_oom_jiffies; | ||||
| 	int		obsolete; | ||||
| 	atomic_t	refcnt; | ||||
| 
 | ||||
| 	unsigned int	swappiness; | ||||
| @ -283,6 +282,31 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) | ||||
| 				struct mem_cgroup, css); | ||||
| } | ||||
| 
 | ||||
| static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) | ||||
| { | ||||
| 	struct mem_cgroup *mem = NULL; | ||||
| 	/*
 | ||||
| 	 * Because we have no locks, mm->owner's may be being moved to other | ||||
| 	 * cgroup. We use css_tryget() here even if this looks | ||||
| 	 * pessimistic (rather than adding locks here). | ||||
| 	 */ | ||||
| 	rcu_read_lock(); | ||||
| 	do { | ||||
| 		mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); | ||||
| 		if (unlikely(!mem)) | ||||
| 			break; | ||||
| 	} while (!css_tryget(&mem->css)); | ||||
| 	rcu_read_unlock(); | ||||
| 	return mem; | ||||
| } | ||||
| 
 | ||||
| static bool mem_cgroup_is_obsolete(struct mem_cgroup *mem) | ||||
| { | ||||
| 	if (!mem) | ||||
| 		return true; | ||||
| 	return css_is_removed(&mem->css); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Following LRU functions are allowed to be used without PCG_LOCK. | ||||
|  * Operations are called by routine of global LRU independently from memcg. | ||||
| @ -622,8 +646,9 @@ mem_cgroup_get_first_node(struct mem_cgroup *root_mem) | ||||
| { | ||||
| 	struct cgroup *cgroup; | ||||
| 	struct mem_cgroup *ret; | ||||
| 	bool obsolete = (root_mem->last_scanned_child && | ||||
| 				root_mem->last_scanned_child->obsolete); | ||||
| 	bool obsolete; | ||||
| 
 | ||||
| 	obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Scan all children under the mem_cgroup mem | ||||
| @ -636,7 +661,7 @@ mem_cgroup_get_first_node(struct mem_cgroup *root_mem) | ||||
| 
 | ||||
| 	if (!root_mem->last_scanned_child || obsolete) { | ||||
| 
 | ||||
| 		if (obsolete) | ||||
| 		if (obsolete && root_mem->last_scanned_child) | ||||
| 			mem_cgroup_put(root_mem->last_scanned_child); | ||||
| 
 | ||||
| 		cgroup = list_first_entry(&root_mem->css.cgroup->children, | ||||
| @ -711,7 +736,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | ||||
| 	next_mem = mem_cgroup_get_first_node(root_mem); | ||||
| 
 | ||||
| 	while (next_mem != root_mem) { | ||||
| 		if (next_mem->obsolete) { | ||||
| 		if (mem_cgroup_is_obsolete(next_mem)) { | ||||
| 			mem_cgroup_put(next_mem); | ||||
| 			cgroup_lock(); | ||||
| 			next_mem = mem_cgroup_get_first_node(root_mem); | ||||
| @ -769,23 +794,17 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | ||||
| 	 * thread group leader migrates. It's possible that mm is not | ||||
| 	 * set, if so charge the init_mm (happens for pagecache usage). | ||||
| 	 */ | ||||
| 	if (likely(!*memcg)) { | ||||
| 		rcu_read_lock(); | ||||
| 		mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); | ||||
| 		if (unlikely(!mem)) { | ||||
| 			rcu_read_unlock(); | ||||
| 			return 0; | ||||
| 		} | ||||
| 		/*
 | ||||
| 		 * For every charge from the cgroup, increment reference count | ||||
| 		 */ | ||||
| 		css_get(&mem->css); | ||||
| 	mem = *memcg; | ||||
| 	if (likely(!mem)) { | ||||
| 		mem = try_get_mem_cgroup_from_mm(mm); | ||||
| 		*memcg = mem; | ||||
| 		rcu_read_unlock(); | ||||
| 	} else { | ||||
| 		mem = *memcg; | ||||
| 		css_get(&mem->css); | ||||
| 	} | ||||
| 	if (unlikely(!mem)) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	VM_BUG_ON(mem_cgroup_is_obsolete(mem)); | ||||
| 
 | ||||
| 	while (1) { | ||||
| 		int ret; | ||||
| @ -1072,12 +1091,19 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, | ||||
| 				MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * While swap-in, try_charge -> commit or cancel, the page is locked. | ||||
|  * And when try_charge() successfully returns, one refcnt to memcg without | ||||
|  * struct page_cgroup is aquired. This refcnt will be cumsumed by | ||||
|  * "commit()" or removed by "cancel()" | ||||
|  */ | ||||
| int mem_cgroup_try_charge_swapin(struct mm_struct *mm, | ||||
| 				 struct page *page, | ||||
| 				 gfp_t mask, struct mem_cgroup **ptr) | ||||
| { | ||||
| 	struct mem_cgroup *mem; | ||||
| 	swp_entry_t     ent; | ||||
| 	int ret; | ||||
| 
 | ||||
| 	if (mem_cgroup_disabled()) | ||||
| 		return 0; | ||||
| @ -1096,10 +1122,15 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, | ||||
| 	ent.val = page_private(page); | ||||
| 
 | ||||
| 	mem = lookup_swap_cgroup(ent); | ||||
| 	if (!mem || mem->obsolete) | ||||
| 	if (!mem) | ||||
| 		goto charge_cur_mm; | ||||
| 	if (!css_tryget(&mem->css)) | ||||
| 		goto charge_cur_mm; | ||||
| 	*ptr = mem; | ||||
| 	return __mem_cgroup_try_charge(NULL, mask, ptr, true); | ||||
| 	ret = __mem_cgroup_try_charge(NULL, mask, ptr, true); | ||||
| 	/* drop extra refcnt from tryget */ | ||||
| 	css_put(&mem->css); | ||||
| 	return ret; | ||||
| charge_cur_mm: | ||||
| 	if (unlikely(!mm)) | ||||
| 		mm = &init_mm; | ||||
| @ -1130,13 +1161,18 @@ int mem_cgroup_cache_charge_swapin(struct page *page, | ||||
| 		ent.val = page_private(page); | ||||
| 		if (do_swap_account) { | ||||
| 			mem = lookup_swap_cgroup(ent); | ||||
| 			if (mem && mem->obsolete) | ||||
| 				mem = NULL; | ||||
| 			if (mem) | ||||
| 				mm = NULL; | ||||
| 			if (mem) { | ||||
| 				if (css_tryget(&mem->css)) | ||||
| 					mm = NULL; /* charge to recorded */ | ||||
| 				else | ||||
| 					mem = NULL; /* charge to current */ | ||||
| 			} | ||||
| 		} | ||||
| 		ret = mem_cgroup_charge_common(page, mm, mask, | ||||
| 				MEM_CGROUP_CHARGE_TYPE_SHMEM, mem); | ||||
| 		/* drop extra refcnt from tryget */ | ||||
| 		if (mem) | ||||
| 			css_put(&mem->css); | ||||
| 
 | ||||
| 		if (!ret && do_swap_account) { | ||||
| 			/* avoid double counting */ | ||||
| @ -1178,7 +1214,6 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) | ||||
| 		struct mem_cgroup *memcg; | ||||
| 		memcg = swap_cgroup_record(ent, NULL); | ||||
| 		if (memcg) { | ||||
| 			/* If memcg is obsolete, memcg can be != ptr */ | ||||
| 			res_counter_uncharge(&memcg->memsw, PAGE_SIZE); | ||||
| 			mem_cgroup_put(memcg); | ||||
| 		} | ||||
| @ -1421,14 +1456,9 @@ int mem_cgroup_shrink_usage(struct mm_struct *mm, gfp_t gfp_mask) | ||||
| 	if (!mm) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	rcu_read_lock(); | ||||
| 	mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); | ||||
| 	if (unlikely(!mem)) { | ||||
| 		rcu_read_unlock(); | ||||
| 	mem = try_get_mem_cgroup_from_mm(mm); | ||||
| 	if (unlikely(!mem)) | ||||
| 		return 0; | ||||
| 	} | ||||
| 	css_get(&mem->css); | ||||
| 	rcu_read_unlock(); | ||||
| 
 | ||||
| 	do { | ||||
| 		progress = mem_cgroup_hierarchical_reclaim(mem, gfp_mask, true); | ||||
| @ -2086,9 +2116,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void) | ||||
|  * the number of reference from swap_cgroup and free mem_cgroup when | ||||
|  * it goes down to 0. | ||||
|  * | ||||
|  * When mem_cgroup is destroyed, mem->obsolete will be set to 0 and | ||||
|  * entry which points to this memcg will be ignore at swapin. | ||||
|  * | ||||
|  * Removal of cgroup itself succeeds regardless of refs from swap. | ||||
|  */ | ||||
| 
 | ||||
| @ -2174,7 +2201,6 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss, | ||||
| 					struct cgroup *cont) | ||||
| { | ||||
| 	struct mem_cgroup *mem = mem_cgroup_from_cont(cont); | ||||
| 	mem->obsolete = 1; | ||||
| 	mem_cgroup_force_empty(mem, false); | ||||
| } | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user