diff --git a/fs/buffer.c b/fs/buffer.c index 23f645657488..b56f99f82b5b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -657,7 +657,7 @@ int __set_page_dirty_buffers(struct page *page) } while (bh != head); } /* - * Lock out page->mem_cgroup migration to keep PageDirty + * Lock out page's memcg migration to keep PageDirty * synchronized with per-memcg dirty page counters. */ lock_page_memcg(page); diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 10cc7979ce38..16a1e82e3aeb 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -650,7 +650,7 @@ iomap_set_page_dirty(struct page *page) return !TestSetPageDirty(page); /* - * Lock out page->mem_cgroup migration to keep PageDirty + * Lock out page's memcg migration to keep PageDirty * synchronized with per-memcg dirty page counters. */ lock_page_memcg(page); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e1bcb6d7345c..a9de5711b23f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -20,6 +20,8 @@ #include #include #include +#include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -37,6 +39,7 @@ struct bpf_iter_aux_info; struct bpf_local_storage; struct bpf_local_storage_map; struct kobject; +struct mem_cgroup; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -135,11 +138,6 @@ struct bpf_map_ops { const struct bpf_iter_seq_info *iter_seq_info; }; -struct bpf_map_memory { - u32 pages; - struct user_struct *user; -}; - struct bpf_map { /* The first two cachelines with read-mostly members of which some * are also accessed in fast-path (e.g. ops, max_entries). @@ -160,7 +158,9 @@ struct bpf_map { u32 btf_key_type_id; u32 btf_value_type_id; struct btf *btf; - struct bpf_map_memory memory; +#ifdef CONFIG_MEMCG_KMEM + struct mem_cgroup *memcg; +#endif char name[BPF_OBJ_NAME_LEN]; u32 btf_vmlinux_value_type_id; bool bypass_spec_v1; @@ -1202,8 +1202,6 @@ void bpf_prog_sub(struct bpf_prog *prog, int i); void bpf_prog_inc(struct bpf_prog *prog); struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); -int __bpf_prog_charge(struct user_struct *user, u32 pages); -void __bpf_prog_uncharge(struct user_struct *user, u32 pages); void __bpf_free_used_maps(struct bpf_prog_aux *aux, struct bpf_map **used_maps, u32 len); @@ -1218,12 +1216,6 @@ void bpf_map_inc_with_uref(struct bpf_map *map); struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); -int bpf_map_charge_memlock(struct bpf_map *map, u32 pages); -void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages); -int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size); -void bpf_map_charge_finish(struct bpf_map_memory *mem); -void bpf_map_charge_move(struct bpf_map_memory *dst, - struct bpf_map_memory *src); void *bpf_map_area_alloc(u64 size, int numa_node); void *bpf_map_area_mmapable_alloc(u64 size, int numa_node); void bpf_map_area_free(void *base); @@ -1240,6 +1232,34 @@ int generic_map_delete_batch(struct bpf_map *map, struct bpf_map *bpf_map_get_curr_or_next(u32 *id); struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); +#ifdef CONFIG_MEMCG_KMEM +void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, + int node); +void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags); +void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, + size_t align, gfp_t flags); +#else +static inline void * 
+bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, + int node) +{ + return kmalloc_node(size, flags, node); +} + +static inline void * +bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) +{ + return kzalloc(size, flags); +} + +static inline void __percpu * +bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align, + gfp_t flags) +{ + return __alloc_percpu_gfp(size, align, flags); +} +#endif + extern int sysctl_unprivileged_bpf_disabled; static inline bool bpf_allow_ptr_leaks(void) @@ -1490,15 +1510,6 @@ bpf_prog_inc_not_zero(struct bpf_prog *prog) return ERR_PTR(-EOPNOTSUPP); } -static inline int __bpf_prog_charge(struct user_struct *user, u32 pages) -{ - return 0; -} - -static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) -{ -} - static inline void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e391e3c56de5..7c9d43476166 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -343,6 +343,175 @@ struct mem_cgroup { extern struct mem_cgroup *root_mem_cgroup; +enum page_memcg_data_flags { + /* page->memcg_data is a pointer to an objcgs vector */ + MEMCG_DATA_OBJCGS = (1UL << 0), + /* page has been accounted as a non-slab kernel page */ + MEMCG_DATA_KMEM = (1UL << 1), + /* the next bit after the last actual flag */ + __NR_MEMCG_DATA_FLAGS = (1UL << 2), +}; + +#define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1) + +/* + * page_memcg - get the memory cgroup associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the memory cgroup associated with the page, + * or NULL. This function assumes that the page is known to have a + * proper memory cgroup pointer. It's not safe to call this function + * against some types of pages, e.g. slab pages or ex-slab pages. + * + * Any of the following ensures page and memcg binding stability: + * - the page lock + * - LRU isolation + * - lock_page_memcg() + * - exclusive reference + */ +static inline struct mem_cgroup *page_memcg(struct page *page) +{ + unsigned long memcg_data = page->memcg_data; + + VM_BUG_ON_PAGE(PageSlab(page), page); + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page); + + return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); +} + +/* + * page_memcg_rcu - locklessly get the memory cgroup associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the memory cgroup associated with the page, + * or NULL. This function assumes that the page is known to have a + * proper memory cgroup pointer. It's not safe to call this function + * against some types of pages, e.g. slab pages or ex-slab pages. + */ +static inline struct mem_cgroup *page_memcg_rcu(struct page *page) +{ + VM_BUG_ON_PAGE(PageSlab(page), page); + WARN_ON_ONCE(!rcu_read_lock_held()); + + return (struct mem_cgroup *)(READ_ONCE(page->memcg_data) & + ~MEMCG_DATA_FLAGS_MASK); +} + +/* + * page_memcg_check - get the memory cgroup associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the memory cgroup associated with the page, + * or NULL. Unlike page_memcg(), this function can take any page + * as an argument. It has to be used in cases when it's not known whether a page + * has an associated memory cgroup pointer or an object cgroups vector.
+ * + * Any of the following ensures page and memcg binding stability: + * - the page lock + * - LRU isolation + * - lock_page_memcg() + * - exclusive reference + */ +static inline struct mem_cgroup *page_memcg_check(struct page *page) +{ + /* + * Because page->memcg_data might be changed asynchronously + * for slab pages, READ_ONCE() should be used here. + */ + unsigned long memcg_data = READ_ONCE(page->memcg_data); + + if (memcg_data & MEMCG_DATA_OBJCGS) + return NULL; + + return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); +} + +/* + * PageMemcgKmem - check if the page has MemcgKmem flag set + * @page: a pointer to the page struct + * + * Checks if the page has MemcgKmem flag set. The caller must ensure that + * the page has an associated memory cgroup. It's not safe to call this function + * against some types of pages, e.g. slab pages. + */ +static inline bool PageMemcgKmem(struct page *page) +{ + VM_BUG_ON_PAGE(page->memcg_data & MEMCG_DATA_OBJCGS, page); + return page->memcg_data & MEMCG_DATA_KMEM; +} + +#ifdef CONFIG_MEMCG_KMEM +/* + * page_objcgs - get the object cgroups vector associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the object cgroups vector associated with the page, + * or NULL. This function assumes that the page is known to have an + * associated object cgroups vector. It's not safe to call this function + * against pages that might have an associated memory cgroup, e.g. + * kernel stack pages. + */ +static inline struct obj_cgroup **page_objcgs(struct page *page) +{ + unsigned long memcg_data = READ_ONCE(page->memcg_data); + + VM_BUG_ON_PAGE(memcg_data && !(memcg_data & MEMCG_DATA_OBJCGS), page); + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); + + return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); +} + +/* + * page_objcgs_check - get the object cgroups vector associated with a page + * @page: a pointer to the page struct + * + * Returns a pointer to the object cgroups vector associated with the page, + * or NULL. This function is safe to use if the page can be directly associated + * with a memory cgroup. + */ +static inline struct obj_cgroup **page_objcgs_check(struct page *page) +{ + unsigned long memcg_data = READ_ONCE(page->memcg_data); + + if (!memcg_data || !(memcg_data & MEMCG_DATA_OBJCGS)) + return NULL; + + VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page); + + return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); +} + +/* + * set_page_objcgs - associate a page with an object cgroups vector + * @page: a pointer to the page struct + * @objcgs: a pointer to the object cgroups vector + * + * Atomically associates a page with a vector of object cgroups.
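For review context, a sketch of the encoding these accessors rely on: the low bits of page->memcg_data carry the MEMCG_DATA_* flags, and masking them off yields either a mem_cgroup pointer or an obj_cgroup vector pointer, depending on MEMCG_DATA_OBJCGS. The helper names below are hypothetical, not part of this patch:

	static inline bool memcg_data_is_objcgs(unsigned long memcg_data)
	{
		return memcg_data & MEMCG_DATA_OBJCGS;
	}

	static inline void *memcg_data_ptr(unsigned long memcg_data)
	{
		/* Both pointer types are at least word-aligned, so the two
		 * low bits are free to hold MEMCG_DATA_* flags.
		 */
		return (void *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
	}
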
+ */ +static inline bool set_page_objcgs(struct page *page, + struct obj_cgroup **objcgs) +{ + return !cmpxchg(&page->memcg_data, 0, (unsigned long)objcgs | + MEMCG_DATA_OBJCGS); +} +#else +static inline struct obj_cgroup **page_objcgs(struct page *page) +{ + return NULL; +} + +static inline struct obj_cgroup **page_objcgs_check(struct page *page) +{ + return NULL; +} + +static inline bool set_page_objcgs(struct page *page, + struct obj_cgroup **objcgs) +{ + return true; +} +#endif + static __always_inline bool memcg_stat_item_in_bytes(int idx) { if (idx == MEMCG_PERCPU_B) @@ -743,15 +912,19 @@ static inline void mod_memcg_state(struct mem_cgroup *memcg, static inline void __mod_memcg_page_state(struct page *page, int idx, int val) { - if (page->mem_cgroup) - __mod_memcg_state(page->mem_cgroup, idx, val); + struct mem_cgroup *memcg = page_memcg(page); + + if (memcg) + __mod_memcg_state(memcg, idx, val); } static inline void mod_memcg_page_state(struct page *page, int idx, int val) { - if (page->mem_cgroup) - mod_memcg_state(page->mem_cgroup, idx, val); + struct mem_cgroup *memcg = page_memcg(page); + + if (memcg) + mod_memcg_state(memcg, idx, val); } static inline unsigned long lruvec_page_state(struct lruvec *lruvec, @@ -834,16 +1007,17 @@ static inline void __mod_lruvec_page_state(struct page *page, enum node_stat_item idx, int val) { struct page *head = compound_head(page); /* rmap on tail pages */ + struct mem_cgroup *memcg = page_memcg(head); pg_data_t *pgdat = page_pgdat(page); struct lruvec *lruvec; /* Untracked pages have no memcg, no lruvec. Update only the node */ - if (!head->mem_cgroup) { + if (!memcg) { __mod_node_page_state(pgdat, idx, val); return; } - lruvec = mem_cgroup_lruvec(head->mem_cgroup, pgdat); + lruvec = mem_cgroup_lruvec(memcg, pgdat); __mod_lruvec_state(lruvec, idx, val); } @@ -878,8 +1052,10 @@ static inline void count_memcg_events(struct mem_cgroup *memcg, static inline void count_memcg_page_event(struct page *page, enum vm_event_item idx) { - if (page->mem_cgroup) - count_memcg_events(page->mem_cgroup, idx, 1); + struct mem_cgroup *memcg = page_memcg(page); + + if (memcg) + count_memcg_events(memcg, idx, 1); } static inline void count_memcg_event_mm(struct mm_struct *mm, @@ -941,6 +1117,27 @@ void mem_cgroup_split_huge_fixup(struct page *head); struct mem_cgroup; +static inline struct mem_cgroup *page_memcg(struct page *page) +{ + return NULL; +} + +static inline struct mem_cgroup *page_memcg_rcu(struct page *page) +{ + WARN_ON_ONCE(!rcu_read_lock_held()); + return NULL; +} + +static inline struct mem_cgroup *page_memcg_check(struct page *page) +{ + return NULL; +} + +static inline bool PageMemcgKmem(struct page *page) +{ + return false; +} + static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) { return true; @@ -1430,7 +1627,7 @@ static inline void mem_cgroup_track_foreign_dirty(struct page *page, if (mem_cgroup_disabled()) return; - if (unlikely(&page->mem_cgroup->css != wb->memcg_css)) + if (unlikely(&page_memcg(page)->css != wb->memcg_css)) mem_cgroup_track_foreign_dirty_slowpath(page, wb); } diff --git a/include/linux/mm.h b/include/linux/mm.h index db6ae4d3fb4e..6b0c9d2c1d10 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1484,28 +1484,6 @@ static inline void set_page_links(struct page *page, enum zone_type zone, #endif } -#ifdef CONFIG_MEMCG -static inline struct mem_cgroup *page_memcg(struct page *page) -{ - return page->mem_cgroup; -} -static inline struct mem_cgroup *page_memcg_rcu(struct page *page) -{ - 
WARN_ON_ONCE(!rcu_read_lock_held()); - return READ_ONCE(page->mem_cgroup); -} -#else -static inline struct mem_cgroup *page_memcg(struct page *page) -{ - return NULL; -} -static inline struct mem_cgroup *page_memcg_rcu(struct page *page) -{ - WARN_ON_ONCE(!rcu_read_lock_held()); - return NULL; -} -#endif - /* * Some inline functions in vmstat.h depend on page_zone() */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5a9238f6caad..80f5d755c037 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -199,10 +199,7 @@ struct page { atomic_t _refcount; #ifdef CONFIG_MEMCG - union { - struct mem_cgroup *mem_cgroup; - struct obj_cgroup **obj_cgroups; - }; + unsigned long memcg_data; #endif /* diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 4f6ba9379112..fc0e1bd48e73 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -715,9 +715,8 @@ PAGEFLAG_FALSE(DoubleMap) #define PAGE_MAPCOUNT_RESERVE -128 #define PG_buddy 0x00000080 #define PG_offline 0x00000100 -#define PG_kmemcg 0x00000200 -#define PG_table 0x00000400 -#define PG_guard 0x00000800 +#define PG_table 0x00000200 +#define PG_guard 0x00000400 #define PageType(page, flag) \ ((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE) @@ -768,12 +767,6 @@ PAGE_TYPE_OPS(Buddy, buddy) */ PAGE_TYPE_OPS(Offline, offline) -/* - * If kmemcg is enabled, the buddy allocator will set PageKmemcg() on - * pages allocated with __GFP_ACCOUNT. It gets cleared on page free. - */ -PAGE_TYPE_OPS(Kmemcg, kmemcg) - /* * Marks pages in use as page tables. */ diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index e7cbccc7c14c..39a40dfb578a 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -257,7 +257,7 @@ TRACE_EVENT(track_foreign_dirty, __entry->ino = inode ? 
inode->i_ino : 0; __entry->memcg_id = wb->memcg_css->id; __entry->cgroup_ino = __trace_wb_assign_cgroup(wb); - __entry->page_cgroup_ino = cgroup_ino(page->mem_cgroup->css.cgroup); + __entry->page_cgroup_ino = cgroup_ino(page_memcg(page)->css.cgroup); ), TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%lu page_cgroup_ino=%lu", diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index c6c81eceb68f..1f8453343bf2 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -34,8 +34,8 @@ static int bpf_array_alloc_percpu(struct bpf_array *array) int i; for (i = 0; i < array->map.max_entries; i++) { - ptr = __alloc_percpu_gfp(array->elem_size, 8, - GFP_USER | __GFP_NOWARN); + ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8, + GFP_USER | __GFP_NOWARN); if (!ptr) { bpf_array_free_percpu(array); return -ENOMEM; @@ -81,11 +81,10 @@ int array_map_alloc_check(union bpf_attr *attr) static struct bpf_map *array_map_alloc(union bpf_attr *attr) { bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; - int ret, numa_node = bpf_map_attr_numa_node(attr); + int numa_node = bpf_map_attr_numa_node(attr); u32 elem_size, index_mask, max_entries; bool bypass_spec_v1 = bpf_bypass_spec_v1(); - u64 cost, array_size, mask64; - struct bpf_map_memory mem; + u64 array_size, mask64; struct bpf_array *array; elem_size = round_up(attr->value_size, 8); @@ -126,44 +125,29 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) } } - /* make sure there is no u32 overflow later in round_up() */ - cost = array_size; - if (percpu) - cost += (u64)attr->max_entries * elem_size * num_possible_cpus(); - - ret = bpf_map_charge_init(&mem, cost); - if (ret < 0) - return ERR_PTR(ret); - /* allocate all map elements and zero-initialize them */ if (attr->map_flags & BPF_F_MMAPABLE) { void *data; /* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */ data = bpf_map_area_mmapable_alloc(array_size, numa_node); - if (!data) { - bpf_map_charge_finish(&mem); + if (!data) return ERR_PTR(-ENOMEM); - } array = data + PAGE_ALIGN(sizeof(struct bpf_array)) - offsetof(struct bpf_array, value); } else { array = bpf_map_area_alloc(array_size, numa_node); } - if (!array) { - bpf_map_charge_finish(&mem); + if (!array) return ERR_PTR(-ENOMEM); - } array->index_mask = index_mask; array->map.bypass_spec_v1 = bypass_spec_v1; /* copy mandatory map attributes */ bpf_map_init_from_attr(&array->map, attr); - bpf_map_charge_move(&array->map.memory, &mem); array->elem_size = elem_size; if (percpu && bpf_array_alloc_percpu(array)) { - bpf_map_charge_finish(&array->map.memory); bpf_map_area_free(array); return ERR_PTR(-ENOMEM); } @@ -1018,7 +1002,7 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) struct bpf_array_aux *aux; struct bpf_map *map; - aux = kzalloc(sizeof(*aux), GFP_KERNEL); + aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT); if (!aux) return ERR_PTR(-ENOMEM); diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index 5d3a7af9ba9b..dd5aedee99e7 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -67,7 +67,8 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, if (charge_mem && mem_charge(smap, owner, smap->elem_size)) return NULL; - selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN); + selem = bpf_map_kzalloc(&smap->map, smap->elem_size, + GFP_ATOMIC | __GFP_NOWARN); if (selem) { if (value) memcpy(SDATA(selem)->data, value, smap->map.value_size); @@ -264,7 +265,8 @@ int bpf_local_storage_alloc(void *owner, 
if (err) return err; - storage = kzalloc(sizeof(*storage), GFP_ATOMIC | __GFP_NOWARN); + storage = bpf_map_kzalloc(&smap->map, sizeof(*storage), + GFP_ATOMIC | __GFP_NOWARN); if (!storage) { err = -ENOMEM; goto uncharge; @@ -543,10 +545,8 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr) struct bpf_local_storage_map *smap; unsigned int i; u32 nbuckets; - u64 cost; - int ret; - smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN); + smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); if (!smap) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&smap->map, attr); @@ -555,18 +555,10 @@ struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr) /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */ nbuckets = max_t(u32, 2, nbuckets); smap->bucket_log = ilog2(nbuckets); - cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap); - - ret = bpf_map_charge_init(&smap->map.memory, cost); - if (ret < 0) { - kfree(smap); - return ERR_PTR(ret); - } smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets, - GFP_USER | __GFP_NOWARN); + GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); if (!smap->buckets) { - bpf_map_charge_finish(&smap->map.memory); kfree(smap); return ERR_PTR(-ENOMEM); } diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 4c3b543bb33b..1a666a975416 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -548,12 +548,10 @@ static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr) static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) { const struct bpf_struct_ops *st_ops; - size_t map_total_size, st_map_size; + size_t st_map_size; struct bpf_struct_ops_map *st_map; const struct btf_type *t, *vt; - struct bpf_map_memory mem; struct bpf_map *map; - int err; if (!bpf_capable()) return ERR_PTR(-EPERM); @@ -573,20 +571,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) * struct bpf_struct_ops_tcp_congestions_ops */ (vt->size - sizeof(struct bpf_struct_ops_value)); - map_total_size = st_map_size + - /* uvalue */ - sizeof(vt->size) + - /* struct bpf_progs **progs */ - btf_type_vlen(t) * sizeof(struct bpf_prog *); - err = bpf_map_charge_init(&mem, map_total_size); - if (err < 0) - return ERR_PTR(err); st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE); - if (!st_map) { - bpf_map_charge_finish(&mem); + if (!st_map) return ERR_PTR(-ENOMEM); - } + st_map->st_ops = st_ops; map = &st_map->map; @@ -597,14 +586,12 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr) st_map->image = bpf_jit_alloc_exec(PAGE_SIZE); if (!st_map->uvalue || !st_map->progs || !st_map->image) { bpf_struct_ops_map_free(map); - bpf_map_charge_finish(&mem); return ERR_PTR(-ENOMEM); } mutex_init(&st_map->lock); set_vm_flush_reset_perms(st_map->image); bpf_map_init_from_attr(map, attr); - bpf_map_charge_move(&map->memory, &mem); return map; } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ff55cbcfbab4..261f8692d0d2 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -77,7 +77,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags) { - gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; + gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags; struct bpf_prog_aux *aux; struct bpf_prog *fp; @@ -86,7 +86,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t 
gfp_extra_flag if (fp == NULL) return NULL; - aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags); + aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT | gfp_extra_flags); if (aux == NULL) { vfree(fp); return NULL; @@ -106,7 +106,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) { - gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; + gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags; struct bpf_prog *prog; int cpu; @@ -138,7 +138,7 @@ int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog) prog->aux->jited_linfo = kcalloc(prog->aux->nr_linfo, sizeof(*prog->aux->jited_linfo), - GFP_KERNEL | __GFP_NOWARN); + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!prog->aux->jited_linfo) return -ENOMEM; @@ -219,25 +219,17 @@ void bpf_prog_free_linfo(struct bpf_prog *prog) struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags) { - gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; + gfp_t gfp_flags = GFP_KERNEL_ACCOUNT | __GFP_ZERO | gfp_extra_flags; struct bpf_prog *fp; - u32 pages, delta; - int ret; + u32 pages; size = round_up(size, PAGE_SIZE); pages = size / PAGE_SIZE; if (pages <= fp_old->pages) return fp_old; - delta = pages - fp_old->pages; - ret = __bpf_prog_charge(fp_old->aux->user, delta); - if (ret) - return NULL; - fp = __vmalloc(size, gfp_flags); - if (fp == NULL) { - __bpf_prog_uncharge(fp_old->aux->user, delta); - } else { + if (fp) { memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE); fp->pages = pages; fp->aux->prog = fp; diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index c61a23b564aa..747313698178 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -84,8 +84,6 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) u32 value_size = attr->value_size; struct bpf_cpu_map *cmap; int err = -ENOMEM; - u64 cost; - int ret; if (!bpf_capable()) return ERR_PTR(-EPERM); @@ -97,7 +95,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) attr->map_flags & ~BPF_F_NUMA_NODE) return ERR_PTR(-EINVAL); - cmap = kzalloc(sizeof(*cmap), GFP_USER); + cmap = kzalloc(sizeof(*cmap), GFP_USER | __GFP_ACCOUNT); if (!cmap) return ERR_PTR(-ENOMEM); @@ -109,26 +107,14 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) goto free_cmap; } - /* make sure page count doesn't overflow */ - cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *); - - /* Notice returns -EPERM on if map size is larger than memlock limit */ - ret = bpf_map_charge_init(&cmap->map.memory, cost); - if (ret) { - err = ret; - goto free_cmap; - } - /* Alloc array for possible remote "destination" CPUs */ cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *), cmap->map.numa_node); if (!cmap->cpu_map) - goto free_charge; + goto free_cmap; return &cmap->map; -free_charge: - bpf_map_charge_finish(&cmap->map.memory); free_cmap: kfree(cmap); return ERR_PTR(err); @@ -412,7 +398,8 @@ static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd) } static struct bpf_cpu_map_entry * -__cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id) +__cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value, + u32 cpu) { int numa, err, i, fd = value->bpf_prog.fd; gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; @@ -422,13 +409,13 @@ __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id) /* Have map->numa_node, but choose node of 
redirect target CPU */ numa = cpu_to_node(cpu); - rcpu = kzalloc_node(sizeof(*rcpu), gfp, numa); + rcpu = bpf_map_kmalloc_node(map, sizeof(*rcpu), gfp | __GFP_ZERO, numa); if (!rcpu) return NULL; /* Alloc percpu bulkq */ - rcpu->bulkq = __alloc_percpu_gfp(sizeof(*rcpu->bulkq), - sizeof(void *), gfp); + rcpu->bulkq = bpf_map_alloc_percpu(map, sizeof(*rcpu->bulkq), + sizeof(void *), gfp); if (!rcpu->bulkq) goto free_rcu; @@ -438,7 +425,8 @@ __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id) } /* Alloc queue */ - rcpu->queue = kzalloc_node(sizeof(*rcpu->queue), gfp, numa); + rcpu->queue = bpf_map_kmalloc_node(map, sizeof(*rcpu->queue), gfp, + numa); if (!rcpu->queue) goto free_bulkq; @@ -447,7 +435,7 @@ __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id) goto free_queue; rcpu->cpu = cpu; - rcpu->map_id = map_id; + rcpu->map_id = map->id; rcpu->value.qsize = value->qsize; if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd)) @@ -455,7 +443,8 @@ __cpu_map_entry_alloc(struct bpf_cpumap_val *value, u32 cpu, int map_id) /* Setup kthread */ rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa, - "cpumap/%d/map:%d", cpu, map_id); + "cpumap/%d/map:%d", cpu, + map->id); if (IS_ERR(rcpu->kthread)) goto free_prog; @@ -571,7 +560,7 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value, rcpu = NULL; /* Same as deleting */ } else { /* Updating qsize cause re-allocation of bpf_cpu_map_entry */ - rcpu = __cpu_map_entry_alloc(&cpumap_value, key_cpu, map->id); + rcpu = __cpu_map_entry_alloc(map, &cpumap_value, key_cpu); if (!rcpu) return -ENOMEM; rcpu->cmap = cmap; diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 2b5ca93c17de..f6e9c68afdd4 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -109,8 +109,6 @@ static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab, static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) { u32 valsize = attr->value_size; - u64 cost = 0; - int err; /* check sanity of attributes. 
2 value sizes supported: * 4 bytes: ifindex @@ -135,21 +133,13 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) if (!dtab->n_buckets) /* Overflow check */ return -EINVAL; - cost += (u64) sizeof(struct hlist_head) * dtab->n_buckets; - } else { - cost += (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *); } - /* if map size is larger than memlock limit, reject it */ - err = bpf_map_charge_init(&dtab->map.memory, cost); - if (err) - return -EINVAL; - if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets, dtab->map.numa_node); if (!dtab->dev_index_head) - goto free_charge; + return -ENOMEM; spin_lock_init(&dtab->index_lock); } else { @@ -157,14 +147,10 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) sizeof(struct bpf_dtab_netdev *), dtab->map.numa_node); if (!dtab->netdev_map) - goto free_charge; + return -ENOMEM; } return 0; - -free_charge: - bpf_map_charge_finish(&dtab->map.memory); - return -ENOMEM; } static struct bpf_map *dev_map_alloc(union bpf_attr *attr) @@ -175,7 +161,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) if (!capable(CAP_NET_ADMIN)) return ERR_PTR(-EPERM); - dtab = kzalloc(sizeof(*dtab), GFP_USER); + dtab = kzalloc(sizeof(*dtab), GFP_USER | __GFP_ACCOUNT); if (!dtab) return ERR_PTR(-ENOMEM); @@ -602,8 +588,9 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net, struct bpf_prog *prog = NULL; struct bpf_dtab_netdev *dev; - dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN, - dtab->map.numa_node); + dev = bpf_map_kmalloc_node(&dtab->map, sizeof(*dev), + GFP_ATOMIC | __GFP_NOWARN, + dtab->map.numa_node); if (!dev) return ERR_PTR(-ENOMEM); diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index ec46266aaf1c..fe7a0733a63a 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -292,7 +292,8 @@ static int prealloc_init(struct bpf_htab *htab) u32 size = round_up(htab->map.value_size, 8); void __percpu *pptr; - pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN); + pptr = bpf_map_alloc_percpu(&htab->map, size, 8, + GFP_USER | __GFP_NOWARN); if (!pptr) goto free_elems; htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size, @@ -346,8 +347,8 @@ static int alloc_extra_elems(struct bpf_htab *htab) struct pcpu_freelist_node *l; int cpu; - pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8, - GFP_USER | __GFP_NOWARN); + pptr = bpf_map_alloc_percpu(&htab->map, sizeof(struct htab_elem *), 8, + GFP_USER | __GFP_NOWARN); if (!pptr) return -ENOMEM; @@ -442,9 +443,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC); struct bpf_htab *htab; int err, i; - u64 cost; - htab = kzalloc(sizeof(*htab), GFP_USER); + htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_ACCOUNT); if (!htab) return ERR_PTR(-ENOMEM); @@ -480,30 +480,18 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) htab->n_buckets > U32_MAX / sizeof(struct bucket)) goto free_htab; - cost = (u64) htab->n_buckets * sizeof(struct bucket) + - (u64) htab->elem_size * htab->map.max_entries; - - if (percpu) - cost += (u64) round_up(htab->map.value_size, 8) * - num_possible_cpus() * htab->map.max_entries; - else - cost += (u64) htab->elem_size * num_possible_cpus(); - - /* if map size is larger than memlock limit, reject it */ - err = bpf_map_charge_init(&htab->map.memory, cost); - if (err) - goto free_htab; - err = -ENOMEM; htab->buckets = bpf_map_area_alloc(htab->n_buckets * 
sizeof(struct bucket), htab->map.numa_node); if (!htab->buckets) - goto free_charge; + goto free_htab; for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) { - htab->map_locked[i] = __alloc_percpu_gfp(sizeof(int), - sizeof(int), GFP_USER); + htab->map_locked[i] = bpf_map_alloc_percpu(&htab->map, + sizeof(int), + sizeof(int), + GFP_USER); if (!htab->map_locked[i]) goto free_map_locked; } @@ -538,8 +526,6 @@ free_map_locked: for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) free_percpu(htab->map_locked[i]); bpf_map_area_free(htab->buckets); -free_charge: - bpf_map_charge_finish(&htab->map.memory); free_htab: lockdep_unregister_key(&htab->lockdep_key); kfree(htab); @@ -925,8 +911,9 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, l_new = ERR_PTR(-E2BIG); goto dec_count; } - l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN, - htab->map.numa_node); + l_new = bpf_map_kmalloc_node(&htab->map, htab->elem_size, + GFP_ATOMIC | __GFP_NOWARN, + htab->map.numa_node); if (!l_new) { l_new = ERR_PTR(-ENOMEM); goto dec_count; @@ -942,8 +929,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, pptr = htab_elem_get_ptr(l_new, key_size); } else { /* alloc_percpu zero-fills */ - pptr = __alloc_percpu_gfp(size, 8, - GFP_ATOMIC | __GFP_NOWARN); + pptr = bpf_map_alloc_percpu(&htab->map, size, 8, + GFP_ATOMIC | __GFP_NOWARN); if (!pptr) { kfree(l_new); l_new = ERR_PTR(-ENOMEM); diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 571bb351ed3b..2d4f9ac12377 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -164,10 +164,10 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *key, return 0; } - new = kmalloc_node(sizeof(struct bpf_storage_buffer) + - map->value_size, - __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN, - map->numa_node); + new = bpf_map_kmalloc_node(map, sizeof(struct bpf_storage_buffer) + + map->value_size, + __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN, + map->numa_node); if (!new) return -ENOMEM; @@ -287,8 +287,6 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) { int numa_node = bpf_map_attr_numa_node(attr); struct bpf_cgroup_storage_map *map; - struct bpf_map_memory mem; - int ret; if (attr->key_size != sizeof(struct bpf_cgroup_storage_key) && attr->key_size != sizeof(__u64)) @@ -308,18 +306,10 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) /* max_entries is not used and enforced to be 0 */ return ERR_PTR(-EINVAL); - ret = bpf_map_charge_init(&mem, sizeof(struct bpf_cgroup_storage_map)); - if (ret < 0) - return ERR_PTR(ret); - map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map), - __GFP_ZERO | GFP_USER, numa_node); - if (!map) { - bpf_map_charge_finish(&mem); + __GFP_ZERO | GFP_USER | __GFP_ACCOUNT, numa_node); + if (!map) return ERR_PTR(-ENOMEM); - } - - bpf_map_charge_move(&map->map.memory, &mem); /* copy mandatory map attributes */ bpf_map_init_from_attr(&map->map, attr); @@ -496,9 +486,9 @@ static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages) struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { + const gfp_t gfp = __GFP_ZERO | GFP_USER; struct bpf_cgroup_storage *storage; struct bpf_map *map; - gfp_t flags; size_t size; u32 pages; @@ -508,23 +498,19 @@ struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, size = bpf_cgroup_storage_calculate_size(map, &pages); - if (bpf_map_charge_memlock(map, pages)) - return ERR_PTR(-EPERM); - - 
storage = kmalloc_node(sizeof(struct bpf_cgroup_storage), - __GFP_ZERO | GFP_USER, map->numa_node); + storage = bpf_map_kmalloc_node(map, sizeof(struct bpf_cgroup_storage), + gfp, map->numa_node); if (!storage) goto enomem; - flags = __GFP_ZERO | GFP_USER; - if (stype == BPF_CGROUP_STORAGE_SHARED) { - storage->buf = kmalloc_node(size, flags, map->numa_node); + storage->buf = bpf_map_kmalloc_node(map, size, gfp, + map->numa_node); if (!storage->buf) goto enomem; check_and_init_map_lock(map, storage->buf->data); } else { - storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags); + storage->percpu_buf = bpf_map_alloc_percpu(map, size, 8, gfp); if (!storage->percpu_buf) goto enomem; } @@ -534,7 +520,6 @@ struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog, return storage; enomem: - bpf_map_uncharge_memlock(map, pages); kfree(storage); return ERR_PTR(-ENOMEM); } @@ -561,16 +546,11 @@ void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage) { enum bpf_cgroup_storage_type stype; struct bpf_map *map; - u32 pages; if (!storage) return; map = &storage->map->map; - - bpf_cgroup_storage_calculate_size(map, &pages); - bpf_map_uncharge_memlock(map, pages); - stype = cgroup_storage_type(map); if (stype == BPF_CGROUP_STORAGE_SHARED) call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu); diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 00e32f2ec3e6..cec792a17e5f 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -282,8 +282,8 @@ static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie, if (value) size += trie->map.value_size; - node = kmalloc_node(size, GFP_ATOMIC | __GFP_NOWARN, - trie->map.numa_node); + node = bpf_map_kmalloc_node(&trie->map, size, GFP_ATOMIC | __GFP_NOWARN, + trie->map.numa_node); if (!node) return NULL; @@ -540,8 +540,6 @@ out: static struct bpf_map *trie_alloc(union bpf_attr *attr) { struct lpm_trie *trie; - u64 cost = sizeof(*trie), cost_per_node; - int ret; if (!bpf_capable()) return ERR_PTR(-EPERM); @@ -557,7 +555,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) attr->value_size > LPM_VAL_SIZE_MAX) return ERR_PTR(-EINVAL); - trie = kzalloc(sizeof(*trie), GFP_USER | __GFP_NOWARN); + trie = kzalloc(sizeof(*trie), GFP_USER | __GFP_NOWARN | __GFP_ACCOUNT); if (!trie) return ERR_PTR(-ENOMEM); @@ -567,20 +565,9 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr) offsetof(struct bpf_lpm_trie_key, data); trie->max_prefixlen = trie->data_size * 8; - cost_per_node = sizeof(struct lpm_trie_node) + - attr->value_size + trie->data_size; - cost += (u64) attr->max_entries * cost_per_node; - - ret = bpf_map_charge_init(&trie->map.memory, cost); - if (ret) - goto out_err; - spin_lock_init(&trie->lock); return &trie->map; -out_err: - kfree(trie); - return ERR_PTR(ret); } static void trie_free(struct bpf_map *map) diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c index 0ee2347ba510..f9c734aaa990 100644 --- a/kernel/bpf/queue_stack_maps.c +++ b/kernel/bpf/queue_stack_maps.c @@ -66,29 +66,21 @@ static int queue_stack_map_alloc_check(union bpf_attr *attr) static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr) { - int ret, numa_node = bpf_map_attr_numa_node(attr); - struct bpf_map_memory mem = {0}; + int numa_node = bpf_map_attr_numa_node(attr); struct bpf_queue_stack *qs; - u64 size, queue_size, cost; + u64 size, queue_size; size = (u64) attr->max_entries + 1; - cost = queue_size = sizeof(*qs) + size * attr->value_size; - - ret = bpf_map_charge_init(&mem, cost); - 
if (ret < 0) - return ERR_PTR(ret); + queue_size = sizeof(*qs) + size * attr->value_size; qs = bpf_map_area_alloc(queue_size, numa_node); - if (!qs) { - bpf_map_charge_finish(&mem); + if (!qs) return ERR_PTR(-ENOMEM); - } memset(qs, 0, sizeof(*qs)); bpf_map_init_from_attr(&qs->map, attr); - bpf_map_charge_move(&qs->map.memory, &mem); qs->size = size; raw_spin_lock_init(&qs->lock); diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c index a55cd542f2ce..4838922f723d 100644 --- a/kernel/bpf/reuseport_array.c +++ b/kernel/bpf/reuseport_array.c @@ -150,9 +150,8 @@ static void reuseport_array_free(struct bpf_map *map) static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr) { - int err, numa_node = bpf_map_attr_numa_node(attr); + int numa_node = bpf_map_attr_numa_node(attr); struct reuseport_array *array; - struct bpf_map_memory mem; u64 array_size; if (!bpf_capable()) @@ -161,20 +160,13 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr) array_size = sizeof(*array); array_size += (u64)attr->max_entries * sizeof(struct sock *); - err = bpf_map_charge_init(&mem, array_size); - if (err) - return ERR_PTR(err); - /* allocate all map elements and zero-initialize them */ array = bpf_map_area_alloc(array_size, numa_node); - if (!array) { - bpf_map_charge_finish(&mem); + if (!array) return ERR_PTR(-ENOMEM); - } /* copy mandatory map attributes */ bpf_map_init_from_attr(&array->map, attr); - bpf_map_charge_move(&array->map.memory, &mem); return &array->map; } diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 31cb04a4dd2d..f25b719ac786 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -48,7 +48,6 @@ struct bpf_ringbuf { struct bpf_ringbuf_map { struct bpf_map map; - struct bpf_map_memory memory; struct bpf_ringbuf *rb; }; @@ -60,8 +59,8 @@ struct bpf_ringbuf_hdr { static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node) { - const gfp_t flags = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN | - __GFP_ZERO; + const gfp_t flags = GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL | + __GFP_NOWARN | __GFP_ZERO; int nr_meta_pages = RINGBUF_PGOFF + RINGBUF_POS_PAGES; int nr_data_pages = data_sz >> PAGE_SHIFT; int nr_pages = nr_meta_pages + nr_data_pages; @@ -88,10 +87,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node) * user-space implementations significantly. 
*/ array_size = (nr_meta_pages + 2 * nr_data_pages) * sizeof(*pages); - if (array_size > PAGE_SIZE) - pages = vmalloc_node(array_size, numa_node); - else - pages = kmalloc_node(array_size, flags, numa_node); + pages = bpf_map_area_alloc(array_size, numa_node); if (!pages) return NULL; @@ -134,7 +130,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node) rb = bpf_ringbuf_area_alloc(data_sz, numa_node); if (!rb) - return ERR_PTR(-ENOMEM); + return NULL; spin_lock_init(&rb->spinlock); init_waitqueue_head(&rb->waitq); @@ -150,8 +146,6 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node) static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr) { struct bpf_ringbuf_map *rb_map; - u64 cost; - int err; if (attr->map_flags & ~RINGBUF_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); @@ -167,32 +161,19 @@ static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr) return ERR_PTR(-E2BIG); #endif - rb_map = kzalloc(sizeof(*rb_map), GFP_USER); + rb_map = kzalloc(sizeof(*rb_map), GFP_USER | __GFP_ACCOUNT); if (!rb_map) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&rb_map->map, attr); - cost = sizeof(struct bpf_ringbuf_map) + - sizeof(struct bpf_ringbuf) + - attr->max_entries; - err = bpf_map_charge_init(&rb_map->map.memory, cost); - if (err) - goto err_free_map; - rb_map->rb = bpf_ringbuf_alloc(attr->max_entries, rb_map->map.numa_node); - if (IS_ERR(rb_map->rb)) { - err = PTR_ERR(rb_map->rb); - goto err_uncharge; + if (!rb_map->rb) { + kfree(rb_map); + return ERR_PTR(-ENOMEM); } return &rb_map->map; - -err_uncharge: - bpf_map_charge_finish(&rb_map->map.memory); -err_free_map: - kfree(rb_map); - return ERR_PTR(err); } static void bpf_ringbuf_free(struct bpf_ringbuf *rb) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 06065fa27124..3325add8e629 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -90,7 +90,6 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) { u32 value_size = attr->value_size; struct bpf_stack_map *smap; - struct bpf_map_memory mem; u64 cost, n_buckets; int err; @@ -119,15 +118,9 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap); cost += n_buckets * (value_size + sizeof(struct stack_map_bucket)); - err = bpf_map_charge_init(&mem, cost); - if (err) - return ERR_PTR(err); - smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr)); - if (!smap) { - bpf_map_charge_finish(&mem); + if (!smap) return ERR_PTR(-ENOMEM); - } bpf_map_init_from_attr(&smap->map, attr); smap->map.value_size = value_size; @@ -135,20 +128,17 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr) err = get_callchain_buffers(sysctl_perf_event_max_stack); if (err) - goto free_charge; + goto free_smap; err = prealloc_elems_and_freelist(smap); if (err) goto put_buffers; - bpf_map_charge_move(&smap->map.memory, &mem); - return &smap->map; put_buffers: put_callchain_buffers(); -free_charge: - bpf_map_charge_finish(&mem); +free_smap: bpf_map_area_free(smap); return ERR_PTR(err); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f3fe9f53f93c..d16dd4945100 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -31,6 +31,7 @@ #include #include #include +#include #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \ @@ -127,7 +128,7 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) return map; } -static u32 
bpf_map_value_size(struct bpf_map *map) +static u32 bpf_map_value_size(const struct bpf_map *map) { if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || @@ -267,6 +268,10 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value, return err; } +/* Please, do not use this function outside from the map creation path + * (e.g. in map update path) without taking care of setting the active + * memory cgroup (see at bpf_map_kmalloc_node() for example). + */ static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable) { /* We really just want to fail instead of triggering OOM killer @@ -279,7 +284,7 @@ static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable) * __GFP_RETRY_MAYFAIL to avoid such situations. */ - const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO; + const gfp_t gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_ACCOUNT; unsigned int flags = 0; unsigned long align = 1; void *area; @@ -341,77 +346,6 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr) map->numa_node = bpf_map_attr_numa_node(attr); } -static int bpf_charge_memlock(struct user_struct *user, u32 pages) -{ - unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - if (atomic_long_add_return(pages, &user->locked_vm) > memlock_limit) { - atomic_long_sub(pages, &user->locked_vm); - return -EPERM; - } - return 0; -} - -static void bpf_uncharge_memlock(struct user_struct *user, u32 pages) -{ - if (user) - atomic_long_sub(pages, &user->locked_vm); -} - -int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size) -{ - u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT; - struct user_struct *user; - int ret; - - if (size >= U32_MAX - PAGE_SIZE) - return -E2BIG; - - user = get_current_user(); - ret = bpf_charge_memlock(user, pages); - if (ret) { - free_uid(user); - return ret; - } - - mem->pages = pages; - mem->user = user; - - return 0; -} - -void bpf_map_charge_finish(struct bpf_map_memory *mem) -{ - bpf_uncharge_memlock(mem->user, mem->pages); - free_uid(mem->user); -} - -void bpf_map_charge_move(struct bpf_map_memory *dst, - struct bpf_map_memory *src) -{ - *dst = *src; - - /* Make sure src will not be used for the redundant uncharging. 
*/ - memset(src, 0, sizeof(struct bpf_map_memory)); -} - -int bpf_map_charge_memlock(struct bpf_map *map, u32 pages) -{ - int ret; - - ret = bpf_charge_memlock(map->memory.user, pages); - if (ret) - return ret; - map->memory.pages += pages; - return ret; -} - -void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages) -{ - bpf_uncharge_memlock(map->memory.user, pages); - map->memory.pages -= pages; -} - static int bpf_map_alloc_id(struct bpf_map *map) { int id; @@ -456,17 +390,74 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock) __release(&map_idr_lock); } +#ifdef CONFIG_MEMCG_KMEM +static void bpf_map_save_memcg(struct bpf_map *map) +{ + map->memcg = get_mem_cgroup_from_mm(current->mm); +} + +static void bpf_map_release_memcg(struct bpf_map *map) +{ + mem_cgroup_put(map->memcg); +} + +void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags, + int node) +{ + struct mem_cgroup *old_memcg; + void *ptr; + + old_memcg = set_active_memcg(map->memcg); + ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node); + set_active_memcg(old_memcg); + + return ptr; +} + +void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags) +{ + struct mem_cgroup *old_memcg; + void *ptr; + + old_memcg = set_active_memcg(map->memcg); + ptr = kzalloc(size, flags | __GFP_ACCOUNT); + set_active_memcg(old_memcg); + + return ptr; +} + +void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, + size_t align, gfp_t flags) +{ + struct mem_cgroup *old_memcg; + void __percpu *ptr; + + old_memcg = set_active_memcg(map->memcg); + ptr = __alloc_percpu_gfp(size, align, flags | __GFP_ACCOUNT); + set_active_memcg(old_memcg); + + return ptr; +} + +#else +static void bpf_map_save_memcg(struct bpf_map *map) +{ +} + +static void bpf_map_release_memcg(struct bpf_map *map) +{ +} +#endif + /* called from workqueue */ static void bpf_map_free_deferred(struct work_struct *work) { struct bpf_map *map = container_of(work, struct bpf_map, work); - struct bpf_map_memory mem; - bpf_map_charge_move(&mem, &map->memory); security_bpf_map_free(map); + bpf_map_release_memcg(map); /* implementation dependent freeing */ map->ops->map_free(map); - bpf_map_charge_finish(&mem); } static void bpf_map_put_uref(struct bpf_map *map) @@ -527,6 +518,19 @@ static fmode_t map_get_sys_perms(struct bpf_map *map, struct fd f) } #ifdef CONFIG_PROC_FS +/* Provides an approximation of the map's memory footprint. + * Used only to provide backward compatibility and display + * a reasonable "memlock" info.
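For review context, a sketch of how map implementations are expected to allocate at element-update time with the new helpers, so the memory is charged to the memcg saved by bpf_map_save_memcg() at map creation rather than to whichever task performs the update. struct my_elem and my_map_alloc_elem() are hypothetical:

	struct my_elem {
		struct hlist_node node;
		char data[];
	};

	static struct my_elem *my_map_alloc_elem(struct bpf_map *map, u32 size)
	{
		/* bpf_map_kmalloc_node() temporarily makes map->memcg the
		 * active memcg and ORs in __GFP_ACCOUNT, so the allocation
		 * is charged to the map's creator, not to current.
		 */
		return bpf_map_kmalloc_node(map, sizeof(struct my_elem) + size,
					    GFP_ATOMIC | __GFP_NOWARN,
					    map->numa_node);
	}
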
+ */ +static unsigned long bpf_map_memory_footprint(const struct bpf_map *map) +{ + unsigned long size; + + size = round_up(map->key_size + bpf_map_value_size(map), 8); + + return round_up(map->max_entries * size, PAGE_SIZE); +} + static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) { const struct bpf_map *map = filp->private_data; @@ -545,7 +549,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) "value_size:\t%u\n" "max_entries:\t%u\n" "map_flags:\t%#x\n" - "memlock:\t%llu\n" + "memlock:\t%lu\n" "map_id:\t%u\n" "frozen:\t%u\n", map->map_type, @@ -553,7 +557,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) map->value_size, map->max_entries, map->map_flags, - map->memory.pages * 1ULL << PAGE_SHIFT, + bpf_map_memory_footprint(map), map->id, READ_ONCE(map->frozen)); if (type) { @@ -796,7 +800,6 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, static int map_create(union bpf_attr *attr) { int numa_node = bpf_map_attr_numa_node(attr); - struct bpf_map_memory mem; struct bpf_map *map; int f_flags; int err; @@ -875,6 +878,8 @@ static int map_create(union bpf_attr *attr) if (err) goto free_map_sec; + bpf_map_save_memcg(map); + err = bpf_map_new_fd(map, f_flags); if (err < 0) { /* failed to allocate fd. @@ -893,9 +898,7 @@ free_map_sec: security_bpf_map_free(map); free_map: btf_put(map->btf); - bpf_map_charge_move(&mem, &map->memory); map->ops->map_free(map); - bpf_map_charge_finish(&mem); return err; } @@ -1629,51 +1632,6 @@ static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) audit_log_end(ab); } -int __bpf_prog_charge(struct user_struct *user, u32 pages) -{ - unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - unsigned long user_bufs; - - if (user) { - user_bufs = atomic_long_add_return(pages, &user->locked_vm); - if (user_bufs > memlock_limit) { - atomic_long_sub(pages, &user->locked_vm); - return -EPERM; - } - } - - return 0; -} - -void __bpf_prog_uncharge(struct user_struct *user, u32 pages) -{ - if (user) - atomic_long_sub(pages, &user->locked_vm); -} - -static int bpf_prog_charge_memlock(struct bpf_prog *prog) -{ - struct user_struct *user = get_current_user(); - int ret; - - ret = __bpf_prog_charge(user, prog->pages); - if (ret) { - free_uid(user); - return ret; - } - - prog->aux->user = user; - return 0; -} - -static void bpf_prog_uncharge_memlock(struct bpf_prog *prog) -{ - struct user_struct *user = prog->aux->user; - - __bpf_prog_uncharge(user, prog->pages); - free_uid(user); -} - static int bpf_prog_alloc_id(struct bpf_prog *prog) { int id; @@ -1723,7 +1681,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) kvfree(aux->func_info); kfree(aux->func_info_aux); - bpf_prog_uncharge_memlock(aux->prog); + free_uid(aux->user); security_bpf_prog_free(aux); bpf_prog_free(aux->prog); } @@ -2161,7 +2119,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) dst_prog = bpf_prog_get(attr->attach_prog_fd); if (IS_ERR(dst_prog)) { err = PTR_ERR(dst_prog); - goto free_prog_nouncharge; + goto free_prog; } prog->aux->dst_prog = dst_prog; } @@ -2171,18 +2129,15 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) err = security_bpf_prog_alloc(prog->aux); if (err) - goto free_prog_nouncharge; - - err = bpf_prog_charge_memlock(prog); - if (err) - goto free_prog_sec; + goto free_prog; + prog->aux->user = get_current_user(); prog->len = attr->insn_cnt; err = -EFAULT; if (copy_from_user(prog->insns, 
u64_to_user_ptr(attr->insns), bpf_prog_insn_size(prog)) != 0) - goto free_prog; + goto free_prog_sec; prog->orig_prog = NULL; prog->jited = 0; @@ -2193,19 +2148,19 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) if (bpf_prog_is_dev_bound(prog->aux)) { err = bpf_prog_offload_init(prog, attr); if (err) - goto free_prog; + goto free_prog_sec; } /* find program type: socket_filter vs tracing_filter */ err = find_prog_type(type, prog); if (err < 0) - goto free_prog; + goto free_prog_sec; prog->aux->load_time = ktime_get_boottime_ns(); err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name, sizeof(attr->prog_name)); if (err < 0) - goto free_prog; + goto free_prog_sec; /* run eBPF verifier */ err = bpf_check(&prog, attr, uattr); @@ -2250,11 +2205,10 @@ free_used_maps: */ __bpf_prog_put_noref(prog, prog->aux->func_cnt); return err; -free_prog: - bpf_prog_uncharge_memlock(prog); free_prog_sec: + free_uid(prog->aux->user); security_bpf_prog_free(prog->aux); -free_prog_nouncharge: +free_prog: bpf_prog_free(prog); return err; } diff --git a/kernel/fork.c b/kernel/fork.c index 6d266388d380..cbd4f6f58409 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -404,9 +404,10 @@ static int memcg_charge_kernel_stack(struct task_struct *tsk) for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) { /* - * If memcg_kmem_charge_page() fails, page->mem_cgroup - * pointer is NULL, and memcg_kmem_uncharge_page() in - * free_thread_stack() will ignore this page. + * If memcg_kmem_charge_page() fails, page's + * memory cgroup pointer is NULL, and + * memcg_kmem_uncharge_page() in free_thread_stack() + * will ignore this page. */ ret = memcg_kmem_charge_page(vm->pages[i], GFP_KERNEL, 0); diff --git a/mm/debug.c b/mm/debug.c index ccca576b2899..8a40b3fefbeb 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -182,8 +182,8 @@ hex_only: pr_warn("page dumped because: %s\n", reason); #ifdef CONFIG_MEMCG - if (!page_poisoned && page->mem_cgroup) - pr_warn("page->mem_cgroup:%px\n", page->mem_cgroup); + if (!page_poisoned && page->memcg_data) - pr_warn("page's memcg:%lx\n", page->memcg_data); #endif } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 9474dbc150ed..cedfb3503411 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -470,7 +470,7 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) #ifdef CONFIG_MEMCG static inline struct deferred_split *get_deferred_split_queue(struct page *page) { - struct mem_cgroup *memcg = compound_head(page)->mem_cgroup; + struct mem_cgroup *memcg = page_memcg(compound_head(page)); struct pglist_data *pgdat = NODE_DATA(page_to_nid(page)); if (memcg) @@ -2765,7 +2765,7 @@ void deferred_split_huge_page(struct page *page) { struct deferred_split *ds_queue = get_deferred_split_queue(page); #ifdef CONFIG_MEMCG - struct mem_cgroup *memcg = compound_head(page)->mem_cgroup; + struct mem_cgroup *memcg = page_memcg(compound_head(page)); #endif unsigned long flags; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3dcbf24d2227..e0366e306221 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -533,7 +533,7 @@ struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page) { struct mem_cgroup *memcg; - memcg = page->mem_cgroup; + memcg = page_memcg(page); if (!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) memcg = root_mem_cgroup; @@ -560,16 +560,7 @@ ino_t page_cgroup_ino(struct page *page) unsigned long ino = 0; rcu_read_lock(); - memcg = page->mem_cgroup; - - /* - * The lowest bit set means that memcg isn't a valid - * memcg pointer, but a
obj_cgroups pointer. - * In this case the page is shared and doesn't belong - * to any specific memory cgroup. - */ - if ((unsigned long) memcg & 0x1UL) - memcg = NULL; + memcg = page_memcg_check(page); while (memcg && !(memcg->css.flags & CSS_ONLINE)) memcg = parent_mem_cgroup(memcg); @@ -1050,7 +1041,7 @@ EXPORT_SYMBOL(get_mem_cgroup_from_mm); */ struct mem_cgroup *get_mem_cgroup_from_page(struct page *page) { - struct mem_cgroup *memcg = page->mem_cgroup; + struct mem_cgroup *memcg = page_memcg(page); if (mem_cgroup_disabled()) return NULL; @@ -1349,7 +1340,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgd goto out; } - memcg = page->mem_cgroup; + memcg = page_memcg(page); /* * Swapcache readahead pages are added to the LRU - and * possibly migrated - before they are charged. @@ -2109,7 +2100,7 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg) } /** - * lock_page_memcg - lock a page->mem_cgroup binding + * lock_page_memcg - lock a page and memcg binding * @page: the page * * This function protects unlocked LRU pages from being moved to @@ -2141,7 +2132,7 @@ struct mem_cgroup *lock_page_memcg(struct page *page) if (mem_cgroup_disabled()) return NULL; again: - memcg = head->mem_cgroup; + memcg = page_memcg(head); if (unlikely(!memcg)) return NULL; @@ -2149,7 +2140,7 @@ again: return memcg; spin_lock_irqsave(&memcg->move_lock, flags); - if (memcg != head->mem_cgroup) { + if (memcg != page_memcg(head)) { spin_unlock_irqrestore(&memcg->move_lock, flags); goto again; } @@ -2187,14 +2178,14 @@ void __unlock_page_memcg(struct mem_cgroup *memcg) } /** - * unlock_page_memcg - unlock a page->mem_cgroup binding + * unlock_page_memcg - unlock a page and memcg binding * @page: the page */ void unlock_page_memcg(struct page *page) { struct page *head = compound_head(page); - __unlock_page_memcg(head->mem_cgroup); + __unlock_page_memcg(page_memcg(head)); } EXPORT_SYMBOL(unlock_page_memcg); @@ -2884,7 +2875,7 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) static void commit_charge(struct page *page, struct mem_cgroup *memcg) { - VM_BUG_ON_PAGE(page->mem_cgroup, page); + VM_BUG_ON_PAGE(page_memcg(page), page); /* * Any of the following ensures page->mem_cgroup stability: * @@ -2893,7 +2884,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg) * - lock_page_memcg() * - exclusive reference */ - page->mem_cgroup = memcg; + page->memcg_data = (unsigned long)memcg; } #ifdef CONFIG_MEMCG_KMEM @@ -2908,8 +2899,7 @@ int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, if (!vec) return -ENOMEM; - if (cmpxchg(&page->obj_cgroups, NULL, - (struct obj_cgroup **) ((unsigned long)vec | 0x1UL))) + if (!set_page_objcgs(page, vec)) kfree(vec); else kmemleak_not_leak(vec); @@ -2920,6 +2910,12 @@ int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, /* * Returns a pointer to the memory cgroup to which the kernel object is charged. * + * A passed kernel object can be a slab object or a generic kernel page, so + * different mechanisms for getting the memory cgroup pointer should be used. + * In certain cases (e.g. kernel stacks or large kmallocs with SLUB) the caller + * can not know for sure how the kernel object is implemented. + * mem_cgroup_from_obj() can be safely used in such cases. + * * The caller must ensure the memcg lifetime, e.g. by taking rcu_read_lock(), * cgroup_mutex, etc. 
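For review context, a hypothetical caller honoring the lifetime rule stated above; obj_memcg_ino() is illustrative only and mirrors the page_cgroup_ino() pattern earlier in this file:

	static ino_t obj_memcg_ino(void *p)
	{
		struct mem_cgroup *memcg;
		ino_t ino = 0;

		rcu_read_lock();
		memcg = mem_cgroup_from_obj(p);
		if (memcg)
			ino = cgroup_ino(memcg->css.cgroup);
		rcu_read_unlock();

		return ino;
	}
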
*/ @@ -2932,36 +2928,31 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p) page = virt_to_head_page(p); - /* - * If page->mem_cgroup is set, it's either a simple mem_cgroup pointer - * or a pointer to obj_cgroup vector. In the latter case the lowest - * bit of the pointer is set. - * The page->mem_cgroup pointer can be asynchronously changed - * from NULL to (obj_cgroup_vec | 0x1UL), but can't be changed - * from a valid memcg pointer to objcg vector or back. - */ - if (!page->mem_cgroup) - return NULL; - /* * Slab objects are accounted individually, not per-page. * Memcg membership data for each individual object is saved in * the page->obj_cgroups. */ - if (page_has_obj_cgroups(page)) { + if (page_objcgs_check(page)) { struct obj_cgroup *objcg; unsigned int off; off = obj_to_index(page->slab_cache, page, p); - objcg = page_obj_cgroups(page)[off]; + objcg = page_objcgs(page)[off]; if (objcg) return obj_cgroup_memcg(objcg); return NULL; } - /* All other pages use page->mem_cgroup */ - return page->mem_cgroup; + /* + * page_memcg_check() is used here because the page_objcgs_check() + * above can fail: the object cgroups vector might not have been set + * at that moment, but it can be set concurrently. + * page_memcg_check(page) guarantees that a proper memory + * cgroup pointer or NULL will be returned. + */ + return page_memcg_check(page); } __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void) @@ -3099,8 +3090,8 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) if (memcg && !mem_cgroup_is_root(memcg)) { ret = __memcg_kmem_charge(memcg, gfp, 1 << order); if (!ret) { - page->mem_cgroup = memcg; - __SetPageKmemcg(page); + page->memcg_data = (unsigned long)memcg | + MEMCG_DATA_KMEM; return 0; } css_put(&memcg->css); @@ -3115,7 +3106,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) */ void __memcg_kmem_uncharge_page(struct page *page, int order) { - struct mem_cgroup *memcg = page->mem_cgroup; + struct mem_cgroup *memcg = page_memcg(page); unsigned int nr_pages = 1 << order; if (!memcg) @@ -3123,12 +3114,8 @@ void __memcg_kmem_uncharge_page(struct page *page, int order) VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page); __memcg_kmem_uncharge(memcg, nr_pages); - page->mem_cgroup = NULL; + page->memcg_data = 0; css_put(&memcg->css); - - /* slab pages do not have PageKmemcg flag set */ - if (PageKmemcg(page)) - __ClearPageKmemcg(page); } static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes) @@ -3274,7 +3261,7 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size) */ void mem_cgroup_split_huge_fixup(struct page *head) { - struct mem_cgroup *memcg = head->mem_cgroup; + struct mem_cgroup *memcg = page_memcg(head); int i; if (mem_cgroup_disabled()) @@ -3282,7 +3269,7 @@ void mem_cgroup_split_huge_fixup(struct page *head) for (i = 1; i < HPAGE_PMD_NR; i++) { css_get(&memcg->css); - head[i].mem_cgroup = memcg; + head[i].memcg_data = (unsigned long)memcg; } } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -4664,7 +4651,7 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, void mem_cgroup_track_foreign_dirty_slowpath(struct page *page, struct bdi_writeback *wb) { - struct mem_cgroup *memcg = page->mem_cgroup; + struct mem_cgroup *memcg = page_memcg(page); struct memcg_cgwb_frn *frn; u64 now = get_jiffies_64(); u64 oldest_at = now; @@ -5641,14 +5628,14 @@ static int mem_cgroup_move_account(struct page *page, /* * Prevent mem_cgroup_migrate() from looking at -
page->mem_cgroup of its source page while we change it. + * the memcg of its source page while we change it. */ ret = -EBUSY; if (!trylock_page(page)) goto out; ret = -EINVAL; - if (page->mem_cgroup != from) + if (page_memcg(page) != from) goto out_unlock; pgdat = page_pgdat(page); @@ -5703,13 +5690,13 @@ static int mem_cgroup_move_account(struct page *page, /* * All state has been migrated, let's switch to the new memcg. * - * It is safe to change page->mem_cgroup here because the page + * It is safe to change the page's memcg here because the page * is referenced, charged, isolated, and locked: we can't race * with (un)charging, migration, LRU putback, or anything else - * that would rely on a stable page->mem_cgroup. + * that would rely on a stable page and memcg binding. * * Note that lock_page_memcg is a memcg lock, not a page lock, - * to save space. As soon as we switch page->mem_cgroup to a + * to save space. As soon as we switch the page's memcg to a * new memcg that isn't locked, the above state can change * concurrently again. Make sure we're truly done with it. */ @@ -5718,7 +5705,7 @@ static int mem_cgroup_move_account(struct page *page, css_get(&to->css); css_put(&from->css); - page->mem_cgroup = to; + page->memcg_data = (unsigned long)to; __unlock_page_memcg(from); @@ -5784,7 +5771,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma, * mem_cgroup_move_account() checks the page is valid or * not under LRU exclusion. */ - if (page->mem_cgroup == mc.from) { + if (page_memcg(page) == mc.from) { ret = MC_TARGET_PAGE; if (is_device_private_page(page)) ret = MC_TARGET_DEVICE; @@ -5828,7 +5815,7 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma, VM_BUG_ON_PAGE(!page || !PageHead(page), page); if (!(mc.flags & MOVE_ANON)) return ret; - if (page->mem_cgroup == mc.from) { + if (page_memcg(page) == mc.from) { ret = MC_TARGET_PAGE; if (target) { get_page(page); @@ -6774,12 +6761,12 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) /* * Every swap fault against a single page tries to charge the * page, bail as early as possible. shmem_unuse() encounters - * already charged pages, too. page->mem_cgroup is protected - * by the page lock, which serializes swap cache removal, which - * in turn serializes uncharging. + * already charged pages, too. The page and memcg binding is + * protected by the page lock, which serializes swap cache + * removal, which in turn serializes uncharging. */ VM_BUG_ON_PAGE(!PageLocked(page), page); - if (compound_head(page)->mem_cgroup) + if (page_memcg(compound_head(page))) goto out; id = lookup_swap_cgroup_id(ent); @@ -6863,21 +6850,21 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug) VM_BUG_ON_PAGE(PageLRU(page), page); - if (!page->mem_cgroup) + if (!page_memcg(page)) return; /* * Nobody should be changing or seriously looking at - * page->mem_cgroup at this point, we have fully + * page_memcg(page) at this point; we have fully * exclusive access to the page.
*/ - if (ug->memcg != page->mem_cgroup) { + if (ug->memcg != page_memcg(page)) { if (ug->memcg) { uncharge_batch(ug); uncharge_gather_clear(ug); } - ug->memcg = page->mem_cgroup; + ug->memcg = page_memcg(page); /* pairs with css_put in uncharge_batch */ css_get(&ug->memcg->css); @@ -6886,15 +6873,13 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug) nr_pages = compound_nr(page); ug->nr_pages += nr_pages; - if (!PageKmemcg(page)) { - ug->pgpgout++; - } else { + if (PageMemcgKmem(page)) ug->nr_kmem += nr_pages; - __ClearPageKmemcg(page); - } + else + ug->pgpgout++; ug->dummy_page = page; - page->mem_cgroup = NULL; + page->memcg_data = 0; css_put(&ug->memcg->css); } @@ -6937,7 +6922,7 @@ void mem_cgroup_uncharge(struct page *page) return; /* Don't touch page->lru of any random page, pre-check: */ - if (!page->mem_cgroup) + if (!page_memcg(page)) return; uncharge_gather_clear(&ug); @@ -6987,11 +6972,11 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) return; /* Page cache replacement: new page already charged? */ - if (newpage->mem_cgroup) + if (page_memcg(newpage)) return; /* Swapcache readahead pages can get replaced before being charged */ - memcg = oldpage->mem_cgroup; + memcg = page_memcg(oldpage); if (!memcg) return; @@ -7186,7 +7171,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) return; - memcg = page->mem_cgroup; + memcg = page_memcg(page); /* Readahead page, never charged */ if (!memcg) @@ -7207,7 +7192,7 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry) VM_BUG_ON_PAGE(oldid, page); mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries); - page->mem_cgroup = NULL; + page->memcg_data = 0; if (!mem_cgroup_is_root(memcg)) page_counter_uncharge(&memcg->memory, nr_entries); @@ -7250,7 +7235,7 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry) if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) return 0; - memcg = page->mem_cgroup; + memcg = page_memcg(page); /* Readahead page, never charged */ if (!memcg) @@ -7331,7 +7316,7 @@ bool mem_cgroup_swap_full(struct page *page) if (cgroup_memory_noswap || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) return false; - memcg = page->mem_cgroup; + memcg = page_memcg(page); if (!memcg) return false; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 23f5066bd4a5..3c53018c9c61 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1092,7 +1092,7 @@ static inline bool page_expected_state(struct page *page, if (unlikely((unsigned long)page->mapping | page_ref_count(page) | #ifdef CONFIG_MEMCG - (unsigned long)page->mem_cgroup | + (unsigned long)page_memcg(page) | #endif (page->flags & check_flags))) return false; @@ -1117,7 +1117,7 @@ static const char *page_bad_reason(struct page *page, unsigned long flags) bad_reason = "PAGE_FLAGS_CHECK_AT_FREE flag(s) set"; } #ifdef CONFIG_MEMCG - if (unlikely(page->mem_cgroup)) + if (unlikely(page_memcg(page))) bad_reason = "page still charged to cgroup"; #endif return bad_reason; @@ -1214,7 +1214,7 @@ static __always_inline bool free_pages_prepare(struct page *page, * Do not let hwpoison pages hit pcplists/buddy * Untie memcg state and reset page's owner */ - if (memcg_kmem_enabled() && PageKmemcg(page)) + if (memcg_kmem_enabled() && PageMemcgKmem(page)) __memcg_kmem_uncharge_page(page, order); reset_page_owner(page, order); return false; @@ -1244,7 +1244,7 @@ static __always_inline bool free_pages_prepare(struct page *page, } if (PageMappingFlags(page)) page->mapping = NULL; - 
if (memcg_kmem_enabled() && PageKmemcg(page)) + if (memcg_kmem_enabled() && PageMemcgKmem(page)) __memcg_kmem_uncharge_page(page, order); if (check_free) bad += check_free_page(page); diff --git a/mm/page_io.c b/mm/page_io.c index 433df1263349..9bca17ecc4df 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -291,12 +291,14 @@ static inline void count_swpout_vm_event(struct page *page) static void bio_associate_blkg_from_page(struct bio *bio, struct page *page) { struct cgroup_subsys_state *css; + struct mem_cgroup *memcg; - if (!page->mem_cgroup) + memcg = page_memcg(page); + if (!memcg) return; rcu_read_lock(); - css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); + css = cgroup_e_css(memcg->css.cgroup, &io_cgrp_subsys); bio_associate_blkg_from_css(bio, css); rcu_read_unlock(); } diff --git a/mm/slab.h b/mm/slab.h index 6d7c6a5056ba..9a54a0cb5cca 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -239,30 +239,13 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla } #ifdef CONFIG_MEMCG_KMEM -static inline struct obj_cgroup **page_obj_cgroups(struct page *page) -{ - /* - * page->mem_cgroup and page->obj_cgroups are sharing the same - * space. To distinguish between them in case we don't know for sure - * that the page is a slab page (e.g. page_cgroup_ino()), let's - * always set the lowest bit of obj_cgroups. - */ - return (struct obj_cgroup **) - ((unsigned long)page->obj_cgroups & ~0x1UL); -} - -static inline bool page_has_obj_cgroups(struct page *page) -{ - return ((unsigned long)page->obj_cgroups & 0x1UL); -} - int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s, gfp_t gfp); static inline void memcg_free_page_obj_cgroups(struct page *page) { - kfree(page_obj_cgroups(page)); - page->obj_cgroups = NULL; + kfree(page_objcgs(page)); + page->memcg_data = 0; } static inline size_t obj_full_size(struct kmem_cache *s) @@ -323,7 +306,7 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, if (likely(p[i])) { page = virt_to_head_page(p[i]); - if (!page_has_obj_cgroups(page) && + if (!page_objcgs(page) && memcg_alloc_page_obj_cgroups(page, s, flags)) { obj_cgroup_uncharge(objcg, obj_full_size(s)); continue; @@ -331,7 +314,7 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, off = obj_to_index(s, page, p[i]); obj_cgroup_get(objcg); - page_obj_cgroups(page)[off] = objcg; + page_objcgs(page)[off] = objcg; mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s), obj_full_size(s)); } else { @@ -345,6 +328,7 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, void **p, int objects) { struct kmem_cache *s; + struct obj_cgroup **objcgs; struct obj_cgroup *objcg; struct page *page; unsigned int off; @@ -358,7 +342,8 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, continue; page = virt_to_head_page(p[i]); - if (!page_has_obj_cgroups(page)) + objcgs = page_objcgs(page); + if (!objcgs) continue; if (!s_orig) @@ -367,11 +352,11 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, s = s_orig; off = obj_to_index(s, page, p[i]); - objcg = page_obj_cgroups(page)[off]; + objcg = objcgs[off]; if (!objcg) continue; - page_obj_cgroups(page)[off] = NULL; + objcgs[off] = NULL; obj_cgroup_uncharge(objcg, obj_full_size(s)); mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s), -obj_full_size(s)); @@ -380,11 +365,6 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, } #else /* CONFIG_MEMCG_KMEM */ -static inline bool page_has_obj_cgroups(struct 
page *page) -{ - return false; -} - static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr) { return NULL; diff --git a/mm/workingset.c b/mm/workingset.c index 975a4d2dd02e..130348cbf40a 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -257,7 +257,7 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg) struct lruvec *lruvec; int memcgid; - /* Page is fully exclusive and pins page->mem_cgroup */ + /* Page is fully exclusive and pins page's memory cgroup pointer */ VM_BUG_ON_PAGE(PageLRU(page), page); VM_BUG_ON_PAGE(page_count(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); diff --git a/net/core/sock_map.c b/net/core/sock_map.c index ddc899e83313..64b5ec14ff50 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -27,8 +27,6 @@ struct bpf_stab { static struct bpf_map *sock_map_alloc(union bpf_attr *attr) { struct bpf_stab *stab; - u64 cost; - int err; if (!capable(CAP_NET_ADMIN)) return ERR_PTR(-EPERM); @@ -39,29 +37,22 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) attr->map_flags & ~SOCK_CREATE_FLAG_MASK) return ERR_PTR(-EINVAL); - stab = kzalloc(sizeof(*stab), GFP_USER); + stab = kzalloc(sizeof(*stab), GFP_USER | __GFP_ACCOUNT); if (!stab) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&stab->map, attr); raw_spin_lock_init(&stab->lock); - /* Make sure page count doesn't overflow. */ - cost = (u64) stab->map.max_entries * sizeof(struct sock *); - err = bpf_map_charge_init(&stab->map.memory, cost); - if (err) - goto free_stab; - stab->sks = bpf_map_area_alloc(stab->map.max_entries * sizeof(struct sock *), stab->map.numa_node); - if (stab->sks) - return &stab->map; - err = -ENOMEM; - bpf_map_charge_finish(&stab->map.memory); -free_stab: - kfree(stab); - return ERR_PTR(err); + if (!stab->sks) { + kfree(stab); + return ERR_PTR(-ENOMEM); + } + + return &stab->map; } int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) @@ -975,8 +966,9 @@ static struct bpf_shtab_elem *sock_hash_alloc_elem(struct bpf_shtab *htab, } } - new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN, - htab->map.numa_node); + new = bpf_map_kmalloc_node(&htab->map, htab->elem_size, + GFP_ATOMIC | __GFP_NOWARN, + htab->map.numa_node); if (!new) { atomic_dec(&htab->count); return ERR_PTR(-ENOMEM); @@ -1103,7 +1095,6 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) { struct bpf_shtab *htab; int i, err; - u64 cost; if (!capable(CAP_NET_ADMIN)) return ERR_PTR(-EPERM); @@ -1116,7 +1107,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) if (attr->key_size > MAX_BPF_STACK) return ERR_PTR(-E2BIG); - htab = kzalloc(sizeof(*htab), GFP_USER); + htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_ACCOUNT); if (!htab) return ERR_PTR(-ENOMEM); @@ -1131,21 +1122,10 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr) goto free_htab; } - cost = (u64) htab->buckets_num * sizeof(struct bpf_shtab_bucket) + - (u64) htab->elem_size * htab->map.max_entries; - if (cost >= U32_MAX - PAGE_SIZE) { - err = -EINVAL; - goto free_htab; - } - err = bpf_map_charge_init(&htab->map.memory, cost); - if (err) - goto free_htab; - htab->buckets = bpf_map_area_alloc(htab->buckets_num * sizeof(struct bpf_shtab_bucket), htab->map.numa_node); if (!htab->buckets) { - bpf_map_charge_finish(&htab->map.memory); err = -ENOMEM; goto free_htab; } diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c index 66231ba6c348..113fd9017203 100644 --- a/net/xdp/xskmap.c +++ b/net/xdp/xskmap.c @@ -16,7 +16,8 @@ static struct xsk_map_node 
*xsk_map_node_alloc(struct xsk_map *map, { struct xsk_map_node *node; - node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN); + node = bpf_map_kzalloc(&map->map, sizeof(*node), + GFP_ATOMIC | __GFP_NOWARN); if (!node) return ERR_PTR(-ENOMEM); @@ -57,9 +58,8 @@ static void xsk_map_sock_delete(struct xdp_sock *xs, static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) { - struct bpf_map_memory mem; - int err, numa_node; struct xsk_map *m; + int numa_node; u64 size; if (!capable(CAP_NET_ADMIN)) @@ -73,18 +73,11 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) numa_node = bpf_map_attr_numa_node(attr); size = struct_size(m, xsk_map, attr->max_entries); - err = bpf_map_charge_init(&mem, size); - if (err < 0) - return ERR_PTR(err); - m = bpf_map_area_alloc(size, numa_node); - if (!m) { - bpf_map_charge_finish(&mem); + if (!m) return ERR_PTR(-ENOMEM); - } bpf_map_init_from_attr(&m->map, attr); - bpf_map_charge_move(&m->map.memory, &mem); spin_lock_init(&m->lock); return &m->map; diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c index 8b13230b4c46..9db949290a78 100644 --- a/samples/bpf/map_perf_test_user.c +++ b/samples/bpf/map_perf_test_user.c @@ -421,7 +421,6 @@ static void fixup_map(struct bpf_object *obj) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); struct bpf_link *links[8]; struct bpf_program *prog; @@ -430,11 +429,6 @@ int main(int argc, char **argv) char filename[256]; int i = 0; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - if (argc > 1) test_flags = atoi(argv[1]) ? : test_flags; diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c index 5734cfdaaacb..73a986876c1a 100644 --- a/samples/bpf/offwaketime_user.c +++ b/samples/bpf/offwaketime_user.c @@ -95,18 +95,12 @@ static void int_exit(int sig) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_object *obj = NULL; struct bpf_link *links[2]; struct bpf_program *prog; int delay = 1, i = 0; char filename[256]; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - if (load_kallsyms()) { printf("failed to process /proc/kallsyms\n"); return 2; diff --git a/samples/bpf/sockex2_user.c b/samples/bpf/sockex2_user.c index af925a5afd1d..bafa567b840c 100644 --- a/samples/bpf/sockex2_user.c +++ b/samples/bpf/sockex2_user.c @@ -16,7 +16,6 @@ struct pair { int main(int ac, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_object *obj; int map_fd, prog_fd; char filename[256]; @@ -24,7 +23,6 @@ int main(int ac, char **argv) FILE *f; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - setrlimit(RLIMIT_MEMLOCK, &r); if (bpf_prog_load(filename, BPF_PROG_TYPE_SOCKET_FILTER, &obj, &prog_fd)) diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index 7793f6a6ae7e..6ae99ecc766c 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -26,7 +26,6 @@ struct pair { int main(int argc, char **argv) { int i, sock, key, fd, main_prog_fd, jmp_table_fd, hash_map_fd; - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_program *prog; struct bpf_object *obj; const char *section; @@ -34,7 +33,6 @@ int main(int argc, char **argv) FILE *f; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - setrlimit(RLIMIT_MEMLOCK, &r); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { 
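The sock_map and xskmap conversions above replace open-coded kmalloc_node()/kzalloc() calls with bpf_map_kmalloc_node()/bpf_map_kzalloc(), so per-element allocations are charged to the map's memcg rather than tracked against RLIMIT_MEMLOCK. The helpers themselves are defined in kernel/bpf/syscall.c as part of this series and are not reproduced in this excerpt; a minimal sketch of the idea, assuming CONFIG_MEMCG_KMEM and the set_active_memcg() remote-charging interface from linux/sched/mm.h (the helper name below is hypothetical, not the patch's code):

	static void *map_kmalloc_node_sketch(const struct bpf_map *map,
					     size_t size, gfp_t flags, int node)
	{
		struct mem_cgroup *old_memcg;
		void *ptr;

		/* Charge the allocation to the map's memcg, not current's. */
		old_memcg = set_active_memcg(map->memcg);
		ptr = kmalloc_node(size, flags | __GFP_ACCOUNT, node);
		set_active_memcg(old_memcg);

		return ptr;
	}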
diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c index f090d0dc60d6..0d7e1e5a8658 100644 --- a/samples/bpf/spintest_user.c +++ b/samples/bpf/spintest_user.c @@ -10,7 +10,6 @@ int main(int ac, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; char filename[256], symbol[256]; struct bpf_object *obj = NULL; struct bpf_link *links[20]; @@ -20,11 +19,6 @@ int main(int ac, char **argv) const char *section; struct ksym *sym; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - if (load_kallsyms()) { printf("failed to process /proc/kallsyms\n"); return 2; diff --git a/samples/bpf/syscall_tp_user.c b/samples/bpf/syscall_tp_user.c index 76a1d00128fb..a0ebf1833ed3 100644 --- a/samples/bpf/syscall_tp_user.c +++ b/samples/bpf/syscall_tp_user.c @@ -115,7 +115,6 @@ cleanup: int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; int opt, num_progs = 1; char filename[256]; @@ -131,7 +130,6 @@ int main(int argc, char **argv) } } - setrlimit(RLIMIT_MEMLOCK, &r); snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); return test(filename, num_progs); diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c index f6b772faa348..a78025b0026b 100644 --- a/samples/bpf/task_fd_query_user.c +++ b/samples/bpf/task_fd_query_user.c @@ -310,7 +310,6 @@ cleanup: int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; extern char __executable_start; char filename[256], buf[256]; __u64 uprobe_file_offset; @@ -318,11 +317,6 @@ int main(int argc, char **argv) struct bpf_object *obj; int i = 0, err = -1; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return err; - } - if (load_kallsyms()) { printf("failed to process /proc/kallsyms\n"); return err; diff --git a/samples/bpf/test_lru_dist.c b/samples/bpf/test_lru_dist.c index b313dba4111b..c92c5c06b965 100644 --- a/samples/bpf/test_lru_dist.c +++ b/samples/bpf/test_lru_dist.c @@ -489,7 +489,6 @@ static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; int map_flags[] = {0, BPF_F_NO_COMMON_LRU}; const char *dist_file; int nr_tasks = 1; @@ -508,8 +507,6 @@ int main(int argc, char **argv) setbuf(stdout, NULL); - assert(!setrlimit(RLIMIT_MEMLOCK, &r)); - srand(time(NULL)); nr_cpus = bpf_num_possible_cpus(); diff --git a/samples/bpf/test_map_in_map_user.c b/samples/bpf/test_map_in_map_user.c index 98656de56b83..472d65c70354 100644 --- a/samples/bpf/test_map_in_map_user.c +++ b/samples/bpf/test_map_in_map_user.c @@ -114,17 +114,11 @@ static void test_map_in_map(void) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_link *link = NULL; struct bpf_program *prog; struct bpf_object *obj; char filename[256]; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { diff --git a/samples/bpf/test_overhead_user.c b/samples/bpf/test_overhead_user.c index 819a6fe86f89..4821f9d99c1f 100644 --- a/samples/bpf/test_overhead_user.c +++ b/samples/bpf/test_overhead_user.c @@ -162,13 +162,11 @@ static void unload_progs(void) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; int num_cpu = sysconf(_SC_NPROCESSORS_ONLN); int test_flags = ~0; char 
filename[256]; int err = 0; - setrlimit(RLIMIT_MEMLOCK, &r); if (argc > 1) test_flags = atoi(argv[1]) ? : test_flags; diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index ac1ba368195c..9664749bf618 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -294,13 +294,11 @@ static void test_bpf_perf_event(void) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_object *obj = NULL; char filename[256]; int error = 1; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - setrlimit(RLIMIT_MEMLOCK, &r); signal(SIGINT, err_exit); signal(SIGTERM, err_exit); diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c index 3d6eab711d23..1626d51dfffd 100644 --- a/samples/bpf/tracex2_user.c +++ b/samples/bpf/tracex2_user.c @@ -116,7 +116,6 @@ static void int_exit(int sig) int main(int ac, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; long key, next_key, value; struct bpf_link *links[2]; struct bpf_program *prog; @@ -125,11 +124,6 @@ int main(int ac, char **argv) int i, j = 0; FILE *f; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c index 83e0fecbb01a..33e16ba39f25 100644 --- a/samples/bpf/tracex3_user.c +++ b/samples/bpf/tracex3_user.c @@ -107,7 +107,6 @@ static void print_hist(int fd) int main(int ac, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_link *links[2]; struct bpf_program *prog; struct bpf_object *obj; @@ -127,11 +126,6 @@ int main(int ac, char **argv) } } - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c index e8faf8f184ae..cea399424bca 100644 --- a/samples/bpf/tracex4_user.c +++ b/samples/bpf/tracex4_user.c @@ -48,18 +48,12 @@ static void print_old_objects(int fd) int main(int ac, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_link *links[2]; struct bpf_program *prog; struct bpf_object *obj; char filename[256]; int map_fd, i, j = 0; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)"); - return 1; - } - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c index c17d3fb5fd64..08dfdc77ad2a 100644 --- a/samples/bpf/tracex5_user.c +++ b/samples/bpf/tracex5_user.c @@ -34,7 +34,6 @@ static void install_accept_all_seccomp(void) int main(int ac, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_link *link = NULL; struct bpf_program *prog; struct bpf_object *obj; @@ -43,8 +42,6 @@ int main(int ac, char **argv) char filename[256]; FILE *f; - setrlimit(RLIMIT_MEMLOCK, &r); - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c index 33df9784775d..28296f40c133 100644 --- a/samples/bpf/tracex6_user.c +++ b/samples/bpf/tracex6_user.c @@ -175,15 +175,12 
@@ static void test_bpf_perf_event(void) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_link *links[2]; struct bpf_program *prog; struct bpf_object *obj; char filename[256]; int i = 0; - setrlimit(RLIMIT_MEMLOCK, &r); - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index c447ad9e3a1d..116e39f6b666 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -79,7 +79,6 @@ static void usage(const char *prog) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; @@ -117,11 +116,6 @@ int main(int argc, char **argv) return 1; } - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - ifindex = if_nametoindex(argv[optind]); if (!ifindex) { perror("if_nametoindex"); diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index ba482dc3da33..a70b094c8ec5 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -82,7 +82,6 @@ static void usage(const char *cmd) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; @@ -143,11 +142,6 @@ int main(int argc, char **argv) } } - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)"); - return 1; - } - if (!ifindex) { fprintf(stderr, "Invalid ifname\n"); return 1; diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c index 03d0a182913f..49ebc49aefc3 100644 --- a/samples/bpf/xdp_monitor_user.c +++ b/samples/bpf/xdp_monitor_user.c @@ -687,7 +687,6 @@ static void print_bpf_prog_info(void) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_program *prog; int longindex = 0, opt; int ret = EXIT_FAILURE; @@ -719,10 +718,6 @@ int main(int argc, char **argv) } snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return ret; - } /* Remove tracepoint program when program is interrupted or killed */ signal(SIGINT, int_exit); diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index f78cb18319aa..576411612523 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -765,7 +765,6 @@ static int load_cpumap_prog(char *file_name, char *prog_name, int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; char *prog_name = "xdp_cpu_map5_lb_hash_ip_pairs"; char *mprog_filename = "xdp_redirect_kern.o"; char *redir_interface = NULL, *redir_map = NULL; @@ -804,11 +803,6 @@ int main(int argc, char **argv) snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); prog_load_attr.file = filename; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return err; diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c index 35e16dee613e..31131b6e7782 100644 --- a/samples/bpf/xdp_redirect_map_user.c +++ b/samples/bpf/xdp_redirect_map_user.c @@ -96,7 +96,6 @@ static void usage(const char *prog) int main(int argc, char **argv) { - struct rlimit 
r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; @@ -135,11 +134,6 @@ int main(int argc, char **argv) return 1; } - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - ifindex_in = if_nametoindex(argv[optind]); if (!ifindex_in) ifindex_in = strtoul(argv[optind], NULL, 0); diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c index 9ca2bf457cda..41d705c3a1f7 100644 --- a/samples/bpf/xdp_redirect_user.c +++ b/samples/bpf/xdp_redirect_user.c @@ -97,7 +97,6 @@ static void usage(const char *prog) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; @@ -136,11 +135,6 @@ int main(int argc, char **argv) return 1; } - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - ifindex_in = if_nametoindex(argv[optind]); if (!ifindex_in) ifindex_in = strtoul(argv[optind], NULL, 0); diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index c2da1b51ff95..b5f03cb17a3c 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -625,7 +625,6 @@ static void usage(const char *prog) int main(int ac, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; @@ -670,11 +669,6 @@ int main(int ac, char **argv) return 1; } - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return 1; diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index 93fa1bc54f13..74a2926eba08 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -450,7 +450,6 @@ static void stats_poll(int interval, int action, __u32 cfg_opt) int main(int argc, char **argv) { __u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */ - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; @@ -474,11 +473,6 @@ int main(int argc, char **argv) snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); prog_load_attr.file = filename; - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd)) return EXIT_FAIL; diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c index 4b2a300c750c..706475e004cb 100644 --- a/samples/bpf/xdp_sample_pkts_user.c +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -109,7 +109,6 @@ static void usage(const char *prog) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; @@ -143,11 +142,6 @@ int main(int argc, char **argv) return 1; } - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK)"); - return 1; - } - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); prog_load_attr.file = filename; diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index a419bee151a8..1d4f305d02aa 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -155,7 +155,6 @@ int main(int argc, char **argv) struct bpf_prog_load_attr prog_load_attr = { .prog_type = 
BPF_PROG_TYPE_XDP, }; - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; int min_port = 0, max_port = 0, vip2tnl_map_fd; const char *optstr = "i:a:p:s:d:m:T:P:FSNh"; unsigned char opt_flags[256] = {}; @@ -254,11 +253,6 @@ int main(int argc, char **argv) } } - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)"); - return 1; - } - if (!ifindex) { fprintf(stderr, "Invalid ifname\n"); return 1; diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 036bd019e400..909f77647deb 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1489,7 +1489,6 @@ static void apply_setsockopt(struct xsk_socket_info *xsk) int main(int argc, char **argv) { - struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; bool rx = false, tx = false; struct xsk_umem_info *umem; struct bpf_object *obj; @@ -1499,12 +1498,6 @@ int main(int argc, char **argv) parse_command_line(argc, argv); - if (setrlimit(RLIMIT_MEMLOCK, &r)) { - fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n", - strerror(errno)); - exit(EXIT_FAILURE); - } - if (opt_num_xsks > 1) load_xdp_program(argv, &obj); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c index 08651b23edba..b83b5d2e17dc 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c @@ -23,6 +23,6 @@ int dump_bpf_map(struct bpf_iter__bpf_map *ctx) BPF_SEQ_PRINTF(seq, "%8u %8ld %8ld %10lu\n", map->id, map->refcnt.counter, map->usercnt.counter, - map->memory.user->locked_vm.counter); + 0LLU); return 0; } diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index c325405751e2..d8850bc6a9f1 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -26,17 +26,12 @@ __u32 g_line = 0; return 0; \ }) -struct bpf_map_memory { - __u32 pages; -} __attribute__((preserve_access_index)); - struct bpf_map { enum bpf_map_type map_type; __u32 key_size; __u32 value_size; __u32 max_entries; __u32 id; - struct bpf_map_memory memory; } __attribute__((preserve_access_index)); static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size, @@ -47,7 +42,6 @@ static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size, VERIFY(map->value_size == value_size); VERIFY(map->max_entries == max_entries); VERIFY(map->id > 0); - VERIFY(map->memory.pages > 0); return 1; } @@ -60,7 +54,6 @@ static inline int check_bpf_map_ptr(struct bpf_map *indirect, VERIFY(indirect->value_size == direct->value_size); VERIFY(indirect->max_entries == direct->max_entries); VERIFY(indirect->id == direct->id); - VERIFY(indirect->memory.pages == direct->memory.pages); return 1; }
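With map and program memory now charged to the owning memory cgroup, the setrlimit(RLIMIT_MEMLOCK) boilerplate dropped from the samples above is no longer needed on kernels carrying this series. Userspace that must also run on older kernels, where BPF memory is still charged against the memlock rlimit, may want to keep the bump behind a small helper; a hedged sketch (the helper name is hypothetical, the setrlimit(2) call is standard):

	#include <sys/resource.h>

	/* Needed only on kernels that still charge BPF memory against
	 * RLIMIT_MEMLOCK; harmless but unnecessary with memcg-based
	 * accounting.
	 */
	static int bump_memlock_rlimit(void)
	{
		struct rlimit rlim_new = {
			.rlim_cur = RLIM_INFINITY,
			.rlim_max = RLIM_INFINITY,
		};

		return setrlimit(RLIMIT_MEMLOCK, &rlim_new);
	}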