mirror of
https://github.com/torvalds/linux.git
synced 2024-11-23 20:51:44 +00:00
mm: page_frag: reuse existing space for 'size' and 'pfmemalloc'
Currently there is one 'struct page_frag' for every 'struct sock' and 'struct task_struct', and we are about to replace the 'struct page_frag' with 'struct page_frag_cache' for them. Before beginning the replacement, we need to ensure that the size of 'struct page_frag_cache' is not bigger than the size of 'struct page_frag', as there may be tens of thousands of 'struct sock' and 'struct task_struct' instances in the system. By OR'ing the page order and pfmemalloc into the lower bits of 'va' instead of using 'u16' or 'u32' for the page size and 'u8' for pfmemalloc, we are able to avoid wasting 3 or 5 bytes of space. And since the page address, pfmemalloc and order are unchanged for the same page in the same 'page_frag_cache' instance, it makes sense to pack them together. After this patch, the size of 'struct page_frag_cache' should be the same as the size of 'struct page_frag'. CC: Andrew Morton <akpm@linux-foundation.org> CC: Linux-MM <linux-mm@kvack.org> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> Reviewed-by: Alexander Duyck <alexanderduyck@fb.com> Link: https://patch.msgid.link/20241028115343.3405838-7-linyunsheng@huawei.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
parent
49e302be73
commit
0c3ce2f502
@ -47,18 +47,21 @@ struct page_frag {
|
||||
#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK)
|
||||
#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE)
|
||||
struct page_frag_cache {
|
||||
void *va;
|
||||
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
|
||||
__u16 offset;
|
||||
__u16 size;
|
||||
#else
|
||||
__u32 offset;
|
||||
#endif
|
||||
/* encoded_page consists of the virtual address, pfmemalloc bit and
|
||||
* order of a page.
|
||||
*/
|
||||
unsigned long encoded_page;
|
||||
|
||||
/* we maintain a pagecount bias, so that we don't dirty the cache line
|
||||
* containing page->_refcount every time we allocate a fragment.
|
||||
*/
|
||||
unsigned int pagecnt_bias;
|
||||
bool pfmemalloc;
|
||||
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
|
||||
__u16 offset;
|
||||
__u16 pagecnt_bias;
|
||||
#else
|
||||
__u32 offset;
|
||||
__u32 pagecnt_bias;
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Track pages that require TLB flushes */
|
||||
|
@ -3,18 +3,38 @@
|
||||
#ifndef _LINUX_PAGE_FRAG_CACHE_H
|
||||
#define _LINUX_PAGE_FRAG_CACHE_H
|
||||
|
||||
#include <linux/bits.h>
|
||||
#include <linux/log2.h>
|
||||
#include <linux/mm_types_task.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
|
||||
/* Use a full byte here to enable assembler optimization as the shift
|
||||
* operation is usually expecting a byte.
|
||||
*/
|
||||
#define PAGE_FRAG_CACHE_ORDER_MASK GENMASK(7, 0)
|
||||
#else
|
||||
/* The compiler should be able to figure out that we don't read anything, as any value
|
||||
* ANDed with 0 is 0.
|
||||
*/
|
||||
#define PAGE_FRAG_CACHE_ORDER_MASK 0
|
||||
#endif
|
||||
|
||||
#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT (PAGE_FRAG_CACHE_ORDER_MASK + 1)
|
||||
|
||||
static inline bool encoded_page_decode_pfmemalloc(unsigned long encoded_page)
|
||||
{
|
||||
return !!(encoded_page & PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
|
||||
}
|
||||
|
||||
static inline void page_frag_cache_init(struct page_frag_cache *nc)
|
||||
{
|
||||
nc->va = NULL;
|
||||
nc->encoded_page = 0;
|
||||
}
|
||||
|
||||
static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
|
||||
{
|
||||
return !!nc->pfmemalloc;
|
||||
return encoded_page_decode_pfmemalloc(nc->encoded_page);
|
||||
}
|
||||
|
||||
void page_frag_cache_drain(struct page_frag_cache *nc);
|
||||
|
@ -12,6 +12,7 @@
|
||||
* be used in the "frags" portion of skb_shared_info.
|
||||
*/
|
||||
|
||||
#include <linux/build_bug.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/gfp_types.h>
|
||||
#include <linux/init.h>
|
||||
@ -19,9 +20,36 @@
|
||||
#include <linux/page_frag_cache.h>
|
||||
#include "internal.h"
|
||||
|
||||
static unsigned long encoded_page_create(struct page *page, unsigned int order,
|
||||
bool pfmemalloc)
|
||||
{
|
||||
BUILD_BUG_ON(PAGE_FRAG_CACHE_MAX_ORDER > PAGE_FRAG_CACHE_ORDER_MASK);
|
||||
BUILD_BUG_ON(PAGE_FRAG_CACHE_PFMEMALLOC_BIT >= PAGE_SIZE);
|
||||
|
||||
return (unsigned long)page_address(page) |
|
||||
(order & PAGE_FRAG_CACHE_ORDER_MASK) |
|
||||
((unsigned long)pfmemalloc * PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
|
||||
}
|
||||
|
||||
static unsigned long encoded_page_decode_order(unsigned long encoded_page)
|
||||
{
|
||||
return encoded_page & PAGE_FRAG_CACHE_ORDER_MASK;
|
||||
}
|
||||
|
||||
static void *encoded_page_decode_virt(unsigned long encoded_page)
|
||||
{
|
||||
return (void *)(encoded_page & PAGE_MASK);
|
||||
}
|
||||
|
||||
static struct page *encoded_page_decode_page(unsigned long encoded_page)
|
||||
{
|
||||
return virt_to_page((void *)encoded_page);
|
||||
}
|
||||
|
||||
static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
unsigned long order = PAGE_FRAG_CACHE_MAX_ORDER;
|
||||
struct page *page = NULL;
|
||||
gfp_t gfp = gfp_mask;
|
||||
|
||||
@ -30,23 +58,26 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
|
||||
__GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
|
||||
page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
|
||||
PAGE_FRAG_CACHE_MAX_ORDER);
|
||||
nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
|
||||
#endif
|
||||
if (unlikely(!page))
|
||||
if (unlikely(!page)) {
|
||||
page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
|
||||
order = 0;
|
||||
}
|
||||
|
||||
nc->va = page ? page_address(page) : NULL;
|
||||
nc->encoded_page = page ?
|
||||
encoded_page_create(page, order, page_is_pfmemalloc(page)) : 0;
|
||||
|
||||
return page;
|
||||
}
|
||||
|
||||
void page_frag_cache_drain(struct page_frag_cache *nc)
|
||||
{
|
||||
if (!nc->va)
|
||||
if (!nc->encoded_page)
|
||||
return;
|
||||
|
||||
__page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
|
||||
nc->va = NULL;
|
||||
__page_frag_cache_drain(encoded_page_decode_page(nc->encoded_page),
|
||||
nc->pagecnt_bias);
|
||||
nc->encoded_page = 0;
|
||||
}
|
||||
EXPORT_SYMBOL(page_frag_cache_drain);
|
||||
|
||||
@ -63,35 +94,29 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
|
||||
unsigned int fragsz, gfp_t gfp_mask,
|
||||
unsigned int align_mask)
|
||||
{
|
||||
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
|
||||
unsigned int size = nc->size;
|
||||
#else
|
||||
unsigned int size = PAGE_SIZE;
|
||||
#endif
|
||||
unsigned int offset;
|
||||
unsigned long encoded_page = nc->encoded_page;
|
||||
unsigned int size, offset;
|
||||
struct page *page;
|
||||
|
||||
if (unlikely(!nc->va)) {
|
||||
if (unlikely(!encoded_page)) {
|
||||
refill:
|
||||
page = __page_frag_cache_refill(nc, gfp_mask);
|
||||
if (!page)
|
||||
return NULL;
|
||||
|
||||
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
|
||||
/* if size can vary use size else just use PAGE_SIZE */
|
||||
size = nc->size;
|
||||
#endif
|
||||
encoded_page = nc->encoded_page;
|
||||
|
||||
/* Even if we own the page, we do not use atomic_set().
|
||||
* This would break get_page_unless_zero() users.
|
||||
*/
|
||||
page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
|
||||
|
||||
/* reset page count bias and offset to start of new frag */
|
||||
nc->pfmemalloc = page_is_pfmemalloc(page);
|
||||
nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
|
||||
nc->offset = 0;
|
||||
}
|
||||
|
||||
size = PAGE_SIZE << encoded_page_decode_order(encoded_page);
|
||||
offset = __ALIGN_KERNEL_MASK(nc->offset, ~align_mask);
|
||||
if (unlikely(offset + fragsz > size)) {
|
||||
if (unlikely(fragsz > PAGE_SIZE)) {
|
||||
@ -107,13 +132,14 @@ refill:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
page = virt_to_page(nc->va);
|
||||
page = encoded_page_decode_page(encoded_page);
|
||||
|
||||
if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
|
||||
goto refill;
|
||||
|
||||
if (unlikely(nc->pfmemalloc)) {
|
||||
free_unref_page(page, compound_order(page));
|
||||
if (unlikely(encoded_page_decode_pfmemalloc(encoded_page))) {
|
||||
free_unref_page(page,
|
||||
encoded_page_decode_order(encoded_page));
|
||||
goto refill;
|
||||
}
|
||||
|
||||
@ -128,7 +154,7 @@ refill:
|
||||
nc->pagecnt_bias--;
|
||||
nc->offset = offset + fragsz;
|
||||
|
||||
return nc->va + offset;
|
||||
return encoded_page_decode_virt(encoded_page) + offset;
|
||||
}
|
||||
EXPORT_SYMBOL(__page_frag_alloc_align);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user