mm: page_frag: reuse existing space for 'size' and 'pfmemalloc'

Currently there is one 'struct page_frag' for every 'struct
sock' and 'struct task_struct', and we are about to replace
that 'struct page_frag' with 'struct page_frag_cache' in both.
Before doing the replacement, we need to ensure that the size of
'struct page_frag_cache' is not bigger than the size of
'struct page_frag', as there may be tens of thousands of
'struct sock' and 'struct task_struct' instances in the
system.

By or'ing the page order & pfmemalloc into the lower bits of
'va' instead of using 'u16' or 'u32' for the page size and 'u8'
for pfmemalloc, we are able to avoid wasting 3 or 5 bytes of space.
And since the page address, pfmemalloc and order are unchanged for
the same page in the same 'page_frag_cache' instance, it makes
sense to fit them together.

After this patch, the size of 'struct page_frag_cache' should be
the same as the size of 'struct page_frag'.

CC: Andrew Morton <akpm@linux-foundation.org>
CC: Linux-MM <linux-mm@kvack.org>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Link: https://patch.msgid.link/20241028115343.3405838-7-linyunsheng@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Yunsheng Lin 2024-10-28 19:53:41 +08:00 committed by Jakub Kicinski
parent 49e302be73
commit 0c3ce2f502
3 changed files with 82 additions and 33 deletions

View File

@ -47,18 +47,21 @@ struct page_frag {
#define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK)
#define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE)
/* State of one page fragment cache.
 *
 * NOTE(review): 'offset' and 'pagecnt_bias' are each declared more than
 * once below, and 'va', 'size' and 'pfmemalloc' sit next to the
 * 'encoded_page' field that is described as carrying the same
 * information. This looks like the removed and the added lines of a
 * diff shown together without +/- markers - confirm against the applied
 * tree before relying on this layout.
 */
struct page_frag_cache {
void *va;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
__u16 offset;
__u16 size;
#else
__u32 offset;
#endif
/* encoded_page consists of the virtual address, pfmemalloc bit and
 * order of a page.
 */
unsigned long encoded_page;
/* We maintain a pagecount bias, so that we don't dirty the cache line
 * containing page->_refcount every time we allocate a fragment.
 */
unsigned int pagecnt_bias;
bool pfmemalloc;
/* 16-bit offset/bias are only selected when the page can be smaller than
 * PAGE_FRAG_CACHE_MAX_SIZE and longs are at most 32 bits wide; every
 * other configuration uses 32-bit fields.
 */
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
__u16 offset;
__u16 pagecnt_bias;
#else
__u32 offset;
__u32 pagecnt_bias;
#endif
};
/* Track pages that require TLB flushes */

View File

@ -3,18 +3,38 @@
#ifndef _LINUX_PAGE_FRAG_CACHE_H
#define _LINUX_PAGE_FRAG_CACHE_H
#include <linux/bits.h>
#include <linux/log2.h>
#include <linux/mm_types_task.h>
#include <linux/types.h>
/* Layout of the low bits of an encoded page value: the page order lives
 * under PAGE_FRAG_CACHE_ORDER_MASK and the pfmemalloc flag is
 * PAGE_FRAG_CACHE_PFMEMALLOC_BIT.
 */
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
/* Use a full byte here to enable assembler optimization as the shift
 * operation is usually expecting a byte.
 */
#define PAGE_FRAG_CACHE_ORDER_MASK GENMASK(7, 0)
#else
/* Compiler should be able to figure out we don't read things as any value
 * ANDed with 0 is 0.
 */
#define PAGE_FRAG_CACHE_ORDER_MASK 0
#endif
/* The pfmemalloc flag is the mask value plus one, i.e. the single bit just
 * above the order field: 0x100 with the byte-wide mask, 0x1 when the mask
 * is 0.
 */
#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT (PAGE_FRAG_CACHE_ORDER_MASK + 1)
/* Extract the pfmemalloc flag from an encoded page value.
 *
 * Returns true when PAGE_FRAG_CACHE_PFMEMALLOC_BIT is set in
 * @encoded_page, false otherwise.
 */
static inline bool encoded_page_decode_pfmemalloc(unsigned long encoded_page)
{
	unsigned long flag = encoded_page & PAGE_FRAG_CACHE_PFMEMALLOC_BIT;

	return flag != 0;
}
/* Put @nc into the "no page cached" state.
 *
 * NOTE(review): both 'va' and 'encoded_page' are cleared here; this
 * looks like the removed and the added line of a diff shown together
 * without +/- markers - confirm which assignment the applied tree keeps.
 */
static inline void page_frag_cache_init(struct page_frag_cache *nc)
{
nc->va = NULL;
/* A zero encoded_page is what page_frag_cache_drain() treats as empty. */
nc->encoded_page = 0;
}
/* Report whether the page currently held by @nc is flagged pfmemalloc.
 *
 * NOTE(review): two return statements follow, so the second can never
 * execute as written. This looks like the removed and the added line of
 * a diff shown together without +/- markers - confirm which one the
 * applied tree keeps.
 */
static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
{
/* Reads the flag from the dedicated 'pfmemalloc' field. */
return !!nc->pfmemalloc;
/* Reads the flag from the bits packed into 'encoded_page'. */
return encoded_page_decode_pfmemalloc(nc->encoded_page);
}
void page_frag_cache_drain(struct page_frag_cache *nc);

View File

@ -12,6 +12,7 @@
* be used in the "frags" portion of skb_shared_info.
*/
#include <linux/build_bug.h>
#include <linux/export.h>
#include <linux/gfp_types.h>
#include <linux/init.h>
@ -19,9 +20,36 @@
#include <linux/page_frag_cache.h>
#include "internal.h"
/* Pack a page's virtual address, its order and its pfmemalloc flag into a
 * single unsigned long.
 *
 * @page:       page whose page_address() supplies the address bits
 * @order:      page order, stored under PAGE_FRAG_CACHE_ORDER_MASK
 * @pfmemalloc: when true, PAGE_FRAG_CACHE_PFMEMALLOC_BIT is set
 *
 * Returns the combined value.
 */
static unsigned long encoded_page_create(struct page *page, unsigned int order,
					 bool pfmemalloc)
{
	unsigned long encoded;

	/* The largest order must be representable in the order field ... */
	BUILD_BUG_ON(PAGE_FRAG_CACHE_MAX_ORDER > PAGE_FRAG_CACHE_ORDER_MASK);
	/* ... and the flag bit must stay below PAGE_SIZE. */
	BUILD_BUG_ON(PAGE_FRAG_CACHE_PFMEMALLOC_BIT >= PAGE_SIZE);

	encoded = (unsigned long)page_address(page);
	encoded |= order & PAGE_FRAG_CACHE_ORDER_MASK;
	if (pfmemalloc)
		encoded |= PAGE_FRAG_CACHE_PFMEMALLOC_BIT;

	return encoded;
}
/* Extract the page order from an encoded page value by keeping only the
 * bits under PAGE_FRAG_CACHE_ORDER_MASK.
 */
static unsigned long encoded_page_decode_order(unsigned long encoded_page)
{
	unsigned long order = encoded_page & PAGE_FRAG_CACHE_ORDER_MASK;

	return order;
}
/* Recover the virtual address from an encoded page value by masking with
 * PAGE_MASK, which drops the order and pfmemalloc bits.
 */
static void *encoded_page_decode_virt(unsigned long encoded_page)
{
	unsigned long base = encoded_page & PAGE_MASK;

	return (void *)base;
}
/* Look up the struct page for an encoded page value.
 *
 * The value is handed to virt_to_page() without masking off the order and
 * pfmemalloc bits first.
 * NOTE(review): presumably virt_to_page() ignores the sub-page offset
 * bits, so the low flag bits do not affect the result - confirm.
 */
static struct page *encoded_page_decode_page(unsigned long encoded_page)
{
	void *addr = (void *)encoded_page;

	return virt_to_page(addr);
}
static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
gfp_t gfp_mask)
{
unsigned long order = PAGE_FRAG_CACHE_MAX_ORDER;
struct page *page = NULL;
gfp_t gfp = gfp_mask;
@ -30,23 +58,26 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
__GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
PAGE_FRAG_CACHE_MAX_ORDER);
nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
#endif
if (unlikely(!page))
if (unlikely(!page)) {
page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
order = 0;
}
nc->va = page ? page_address(page) : NULL;
nc->encoded_page = page ?
encoded_page_create(page, order, page_is_pfmemalloc(page)) : 0;
return page;
}
/* Release the page held by @nc, if any, and mark the cache empty.
 *
 * The page and nc->pagecnt_bias are passed to __page_frag_cache_drain().
 *
 * NOTE(review): the empty check, the drain call and the reset each appear
 * twice, once via 'va' and once via 'encoded_page'. This looks like the
 * removed and the added lines of a diff shown together without +/-
 * markers - confirm against the applied tree.
 */
void page_frag_cache_drain(struct page_frag_cache *nc)
{
if (!nc->va)
if (!nc->encoded_page)
return;
__page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
nc->va = NULL;
__page_frag_cache_drain(encoded_page_decode_page(nc->encoded_page),
nc->pagecnt_bias);
/* Zero is the value the empty check above tests for. */
nc->encoded_page = 0;
}
EXPORT_SYMBOL(page_frag_cache_drain);
@ -63,35 +94,29 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
unsigned int fragsz, gfp_t gfp_mask,
unsigned int align_mask)
{
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
unsigned int size = nc->size;
#else
unsigned int size = PAGE_SIZE;
#endif
unsigned int offset;
unsigned long encoded_page = nc->encoded_page;
unsigned int size, offset;
struct page *page;
if (unlikely(!nc->va)) {
if (unlikely(!encoded_page)) {
refill:
page = __page_frag_cache_refill(nc, gfp_mask);
if (!page)
return NULL;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
/* if size can vary use size else just use PAGE_SIZE */
size = nc->size;
#endif
encoded_page = nc->encoded_page;
/* Even if we own the page, we do not use atomic_set().
* This would break get_page_unless_zero() users.
*/
page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
/* reset page count bias and offset to start of new frag */
nc->pfmemalloc = page_is_pfmemalloc(page);
nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
nc->offset = 0;
}
size = PAGE_SIZE << encoded_page_decode_order(encoded_page);
offset = __ALIGN_KERNEL_MASK(nc->offset, ~align_mask);
if (unlikely(offset + fragsz > size)) {
if (unlikely(fragsz > PAGE_SIZE)) {
@ -107,13 +132,14 @@ refill:
return NULL;
}
page = virt_to_page(nc->va);
page = encoded_page_decode_page(encoded_page);
if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
goto refill;
if (unlikely(nc->pfmemalloc)) {
free_unref_page(page, compound_order(page));
if (unlikely(encoded_page_decode_pfmemalloc(encoded_page))) {
free_unref_page(page,
encoded_page_decode_order(encoded_page));
goto refill;
}
@ -128,7 +154,7 @@ refill:
nc->pagecnt_bias--;
nc->offset = offset + fragsz;
return nc->va + offset;
return encoded_page_decode_virt(encoded_page) + offset;
}
EXPORT_SYMBOL(__page_frag_alloc_align);