mm,tmpfs: consider end of file write in shmem_is_huge

Take the end of a file write into consideration when deciding whether or
not to use huge pages for tmpfs files when the tmpfs filesystem is mounted
with huge=within_size.

This allows large writes that append to the end of a file to automatically
use huge pages.

Tested by doing 4MB sequential writes, without fallocate, to a 16GB tmpfs
file with fio.  The numbers without THP or with huge=always stay the same,
but the performance with huge=within_size now matches that of huge=always.

huge		before		after
4kB pages	1560 MB/s	1560 MB/s
within_size	1560 MB/s	4720 MB/s
always		4720 MB/s	4720 MB/s

[akpm@linux-foundation.org: coding-style cleanups]
Link: https://lkml.kernel.org/r/20240903111928.7171e60c@imladris.surriel.com
Signed-off-by: Rik van Riel <riel@surriel.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Tested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Darrick J. Wong <djwong@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Rik van Riel 2024-09-03 11:19:28 -04:00 committed by Andrew Morton
parent e899007a5e
commit e1e4cfd01a
7 changed files with 42 additions and 39 deletions

View File

@ -126,7 +126,7 @@ xfile_load(
unsigned int len; unsigned int len;
unsigned int offset; unsigned int offset;
if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
SGP_READ) < 0) SGP_READ) < 0)
break; break;
if (!folio) { if (!folio) {
@ -196,7 +196,7 @@ xfile_store(
unsigned int len; unsigned int len;
unsigned int offset; unsigned int offset;
if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
SGP_CACHE) < 0) SGP_CACHE) < 0)
break; break;
if (filemap_check_wb_err(inode->i_mapping, 0)) { if (filemap_check_wb_err(inode->i_mapping, 0)) {
@ -267,7 +267,7 @@ xfile_get_folio(
i_size_write(inode, pos + len); i_size_write(inode, pos + len);
pflags = memalloc_nofs_save(); pflags = memalloc_nofs_save();
error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
(flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ); (flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ);
memalloc_nofs_restore(pflags); memalloc_nofs_restore(pflags);
if (error) if (error)

View File

@ -149,7 +149,7 @@ xmbuf_map_page(
return -ENOMEM; return -ENOMEM;
} }
error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, SGP_CACHE); error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio, SGP_CACHE);
if (error) if (error)
return error; return error;

View File

@ -113,11 +113,11 @@ int shmem_unuse(unsigned int type);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifdef CONFIG_TRANSPARENT_HUGEPAGE
unsigned long shmem_allowable_huge_orders(struct inode *inode, unsigned long shmem_allowable_huge_orders(struct inode *inode,
struct vm_area_struct *vma, pgoff_t index, struct vm_area_struct *vma, pgoff_t index,
bool shmem_huge_force); loff_t write_end, bool shmem_huge_force);
#else #else
static inline unsigned long shmem_allowable_huge_orders(struct inode *inode, static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
struct vm_area_struct *vma, pgoff_t index, struct vm_area_struct *vma, pgoff_t index,
bool shmem_huge_force) loff_t write_end, bool shmem_huge_force)
{ {
return 0; return 0;
} }
@ -143,8 +143,8 @@ enum sgp_type {
SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */ SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */
}; };
int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop, int shmem_get_folio(struct inode *inode, pgoff_t index, loff_t write_end,
enum sgp_type sgp); struct folio **foliop, enum sgp_type sgp);
struct folio *shmem_read_folio_gfp(struct address_space *mapping, struct folio *shmem_read_folio_gfp(struct address_space *mapping,
pgoff_t index, gfp_t gfp); pgoff_t index, gfp_t gfp);

View File

@ -164,7 +164,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
*/ */
if (!in_pf && shmem_file(vma->vm_file)) if (!in_pf && shmem_file(vma->vm_file))
return shmem_allowable_huge_orders(file_inode(vma->vm_file), return shmem_allowable_huge_orders(file_inode(vma->vm_file),
vma, vma->vm_pgoff, vma, vma->vm_pgoff, 0,
!enforce_sysfs); !enforce_sysfs);
if (!vma_is_anonymous(vma)) { if (!vma_is_anonymous(vma)) {

View File

@ -1870,7 +1870,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
if (xa_is_value(folio) || !folio_test_uptodate(folio)) { if (xa_is_value(folio) || !folio_test_uptodate(folio)) {
xas_unlock_irq(&xas); xas_unlock_irq(&xas);
/* swap in or instantiate fallocated page */ /* swap in or instantiate fallocated page */
if (shmem_get_folio(mapping->host, index, if (shmem_get_folio(mapping->host, index, 0,
&folio, SGP_NOALLOC)) { &folio, SGP_NOALLOC)) {
result = SCAN_FAIL; result = SCAN_FAIL;
goto xa_unlocked; goto xa_unlocked;

View File

@ -549,7 +549,8 @@ static bool shmem_confirm_swap(struct address_space *mapping,
static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER; static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
static bool __shmem_huge_global_enabled(struct inode *inode, pgoff_t index, static bool __shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
bool shmem_huge_force, struct vm_area_struct *vma, loff_t write_end, bool shmem_huge_force,
struct vm_area_struct *vma,
unsigned long vm_flags) unsigned long vm_flags)
{ {
struct mm_struct *mm = vma ? vma->vm_mm : NULL; struct mm_struct *mm = vma ? vma->vm_mm : NULL;
@ -569,7 +570,8 @@ static bool __shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
return true; return true;
case SHMEM_HUGE_WITHIN_SIZE: case SHMEM_HUGE_WITHIN_SIZE:
index = round_up(index + 1, HPAGE_PMD_NR); index = round_up(index + 1, HPAGE_PMD_NR);
i_size = round_up(i_size_read(inode), PAGE_SIZE); i_size = max(write_end, i_size_read(inode));
i_size = round_up(i_size, PAGE_SIZE);
if (i_size >> PAGE_SHIFT >= index) if (i_size >> PAGE_SHIFT >= index)
return true; return true;
fallthrough; fallthrough;
@ -583,14 +585,14 @@ static bool __shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
} }
static bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index, static bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
bool shmem_huge_force, struct vm_area_struct *vma, loff_t write_end, bool shmem_huge_force,
unsigned long vm_flags) struct vm_area_struct *vma, unsigned long vm_flags)
{ {
if (HPAGE_PMD_ORDER > MAX_PAGECACHE_ORDER) if (HPAGE_PMD_ORDER > MAX_PAGECACHE_ORDER)
return false; return false;
return __shmem_huge_global_enabled(inode, index, shmem_huge_force, return __shmem_huge_global_enabled(inode, index, write_end,
vma, vm_flags); shmem_huge_force, vma, vm_flags);
} }
#if defined(CONFIG_SYSFS) #if defined(CONFIG_SYSFS)
@ -770,8 +772,8 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
} }
static bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index, static bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
bool shmem_huge_force, struct vm_area_struct *vma, loff_t write_end, bool shmem_huge_force,
unsigned long vm_flags) struct vm_area_struct *vma, unsigned long vm_flags)
{ {
return false; return false;
} }
@ -978,7 +980,7 @@ static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index)
* (although in some cases this is just a waste of time). * (although in some cases this is just a waste of time).
*/ */
folio = NULL; folio = NULL;
shmem_get_folio(inode, index, &folio, SGP_READ); shmem_get_folio(inode, index, 0, &folio, SGP_READ);
return folio; return folio;
} }
@ -1166,7 +1168,7 @@ static int shmem_getattr(struct mnt_idmap *idmap,
STATX_ATTR_NODUMP); STATX_ATTR_NODUMP);
generic_fillattr(idmap, request_mask, inode, stat); generic_fillattr(idmap, request_mask, inode, stat);
if (shmem_huge_global_enabled(inode, 0, false, NULL, 0)) if (shmem_huge_global_enabled(inode, 0, 0, false, NULL, 0))
stat->blksize = HPAGE_PMD_SIZE; stat->blksize = HPAGE_PMD_SIZE;
if (request_mask & STATX_BTIME) { if (request_mask & STATX_BTIME) {
@ -1653,7 +1655,7 @@ static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifdef CONFIG_TRANSPARENT_HUGEPAGE
unsigned long shmem_allowable_huge_orders(struct inode *inode, unsigned long shmem_allowable_huge_orders(struct inode *inode,
struct vm_area_struct *vma, pgoff_t index, struct vm_area_struct *vma, pgoff_t index,
bool shmem_huge_force) loff_t write_end, bool shmem_huge_force)
{ {
unsigned long mask = READ_ONCE(huge_shmem_orders_always); unsigned long mask = READ_ONCE(huge_shmem_orders_always);
unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size); unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size);
@ -1670,8 +1672,8 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED)) if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED))
return 0; return 0;
global_huge = shmem_huge_global_enabled(inode, index, shmem_huge_force, global_huge = shmem_huge_global_enabled(inode, index, write_end,
vma, vm_flags); shmem_huge_force, vma, vm_flags);
if (!vma || !vma_is_anon_shmem(vma)) { if (!vma || !vma_is_anon_shmem(vma)) {
/* /*
* For tmpfs, we now only support PMD sized THP if huge page * For tmpfs, we now only support PMD sized THP if huge page
@ -2231,8 +2233,8 @@ unlock:
* vmf and fault_type are only supplied by shmem_fault: otherwise they are NULL. * vmf and fault_type are only supplied by shmem_fault: otherwise they are NULL.
*/ */
static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index, static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
struct folio **foliop, enum sgp_type sgp, gfp_t gfp, loff_t write_end, struct folio **foliop, enum sgp_type sgp,
struct vm_fault *vmf, vm_fault_t *fault_type) gfp_t gfp, struct vm_fault *vmf, vm_fault_t *fault_type)
{ {
struct vm_area_struct *vma = vmf ? vmf->vma : NULL; struct vm_area_struct *vma = vmf ? vmf->vma : NULL;
struct mm_struct *fault_mm; struct mm_struct *fault_mm;
@ -2312,7 +2314,7 @@ repeat:
} }
/* Find hugepage orders that are allowed for anonymous shmem and tmpfs. */ /* Find hugepage orders that are allowed for anonymous shmem and tmpfs. */
orders = shmem_allowable_huge_orders(inode, vma, index, false); orders = shmem_allowable_huge_orders(inode, vma, index, write_end, false);
if (orders > 0) { if (orders > 0) {
gfp_t huge_gfp; gfp_t huge_gfp;
@ -2413,6 +2415,7 @@ unlock:
* shmem_get_folio - find, and lock a shmem folio. * shmem_get_folio - find, and lock a shmem folio.
* @inode: inode to search * @inode: inode to search
* @index: the page index. * @index: the page index.
* @write_end: end of a write, could extend inode size
* @foliop: pointer to the folio if found * @foliop: pointer to the folio if found
* @sgp: SGP_* flags to control behavior * @sgp: SGP_* flags to control behavior
* *
@ -2432,10 +2435,10 @@ unlock:
* Context: May sleep. * Context: May sleep.
* Return: 0 if successful, else a negative error code. * Return: 0 if successful, else a negative error code.
*/ */
int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop, int shmem_get_folio(struct inode *inode, pgoff_t index, loff_t write_end,
enum sgp_type sgp) struct folio **foliop, enum sgp_type sgp)
{ {
return shmem_get_folio_gfp(inode, index, foliop, sgp, return shmem_get_folio_gfp(inode, index, write_end, foliop, sgp,
mapping_gfp_mask(inode->i_mapping), NULL, NULL); mapping_gfp_mask(inode->i_mapping), NULL, NULL);
} }
EXPORT_SYMBOL_GPL(shmem_get_folio); EXPORT_SYMBOL_GPL(shmem_get_folio);
@ -2530,7 +2533,7 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
} }
WARN_ON_ONCE(vmf->page != NULL); WARN_ON_ONCE(vmf->page != NULL);
err = shmem_get_folio_gfp(inode, vmf->pgoff, &folio, SGP_CACHE, err = shmem_get_folio_gfp(inode, vmf->pgoff, 0, &folio, SGP_CACHE,
gfp, vmf, &ret); gfp, vmf, &ret);
if (err) if (err)
return vmf_error(err); return vmf_error(err);
@ -3040,7 +3043,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
return -EPERM; return -EPERM;
} }
ret = shmem_get_folio(inode, index, &folio, SGP_WRITE); ret = shmem_get_folio(inode, index, pos + len, &folio, SGP_WRITE);
if (ret) if (ret)
return ret; return ret;
@ -3111,7 +3114,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
break; break;
} }
error = shmem_get_folio(inode, index, &folio, SGP_READ); error = shmem_get_folio(inode, index, 0, &folio, SGP_READ);
if (error) { if (error) {
if (error == -EINVAL) if (error == -EINVAL)
error = 0; error = 0;
@ -3287,7 +3290,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
if (*ppos >= i_size_read(inode)) if (*ppos >= i_size_read(inode))
break; break;
error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio, error = shmem_get_folio(inode, *ppos / PAGE_SIZE, 0, &folio,
SGP_READ); SGP_READ);
if (error) { if (error) {
if (error == -EINVAL) if (error == -EINVAL)
@ -3477,8 +3480,8 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced) else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
error = -ENOMEM; error = -ENOMEM;
else else
error = shmem_get_folio(inode, index, &folio, error = shmem_get_folio(inode, index, offset + len,
SGP_FALLOC); &folio, SGP_FALLOC);
if (error) { if (error) {
info->fallocend = undo_fallocend; info->fallocend = undo_fallocend;
/* Remove the !uptodate folios we added */ /* Remove the !uptodate folios we added */
@ -3829,7 +3832,7 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
} else { } else {
inode_nohighmem(inode); inode_nohighmem(inode);
inode->i_mapping->a_ops = &shmem_aops; inode->i_mapping->a_ops = &shmem_aops;
error = shmem_get_folio(inode, 0, &folio, SGP_WRITE); error = shmem_get_folio(inode, 0, 0, &folio, SGP_WRITE);
if (error) if (error)
goto out_remove_offset; goto out_remove_offset;
inode->i_op = &shmem_symlink_inode_operations; inode->i_op = &shmem_symlink_inode_operations;
@ -3875,7 +3878,7 @@ static const char *shmem_get_link(struct dentry *dentry, struct inode *inode,
return ERR_PTR(-ECHILD); return ERR_PTR(-ECHILD);
} }
} else { } else {
error = shmem_get_folio(inode, 0, &folio, SGP_READ); error = shmem_get_folio(inode, 0, 0, &folio, SGP_READ);
if (error) if (error)
return ERR_PTR(error); return ERR_PTR(error);
if (!folio) if (!folio)
@ -5343,7 +5346,7 @@ struct folio *shmem_read_folio_gfp(struct address_space *mapping,
struct folio *folio; struct folio *folio;
int error; int error;
error = shmem_get_folio_gfp(inode, index, &folio, SGP_CACHE, error = shmem_get_folio_gfp(inode, index, 0, &folio, SGP_CACHE,
gfp, NULL, NULL); gfp, NULL, NULL);
if (error) if (error)
return ERR_PTR(error); return ERR_PTR(error);

View File

@ -391,7 +391,7 @@ static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
struct page *page; struct page *page;
int ret; int ret;
ret = shmem_get_folio(inode, pgoff, &folio, SGP_NOALLOC); ret = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC);
/* Our caller expects us to return -EFAULT if we failed to find folio */ /* Our caller expects us to return -EFAULT if we failed to find folio */
if (ret == -ENOENT) if (ret == -ENOENT)
ret = -EFAULT; ret = -EFAULT;