shmem: split huge pages beyond i_size under memory pressure
Even if the user asked to always allocate huge pages (huge=always), we should be able to free up some memory by splitting pages which are partly beyond i_size when memory pressure comes, or once we hit the limit on filesystem size (-o size=).

In order to do this we maintain a per-superblock list of inodes which potentially have huge pages on the border of the file size. A per-fs shrinker can reclaim memory by splitting such pages.

If we hit -ENOSPC during shmem_getpage_gfp(), we try to split a page to free up space on the filesystem and retry the allocation if it succeeds.

Link: http://lkml.kernel.org/r/1466021202-61880-37-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
commit 779750d20b
parent e496cf3d78
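The test the new shrinker uses to decide whether a huge page is "partly beyond i_size" is plain arithmetic: the tail huge page is only worth splitting when i_size rounded up to the base-page size differs from i_size rounded up to the huge-page size. A minimal userspace sketch of that check, assuming 4 KiB base pages and 2 MiB PMD-sized huge pages (the x86-64 defaults; not kernel code):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE      4096ULL
    #define HPAGE_PMD_SIZE (512ULL * PAGE_SIZE)    /* 2 MiB */

    /* Round x up to a power-of-two boundary. */
    static uint64_t round_up_pow2(uint64_t x, uint64_t align)
    {
            return (x + align - 1) & ~(align - 1);
    }

    /* Mirrors the "anything to gain" check in shmem_unused_huge_shrink():
     * if i_size already ends on a huge-page boundary (after base-page
     * rounding), splitting the last huge page frees nothing. */
    static bool worth_splitting(uint64_t i_size)
    {
            return round_up_pow2(i_size, PAGE_SIZE) !=
                   round_up_pow2(i_size, HPAGE_PMD_SIZE);
    }

    int main(void)
    {
            /* 4 MiB file: ends exactly on a huge-page boundary. */
            printf("%d\n", worth_splitting(4ULL << 20));        /* 0 */
            /* 4 MiB + 1: almost 2 MiB of the tail page is past i_size. */
            printf("%d\n", worth_splitting((4ULL << 20) + 1));  /* 1 */
            return 0;
    }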
include/linux/shmem_fs.h
@@ -16,8 +16,9 @@ struct shmem_inode_info {
 	unsigned long		flags;
 	unsigned long		alloced;	/* data pages alloced to file */
 	unsigned long		swapped;	/* subtotal assigned to swap */
-	struct shared_policy	policy;		/* NUMA memory alloc policy */
+	struct list_head	shrinklist;	/* shrinkable hpage inodes */
 	struct list_head	swaplist;	/* chain of maybes on swap */
+	struct shared_policy	policy;		/* NUMA memory alloc policy */
 	struct simple_xattrs	xattrs;		/* list of xattrs */
 	struct inode		vfs_inode;
 };
@@ -33,6 +34,9 @@ struct shmem_sb_info {
 	kuid_t uid;			/* Mount uid for root directory */
 	kgid_t gid;			/* Mount gid for root directory */
 	struct mempolicy *mpol;		/* default memory policy for mappings */
+	spinlock_t shrinklist_lock;	/* Protects shrinklist */
+	struct list_head shrinklist;	/* List of shrinkable inodes */
+	unsigned long shrinklist_len;	/* Length of shrinklist */
 };

 static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
mm/shmem.c (175 changed lines)
@@ -188,6 +188,7 @@ static const struct inode_operations shmem_inode_operations;
 static const struct inode_operations shmem_dir_inode_operations;
 static const struct inode_operations shmem_special_inode_operations;
 static const struct vm_operations_struct shmem_vm_ops;
+static struct file_system_type shmem_fs_type;

 static LIST_HEAD(shmem_swaplist);
 static DEFINE_MUTEX(shmem_swaplist_mutex);
@@ -406,10 +407,122 @@ static const char *shmem_format_huge(int huge)
 	}
 }

+static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
+		struct shrink_control *sc, unsigned long nr_to_split)
+{
+	LIST_HEAD(list), *pos, *next;
+	struct inode *inode;
+	struct shmem_inode_info *info;
+	struct page *page;
+	unsigned long batch = sc ? sc->nr_to_scan : 128;
+	int removed = 0, split = 0;
+
+	if (list_empty(&sbinfo->shrinklist))
+		return SHRINK_STOP;
+
+	spin_lock(&sbinfo->shrinklist_lock);
+	list_for_each_safe(pos, next, &sbinfo->shrinklist) {
+		info = list_entry(pos, struct shmem_inode_info, shrinklist);
+
+		/* pin the inode */
+		inode = igrab(&info->vfs_inode);
+
+		/* inode is about to be evicted */
+		if (!inode) {
+			list_del_init(&info->shrinklist);
+			removed++;
+			goto next;
+		}
+
+		/* Check if there's anything to gain */
+		if (round_up(inode->i_size, PAGE_SIZE) ==
+				round_up(inode->i_size, HPAGE_PMD_SIZE)) {
+			list_del_init(&info->shrinklist);
+			removed++;
+			iput(inode);
+			goto next;
+		}
+
+		list_move(&info->shrinklist, &list);
+next:
+		if (!--batch)
+			break;
+	}
+	spin_unlock(&sbinfo->shrinklist_lock);
+
+	list_for_each_safe(pos, next, &list) {
+		int ret;
+
+		info = list_entry(pos, struct shmem_inode_info, shrinklist);
+		inode = &info->vfs_inode;
+
+		if (nr_to_split && split >= nr_to_split) {
+			iput(inode);
+			continue;
+		}
+
+		page = find_lock_page(inode->i_mapping,
+				(inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
+		if (!page)
+			goto drop;
+
+		if (!PageTransHuge(page)) {
+			unlock_page(page);
+			put_page(page);
+			goto drop;
+		}
+
+		ret = split_huge_page(page);
+		unlock_page(page);
+		put_page(page);
+
+		if (ret) {
+			/* split failed: leave it on the list */
+			iput(inode);
+			continue;
+		}
+
+		split++;
+drop:
+		list_del_init(&info->shrinklist);
+		removed++;
+		iput(inode);
+	}
+
+	spin_lock(&sbinfo->shrinklist_lock);
+	list_splice_tail(&list, &sbinfo->shrinklist);
+	sbinfo->shrinklist_len -= removed;
+	spin_unlock(&sbinfo->shrinklist_lock);
+
+	return split;
+}
+
+static long shmem_unused_huge_scan(struct super_block *sb,
+		struct shrink_control *sc)
+{
+	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+
+	if (!READ_ONCE(sbinfo->shrinklist_len))
+		return SHRINK_STOP;
+
+	return shmem_unused_huge_shrink(sbinfo, sc, 0);
+}
+
+static long shmem_unused_huge_count(struct super_block *sb,
+		struct shrink_control *sc)
+{
+	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+	return READ_ONCE(sbinfo->shrinklist_len);
+}
 #else /* !CONFIG_TRANSPARENT_HUGE_PAGECACHE */

 #define shmem_huge SHMEM_HUGE_DENY

+static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
+		struct shrink_control *sc, unsigned long nr_to_split)
+{
+	return 0;
+}
 #endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */

 /*
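Two details in the hunk above deserve a note. First, the scan is two-phase by design: candidates are pinned with igrab() and moved to a private list under shrinklist_lock, and only after the spinlock is dropped does the code call find_lock_page() and split_huge_page(), both of which can sleep. Second, the page handed to find_lock_page() is located by masking i_size down to a huge-page boundary and converting bytes to a base-page index. A userspace re-derivation of that index math, under the same assumed 4 KiB / 2 MiB geometry as the sketch above:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT     12
    #define PAGE_SIZE      (1ULL << PAGE_SHIFT)
    #define HPAGE_PMD_SIZE (512ULL * PAGE_SIZE)
    #define HPAGE_PMD_MASK (~(HPAGE_PMD_SIZE - 1))

    int main(void)
    {
            uint64_t i_size = (3ULL << 20) + 12345;    /* 3 MiB and change */

            /* Round i_size down to the start of the huge page containing
             * it, then convert bytes to a base-page index: 2 MiB is page
             * index 512, the page the shrinker will try to split. */
            uint64_t index = (i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT;
            printf("index = %llu\n", (unsigned long long)index);  /* 512 */
            return 0;
    }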
@@ -843,6 +956,7 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = d_inode(dentry);
 	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 	int error;

 	error = inode_change_ok(inode, attr);
@@ -878,6 +992,20 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 			if (oldsize > holebegin)
 				unmap_mapping_range(inode->i_mapping,
 							holebegin, 0, 1);
+
+			/*
+			 * Part of the huge page can be beyond i_size: subject
+			 * to shrink under memory pressure.
+			 */
+			if (IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE)) {
+				spin_lock(&sbinfo->shrinklist_lock);
+				if (list_empty(&info->shrinklist)) {
+					list_add_tail(&info->shrinklist,
+							&sbinfo->shrinklist);
+					sbinfo->shrinklist_len++;
+				}
+				spin_unlock(&sbinfo->shrinklist_lock);
+			}
 		}
 	}

@@ -890,11 +1018,20 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 static void shmem_evict_inode(struct inode *inode)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
+	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);

 	if (inode->i_mapping->a_ops == &shmem_aops) {
 		shmem_unacct_size(info->flags, inode->i_size);
 		inode->i_size = 0;
 		shmem_truncate_range(inode, 0, (loff_t)-1);
+		if (!list_empty(&info->shrinklist)) {
+			spin_lock(&sbinfo->shrinklist_lock);
+			if (!list_empty(&info->shrinklist)) {
+				list_del_init(&info->shrinklist);
+				sbinfo->shrinklist_len--;
+			}
+			spin_unlock(&sbinfo->shrinklist_lock);
+		}
 		if (!list_empty(&info->swaplist)) {
 			mutex_lock(&shmem_swaplist_mutex);
 			list_del_init(&info->swaplist);
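Note the double-checked pattern in the eviction hunk: an unlocked list_empty() test skips the lock entirely for the common case of an inode that was never on the shrinklist, and the test is repeated under shrinklist_lock before actually unlinking, since another CPU may have raced in between. A generic userspace sketch of that idiom (hypothetical types, pthread locking standing in for the kernel spinlock):

    #include <pthread.h>

    /* An empty node points at itself, kernel-list style. */
    struct node { struct node *prev, *next; };

    static int node_on_list(const struct node *n) { return n->next != n; }

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

    static void remove_if_listed(struct node *n, unsigned long *len)
    {
            if (!node_on_list(n))           /* cheap unlocked fast path */
                    return;
            pthread_mutex_lock(&list_lock);
            if (node_on_list(n)) {          /* re-check: may have raced */
                    n->prev->next = n->next;
                    n->next->prev = n->prev;
                    n->next = n->prev = n;  /* re-init, as list_del_init() does */
                    (*len)--;
            }
            pthread_mutex_unlock(&list_lock);
    }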
@@ -1563,8 +1700,23 @@ alloc_nohuge:	page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
 				index, false);
 	}
 	if (IS_ERR(page)) {
+		int retry = 5;
 		error = PTR_ERR(page);
 		page = NULL;
+		if (error != -ENOSPC)
+			goto failed;
+		/*
+		 * Try to reclaim some space by splitting a huge page
+		 * beyond i_size on the filesystem.
+		 */
+		while (retry--) {
+			int ret;
+			ret = shmem_unused_huge_shrink(sbinfo, NULL, 1);
+			if (ret == SHRINK_STOP)
+				break;
+			if (ret)
+				goto alloc_nohuge;
+		}
 		goto failed;
 	}

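The -ENOSPC path above retries the allocation only when shmem_unused_huge_shrink() actually split something, stops as soon as the shrinklist is exhausted (SHRINK_STOP), and bounds the effort at five split attempts. A compact sketch of that bounded-retry shape, slightly simplified (in the kernel the budget is re-initialized each time the allocation label is re-entered), with alloc_one() and split_one_huge_page() as hypothetical stand-ins for shmem_alloc_and_acct_page() and shmem_unused_huge_shrink(sbinfo, NULL, 1):

    #include <errno.h>
    #include <stddef.h>

    #define SHRINK_STOP (~0UL)

    void *alloc_one(int *err);               /* hypothetical */
    unsigned long split_one_huge_page(void); /* hypothetical */

    static void *alloc_with_split_retry(void)
    {
            int err = 0, retry = 5;          /* same bound the hunk uses */
            void *p;

    retry_alloc:
            p = alloc_one(&err);
            if (p)
                    return p;
            if (err != -ENOSPC)
                    return NULL;             /* only "no space" is retryable */

            while (retry-- > 0) {
                    unsigned long ret = split_one_huge_page();
                    if (ret == SHRINK_STOP)
                            break;           /* nothing left on the shrinklist */
                    if (ret > 0)
                            goto retry_alloc; /* a split freed space: retry */
                    /* ret == 0: nothing split this round, try another */
            }
            return NULL;
    }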
@@ -1603,6 +1755,22 @@ alloc_nohuge:	page = shmem_alloc_and_acct_page(gfp, info, sbinfo,
 	spin_unlock_irq(&info->lock);
 	alloced = true;

+	if (PageTransHuge(page) &&
+			DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
+			hindex + HPAGE_PMD_NR - 1) {
+		/*
+		 * Part of the huge page is beyond i_size: subject
+		 * to shrink under memory pressure.
+		 */
+		spin_lock(&sbinfo->shrinklist_lock);
+		if (list_empty(&info->shrinklist)) {
+			list_add_tail(&info->shrinklist,
+					&sbinfo->shrinklist);
+			sbinfo->shrinklist_len++;
+		}
+		spin_unlock(&sbinfo->shrinklist_lock);
+	}
+
 	/*
 	 * Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
 	 */
@@ -1920,6 +2088,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
 		spin_lock_init(&info->lock);
 		info->seals = F_SEAL_SEAL;
 		info->flags = flags & VM_NORESERVE;
+		INIT_LIST_HEAD(&info->shrinklist);
 		INIT_LIST_HEAD(&info->swaplist);
 		simple_xattrs_init(&info->xattrs);
 		cache_no_acl(inode);
@@ -3518,6 +3687,8 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
 	if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
 		goto failed;
 	sbinfo->free_inodes = sbinfo->max_inodes;
+	spin_lock_init(&sbinfo->shrinklist_lock);
+	INIT_LIST_HEAD(&sbinfo->shrinklist);

 	sb->s_maxbytes = MAX_LFS_FILESIZE;
 	sb->s_blocksize = PAGE_SIZE;
@@ -3680,6 +3851,10 @@ static const struct super_operations shmem_ops = {
 	.evict_inode	= shmem_evict_inode,
 	.drop_inode	= generic_delete_inode,
 	.put_super	= shmem_put_super,
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
+	.nr_cached_objects	= shmem_unused_huge_count,
+	.free_cached_objects	= shmem_unused_huge_scan,
+#endif
 };

 static const struct vm_operations_struct shmem_vm_ops = {
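Finally, note how the shrinker is wired up: rather than registering a standalone struct shrinker, the patch exposes the count/scan pair through super_operations, so the generic per-superblock shrinker invokes them under memory pressure like any other filesystem cache. A conceptual sketch of the contract those two hooks implement (an illustration of the interface shape only, not the VFS's actual code; sb_count()/sb_scan() are hypothetical wrappers):

    struct super_block;
    struct shrink_control { unsigned long nr_to_scan; };

    long sb_count(struct super_block *sb, struct shrink_control *sc);
    long sb_scan(struct super_block *sb, struct shrink_control *sc);

    static long reclaim_from_sb(struct super_block *sb, unsigned long budget)
    {
            struct shrink_control sc = { .nr_to_scan = budget };

            /* How much is reclaimable? For tmpfs this returns
             * shrinklist_len via shmem_unused_huge_count(). */
            if (sb_count(sb, &sc) == 0)
                    return 0;

            /* Hand over the scan budget; for tmpfs this reaches
             * shmem_unused_huge_scan() and splits borderline huge pages. */
            return sb_scan(sb, &sc);
    }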