truncate: new helpers
Introduce new truncate helpers truncate_pagecache and inode_newsize_ok. vmtruncate is also consolidated from mm/memory.c and mm/nommu.c into mm/truncate.c. Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
eca6f534e6
commit
25d9e2d152
@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee that no new clones using the
|
||||
mm start up ... this is a loose form of stability on mm_users. For
|
||||
example, it is used in copy_mm to protect against a racing tlb_gather_mmu
|
||||
single address space optimization, so that the zap_page_range (from
|
||||
vmtruncate) does not lose sending ipi's to cloned threads that might
|
||||
truncate) does not lose sending ipi's to cloned threads that might
|
||||
be spawned underneath it and go to user mode to drag in pte's into tlbs.
|
||||
|
||||
swap_lock
|
||||
|
46
fs/attr.c
46
fs/attr.c
@ -18,7 +18,7 @@
|
||||
/* Taken over from the old code... */
|
||||
|
||||
/* POSIX UID/GID verification for setting inode attributes. */
|
||||
int inode_change_ok(struct inode *inode, struct iattr *attr)
|
||||
int inode_change_ok(const struct inode *inode, struct iattr *attr)
|
||||
{
|
||||
int retval = -EPERM;
|
||||
unsigned int ia_valid = attr->ia_valid;
|
||||
@ -60,9 +60,51 @@ fine:
|
||||
error:
|
||||
return retval;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(inode_change_ok);
|
||||
|
||||
/**
|
||||
* inode_newsize_ok - may this inode be truncated to a given size
|
||||
* @inode: the inode to be truncated
|
||||
* @offset: the new size to assign to the inode
|
||||
* Return: 0 on success, -ve errno on failure
|
||||
*
|
||||
* inode_newsize_ok will check filesystem limits and ulimits to check that the
|
||||
* new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
|
||||
* when necessary. Caller must not proceed with inode size change if failure is
|
||||
* returned. @inode must be a file (not directory), with appropriate
|
||||
* permissions to allow truncate (inode_newsize_ok does NOT check these
|
||||
* conditions).
|
||||
*
|
||||
* inode_newsize_ok must be called with i_mutex held.
|
||||
*/
|
||||
int inode_newsize_ok(const struct inode *inode, loff_t offset)
|
||||
{
|
||||
if (inode->i_size < offset) {
|
||||
unsigned long limit;
|
||||
|
||||
limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
|
||||
if (limit != RLIM_INFINITY && offset > limit)
|
||||
goto out_sig;
|
||||
if (offset > inode->i_sb->s_maxbytes)
|
||||
goto out_big;
|
||||
} else {
|
||||
/*
|
||||
* truncation of in-use swapfiles is disallowed - it would
|
||||
* cause subsequent swapout to scribble on the now-freed
|
||||
* blocks.
|
||||
*/
|
||||
if (IS_SWAPFILE(inode))
|
||||
return -ETXTBSY;
|
||||
}
|
||||
|
||||
return 0;
|
||||
out_sig:
|
||||
send_sig(SIGXFSZ, current, 0);
|
||||
out_big:
|
||||
return -EFBIG;
|
||||
}
|
||||
EXPORT_SYMBOL(inode_newsize_ok);
|
||||
|
||||
int inode_setattr(struct inode * inode, struct iattr * attr)
|
||||
{
|
||||
unsigned int ia_valid = attr->ia_valid;
|
||||
|
@ -2382,7 +2382,8 @@ extern int buffer_migrate_page(struct address_space *,
|
||||
#define buffer_migrate_page NULL
|
||||
#endif
|
||||
|
||||
extern int inode_change_ok(struct inode *, struct iattr *);
|
||||
extern int inode_change_ok(const struct inode *, struct iattr *);
|
||||
extern int inode_newsize_ok(const struct inode *, loff_t offset);
|
||||
extern int __must_check inode_setattr(struct inode *, struct iattr *);
|
||||
|
||||
extern void file_update_time(struct file *file);
|
||||
|
@ -791,8 +791,9 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
|
||||
unmap_mapping_range(mapping, holebegin, holelen, 0);
|
||||
}
|
||||
|
||||
extern int vmtruncate(struct inode * inode, loff_t offset);
|
||||
extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
|
||||
extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
|
||||
extern int vmtruncate(struct inode *inode, loff_t offset);
|
||||
extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||
|
@ -58,7 +58,7 @@
|
||||
/*
|
||||
* Lock ordering:
|
||||
*
|
||||
* ->i_mmap_lock (vmtruncate)
|
||||
* ->i_mmap_lock (truncate_pagecache)
|
||||
* ->private_lock (__free_pte->__set_page_dirty_buffers)
|
||||
* ->swap_lock (exclusive_swap_page, others)
|
||||
* ->mapping->tree_lock
|
||||
|
62
mm/memory.c
62
mm/memory.c
@ -297,7 +297,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||
unsigned long addr = vma->vm_start;
|
||||
|
||||
/*
|
||||
* Hide vma from rmap and vmtruncate before freeing pgtables
|
||||
* Hide vma from rmap and truncate_pagecache before freeing
|
||||
* pgtables
|
||||
*/
|
||||
anon_vma_unlink(vma);
|
||||
unlink_file_vma(vma);
|
||||
@ -2407,7 +2408,7 @@ restart:
|
||||
* @mapping: the address space containing mmaps to be unmapped.
|
||||
* @holebegin: byte in first page to unmap, relative to the start of
|
||||
* the underlying file. This will be rounded down to a PAGE_SIZE
|
||||
* boundary. Note that this is different from vmtruncate(), which
|
||||
* boundary. Note that this is different from truncate_pagecache(), which
|
||||
* must keep the partial page. In contrast, we must get rid of
|
||||
* partial pages.
|
||||
* @holelen: size of prospective hole in bytes. This will be rounded
|
||||
@ -2458,63 +2459,6 @@ void unmap_mapping_range(struct address_space *mapping,
|
||||
}
|
||||
EXPORT_SYMBOL(unmap_mapping_range);
|
||||
|
||||
/**
|
||||
* vmtruncate - unmap mappings "freed" by truncate() syscall
|
||||
* @inode: inode of the file used
|
||||
* @offset: file offset to start truncating
|
||||
*
|
||||
* NOTE! We have to be ready to update the memory sharing
|
||||
* between the file and the memory map for a potential last
|
||||
* incomplete page. Ugly, but necessary.
|
||||
*/
|
||||
int vmtruncate(struct inode * inode, loff_t offset)
|
||||
{
|
||||
if (inode->i_size < offset) {
|
||||
unsigned long limit;
|
||||
|
||||
limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
|
||||
if (limit != RLIM_INFINITY && offset > limit)
|
||||
goto out_sig;
|
||||
if (offset > inode->i_sb->s_maxbytes)
|
||||
goto out_big;
|
||||
i_size_write(inode, offset);
|
||||
} else {
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
|
||||
/*
|
||||
* truncation of in-use swapfiles is disallowed - it would
|
||||
* cause subsequent swapout to scribble on the now-freed
|
||||
* blocks.
|
||||
*/
|
||||
if (IS_SWAPFILE(inode))
|
||||
return -ETXTBSY;
|
||||
i_size_write(inode, offset);
|
||||
|
||||
/*
|
||||
* unmap_mapping_range is called twice, first simply for
|
||||
* efficiency so that truncate_inode_pages does fewer
|
||||
* single-page unmaps. However after this first call, and
|
||||
* before truncate_inode_pages finishes, it is possible for
|
||||
* private pages to be COWed, which remain after
|
||||
* truncate_inode_pages finishes, hence the second
|
||||
* unmap_mapping_range call must be made for correctness.
|
||||
*/
|
||||
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
|
||||
truncate_inode_pages(mapping, offset);
|
||||
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
|
||||
}
|
||||
|
||||
if (inode->i_op->truncate)
|
||||
inode->i_op->truncate(inode);
|
||||
return 0;
|
||||
|
||||
out_sig:
|
||||
send_sig(SIGXFSZ, current, 0);
|
||||
out_big:
|
||||
return -EFBIG;
|
||||
}
|
||||
EXPORT_SYMBOL(vmtruncate);
|
||||
|
||||
int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
|
||||
{
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
|
@ -86,8 +86,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
|
||||
if (vma->vm_file) {
|
||||
/*
|
||||
* Subtle point from Rajesh Venkatasubramanian: before
|
||||
* moving file-based ptes, we must lock vmtruncate out,
|
||||
* since it might clean the dst vma before the src vma,
|
||||
* moving file-based ptes, we must lock truncate_pagecache
|
||||
* out, since it might clean the dst vma before the src vma,
|
||||
* and we propagate stale pages into the dst afterward.
|
||||
*/
|
||||
mapping = vma->vm_file->f_mapping;
|
||||
|
40
mm/nommu.c
40
mm/nommu.c
@ -82,46 +82,6 @@ DECLARE_RWSEM(nommu_region_sem);
|
||||
struct vm_operations_struct generic_file_vm_ops = {
|
||||
};
|
||||
|
||||
/*
|
||||
* Handle all mappings that got truncated by a "truncate()"
|
||||
* system call.
|
||||
*
|
||||
* NOTE! We have to be ready to update the memory sharing
|
||||
* between the file and the memory map for a potential last
|
||||
* incomplete page. Ugly, but necessary.
|
||||
*/
|
||||
int vmtruncate(struct inode *inode, loff_t offset)
|
||||
{
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
unsigned long limit;
|
||||
|
||||
if (inode->i_size < offset)
|
||||
goto do_expand;
|
||||
i_size_write(inode, offset);
|
||||
|
||||
truncate_inode_pages(mapping, offset);
|
||||
goto out_truncate;
|
||||
|
||||
do_expand:
|
||||
limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
|
||||
if (limit != RLIM_INFINITY && offset > limit)
|
||||
goto out_sig;
|
||||
if (offset > inode->i_sb->s_maxbytes)
|
||||
goto out;
|
||||
i_size_write(inode, offset);
|
||||
|
||||
out_truncate:
|
||||
if (inode->i_op->truncate)
|
||||
inode->i_op->truncate(inode);
|
||||
return 0;
|
||||
out_sig:
|
||||
send_sig(SIGXFSZ, current, 0);
|
||||
out:
|
||||
return -EFBIG;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(vmtruncate);
|
||||
|
||||
/*
|
||||
* Return the total memory allocated for this pointer, not
|
||||
* just what the caller asked for.
|
||||
|
@ -465,3 +465,67 @@ int invalidate_inode_pages2(struct address_space *mapping)
|
||||
return invalidate_inode_pages2_range(mapping, 0, -1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
|
||||
|
||||
/**
|
||||
* truncate_pagecache - unmap and remove pagecache that has been truncated
|
||||
* @inode: inode
|
||||
* @old: old file size (i_size before the change)
|
||||
* @new: new file size (the value already written to i_size)
|
||||
*
|
||||
* inode's new i_size must already be written before truncate_pagecache
|
||||
* is called.
|
||||
*
|
||||
* This function should typically be called before the filesystem
|
||||
* releases resources associated with the freed range (e.g. deallocates
|
||||
* blocks). This way, pagecache will always stay logically coherent
|
||||
* with on-disk format, and the filesystem would not have to deal with
|
||||
* situations such as writepage being called for a page that has already
|
||||
* had its underlying blocks deallocated.
|
||||
*/
|
||||
void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
|
||||
{
|
||||
if (new < old) {
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
|
||||
/*
|
||||
* unmap_mapping_range is called twice, first simply for
|
||||
* efficiency so that truncate_inode_pages does fewer
|
||||
* single-page unmaps. However after this first call, and
|
||||
* before truncate_inode_pages finishes, it is possible for
|
||||
* private pages to be COWed, which remain after
|
||||
* truncate_inode_pages finishes, hence the second
|
||||
* unmap_mapping_range call must be made for correctness.
|
||||
*/
|
||||
unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
|
||||
truncate_inode_pages(mapping, new);
|
||||
unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(truncate_pagecache);
|
||||
|
||||
/**
|
||||
* vmtruncate - unmap mappings "freed" by truncate() syscall
|
||||
* @inode: inode of the file used
|
||||
* @offset: file offset to start truncating
|
||||
*
|
||||
* NOTE! We have to be ready to update the memory sharing
|
||||
* between the file and the memory map for a potential last
|
||||
* incomplete page. Ugly, but necessary.
|
||||
*/
|
||||
int vmtruncate(struct inode *inode, loff_t offset)
|
||||
{
|
||||
loff_t oldsize;
|
||||
int error;
|
||||
|
||||
error = inode_newsize_ok(inode, offset);
|
||||
if (error)
|
||||
return error;
|
||||
oldsize = inode->i_size;
|
||||
i_size_write(inode, offset);
|
||||
truncate_pagecache(inode, oldsize, offset);
|
||||
if (inode->i_op->truncate)
|
||||
inode->i_op->truncate(inode);
|
||||
|
||||
return error;
|
||||
}
|
||||
EXPORT_SYMBOL(vmtruncate);
|
||||
|
Loading…
Reference in New Issue
Block a user