ocfs2: support for removing file regions
Provide an internal interface for the removal of arbitrary file regions. ocfs2_remove_inode_range() takes a byte range within a file and will remove existing extents within that range. Partial clusters will be zeroed so that any read from within the region will return zeros. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
This commit is contained in:
		
							parent
							
								
									35edec1d52
								
							
						
					
					
						commit
						063c4561f5
					
				| @ -4373,10 +4373,10 @@ out: | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | ||||
| 			       u32 cpos, u32 len, handle_t *handle, | ||||
| 			       struct ocfs2_alloc_context *meta_ac, | ||||
| 			       struct ocfs2_cached_dealloc_ctxt *dealloc) | ||||
| int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | ||||
| 			u32 cpos, u32 len, handle_t *handle, | ||||
| 			struct ocfs2_alloc_context *meta_ac, | ||||
| 			struct ocfs2_cached_dealloc_ctxt *dealloc) | ||||
| { | ||||
| 	int ret, index; | ||||
| 	u32 rec_range, trunc_range; | ||||
| @ -4506,7 +4506,7 @@ out: | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) | ||||
| int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) | ||||
| { | ||||
| 	struct buffer_head *tl_bh = osb->osb_tl_bh; | ||||
| 	struct ocfs2_dinode *di; | ||||
| @ -4539,10 +4539,10 @@ static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl, | ||||
| 	return current_tail == new_start; | ||||
| } | ||||
| 
 | ||||
| static int ocfs2_truncate_log_append(struct ocfs2_super *osb, | ||||
| 				     handle_t *handle, | ||||
| 				     u64 start_blk, | ||||
| 				     unsigned int num_clusters) | ||||
| int ocfs2_truncate_log_append(struct ocfs2_super *osb, | ||||
| 			      handle_t *handle, | ||||
| 			      u64 start_blk, | ||||
| 			      unsigned int num_clusters) | ||||
| { | ||||
| 	int status, index; | ||||
| 	unsigned int start_cluster, tl_count; | ||||
| @ -4698,7 +4698,7 @@ bail: | ||||
| } | ||||
| 
 | ||||
| /* Expects you to already be holding tl_inode->i_mutex */ | ||||
| static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) | ||||
| int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) | ||||
| { | ||||
| 	int status; | ||||
| 	unsigned int num_to_flush; | ||||
|  | ||||
| @ -41,6 +41,10 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | ||||
| 			      handle_t *handle, u32 cpos, u32 len, u32 phys, | ||||
| 			      struct ocfs2_alloc_context *meta_ac, | ||||
| 			      struct ocfs2_cached_dealloc_ctxt *dealloc); | ||||
| int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | ||||
| 			u32 cpos, u32 len, handle_t *handle, | ||||
| 			struct ocfs2_alloc_context *meta_ac, | ||||
| 			struct ocfs2_cached_dealloc_ctxt *dealloc); | ||||
| int ocfs2_num_free_extents(struct ocfs2_super *osb, | ||||
| 			   struct inode *inode, | ||||
| 			   struct ocfs2_dinode *fe); | ||||
| @ -68,6 +72,12 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, | ||||
| 				      struct ocfs2_dinode **tl_copy); | ||||
| int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, | ||||
| 					 struct ocfs2_dinode *tl_copy); | ||||
| int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb); | ||||
| int ocfs2_truncate_log_append(struct ocfs2_super *osb, | ||||
| 			      handle_t *handle, | ||||
| 			      u64 start_blk, | ||||
| 			      unsigned int num_clusters); | ||||
| int __ocfs2_flush_truncate_log(struct ocfs2_super *osb); | ||||
| 
 | ||||
| /*
 | ||||
|  * Process local structure which describes the block unlinks done | ||||
|  | ||||
							
								
								
									
										242
									
								
								fs/ocfs2/file.c
									
									
									
									
									
								
							
							
						
						
									
										242
									
								
								fs/ocfs2/file.c
									
									
									
									
									
								
							| @ -541,12 +541,15 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | ||||
| 			  struct ocfs2_alloc_context **data_ac, | ||||
| 			  struct ocfs2_alloc_context **meta_ac) | ||||
| { | ||||
| 	int ret, num_free_extents; | ||||
| 	int ret = 0, num_free_extents; | ||||
| 	unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; | ||||
| 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||||
| 
 | ||||
| 	*meta_ac = NULL; | ||||
| 	*data_ac = NULL; | ||||
| 	if (data_ac) | ||||
| 		*data_ac = NULL; | ||||
| 
 | ||||
| 	BUG_ON(clusters_to_add != 0 && data_ac == NULL); | ||||
| 
 | ||||
| 	mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " | ||||
| 	     "clusters_to_add = %u, extents_to_split = %u\n", | ||||
| @ -583,6 +586,9 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if (clusters_to_add == 0) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); | ||||
| 	if (ret < 0) { | ||||
| 		if (ret != -ENOSPC) | ||||
| @ -1252,6 +1258,238 @@ out: | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static int __ocfs2_remove_inode_range(struct inode *inode, | ||||
| 				      struct buffer_head *di_bh, | ||||
| 				      u32 cpos, u32 phys_cpos, u32 len, | ||||
| 				      struct ocfs2_cached_dealloc_ctxt *dealloc) | ||||
| { | ||||
| 	int ret; | ||||
| 	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||||
| 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||||
| 	struct inode *tl_inode = osb->osb_tl_inode; | ||||
| 	handle_t *handle; | ||||
| 	struct ocfs2_alloc_context *meta_ac = NULL; | ||||
| 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||||
| 
 | ||||
| 	ret = ocfs2_lock_allocators(inode, di, 0, 1, NULL, &meta_ac); | ||||
| 	if (ret) { | ||||
| 		mlog_errno(ret); | ||||
| 		return ret; | ||||
| 	} | ||||
| 
 | ||||
| 	mutex_lock(&tl_inode->i_mutex); | ||||
| 
 | ||||
| 	if (ocfs2_truncate_log_needs_flush(osb)) { | ||||
| 		ret = __ocfs2_flush_truncate_log(osb); | ||||
| 		if (ret < 0) { | ||||
| 			mlog_errno(ret); | ||||
| 			goto out; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); | ||||
| 	if (handle == NULL) { | ||||
| 		ret = -ENOMEM; | ||||
| 		mlog_errno(ret); | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	ret = ocfs2_journal_access(handle, inode, di_bh, | ||||
| 				   OCFS2_JOURNAL_ACCESS_WRITE); | ||||
| 	if (ret) { | ||||
| 		mlog_errno(ret); | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac, | ||||
| 				  dealloc); | ||||
| 	if (ret) { | ||||
| 		mlog_errno(ret); | ||||
| 		goto out_commit; | ||||
| 	} | ||||
| 
 | ||||
| 	OCFS2_I(inode)->ip_clusters -= len; | ||||
| 	di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); | ||||
| 
 | ||||
| 	ret = ocfs2_journal_dirty(handle, di_bh); | ||||
| 	if (ret) { | ||||
| 		mlog_errno(ret); | ||||
| 		goto out_commit; | ||||
| 	} | ||||
| 
 | ||||
| 	ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); | ||||
| 	if (ret) | ||||
| 		mlog_errno(ret); | ||||
| 
 | ||||
| out_commit: | ||||
| 	ocfs2_commit_trans(osb, handle); | ||||
| out: | ||||
| 	mutex_unlock(&tl_inode->i_mutex); | ||||
| 
 | ||||
| 	if (meta_ac) | ||||
| 		ocfs2_free_alloc_context(meta_ac); | ||||
| 
 | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Truncate a byte range, avoiding pages within partial clusters. This | ||||
|  * preserves those pages for the zeroing code to write to. | ||||
|  */ | ||||
| static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start, | ||||
| 					 u64 byte_len) | ||||
| { | ||||
| 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||||
| 	loff_t start, end; | ||||
| 	struct address_space *mapping = inode->i_mapping; | ||||
| 
 | ||||
| 	start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start); | ||||
| 	end = byte_start + byte_len; | ||||
| 	end = end & ~(osb->s_clustersize - 1); | ||||
| 
 | ||||
| 	if (start < end) { | ||||
| 		unmap_mapping_range(mapping, start, end - start, 0); | ||||
| 		truncate_inode_pages_range(mapping, start, end - 1); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static int ocfs2_zero_partial_clusters(struct inode *inode, | ||||
| 				       u64 start, u64 len) | ||||
| { | ||||
| 	int ret = 0; | ||||
| 	u64 tmpend, end = start + len; | ||||
| 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||||
| 	unsigned int csize = osb->s_clustersize; | ||||
| 	handle_t *handle; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * The "start" and "end" values are NOT necessarily part of | ||||
| 	 * the range whose allocation is being deleted. Rather, this | ||||
| 	 * is what the user passed in with the request. We must zero | ||||
| 	 * partial clusters here. There's no need to worry about | ||||
| 	 * physical allocation - the zeroing code knows to skip holes. | ||||
| 	 */ | ||||
| 	mlog(0, "byte start: %llu, end: %llu\n", | ||||
| 	     (unsigned long long)start, (unsigned long long)end); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If both edges are on a cluster boundary then there's no | ||||
| 	 * zeroing required as the region is part of the allocation to | ||||
| 	 * be truncated. | ||||
| 	 */ | ||||
| 	if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0) | ||||
| 		goto out; | ||||
| 
 | ||||
| 	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||||
| 	if (handle == NULL) { | ||||
| 		ret = -ENOMEM; | ||||
| 		mlog_errno(ret); | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * We want to get the byte offset of the end of the 1st cluster. | ||||
| 	 */ | ||||
| 	tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1)); | ||||
| 	if (tmpend > end) | ||||
| 		tmpend = end; | ||||
| 
 | ||||
| 	mlog(0, "1st range: start: %llu, tmpend: %llu\n", | ||||
| 	     (unsigned long long)start, (unsigned long long)tmpend); | ||||
| 
 | ||||
| 	ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend); | ||||
| 	if (ret) | ||||
| 		mlog_errno(ret); | ||||
| 
 | ||||
| 	if (tmpend < end) { | ||||
| 		/*
 | ||||
| 		 * This may make start and end equal, but the zeroing | ||||
| 		 * code will skip any work in that case so there's no | ||||
| 		 * need to catch it up here. | ||||
| 		 */ | ||||
| 		start = end & ~(osb->s_clustersize - 1); | ||||
| 
 | ||||
| 		mlog(0, "2nd range: start: %llu, end: %llu\n", | ||||
| 		     (unsigned long long)start, (unsigned long long)end); | ||||
| 
 | ||||
| 		ret = ocfs2_zero_range_for_truncate(inode, handle, start, end); | ||||
| 		if (ret) | ||||
| 			mlog_errno(ret); | ||||
| 	} | ||||
| 
 | ||||
| 	ocfs2_commit_trans(osb, handle); | ||||
| out: | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static int ocfs2_remove_inode_range(struct inode *inode, | ||||
| 				    struct buffer_head *di_bh, u64 byte_start, | ||||
| 				    u64 byte_len) | ||||
| { | ||||
| 	int ret = 0; | ||||
| 	u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size; | ||||
| 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||||
| 	struct ocfs2_cached_dealloc_ctxt dealloc; | ||||
| 
 | ||||
| 	ocfs2_init_dealloc_ctxt(&dealloc); | ||||
| 
 | ||||
| 	if (byte_len == 0) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); | ||||
| 	trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits; | ||||
| 	if (trunc_len >= trunc_start) | ||||
| 		trunc_len -= trunc_start; | ||||
| 	else | ||||
| 		trunc_len = 0; | ||||
| 
 | ||||
| 	mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n", | ||||
| 	     (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||||
| 	     (unsigned long long)byte_start, | ||||
| 	     (unsigned long long)byte_len, trunc_start, trunc_len); | ||||
| 
 | ||||
| 	ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len); | ||||
| 	if (ret) { | ||||
| 		mlog_errno(ret); | ||||
| 		goto out; | ||||
| 	} | ||||
| 
 | ||||
| 	cpos = trunc_start; | ||||
| 	while (trunc_len) { | ||||
| 		ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, | ||||
| 					 &alloc_size, NULL); | ||||
| 		if (ret) { | ||||
| 			mlog_errno(ret); | ||||
| 			goto out; | ||||
| 		} | ||||
| 
 | ||||
| 		if (alloc_size > trunc_len) | ||||
| 			alloc_size = trunc_len; | ||||
| 
 | ||||
| 		/* Only do work for non-holes */ | ||||
| 		if (phys_cpos != 0) { | ||||
| 			ret = __ocfs2_remove_inode_range(inode, di_bh, cpos, | ||||
| 							 phys_cpos, alloc_size, | ||||
| 							 &dealloc); | ||||
| 			if (ret) { | ||||
| 				mlog_errno(ret); | ||||
| 				goto out; | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		cpos += alloc_size; | ||||
| 		trunc_len -= alloc_size; | ||||
| 	} | ||||
| 
 | ||||
| 	ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); | ||||
| 
 | ||||
| out: | ||||
| 	ocfs2_schedule_truncate_log_flush(osb, 1); | ||||
| 	ocfs2_run_deallocs(osb, &dealloc); | ||||
| 
 | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | ||||
| 					 loff_t *ppos, | ||||
| 					 size_t count, | ||||
|  | ||||
| @ -289,6 +289,8 @@ int                  ocfs2_journal_dirty_data(handle_t *handle, | ||||
| #define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE 		      \ | ||||
| 					 + OCFS2_TRUNCATE_LOG_UPDATE) | ||||
| 
 | ||||
| #define OCFS2_REMOVE_EXTENT_CREDITS (OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS) | ||||
| 
 | ||||
| /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
 | ||||
|  * bitmap block for the new bit) */ | ||||
| #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2) | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user