UBIFS: handle allocation failures in UBIFS write path
Running kernel 2.6.37, my PPC-based device occasionally gets an order-2
allocation failure in UBIFS, which causes the root FS to become unwritable:

kswapd0: page allocation failure. order:2, mode:0x4050
Call Trace:
[c787dc30] [c00085b8] show_stack+0x7c/0x194 (unreliable)
[c787dc70] [c0061aec] __alloc_pages_nodemask+0x4f0/0x57c
[c787dd00] [c0061b98] __get_free_pages+0x20/0x50
[c787dd10] [c00e4f88] ubifs_jnl_write_data+0x54/0x200
[c787dd50] [c00e82d4] do_writepage+0x94/0x198
[c787dd90] [c00675e4] shrink_page_list+0x40c/0x77c
[c787de40] [c0067de0] shrink_inactive_list+0x1e0/0x370
[c787de90] [c0068224] shrink_zone+0x2b4/0x2b8
[c787df00] [c0068854] kswapd+0x408/0x5d4
[c787dfb0] [c0037bcc] kthread+0x80/0x84
[c787dff0] [c000ef44] kernel_thread+0x4c/0x68

Similar problems were encountered last April by Tomasz Stanislawski:

http://patchwork.ozlabs.org/patch/50965/

This patch implements Artem's suggested fix: fall back to a mutex-protected
static buffer, allocated at mount time. I tested it by forcing execution down
the failure path, and didn't see any ill effects.

Artem: massaged the patch a little and improved it so that the write reserve
buffer is not allocated when the file system is mounted read-only.

Signed-off-by: Matthew L. Creech <mlcreech@gmail.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>

commit d882962f6a (parent 2765df7da5)
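In short, the write path now tries a normal allocation first and, only if that
fails, serializes on a mutex and reuses a buffer that was set aside at mount
time. Below is a minimal user-space sketch of that allocate-or-fall-back
pattern, not the UBIFS code itself: the names (write_block, reserve_buf,
reserve_mutex, BUF_SZ) are illustrative, whereas the real change in the diff
uses kmalloc() with GFP_NOFS | __GFP_NOWARN, c->write_reserve_buf and
c->write_reserve_mutex.

/*
 * Illustrative sketch only (hypothetical names): try a normal allocation,
 * and on failure fall back to a single pre-allocated reserve buffer
 * protected by a mutex, releasing whichever resource was used.
 */
#include <pthread.h>
#include <stdlib.h>
#include <string.h>

#define BUF_SZ 4096

static pthread_mutex_t reserve_mutex = PTHREAD_MUTEX_INITIALIZER;
static char reserve_buf[BUF_SZ];	/* "mount-time" reserve buffer */

static int write_block(const char *src, size_t len)
{
	int allocated = 1;
	char *buf = malloc(BUF_SZ);

	if (!buf) {
		/* Allocation failed: borrow the shared reserve buffer. */
		allocated = 0;
		pthread_mutex_lock(&reserve_mutex);
		buf = reserve_buf;
	}

	memcpy(buf, src, len < BUF_SZ ? len : BUF_SZ);
	/* ... compress and write out 'buf' here ... */

	if (allocated)
		free(buf);
	else
		pthread_mutex_unlock(&reserve_mutex);
	return 0;
}

int main(void)
{
	return write_block("hello", 5);
}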
					
fs/ubifs/journal.c
@@ -690,7 +690,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
 {
 	struct ubifs_data_node *data;
 	int err, lnum, offs, compr_type, out_len;
-	int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR;
+	int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1;
 	struct ubifs_inode *ui = ubifs_inode(inode);
 
 	dbg_jnl("ino %lu, blk %u, len %d, key %s",
@@ -698,9 +698,19 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
 		DBGKEY(key));
 	ubifs_assert(len <= UBIFS_BLOCK_SIZE);
 
-	data = kmalloc(dlen, GFP_NOFS);
-	if (!data)
-		return -ENOMEM;
+	data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN);
+	if (!data) {
+		/*
+		 * Fall-back to the write reserve buffer. Note, we might be
+		 * currently on the memory reclaim path, when the kernel is
+		 * trying to free some memory by writing out dirty pages. The
+		 * write reserve buffer helps us to guarantee that we are
+		 * always able to write the data.
+		 */
+		allocated = 0;
+		mutex_lock(&c->write_reserve_mutex);
+		data = c->write_reserve_buf;
+	}
 
 	data->ch.node_type = UBIFS_DATA_NODE;
 	key_write(c, key, &data->key);
@@ -736,7 +746,10 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
 		goto out_ro;
 
 	finish_reservation(c);
-	kfree(data);
+	if (!allocated)
+		mutex_unlock(&c->write_reserve_mutex);
+	else
+		kfree(data);
 	return 0;
 
 out_release:
@@ -745,7 +758,10 @@ out_ro:
 	ubifs_ro_mode(c, err);
 	finish_reservation(c);
 out_free:
-	kfree(data);
+	if (!allocated)
+		mutex_unlock(&c->write_reserve_mutex);
+	else
+		kfree(data);
 	return err;
 }
 
fs/ubifs/super.c
@@ -1213,6 +1213,13 @@ static int mount_ubifs(struct ubifs_info *c)
 	if (c->bulk_read == 1)
 		bu_init(c);
 
+	if (!c->ro_mount) {
+		c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ,
+					       GFP_KERNEL);
+		if (!c->write_reserve_buf)
+			goto out_free;
+	}
+
 	c->mounting = 1;
 
 	err = ubifs_read_superblock(c);
@@ -1482,6 +1489,7 @@ out_wbufs:
 out_cbuf:
 	kfree(c->cbuf);
 out_free:
+	kfree(c->write_reserve_buf);
 	kfree(c->bu.buf);
 	vfree(c->ileb_buf);
 	vfree(c->sbuf);
@@ -1520,6 +1528,7 @@ static void ubifs_umount(struct ubifs_info *c)
 	kfree(c->cbuf);
 	kfree(c->rcvrd_mst_node);
 	kfree(c->mst_node);
+	kfree(c->write_reserve_buf);
 	kfree(c->bu.buf);
 	vfree(c->ileb_buf);
 	vfree(c->sbuf);
@@ -1605,6 +1614,10 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 		goto out;
 	}
 
+	c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL);
+	if (!c->write_reserve_buf)
+		goto out;
+
 	err = ubifs_lpt_init(c, 0, 1);
 	if (err)
 		goto out;
@@ -1669,6 +1682,8 @@ out:
 		c->bgt = NULL;
 	}
 	free_wbufs(c);
+	kfree(c->write_reserve_buf);
+	c->write_reserve_buf = NULL;
 	vfree(c->ileb_buf);
 	c->ileb_buf = NULL;
 	ubifs_lpt_free(c, 1);
@@ -1712,6 +1727,8 @@ static void ubifs_remount_ro(struct ubifs_info *c)
 	free_wbufs(c);
 	vfree(c->orph_buf);
 	c->orph_buf = NULL;
+	kfree(c->write_reserve_buf);
+	c->write_reserve_buf = NULL;
 	vfree(c->ileb_buf);
 	c->ileb_buf = NULL;
 	ubifs_lpt_free(c, 1);
@@ -1942,6 +1959,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
 	mutex_init(&c->mst_mutex);
 	mutex_init(&c->umount_mutex);
 	mutex_init(&c->bu_mutex);
+	mutex_init(&c->write_reserve_mutex);
 	init_waitqueue_head(&c->cmt_wq);
 	c->buds = RB_ROOT;
 	c->old_idx = RB_ROOT;
fs/ubifs/ubifs.h
@@ -151,6 +151,12 @@
  */
 #define WORST_COMPR_FACTOR 2
 
+/*
+ * How much memory is needed for a buffer where we compress a data node.
+ */
+#define COMPRESSED_DATA_NODE_BUF_SZ \
+	(UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR)
+
 /* Maximum expected tree height for use by bottom_up_buf */
 #define BOTTOM_UP_HEIGHT 64
 
@@ -1005,6 +1011,11 @@ struct ubifs_debug_info;
  * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu
  * @bu: pre-allocated bulk-read information
  *
+ * @write_reserve_mutex: protects @write_reserve_buf
+ * @write_reserve_buf: on the write path we allocate memory, which might
+ *                     sometimes be unavailable, in which case we use this
+ *                     write reserve buffer
+ *
 * @log_lebs: number of logical eraseblocks in the log
 * @log_bytes: log size in bytes
 * @log_last: last LEB of the log
@@ -1256,6 +1267,9 @@ struct ubifs_info {
 	struct mutex bu_mutex;
 	struct bu_info bu;
 
+	struct mutex write_reserve_mutex;
+	void *write_reserve_buf;
+
 	int log_lebs;
 	long long log_bytes;
 	int log_last;