From 96150606e2fb82d242c9e4a414e4e922849f7bf7 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Sat, 26 Nov 2011 11:00:47 +0530 Subject: [PATCH 01/10] logfs: update page reference count for pined pages LogFS sets PG_private flag to indicate a pined page. We assumed that marking a page as private is enough to ensure its existence. But instead it is necessary to hold a reference count to the page. The change resolves the following BUG BUG: Bad page state in process flush-253:16 pfn:6a6d0 page flags: 0x100000000000808(uptodate|private) Suggested-and-Acked-by: Joern Engel Signed-off-by: Prasad Joshi --- fs/logfs/readwrite.c | 29 ++++++++++++++++++++++------- fs/logfs/segment.c | 37 +++++++++++++++++++++++++++++-------- 2 files changed, 51 insertions(+), 15 deletions(-) diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 2ac4217b7901..6d663e8ea6da 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -560,8 +560,13 @@ static void inode_free_block(struct super_block *sb, struct logfs_block *block) static void indirect_free_block(struct super_block *sb, struct logfs_block *block) { - ClearPagePrivate(block->page); - block->page->private = 0; + struct page *page = block->page; + + if (PagePrivate(page)) { + ClearPagePrivate(page); + page_cache_release(page); + set_page_private(page, 0); + } __free_block(sb, block); } @@ -650,8 +655,11 @@ static void alloc_data_block(struct inode *inode, struct page *page) logfs_unpack_index(page->index, &bix, &level); block = __alloc_block(inode->i_sb, inode->i_ino, bix, level); block->page = page; + SetPagePrivate(page); - page->private = (unsigned long)block; + page_cache_get(page); + set_page_private(page, (unsigned long) block); + block->ops = &indirect_block_ops; } @@ -1901,8 +1909,11 @@ static void move_page_to_inode(struct inode *inode, struct page *page) li->li_block = block; block->page = NULL; - page->private = 0; - ClearPagePrivate(page); + if (PagePrivate(page)) { + ClearPagePrivate(page); + page_cache_release(page); + set_page_private(page, 0); + } } static void move_inode_to_page(struct page *page, struct inode *inode) @@ -1918,8 +1929,12 @@ static void move_inode_to_page(struct page *page, struct inode *inode) BUG_ON(PagePrivate(page)); block->ops = &indirect_block_ops; block->page = page; - page->private = (unsigned long)block; - SetPagePrivate(page); + + if (!PagePrivate(page)) { + SetPagePrivate(page); + page_cache_get(page); + set_page_private(page, (unsigned long) block); + } block->inode = NULL; li->li_block = NULL; diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index 9d5187353255..6aee6092860d 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c @@ -86,7 +86,11 @@ int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, BUG_ON(!page); /* FIXME: reserve a pool */ SetPageUptodate(page); memcpy(page_address(page) + offset, buf, copylen); - SetPagePrivate(page); + + if (!PagePrivate(page)) { + SetPagePrivate(page); + page_cache_get(page); + } page_cache_release(page); buf += copylen; @@ -110,7 +114,10 @@ static void pad_partial_page(struct logfs_area *area) page = get_mapping_page(sb, index, 0); BUG_ON(!page); /* FIXME: reserve a pool */ memset(page_address(page) + offset, 0xff, len); - SetPagePrivate(page); + if (!PagePrivate(page)) { + SetPagePrivate(page); + page_cache_get(page); + } page_cache_release(page); } } @@ -130,7 +137,10 @@ static void pad_full_pages(struct logfs_area *area) BUG_ON(!page); /* FIXME: reserve a pool */ SetPageUptodate(page); memset(page_address(page), 0xff, PAGE_CACHE_SIZE); - SetPagePrivate(page); + if (!PagePrivate(page)) { + SetPagePrivate(page); + page_cache_get(page); + } page_cache_release(page); index++; no_indizes--; @@ -485,8 +495,12 @@ static void move_btree_to_page(struct inode *inode, struct page *page, mempool_free(item, super->s_alias_pool); } block->page = page; - SetPagePrivate(page); - page->private = (unsigned long)block; + + if (!PagePrivate(page)) { + SetPagePrivate(page); + page_cache_get(page); + set_page_private(page, (unsigned long) block); + } block->ops = &indirect_block_ops; initialize_block_counters(page, block, data, 0); } @@ -536,8 +550,12 @@ void move_page_to_btree(struct page *page) list_add(&item->list, &block->item_list); } block->page = NULL; - ClearPagePrivate(page); - page->private = 0; + + if (PagePrivate(page)) { + ClearPagePrivate(page); + page_cache_release(page); + set_page_private(page, 0); + } block->ops = &btree_block_ops; err = alias_tree_insert(block->sb, block->ino, block->bix, block->level, block); @@ -702,7 +720,10 @@ void freeseg(struct super_block *sb, u32 segno) page = find_get_page(mapping, ofs >> PAGE_SHIFT); if (!page) continue; - ClearPagePrivate(page); + if (PagePrivate(page)) { + ClearPagePrivate(page); + page_cache_release(page); + } page_cache_release(page); } } From 934eed395d201bf0901ca0c0cc3703b18729d0ce Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Sun, 20 Nov 2011 22:29:01 +0530 Subject: [PATCH 02/10] logfs: Prevent memory corruption This is a bad one. I wonder whether we were so far protected by no_free_segments(sb) usually being smaller than LOGFS_NO_AREAS. Found by Dan Carpenter using smatch. Signed-off-by: Joern Engel Signed-off-by: Prasad Joshi --- fs/logfs/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/logfs/gc.c b/fs/logfs/gc.c index caa4419285dc..d4efb061bdc5 100644 --- a/fs/logfs/gc.c +++ b/fs/logfs/gc.c @@ -367,7 +367,7 @@ static struct gc_candidate *get_candidate(struct super_block *sb) int i, max_dist; struct gc_candidate *cand = NULL, *this; - max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS); + max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS - 1); for (i = max_dist; i >= 0; i--) { this = first_in_list(&super->s_low_list[i]); From 13ced29cb28996a9bc4f68e43ff0c57eafdb1e21 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Sat, 28 Jan 2012 11:36:06 +0530 Subject: [PATCH 03/10] logfs: take write mutex lock during fsync and sync LogFS uses super->s_write_mutex while writing data to disk. Taking the same mutex lock in sync and fsync code path solves the following BUG: ------------[ cut here ]------------ kernel BUG at /home/prasad/logfs/dev_bdev.c:134! Pid: 2387, comm: flush-253:16 Not tainted 3.0.0+ #4 Bochs Bochs RIP: 0010:[] [] bdev_writeseg+0x25d/0x270 [logfs] Call Trace: [] logfs_open_area+0x91/0x150 [logfs] [] ? find_level.clone.9+0x62/0x100 [] __logfs_segment_write.clone.20+0x5c/0x190 [logfs] [] ? mempool_kmalloc+0x15/0x20 [] ? mempool_alloc+0x53/0x130 [] logfs_segment_write+0x1d4/0x230 [logfs] [] logfs_write_i0+0x12e/0x190 [logfs] [] __logfs_write_rec+0x140/0x220 [logfs] [] logfs_write_rec+0x64/0xd0 [logfs] [] __logfs_write_buf+0x106/0x110 [logfs] [] logfs_write_buf+0x4e/0x80 [logfs] [] __logfs_writepage+0x23/0x80 [logfs] [] logfs_writepage+0xdc/0x110 [logfs] [] __writepage+0x17/0x40 [] write_cache_pages+0x208/0x4f0 [] ? set_page_dirty+0x70/0x70 [] generic_writepages+0x4a/0x70 [] do_writepages+0x21/0x40 [] writeback_single_inode+0x101/0x250 [] writeback_sb_inodes+0xed/0x1c0 [] writeback_inodes_wb+0x7b/0x1e0 [] wb_writeback+0x4c3/0x530 [] ? sub_preempt_count+0x9d/0xd0 [] wb_do_writeback+0xdb/0x290 [] ? sub_preempt_count+0x9d/0xd0 [] ? _raw_spin_unlock_irqrestore+0x18/0x40 [] ? del_timer+0x8a/0x120 [] bdi_writeback_thread+0x8c/0x2e0 [] ? wb_do_writeback+0x290/0x290 [] kthread+0x96/0xa0 [] kernel_thread_helper+0x4/0x10 [] ? kthread_worker_fn+0x190/0x190 [] ? gs_change+0xb/0xb RIP [] bdev_writeseg+0x25d/0x270 [logfs] ---[ end trace 0211ad60a57657c4 ]--- Reviewed-by: Joern Engel Signed-off-by: Prasad Joshi --- fs/logfs/file.c | 2 ++ fs/logfs/inode.c | 2 ++ fs/logfs/logfs.h | 2 ++ fs/logfs/readwrite.c | 6 ++---- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/logfs/file.c b/fs/logfs/file.c index b548c87a86f1..3886cded283c 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c @@ -230,7 +230,9 @@ int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) return ret; mutex_lock(&inode->i_mutex); + logfs_get_wblocks(sb, NULL, WF_LOCK); logfs_write_anchor(sb); + logfs_put_wblocks(sb, NULL, WF_LOCK); mutex_unlock(&inode->i_mutex); return 0; diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 7e441ad5f792..388d7c5a7bed 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -364,7 +364,9 @@ static void logfs_init_once(void *_li) static int logfs_sync_fs(struct super_block *sb, int wait) { + logfs_get_wblocks(sb, NULL, WF_LOCK); logfs_write_anchor(sb); + logfs_put_wblocks(sb, NULL, WF_LOCK); return 0; } diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index 398ecff6e548..bb4340850c1b 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h @@ -577,6 +577,8 @@ void initialize_block_counters(struct page *page, struct logfs_block *block, __be64 *array, int page_is_empty); int logfs_exist_block(struct inode *inode, u64 bix); int get_page_reserve(struct inode *inode, struct page *page); +void logfs_get_wblocks(struct super_block *sb, struct page *page, int lock); +void logfs_put_wblocks(struct super_block *sb, struct page *page, int lock); extern struct logfs_block_ops indirect_block_ops; /* segment.c */ diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 6d663e8ea6da..7b10e8aecced 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -244,8 +244,7 @@ static void preunlock_page(struct super_block *sb, struct page *page, int lock) * is waiting for s_write_mutex. We annotate this fact by setting PG_pre_locked * in addition to PG_locked. */ -static void logfs_get_wblocks(struct super_block *sb, struct page *page, - int lock) +void logfs_get_wblocks(struct super_block *sb, struct page *page, int lock) { struct logfs_super *super = logfs_super(sb); @@ -260,8 +259,7 @@ static void logfs_get_wblocks(struct super_block *sb, struct page *page, } } -static void logfs_put_wblocks(struct super_block *sb, struct page *page, - int lock) +void logfs_put_wblocks(struct super_block *sb, struct page *page, int lock) { struct logfs_super *super = logfs_super(sb); From ecfd890991a26e70547e025673580923a004c5e4 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Sun, 30 Oct 2011 22:15:32 +0530 Subject: [PATCH 04/10] logfs: set superblock shutdown flag after generic sb shutdown While unmounting the file system LogFS calls generic_shutdown_super. The function does file system independent superblock shutdown. However, it might result in call file system specific inode eviction. LogFS marks FS shutting down by setting bit LOGFS_SB_FLAG_SHUTDOWN in super->s_flags. Since, inode eviction might call truncate on inode, following BUG is observed when file system is unmounted: ------------[ cut here ]------------ kernel BUG at /home/prasad/logfs/segment.c:362! invalid opcode: 0000 [#1] PREEMPT SMP CPU 3 Modules linked in: logfs binfmt_misc ppdev virtio_blk parport_pc lp parport psmouse floppy virtio_pci serio_raw virtio_ring virtio Pid: 1933, comm: umount Not tainted 3.0.0+ #4 Bochs Bochs RIP: 0010:[] [] logfs_segment_write+0x211/0x230 [logfs] RSP: 0018:ffff880062d7b9e8 EFLAGS: 00010202 RAX: 000000000000000e RBX: ffff88006eca9000 RCX: 0000000000000000 RDX: ffff88006fd87c40 RSI: ffffea00014ff468 RDI: ffff88007b68e000 RBP: ffff880062d7ba48 R08: 8000000020451430 R09: 0000000000000000 R10: dead000000100100 R11: 0000000000000000 R12: ffff88006fd87c40 R13: ffffea00014ff468 R14: ffff88005ad0a460 R15: 0000000000000000 FS: 00007f25d50ea760(0000) GS:ffff88007fd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000d05e48 CR3: 0000000062c72000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process umount (pid: 1933, threadinfo ffff880062d7a000, task ffff880070b44500) Stack: ffff880062d7ba38 ffff88005ad0a508 0000000000001000 0000000000000000 8000000020451430 ffffea00014ff468 ffff880062d7ba48 ffff88005ad0a460 ffff880062d7bad8 ffffea00014ff468 ffff88006fd87c40 0000000000000000 Call Trace: [] logfs_write_i0+0x12e/0x190 [logfs] [] __logfs_write_rec+0x140/0x220 [logfs] [] __logfs_write_rec+0xf2/0x220 [logfs] [] logfs_write_rec+0x64/0xd0 [logfs] [] __logfs_write_buf+0x106/0x110 [logfs] [] logfs_write_buf+0x4e/0x80 [logfs] [] __logfs_write_inode+0x98/0x110 [logfs] [] logfs_truncate+0x54/0x290 [logfs] [] logfs_evict_inode+0xdc/0x190 [logfs] [] evict+0x85/0x170 [] iput+0xe6/0x1b0 [] shrink_dcache_for_umount_subtree+0x218/0x280 [] shrink_dcache_for_umount+0x51/0x90 [] generic_shutdown_super+0x2c/0x100 [] logfs_kill_sb+0x57/0xf0 [logfs] [] deactivate_locked_super+0x45/0x70 [] deactivate_super+0x4a/0x70 [] mntput_no_expire+0xa4/0xf0 [] sys_umount+0x6f/0x380 [] system_call_fastpath+0x16/0x1b Code: 55 c8 49 8d b6 a8 00 00 00 45 89 f9 45 89 e8 4c 89 e1 4c 89 55 b8 c7 04 24 00 00 00 00 e8 68 fc ff ff 4c 8b 55 b8 e9 3c ff ff ff <0f> 0b 0f 0b c7 45 c0 00 00 00 00 e9 44 fe ff ff 66 66 66 66 66 RIP [] logfs_segment_write+0x211/0x230 [logfs] RSP ---[ end trace fe6b040cea952290 ]--- Therefore, move super->s_flags setting after the fs-indenpendent work has been finished. Reviewed-by: Joern Engel Signed-off-by: Prasad Joshi --- fs/logfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/logfs/super.c b/fs/logfs/super.c index e795c234ea33..f9b7a30b00a3 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -491,9 +491,9 @@ static void logfs_kill_sb(struct super_block *sb) * From this point on alias entries are simply dropped - and any * writes to the object store are considered bugs. */ - super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN; log_super("LogFS: Now in shutdown\n"); generic_shutdown_super(sb); + super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN; BUG_ON(super->s_dirty_used_bytes || super->s_dirty_free_bytes); From 0bd90387ed5a8abbcf43391b480efdc211721cfe Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Sun, 2 Oct 2011 23:46:51 +0530 Subject: [PATCH 05/10] logfs: Propagate page parameter to __logfs_write_inode During GC LogFS has to rewrite each valid block to a separate segment. Rewrite operation reads data from an old segment and writes it to a newly allocated segment. Since every write operation changes data block pointers maintained in inode, inode should also be rewritten. In GC path to avoid AB-BA deadlock LogFS marks a page with PG_pre_locked in addition to locking the page (PG_locked). The page lock is ignored iff the page is pre-locked. LogFS uses a special file called segment file. The segment file maintains an 8 bytes entry for every segment. It keeps track of erase count, level etc. for every segment. Bad things happen with a segment belonging to the segment file is GCed ------------[ cut here ]------------ kernel BUG at /home/prasad/logfs/readwrite.c:297! invalid opcode: 0000 [#1] SMP Modules linked in: logfs joydev usbhid hid psmouse e1000 i2c_piix4 serio_raw [last unloaded: logfs] Pid: 20161, comm: mount Not tainted 3.1.0-rc3+ #3 innotek GmbH VirtualBox EIP: 0060:[] EFLAGS: 00010292 CPU: 0 EIP is at logfs_lock_write_page+0x6a/0x70 [logfs] EAX: 00000027 EBX: f73f5b20 ECX: c16007c8 EDX: 00000094 ESI: 00000000 EDI: e59be6e4 EBP: c7337b28 ESP: c7337b18 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process mount (pid: 20161, ti=c7336000 task=eb323f70 task.ti=c7336000) Stack: f8099a3d c7337b24 f73f5b20 00001002 c7337b50 f8091f6d f8099a4d f80994e4 00000003 00000000 c7337b68 00000000 c67e4400 00001000 c7337b80 f80935e5 00000000 00000000 00000000 00000000 e1fcf000 0000000f e59be618 c70bf900 Call Trace: [] logfs_get_write_page.clone.16+0xdd/0x100 [logfs] [] logfs_mod_segment_entry+0x55/0x110 [logfs] [] logfs_get_segment_entry+0x1d/0x20 [logfs] [] ? logfs_cleanup_journal+0x50/0x50 [logfs] [] ostore_get_erase_count+0x1b/0x40 [logfs] [] logfs_open_area+0xc8/0x150 [logfs] [] ? kmemleak_alloc+0x2c/0x60 [] __logfs_segment_write.clone.16+0x4e/0x1b0 [logfs] [] ? mempool_kmalloc+0x13/0x20 [] ? mempool_kmalloc+0x13/0x20 [] logfs_segment_write+0x17f/0x1d0 [logfs] [] logfs_write_i0+0x11c/0x180 [logfs] [] logfs_write_direct+0x45/0x90 [logfs] [] __logfs_write_buf+0xbd/0xf0 [logfs] [] ? kmap_atomic_prot+0x4e/0xe0 [] logfs_write_buf+0x3b/0x60 [logfs] [] __logfs_write_inode+0xa9/0x110 [logfs] [] logfs_rewrite_block+0xc0/0x110 [logfs] [] ? get_mapping_page+0x10/0x60 [logfs] [] ? logfs_load_object_aliases+0x2e0/0x2f0 [logfs] [] logfs_gc_segment+0x2ad/0x310 [logfs] [] __logfs_gc_once+0x4a/0x80 [logfs] [] logfs_gc_pass+0x683/0x6a0 [logfs] [] logfs_mount+0x5a9/0x680 [logfs] [] mount_fs+0x21/0xd0 [] ? __alloc_percpu+0xf/0x20 [] ? alloc_vfsmnt+0xb1/0x130 [] vfs_kern_mount+0x4b/0xa0 [] do_kern_mount+0x3e/0xe0 [] do_mount+0x34d/0x670 [] ? strndup_user+0x49/0x70 [] sys_mount+0x6b/0xa0 [] syscall_call+0x7/0xb Code: f8 e8 8b 93 39 c9 8b 45 f8 3e 0f ba 28 00 19 d2 85 d2 74 ca eb d0 0f 0b 8d 45 fc 89 44 24 04 c7 04 24 3d 9a 09 f8 e8 09 92 39 c9 <0f> 0b 8d 74 26 00 55 89 e5 3e 8d 74 26 00 8b 10 80 e6 01 74 09 EIP: [] logfs_lock_write_page+0x6a/0x70 [logfs] SS:ESP 0068:c7337b18 ---[ end trace 96e67d5b3aa3d6ca ]--- The patch passes locked page to __logfs_write_inode. It calls function logfs_get_wblocks() to pre-lock the page. This ensures any further attempts to lock the page are ignored (esp from get_erase_count). Acked-by: Joern Engel Signed-off-by: Prasad Joshi --- fs/logfs/dir.c | 2 +- fs/logfs/inode.c | 2 +- fs/logfs/logfs.h | 2 +- fs/logfs/readwrite.c | 12 ++++++------ 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index b7d7f67cee5a..b6404898da83 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -71,7 +71,7 @@ static int write_dir(struct inode *dir, struct logfs_disk_dentry *dd, static int write_inode(struct inode *inode) { - return __logfs_write_inode(inode, WF_LOCK); + return __logfs_write_inode(inode, NULL, WF_LOCK); } static s64 dir_seek_data(struct inode *inode, s64 pos) diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 388d7c5a7bed..7c42c132c177 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -287,7 +287,7 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc) if (logfs_inode(inode)->li_flags & LOGFS_IF_STILLBORN) return 0; - ret = __logfs_write_inode(inode, flags); + ret = __logfs_write_inode(inode, NULL, flags); LOGFS_BUG_ON(ret, inode->i_sb); return ret; } diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index bb4340850c1b..0dec29887a8a 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h @@ -528,7 +528,7 @@ void logfs_destroy_inode_cache(void); void logfs_set_blocks(struct inode *inode, u64 no); /* these logically belong into inode.c but actually reside in readwrite.c */ int logfs_read_inode(struct inode *inode); -int __logfs_write_inode(struct inode *inode, long flags); +int __logfs_write_inode(struct inode *inode, struct page *, long flags); void logfs_evict_inode(struct inode *inode); /* journal.c */ diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 7b10e8aecced..88284c67ba97 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -422,7 +422,7 @@ static void inode_write_block(struct logfs_block *block) if (inode->i_ino == LOGFS_INO_MASTER) logfs_write_anchor(inode->i_sb); else { - ret = __logfs_write_inode(inode, 0); + ret = __logfs_write_inode(inode, NULL, 0); /* see indirect_write_block comment */ BUG_ON(ret); } @@ -1629,7 +1629,7 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, if (inode->i_ino == LOGFS_INO_MASTER) logfs_write_anchor(inode->i_sb); else { - err = __logfs_write_inode(inode, flags); + err = __logfs_write_inode(inode, page, flags); } } } @@ -1879,7 +1879,7 @@ int logfs_truncate(struct inode *inode, u64 target) logfs_get_wblocks(sb, NULL, 1); err = __logfs_truncate(inode, size); if (!err) - err = __logfs_write_inode(inode, 0); + err = __logfs_write_inode(inode, NULL, 0); logfs_put_wblocks(sb, NULL, 1); } @@ -2119,14 +2119,14 @@ void logfs_set_segment_unreserved(struct super_block *sb, u32 segno, u32 ec) ec_level); } -int __logfs_write_inode(struct inode *inode, long flags) +int __logfs_write_inode(struct inode *inode, struct page *page, long flags) { struct super_block *sb = inode->i_sb; int ret; - logfs_get_wblocks(sb, NULL, flags & WF_LOCK); + logfs_get_wblocks(sb, page, flags & WF_LOCK); ret = do_write_inode(inode); - logfs_put_wblocks(sb, NULL, flags & WF_LOCK); + logfs_put_wblocks(sb, page, flags & WF_LOCK); return ret; } From 756ccb3c351e425ad5544ff1a92cfe6bec83b904 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Tue, 13 Sep 2011 23:04:11 +0530 Subject: [PATCH 06/10] MAINTAINERS: Add Prasad Joshi in LogFS maintiners Acked-by: Joern Engel Signed-off-by: Prasad Joshi --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0e7a80aefa0c..ce7029b79cf5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4116,6 +4116,7 @@ F: fs/partitions/ldm.* LogFS M: Joern Engel +M: Prasad Joshi L: logfs@logfs.org W: logfs.org S: Maintained From 6c69494f6b442834f26377e02d43fc8e1272221d Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Mon, 12 Sep 2011 21:09:16 +0530 Subject: [PATCH 07/10] logfs: remove useless BUG_ON It prevents write sizes >4k. Signed-off-by: Joern Engel --- fs/logfs/journal.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index 9da29706f91c..1e1c369df22b 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c @@ -612,7 +612,6 @@ static size_t __logfs_write_je(struct super_block *sb, void *buf, u16 type, if (len == 0) return logfs_write_header(super, header, 0, type); - BUG_ON(len > sb->s_blocksize); compr_len = logfs_compress(buf, data, len, sb->s_blocksize); if (compr_len < 0 || type == JE_ANCHOR) { memcpy(data, buf, len); From 1bcceaff8cbe5e5698ccf1015c9a938aa72718c4 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Fri, 5 Aug 2011 11:18:19 +0200 Subject: [PATCH 08/10] logfs: Free areas before calling generic_shutdown_super() Or hit an assertion in map_invalidatepage() instead. Signed-off-by: Joern Engel --- fs/logfs/logfs.h | 1 + fs/logfs/segment.c | 14 ++++++++++---- fs/logfs/super.c | 1 + 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index 0dec29887a8a..59ed32cd62d1 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h @@ -596,6 +596,7 @@ int logfs_init_mapping(struct super_block *sb); void logfs_sync_area(struct logfs_area *area); void logfs_sync_segments(struct super_block *sb); void freeseg(struct super_block *sb, u32 segno); +void free_areas(struct super_block *sb); /* area handling */ int logfs_init_areas(struct super_block *sb); diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index 6aee6092860d..ab798ed1cc88 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c @@ -862,6 +862,16 @@ static void free_area(struct logfs_area *area) kfree(area); } +void free_areas(struct super_block *sb) +{ + struct logfs_super *super = logfs_super(sb); + int i; + + for_each_area(i) + free_area(super->s_area[i]); + free_area(super->s_journal_area); +} + static struct logfs_area *alloc_area(struct super_block *sb) { struct logfs_area *area; @@ -944,10 +954,6 @@ err: void logfs_cleanup_areas(struct super_block *sb) { struct logfs_super *super = logfs_super(sb); - int i; btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias); - for_each_area(i) - free_area(super->s_area[i]); - free_area(super->s_journal_area); } diff --git a/fs/logfs/super.c b/fs/logfs/super.c index f9b7a30b00a3..c9ee7f5d1caf 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -486,6 +486,7 @@ static void logfs_kill_sb(struct super_block *sb) /* Alias entries slow down mount, so evict as many as possible */ sync_filesystem(sb); logfs_write_anchor(sb); + free_areas(sb); /* * From this point on alias entries are simply dropped - and any From bbe01387129f76fa4bec17904eb14c4bdc3c179f Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Fri, 5 Aug 2011 11:13:30 +0200 Subject: [PATCH 09/10] logfs: Grow inode in delete path Can be necessary if an inode gets deleted (through -ENOSPC) before being written. Might be better to move this into logfs_write_rec(), but for now go with the stupid&safe patch. Signed-off-by: Joern Engel --- fs/logfs/readwrite.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 88284c67ba97..4153e65b0148 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -1576,11 +1576,15 @@ int logfs_write_buf(struct inode *inode, struct page *page, long flags) static int __logfs_delete(struct inode *inode, struct page *page) { long flags = WF_DELETE; + int err; inode->i_ctime = inode->i_mtime = CURRENT_TIME; if (page->index < I0_BLOCKS) return logfs_write_direct(inode, page, flags); + err = grow_inode(inode, page->index, 0); + if (err) + return err; return logfs_write_rec(inode, page, page->index, 0, flags); } From f2933e86ad93a8d1287079d59e67afd6f4166a9d Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Fri, 5 Aug 2011 11:09:55 +0200 Subject: [PATCH 10/10] Logfs: Allow NULL block_isbad() methods Not all mtd drivers define block_isbad(). Let's assume no bad blocks instead of refusing to mount. Signed-off-by: Joern Engel --- fs/logfs/dev_mtd.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c index 339e17e9133d..d054d7e975ca 100644 --- a/fs/logfs/dev_mtd.c +++ b/fs/logfs/dev_mtd.c @@ -150,14 +150,13 @@ static struct page *mtd_find_first_sb(struct super_block *sb, u64 *ofs) filler_t *filler = mtd_readpage; struct mtd_info *mtd = super->s_mtd; - if (!mtd->block_isbad) - return NULL; - *ofs = 0; - while (mtd->block_isbad(mtd, *ofs)) { - *ofs += mtd->erasesize; - if (*ofs >= mtd->size) - return NULL; + if (mtd->block_isbad) { + while (mtd->block_isbad(mtd, *ofs)) { + *ofs += mtd->erasesize; + if (*ofs >= mtd->size) + return NULL; + } } BUG_ON(*ofs & ~PAGE_MASK); return read_cache_page(mapping, *ofs >> PAGE_SHIFT, filler, sb); @@ -170,14 +169,13 @@ static struct page *mtd_find_last_sb(struct super_block *sb, u64 *ofs) filler_t *filler = mtd_readpage; struct mtd_info *mtd = super->s_mtd; - if (!mtd->block_isbad) - return NULL; - *ofs = mtd->size - mtd->erasesize; - while (mtd->block_isbad(mtd, *ofs)) { - *ofs -= mtd->erasesize; - if (*ofs <= 0) - return NULL; + if (mtd->block_isbad) { + while (mtd->block_isbad(mtd, *ofs)) { + *ofs -= mtd->erasesize; + if (*ofs <= 0) + return NULL; + } } *ofs = *ofs + mtd->erasesize - 0x1000; BUG_ON(*ofs & ~PAGE_MASK);