From 49137f2efb5cf68724bccaba531ab3d59acd71f9 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Tue, 16 Mar 2010 21:46:15 +0100 Subject: [PATCH 01/12] Open segment file before using it logfs_recover_sb() needs it open. Signed-off-by: Joern Engel --- fs/logfs/super.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/logfs/super.c b/fs/logfs/super.c index c66beab78dee..018728120bb3 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -289,6 +289,10 @@ static int logfs_make_writeable(struct super_block *sb) { int err; + err = logfs_open_segfile(sb); + if (err) + return err; + /* Repair any broken superblock copies */ err = logfs_recover_sb(sb); if (err) @@ -299,10 +303,6 @@ static int logfs_make_writeable(struct super_block *sb) if (err) return err; - err = logfs_open_segfile(sb); - if (err) - return err; - /* Do one GC pass before any data gets dirtied */ logfs_gc_pass(sb); From 59fe27c0a8173a74b105debc803b97582028c90b Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Wed, 17 Mar 2010 13:47:45 +0100 Subject: [PATCH 02/12] Limit max_pages for insane devices Intel SSDs have a limit of 0xffff as queue_max_hw_sectors(q). Such a limit may make sense from a hardware pov, but it causes bio_alloc() to return NULL. Signed-off-by: Joern Engel --- fs/logfs/dev_bdev.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index 9718c22f186d..f99f5dcce546 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -97,8 +97,10 @@ static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index, unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); int i; + if (max_pages > BIO_MAX_PAGES) + max_pages = BIO_MAX_PAGES; bio = bio_alloc(GFP_NOFS, max_pages); - BUG_ON(!bio); /* FIXME: handle this */ + BUG_ON(!bio); for (i = 0; i < nr_pages; i++) { if (i >= max_pages) { @@ -191,8 +193,10 @@ static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index, unsigned int max_pages = queue_max_hw_sectors(q) >> (PAGE_SHIFT - 9); int i; + if (max_pages > BIO_MAX_PAGES) + max_pages = BIO_MAX_PAGES; bio = bio_alloc(GFP_NOFS, max_pages); - BUG_ON(!bio); /* FIXME: handle this */ + BUG_ON(!bio); for (i = 0; i < nr_pages; i++) { if (i >= max_pages) { From e07bf553f37cd4fa470b499ff34d800956df2d48 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Wed, 17 Mar 2010 15:29:38 +0100 Subject: [PATCH 03/12] Plug memory leak in writeseg_end_io Signed-off-by: Joern Engel --- fs/logfs/dev_bdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index f99f5dcce546..a5d0c56d3ebc 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -80,6 +80,7 @@ static void writeseg_end_io(struct bio *bio, int err) prefetchw(&bvec->bv_page->flags); end_page_writeback(page); + page_cache_release(page); } while (bvec >= bio->bi_io_vec); bio_put(bio); if (atomic_dec_and_test(&super->s_pending_writes)) From e326068806ee044cc617b1dc24be1293fca3fbf6 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Wed, 17 Mar 2010 16:00:07 +0100 Subject: [PATCH 04/12] Prevent schedule while atomic in __logfs_readdir Apparently filldir can sleep, which forbids kmap_atomic. Signed-off-by: Joern Engel --- fs/logfs/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 56a8bfbb0120..c76b4b5c7ff6 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -303,12 +303,12 @@ static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir) (filler_t *)logfs_readpage, NULL); if (IS_ERR(page)) return PTR_ERR(page); - dd = kmap_atomic(page, KM_USER0); + dd = kmap(page); BUG_ON(dd->namelen == 0); full = filldir(buf, (char *)dd->name, be16_to_cpu(dd->namelen), pos, be64_to_cpu(dd->ino), dd->type); - kunmap_atomic(dd, KM_USER0); + kunmap(page); page_cache_release(page); if (full) break; From faaa27ab919799929732c356a92a160f8657ecc6 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Fri, 26 Mar 2010 10:18:36 +0100 Subject: [PATCH 05/12] Write out both superblocks on mismatch If the first superblock is wrong and the second gets written, there will still be a mismatch on next mount. Write both to make sure they match. Signed-off-by: Joern Engel --- fs/logfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/logfs/super.c b/fs/logfs/super.c index 018728120bb3..006670fe9e8b 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -277,7 +277,7 @@ static int logfs_recover_sb(struct super_block *sb) } if (valid0 && valid1 && ds_cmp(ds0, ds1)) { printk(KERN_INFO"Superblocks don't match - fixing.\n"); - return write_one_sb(sb, super->s_devops->find_last_sb); + return logfs_write_sb(sb); } /* If neither is valid now, something's wrong. Didn't we properly * check them before?!? */ From 7db8064c17b92e95aec2e333096c035db9ddd4fe Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Fri, 26 Mar 2010 14:45:55 +0100 Subject: [PATCH 06/12] Fix logfs_get_sb_final error path rootdir was already allocated, so we must iput it again. Found by Al Viro. Signed-off-by: Joern Engel --- fs/logfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/logfs/super.c b/fs/logfs/super.c index 006670fe9e8b..2845c41d70d4 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -328,7 +328,7 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt) sb->s_root = d_alloc_root(rootdir); if (!sb->s_root) - goto fail; + goto fail2; super->s_erase_page = alloc_pages(GFP_KERNEL, 0); if (!super->s_erase_page) From 6f2e9e6a950a165a7d2c399ab7557e6745ef2bfd Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Fri, 26 Mar 2010 14:50:08 +0100 Subject: [PATCH 07/12] Use deactivate_locked_super Found by Al Viro. Signed-off-by: Joern Engel --- fs/logfs/super.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/logfs/super.c b/fs/logfs/super.c index 2845c41d70d4..9d856c49afc5 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -572,8 +572,7 @@ int logfs_get_sb_device(struct file_system_type *type, int flags, return 0; err1: - up_write(&sb->s_umount); - deactivate_super(sb); + deactivate_locked_super(sb); return err; err0: kfree(super); From 193219172691e29813821dc8433317768c6ed1a3 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Sat, 27 Mar 2010 09:56:58 +0100 Subject: [PATCH 08/12] Prevent data corruption in logfs_rewrite_block() The comment was correct, so make the code match the comment. As the new comment indicates, we might be able to do a little less work. But for the current -rc series let's keep it simple and just fix the bug. Signed-off-by: Joern Engel --- fs/logfs/readwrite.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 7a23b3e7c0a7..c3a3a6814b84 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -1594,7 +1594,6 @@ int logfs_delete(struct inode *inode, pgoff_t index, return ret; } -/* Rewrite cannot mark the inode dirty but has to write it immediatly. */ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, gc_level_t gc_level, long flags) { @@ -1611,6 +1610,18 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, if (level != 0) alloc_indirect_block(inode, page, 0); err = logfs_write_buf(inode, page, flags); + if (!err && shrink_level(gc_level) == 0) { + /* Rewrite cannot mark the inode dirty but has to + * write it immediatly. + * Q: Can't we just create an alias for the inode + * instead? And if not, why not? + */ + if (inode->i_ino == LOGFS_INO_MASTER) + logfs_write_anchor(inode->i_sb); + else { + err = __logfs_write_inode(inode, flags); + } + } } logfs_put_write_page(page); return err; From 81def6b9862764924a99ac1b680e73ac8c80ac64 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Sun, 28 Mar 2010 12:47:09 +0200 Subject: [PATCH 09/12] Simplify and fix pad_wbuf A comment in the old code read: /* The math in this function can surely use some love */ And indeed it did. In the case that area->a_used_bytes is exactly 4096 bytes below segment size it fell apart. pad_wbuf is now split into two helpers that are significantly less complicated. Signed-off-by: Joern Engel --- fs/logfs/segment.c | 52 ++++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index 1a14f9910d55..ff9d7f3b4d36 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c @@ -93,49 +93,57 @@ void __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len, } while (len); } -/* - * bdev_writeseg will write full pages. Memset the tail to prevent data leaks. - */ -static void pad_wbuf(struct logfs_area *area, int final) +static void pad_partial_page(struct logfs_area *area) { struct super_block *sb = area->a_sb; - struct logfs_super *super = logfs_super(sb); struct page *page; u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); pgoff_t index = ofs >> PAGE_SHIFT; long offset = ofs & (PAGE_SIZE-1); u32 len = PAGE_SIZE - offset; - if (len == PAGE_SIZE) { - /* The math in this function can surely use some love */ - len = 0; - } - if (len) { - BUG_ON(area->a_used_bytes >= super->s_segsize); - - page = get_mapping_page(area->a_sb, index, 0); + if (len % PAGE_SIZE) { + page = get_mapping_page(sb, index, 0); BUG_ON(!page); /* FIXME: reserve a pool */ memset(page_address(page) + offset, 0xff, len); SetPagePrivate(page); page_cache_release(page); } +} - if (!final) - return; +static void pad_full_pages(struct logfs_area *area) +{ + struct super_block *sb = area->a_sb; + struct logfs_super *super = logfs_super(sb); + u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes); + u32 len = super->s_segsize - area->a_used_bytes; + pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT; + pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT; + struct page *page; - area->a_used_bytes += len; - for ( ; area->a_used_bytes < super->s_segsize; - area->a_used_bytes += PAGE_SIZE) { - /* Memset another page */ - index++; - page = get_mapping_page(area->a_sb, index, 0); + while (no_indizes) { + page = get_mapping_page(sb, index, 0); BUG_ON(!page); /* FIXME: reserve a pool */ - memset(page_address(page), 0xff, PAGE_SIZE); + SetPageUptodate(page); + memset(page_address(page), 0xff, PAGE_CACHE_SIZE); SetPagePrivate(page); page_cache_release(page); + index++; + no_indizes--; } } +/* + * bdev_writeseg will write full pages. Memset the tail to prevent data leaks. + * Also make sure we allocate (and memset) all pages for final writeout. + */ +static void pad_wbuf(struct logfs_area *area, int final) +{ + pad_partial_page(area); + if (final) + pad_full_pages(area); +} + /* * We have to be careful with the alias tree. Since lookup is done by bix, * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with From 723b2ff40876678b49e61df34fb1d8001e34639d Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Sun, 28 Mar 2010 18:10:07 +0200 Subject: [PATCH 10/12] [LogFS] Clear PagePrivate when moving journal do_logfs_journal_wl_pass() must call freeseg(), thereby clear PagePrivate on all pages of the current journal segment. Signed-off-by: Joern Engel --- fs/logfs/journal.c | 1 + fs/logfs/logfs.h | 1 + fs/logfs/segment.c | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index 6ad30a4c9052..15454ac7bd93 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c @@ -821,6 +821,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb) logfs_set_segment_reserved(sb, segno); } /* Manually move journal_area */ + freeseg(sb, area->a_segno); area->a_segno = super->s_journal_seg[0]; area->a_is_open = 0; area->a_used_bytes = 0; diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index 129779431373..b84b0eec6024 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h @@ -587,6 +587,7 @@ void move_page_to_btree(struct page *page); int logfs_init_mapping(struct super_block *sb); void logfs_sync_area(struct logfs_area *area); void logfs_sync_segments(struct super_block *sb); +void freeseg(struct super_block *sb, u32 segno); /* area handling */ int logfs_init_areas(struct super_block *sb); diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index ff9d7f3b4d36..0ecd8f07c11e 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c @@ -691,7 +691,7 @@ int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow) return 0; } -static void freeseg(struct super_block *sb, u32 segno) +void freeseg(struct super_block *sb, u32 segno) { struct logfs_super *super = logfs_super(sb); struct address_space *mapping = super->s_mapping_inode->i_mapping; From 0943846ae05603efd98550f2d475e9c98191bde8 Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Mon, 29 Mar 2010 21:13:28 +0200 Subject: [PATCH 11/12] [LogFS] Move reserved segments with journal Fixes a GC livelock. Signed-off-by: Joern Engel --- fs/logfs/journal.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index 15454ac7bd93..25b1345c4652 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c @@ -800,6 +800,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb) { struct logfs_super *super = logfs_super(sb); struct logfs_area *area = super->s_journal_area; + struct btree_head32 *head = &super->s_reserved_segments; u32 segno, ec; int i, err; @@ -807,6 +808,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb) /* Drop old segments */ journal_for_each(i) if (super->s_journal_seg[i]) { + btree_remove32(head, super->s_journal_seg[i]); logfs_set_segment_unreserved(sb, super->s_journal_seg[i], super->s_journal_ec[i]); @@ -819,6 +821,8 @@ void do_logfs_journal_wl_pass(struct super_block *sb) super->s_journal_seg[i] = segno; super->s_journal_ec[i] = ec; logfs_set_segment_reserved(sb, segno); + err = btree_insert32(head, segno, (void *)1, GFP_KERNEL); + BUG_ON(err); /* mempool should prevent this */ } /* Manually move journal_area */ freeseg(sb, area->a_segno); From 6be7fa06eb4d721df734bd0946b5e63b27c0589b Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Mon, 29 Mar 2010 21:14:52 +0200 Subject: [PATCH 12/12] [LogFS] Erase new journal segments If the device contains on old logfs image and the journal is moved to segment that have never been used by the current logfs and not all journal segments are erased before the next mount, the old content can confuse mount code. To prevent this, always erase the new journal segments. Signed-off-by: Joern Engel --- fs/logfs/journal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c index 25b1345c4652..d57c7b07b60b 100644 --- a/fs/logfs/journal.c +++ b/fs/logfs/journal.c @@ -823,6 +823,8 @@ void do_logfs_journal_wl_pass(struct super_block *sb) logfs_set_segment_reserved(sb, segno); err = btree_insert32(head, segno, (void *)1, GFP_KERNEL); BUG_ON(err); /* mempool should prevent this */ + err = logfs_erase_segment(sb, segno, 1); + BUG_ON(err); /* FIXME: remount-ro would be nicer */ } /* Manually move journal_area */ freeseg(sb, area->a_segno);