Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: Remove automatic enabling of the HUGE_FILE feature flag
  ext4: Replace hackish ext4_mb_poll_new_transaction with commit callback
  ext4: Update Documentation/filesystems/ext4.txt
  ext4: Remove unused mount options: nomballoc, mballoc, nocheck
  ext4: Remove compile warnings when building w/o CONFIG_PROC_FS
  ext4: Add missing newlines to printk messages
  ext4: Fix file fragmentation during large file write.
  vfs: Add no_nrwrite_index_update writeback control flag
  vfs: Remove the range_cont writeback mode.
  ext4: Use tag dirty lookup during mpage_da_submit_io
  ext4: let the block device know when unused blocks can be discarded
  ext4: Don't reuse released data blocks until transaction commits
  ext4: Use an rbtree for tracking blocks freed during transaction.
  ext4: Do mballoc init before doing filesystem recovery
  ext4: Free ext4_prealloc_space using kmem_cache_free
  ext4: Fix Kconfig typo for ext4dev
  ext4: Remove an old reference to ext4dev in Makefile comment
commit 58617d5e59
Documentation/filesystems/ext4.txt
@@ -2,19 +2,24 @@
 Ext4 Filesystem
 ===============
 
-This is a development version of the ext4 filesystem, an advanced level
-of the ext3 filesystem which incorporates scalability and reliability
-enhancements for supporting large filesystems (64 bit) in keeping with
-increasing disk capacities and state-of-the-art feature requirements.
+Ext4 is an advanced level of the ext3 filesystem which incorporates
+scalability and reliability enhancements for supporting large filesystems
+(64 bit) in keeping with increasing disk capacities and state-of-the-art
+feature requirements.
 
-Mailing list: linux-ext4@vger.kernel.org
+Mailing list:   linux-ext4@vger.kernel.org
+Web site:       http://ext4.wiki.kernel.org
 
 
 1. Quick usage instructions:
 ===========================
 
+Note: More extensive information for getting started with ext4 can be
+      found at the ext4 wiki site at the URL:
+      http://ext4.wiki.kernel.org/index.php/Ext4_Howto
+
   - Compile and install the latest version of e2fsprogs (as of this
-    writing version 1.41) from:
+    writing version 1.41.3) from:
 
     http://sourceforge.net/project/showfiles.php?group_id=2406
@@ -36,11 +41,9 @@ Mailing list: linux-ext4@vger.kernel.org
 
     # mke2fs -t ext4 /dev/hda1
 
-  - Or configure an existing ext3 filesystem to support extents and set
-    the test_fs flag to indicate that it's ok for an in-development
-    filesystem to touch this filesystem:
+  - Or to configure an existing ext3 filesystem to support extents:
 
-	# tune2fs -O extents -E test_fs /dev/hda1
+	# tune2fs -O extents /dev/hda1
 
     If the filesystem was created with 128 byte inodes, it can be
     converted to use 256 byte for greater efficiency via:
@@ -104,8 +107,8 @@ exist yet so I'm not sure they're in the near-term roadmap.
 The big performance win will come with mballoc, delalloc and flex_bg
 grouping of bitmaps and inode tables.  Some test results available here:
 
- - http://www.bullopensource.org/ext4/20080530/ffsb-write-2.6.26-rc2.html
- - http://www.bullopensource.org/ext4/20080530/ffsb-readwrite-2.6.26-rc2.html
+ - http://www.bullopensource.org/ext4/20080818-ffsb/ffsb-write-2.6.27-rc1.html
+ - http://www.bullopensource.org/ext4/20080818-ffsb/ffsb-readwrite-2.6.27-rc1.html
 
 3. Options
 ==========
@@ -214,9 +217,6 @@ noreservation
 bsddf		(*)	Make 'df' act like BSD.
 minixdf			Make 'df' act like Minix.
 
-check=none		Don't do extra checking of bitmaps on mount.
-nocheck
-
 debug			Extra debugging information is sent to syslog.
 
 errors=remount-ro(*)	Remount the filesystem read-only on an error.
@@ -253,8 +253,6 @@ nobh			(a) cache disk block mapping information
 			"nobh" option tries to avoid associating buffer
 			heads (supported only for "writeback" mode).
 
-mballoc		(*)	Use the multiple block allocator for block allocation
-nomballoc		disabled multiple block allocator for block allocation.
 stripe=n		Number of filesystem blocks that mballoc will try
 			to use for allocation size and alignment. For RAID5/6
 			systems this should be the number of data
fs/Kconfig
@@ -160,7 +160,7 @@ config EXT4_FS
 	  filesystem initially.
 
 	  To compile this file system support as a module, choose M here. The
-	  module will be called ext4dev.
+	  module will be called ext4.
 
 	  If unsure, say N.
 
fs/Makefile
@@ -71,7 +71,7 @@ obj-$(CONFIG_DLM)	+= dlm/
 # Do not add any filesystems before this line
 obj-$(CONFIG_REISERFS_FS)	+= reiserfs/
 obj-$(CONFIG_EXT3_FS)		+= ext3/ # Before ext2 so root fs can be ext3
-obj-$(CONFIG_EXT4_FS)		+= ext4/ # Before ext2 so root fs can be ext4dev
+obj-$(CONFIG_EXT4_FS)		+= ext4/ # Before ext2 so root fs can be ext4
 obj-$(CONFIG_JBD)		+= jbd/
 obj-$(CONFIG_JBD2)		+= jbd2/
 obj-$(CONFIG_EXT2_FS)		+= ext2/
fs/ext4/balloc.c
@@ -568,8 +568,16 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 
 	/* this isn't the right place to decide whether block is metadata
 	 * inode.c/extents.c knows better, but for safety ... */
-	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
-			ext4_should_journal_data(inode))
+	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
 		metadata = 1;
 
+	/* We need to make sure we don't reuse
+	 * blocks released until the transaction commits.
+	 * Writeback mode has weak data consistency, so
+	 * don't force data as metadata when freeing blocks
+	 * for writeback mode.
+	 */
+	if (metadata == 0 && !ext4_should_writeback_data(inode))
+		metadata = 1;
+
 	sb = inode->i_sb;
fs/ext4/ext4.h
@@ -511,7 +511,6 @@ do {									\
 /*
  * Mount flags
  */
-#define EXT4_MOUNT_CHECK		0x00001	/* Do mount-time checks */
 #define EXT4_MOUNT_OLDALLOC		0x00002	/* Don't use the new Orlov allocator */
 #define EXT4_MOUNT_GRPID		0x00004	/* Create files with directory's group */
 #define EXT4_MOUNT_DEBUG		0x00008	/* Some debugging messages */
fs/ext4/ext4_sb.h
@@ -99,9 +99,6 @@ struct ext4_sb_info {
 	struct inode *s_buddy_cache;
 	long s_blocks_reserved;
 	spinlock_t s_reserve_lock;
-	struct list_head s_active_transaction;
-	struct list_head s_closed_transaction;
-	struct list_head s_committed_transaction;
 	spinlock_t s_md_lock;
 	tid_t s_last_transaction;
 	unsigned short *s_mb_offsets, *s_mb_maxs;
fs/ext4/inode.c
@@ -1648,6 +1648,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
 	int ret = 0, err, nr_pages, i;
 	unsigned long index, end;
 	struct pagevec pvec;
+	long pages_skipped;
 
 	BUG_ON(mpd->next_page <= mpd->first_page);
 	pagevec_init(&pvec, 0);
@@ -1655,20 +1656,30 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
 	end = mpd->next_page - 1;
 
 	while (index <= end) {
-		/* XXX: optimize tail */
-		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+		/*
+		 * We can use PAGECACHE_TAG_DIRTY lookup here because
+		 * even though we have cleared the dirty flag on the page
+		 * We still keep the page in the radix tree with tag
+		 * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io.
+		 * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback
+		 * which is called via the below writepage callback.
+		 */
+		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+					PAGECACHE_TAG_DIRTY,
+					min(end - index,
+					(pgoff_t)PAGEVEC_SIZE-1) + 1);
 		if (nr_pages == 0)
 			break;
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
 
-			index = page->index;
-			if (index > end)
-				break;
-			index++;
-
+			pages_skipped = mpd->wbc->pages_skipped;
 			err = mapping->a_ops->writepage(page, mpd->wbc);
-			if (!err)
+			if (!err && (pages_skipped == mpd->wbc->pages_skipped))
+				/*
+				 * have successfully written the page
+				 * without skipping the same
+				 */
 				mpd->pages_written++;
 			/*
 			 * In error case, we have to continue because
@@ -2104,7 +2115,6 @@ static int mpage_da_writepages(struct address_space *mapping,
 			       struct writeback_control *wbc,
 			       struct mpage_da_data *mpd)
 {
-	long to_write;
 	int ret;
 
 	if (!mpd->get_block)
@@ -2119,19 +2129,18 @@ static int mpage_da_writepages(struct address_space *mapping,
 	mpd->pages_written = 0;
 	mpd->retval = 0;
 
-	to_write = wbc->nr_to_write;
-
 	ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
 
 	/*
 	 * Handle last extent of pages
 	 */
 	if (!mpd->io_done && mpd->next_page != mpd->first_page) {
 		if (mpage_da_map_blocks(mpd) == 0)
 			mpage_da_submit_io(mpd);
-	}
 
-	wbc->nr_to_write = to_write - mpd->pages_written;
+		mpd->io_done = 1;
+		ret = MPAGE_DA_EXTENT_TAIL;
+	}
+	wbc->nr_to_write -= mpd->pages_written;
 	return ret;
 }
@@ -2360,12 +2369,14 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
 static int ext4_da_writepages(struct address_space *mapping,
 			      struct writeback_control *wbc)
 {
+	pgoff_t	index;
+	int range_whole = 0;
 	handle_t *handle = NULL;
-	loff_t range_start = 0;
 	struct mpage_da_data mpd;
 	struct inode *inode = mapping->host;
+	int no_nrwrite_index_update;
+	long pages_written = 0, pages_skipped;
 	int needed_blocks, ret = 0, nr_to_writebump = 0;
-	long to_write, pages_skipped = 0;
 	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
 
 	/*
@@ -2385,23 +2396,26 @@ static int ext4_da_writepages(struct address_space *mapping,
 		nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
 		wbc->nr_to_write = sbi->s_mb_stream_request;
 	}
+	if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+		range_whole = 1;
 
-	if (!wbc->range_cyclic)
-		/*
-		 * If range_cyclic is not set force range_cont
-		 * and save the old writeback_index
-		 */
-		wbc->range_cont = 1;
-	range_start = wbc->range_start;
-	pages_skipped = wbc->pages_skipped;
+	if (wbc->range_cyclic)
+		index = mapping->writeback_index;
+	else
+		index = wbc->range_start >> PAGE_CACHE_SHIFT;
 
 	mpd.wbc = wbc;
 	mpd.inode = mapping->host;
 
-restart_loop:
-	to_write = wbc->nr_to_write;
-	while (!ret && to_write > 0) {
+	/*
+	 * we don't want write_cache_pages to update
+	 * nr_to_write and writeback_index
+	 */
+	no_nrwrite_index_update = wbc->no_nrwrite_index_update;
+	wbc->no_nrwrite_index_update = 1;
+	pages_skipped = wbc->pages_skipped;
+
+	while (!ret && wbc->nr_to_write > 0) {
 
 		/*
 		 * we insert one extent at a time. So we need
@@ -2422,48 +2436,53 @@ restart_loop:
 			dump_stack();
 			goto out_writepages;
 		}
-		to_write -= wbc->nr_to_write;
-
 		mpd.get_block = ext4_da_get_block_write;
 		ret = mpage_da_writepages(mapping, wbc, &mpd);
 
 		ext4_journal_stop(handle);
 
-		if (mpd.retval == -ENOSPC)
+		if (mpd.retval == -ENOSPC) {
+			/* commit the transaction which would
+			 * free blocks released in the transaction
+			 * and try again
+			 */
 			jbd2_journal_force_commit_nested(sbi->s_journal);
-
-		/* reset the retry count */
-		if (ret == MPAGE_DA_EXTENT_TAIL) {
+			wbc->pages_skipped = pages_skipped;
+			ret = 0;
+		} else if (ret == MPAGE_DA_EXTENT_TAIL) {
 			/*
 			 * got one extent now try with
 			 * rest of the pages
 			 */
-			to_write += wbc->nr_to_write;
+			pages_written += mpd.pages_written;
+			wbc->pages_skipped = pages_skipped;
 			ret = 0;
-		} else if (wbc->nr_to_write) {
+		} else if (wbc->nr_to_write)
 			/*
 			 * There is no more writeout needed
 			 * or we requested for a noblocking writeout
 			 * and we found the device congested
 			 */
-			to_write += wbc->nr_to_write;
 			break;
-		}
-		wbc->nr_to_write = to_write;
 	}
+	if (pages_skipped != wbc->pages_skipped)
+		printk(KERN_EMERG "This should not happen leaving %s "
+				"with nr_to_write = %ld ret = %d\n",
+				__func__, wbc->nr_to_write, ret);
 
-	if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
-		/* We skipped pages in this loop */
-		wbc->range_start = range_start;
-		wbc->nr_to_write = to_write +
-				wbc->pages_skipped - pages_skipped;
-		wbc->pages_skipped = pages_skipped;
-		goto restart_loop;
-	}
+	/* Update index */
+	index += pages_written;
+	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+		/*
+		 * set the writeback_index so that range_cyclic
+		 * mode will write it back later
+		 */
+		mapping->writeback_index = index;
 
 out_writepages:
-	wbc->nr_to_write = to_write - nr_to_writebump;
-	wbc->range_start = range_start;
+	if (!no_nrwrite_index_update)
+		wbc->no_nrwrite_index_update = 0;
+	wbc->nr_to_write -= nr_to_writebump;
 	return ret;
 }
@@ -4175,7 +4194,6 @@ static int ext4_inode_blocks_set(handle_t *handle,
 	struct inode *inode = &(ei->vfs_inode);
 	u64 i_blocks = inode->i_blocks;
 	struct super_block *sb = inode->i_sb;
-	int err = 0;
 
 	if (i_blocks <= ~0U) {
 		/*
@@ -4185,36 +4203,27 @@ static int ext4_inode_blocks_set(handle_t *handle,
 		raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
 		raw_inode->i_blocks_high = 0;
 		ei->i_flags &= ~EXT4_HUGE_FILE_FL;
-	} else if (i_blocks <= 0xffffffffffffULL) {
+		return 0;
+	}
+	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
+		return -EFBIG;
+
+	if (i_blocks <= 0xffffffffffffULL) {
 		/*
 		 * i_blocks can be represented in a 48 bit variable
 		 * as multiple of 512 bytes
 		 */
-		err = ext4_update_rocompat_feature(handle, sb,
-					EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
-		if (err)
-			goto err_out;
-		/* i_block is stored in the split 48 bit fields */
 		raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
 		raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
 		ei->i_flags &= ~EXT4_HUGE_FILE_FL;
 	} else {
-		/*
-		 * i_blocks should be represented in a 48 bit variable
-		 * as multiple of file system block size
-		 */
-		err = ext4_update_rocompat_feature(handle, sb,
-					EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
-		if (err)
-			goto err_out;
 		ei->i_flags |= EXT4_HUGE_FILE_FL;
 		/* i_block is stored in file system block size */
 		i_blocks = i_blocks >> (inode->i_blkbits - 9);
 		raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
 		raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
 	}
-err_out:
-	return err;
+	return 0;
 }
 
 /*
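The hunks above remove the automatic enabling of HUGE_FILE in ext4_inode_blocks_set(): the function now returns -EFBIG instead of flipping the feature flag itself. The encoding rules are unchanged, and the standalone userspace sketch below restates them; the encode() helper and its sample values are invented for illustration and are not the kernel function.

#include <stdint.h>
#include <stdio.h>

/* i_blocks up to 2^48 - 1 fits the split lo/high fields in 512-byte
 * units; beyond that, HUGE_FILE_FL switches the unit to filesystem
 * blocks, i.e. a shift by (blkbits - 9). */
static void encode(uint64_t i_blocks, unsigned int blkbits)
{
	int huge_file = 0;

	if (i_blocks > 0xffffffffffffULL) {
		huge_file = 1;			/* ei->i_flags |= EXT4_HUGE_FILE_FL */
		i_blocks >>= blkbits - 9;	/* 512-byte sectors -> fs blocks */
	}
	printf("lo=%lu high=%lu huge_file=%d\n",
	       (unsigned long)(uint32_t)i_blocks,
	       (unsigned long)(uint16_t)(i_blocks >> 32), huge_file);
}

int main(void)
{
	encode(1000, 12);	 /* small file: i_blocks_high stays 0 */
	encode(1ULL << 40, 12);	 /* needs the split 48-bit fields */
	encode(1ULL << 49, 12);	 /* needs HUGE_FILE_FL and fs-block units */
	return 0;
}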
fs/ext4/mballoc.c
@@ -2300,6 +2300,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 	}
 
 	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
+	meta_group_info[i]->bb_free_root.rb_node = NULL;
 
 #ifdef DOUBLE_CHECK
 	{
@@ -2522,9 +2523,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 	}
 
 	spin_lock_init(&sbi->s_md_lock);
-	INIT_LIST_HEAD(&sbi->s_active_transaction);
-	INIT_LIST_HEAD(&sbi->s_closed_transaction);
-	INIT_LIST_HEAD(&sbi->s_committed_transaction);
 	spin_lock_init(&sbi->s_bal_lock);
 
 	sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
@@ -2553,6 +2551,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 	ext4_mb_init_per_dev_proc(sb);
 	ext4_mb_history_init(sb);
 
+	sbi->s_journal->j_commit_callback = release_blocks_on_commit;
+
 	printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
 	return 0;
 }
@@ -2568,7 +2568,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
 		pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
 		list_del(&pa->pa_group_list);
 		count++;
-		kfree(pa);
+		kmem_cache_free(ext4_pspace_cachep, pa);
 	}
 	if (count)
 		mb_debug("mballoc: %u PAs left\n", count);
@@ -2582,15 +2582,6 @@ int ext4_mb_release(struct super_block *sb)
 	struct ext4_group_info *grinfo;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-	/* release freed, non-committed blocks */
-	spin_lock(&sbi->s_md_lock);
-	list_splice_init(&sbi->s_closed_transaction,
-			&sbi->s_committed_transaction);
-	list_splice_init(&sbi->s_active_transaction,
-			&sbi->s_committed_transaction);
-	spin_unlock(&sbi->s_md_lock);
-	ext4_mb_free_committed_blocks(sb);
-
 	if (sbi->s_group_info) {
 		for (i = 0; i < sbi->s_groups_count; i++) {
 			grinfo = ext4_get_group_info(sb, i);
@@ -2644,61 +2635,57 @@ int ext4_mb_release(struct super_block *sb)
 	return 0;
 }
 
-static noinline_for_stack void
-ext4_mb_free_committed_blocks(struct super_block *sb)
+/*
+ * This function is called by the jbd2 layer once the commit has finished,
+ * so we know we can free the blocks that were released with that commit.
+ */
+static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
 {
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	int err;
-	int i;
-	int count = 0;
-	int count2 = 0;
-	struct ext4_free_metadata *md;
+	struct super_block *sb = journal->j_private;
 	struct ext4_buddy e4b;
+	struct ext4_group_info *db;
+	int err, count = 0, count2 = 0;
+	struct ext4_free_data *entry;
+	ext4_fsblk_t discard_block;
+	struct list_head *l, *ltmp;
 
-	if (list_empty(&sbi->s_committed_transaction))
-		return;
-
-	/* there is committed blocks to be freed yet */
-	do {
-		/* get next array of blocks */
-		md = NULL;
-		spin_lock(&sbi->s_md_lock);
-		if (!list_empty(&sbi->s_committed_transaction)) {
-			md = list_entry(sbi->s_committed_transaction.next,
-					struct ext4_free_metadata, list);
-			list_del(&md->list);
-		}
-		spin_unlock(&sbi->s_md_lock);
-
-		if (md == NULL)
-			break;
+	list_for_each_safe(l, ltmp, &txn->t_private_list) {
+		entry = list_entry(l, struct ext4_free_data, list);
 
 		mb_debug("gonna free %u blocks in group %lu (0x%p):",
-				md->num, md->group, md);
+			 entry->count, entry->group, entry);
 
-		err = ext4_mb_load_buddy(sb, md->group, &e4b);
+		err = ext4_mb_load_buddy(sb, entry->group, &e4b);
 		/* we expect to find existing buddy because it's pinned */
 		BUG_ON(err != 0);
 
+		db = e4b.bd_info;
 		/* there are blocks to put in buddy to make them really free */
-		count += md->num;
+		count += entry->count;
 		count2++;
-		ext4_lock_group(sb, md->group);
-		for (i = 0; i < md->num; i++) {
-			mb_debug(" %u", md->blocks[i]);
-			mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
-		}
-		mb_debug("\n");
-		ext4_unlock_group(sb, md->group);
+		ext4_lock_group(sb, entry->group);
+		/* Take it out of per group rb tree */
+		rb_erase(&entry->node, &(db->bb_free_root));
+		mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
 
-		/* balance refcounts from ext4_mb_free_metadata() */
-		page_cache_release(e4b.bd_buddy_page);
-		page_cache_release(e4b.bd_bitmap_page);
+		if (!db->bb_free_root.rb_node) {
+			/* No more items in the per group rb tree
+			 * balance refcounts from ext4_mb_free_metadata()
+			 */
+			page_cache_release(e4b.bd_buddy_page);
+			page_cache_release(e4b.bd_bitmap_page);
+		}
+		ext4_unlock_group(sb, entry->group);
 
-		kfree(md);
+		discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
+			+ entry->start_blk
+			+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+		trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id,
+			   (unsigned long long) discard_block, entry->count);
+		sb_issue_discard(sb, discard_block, entry->count);
+
+		kmem_cache_free(ext4_free_ext_cachep, entry);
 		ext4_mb_release_desc(&e4b);
-
-	} while (md);
+	}
 
 	mb_debug("freed %u blocks in %u structures\n", count, count2);
 }
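Before issuing the discard, release_blocks_on_commit() above converts the group-relative extent into an absolute block number. A minimal userspace sketch of that arithmetic, with made-up geometry (32768 blocks per group, first data block 0, as on a 4 KiB-block filesystem):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t blocks_per_group = 32768;	/* EXT4_BLOCKS_PER_GROUP(sb) */
	uint64_t first_data_block = 0;		/* s_es->s_first_data_block */
	uint64_t group = 5, start_blk = 100, count = 8;

	/* same formula as release_blocks_on_commit() above */
	uint64_t discard_block = group * blocks_per_group
				 + start_blk + first_data_block;

	printf("discard blocks %llu..%llu\n",
	       (unsigned long long)discard_block,
	       (unsigned long long)(discard_block + count - 1));
	return 0;
}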
@@ -2712,6 +2699,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
 
 static int ext4_mb_init_per_dev_proc(struct super_block *sb)
 {
+#ifdef CONFIG_PROC_FS
 	mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct proc_dir_entry *proc;
@@ -2735,10 +2723,14 @@ err_out:
 	remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
 	remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
 	return -ENOMEM;
+#else
+	return 0;
+#endif
 }
 
 static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
 {
+#ifdef CONFIG_PROC_FS
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
 	if (sbi->s_proc == NULL)
@@ -2750,7 +2742,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
 	remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
 	remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
 	remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
-
+#endif
 	return 0;
 }
 
@@ -2771,6 +2763,16 @@ int __init init_ext4_mballoc(void)
 		kmem_cache_destroy(ext4_pspace_cachep);
 		return -ENOMEM;
 	}
+
+	ext4_free_ext_cachep =
+		kmem_cache_create("ext4_free_block_extents",
+				sizeof(struct ext4_free_data),
+				0, SLAB_RECLAIM_ACCOUNT, NULL);
+	if (ext4_free_ext_cachep == NULL) {
+		kmem_cache_destroy(ext4_pspace_cachep);
+		kmem_cache_destroy(ext4_ac_cachep);
+		return -ENOMEM;
+	}
 	return 0;
 }
@@ -2779,6 +2781,7 @@ void exit_ext4_mballoc(void)
 	/* XXX: synchronize_rcu(); */
 	kmem_cache_destroy(ext4_pspace_cachep);
 	kmem_cache_destroy(ext4_ac_cachep);
+	kmem_cache_destroy(ext4_free_ext_cachep);
 }
 
@@ -4324,8 +4327,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 		goto out1;
 	}
 
-	ext4_mb_poll_new_transaction(sb, handle);
-
 	*errp = ext4_mb_initialize_context(ac, ar);
 	if (*errp) {
 		ar->len = 0;
@@ -4384,35 +4385,20 @@ out1:
 
 	return block;
 }
-static void ext4_mb_poll_new_transaction(struct super_block *sb,
-						handle_t *handle)
+
+/*
+ * We can merge two free data extents only if the physical blocks
+ * are contiguous, AND the extents were freed by the same transaction,
+ * AND the blocks are associated with the same group.
+ */
+static int can_merge(struct ext4_free_data *entry1,
+			struct ext4_free_data *entry2)
 {
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-
-	if (sbi->s_last_transaction == handle->h_transaction->t_tid)
-		return;
-
-	/* new transaction! time to close last one and free blocks for
-	 * committed transaction. we know that only transaction can be
-	 * active, so previos transaction can be being logged and we
-	 * know that transaction before previous is known to be already
-	 * logged. this means that now we may free blocks freed in all
-	 * transactions before previous one. hope I'm clear enough ... */
-
-	spin_lock(&sbi->s_md_lock);
-	if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
-		mb_debug("new transaction %lu, old %lu\n",
-				(unsigned long) handle->h_transaction->t_tid,
-				(unsigned long) sbi->s_last_transaction);
-		list_splice_init(&sbi->s_closed_transaction,
-				&sbi->s_committed_transaction);
-		list_splice_init(&sbi->s_active_transaction,
-				&sbi->s_closed_transaction);
-		sbi->s_last_transaction = handle->h_transaction->t_tid;
-	}
-	spin_unlock(&sbi->s_md_lock);
-
-	ext4_mb_free_committed_blocks(sb);
+	if ((entry1->t_tid == entry2->t_tid) &&
+	    (entry1->group == entry2->group) &&
+	    ((entry1->start_blk + entry1->count) == entry2->start_blk))
+		return 1;
+	return 0;
 }
 
 static noinline_for_stack int
@@ -4422,57 +4408,80 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
 	struct ext4_group_info *db = e4b->bd_info;
 	struct super_block *sb = e4b->bd_sb;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct ext4_free_metadata *md;
-	int i;
+	struct ext4_free_data *entry, *new_entry;
+	struct rb_node **n = &db->bb_free_root.rb_node, *node;
+	struct rb_node *parent = NULL, *new_node;
 
+
 	BUG_ON(e4b->bd_bitmap_page == NULL);
 	BUG_ON(e4b->bd_buddy_page == NULL);
 
+	new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+	new_entry->start_blk = block;
+	new_entry->group = group;
+	new_entry->count = count;
+	new_entry->t_tid = handle->h_transaction->t_tid;
+	new_node = &new_entry->node;
+
 	ext4_lock_group(sb, group);
-	for (i = 0; i < count; i++) {
-		md = db->bb_md_cur;
-		if (md && db->bb_tid != handle->h_transaction->t_tid) {
-			db->bb_md_cur = NULL;
-			md = NULL;
-		}
-
-		if (md == NULL) {
-			ext4_unlock_group(sb, group);
-			md = kmalloc(sizeof(*md), GFP_NOFS);
-			if (md == NULL)
-				return -ENOMEM;
-			md->num = 0;
-			md->group = group;
-
-			ext4_lock_group(sb, group);
-			if (db->bb_md_cur == NULL) {
-				spin_lock(&sbi->s_md_lock);
-				list_add(&md->list, &sbi->s_active_transaction);
-				spin_unlock(&sbi->s_md_lock);
-				/* protect buddy cache from being freed,
-				 * otherwise we'll refresh it from
-				 * on-disk bitmap and lose not-yet-available
-				 * blocks */
-				page_cache_get(e4b->bd_buddy_page);
-				page_cache_get(e4b->bd_bitmap_page);
-				db->bb_md_cur = md;
-				db->bb_tid = handle->h_transaction->t_tid;
-				mb_debug("new md 0x%p for group %lu\n",
-						md, md->group);
-			} else {
-				kfree(md);
-				md = db->bb_md_cur;
-			}
-		}
-
-		BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS);
-		md->blocks[md->num] = block + i;
-		md->num++;
-		if (md->num == EXT4_BB_MAX_BLOCKS) {
-			/* no more space, put full container on a sb's list */
-			db->bb_md_cur = NULL;
+	if (!*n) {
+		/* first free block extent. We need to
+		 * protect buddy cache from being freed,
+		 * otherwise we'll refresh it from
+		 * on-disk bitmap and lose not-yet-available
+		 * blocks */
+		page_cache_get(e4b->bd_buddy_page);
+		page_cache_get(e4b->bd_bitmap_page);
+	}
+	while (*n) {
+		parent = *n;
+		entry = rb_entry(parent, struct ext4_free_data, node);
+		if (block < entry->start_blk)
+			n = &(*n)->rb_left;
+		else if (block >= (entry->start_blk + entry->count))
+			n = &(*n)->rb_right;
+		else {
+			ext4_error(sb, __func__,
+			    "Double free of blocks %d (%d %d)\n",
+			    block, entry->start_blk, entry->count);
+			return 0;
 		}
 	}
-	ext4_unlock_group(sb, group);
+
+	rb_link_node(new_node, parent, n);
+	rb_insert_color(new_node, &db->bb_free_root);
+
+	/* Now try to see the extent can be merged to left and right */
+	node = rb_prev(new_node);
+	if (node) {
+		entry = rb_entry(node, struct ext4_free_data, node);
+		if (can_merge(entry, new_entry)) {
+			new_entry->start_blk = entry->start_blk;
+			new_entry->count += entry->count;
+			rb_erase(node, &(db->bb_free_root));
+			spin_lock(&sbi->s_md_lock);
+			list_del(&entry->list);
+			spin_unlock(&sbi->s_md_lock);
+			kmem_cache_free(ext4_free_ext_cachep, entry);
+		}
+	}
+
+	node = rb_next(new_node);
+	if (node) {
+		entry = rb_entry(node, struct ext4_free_data, node);
+		if (can_merge(new_entry, entry)) {
+			new_entry->count += entry->count;
+			rb_erase(node, &(db->bb_free_root));
+			spin_lock(&sbi->s_md_lock);
+			list_del(&entry->list);
+			spin_unlock(&sbi->s_md_lock);
+			kmem_cache_free(ext4_free_ext_cachep, entry);
+		}
+	}
+	/* Add the extent to transaction's private list */
+	spin_lock(&sbi->s_md_lock);
+	list_add(&new_entry->list, &handle->h_transaction->t_private_list);
+	spin_unlock(&sbi->s_md_lock);
+	ext4_unlock_group(sb, group);
 	return 0;
 }
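The two hunks above replace the per-group array of freed blocks with an rbtree of extents keyed by start block, and can_merge() decides when a newly inserted extent may be coalesced with a neighbor: freed by the same transaction, in the same group, and physically contiguous. A standalone restatement of that rule with concrete numbers (the pared-down struct is for illustration only, not the kernel type):

#include <stdio.h>

struct free_extent { unsigned int t_tid, group, start_blk, count; };

static int can_merge(const struct free_extent *e1,
		     const struct free_extent *e2)
{
	return e1->t_tid == e2->t_tid && e1->group == e2->group &&
	       e1->start_blk + e1->count == e2->start_blk;
}

int main(void)
{
	struct free_extent a = { 42, 5, 100, 8 };  /* blocks 100..107 */
	struct free_extent b = { 42, 5, 108, 4 };  /* starts right after a */
	struct free_extent c = { 43, 5, 112, 4 };  /* freed by a later tid */

	printf("a+b mergeable: %d\n", can_merge(&a, &b));  /* 1 */
	printf("b+c mergeable: %d\n", can_merge(&b, &c));  /* 0: other transaction */
	return 0;
}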
@@ -4500,8 +4509,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
 
 	*freed = 0;
 
-	ext4_mb_poll_new_transaction(sb, handle);
-
 	sbi = EXT4_SB(sb);
 	es = EXT4_SB(sb)->s_es;
 	if (block < le32_to_cpu(es->s_first_data_block) ||
fs/ext4/mballoc.h
@@ -18,6 +18,8 @@
 #include <linux/pagemap.h>
 #include <linux/seq_file.h>
 #include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/marker.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
 #include "group.h"
@@ -98,23 +100,29 @@
 
 static struct kmem_cache *ext4_pspace_cachep;
 static struct kmem_cache *ext4_ac_cachep;
+static struct kmem_cache *ext4_free_ext_cachep;
 
-#ifdef EXT4_BB_MAX_BLOCKS
-#undef EXT4_BB_MAX_BLOCKS
-#endif
-#define EXT4_BB_MAX_BLOCKS	30
+struct ext4_free_data {
+	/* this links the free block information from group_info */
+	struct rb_node node;
 
-struct ext4_free_metadata {
-	ext4_group_t group;
-	unsigned short num;
-	ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
+	/* this links the free block information from ext4_sb_info */
 	struct list_head list;
+
+	/* group which free block extent belongs */
+	ext4_group_t group;
+
+	/* free block extent */
+	ext4_grpblk_t start_blk;
+	ext4_grpblk_t count;
+
+	/* transaction which freed this extent */
+	tid_t t_tid;
 };
 
 struct ext4_group_info {
 	unsigned long bb_state;
-	unsigned long bb_tid;
-	struct ext4_free_metadata *bb_md_cur;
+	struct rb_root bb_free_root;
 	unsigned short bb_first_free;
 	unsigned short bb_free;
 	unsigned short bb_fragments;
@@ -261,8 +269,6 @@ struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
 
 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 					ext4_group_t group);
-static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
-static void ext4_mb_free_committed_blocks(struct super_block *);
 static void ext4_mb_return_to_preallocation(struct inode *inode,
 					struct ext4_buddy *e4b, sector_t block,
 					int count);
@@ -270,6 +276,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *,
 			struct super_block *, struct ext4_prealloc_space *pa);
 static int ext4_mb_init_per_dev_proc(struct super_block *sb);
 static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
+static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
 
 
 static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
fs/ext4/super.c
@@ -374,66 +374,6 @@ void ext4_update_dynamic_rev(struct super_block *sb)
 	 */
 }
 
-int ext4_update_compat_feature(handle_t *handle,
-					struct super_block *sb, __u32 compat)
-{
-	int err = 0;
-	if (!EXT4_HAS_COMPAT_FEATURE(sb, compat)) {
-		err = ext4_journal_get_write_access(handle,
-				EXT4_SB(sb)->s_sbh);
-		if (err)
-			return err;
-		EXT4_SET_COMPAT_FEATURE(sb, compat);
-		sb->s_dirt = 1;
-		handle->h_sync = 1;
-		BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
-					"call ext4_journal_dirty_metadata");
-		err = ext4_journal_dirty_metadata(handle,
-				EXT4_SB(sb)->s_sbh);
-	}
-	return err;
-}
-
-int ext4_update_rocompat_feature(handle_t *handle,
-					struct super_block *sb, __u32 rocompat)
-{
-	int err = 0;
-	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, rocompat)) {
-		err = ext4_journal_get_write_access(handle,
-				EXT4_SB(sb)->s_sbh);
-		if (err)
-			return err;
-		EXT4_SET_RO_COMPAT_FEATURE(sb, rocompat);
-		sb->s_dirt = 1;
-		handle->h_sync = 1;
-		BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
-					"call ext4_journal_dirty_metadata");
-		err = ext4_journal_dirty_metadata(handle,
-				EXT4_SB(sb)->s_sbh);
-	}
-	return err;
-}
-
-int ext4_update_incompat_feature(handle_t *handle,
-					struct super_block *sb, __u32 incompat)
-{
-	int err = 0;
-	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, incompat)) {
-		err = ext4_journal_get_write_access(handle,
-				EXT4_SB(sb)->s_sbh);
-		if (err)
-			return err;
-		EXT4_SET_INCOMPAT_FEATURE(sb, incompat);
-		sb->s_dirt = 1;
-		handle->h_sync = 1;
-		BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
-					"call ext4_journal_dirty_metadata");
-		err = ext4_journal_dirty_metadata(handle,
-				EXT4_SB(sb)->s_sbh);
-	}
-	return err;
-}
-
 /*
  * Open the external journal device
  */
@@ -904,7 +844,7 @@ static const struct export_operations ext4_export_ops = {
 enum {
 	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
 	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
-	Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
+	Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
 	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
 	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
 	Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
@@ -915,7 +855,7 @@ enum {
 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
 	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
 	Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
-	Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc,
+	Opt_stripe, Opt_delalloc, Opt_nodelalloc,
 	Opt_inode_readahead_blks
 };
 
@@ -933,8 +873,6 @@ static const match_table_t tokens = {
 	{Opt_err_panic, "errors=panic"},
 	{Opt_err_ro, "errors=remount-ro"},
 	{Opt_nouid32, "nouid32"},
-	{Opt_nocheck, "nocheck"},
-	{Opt_nocheck, "check=none"},
 	{Opt_debug, "debug"},
 	{Opt_oldalloc, "oldalloc"},
 	{Opt_orlov, "orlov"},
@@ -973,8 +911,6 @@ static const match_table_t tokens = {
 	{Opt_extents, "extents"},
 	{Opt_noextents, "noextents"},
 	{Opt_i_version, "i_version"},
-	{Opt_mballoc, "mballoc"},
-	{Opt_nomballoc, "nomballoc"},
 	{Opt_stripe, "stripe=%u"},
 	{Opt_resize, "resize"},
 	{Opt_delalloc, "delalloc"},
@@ -1073,9 +1009,6 @@ static int parse_options(char *options, struct super_block *sb,
 		case Opt_nouid32:
 			set_opt(sbi->s_mount_opt, NO_UID32);
 			break;
-		case Opt_nocheck:
-			clear_opt(sbi->s_mount_opt, CHECK);
-			break;
 		case Opt_debug:
 			set_opt(sbi->s_mount_opt, DEBUG);
 			break;
@@ -1618,14 +1551,14 @@ static int ext4_check_descriptors(struct super_block *sb)
 		if (block_bitmap < first_block || block_bitmap > last_block) {
 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
 			       "Block bitmap for group %lu not in group "
-			       "(block %llu)!", i, block_bitmap);
+			       "(block %llu)!\n", i, block_bitmap);
 			return 0;
 		}
 		inode_bitmap = ext4_inode_bitmap(sb, gdp);
 		if (inode_bitmap < first_block || inode_bitmap > last_block) {
 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
 			       "Inode bitmap for group %lu not in group "
-			       "(block %llu)!", i, inode_bitmap);
+			       "(block %llu)!\n", i, inode_bitmap);
 			return 0;
 		}
 		inode_table = ext4_inode_table(sb, gdp);
@@ -1633,7 +1566,7 @@ static int ext4_check_descriptors(struct super_block *sb)
 		    inode_table + sbi->s_itb_per_group - 1 > last_block) {
 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
 			       "Inode table for group %lu not in group "
-			       "(block %llu)!", i, inode_table);
+			       "(block %llu)!\n", i, inode_table);
 			return 0;
 		}
 		spin_lock(sb_bgl_lock(sbi, i));
@@ -1778,13 +1711,13 @@ static void ext4_orphan_cleanup(struct super_block *sb,
  *
  * Note, this does *not* consider any metadata overhead for vfs i_blocks.
  */
-static loff_t ext4_max_size(int blkbits)
+static loff_t ext4_max_size(int blkbits, int has_huge_files)
 {
 	loff_t res;
 	loff_t upper_limit = MAX_LFS_FILESIZE;
 
 	/* small i_blocks in vfs inode? */
-	if (sizeof(blkcnt_t) < sizeof(u64)) {
+	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
 		/*
 		 * CONFIG_LSF is not enabled implies the inode
 		 * i_block represent total blocks in 512 bytes
@@ -1814,7 +1747,7 @@ static loff_t ext4_max_size(int blkbits)
 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
 * We need to be 1 filesystem block less than the 2^48 sector limit.
 */
-static loff_t ext4_max_bitmap_size(int bits)
+static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
 {
 	loff_t res = EXT4_NDIR_BLOCKS;
 	int meta_blocks;
@@ -1827,11 +1760,11 @@ static loff_t ext4_max_bitmap_size(int bits)
 	 * total number of 512 bytes blocks of the file
 	 */
 
-	if (sizeof(blkcnt_t) < sizeof(u64)) {
+	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
 		/*
-		 * CONFIG_LSF is not enabled implies the inode
-		 * i_block represent total blocks in 512 bytes
-		 * 32 == size of vfs inode i_blocks * 8
+		 * !has_huge_files or CONFIG_LSF is not enabled
+		 * implies the inode i_block represent total blocks in
+		 * 512 bytes 32 == size of vfs inode i_blocks * 8
 		 */
 		upper_limit = (1LL << 32) - 1;
 
@@ -1940,7 +1873,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	int blocksize;
 	int db_count;
 	int i;
-	int needs_recovery;
+	int needs_recovery, has_huge_files;
 	__le32 features;
 	__u64 blocks_count;
 	int err;
@@ -2081,7 +2014,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		       sb->s_id, le32_to_cpu(features));
 		goto failed_mount;
 	}
-	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
+	has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
+				    EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
+	if (has_huge_files) {
 		/*
 		 * Large file size enabled file system can only be
 		 * mount if kernel is build with CONFIG_LSF
@@ -2131,8 +2066,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		}
 	}
 
-	sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits);
-	sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits);
+	sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
+						      has_huge_files);
+	sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
 
 	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
 		sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
@@ -2456,6 +2392,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 			 "available.\n");
 	}
 
+	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+		printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
+				"requested data journaling mode\n");
+		clear_opt(sbi->s_mount_opt, DELALLOC);
+	} else if (test_opt(sb, DELALLOC))
+		printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
+
+	ext4_ext_init(sb);
+	err = ext4_mb_init(sb, needs_recovery);
+	if (err) {
+		printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
+		       err);
+		goto failed_mount4;
+	}
+
 	/*
 	 * akpm: core read_super() calls in here with the superblock locked.
 	 * That deadlocks, because orphan cleanup needs to lock the superblock
@@ -2475,21 +2426,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	       test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
 	       "writeback");
 
-	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
-		printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
-				"requested data journaling mode\n");
-		clear_opt(sbi->s_mount_opt, DELALLOC);
-	} else if (test_opt(sb, DELALLOC))
-		printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
-
-	ext4_ext_init(sb);
-	err = ext4_mb_init(sb, needs_recovery);
-	if (err) {
-		printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
-		       err);
-		goto failed_mount4;
-	}
-
 	lock_kernel();
 	return 0;
 
fs/jbd2/commit.c
@@ -995,6 +995,9 @@ restart_loop:
 	}
 	spin_unlock(&journal->j_list_lock);
 
+	if (journal->j_commit_callback)
+		journal->j_commit_callback(journal, commit_transaction);
+
 	trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
 		   journal->j_devname, commit_transaction->t_tid,
 		   journal->j_tail_sequence);
fs/jbd2/transaction.c
@@ -52,6 +52,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
 	transaction->t_expires = jiffies + journal->j_commit_interval;
 	spin_lock_init(&transaction->t_handle_lock);
 	INIT_LIST_HEAD(&transaction->t_inode_list);
+	INIT_LIST_HEAD(&transaction->t_private_list);
 
 	/* Set up the commit timer for the new transaction. */
 	journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
include/linux/jbd2.h
@@ -641,6 +641,11 @@ struct transaction_s
 	 */
 	int t_handle_count;
 
+	/*
+	 * For use by the filesystem to store fs-specific data
+	 * structures associated with the transaction
+	 */
+	struct list_head t_private_list;
 };
 
 struct transaction_run_stats_s {
@@ -935,6 +940,10 @@ struct journal_s
 
 	pid_t			j_last_sync_writer;
 
+	/* This function is called when a transaction is closed */
+	void			(*j_commit_callback)(journal_t *,
+						     transaction_t *);
+
 	/*
 	 * Journal statistics
 	 */
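The new j_commit_callback hook gives a client filesystem one call per transaction, made from the jbd2 commit path once the transaction has reached the log; that is the earliest moment at which blocks freed under the transaction are safe to reuse. In this merge ext4 wires it to release_blocks_on_commit() in ext4_mb_init(). A minimal sketch of a hypothetical client (the my_fs_* names are invented):

#include <linux/jbd2.h>

static void my_fs_on_commit(journal_t *journal, transaction_t *txn)
{
	/* walk txn->t_private_list and reclaim whatever the filesystem
	 * queued on it while the transaction was running */
}

static void my_fs_register_commit_callback(journal_t *journal)
{
	journal->j_commit_callback = my_fs_on_commit;
}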
include/linux/writeback.h
@@ -63,7 +63,15 @@ struct writeback_control {
 	unsigned for_writepages:1;	/* This is a writepages() call */
 	unsigned range_cyclic:1;	/* range_start is cyclic */
 	unsigned more_io:1;		/* more io to be dispatched */
-	unsigned range_cont:1;
+	/*
+	 * write_cache_pages() won't update wbc->nr_to_write and
+	 * mapping->writeback_index if no_nrwrite_index_update
+	 * is set.  write_cache_pages() may write more than we
+	 * requested and we want to make sure nr_to_write and
+	 * writeback_index are updated in a consistent manner
+	 * so we use a single control to update them
+	 */
+	unsigned no_nrwrite_index_update:1;
 };
 
 /*
mm/page-writeback.c
@@ -876,6 +876,7 @@ int write_cache_pages(struct address_space *mapping,
 	pgoff_t end;		/* Inclusive */
 	int scanned = 0;
 	int range_whole = 0;
+	long nr_to_write = wbc->nr_to_write;
 
 	if (wbc->nonblocking && bdi_write_congested(bdi)) {
 		wbc->encountered_congestion = 1;
@@ -939,7 +940,7 @@ retry:
 				unlock_page(page);
 				ret = 0;
 			}
-			if (ret || (--(wbc->nr_to_write) <= 0))
+			if (ret || (--nr_to_write <= 0))
 				done = 1;
 			if (wbc->nonblocking && bdi_write_congested(bdi)) {
 				wbc->encountered_congestion = 1;
@@ -958,11 +959,12 @@ retry:
 		index = 0;
 		goto retry;
 	}
-	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
-		mapping->writeback_index = index;
+	if (!wbc->no_nrwrite_index_update) {
+		if (wbc->range_cyclic || (range_whole && nr_to_write > 0))
+			mapping->writeback_index = index;
+		wbc->nr_to_write = nr_to_write;
+	}
 
-	if (wbc->range_cont)
-		wbc->range_start = index << PAGE_CACHE_SHIFT;
 	return ret;
 }
 EXPORT_SYMBOL(write_cache_pages);
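The intended calling convention for no_nrwrite_index_update, as ext4_da_writepages() applies it earlier in this merge: save the flag, set it so write_cache_pages() leaves wbc->nr_to_write and mapping->writeback_index untouched, do the accounting in the caller, and restore the flag on the way out. A condensed sketch of a hypothetical caller (the my_* names are invented and the writepage body is stubbed):

#include <linux/pagemap.h>
#include <linux/writeback.h>

static int my_writepage(struct page *page, struct writeback_control *wbc,
			void *data)
{
	unlock_page(page);	/* a real implementation starts I/O here */
	return 0;
}

static int my_writepages(struct address_space *mapping,
			 struct writeback_control *wbc)
{
	int no_nrwrite_index_update = wbc->no_nrwrite_index_update;
	long pages_written = 0;
	int ret;

	/* keep write_cache_pages() from updating nr_to_write and
	 * mapping->writeback_index behind our back */
	wbc->no_nrwrite_index_update = 1;

	ret = write_cache_pages(mapping, wbc, my_writepage, NULL);
	/* ... count the pages actually written into pages_written ... */

	wbc->nr_to_write -= pages_written;
	if (!no_nrwrite_index_update)
		wbc->no_nrwrite_index_update = 0;
	return ret;
}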