Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix data corruption regression by reverting commit 6de9843dab
  ext4: Allow indirect-block file to grow the file size to max file size
  ext4: allow an active handle to be started when freezing
  ext4: sync the directory inode in ext4_sync_parent()
  ext4: init timer earlier to avoid a kernel panic in __save_error_info
  jbd2: fix potential memory leak on transaction commit
  ext4: fix a double free in ext4_register_li_request
  ext4: fix credits computing for indirect mapped files
  ext4: remove unnecessary [cm]time update of quota file
  jbd2: move bdget out of critical section
This commit is contained in:
Linus Torvalds 2011-04-11 15:45:47 -07:00
commit a97b52022a
6 changed files with 102 additions and 35 deletions

View File

@ -86,8 +86,8 @@
#ifdef CONFIG_QUOTA #ifdef CONFIG_QUOTA
/* Amount of blocks needed for quota update - we know that the structure was /* Amount of blocks needed for quota update - we know that the structure was
* allocated so we need to update only inode+data */ * allocated so we need to update only data block */
#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) #define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0)
/* Amount of blocks needed for quota insert/delete - we do some block writes /* Amount of blocks needed for quota insert/delete - we do some block writes
* but inode, sb and group updates are done only once */ * but inode, sb and group updates are done only once */
#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\

View File

@ -125,9 +125,11 @@ extern int ext4_flush_completed_IO(struct inode *inode)
* the parent directory's parent as well, and so on recursively, if * the parent directory's parent as well, and so on recursively, if
* they are also freshly created. * they are also freshly created.
*/ */
static void ext4_sync_parent(struct inode *inode) static int ext4_sync_parent(struct inode *inode)
{ {
struct writeback_control wbc;
struct dentry *dentry = NULL; struct dentry *dentry = NULL;
int ret = 0;
while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
@ -136,8 +138,17 @@ static void ext4_sync_parent(struct inode *inode)
if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)
break; break;
inode = dentry->d_parent->d_inode; inode = dentry->d_parent->d_inode;
sync_mapping_buffers(inode->i_mapping); ret = sync_mapping_buffers(inode->i_mapping);
if (ret)
break;
memset(&wbc, 0, sizeof(wbc));
wbc.sync_mode = WB_SYNC_ALL;
wbc.nr_to_write = 0; /* only write out the inode */
ret = sync_inode(inode, &wbc);
if (ret)
break;
} }
return ret;
} }
/* /*
@ -176,7 +187,7 @@ int ext4_sync_file(struct file *file, int datasync)
if (!journal) { if (!journal) {
ret = generic_file_fsync(file, datasync); ret = generic_file_fsync(file, datasync);
if (!ret && !list_empty(&inode->i_dentry)) if (!ret && !list_empty(&inode->i_dentry))
ext4_sync_parent(inode); ret = ext4_sync_parent(inode);
goto out; goto out;
} }

View File

@ -2502,6 +2502,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
* for partial write. * for partial write.
*/ */
set_buffer_new(bh); set_buffer_new(bh);
set_buffer_mapped(bh);
} }
return 0; return 0;
} }
@ -4429,8 +4430,8 @@ void ext4_truncate(struct inode *inode)
Indirect chain[4]; Indirect chain[4];
Indirect *partial; Indirect *partial;
__le32 nr = 0; __le32 nr = 0;
int n; int n = 0;
ext4_lblk_t last_block; ext4_lblk_t last_block, max_block;
unsigned blocksize = inode->i_sb->s_blocksize; unsigned blocksize = inode->i_sb->s_blocksize;
trace_ext4_truncate_enter(inode); trace_ext4_truncate_enter(inode);
@ -4455,14 +4456,18 @@ void ext4_truncate(struct inode *inode)
last_block = (inode->i_size + blocksize-1) last_block = (inode->i_size + blocksize-1)
>> EXT4_BLOCK_SIZE_BITS(inode->i_sb); >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
>> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
if (inode->i_size & (blocksize - 1)) if (inode->i_size & (blocksize - 1))
if (ext4_block_truncate_page(handle, mapping, inode->i_size)) if (ext4_block_truncate_page(handle, mapping, inode->i_size))
goto out_stop; goto out_stop;
n = ext4_block_to_path(inode, last_block, offsets, NULL); if (last_block != max_block) {
if (n == 0) n = ext4_block_to_path(inode, last_block, offsets, NULL);
goto out_stop; /* error */ if (n == 0)
goto out_stop; /* error */
}
/* /*
* OK. This truncate is going to happen. We add the inode to the * OK. This truncate is going to happen. We add the inode to the
@ -4493,7 +4498,13 @@ void ext4_truncate(struct inode *inode)
*/ */
ei->i_disksize = inode->i_size; ei->i_disksize = inode->i_size;
if (n == 1) { /* direct blocks */ if (last_block == max_block) {
/*
* It is unnecessary to free any data blocks if last_block is
* equal to the indirect block limit.
*/
goto out_unlock;
} else if (n == 1) { /* direct blocks */
ext4_free_data(handle, inode, NULL, i_data+offsets[0], ext4_free_data(handle, inode, NULL, i_data+offsets[0],
i_data + EXT4_NDIR_BLOCKS); i_data + EXT4_NDIR_BLOCKS);
goto do_indirects; goto do_indirects;
@ -4553,6 +4564,7 @@ do_indirects:
; ;
} }
out_unlock:
up_write(&ei->i_data_sem); up_write(&ei->i_data_sem);
inode->i_mtime = inode->i_ctime = ext4_current_time(inode); inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode); ext4_mark_inode_dirty(handle, inode);
@ -5398,13 +5410,12 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
/* if nrblocks are contiguous */ /* if nrblocks are contiguous */
if (chunk) { if (chunk) {
/* /*
* With N contiguous data blocks, it need at most * With N contiguous data blocks, we need at most
* N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
* 2 dindirect blocks * 2 dindirect blocks, and 1 tindirect block
* 1 tindirect block
*/ */
indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); return DIV_ROUND_UP(nrblocks,
return indirects + 3; EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
} }
/* /*
* if nrblocks are not contiguous, worse case, each block touch * if nrblocks are not contiguous, worse case, each block touch

View File

@ -242,27 +242,44 @@ static void ext4_put_nojournal(handle_t *handle)
* journal_end calls result in the superblock being marked dirty, so * journal_end calls result in the superblock being marked dirty, so
* that sync() will call the filesystem's write_super callback if * that sync() will call the filesystem's write_super callback if
* appropriate. * appropriate.
*
* To avoid j_barrier hold in userspace when a user calls freeze(),
* ext4 prevents a new handle from being started by s_frozen, which
* is in an upper layer.
*/ */
handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
{ {
journal_t *journal; journal_t *journal;
handle_t *handle;
if (sb->s_flags & MS_RDONLY) if (sb->s_flags & MS_RDONLY)
return ERR_PTR(-EROFS); return ERR_PTR(-EROFS);
vfs_check_frozen(sb, SB_FREEZE_TRANS);
/* Special case here: if the journal has aborted behind our
* backs (eg. EIO in the commit thread), then we still need to
* take the FS itself readonly cleanly. */
journal = EXT4_SB(sb)->s_journal; journal = EXT4_SB(sb)->s_journal;
if (journal) { handle = ext4_journal_current_handle();
if (is_journal_aborted(journal)) {
ext4_abort(sb, "Detected aborted journal"); /*
return ERR_PTR(-EROFS); * If a handle has been started, it should be allowed to
} * finish, otherwise deadlock could happen between freeze
return jbd2_journal_start(journal, nblocks); * and others(e.g. truncate) due to the restart of the
* journal handle if the filesystem is frozen and active
* handles are not stopped.
*/
if (!handle)
vfs_check_frozen(sb, SB_FREEZE_TRANS);
if (!journal)
return ext4_get_nojournal();
/*
* Special case here: if the journal has aborted behind our
* backs (eg. EIO in the commit thread), then we still need to
* take the FS itself readonly cleanly.
*/
if (is_journal_aborted(journal)) {
ext4_abort(sb, "Detected aborted journal");
return ERR_PTR(-EROFS);
} }
return ext4_get_nojournal(); return jbd2_journal_start(journal, nblocks);
} }
/* /*
@ -2975,6 +2992,12 @@ static int ext4_register_li_request(struct super_block *sb,
mutex_unlock(&ext4_li_info->li_list_mtx); mutex_unlock(&ext4_li_info->li_list_mtx);
sbi->s_li_request = elr; sbi->s_li_request = elr;
/*
* set elr to NULL here since it has been inserted to
* the request_list and the removal and free of it is
* handled by ext4_clear_request_list from now on.
*/
elr = NULL;
if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
ret = ext4_run_lazyinit_thread(); ret = ext4_run_lazyinit_thread();
@ -3385,6 +3408,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
get_random_bytes(&sbi->s_next_generation, sizeof(u32)); get_random_bytes(&sbi->s_next_generation, sizeof(u32));
spin_lock_init(&sbi->s_next_gen_lock); spin_lock_init(&sbi->s_next_gen_lock);
init_timer(&sbi->s_err_report);
sbi->s_err_report.function = print_daily_error_info;
sbi->s_err_report.data = (unsigned long) sb;
err = percpu_counter_init(&sbi->s_freeblocks_counter, err = percpu_counter_init(&sbi->s_freeblocks_counter,
ext4_count_free_blocks(sb)); ext4_count_free_blocks(sb));
if (!err) { if (!err) {
@ -3646,9 +3673,6 @@ no_journal:
"Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
*sbi->s_es->s_mount_opts ? "; " : "", orig_data); *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
init_timer(&sbi->s_err_report);
sbi->s_err_report.function = print_daily_error_info;
sbi->s_err_report.data = (unsigned long) sb;
if (es->s_error_count) if (es->s_error_count)
mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
@ -3672,6 +3696,7 @@ failed_mount_wq:
sbi->s_journal = NULL; sbi->s_journal = NULL;
} }
failed_mount3: failed_mount3:
del_timer(&sbi->s_err_report);
if (sbi->s_flex_groups) { if (sbi->s_flex_groups) {
if (is_vmalloc_addr(sbi->s_flex_groups)) if (is_vmalloc_addr(sbi->s_flex_groups))
vfree(sbi->s_flex_groups); vfree(sbi->s_flex_groups);
@ -4138,6 +4163,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
/* /*
* LVM calls this function before a (read-only) snapshot is created. This * LVM calls this function before a (read-only) snapshot is created. This
* gives us a chance to flush the journal completely and mark the fs clean. * gives us a chance to flush the journal completely and mark the fs clean.
*
* Note that only this function cannot bring a filesystem to be in a clean
* state independently, because ext4 prevents a new handle from being started
* by @sb->s_frozen, which stays in an upper layer. It thus needs help from
* the upper layer.
*/ */
static int ext4_freeze(struct super_block *sb) static int ext4_freeze(struct super_block *sb)
{ {
@ -4614,11 +4644,24 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
static int ext4_quota_off(struct super_block *sb, int type) static int ext4_quota_off(struct super_block *sb, int type)
{ {
struct inode *inode = sb_dqopt(sb)->files[type];
handle_t *handle;
/* Force all delayed allocation blocks to be allocated. /* Force all delayed allocation blocks to be allocated.
* Caller already holds s_umount sem */ * Caller already holds s_umount sem */
if (test_opt(sb, DELALLOC)) if (test_opt(sb, DELALLOC))
sync_filesystem(sb); sync_filesystem(sb);
/* Update modification times of quota files when userspace can
* start looking at them */
handle = ext4_journal_start(inode, 1);
if (IS_ERR(handle))
goto out;
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
out:
return dquot_quota_off(sb, type); return dquot_quota_off(sb, type);
} }
@ -4714,9 +4757,8 @@ out:
if (inode->i_size < off + len) { if (inode->i_size < off + len) {
i_size_write(inode, off + len); i_size_write(inode, off + len);
EXT4_I(inode)->i_disksize = inode->i_size; EXT4_I(inode)->i_disksize = inode->i_size;
ext4_mark_inode_dirty(handle, inode);
} }
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
ext4_mark_inode_dirty(handle, inode);
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
return len; return len;
} }

View File

@ -105,6 +105,8 @@ static int journal_submit_commit_record(journal_t *journal,
int ret; int ret;
struct timespec now = current_kernel_time(); struct timespec now = current_kernel_time();
*cbh = NULL;
if (is_journal_aborted(journal)) if (is_journal_aborted(journal))
return 0; return 0;
@ -806,7 +808,7 @@ wait_for_iobuf:
if (err) if (err)
__jbd2_journal_abort_hard(journal); __jbd2_journal_abort_hard(journal);
} }
if (!err && !is_journal_aborted(journal)) if (cbh)
err = journal_wait_on_commit_record(journal, cbh); err = journal_wait_on_commit_record(journal, cbh);
if (JBD2_HAS_INCOMPAT_FEATURE(journal, if (JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) && JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&

View File

@ -2413,10 +2413,12 @@ const char *jbd2_dev_to_name(dev_t device)
new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL); new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
if (!new_dev) if (!new_dev)
return "NODEV-ALLOCFAILURE"; /* Something non-NULL */ return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
bd = bdget(device);
spin_lock(&devname_cache_lock); spin_lock(&devname_cache_lock);
if (devcache[i]) { if (devcache[i]) {
if (devcache[i]->device == device) { if (devcache[i]->device == device) {
kfree(new_dev); kfree(new_dev);
bdput(bd);
ret = devcache[i]->devname; ret = devcache[i]->devname;
spin_unlock(&devname_cache_lock); spin_unlock(&devname_cache_lock);
return ret; return ret;
@ -2425,7 +2427,6 @@ const char *jbd2_dev_to_name(dev_t device)
} }
devcache[i] = new_dev; devcache[i] = new_dev;
devcache[i]->device = device; devcache[i]->device = device;
bd = bdget(device);
if (bd) { if (bd) {
bdevname(bd, devcache[i]->devname); bdevname(bd, devcache[i]->devname);
bdput(bd); bdput(bd);