for-5.11-rc2-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAl/0cI8ACgkQxWXV+ddt
 WDspQw/8DcC8zhGgunk0m2kcXd6dFOGbsr3hNGCsgUSKESRw6AgTZ0rJf/QLjayF
 /vaJWzQW9ijfZ92fWZS+mrmskk0N8RFOsEvkCRLesgRaasbrkchLBo5HGQasOBEV
 LXyU878GrBkNaHzClJz+JdU26i0d17BFdddgtZVQ1St9Wd9ecc7Q6iqG80RWFeE7
 uVbhv+QjocM3EieOnwIy5Mz6jZgJLYwqw7/y2njKduBeJtbt1K1j/y7IJk0WFMUM
 8eUpDL6vlAHB8FjV2wWOzO46bbEaUpaBADM6yabrq0lnM0kr7Rb+WV/WSLM/AZ3g
 Hzs4qROOEP+zjfZ5nYjJQDJRMpSipZomsUY5uMZnhRxlZuHPaoBotRRzs5AIZYj2
 BnkfucOcjxS/JTBD//ltJXE8RxbMIyMBBBipbBwqmxOkR9gM9BPuJ6iJPfUX//gG
 1GHJ+FPns8ua3JW21ih6H31xNEPS36tsywvE8yCEtEWMxCFCBwgGu+4D8KpGBjtY
 ySFxkxxAbTuFi9fqSE/mBC+6lpbVTO0OvizuoEQh8C2izkXRbDsDVgPN8d7rCW7h
 Cdox4DUp61sNf+G3ll9Dv9ceAXroZTVRTHGjlav6NAFpydz3yPo5x54Ex7S+k3oN
 BAcZEl1Tl3hz4WxF8Ywc+yJ8n8l9AVa3KcYRXVbyVjTGg+JjU94=
 =jlQf
 -----END PGP SIGNATURE-----

Merge tag 'for-5.11-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "A few more fixes that arrived before the end of the year:

   - a bunch of fixes related to transaction handle lifetime wrt various
     operations (umount, remount, qgroup scan, orphan cleanup)

   - async discard scheduling fixes

   - fix item size calculation when item keys collide for extended refs
     (hardlinks)

   - fix qgroup flushing from running transaction

   - fix send, wrong file path when there is an inode with a pending
     rmdir

   - fix deadlock when cloning inline extent and low on free metadata
     space"

* tag 'for-5.11-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: run delayed iputs when remounting RO to avoid leaking them
  btrfs: add assertion for empty list of transactions at late stage of umount
  btrfs: fix race between RO remount and the cleaner task
  btrfs: fix transaction leak and crash after cleaning up orphans on RO mount
  btrfs: fix transaction leak and crash after RO remount caused by qgroup rescan
  btrfs: merge critical sections of discard lock in workfn
  btrfs: fix racy access to discard_ctl data
  btrfs: fix async discard stall
  btrfs: tests: initialize test inodes location
  btrfs: send: fix wrong file path when there is an inode with a pending rmdir
  btrfs: qgroup: don't try to wait flushing if we're already holding a transaction
  btrfs: correctly calculate item size used when item key collision happens
  btrfs: fix deadlock when cloning inline extent and low on free metadata space
This commit is contained in:
Linus Torvalds 2021-01-06 11:19:08 -08:00
commit 71c061d244
18 changed files with 243 additions and 97 deletions

View File

@ -42,6 +42,15 @@ enum {
* to an inode.
*/
BTRFS_INODE_NO_XATTRS,
/*
* Set when we are in a context where we need to start a transaction and
* have dirty pages with the respective file range locked. This is to
* ensure that when reserving space for the transaction, if we are low
* on available space and need to flush delalloc, we will not flush
* delalloc for this inode, because that could result in a deadlock (on
* the file range, inode's io_tree).
*/
BTRFS_INODE_NO_DELALLOC_FLUSH,
};
/* in memory btrfs inode */

View File

@ -2555,8 +2555,14 @@ out:
* @p: Holds all btree nodes along the search path
* @root: The root node of the tree
* @key: The key we are looking for
* @ins_len: Indicates purpose of search, for inserts it is 1, for
* deletions it's -1. 0 for plain searches
* @ins_len: Indicates purpose of search:
* >0 for inserts, it's the size of the item inserted (*)
* <0 for deletions
* 0 for plain searches, not modifying the tree
*
* (*) If size of item inserted doesn't include
* sizeof(struct btrfs_item), then p->search_for_extension must
* be set.
* @cow: boolean should CoW operations be performed. Must always be 1
* when modifying the tree.
*
@ -2717,6 +2723,20 @@ cow_done:
if (level == 0) {
p->slots[level] = slot;
/*
* Item key already exists. In this case, if we are
* allowed to insert the item (for example, in dir_item
* case, item key collision is allowed), it will be
* merged with the original item. Only the item size
* grows, no new btrfs item will be added. If
* search_for_extension is not set, ins_len already
* accounts for the size of struct btrfs_item, so deduct it
* here to keep the leaf space check correct.
*/
if (ret == 0 && ins_len > 0 && !p->search_for_extension) {
ASSERT(ins_len >= sizeof(struct btrfs_item));
ins_len -= sizeof(struct btrfs_item);
}
if (ins_len > 0 &&
btrfs_leaf_free_space(b) < ins_len) {
if (write_lock_level < 1) {

View File

@ -131,6 +131,8 @@ enum {
* defrag
*/
BTRFS_FS_STATE_REMOUNTING,
/* Filesystem in RO mode */
BTRFS_FS_STATE_RO,
/* Track if a transaction abort has been reported on this filesystem */
BTRFS_FS_STATE_TRANS_ABORTED,
/*
@ -367,6 +369,12 @@ struct btrfs_path {
unsigned int search_commit_root:1;
unsigned int need_commit_sem:1;
unsigned int skip_release_on_error:1;
/*
* Indicate that the new item (btrfs_search_slot) is extending an already
* existing item and that ins_len contains only the data size and not the
* item header (i.e. sizeof(struct btrfs_item) is not included).
*/
unsigned int search_for_extension:1;
};
#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \
sizeof(struct btrfs_item))
@ -2885,10 +2893,26 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
* If we remount the fs to be R/O or umount the fs, the cleaner needn't do
* anything except sleeping. This function is used to check the status of
* the fs.
* We check for BTRFS_FS_STATE_RO to avoid races with a concurrent remount,
* since setting and checking for SB_RDONLY in the superblock's flags is not
* atomic.
*/
static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info)
{
return fs_info->sb->s_flags & SB_RDONLY || btrfs_fs_closing(fs_info);
return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) ||
btrfs_fs_closing(fs_info);
}
/*
* Put the filesystem into read-only state: set SB_RDONLY in the superblock's
* s_flags and set the BTRFS_FS_STATE_RO bit in the fs_state of btrfs_sb(sb).
*
* @sb: superblock of the filesystem being switched to read-only
*/
static inline void btrfs_set_sb_rdonly(struct super_block *sb)
{
sb->s_flags |= SB_RDONLY;
/* Keep the fs_state bit in sync; btrfs_need_cleaner_sleep() tests this bit */
set_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state);
}
/*
* Take the filesystem out of read-only state: clear SB_RDONLY from the
* superblock's s_flags and clear the BTRFS_FS_STATE_RO bit in the fs_state of
* btrfs_sb(sb).
*
* @sb: superblock of the filesystem being switched to read-write
*/
static inline void btrfs_clear_sb_rdonly(struct super_block *sb)
{
sb->s_flags &= ~SB_RDONLY;
/* Keep the fs_state bit in sync with the superblock flag cleared above */
clear_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state);
}
/* tree mod log functions from ctree.c */
@ -3073,7 +3097,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u32 min_type);
int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr);
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
bool in_reclaim_context);
int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
unsigned int extra_bits,
struct extent_state **cached_state);

View File

@ -715,7 +715,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
* flush all outstanding I/O and inode extent mappings before the
* copy operation is declared as being finished
*/
ret = btrfs_start_delalloc_roots(fs_info, U64_MAX);
ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false);
if (ret) {
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return ret;

View File

@ -199,16 +199,15 @@ static struct btrfs_block_group *find_next_block_group(
static struct btrfs_block_group *peek_discard_list(
struct btrfs_discard_ctl *discard_ctl,
enum btrfs_discard_state *discard_state,
int *discard_index)
int *discard_index, u64 now)
{
struct btrfs_block_group *block_group;
const u64 now = ktime_get_ns();
spin_lock(&discard_ctl->lock);
again:
block_group = find_next_block_group(discard_ctl, now);
if (block_group && now > block_group->discard_eligible_time) {
if (block_group && now >= block_group->discard_eligible_time) {
if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
block_group->used != 0) {
if (btrfs_is_block_group_data_only(block_group))
@ -222,12 +221,11 @@ again:
block_group->discard_state = BTRFS_DISCARD_EXTENTS;
}
discard_ctl->block_group = block_group;
}
if (block_group) {
*discard_state = block_group->discard_state;
*discard_index = block_group->discard_index;
} else {
block_group = NULL;
}
spin_unlock(&discard_ctl->lock);
return block_group;
@ -330,28 +328,15 @@ void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
btrfs_discard_schedule_work(discard_ctl, false);
}
/**
* btrfs_discard_schedule_work - responsible for scheduling the discard work
* @discard_ctl: discard control
* @override: override the current timer
*
* Discards are issued by a delayed workqueue item. @override is used to
* update the current delay as the baseline delay interval is reevaluated on
* transaction commit. This is also maxed with any other rate limit.
*/
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
bool override)
static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
u64 now, bool override)
{
struct btrfs_block_group *block_group;
const u64 now = ktime_get_ns();
spin_lock(&discard_ctl->lock);
if (!btrfs_run_discard_work(discard_ctl))
goto out;
return;
if (!override && delayed_work_pending(&discard_ctl->work))
goto out;
return;
block_group = find_next_block_group(discard_ctl, now);
if (block_group) {
@ -393,7 +378,24 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
mod_delayed_work(discard_ctl->discard_workers,
&discard_ctl->work, nsecs_to_jiffies(delay));
}
out:
}
/*
* btrfs_discard_schedule_work - responsible for scheduling the discard work
* @discard_ctl: discard control
* @override: override the current timer
*
* Discards are issued by a delayed workqueue item. @override is used to
* update the current delay as the baseline delay interval is reevaluated on
* transaction commit. This is also maxed with any other rate limit.
*
* This is the locking wrapper: it samples the current time once, takes
* discard_ctl->lock and hands both to __btrfs_discard_schedule_work(), which
* is called with the lock held.
*/
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
bool override)
{
/* Single timestamp passed down to the helper */
const u64 now = ktime_get_ns();
spin_lock(&discard_ctl->lock);
__btrfs_discard_schedule_work(discard_ctl, now, override);
spin_unlock(&discard_ctl->lock);
}
@ -438,13 +440,18 @@ static void btrfs_discard_workfn(struct work_struct *work)
int discard_index = 0;
u64 trimmed = 0;
u64 minlen = 0;
u64 now = ktime_get_ns();
discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);
block_group = peek_discard_list(discard_ctl, &discard_state,
&discard_index);
&discard_index, now);
if (!block_group || !btrfs_run_discard_work(discard_ctl))
return;
if (now < block_group->discard_eligible_time) {
btrfs_discard_schedule_work(discard_ctl, false);
return;
}
/* Perform discarding */
minlen = discard_minlen[discard_index];
@ -474,13 +481,6 @@ static void btrfs_discard_workfn(struct work_struct *work)
discard_ctl->discard_extent_bytes += trimmed;
}
/*
* Updated without locks as this is inside the workfn and nothing else
* is reading the values
*/
discard_ctl->prev_discard = trimmed;
discard_ctl->prev_discard_time = ktime_get_ns();
/* Determine next steps for a block_group */
if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
if (discard_state == BTRFS_DISCARD_BITMAPS) {
@ -496,11 +496,13 @@ static void btrfs_discard_workfn(struct work_struct *work)
}
}
now = ktime_get_ns();
spin_lock(&discard_ctl->lock);
discard_ctl->prev_discard = trimmed;
discard_ctl->prev_discard_time = now;
discard_ctl->block_group = NULL;
__btrfs_discard_schedule_work(discard_ctl, now, false);
spin_unlock(&discard_ctl->lock);
btrfs_discard_schedule_work(discard_ctl, false);
}
/**

View File

@ -1729,7 +1729,7 @@ static int cleaner_kthread(void *arg)
*/
btrfs_delete_unused_bgs(fs_info);
sleep:
clear_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
clear_and_wake_up_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
if (kthread_should_park())
kthread_parkme();
if (kthread_should_stop())
@ -2830,6 +2830,9 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
return -ENOMEM;
btrfs_init_delayed_root(fs_info->delayed_root);
if (sb_rdonly(sb))
set_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state);
return btrfs_alloc_stripe_hash_table(fs_info);
}
@ -2969,6 +2972,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
}
}
ret = btrfs_find_orphan_roots(fs_info);
out:
return ret;
}
@ -3383,10 +3387,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
}
}
ret = btrfs_find_orphan_roots(fs_info);
if (ret)
goto fail_qgroup;
fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true);
if (IS_ERR(fs_info->fs_root)) {
err = PTR_ERR(fs_info->fs_root);
@ -4181,6 +4181,9 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
btrfs_stop_all_workers(fs_info);
/* We shouldn't have any transaction open at this point */
ASSERT(list_empty(&fs_info->trans_list));
clear_bit(BTRFS_FS_OPEN, &fs_info->flags);
free_root_pointers(fs_info, true);
btrfs_free_fs_roots(fs_info);

View File

@ -844,6 +844,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
want = extent_ref_type(parent, owner);
if (insert) {
extra_size = btrfs_extent_inline_ref_size(want);
path->search_for_extension = 1;
path->keep_locks = 1;
} else
extra_size = -1;
@ -996,6 +997,7 @@ again:
out:
if (insert) {
path->keep_locks = 0;
path->search_for_extension = 0;
btrfs_unlock_up_safe(path, 1);
}
return err;

View File

@ -1016,8 +1016,10 @@ again:
}
btrfs_release_path(path);
path->search_for_extension = 1;
ret = btrfs_search_slot(trans, root, &file_key, path,
csum_size, 1);
path->search_for_extension = 0;
if (ret < 0)
goto out;

View File

@ -9390,7 +9390,8 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode
* some fairly slow code that needs optimization. This walks the list
* of all the inodes with pending delalloc and forces them to disk.
*/
static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot)
static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot,
bool in_reclaim_context)
{
struct btrfs_inode *binode;
struct inode *inode;
@ -9411,6 +9412,11 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot
list_move_tail(&binode->delalloc_inodes,
&root->delalloc_inodes);
if (in_reclaim_context &&
test_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &binode->runtime_flags))
continue;
inode = igrab(&binode->vfs_inode);
if (!inode) {
cond_resched_lock(&root->delalloc_lock);
@ -9464,10 +9470,11 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
return -EROFS;
return start_delalloc_inodes(root, &nr, true);
return start_delalloc_inodes(root, &nr, true, false);
}
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr)
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
bool in_reclaim_context)
{
struct btrfs_root *root;
struct list_head splice;
@ -9490,7 +9497,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr)
&fs_info->delalloc_roots);
spin_unlock(&fs_info->delalloc_root_lock);
ret = start_delalloc_inodes(root, &nr, false);
ret = start_delalloc_inodes(root, &nr, false, in_reclaim_context);
btrfs_put_root(root);
if (ret < 0)
goto out;

View File

@ -4951,7 +4951,7 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_SYNC: {
int ret;
ret = btrfs_start_delalloc_roots(fs_info, U64_MAX);
ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false);
if (ret)
return ret;
ret = btrfs_sync_fs(inode->i_sb, 1);

View File

@ -3190,6 +3190,12 @@ out:
return ret;
}
/*
* Tell whether the qgroup rescan should stop: returns true when the
* filesystem is closing (btrfs_fs_closing()) or when the
* BTRFS_FS_STATE_REMOUNTING bit is set in fs_info->fs_state.
*/
static bool rescan_should_stop(struct btrfs_fs_info *fs_info)
{
return btrfs_fs_closing(fs_info) ||
test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
}
static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
{
struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
@ -3198,6 +3204,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
struct btrfs_trans_handle *trans = NULL;
int err = -ENOMEM;
int ret = 0;
bool stopped = false;
path = btrfs_alloc_path();
if (!path)
@ -3210,7 +3217,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
path->skip_locking = 1;
err = 0;
while (!err && !btrfs_fs_closing(fs_info)) {
while (!err && !(stopped = rescan_should_stop(fs_info))) {
trans = btrfs_start_transaction(fs_info->fs_root, 0);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
@ -3253,7 +3260,7 @@ out:
}
mutex_lock(&fs_info->qgroup_rescan_lock);
if (!btrfs_fs_closing(fs_info))
if (!stopped)
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
if (trans) {
ret = update_qgroup_status_item(trans);
@ -3272,7 +3279,7 @@ out:
btrfs_end_transaction(trans);
if (btrfs_fs_closing(fs_info)) {
if (stopped) {
btrfs_info(fs_info, "qgroup scan paused");
} else if (err >= 0) {
btrfs_info(fs_info, "qgroup scan completed%s",
@ -3530,16 +3537,6 @@ static int try_flush_qgroup(struct btrfs_root *root)
int ret;
bool can_commit = true;
/*
* We don't want to run flush again and again, so if there is a running
* one, we won't try to start a new flush, but exit directly.
*/
if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) {
wait_event(root->qgroup_flush_wait,
!test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state));
return 0;
}
/*
* If current process holds a transaction, we shouldn't flush, as we
* assume all space reservation happens before a transaction handle is
@ -3554,6 +3551,26 @@ static int try_flush_qgroup(struct btrfs_root *root)
current->journal_info != BTRFS_SEND_TRANS_STUB)
can_commit = false;
/*
* We don't want to run flush again and again, so if there is a running
* one, we won't try to start a new flush, but exit directly.
*/
if (test_and_set_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state)) {
/*
* We are already holding a transaction, thus we can block other
* threads from flushing. So exit right now. This increases
* the chance of EDQUOT for heavy load and near limit cases.
* But we can argue that if we're already near limit, EDQUOT is
* unavoidable anyway.
*/
if (!can_commit)
return 0;
wait_event(root->qgroup_flush_wait,
!test_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state));
return 0;
}
ret = btrfs_start_delalloc_snapshot(root);
if (ret < 0)
goto out;

View File

@ -89,6 +89,19 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
if (ret)
goto out_unlock;
/*
* After dirtying the page our caller will need to start a transaction,
* and if we are low on metadata free space, that can cause flushing of
* delalloc for all inodes in order to get metadata space released.
* However we are holding the range locked for the whole duration of
* the clone/dedupe operation, so we may deadlock if that happens and no
* other task releases enough space. So mark this inode as not being
* possible to flush to avoid such deadlock. We will clear that flag
* when we finish cloning all extents, since a transaction is started
* after finding each extent to clone.
*/
set_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &inode->runtime_flags);
if (comp_type == BTRFS_COMPRESS_NONE) {
char *map;
@ -549,6 +562,8 @@ process_slot:
out:
btrfs_free_path(path);
kvfree(buf);
clear_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &BTRFS_I(inode)->runtime_flags);
return ret;
}

View File

@ -236,6 +236,7 @@ struct waiting_dir_move {
* after this directory is moved, we can try to rmdir the ino rmdir_ino.
*/
u64 rmdir_ino;
u64 rmdir_gen;
bool orphanized;
};
@ -316,7 +317,7 @@ static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
static struct waiting_dir_move *
get_waiting_dir_move(struct send_ctx *sctx, u64 ino);
static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino);
static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen);
static int need_send_hole(struct send_ctx *sctx)
{
@ -2299,7 +2300,7 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
fs_path_reset(name);
if (is_waiting_for_rm(sctx, ino)) {
if (is_waiting_for_rm(sctx, ino, gen)) {
ret = gen_unique_name(sctx, ino, gen, name);
if (ret < 0)
goto out;
@ -2858,8 +2859,8 @@ out:
return ret;
}
static struct orphan_dir_info *
add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
static struct orphan_dir_info *add_orphan_dir_info(struct send_ctx *sctx,
u64 dir_ino, u64 dir_gen)
{
struct rb_node **p = &sctx->orphan_dirs.rb_node;
struct rb_node *parent = NULL;
@ -2868,20 +2869,23 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
while (*p) {
parent = *p;
entry = rb_entry(parent, struct orphan_dir_info, node);
if (dir_ino < entry->ino) {
if (dir_ino < entry->ino)
p = &(*p)->rb_left;
} else if (dir_ino > entry->ino) {
else if (dir_ino > entry->ino)
p = &(*p)->rb_right;
} else {
else if (dir_gen < entry->gen)
p = &(*p)->rb_left;
else if (dir_gen > entry->gen)
p = &(*p)->rb_right;
else
return entry;
}
}
odi = kmalloc(sizeof(*odi), GFP_KERNEL);
if (!odi)
return ERR_PTR(-ENOMEM);
odi->ino = dir_ino;
odi->gen = 0;
odi->gen = dir_gen;
odi->last_dir_index_offset = 0;
rb_link_node(&odi->node, parent, p);
@ -2889,8 +2893,8 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
return odi;
}
static struct orphan_dir_info *
get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
static struct orphan_dir_info *get_orphan_dir_info(struct send_ctx *sctx,
u64 dir_ino, u64 gen)
{
struct rb_node *n = sctx->orphan_dirs.rb_node;
struct orphan_dir_info *entry;
@ -2901,15 +2905,19 @@ get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
n = n->rb_left;
else if (dir_ino > entry->ino)
n = n->rb_right;
else if (gen < entry->gen)
n = n->rb_left;
else if (gen > entry->gen)
n = n->rb_right;
else
return entry;
}
return NULL;
}
static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino)
static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino, u64 gen)
{
struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino);
struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino, gen);
return odi != NULL;
}
@ -2954,7 +2962,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = 0;
odi = get_orphan_dir_info(sctx, dir);
odi = get_orphan_dir_info(sctx, dir, dir_gen);
if (odi)
key.offset = odi->last_dir_index_offset;
@ -2985,7 +2993,7 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
dm = get_waiting_dir_move(sctx, loc.objectid);
if (dm) {
odi = add_orphan_dir_info(sctx, dir);
odi = add_orphan_dir_info(sctx, dir, dir_gen);
if (IS_ERR(odi)) {
ret = PTR_ERR(odi);
goto out;
@ -2993,12 +3001,13 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen,
odi->gen = dir_gen;
odi->last_dir_index_offset = found_key.offset;
dm->rmdir_ino = dir;
dm->rmdir_gen = dir_gen;
ret = 0;
goto out;
}
if (loc.objectid > send_progress) {
odi = add_orphan_dir_info(sctx, dir);
odi = add_orphan_dir_info(sctx, dir, dir_gen);
if (IS_ERR(odi)) {
ret = PTR_ERR(odi);
goto out;
@ -3038,6 +3047,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
return -ENOMEM;
dm->ino = ino;
dm->rmdir_ino = 0;
dm->rmdir_gen = 0;
dm->orphanized = orphanized;
while (*p) {
@ -3183,7 +3193,7 @@ static int path_loop(struct send_ctx *sctx, struct fs_path *name,
while (ino != BTRFS_FIRST_FREE_OBJECTID) {
fs_path_reset(name);
if (is_waiting_for_rm(sctx, ino))
if (is_waiting_for_rm(sctx, ino, gen))
break;
if (is_waiting_for_move(sctx, ino)) {
if (*ancestor_ino == 0)
@ -3223,6 +3233,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
u64 parent_ino, parent_gen;
struct waiting_dir_move *dm = NULL;
u64 rmdir_ino = 0;
u64 rmdir_gen;
u64 ancestor;
bool is_orphan;
int ret;
@ -3237,6 +3248,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
dm = get_waiting_dir_move(sctx, pm->ino);
ASSERT(dm);
rmdir_ino = dm->rmdir_ino;
rmdir_gen = dm->rmdir_gen;
is_orphan = dm->orphanized;
free_waiting_dir_move(sctx, dm);
@ -3273,6 +3285,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
dm = get_waiting_dir_move(sctx, pm->ino);
ASSERT(dm);
dm->rmdir_ino = rmdir_ino;
dm->rmdir_gen = rmdir_gen;
}
goto out;
}
@ -3291,7 +3304,7 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
struct orphan_dir_info *odi;
u64 gen;
odi = get_orphan_dir_info(sctx, rmdir_ino);
odi = get_orphan_dir_info(sctx, rmdir_ino, rmdir_gen);
if (!odi) {
/* already deleted */
goto finish;

View File

@ -532,7 +532,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
loops = 0;
while ((delalloc_bytes || dio_bytes) && loops < 3) {
btrfs_start_delalloc_roots(fs_info, items);
btrfs_start_delalloc_roots(fs_info, items, true);
loops++;
if (wait_ordered && !trans) {

View File

@ -175,7 +175,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
btrfs_discard_stop(fs_info);
/* btrfs handle error by forcing the filesystem readonly */
sb->s_flags |= SB_RDONLY;
btrfs_set_sb_rdonly(sb);
btrfs_info(fs_info, "forced readonly");
/*
* Note that a running device replace operation is not canceled here
@ -1953,7 +1953,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
/* avoid complaints from lockdep et al. */
up(&fs_info->uuid_tree_rescan_sem);
sb->s_flags |= SB_RDONLY;
btrfs_set_sb_rdonly(sb);
/*
* Setting SB_RDONLY will put the cleaner thread to
@ -1964,10 +1964,42 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
*/
btrfs_delete_unused_bgs(fs_info);
/*
* The cleaner task could be already running before we set the
* flag BTRFS_FS_STATE_RO (and SB_RDONLY in the superblock).
* We must make sure that after we finish the remount, i.e. after
* we call btrfs_commit_super(), the cleaner can no longer start
* a transaction - either because it was dropping a dead root,
* running delayed iputs or deleting an unused block group (the
* cleaner picked a block group from the list of unused block
* groups before we were able to in the previous call to
* btrfs_delete_unused_bgs()).
*/
wait_on_bit(&fs_info->flags, BTRFS_FS_CLEANER_RUNNING,
TASK_UNINTERRUPTIBLE);
/*
* We've set the superblock to RO mode, so we might have made
* the cleaner task sleep without running all pending delayed
* iputs. Go through all the delayed iputs here, so that if an
* unmount happens without remounting RW we don't end up at
* finishing close_ctree() with a non-empty list of delayed
* iputs.
*/
btrfs_run_delayed_iputs(fs_info);
btrfs_dev_replace_suspend_for_unmount(fs_info);
btrfs_scrub_cancel(fs_info);
btrfs_pause_balance(fs_info);
/*
* Pause the qgroup rescan worker if it is running. We don't want
* it to be still running after we are in RO mode, as after that,
* by the time we unmount, it might have left a transaction open,
* so we would leak the transaction and/or crash.
*/
btrfs_qgroup_wait_for_completion(fs_info, false);
ret = btrfs_commit_super(fs_info);
if (ret)
goto restore;
@ -2006,7 +2038,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (ret)
goto restore;
sb->s_flags &= ~SB_RDONLY;
btrfs_clear_sb_rdonly(sb);
set_bit(BTRFS_FS_OPEN, &fs_info->flags);
}
@ -2028,6 +2060,8 @@ restore:
/* We've hit an error - don't reset SB_RDONLY */
if (sb_rdonly(sb))
old_flags |= SB_RDONLY;
if (!(old_flags & SB_RDONLY))
clear_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state);
sb->s_flags = old_flags;
fs_info->mount_opt = old_opts;
fs_info->compress_type = old_compress_type;

View File

@ -55,8 +55,14 @@ struct inode *btrfs_new_test_inode(void)
struct inode *inode;
inode = new_inode(test_mnt->mnt_sb);
if (inode)
inode_init_owner(inode, NULL, S_IFREG);
if (!inode)
return NULL;
inode->i_mode = S_IFREG;
BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
inode_init_owner(inode, NULL, S_IFREG);
return inode;
}

View File

@ -232,11 +232,6 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
return ret;
}
inode->i_mode = S_IFREG;
BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
test_std_err(TEST_ALLOC_FS_INFO);
@ -835,10 +830,6 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
return ret;
}
BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
BTRFS_I(inode)->location.offset = 0;
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
if (!fs_info) {
test_std_err(TEST_ALLOC_FS_INFO);

View File

@ -2592,7 +2592,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
if (seeding_dev) {
sb->s_flags &= ~SB_RDONLY;
btrfs_clear_sb_rdonly(sb);
ret = btrfs_prepare_sprout(fs_info);
if (ret) {
btrfs_abort_transaction(trans, ret);
@ -2728,7 +2728,7 @@ error_sysfs:
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
error_trans:
if (seeding_dev)
sb->s_flags |= SB_RDONLY;
btrfs_set_sb_rdonly(sb);
if (trans)
btrfs_end_transaction(trans);
error_free_zone: