bcachefs: snapshot_create_lock

Add a new lock for snapshot creation - this addresses a few races with
logged operations and snapshot deletion.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2023-09-29 01:15:33 -04:00
parent 1e2d399970
commit 37fad9497f
5 changed files with 44 additions and 7 deletions

View File

@ -746,6 +746,7 @@ struct bch_fs {
struct snapshot_table __rcu *snapshots;
size_t snapshot_table_size;
struct mutex snapshot_table_lock;
struct rw_semaphore snapshot_create_lock;
struct work_struct snapshot_delete_work;
struct work_struct snapshot_wait_for_pagecache_and_delete_work;

View File

@ -318,8 +318,8 @@ err:
return ret;
}
static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
struct bch_ioctl_subvolume arg)
static long __bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
struct bch_ioctl_subvolume arg)
{
struct inode *dir;
struct bch_inode_info *inode;
@ -440,6 +440,16 @@ err1:
return error;
}
static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
struct bch_ioctl_subvolume arg)
{
down_write(&c->snapshot_create_lock);
long ret = __bch2_ioctl_subvolume_create(c, filp, arg);
up_write(&c->snapshot_create_lock);
return ret;
}
static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
struct bch_ioctl_subvolume arg)
{

View File

@ -287,9 +287,18 @@ int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sec
op.v.inum = cpu_to_le64(inum.inum);
op.v.new_i_size = cpu_to_le64(new_i_size);
return bch2_trans_run(c,
/*
* Logged ops aren't atomic w.r.t. snapshot creation: creating a
* snapshot while they're in progress, then crashing, will result in the
* resume only proceeding in one of the snapshots
*/
down_read(&c->snapshot_create_lock);
int ret = bch2_trans_run(c,
bch2_logged_op_start(trans, &op.k_i) ?:
__bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta));
up_read(&c->snapshot_create_lock);
return ret;
}
/* finsert/fcollapse: */
@ -491,7 +500,16 @@ int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum,
op.v.src_offset = cpu_to_le64(offset);
op.v.pos = cpu_to_le64(insert ? U64_MAX : offset);
return bch2_trans_run(c,
/*
* Logged ops aren't atomic w.r.t. snapshot creation: creating a
* snapshot while they're in progress, then crashing, will result in the
* resume only proceeding in one of the snapshots
*/
down_read(&c->snapshot_create_lock);
int ret = bch2_trans_run(c,
bch2_logged_op_start(trans, &op.k_i) ?:
__bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta));
up_read(&c->snapshot_create_lock);
return ret;
}

View File

@ -1447,6 +1447,8 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
}
}
down_write(&c->snapshot_create_lock);
for_each_btree_key(trans, iter, BTREE_ID_snapshots,
POS_MIN, 0, k, ret) {
u32 snapshot = k.k->p.offset;
@ -1457,6 +1459,9 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
}
bch2_trans_iter_exit(trans, &iter);
if (ret)
goto err_create_lock;
/*
* Fixing children of deleted snapshots can't be done completely
* atomically, if we crash between here and when we delete the interior
@ -1467,14 +1472,14 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
NULL, NULL, BTREE_INSERT_NOFAIL,
bch2_fix_child_of_deleted_snapshot(trans, &iter, k, &deleted_interior));
if (ret)
goto err;
goto err_create_lock;
darray_for_each(deleted, i) {
ret = commit_do(trans, NULL, NULL, 0,
bch2_snapshot_node_delete(trans, *i));
if (ret) {
bch_err_msg(c, ret, "deleting snapshot %u", *i);
goto err;
goto err_create_lock;
}
}
@ -1483,11 +1488,13 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
bch2_snapshot_node_delete(trans, *i));
if (ret) {
bch_err_msg(c, ret, "deleting snapshot %u", *i);
goto err;
goto err_create_lock;
}
}
clear_bit(BCH_FS_HAVE_DELETED_SNAPSHOTS, &c->flags);
err_create_lock:
up_write(&c->snapshot_create_lock);
err:
darray_exit(&deleted_interior);
darray_exit(&deleted);

View File

@ -720,6 +720,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
mutex_init(&c->bio_bounce_pages_lock);
mutex_init(&c->snapshot_table_lock);
init_rwsem(&c->snapshot_create_lock);
spin_lock_init(&c->btree_write_error_lock);