From ddad1cf27f79b929a681d15678da0496c3a73ba2 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 29 May 2024 17:01:43 +0800 Subject: [PATCH 1/2] btrfs: fix anon_dev leak in create_subvol() mainline inclusion from mainline-v5.19-rc1 commit 2256e901f5bddc56e24089c96f27b77da932dfcc category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9QR71 CVE: CVE-2024-35956 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=2256e901f5bddc56e24089c96f27b77da932dfcc -------------------------------- When btrfs_qgroup_inherit(), btrfs_alloc_tree_block, or btrfs_insert_root() fail in create_subvol(), we return without freeing anon_dev. Reorganize the error handling in create_subvol() to fix this. Reviewed-by: Sweet Tea Dorminy Signed-off-by: Omar Sandoval Reviewed-by: David Sterba Signed-off-by: David Sterba Conflicts: fs/btrfs/ioctl.c [fix context diff] Signed-off-by: Ye Bin --- fs/btrfs/ioctl.c | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ff87e1d4e03a..ef7d77b085f3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -592,7 +592,7 @@ static noinline int create_subvol(struct inode *dir, struct inode *inode; int ret; int err; - dev_t anon_dev = 0; + dev_t anon_dev; u64 objectid; u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; u64 index = 0; @@ -603,11 +603,7 @@ static noinline int create_subvol(struct inode *dir, ret = btrfs_find_free_objectid(fs_info->tree_root, &objectid); if (ret) - goto fail_free; - - ret = get_anon_bdev(&anon_dev); - if (ret < 0) - goto fail_free; + goto out_root_item; /* * Don't create subvolume whose level is not zero. Or qgroup will be @@ -615,9 +611,13 @@ static noinline int create_subvol(struct inode *dir, */ if (btrfs_qgroup_level(objectid)) { ret = -ENOSPC; - goto fail_free; + goto out_root_item; } + ret = get_anon_bdev(&anon_dev); + if (ret < 0) + goto out_root_item; + btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); /* * The same as the snapshot creation, please see the comment @@ -625,26 +625,26 @@ static noinline int create_subvol(struct inode *dir, */ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 8, false); if (ret) - goto fail_free; + goto out_anon_dev; trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); btrfs_subvolume_release_metadata(root, &block_rsv); - goto fail_free; + goto out_anon_dev; } trans->block_rsv = &block_rsv; trans->bytes_reserved = block_rsv.size; ret = btrfs_qgroup_inherit(trans, 0, objectid, inherit); if (ret) - goto fail; + goto out; leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0, BTRFS_NESTING_NORMAL); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); - goto fail; + goto out; } btrfs_mark_buffer_dirty(leaf); @@ -697,7 +697,7 @@ static noinline int create_subvol(struct inode *dir, */ btrfs_free_tree_block(trans, root, leaf, 0, 1); free_extent_buffer(leaf); - goto fail; + goto out; } free_extent_buffer(leaf); @@ -706,12 +706,11 @@ static noinline int create_subvol(struct inode *dir, key.offset = (u64)-1; new_root = btrfs_get_new_fs_root(fs_info, objectid, &anon_dev); if (IS_ERR(new_root)) { - free_anon_bdev(anon_dev); ret = PTR_ERR(new_root); btrfs_abort_transaction(trans, ret); - goto fail; + goto out; } - /* Freeing will be done in btrfs_put_root() of new_root */ + /* anon_dev is owned by new_root now. */ anon_dev = 0; btrfs_record_root_in_trans(trans, new_root); @@ -721,7 +720,7 @@ static noinline int create_subvol(struct inode *dir, if (ret) { /* We potentially lose an unused inode item here */ btrfs_abort_transaction(trans, ret); - goto fail; + goto out; } mutex_lock(&new_root->objectid_mutex); @@ -734,28 +733,28 @@ static noinline int create_subvol(struct inode *dir, ret = btrfs_set_inode_index(BTRFS_I(dir), &index); if (ret) { btrfs_abort_transaction(trans, ret); - goto fail; + goto out; } ret = btrfs_insert_dir_item(trans, name, namelen, BTRFS_I(dir), &key, BTRFS_FT_DIR, index); if (ret) { btrfs_abort_transaction(trans, ret); - goto fail; + goto out; } btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2); ret = btrfs_update_inode(trans, root, dir); if (ret) { btrfs_abort_transaction(trans, ret); - goto fail; + goto out; } ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid, btrfs_ino(BTRFS_I(dir)), index, name, namelen); if (ret) { btrfs_abort_transaction(trans, ret); - goto fail; + goto out; } ret = btrfs_uuid_tree_add(trans, root_item->uuid, @@ -763,8 +762,7 @@ static noinline int create_subvol(struct inode *dir, if (ret) btrfs_abort_transaction(trans, ret); -fail: - kfree(root_item); +out: trans->block_rsv = NULL; trans->bytes_reserved = 0; btrfs_subvolume_release_metadata(root, &block_rsv); @@ -779,11 +777,10 @@ static noinline int create_subvol(struct inode *dir, return PTR_ERR(inode); d_instantiate(dentry, inode); } - return ret; - -fail_free: +out_anon_dev: if (anon_dev) free_anon_bdev(anon_dev); +out_root_item: kfree(root_item); return ret; } -- Gitee From e60c00090ef4df0b6e8e1f8bc94ec5c227d736b7 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 29 May 2024 17:01:44 +0800 Subject: [PATCH 2/2] btrfs: qgroup: fix qgroup prealloc rsv leak in subvolume operations mainline inclusion from mainline-v6.9-rc4 commit 74e97958121aa1f5854da6effba70143f051b0cd category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9QR71 CVE: CVE-2024-35956 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=74e97958121aa1f5854da6effba70143f051b0cd -------------------------------- Create subvolume, create snapshot and delete subvolume all use btrfs_subvolume_reserve_metadata() to reserve metadata for the changes done to the parent subvolume's fs tree, which cannot be mediated in the normal way via start_transaction. When quota groups (squota or qgroups) are enabled, this reserves qgroup metadata of type PREALLOC. Once the operation is associated to a transaction, we convert PREALLOC to PERTRANS, which gets cleared in bulk at the end of the transaction. However, the error paths of these three operations were not implementing this lifecycle correctly. They unconditionally converted the PREALLOC to PERTRANS in a generic cleanup step regardless of errors or whether the operation was fully associated to a transaction or not. This resulted in error paths occasionally converting this rsv to PERTRANS without calling record_root_in_trans successfully, which meant that unless that root got recorded in the transaction by some other thread, the end of the transaction would not free that root's PERTRANS, leaking it. Ultimately, this resulted in hitting a WARN in CONFIG_BTRFS_DEBUG builds at unmount for the leaked reservation. The fix is to ensure that every qgroup PREALLOC reservation observes the following properties: 1. any failure before record_root_in_trans is called successfully results in freeing the PREALLOC reservation. 2. after record_root_in_trans, we convert to PERTRANS, and now the transaction owns freeing the reservation. This patch enforces those properties on the three operations. Without it, generic/269 with squotas enabled at mkfs time would fail in ~5-10 runs on my system. With this patch, it ran successfully 1000 times in a row. Fixes: e85fde5162bf ("btrfs: qgroup: fix qgroup meta rsv leak for subvolume operations") CC: stable@vger.kernel.org # 6.1+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba Conflicts: fs/btrfs/ctree.h fs/btrfs/ioctl.c fs/btrfs/inode.c fs/btrfs/root-tree.h [fix context diff] Signed-off-by: Ye Bin --- fs/btrfs/ctree.h | 2 -- fs/btrfs/inode.c | 13 ++++++++++++- fs/btrfs/ioctl.c | 36 ++++++++++++++++++++++++++++-------- fs/btrfs/root-tree.c | 10 ---------- 4 files changed, 40 insertions(+), 21 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 92ecb4562584..84ac3642023f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2669,8 +2669,6 @@ enum btrfs_flush_state { int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, int nitems, bool use_global_rsv); -void btrfs_subvolume_release_metadata(struct btrfs_root *root, - struct btrfs_block_rsv *rsv); void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes); int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b9b5ea4b7762..0445811787b7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4021,6 +4021,7 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) struct btrfs_block_rsv block_rsv; u64 root_flags; int ret; + u64 qgroup_reserved = 0; int err; /* @@ -4063,12 +4064,20 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) err = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true); if (err) goto out_up_write; + qgroup_reserved = block_rsv.qgroup_rsv_reserved; trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { err = PTR_ERR(trans); goto out_release; } + ret = btrfs_record_root_in_trans(trans, root); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto out_end_trans; + } + btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); + qgroup_reserved = 0; trans->block_rsv = &block_rsv; trans->bytes_reserved = block_rsv.size; @@ -4129,7 +4138,9 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry) err = ret; inode->i_flags |= S_DEAD; out_release: - btrfs_subvolume_release_metadata(root, &block_rsv); + btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL); + if (qgroup_reserved) + btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved); out_up_write: up_write(&fs_info->subvol_sem); if (err) { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ef7d77b085f3..3d96eef71e2b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -594,6 +594,7 @@ static noinline int create_subvol(struct inode *dir, int err; dev_t anon_dev; u64 objectid; + u64 qgroup_reserved = 0; u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; u64 index = 0; @@ -626,13 +627,18 @@ static noinline int create_subvol(struct inode *dir, ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 8, false); if (ret) goto out_anon_dev; + qgroup_reserved = block_rsv.qgroup_rsv_reserved; trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); - btrfs_subvolume_release_metadata(root, &block_rsv); - goto out_anon_dev; + goto out_release_rsv; } + ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root); + if (ret) + goto out; + btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); + qgroup_reserved = 0; trans->block_rsv = &block_rsv; trans->bytes_reserved = block_rsv.size; @@ -765,7 +771,6 @@ static noinline int create_subvol(struct inode *dir, out: trans->block_rsv = NULL; trans->bytes_reserved = 0; - btrfs_subvolume_release_metadata(root, &block_rsv); err = btrfs_commit_transaction(trans); if (err && !ret) @@ -777,6 +782,10 @@ static noinline int create_subvol(struct inode *dir, return PTR_ERR(inode); d_instantiate(dentry, inode); } +out_release_rsv: + btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL); + if (qgroup_reserved) + btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved); out_anon_dev: if (anon_dev) free_anon_bdev(anon_dev); @@ -793,6 +802,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, struct inode *inode; struct btrfs_pending_snapshot *pending_snapshot; struct btrfs_trans_handle *trans; + struct btrfs_block_rsv *block_rsv; + u64 qgroup_reserved = 0; int ret; if (btrfs_root_refs(&root->root_item) == 0) @@ -822,8 +833,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, goto free_pending; } - btrfs_init_block_rsv(&pending_snapshot->block_rsv, - BTRFS_BLOCK_RSV_TEMP); + block_rsv = &pending_snapshot->block_rsv; + btrfs_init_block_rsv(block_rsv, BTRFS_BLOCK_RSV_TEMP); /* * 1 - parent dir inode * 2 - dir entries @@ -833,10 +844,10 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, * 1 - UUID item */ ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, - &pending_snapshot->block_rsv, 8, - false); + block_rsv, 8, false); if (ret) goto free_pending; + qgroup_reserved = block_rsv->qgroup_rsv_reserved; pending_snapshot->dentry = dentry; pending_snapshot->root = root; @@ -849,6 +860,13 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, ret = PTR_ERR(trans); goto fail; } + ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root); + if (ret) { + btrfs_end_transaction(trans); + goto fail; + } + btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved); + qgroup_reserved = 0; spin_lock(&fs_info->trans_lock); list_add(&pending_snapshot->list, @@ -881,7 +899,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, if (ret && pending_snapshot->snap) pending_snapshot->snap->anon_dev = 0; btrfs_put_root(pending_snapshot->snap); - btrfs_subvolume_release_metadata(root, &pending_snapshot->block_rsv); + btrfs_block_rsv_release(fs_info, block_rsv, (u64)-1, NULL); + if (qgroup_reserved) + btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved); free_pending: if (pending_snapshot->anon_dev) free_anon_bdev(pending_snapshot->anon_dev); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index e9e8ca4e98a7..cd40e7a92552 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -521,13 +521,3 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, } return ret; } - -void btrfs_subvolume_release_metadata(struct btrfs_root *root, - struct btrfs_block_rsv *rsv) -{ - struct btrfs_fs_info *fs_info = root->fs_info; - u64 qgroup_to_release; - - btrfs_block_rsv_release(fs_info, rsv, (u64)-1, &qgroup_to_release); - btrfs_qgroup_convert_reserved_meta(root, qgroup_to_release); -} -- Gitee