From 1842d655e95a630dce95d51ccc95b34c33cccaf3 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 20 Jan 2023 11:03:20 +0100 Subject: [PATCH 01/37] prlimit: do_prlimit needs to have a speculation check stable inclusion from stable-v4.19.271 commit d3ee91e50a6b3c5a45398e3dcb912a8a264f575c category: bugfix issue: #I6ZHKL CVE: CVE-2023-0458 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit 739790605705ddcf18f21782b9c99ad7d53a8c11 upstream. do_prlimit() adds the user-controlled resource value to a pointer that will subsequently be dereferenced. In order to help prevent this codepath from being used as a spectre "gadget", a barrier needs to be added after checking the range. Reported-by: Jordy Zomer Tested-by: Jordy Zomer Suggested-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- kernel/sys.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sys.c b/kernel/sys.c index baf60a3aa34b..f8f66c633dc8 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1530,6 +1530,8 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource, if (resource >= RLIM_NLIMITS) return -EINVAL; + resource = array_index_nospec(resource, RLIM_NLIMITS); + if (new_rlim) { if (new_rlim->rlim_cur > new_rlim->rlim_max) return -EINVAL; -- Gitee From 4496aac6995e1decb8ab2cd71a281a6c36df24be Mon Sep 17 00:00:00 2001 From: Baisong Zhong Date: Sun, 20 Nov 2022 06:59:18 +0000 Subject: [PATCH 02/37] media: dvb-usb: az6027: fix null-ptr-deref in az6027_i2c_xfer() stable inclusion from stable-v4.19.270 commit 7abfe467cd685f5da7ecb415441e45e3e4e2baa8 category: bugfix issue: #I6ZQR6 CVE: CVE-2023-28328 Signed-off-by: wanxiaoqing40281 --------------------------------------- [ Upstream commit 0ed554fd769a19ea8464bb83e9ac201002ef74ad ] Wei Chen reports a kernel bug as below: general protection fault, probably for non-canonical address KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] ... Call Trace: __i2c_transfer+0x77e/0x1930 drivers/i2c/i2c-core-base.c:2109 i2c_transfer+0x1d5/0x3d0 drivers/i2c/i2c-core-base.c:2170 i2cdev_ioctl_rdwr+0x393/0x660 drivers/i2c/i2c-dev.c:297 i2cdev_ioctl+0x75d/0x9f0 drivers/i2c/i2c-dev.c:458 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:870 [inline] __se_sys_ioctl+0xfb/0x170 fs/ioctl.c:856 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fd834a8bded In az6027_i2c_xfer(), if msg[i].addr is 0x99, a null-ptr-deref is caused when accessing msg[i].buf, because msg[i].len is 0 and msg[i].buf is NULL. Fix this by checking msg[i].len in az6027_i2c_xfer().
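In sketch form, the defensive pattern the fix applies (a hypothetical helper, not the driver code): a zero-length message carries no payload and its buffer may be NULL, so the length must be validated before the first byte is read.

/* Minimal sketch: validate i2c_msg.len before touching i2c_msg.buf. */
#include <linux/i2c.h>
#include <linux/errno.h>

static int read_first_byte(const struct i2c_msg *msg)
{
	if (msg->len < 1)		/* len == 0: buf may be NULL */
		return -EOPNOTSUPP;	/* reject instead of dereferencing */
	return msg->buf[0] & 0x00ff;	/* safe: at least one byte present */
}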
Link: https://lore.kernel.org/lkml/CAO4mrfcPHB5aQJO=mpqV+p8mPLNg-Fok0gw8gZ=zemAfMGTzMg@mail.gmail.com/ Link: https://lore.kernel.org/linux-media/20221120065918.2160782-1-zhongbaisong@huawei.com Fixes: 76f9a820c867 ("V4L/DVB: AZ6027: Initial import of the driver") Reported-by: Wei Chen Signed-off-by: Baisong Zhong Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing40281 --- drivers/media/usb/dvb-usb/az6027.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/media/usb/dvb-usb/az6027.c b/drivers/media/usb/dvb-usb/az6027.c index 6321b8e30261..555c8ac44881 100644 --- a/drivers/media/usb/dvb-usb/az6027.c +++ b/drivers/media/usb/dvb-usb/az6027.c @@ -977,6 +977,10 @@ static int az6027_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int n if (msg[i].addr == 0x99) { req = 0xBE; index = 0; + if (msg[i].len < 1) { + i = -EOPNOTSUPP; + break; + } value = msg[i].buf[0] & 0x00ff; length = 1; az6027_usb_out_op(d, req, value, index, data, length); -- Gitee From 669964f645339e80c26ab4a4e25d34be3997f3d4 Mon Sep 17 00:00:00 2001 From: Wei Chen Date: Tue, 14 Mar 2023 16:54:21 +0000 Subject: [PATCH 03/37] i2c: xgene-slimpro: Fix out-of-bounds bug in xgene_slimpro_i2c_xfer() stable inclusion from stable-v4.19.280 commit 92fbb6d1296f81f41f65effd7f5f8c0f74943d15 category: bugfix issue: #I6ZK8Z CVE: CVE-2023-2194 Signed-off-by: wanxiaoqing40281 --------------------------------------- The data->block[0] variable comes from userspace and is a number between 0 and 255. Without a proper check, the variable may be large enough to cause an out-of-bounds access when performing the memcpy in slimpro_i2c_blkwr(). Fix this bug by checking the value of writelen. Fixes: f6505fbabc42 ("i2c: add SLIMpro I2C device driver on APM X-Gene platform") Signed-off-by: Wei Chen Cc: stable@vger.kernel.org Reviewed-by: Andi Shyti Signed-off-by: Wolfram Sang Signed-off-by: wanxiaoqing40281 --- drivers/i2c/busses/i2c-xgene-slimpro.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c index a7ac746018ad..7a746f413535 100644 --- a/drivers/i2c/busses/i2c-xgene-slimpro.c +++ b/drivers/i2c/busses/i2c-xgene-slimpro.c @@ -321,6 +321,9 @@ static int slimpro_i2c_blkwr(struct slimpro_i2c_dev *ctx, u32 chip, u32 msg[3]; int rc; + if (writelen > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + memcpy(ctx->dma_buffer, data, writelen); paddr = dma_map_single(ctx->dev, ctx->dma_buffer, writelen, DMA_TO_DEVICE); -- Gitee From 34564dcc83ba0fef3f090e9b52350a25e8d5bfd0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 2 May 2023 10:25:24 +0200 Subject: [PATCH 04/37] netfilter: nf_tables: deactivate anonymous set from preparation phase mainline inclusion from mainline-v6.4-rc1 commit c1592a89942e9678f7d9c8030efa777c0d57edab category: bugfix issue: #I71KHS CVE: CVE-2023-32233 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c1592a89942e9678f7d9c8030efa777c0d57edab Signed-off-by: wanxiaoqing40281 --------------------------------------- Toggle deleted anonymous sets as inactive in the next generation, so users cannot perform any update on them. Clear the generation bitmask in case the transaction is aborted. The following KASAN splat shows a set element deletion for a bound anonymous set that has been already removed in the same transaction.
[ 64.921510] ================================================================== [ 64.923123] BUG: KASAN: wild-memory-access in nf_tables_commit+0xa24/0x1490 [nf_tables] [ 64.924745] Write of size 8 at addr dead000000000122 by task test/890 [ 64.927903] CPU: 3 PID: 890 Comm: test Not tainted 6.3.0+ #253 [ 64.931120] Call Trace: [ 64.932699] [ 64.934292] dump_stack_lvl+0x33/0x50 [ 64.935908] ? nf_tables_commit+0xa24/0x1490 [nf_tables] [ 64.937551] kasan_report+0xda/0x120 [ 64.939186] ? nf_tables_commit+0xa24/0x1490 [nf_tables] [ 64.940814] nf_tables_commit+0xa24/0x1490 [nf_tables] [ 64.942452] ? __kasan_slab_alloc+0x2d/0x60 [ 64.944070] ? nf_tables_setelem_notify+0x190/0x190 [nf_tables] [ 64.945710] ? kasan_set_track+0x21/0x30 [ 64.947323] nfnetlink_rcv_batch+0x709/0xd90 [nfnetlink] [ 64.948898] ? nfnetlink_rcv_msg+0x480/0x480 [nfnetlink] Signed-off-by: Pablo Neira Ayuso Signed-off-by: wanxiaoqing40281 --- include/net/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 12 ++++++++++++ net/netfilter/nft_dynset.c | 2 +- net/netfilter/nft_lookup.c | 2 +- net/netfilter/nft_objref.c | 2 +- 5 files changed, 16 insertions(+), 3 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 93253ba1eeac..78f5f0426e6b 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -471,6 +471,7 @@ struct nft_set_binding { }; enum nft_trans_phase; +void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set); void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2169366f3207..d91e065e872c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3750,12 +3750,24 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, } EXPORT_SYMBOL_GPL(nf_tables_unbind_set); +void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) +{ + if (nft_set_is_anonymous(set)) + nft_clear(ctx->net, set); + + set->use++; +} +EXPORT_SYMBOL_GPL(nf_tables_activate_set); + void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, enum nft_trans_phase phase) { switch (phase) { case NFT_TRANS_PREPARE: + if (nft_set_is_anonymous(set)) + nft_deactivate_next(ctx->net, set); + set->use--; return; case NFT_TRANS_ABORT: diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index eb7f9a5f2aeb..2be1821ff42f 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -249,7 +249,7 @@ static void nft_dynset_activate(const struct nft_ctx *ctx, { struct nft_dynset *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_dynset_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 55754d9939b5..cb9e937a5ce0 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -132,7 +132,7 @@ static void nft_lookup_activate(const struct nft_ctx *ctx, { struct nft_lookup *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_lookup_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index bf92a40dd1b2..eff2173db7e4 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -182,7 +182,7 @@ static void 
nft_objref_map_activate(const struct nft_ctx *ctx, { struct nft_objref_map *priv = nft_expr_priv(expr); - priv->set->use++; + nf_tables_activate_set(ctx, priv->set); } static void nft_objref_map_destroy(const struct nft_ctx *ctx, -- Gitee From c4981b8e25d0e6558c852f94550967136b1f5a68 Mon Sep 17 00:00:00 2001 From: Haimin Zhang Date: Wed, 15 Dec 2021 19:15:30 +0800 Subject: [PATCH 06/37] netdevsim: Zero-initialize memory for new map's value in function nsim_bpf_map_alloc stable inclusion from stable-v4.19.222 commit d861443c4dc88650eed113310d933bd593d37b23 category: bugfix issue:#I728M0 CVE: CVE-2021-4135 Signed-off-by: wanxiaoqing40281 --------------------------------------- [ Upstream commit 481221775d53d6215a6e5e9ce1cce6d2b4ab9a46 ] Zero-initialize memory for new map's value in function nsim_bpf_map_alloc since it may cause a potential kernel information leak issue, as follows: 1. nsim_bpf_map_alloc calls nsim_map_alloc_elem to allocate elements for a new map. 2. nsim_map_alloc_elem uses kmalloc to allocate map's value, but doesn't zero it. 3.
A user application can use IOCTL BPF_MAP_LOOKUP_ELEM to get a specific element's information in the map. 4. The kernel function map_lookup_elem will call bpf_map_copy_value to get the information allocated in step 2, then use copy_to_user to copy it to the user buffer. This can only leak information for an array map. Fixes: 395cacb5f1a0 ("netdevsim: bpf: support fake map offload") Suggested-by: Jakub Kicinski Acked-by: Jakub Kicinski Signed-off-by: Haimin Zhang Link: https://lore.kernel.org/r/20211215111530.72103-1-tcs.kernel@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing40281 --- drivers/net/netdevsim/bpf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 81444208b216..12f100392ed1 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -493,6 +493,7 @@ nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap) goto err_free; key = nmap->entry[i].key; *key = i; + memset(nmap->entry[i].value, 0, offmap->map.value_size); } } -- Gitee From d07a64ba6ecb31719903c2a089831a55486f3b76 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 22 Dec 2021 14:19:18 -0800 Subject: [PATCH 07/37] xfs: map unwritten blocks in XFS_IOC_{ALLOC,FREE}SP just like fallocate stable inclusion from stable-v4.19.225 commit 1c3564fca0e7b8c9e96245a2cb35e198b036ee9a category: bugfix issue:#I728M3 CVE: CVE-2021-4155 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit 983d8e60f50806f90534cc5373d0ce867e5aaf79 upstream. The old ALLOCSP/FREESP ioctls in XFS can be used to preallocate space at the end of files, just like fallocate and RESVSP. Make the behavior consistent with the other ioctls. Reported-by: Kirill Tkhai Signed-off-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Reviewed-by: Eric Sandeen Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- fs/xfs/xfs_ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 6ffb53edf1b4..f12308981608 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -701,7 +701,8 @@ xfs_ioc_space( flags |= XFS_PREALLOC_CLEAR; if (bf->l_start > XFS_ISIZE(ip)) { error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), - bf->l_start - XFS_ISIZE(ip), 0); + bf->l_start - XFS_ISIZE(ip), + XFS_BMAPI_PREALLOC); if (error) goto out_unlock; } -- Gitee From aa0dbd7ed22e3a489787f1a19a2c5b2803bea1ed Mon Sep 17 00:00:00 2001 From: Zheng Liang Date: Fri, 24 Sep 2021 09:16:27 +0800 Subject: [PATCH 08/37] ovl: fix missing negative dentry check in ovl_rename() stable inclusion from stable-v4.19.211 commit 9d4969d8b5073d02059bae3f1b8d9a20cf023c55 category: bugfix issue:#I728MA CVE: CVE-2021-20321 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit a295aef603e109a47af355477326bd41151765b6 upstream. The following reproducer mkdir lower upper work merge touch lower/old touch lower/new mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work merge rm merge/new mv merge/old merge/new & unlink upper/new may result in this race: PROCESS A: rename("merge/old", "merge/new"); overwrite=true,ovl_lower_positive(old)=true, ovl_dentry_is_whiteout(new)=true -> flags |= RENAME_EXCHANGE PROCESS B: unlink("upper/new"); PROCESS A: lookup newdentry in new_upperdir call vfs_rename() with negative newdentry and RENAME_EXCHANGE Fix by adding the missing check for negative newdentry, as sketched below.
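Stripped of diff markers, the fixed condition in ovl_rename() reads as follows (simplified from the hunk below); a negative newdentry is only tolerated when the rename is not a RENAME_EXCHANGE, since an exchange needs a real target:

	if (!d_is_negative(newdentry)) {
		if (!new_opaque || !ovl_is_whiteout(newdentry))
			goto out_dput;
	} else {
		if (flags & RENAME_EXCHANGE)
			goto out_dput;
	}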
Signed-off-by: Zheng Liang Fixes: e9be9d5e76e3 ("overlay filesystem") Cc: # v3.18 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- fs/overlayfs/dir.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 800bcad67325..5a560b6318c4 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -1164,9 +1164,13 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, goto out_dput; } } else { - if (!d_is_negative(newdentry) && - (!new_opaque || !ovl_is_whiteout(newdentry))) - goto out_dput; + if (!d_is_negative(newdentry)) { + if (!new_opaque || !ovl_is_whiteout(newdentry)) + goto out_dput; + } else { + if (flags & RENAME_EXCHANGE) + goto out_dput; + } } if (olddentry == trap) -- Gitee From da2e9ecb3132c9b6cd04d8ed9d438fd627015239 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Oct 2021 10:50:26 +0300 Subject: [PATCH 09/37] ipv6: use siphash in rt6_exception_hash() stable inclusion from stable-v4.19.215 commit ad829847ad59af8e26a1f1c345716099abbc7a58 category: bugfix issue:#I728MD CVE: CVE-2021-20322 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit 4785305c05b25a242e5314cc821f54ade4c18810 upstream. A group of security researchers brought to our attention the weakness of the hash function used in rt6_exception_hash(). Let's use siphash instead of Jenkins Hash, to considerably reduce security risks. The following patch deals with IPv4. Fixes: 35732d01fe31 ("ipv6: introduce a hash table to store dst cache") Signed-off-by: Eric Dumazet Reported-by: Keyu Man Cc: Wei Wang Cc: Martin KaFai Lau Acked-by: Wei Wang Signed-off-by: David S. Miller [OP: adjusted context for 4.19 stable] Signed-off-by: Ovidiu Panait Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- net/ipv6/route.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index dad35cd48807..6981d53848b8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -1337,17 +1338,24 @@ static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket) static u32 rt6_exception_hash(const struct in6_addr *dst, const struct in6_addr *src) { - static u32 seed __read_mostly; - u32 val; + static siphash_key_t rt6_exception_key __read_mostly; + struct { + struct in6_addr dst; + struct in6_addr src; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .dst = *dst, + }; + u64 val; - net_get_random_once(&seed, sizeof(seed)); - val = jhash(dst, sizeof(*dst), seed); + net_get_random_once(&rt6_exception_key, sizeof(rt6_exception_key)); #ifdef CONFIG_IPV6_SUBTREES if (src) - val = jhash(src, sizeof(*src), val); + combined.src = *src; #endif - return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT); + val = siphash(&combined, sizeof(combined), &rt6_exception_key); + + return hash_64(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT); } /* Helper function to find the cached rt in the hash table -- Gitee From 914f53c3049405333cc9cd2c5d8e1a4f5bafda89 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Oct 2021 10:50:25 +0300 Subject: [PATCH 10/37] ipv4: use siphash instead of Jenkins in fnhe_hashfun() stable inclusion from stable-v4.19.215 commit 6e2856767eb1a9cfcfcd82136928037f04920e97 category: bugfix issue:#I728MD CVE: CVE-2021-20322 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit
6457378fe796815c973f631a1904e147d6ee33b1 upstream. A group of security researchers brought to our attention the weakness of the hash function used in fnhe_hashfun(). Let's use siphash instead of Jenkins Hash, to considerably reduce security risks. Also remove the inline keyword; this really is distracting. Fixes: d546c621542d ("ipv4: harden fnhe_hashfun()") Signed-off-by: Eric Dumazet Reported-by: Keyu Man Cc: Willy Tarreau Signed-off-by: David S. Miller [OP: adjusted context for 4.19 stable] Signed-off-by: Ovidiu Panait Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- net/ipv4/route.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 753d4121a019..a5ccf737de2f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -618,14 +618,14 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) return oldest; } -static inline u32 fnhe_hashfun(__be32 daddr) +static u32 fnhe_hashfun(__be32 daddr) { - static u32 fnhe_hashrnd __read_mostly; - u32 hval; + static siphash_key_t fnhe_hash_key __read_mostly; + u64 hval; - net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd)); - hval = jhash_1word((__force u32) daddr, fnhe_hashrnd); - return hash_32(hval, FNHE_HASH_SHIFT); + net_get_random_once(&fnhe_hash_key, sizeof(fnhe_hash_key)); + hval = siphash_1u32((__force u32)daddr, &fnhe_hash_key); + return hash_64(hval, FNHE_HASH_SHIFT); } static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) -- Gitee From bed4e0910953aa4732e6a4b8cdaa9b2d87cf48e6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Aug 2021 15:16:15 -0700 Subject: [PATCH 11/37] ipv4: make exception cache less predictible stable inclusion from stable-v4.19.215 commit 3e6bd2b583f18da9856fc9741ffa200a74a52cba category: bugfix issue:#I728MD CVE: CVE-2021-20322 Signed-off-by: wanxiaoqing40281 --------------------------------------- [ Upstream commit 67d6d681e15b578c1725bad8ad079e05d1c48a8e ] Even after commit 6457378fe796 ("ipv4: use siphash instead of Jenkins in fnhe_hashfun()"), an attacker can still use brute force to learn some secrets from a victim linux host. One way to defeat these attacks is to make the max depth of the hash table bucket a random value. Before this patch, each bucket of the hash table used to store exceptions could contain 6 items under attack. After the patch, each bucket would contain a random number of items, between 6 and 10. The attacker can no longer infer secrets. This is slightly increasing memory size used by the hash table, by 50% on average; we do not expect this to be a problem. This patch is more complex than the prior one (IPv6 equivalent), because IPv4 was reusing the oldest entry. Since we need to be able to evict more than one entry per update_or_create_fnhe() call, I had to replace fnhe_oldest() with fnhe_remove_oldest(). Also note that we will queue extra kfree_rcu() calls under stress, which hopefully won't be too big an issue. Fixes: 4895c771c7f0 ("ipv4: Add FIB nexthop exceptions.") Signed-off-by: Eric Dumazet Reported-by: Keyu Man Cc: Willy Tarreau Signed-off-by: David S. Miller Reviewed-by: David Ahern Tested-by: David Ahern Signed-off-by: David S.
Miller Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing40281 --- net/ipv4/route.c | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a5ccf737de2f..6f578a5d837a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -604,18 +604,25 @@ static void fnhe_flush_routes(struct fib_nh_exception *fnhe) } } -static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) +static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash) { - struct fib_nh_exception *fnhe, *oldest; + struct fib_nh_exception __rcu **fnhe_p, **oldest_p; + struct fib_nh_exception *fnhe, *oldest = NULL; - oldest = rcu_dereference(hash->chain); - for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; - fnhe = rcu_dereference(fnhe->fnhe_next)) { - if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) + for (fnhe_p = &hash->chain; ; fnhe_p = &fnhe->fnhe_next) { + fnhe = rcu_dereference_protected(*fnhe_p, + lockdep_is_held(&fnhe_lock)); + if (!fnhe) + break; + if (!oldest || + time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) { oldest = fnhe; + oldest_p = fnhe_p; + } } fnhe_flush_routes(oldest); - return oldest; + *oldest_p = oldest->fnhe_next; + kfree_rcu(oldest, rcu); } static u32 fnhe_hashfun(__be32 daddr) @@ -692,16 +699,21 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, if (rt) fill_route_from_fnhe(rt, fnhe); } else { - if (depth > FNHE_RECLAIM_DEPTH) - fnhe = fnhe_oldest(hash); - else { - fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); - if (!fnhe) - goto out_unlock; - - fnhe->fnhe_next = hash->chain; - rcu_assign_pointer(hash->chain, fnhe); + /* Randomize max depth to avoid some side channels attacks. */ + int max_depth = FNHE_RECLAIM_DEPTH + + prandom_u32_max(FNHE_RECLAIM_DEPTH); + + while (depth > max_depth) { + fnhe_remove_oldest(hash); + depth--; } + + fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); + if (!fnhe) + goto out_unlock; + + fnhe->fnhe_next = hash->chain; + fnhe->fnhe_genid = genid; fnhe->fnhe_daddr = daddr; fnhe->fnhe_gw = gw; @@ -709,6 +721,8 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_mtu_locked = lock; fnhe->fnhe_expires = max(1UL, expires); + rcu_assign_pointer(hash->chain, fnhe); + /* Exception created; mark the cached routes for the nexthop * stale, so anyone caching it rechecks if this exception * applies to them. -- Gitee From 87310f443d73c42669f689e9b9e57ef7272ccb4c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Aug 2021 15:16:14 -0700 Subject: [PATCH 12/37] ipv6: make exception cache less predictible stable inclusion from stable-v4.19.215 commit c6d0d68d6da68159948cad3d808d61bb291a0283 category: bugfix issue:#I728MD CVE: CVE-2021-20322 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit a00df2caffed3883c341d5685f830434312e4a43 upstream. Even after commit 4785305c05b2 ("ipv6: use siphash in rt6_exception_hash()"), an attacker can still use brute force to learn some secrets from a victim linux host. One way to defeat these attacks is to make the max depth of the hash table bucket a random value. Before this patch, each bucket of the hash table used to store exceptions could contain 6 items under attack. After the patch, each bucket would contain a random number of items, between 6 and 10. The attacker can no longer infer secrets. This is slightly increasing memory size used by the hash table; we do not expect this to be a problem.
The following patch deals with the same issue in IPv4. Fixes: 35732d01fe31 ("ipv6: introduce a hash table to store dst cache") Signed-off-by: Eric Dumazet Reported-by: Keyu Man Cc: Wei Wang Cc: Martin KaFai Lau Reviewed-by: David Ahern Signed-off-by: David S. Miller [OP: adjusted context for 4.19 stable] Signed-off-by: Ovidiu Panait Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- net/ipv6/route.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6981d53848b8..5188609fca27 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1454,6 +1454,7 @@ static int rt6_insert_exception(struct rt6_info *nrt, struct rt6_exception_bucket *bucket; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; + int max_depth; int err = 0; spin_lock_bh(&rt6_exception_lock); @@ -1515,7 +1516,9 @@ static int rt6_insert_exception(struct rt6_info *nrt, bucket->depth++; net->ipv6.rt6_stats->fib_rt_cache++; - if (bucket->depth > FIB6_MAX_DEPTH) + /* Randomize max depth to avoid some side channels attacks. */ + max_depth = FIB6_MAX_DEPTH + prandom_u32_max(FIB6_MAX_DEPTH); + while (bucket->depth > max_depth) rt6_exception_remove_oldest(bucket); out: -- Gitee From d7493cab1fc526ada078c126615ccabcea78931d Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 16 Dec 2021 08:24:08 +0100 Subject: [PATCH 13/37] xen/console: harden hvc_xen against event channel storms stable inclusion from stable-v4.19.222 commit 57e46acb3b48ea4e8efb1e1bea2e89e0c6cc43e2 category: bugfix issue:#I728MP CVE: CVE-2021-28713 Signed-off-by: wanxiaoqing40281 --------------------------------------- This is commit fe415186b43df0db1f17fa3a46275fd92107fe71 upstream. The Xen console driver is still vulnerable to an attack via an excessive number of events sent by the backend. Fix that by using a lateeoi event channel. For the normal domU initial console this requires the introduction of bind_evtchn_to_irq_lateeoi() as there is no xenbus device available at the time the event channel is bound to the irq. As the decision whether an interrupt was spurious or not requires testing for bytes having been read from the backend, move sending the event into the if statement, as sending an event without having found any bytes to be read makes no sense at all.
This is part of XSA-391 Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- drivers/tty/hvc/hvc_xen.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index dc43fa96c3de..62df09f7cee6 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -37,6 +37,8 @@ struct xencons_info { struct xenbus_device *xbdev; struct xencons_interface *intf; unsigned int evtchn; + XENCONS_RING_IDX out_cons; + unsigned int out_cons_same; struct hvc_struct *hvc; int irq; int vtermno; @@ -131,6 +133,8 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) XENCONS_RING_IDX cons, prod; int recv = 0; struct xencons_info *xencons = vtermno_to_xencons(vtermno); + unsigned int eoiflag = 0; + if (xencons == NULL) return -EINVAL; intf = xencons->intf; @@ -146,7 +150,27 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) mb(); /* read ring before consuming */ intf->in_cons = cons; - notify_daemon(xencons); + /* + * When to mark interrupt having been spurious: + * - there was no new data to be read, and + * - the backend did not consume some output bytes, and + * - the previous round with no read data didn't see consumed bytes + * (we might have a race with an interrupt being in flight while + * updating xencons->out_cons, so account for that by allowing one + * round without any visible reason) + */ + if (intf->out_cons != xencons->out_cons) { + xencons->out_cons = intf->out_cons; + xencons->out_cons_same = 0; + } + if (recv) { + notify_daemon(xencons); + } else if (xencons->out_cons_same++ > 1) { + eoiflag = XEN_EOI_FLAG_SPURIOUS; + } + + xen_irq_lateeoi(xencons->irq, eoiflag); + return recv; } @@ -375,7 +399,7 @@ static int xencons_connect_backend(struct xenbus_device *dev, if (ret) return ret; info->evtchn = evtchn; - irq = bind_evtchn_to_irq(evtchn); + irq = bind_interdomain_evtchn_to_irq_lateeoi(dev->otherend_id, evtchn); if (irq < 0) return irq; info->irq = irq; @@ -539,7 +563,7 @@ static int __init xen_hvc_init(void) return r; info = vtermno_to_xencons(HVC_COOKIE); - info->irq = bind_evtchn_to_irq(info->evtchn); + info->irq = bind_evtchn_to_irq_lateeoi(info->evtchn); } if (info->irq < 0) info->irq = 0; /* NO_IRQ */ -- Gitee From 07bd91b439fb8e676255fd287f1b3c5cd96d5d08 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 9 Dec 2021 18:59:27 +0100 Subject: [PATCH 14/37] USB: gadget: detect too-big endpoint 0 requests stable inclusion from stable-v4.19.221 commit 13e45e7a262dd96e8161823314679543048709b9 category: bugfix issue:#I728N1 CVE: CVE-2021-39685 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit 153a2d7e3350cc89d406ba2d35be8793a64c2038 upstream. Sometimes USB hosts can ask for buffers that are too large from endpoint 0, which should not be allowed. If this happens for OUT requests, stall the endpoint, but for IN requests, trim the request size to the endpoint buffer size. 
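Sketched without diff markers (mirroring the composite.c hunk below), the net effect of the new ep0 guard is:

	/* Too-big OUT transfers are rejected (ep0 stalls); too-big IN
	 * replies are trimmed to the endpoint 0 buffer size. */
	if (w_length > USB_COMP_EP0_BUFSIZ) {
		if (ctrl->bRequestType == USB_DIR_OUT)
			goto done;		/* stall: host sends too much */
		w_length = USB_COMP_EP0_BUFSIZ;	/* trim the IN response */
	}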
Co-developed-by: Szymon Heidrich Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- drivers/usb/gadget/composite.c | 12 ++++++++++++ drivers/usb/gadget/legacy/dbgp.c | 13 +++++++++++++ drivers/usb/gadget/legacy/inode.c | 16 +++++++++++++++- 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 139b66565ed6..5a531c8826bb 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1624,6 +1624,18 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) struct usb_function *f = NULL; u8 endp; + if (w_length > USB_COMP_EP0_BUFSIZ) { + if (ctrl->bRequestType == USB_DIR_OUT) { + goto done; + } else { + /* Cast away the const, we are going to overwrite on purpose. */ + __le16 *temp = (__le16 *)&ctrl->wLength; + + *temp = cpu_to_le16(USB_COMP_EP0_BUFSIZ); + w_length = USB_COMP_EP0_BUFSIZ; + } + } + /* partial re-init of the response message; the function or the * gadget might need to intercept e.g. a control-OUT completion * when we delegate to it. diff --git a/drivers/usb/gadget/legacy/dbgp.c b/drivers/usb/gadget/legacy/dbgp.c index e1d566c9918a..e567afcb2794 100644 --- a/drivers/usb/gadget/legacy/dbgp.c +++ b/drivers/usb/gadget/legacy/dbgp.c @@ -345,6 +345,19 @@ static int dbgp_setup(struct usb_gadget *gadget, void *data = NULL; u16 len = 0; + if (length > DBGP_REQ_LEN) { + if (ctrl->bRequestType == USB_DIR_OUT) { + return err; + } else { + /* Cast away the const, we are going to overwrite on purpose. */ + __le16 *temp = (__le16 *)&ctrl->wLength; + + *temp = cpu_to_le16(DBGP_REQ_LEN); + length = DBGP_REQ_LEN; + } + } + + if (request == USB_REQ_GET_DESCRIPTOR) { switch (value>>8) { case USB_DT_DEVICE: diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index f2818195ccf3..9d32fa5d66b4 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -109,6 +109,8 @@ enum ep0_state { /* enough for the whole queue: most events invalidate others */ #define N_EVENT 5 +#define RBUF_SIZE 256 + struct dev_data { spinlock_t lock; refcount_t count; @@ -143,7 +145,7 @@ struct dev_data { struct dentry *dentry; /* except this scratch i/o buffer for ep0 */ - u8 rbuf [256]; + u8 rbuf[RBUF_SIZE]; }; static inline void get_dev (struct dev_data *data) @@ -1333,6 +1335,18 @@ gadgetfs_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) u16 w_value = le16_to_cpu(ctrl->wValue); u16 w_length = le16_to_cpu(ctrl->wLength); + if (w_length > RBUF_SIZE) { + if (ctrl->bRequestType == USB_DIR_OUT) { + return value; + } else { + /* Cast away the const, we are going to overwrite on purpose. */ + __le16 *temp = (__le16 *)&ctrl->wLength; + + *temp = cpu_to_le16(RBUF_SIZE); + w_length = RBUF_SIZE; + } + } + spin_lock (&dev->lock); dev->setup_abort = 0; if (dev->state == STATE_DEV_UNCONNECTED) { -- Gitee From 6044046d57e2517b4f73528809c8975965cdc148 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 9 Dec 2021 19:02:15 +0100 Subject: [PATCH 15/37] USB: gadget: zero allocate endpoint 0 buffers stable inclusion from stable-v4.19.221 commit 32de5efd483db68f12233fbf63743a2d92f20ae4 category: bugfix issue:#I728N1 CVE: CVE-2021-39685 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit 86ebbc11bb3f60908a51f3e41a17e3f477c2eaa3 upstream. Under some conditions, USB gadget devices can show allocated buffer contents to a host. 
Fix this up by zero-allocating them so that any extra data will all just be zeros. Reported-by: Szymon Heidrich Tested-by: Szymon Heidrich Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- drivers/usb/gadget/composite.c | 2 +- drivers/usb/gadget/legacy/dbgp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 5a531c8826bb..e64756b9b3cc 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2152,7 +2152,7 @@ int composite_dev_prepare(struct usb_composite_driver *composite, if (!cdev->req) return -ENOMEM; - cdev->req->buf = kmalloc(USB_COMP_EP0_BUFSIZ, GFP_KERNEL); + cdev->req->buf = kzalloc(USB_COMP_EP0_BUFSIZ, GFP_KERNEL); if (!cdev->req->buf) goto fail; diff --git a/drivers/usb/gadget/legacy/dbgp.c b/drivers/usb/gadget/legacy/dbgp.c index e567afcb2794..355bc7dab9d5 100644 --- a/drivers/usb/gadget/legacy/dbgp.c +++ b/drivers/usb/gadget/legacy/dbgp.c @@ -137,7 +137,7 @@ static int dbgp_enable_ep_req(struct usb_ep *ep) goto fail_1; } - req->buf = kmalloc(DBGP_REQ_LEN, GFP_KERNEL); + req->buf = kzalloc(DBGP_REQ_LEN, GFP_KERNEL); if (!req->buf) { err = -ENOMEM; stp = 2; -- Gitee From 2cce8955e14ed51d2c492996cef0dac843de81e7 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Dec 2021 15:53:08 -0800 Subject: [PATCH 16/37] wait: add wake_up_pollfree() stable inclusion from stable-v4.19.221 commit 8dd7c46a59756bdc29cb9783338b899cd3fb4b83 category: bugfix issue:#I728N9 CVE: CVE-2021-39698 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit 42288cb44c4b5fff7653bc392b583a2b8bd6a8c0 upstream. Several ->poll() implementations are special in that they use a waitqueue whose lifetime is the current task, rather than the struct file as is normally the case. This is okay for blocking polls, since a blocking poll occurs within one task; however, non-blocking polls require another solution. This solution is for the queue to be cleared before it is freed, using 'wake_up_poll(wq, EPOLLHUP | POLLFREE);'. However, that has a bug: wake_up_poll() calls __wake_up() with nr_exclusive=1. Therefore, if there are multiple "exclusive" waiters, and the wakeup function for the first one returns a positive value, only that one will be called. That's *not* what's needed for POLLFREE; POLLFREE is special in that it really needs to wake up everyone. Considering the three non-blocking poll systems: - io_uring poll doesn't handle POLLFREE at all, so it is broken anyway. - aio poll is unaffected, since it doesn't support exclusive waits. However, that's fragile, as someone could add this feature later. - epoll doesn't appear to be broken by this, since its wakeup function returns 0 when it sees POLLFREE. But this is fragile. Although there is a workaround (see epoll), it's better to define a function which always sends POLLFREE to all waiters. Add such a function. Also make it verify that the queue really becomes empty after all waiters have been woken up. 
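A hedged usage sketch (hypothetical driver object, not code from this series): a ->poll() provider whose waitqueue lives in a per-task object would flush pollers with the new helper and then RCU-delay the free, exactly as the kernel-doc added below requires.

#include <linux/wait.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_ctx {				/* hypothetical task-lifetime object */
	wait_queue_head_t wait;		/* handed out via ->poll() */
	struct rcu_head rcu;
};

static void my_ctx_release(struct my_ctx *ctx)
{
	wake_up_pollfree(&ctx->wait);	/* evict any non-blocking pollers */
	kfree_rcu(ctx, rcu);		/* RCU-delay the actual free */
}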
Reported-by: Linus Torvalds Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211209010455.42744-2-ebiggers@kernel.org Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- include/linux/wait.h | 26 ++++++++++++++++++++++++++ kernel/sched/wait.c | 7 +++++++ 2 files changed, 33 insertions(+) diff --git a/include/linux/wait.h b/include/linux/wait.h index ed7c122cb31f..1d726d79063c 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -191,6 +191,7 @@ void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head, void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); +void __wake_up_pollfree(struct wait_queue_head *wq_head); #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) @@ -217,6 +218,31 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr); #define wake_up_interruptible_sync_poll(x, m) \ __wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, poll_to_key(m)) +/** + * wake_up_pollfree - signal that a polled waitqueue is going away + * @wq_head: the wait queue head + * + * In the very rare cases where a ->poll() implementation uses a waitqueue whose + * lifetime is tied to a task rather than to the 'struct file' being polled, + * this function must be called before the waitqueue is freed so that + * non-blocking polls (e.g. epoll) are notified that the queue is going away. + * + * The caller must also RCU-delay the freeing of the wait_queue_head, e.g. via + * an explicit synchronize_rcu() or call_rcu(), or via SLAB_TYPESAFE_BY_RCU. + */ +static inline void wake_up_pollfree(struct wait_queue_head *wq_head) +{ + /* + * For performance reasons, we don't always take the queue lock here. + * Therefore, we might race with someone removing the last entry from + * the queue, and proceed while they still hold the queue lock. + * However, rcu_read_lock() is required to be held in such cases, so we + * can safely proceed with an RCU-delayed free. + */ + if (waitqueue_active(wq_head)) + __wake_up_pollfree(wq_head); +} + #define ___wait_cond_timeout(condition) \ ({ \ bool __cond = (condition); \ diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 5dd47f1103d1..a44a57fb9540 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -209,6 +209,13 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr_e } EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ +void __wake_up_pollfree(struct wait_queue_head *wq_head) +{ + __wake_up(wq_head, TASK_NORMAL, 0, poll_to_key(EPOLLHUP | POLLFREE)); + /* POLLFREE must have cleared the queue. */ + WARN_ON_ONCE(waitqueue_active(wq_head)); +} + /* * Note: we use "set_current_state()" _after_ the wait-queue add, * because we need a memory barrier there on SMP, so that any -- Gitee From 5d2da566fff9202c6e0d50d7bf2a7fbe081da354 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Dec 2021 15:53:09 -0800 Subject: [PATCH 17/37] binder: use wake_up_pollfree() stable inclusion from stable-v4.19.221 commit 32288f504035b6c359cc33ee615f74f14be2e38a category: bugfix issue:#I728N9 CVE: CVE-2021-39698 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit a880b28a71e39013e357fd3adccd1d8a31bc69a8 upstream. 
wake_up_poll() uses nr_exclusive=1, so it's not guaranteed to wake up all exclusive waiters. Yet, POLLFREE *must* wake up all waiters. epoll and aio poll are fortunately not affected by this, but it's very fragile. Thus, the new function wake_up_pollfree() has been introduced. Convert binder to use wake_up_pollfree(). Reported-by: Linus Torvalds Fixes: f5cb779ba163 ("ANDROID: binder: remove waitqueue when thread exits.") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211209010455.42744-3-ebiggers@kernel.org Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- drivers/android/binder.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index dc559ccd684d..8b949ffccd81 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -4437,23 +4437,20 @@ static int binder_thread_release(struct binder_proc *proc, } /* - * If this thread used poll, make sure we remove the waitqueue - * from any epoll data structures holding it with POLLFREE. - * waitqueue_active() is safe to use here because we're holding - * the inner lock. + * If this thread used poll, make sure we remove the waitqueue from any + * poll data structures holding it. */ - if ((thread->looper & BINDER_LOOPER_STATE_POLL) && - waitqueue_active(&thread->wait)) { - wake_up_poll(&thread->wait, EPOLLHUP | POLLFREE); - } + if (thread->looper & BINDER_LOOPER_STATE_POLL) + wake_up_pollfree(&thread->wait); binder_inner_proc_unlock(thread->proc); /* - * This is needed to avoid races between wake_up_poll() above and - * and ep_remove_waitqueue() called for other reasons (eg the epoll file - * descriptor being closed); ep_remove_waitqueue() holds an RCU read - * lock, so we can be sure it's done after calling synchronize_rcu(). + * This is needed to avoid races between wake_up_pollfree() above and + * someone else removing the last entry from the queue for other reasons + * (e.g. ep_remove_wait_queue() being called due to an epoll file + * descriptor being closed). Such other users hold an RCU read lock, so + * we can be sure they're done after we call synchronize_rcu(). */ if (thread->looper & BINDER_LOOPER_STATE_POLL) synchronize_rcu(); -- Gitee From ad842134ed95f99573e9f7a12e02392752fa79e3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Dec 2021 15:53:10 -0800 Subject: [PATCH 18/37] signalfd: use wake_up_pollfree() stable inclusion from stable-v4.19.221 commit f226fdd855b7d9c1f2a6e878d82eb3e1fbc880e9 category: bugfix issue:#I728N9 CVE: CVE-2021-39698 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit 9537bae0da1f8d1e2361ab6d0479e8af7824e160 upstream. wake_up_poll() uses nr_exclusive=1, so it's not guaranteed to wake up all exclusive waiters. Yet, POLLFREE *must* wake up all waiters. epoll and aio poll are fortunately not affected by this, but it's very fragile. Thus, the new function wake_up_pollfree() has been introduced. Convert signalfd to use wake_up_pollfree(). 
Reported-by: Linus Torvalds Fixes: d80e731ecab4 ("epoll: introduce POLLFREE to flush ->signalfd_wqh before kfree()") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20211209010455.42744-4-ebiggers@kernel.org Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- fs/signalfd.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/fs/signalfd.c b/fs/signalfd.c index 3c40a3bf772c..94e0ae01db5c 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -35,17 +35,7 @@ void signalfd_cleanup(struct sighand_struct *sighand) { - wait_queue_head_t *wqh = &sighand->signalfd_wqh; - /* - * The lockless check can race with remove_wait_queue() in progress, - * but in this case its caller should run under rcu_read_lock() and - * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return. - */ - if (likely(!waitqueue_active(wqh))) - return; - - /* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */ - wake_up_poll(wqh, EPOLLHUP | POLLFREE); + wake_up_pollfree(&sighand->signalfd_wqh); } struct signalfd_ctx { -- Gitee From bc42a2da1911bafdb19a53e1c0e0c2fcded8b1bb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Dec 2021 15:53:11 -0800 Subject: [PATCH 19/37] aio: keep poll requests on waitqueue until completed stable inclusion from stable-v4.19.221 commit 580c7e023303ce3a187adcaa40868bfc740725d2 category: bugfix issue:#I728N9 CVE: CVE-2021-39698 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit 363bee27e25804d8981dd1c025b4ad49dc39c530 upstream. Currently, aio_poll_wake() will always remove the poll request from the waitqueue. Then, if aio_poll_complete_work() sees that none of the polled events are ready and the request isn't cancelled, it re-adds the request to the waitqueue. (This can easily happen when polling a file that doesn't pass an event mask when waking up its waitqueue.) This is fundamentally broken for two reasons: 1. If a wakeup occurs between vfs_poll() and the request being re-added to the waitqueue, it will be missed because the request wasn't on the waitqueue at the time. Therefore, IOCB_CMD_POLL might never complete even if the polled file is ready. 2. When the request isn't on the waitqueue, there is no way to be notified that the waitqueue is being freed (which happens when its lifetime is shorter than the struct file's). This is supposed to happen via the waitqueue entries being woken up with POLLFREE. Therefore, leave the requests on the waitqueue until they are actually completed (or cancelled). To keep track of when aio_poll_complete_work needs to be scheduled, use new fields in struct poll_iocb. Remove the 'done' field which is now redundant. Note that this is consistent with how sys_poll() and eventpoll work; their wakeup functions do *not* remove the waitqueue entries. 
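In sketch form, the wakeup-side bookkeeping the new poll_iocb fields implement (condensed from the aio_poll_wake() hunk below): the waitqueue entry stays queued, and the two flags only track whether the completion work is in flight.

	/* Wakeup that cannot complete inline: never dequeue here. */
	if (req->work_scheduled) {
		req->work_need_resched = true;	/* work already in flight */
	} else {
		schedule_work(&req->work);	/* first wakeup: kick the work */
		req->work_scheduled = true;
	}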
Fixes: 2c14fa838cbe ("aio: implement IOCB_CMD_POLL") Cc: # v4.18+ Link: https://lore.kernel.org/r/20211209010455.42744-5-ebiggers@kernel.org Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- fs/aio.c | 83 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 63 insertions(+), 20 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 413ec289bfa1..7c623ca02077 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -176,8 +176,9 @@ struct poll_iocb { struct file *file; struct wait_queue_head *head; __poll_t events; - bool done; bool cancelled; + bool work_scheduled; + bool work_need_resched; struct wait_queue_entry wait; struct work_struct work; }; @@ -1635,14 +1636,26 @@ static void aio_poll_complete_work(struct work_struct *work) * avoid further branches in the fast path. */ spin_lock_irq(&ctx->ctx_lock); + spin_lock(&req->head->lock); if (!mask && !READ_ONCE(req->cancelled)) { - add_wait_queue(req->head, &req->wait); + /* + * The request isn't actually ready to be completed yet. + * Reschedule completion if another wakeup came in. + */ + if (req->work_need_resched) { + schedule_work(&req->work); + req->work_need_resched = false; + } else { + req->work_scheduled = false; + } + spin_unlock(&req->head->lock); spin_unlock_irq(&ctx->ctx_lock); return; } + list_del_init(&req->wait.entry); + spin_unlock(&req->head->lock); list_del_init(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); - req->done = true; spin_unlock_irq(&ctx->ctx_lock); iocb_put(iocb); @@ -1656,9 +1669,9 @@ static int aio_poll_cancel(struct kiocb *iocb) spin_lock(&req->head->lock); WRITE_ONCE(req->cancelled, true); - if (!list_empty(&req->wait.entry)) { - list_del_init(&req->wait.entry); + if (!req->work_scheduled) { schedule_work(&aiocb->poll.work); + req->work_scheduled = true; } spin_unlock(&req->head->lock); @@ -1677,20 +1690,26 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, if (mask && !(mask & req->events)) return 0; - list_del_init(&req->wait.entry); - - if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { + /* + * Complete the request inline if possible. This requires that three + * conditions be met: + * 1. An event mask must have been passed. If a plain wakeup was done + * instead, then mask == 0 and we have to call vfs_poll() to get + * the events, so inline completion isn't possible. + * 2. The completion work must not have already been scheduled. + * 3. ctx_lock must not be busy. We have to use trylock because we + * already hold the waitqueue lock, so this inverts the normal + * locking order. Use irqsave/irqrestore because not all + * filesystems (e.g. fuse) call this function with IRQs disabled, + * yet IRQs have to be disabled before ctx_lock is obtained. + */ + if (mask && !req->work_scheduled && + spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { struct kioctx *ctx = iocb->ki_ctx; - /* - * Try to complete the iocb inline if we can. Use - * irqsave/irqrestore because not all filesystems (e.g. fuse) - * call this function with IRQs disabled and because IRQs - * have to be disabled before ctx_lock is obtained. 
- */ + list_del_init(&req->wait.entry); list_del(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); - req->done = true; if (iocb->ki_eventfd && eventfd_signal_count()) { iocb = NULL; INIT_WORK(&req->work, aio_poll_put_work); @@ -1700,7 +1719,20 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, if (iocb) iocb_put(iocb); } else { - schedule_work(&req->work); + /* + * Schedule the completion work if needed. If it was already + * scheduled, record that another wakeup came in. + * + * Don't remove the request from the waitqueue here, as it might + * not actually be complete yet (we won't know until vfs_poll() + * is called), and we must not miss any wakeups. + */ + if (req->work_scheduled) { + req->work_need_resched = true; + } else { + schedule_work(&req->work); + req->work_scheduled = true; + } } return 1; } @@ -1747,8 +1779,9 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; req->head = NULL; - req->done = false; req->cancelled = false; + req->work_scheduled = false; + req->work_need_resched = false; apt.pt._qproc = aio_poll_queue_proc; apt.pt._key = req->events; @@ -1763,17 +1796,27 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) spin_lock_irq(&ctx->ctx_lock); if (likely(req->head)) { spin_lock(&req->head->lock); - if (unlikely(list_empty(&req->wait.entry))) { - if (apt.error) + if (list_empty(&req->wait.entry) || req->work_scheduled) { + /* + * aio_poll_wake() already either scheduled the async + * completion work, or completed the request inline. + */ + if (apt.error) /* unsupported case: multiple queues */ cancel = true; apt.error = 0; mask = 0; } if (mask || apt.error) { + /* Steal to complete synchronously. */ list_del_init(&req->wait.entry); } else if (cancel) { + /* Cancel if possible (may be too late though). */ WRITE_ONCE(req->cancelled, true); - } else if (!req->done) { /* actually waiting for an event */ + } else if (!list_empty(&req->wait.entry)) { + /* + * Actually waiting for an event, so add the request to + * active_reqs so that it can be cancelled if needed. + */ list_add_tail(&aiocb->ki_list, &ctx->active_reqs); aiocb->ki_cancel = aio_poll_cancel; } -- Gitee From 69fa68581a691893d0505fab7178f256af9f7ab4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 10 Dec 2021 15:53:12 -0800 Subject: [PATCH 20/37] aio: fix use-after-free due to missing POLLFREE handling stable inclusion from stable-v4.19.221 commit 321fba81ec034f88aea4898993c1bf15605c023f category: bugfix issue:#I728N9 CVE: CVE-2021-39698 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit 50252e4b5e989ce64555c7aef7516bdefc2fea72 upstream. signalfd_poll() and binder_poll() are special in that they use a waitqueue whose lifetime is the current task, rather than the struct file as is normally the case. This is okay for blocking polls, since a blocking poll occurs within one task; however, non-blocking polls require another solution. This solution is for the queue to be cleared before it is freed, by sending a POLLFREE notification to all waiters. Unfortunately, only eventpoll handles POLLFREE. A second type of non-blocking poll, aio poll, was added in kernel v4.18, and it doesn't handle POLLFREE. This allows a use-after-free to occur if a signalfd or binder fd is polled with aio poll, and the waitqueue gets freed. Fix this by making aio poll handle POLLFREE. 
A patch by Ramji Jiyani (https://lore.kernel.org/r/20211027011834.2497484-1-ramjiyani@google.com) tried to do this by making aio_poll_wake() always complete the request inline if POLLFREE is seen. However, that solution had two bugs. First, it introduced a deadlock, as it unconditionally locked the aio context while holding the waitqueue lock, which inverts the normal locking order. Second, it didn't consider that POLLFREE notifications are missed while the request has been temporarily de-queued. The second problem was solved by my previous patch. This patch then properly fixes the use-after-free by handling POLLFREE in a deadlock-free way. It does this by taking advantage of the fact that freeing of the waitqueue is RCU-delayed, similar to what eventpoll does. Fixes: 2c14fa838cbe ("aio: implement IOCB_CMD_POLL") Cc: # v4.18+ Link: https://lore.kernel.org/r/20211209010455.42744-6-ebiggers@kernel.org Signed-off-by: Eric Biggers Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- fs/aio.c | 137 ++++++++++++++++++++++++-------- include/uapi/asm-generic/poll.h | 2 +- 2 files changed, 107 insertions(+), 32 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 7c623ca02077..9635c29b83da 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1617,6 +1617,51 @@ static void aio_poll_put_work(struct work_struct *work) iocb_put(iocb); } +/* + * Safely lock the waitqueue which the request is on, synchronizing with the + * case where the ->poll() provider decides to free its waitqueue early. + * + * Returns true on success, meaning that req->head->lock was locked, req->wait + * is on req->head, and an RCU read lock was taken. Returns false if the + * request was already removed from its waitqueue (which might no longer exist). + */ +static bool poll_iocb_lock_wq(struct poll_iocb *req) +{ + wait_queue_head_t *head; + + /* + * While we hold the waitqueue lock and the waitqueue is nonempty, + * wake_up_pollfree() will wait for us. However, taking the waitqueue + * lock in the first place can race with the waitqueue being freed. + * + * We solve this as eventpoll does: by taking advantage of the fact that + * all users of wake_up_pollfree() will RCU-delay the actual free. If + * we enter rcu_read_lock() and see that the pointer to the queue is + * non-NULL, we can then lock it without the memory being freed out from + * under us, then check whether the request is still on the queue. + * + * Keep holding rcu_read_lock() as long as we hold the queue lock, in + * case the caller deletes the entry from the queue, leaving it empty. + * In that case, only RCU prevents the queue memory from being freed. + */ + rcu_read_lock(); + head = smp_load_acquire(&req->head); + if (head) { + spin_lock(&head->lock); + if (!list_empty(&req->wait.entry)) + return true; + spin_unlock(&head->lock); + } + rcu_read_unlock(); + return false; +} + +static void poll_iocb_unlock_wq(struct poll_iocb *req) +{ + spin_unlock(&req->head->lock); + rcu_read_unlock(); +} + static void aio_poll_complete_work(struct work_struct *work) { struct poll_iocb *req = container_of(work, struct poll_iocb, work); @@ -1636,24 +1681,25 @@ static void aio_poll_complete_work(struct work_struct *work) * avoid further branches in the fast path. */ spin_lock_irq(&ctx->ctx_lock); - spin_lock(&req->head->lock); - if (!mask && !READ_ONCE(req->cancelled)) { - /* - * The request isn't actually ready to be completed yet. - * Reschedule completion if another wakeup came in. 
- */ - if (req->work_need_resched) { - schedule_work(&req->work); - req->work_need_resched = false; - } else { - req->work_scheduled = false; + if (poll_iocb_lock_wq(req)) { + if (!mask && !READ_ONCE(req->cancelled)) { + /* + * The request isn't actually ready to be completed yet. + * Reschedule completion if another wakeup came in. + */ + if (req->work_need_resched) { + schedule_work(&req->work); + req->work_need_resched = false; + } else { + req->work_scheduled = false; + } + poll_iocb_unlock_wq(req); + spin_unlock_irq(&ctx->ctx_lock); + return; } - spin_unlock(&req->head->lock); - spin_unlock_irq(&ctx->ctx_lock); - return; - } - list_del_init(&req->wait.entry); - spin_unlock(&req->head->lock); + list_del_init(&req->wait.entry); + poll_iocb_unlock_wq(req); + } /* else, POLLFREE has freed the waitqueue, so we must complete */ list_del_init(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); spin_unlock_irq(&ctx->ctx_lock); @@ -1667,13 +1713,14 @@ static int aio_poll_cancel(struct kiocb *iocb) struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw); struct poll_iocb *req = &aiocb->poll; - spin_lock(&req->head->lock); - WRITE_ONCE(req->cancelled, true); - if (!req->work_scheduled) { - schedule_work(&aiocb->poll.work); - req->work_scheduled = true; - } - spin_unlock(&req->head->lock); + if (poll_iocb_lock_wq(req)) { + WRITE_ONCE(req->cancelled, true); + if (!req->work_scheduled) { + schedule_work(&aiocb->poll.work); + req->work_scheduled = true; + } + poll_iocb_unlock_wq(req); + } /* else, the request was force-cancelled by POLLFREE already */ return 0; } @@ -1725,7 +1772,8 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, * * Don't remove the request from the waitqueue here, as it might * not actually be complete yet (we won't know until vfs_poll() - * is called), and we must not miss any wakeups. + * is called), and we must not miss any wakeups. POLLFREE is an + * exception to this; see below. */ if (req->work_scheduled) { req->work_need_resched = true; @@ -1733,6 +1781,28 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, schedule_work(&req->work); req->work_scheduled = true; } + + /* + * If the waitqueue is being freed early but we can't complete + * the request inline, we have to tear down the request as best + * we can. That means immediately removing the request from its + * waitqueue and preventing all further accesses to the + * waitqueue via the request. We also need to schedule the + * completion work (done above). Also mark the request as + * cancelled, to potentially skip an unneeded call to ->poll(). + */ + if (mask & POLLFREE) { + WRITE_ONCE(req->cancelled, true); + list_del_init(&req->wait.entry); + + /* + * Careful: this *must* be the last step, since as soon + * as req->head is NULL'ed out, the request can be + * completed and freed, since aio_poll_complete_work() + * will no longer need to take the waitqueue lock. 
+ */ + smp_store_release(&req->head, NULL); + } } return 1; } @@ -1740,6 +1810,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, struct aio_poll_table { struct poll_table_struct pt; struct aio_kiocb *iocb; + bool queued; int error; }; @@ -1750,11 +1821,12 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head, struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt); /* multiple wait queues per file are not supported */ - if (unlikely(pt->iocb->poll.head)) { + if (unlikely(pt->queued)) { pt->error = -EINVAL; return; } + pt->queued = true; pt->error = 0; pt->iocb->poll.head = head; add_wait_queue(head, &pt->iocb->poll.wait); @@ -1786,6 +1858,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) apt.pt._qproc = aio_poll_queue_proc; apt.pt._key = req->events; apt.iocb = aiocb; + apt.queued = false; apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */ /* initialized the list so that we can do list_empty checks */ @@ -1794,9 +1867,10 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) mask = vfs_poll(req->file, &apt.pt) & req->events; spin_lock_irq(&ctx->ctx_lock); - if (likely(req->head)) { - spin_lock(&req->head->lock); - if (list_empty(&req->wait.entry) || req->work_scheduled) { + if (likely(apt.queued)) { + bool on_queue = poll_iocb_lock_wq(req); + + if (!on_queue || req->work_scheduled) { /* * aio_poll_wake() already either scheduled the async * completion work, or completed the request inline. @@ -1812,7 +1886,7 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) } else if (cancel) { /* Cancel if possible (may be too late though). */ WRITE_ONCE(req->cancelled, true); - } else if (!list_empty(&req->wait.entry)) { + } else if (on_queue) { /* * Actually waiting for an event, so add the request to * active_reqs so that it can be cancelled if needed. @@ -1820,7 +1894,8 @@ static ssize_t aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) list_add_tail(&aiocb->ki_list, &ctx->active_reqs); aiocb->ki_cancel = aio_poll_cancel; } - spin_unlock(&req->head->lock); + if (on_queue) + poll_iocb_unlock_wq(req); } if (mask) { /* no async, we'd stolen it */ aiocb->ki_res.res = mangle_poll(mask); diff --git a/include/uapi/asm-generic/poll.h b/include/uapi/asm-generic/poll.h index 41b509f410bf..f9c520ce4bf4 100644 --- a/include/uapi/asm-generic/poll.h +++ b/include/uapi/asm-generic/poll.h @@ -29,7 +29,7 @@ #define POLLRDHUP 0x2000 #endif -#define POLLFREE (__force __poll_t)0x4000 /* currently only for epoll */ +#define POLLFREE (__force __poll_t)0x4000 #define POLL_BUSY_LOOP (__force __poll_t)0x8000 -- Gitee From 602d9244516b99a4b3e9f4052c0f7ce2b1cb715a Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 10 Dec 2021 10:47:29 +0000 Subject: [PATCH 21/37] net: sched: use Qdisc rcu API instead of relying on rtnl lock from stable-v4.19.221 commit ae214e04b95ff64a4b0e9aab6742520bfde6ff0c category: bugfix issue:#I728NE CVE: CVE-2021-39713 Signed-off-by: wanxiaoqing40281 --------------------------------------- [ Upstream commit e368fdb61d8e7c67ac70791b23345b26d7bbc661 ] As a preparation from removing rtnl lock dependency from rules update path, use Qdisc rcu and reference counting capabilities instead of relying on rtnl lock while working with Qdiscs. Create new tcf_block_release() function, and use it to free resources taken by tcf_block_find(). Currently, this function only releases Qdisc and it is extended in next patches in this series. 
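For orientation, the conversion in this patch follows the standard RCU-plus-refcount lookup idiom. A condensed sketch of the pattern, written with the helpers this series introduces (error handling trimmed; not the literal kernel code):

    #include <linux/netdevice.h>
    #include <net/pkt_sched.h>
    #include <net/sch_generic.h>

    static struct Qdisc *grab_qdisc_example(struct net *net, int ifindex, u32 parent)
    {
            struct net_device *dev;
            struct Qdisc *q = NULL;

            rcu_read_lock();
            dev = dev_get_by_index_rcu(net, ifindex);
            if (dev)
                    q = qdisc_lookup_rcu(dev, TC_H_MAJ(parent));
            if (q)
                    q = qdisc_refcount_inc_nz(q);   /* NULL if refcnt already hit zero */
            rcu_read_unlock();

            return q;       /* caller pairs this with qdisc_put() */
    }

Taking the reference before leaving the RCU read-side section is what makes it safe to drop rcu_read_lock() and keep using the Qdisc without holding the rtnl mutex.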
Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller [Lee: Sent to Stable] Link: https://syzkaller.appspot.com/bug?id=d7e411c5472dd5da33d8cc921ccadc747743a568 Reported-by: syzbot+5f229e48cccc804062c0@syzkaller.appspotmail.com Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- net/sched/cls_api.c | 79 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 15 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 184c20b86393..79113b7b1787 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -539,6 +539,7 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, struct netlink_ext_ack *extack) { struct tcf_block *block; + int err = 0; if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) { block = tcf_block_lookup(net, block_index); @@ -550,55 +551,93 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, const struct Qdisc_class_ops *cops; struct net_device *dev; + rcu_read_lock(); + /* Find link */ - dev = __dev_get_by_index(net, ifindex); - if (!dev) + dev = dev_get_by_index_rcu(net, ifindex); + if (!dev) { + rcu_read_unlock(); return ERR_PTR(-ENODEV); + } /* Find qdisc */ if (!*parent) { *q = dev->qdisc; *parent = (*q)->handle; } else { - *q = qdisc_lookup(dev, TC_H_MAJ(*parent)); + *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); if (!*q) { NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); - return ERR_PTR(-EINVAL); + err = -EINVAL; + goto errout_rcu; } } + *q = qdisc_refcount_inc_nz(*q); + if (!*q) { + NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); + err = -EINVAL; + goto errout_rcu; + } + /* Is it classful? */ cops = (*q)->ops->cl_ops; if (!cops) { NL_SET_ERR_MSG(extack, "Qdisc not classful"); - return ERR_PTR(-EINVAL); + err = -EINVAL; + goto errout_rcu; } if (!cops->tcf_block) { NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); - return ERR_PTR(-EOPNOTSUPP); + err = -EOPNOTSUPP; + goto errout_rcu; } + /* At this point we know that qdisc is not noop_qdisc, + * which means that qdisc holds a reference to net_device + * and we hold a reference to qdisc, so it is safe to release + * rcu read lock. + */ + rcu_read_unlock(); + /* Do we search for filter, attached to class? */ if (TC_H_MIN(*parent)) { *cl = cops->find(*q, *parent); if (*cl == 0) { NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); - return ERR_PTR(-ENOENT); + err = -ENOENT; + goto errout_qdisc; } } /* And the last stroke */ block = cops->tcf_block(*q, *cl, extack); - if (!block) - return ERR_PTR(-EINVAL); + if (!block) { + err = -EINVAL; + goto errout_qdisc; + } if (tcf_block_shared(block)) { NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters"); - return ERR_PTR(-EOPNOTSUPP); + err = -EOPNOTSUPP; + goto errout_qdisc; } } return block; + +errout_rcu: + rcu_read_unlock(); +errout_qdisc: + if (*q) + qdisc_put(*q); + return ERR_PTR(err); +} + +static void tcf_block_release(struct Qdisc *q, struct tcf_block *block) +{ + if (q) + qdisc_put(q); } struct tcf_block_owner_item { @@ -1336,6 +1375,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, errout: if (chain) tcf_chain_put(chain); + tcf_block_release(q, block); if (err == -EAGAIN) /* Replay the request. 
*/ goto replay; @@ -1457,6 +1497,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, errout: if (chain) tcf_chain_put(chain); + tcf_block_release(q, block); return err; } @@ -1542,6 +1583,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, errout: if (chain) tcf_chain_put(chain); + tcf_block_release(q, block); return err; } @@ -1858,7 +1900,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; if (chain_index > TC_ACT_EXT_VAL_MASK) { NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); - return -EINVAL; + err = -EINVAL; + goto errout_block; } chain = tcf_chain_lookup(block, chain_index); if (n->nlmsg_type == RTM_NEWCHAIN) { @@ -1870,23 +1913,27 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, tcf_chain_hold(chain); } else { NL_SET_ERR_MSG(extack, "Filter chain already exists"); - return -EEXIST; + err = -EEXIST; + goto errout_block; } } else { if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain"); - return -ENOENT; + err = -ENOENT; + goto errout_block; } chain = tcf_chain_create(block, chain_index); if (!chain) { NL_SET_ERR_MSG(extack, "Failed to create filter chain"); - return -ENOMEM; + err = -ENOMEM; + goto errout_block; } } } else { if (!chain || tcf_chain_held_by_acts_only(chain)) { NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); - return -EINVAL; + err = -EINVAL; + goto errout_block; } tcf_chain_hold(chain); } @@ -1930,6 +1977,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, errout: tcf_chain_put(chain); +errout_block: + tcf_block_release(q, block); if (err == -EAGAIN) /* Replay the request. */ goto replay; -- Gitee From 61f57046fa6022fc7e2f0976b6ca4b0261555151 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 10 Dec 2021 10:47:28 +0000 Subject: [PATCH 22/37] net: sched: add helper function to take reference to Qdisc stable inclusion from stable-v4.19.221 commit da1d324088c40fa0a382224c466175fc5c704106 category: bugfix issue:#I728NE CVE: CVE-2021-39713 Signed-off-by: wanxiaoqing40281 --------------------------------------- [ Upstream commit 9d7e82cec35c027756ec97e274f878251f271181 ] Implement function to take reference to Qdisc that relies on rcu read lock instead of rtnl mutex. Function only takes reference to Qdisc if reference counter isn't zero. Intended to be used by unlocked cls API. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller [Lee: Sent to Stable] Link: https://syzkaller.appspot.com/bug?id=d7e411c5472dd5da33d8cc921ccadc747743a568 Reported-by: syzbot+5f229e48cccc804062c0@syzkaller.appspotmail.com Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- include/net/sch_generic.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d737a6a2600b..34e4dba0f7b4 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -117,6 +117,19 @@ static inline void qdisc_refcount_inc(struct Qdisc *qdisc) refcount_inc(&qdisc->refcnt); } +/* Intended to be used by unlocked users, when concurrent qdisc release is + * possible. 
+ */ + +static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN) + return qdisc; + if (refcount_inc_not_zero(&qdisc->refcnt)) + return qdisc; + return NULL; +} + static inline bool qdisc_is_running(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) -- Gitee From d0e21c8a4df84749a6d78160f0f8d3f0cc61129a Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 10 Dec 2021 10:47:26 +0000 Subject: [PATCH 23/37] net: sched: rename qdisc_destroy() to qdisc_put() stable inclusion from stable-v4.19.221 commit 92833e8b5db6c209e9311ac8c6a44d3bf1856659 category: bugfix issue:#I728NE CVE: CVE-2021-39713 Signed-off-by: wanxiaoqing40281 --------------------------------------- [ Upstream commit 86bd446b5cebd783187ea3772ff258210de77d99 ] Current implementation of qdisc_destroy() decrements Qdisc reference counter and only actually destroy Qdisc if reference counter value reached zero. Rename qdisc_destroy() to qdisc_put() in order for it to better describe the way in which this function currently implemented and used. Extract code that deallocates Qdisc into new private qdisc_destroy() function. It is intended to be shared between regular qdisc_put() and its unlocked version that is introduced in next patch in this series. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller [Lee: Sent to Stable] Link: https://syzkaller.appspot.com/bug?id=d7e411c5472dd5da33d8cc921ccadc747743a568 Reported-by: syzbot+5f229e48cccc804062c0@syzkaller.appspotmail.com Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- include/net/sch_generic.h | 2 +- net/sched/sch_api.c | 6 +++--- net/sched/sch_atm.c | 2 +- net/sched/sch_cbq.c | 2 +- net/sched/sch_cbs.c | 2 +- net/sched/sch_drr.c | 4 ++-- net/sched/sch_dsmark.c | 2 +- net/sched/sch_fifo.c | 2 +- net/sched/sch_generic.c | 23 ++++++++++++++--------- net/sched/sch_hfsc.c | 2 +- net/sched/sch_htb.c | 4 ++-- net/sched/sch_mq.c | 4 ++-- net/sched/sch_mqprio.c | 4 ++-- net/sched/sch_multiq.c | 6 +++--- net/sched/sch_netem.c | 2 +- net/sched/sch_prio.c | 6 +++--- net/sched/sch_qfq.c | 4 ++-- net/sched/sch_red.c | 4 ++-- net/sched/sch_sfb.c | 4 ++-- net/sched/sch_tbf.c | 4 ++-- 20 files changed, 47 insertions(+), 42 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 34e4dba0f7b4..de82cb7622b5 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -568,7 +568,7 @@ void dev_deactivate_many(struct list_head *head); struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc); void qdisc_reset(struct Qdisc *qdisc); -void qdisc_destroy(struct Qdisc *qdisc); +void qdisc_put(struct Qdisc *qdisc); void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n, unsigned int len); struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 0b1dcbcfd9ba..69c8ee58b162 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -921,7 +921,7 @@ static void notify_and_destroy(struct net *net, struct sk_buff *skb, qdisc_notify(net, skb, n, clid, old, new); if (old) - qdisc_destroy(old); + qdisc_put(old); } /* Graft qdisc "new" to class "classid" of qdisc "parent" or @@ -974,7 +974,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, qdisc_refcount_inc(new); if (!ingress) - qdisc_destroy(old); + qdisc_put(old); } skip: @@ -1586,7 +1586,7 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, err = 
qdisc_graft(dev, p, skb, n, clid, q, NULL, extack); if (err) { if (q) - qdisc_destroy(q); + qdisc_put(q); return err; } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index a3084a12239e..ff825f40ea04 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -150,7 +150,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl) pr_debug("atm_tc_put: destroying\n"); list_del_init(&flow->list); pr_debug("atm_tc_put: qdisc %p\n", flow->q); - qdisc_destroy(flow->q); + qdisc_put(flow->q); tcf_block_put(flow->block); if (flow->sock) { pr_debug("atm_tc_put: f_count %ld\n", diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index ebc3c8c7e666..5e5bccfd7af4 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1439,7 +1439,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) WARN_ON(cl->filters); tcf_block_put(cl->block); - qdisc_destroy(cl->q); + qdisc_put(cl->q); qdisc_put_rtab(cl->R_tab); gen_kill_estimator(&cl->rate_est); if (cl != &q->link) diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index d5e22452d597..f95dc899e989 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -452,7 +452,7 @@ static void cbs_destroy(struct Qdisc *sch) cbs_disable_offload(dev, q); if (q->qdisc) - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); } static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index e0b0cf8a9939..cdebaed0f8cf 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -134,7 +134,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to replace estimator"); - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); kfree(cl); return err; } @@ -153,7 +153,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl) { gen_kill_estimator(&cl->rate_est); - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); kfree(cl); } diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 1c2fce8c45b2..e0978494aa6e 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -414,7 +414,7 @@ static void dsmark_destroy(struct Qdisc *sch) pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p); tcf_block_put(p->block); - qdisc_destroy(p->q); + qdisc_put(p->q); if (p->mv != p->embedded) kfree(p->mv); } diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 24893d3b5d22..3809c9bf8896 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -177,7 +177,7 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, if (q) { err = fifo_set_limit(q, limit); if (err < 0) { - qdisc_destroy(q); + qdisc_put(q); q = NULL; } } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index bd96fd261dba..322a8126fa04 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -918,7 +918,7 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, if (!ops->init || ops->init(sch, NULL, extack) == 0) return sch; - qdisc_destroy(sch); + qdisc_put(sch); return NULL; } EXPORT_SYMBOL(qdisc_create_dflt); @@ -958,7 +958,7 @@ void qdisc_free(struct Qdisc *qdisc) kfree((char *) qdisc - qdisc->padded); } -void qdisc_destroy(struct Qdisc *qdisc) +static void qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops; struct sk_buff *skb, *tmp; @@ -967,10 +967,6 @@ void qdisc_destroy(struct Qdisc *qdisc) return; ops = qdisc->ops; - if (qdisc->flags & TCQ_F_BUILTIN || - 
!refcount_dec_and_test(&qdisc->refcnt)) - return; - #ifdef CONFIG_NET_SCHED qdisc_hash_del(qdisc); @@ -997,7 +993,16 @@ void qdisc_destroy(struct Qdisc *qdisc) qdisc_free(qdisc); } -EXPORT_SYMBOL(qdisc_destroy); + +void qdisc_put(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN || + !refcount_dec_and_test(&qdisc->refcnt)) + return; + + qdisc_destroy(qdisc); +} +EXPORT_SYMBOL(qdisc_put); /* Attach toplevel qdisc to device queue. */ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, @@ -1306,7 +1311,7 @@ static void shutdown_scheduler_queue(struct net_device *dev, rcu_assign_pointer(dev_queue->qdisc, qdisc_default); dev_queue->qdisc_sleeping = qdisc_default; - qdisc_destroy(qdisc); + qdisc_put(qdisc); } } @@ -1315,7 +1320,7 @@ void dev_shutdown(struct net_device *dev) netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); if (dev_ingress_queue(dev)) shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); - qdisc_destroy(dev->qdisc); + qdisc_put(dev->qdisc); dev->qdisc = &noop_qdisc; WARN_ON(timer_pending(&dev->watchdog_timer)); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 3278a76f6861..b18ec1f6de60 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1092,7 +1092,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) struct hfsc_sched *q = qdisc_priv(sch); tcf_block_put(cl->block); - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); gen_kill_estimator(&cl->rate_est); if (cl != &q->root) kfree(cl); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 43c4bfe625a9..862a33b9e2e0 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1224,7 +1224,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) { if (!cl->level) { WARN_ON(!cl->un.leaf.q); - qdisc_destroy(cl->un.leaf.q); + qdisc_put(cl->un.leaf.q); } gen_kill_estimator(&cl->rate_est); tcf_block_put(cl->block); @@ -1425,7 +1425,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* turn parent into inner node */ qdisc_reset(parent->un.leaf.q); qdisc_tree_reduce_backlog(parent->un.leaf.q, qlen, backlog); - qdisc_destroy(parent->un.leaf.q); + qdisc_put(parent->un.leaf.q); if (parent->prio_activity) htb_deactivate(q, parent); diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index c008a316e943..e12b5926b97d 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -65,7 +65,7 @@ static void mq_destroy(struct Qdisc *sch) if (!priv->qdiscs) return; for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++) - qdisc_destroy(priv->qdiscs[ntx]); + qdisc_put(priv->qdiscs[ntx]); kfree(priv->qdiscs); } @@ -119,7 +119,7 @@ static void mq_attach(struct Qdisc *sch) qdisc = priv->qdiscs[ntx]; old = dev_graft_qdisc(qdisc->dev_queue, qdisc); if (old) - qdisc_destroy(old); + qdisc_put(old); #ifdef CONFIG_NET_SCHED if (ntx < dev->real_num_tx_queues) qdisc_hash_add(qdisc, false); diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 008db8d59ace..a8fc999080d5 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -40,7 +40,7 @@ static void mqprio_destroy(struct Qdisc *sch) for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++) - qdisc_destroy(priv->qdiscs[ntx]); + qdisc_put(priv->qdiscs[ntx]); kfree(priv->qdiscs); } @@ -300,7 +300,7 @@ static void mqprio_attach(struct Qdisc *sch) qdisc = priv->qdiscs[ntx]; old = dev_graft_qdisc(qdisc->dev_queue, qdisc); if (old) - qdisc_destroy(old); + qdisc_put(old); if (ntx < dev->real_num_tx_queues) qdisc_hash_add(qdisc, false); } diff 
--git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 1df78e361ef9..1c2f9a3ab1ca 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -175,7 +175,7 @@ multiq_destroy(struct Qdisc *sch) tcf_block_put(q->block); for (band = 0; band < q->bands; band++) - qdisc_destroy(q->queues[band]); + qdisc_put(q->queues[band]); kfree(q->queues); } @@ -204,7 +204,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, q->queues[i] = &noop_qdisc; qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); - qdisc_destroy(child); + qdisc_put(child); } } @@ -228,7 +228,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, qdisc_tree_reduce_backlog(old, old->q.qlen, old->qstats.backlog); - qdisc_destroy(old); + qdisc_put(old); } sch_tree_unlock(sch); } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 02d8d3fd84a5..ad400f4f9a2d 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1054,7 +1054,7 @@ static void netem_destroy(struct Qdisc *sch) qdisc_watchdog_cancel(&q->watchdog); if (q->qdisc) - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); dist_free(q->delay_dist); dist_free(q->slot_dist); } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 1cbbd8c31405..0e6f34bd9a68 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -175,7 +175,7 @@ prio_destroy(struct Qdisc *sch) tcf_block_put(q->block); prio_offload(sch, NULL); for (prio = 0; prio < q->bands; prio++) - qdisc_destroy(q->queues[prio]); + qdisc_put(q->queues[prio]); } static int prio_tune(struct Qdisc *sch, struct nlattr *opt, @@ -205,7 +205,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, extack); if (!queues[i]) { while (i > oldbands) - qdisc_destroy(queues[--i]); + qdisc_put(queues[--i]); return -ENOMEM; } } @@ -220,7 +220,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); - qdisc_destroy(child); + qdisc_put(child); } for (i = oldbands; i < q->bands; i++) { diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index bb1a9c11fc54..dc37c4ead439 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -526,7 +526,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, return 0; destroy_class: - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); kfree(cl); return err; } @@ -537,7 +537,7 @@ static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl) qfq_rm_from_agg(q, cl); gen_kill_estimator(&cl->rate_est); - qdisc_destroy(cl->qdisc); + qdisc_put(cl->qdisc); kfree(cl); } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 56c181c3feeb..3ce6c0a2c493 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -181,7 +181,7 @@ static void red_destroy(struct Qdisc *sch) del_timer_sync(&q->adapt_timer); red_offload(sch, false); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); } static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { @@ -233,7 +233,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, if (child) { qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, q->qdisc->qstats.backlog); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); q->qdisc = child; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 5e237f978075..38cf06515695 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -472,7 +472,7 @@ static void sfb_destroy(struct Qdisc *sch) struct sfb_sched_data *q = qdisc_priv(sch); tcf_block_put(q->block); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); } 
static const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = { @@ -526,7 +526,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt, qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, q->qdisc->qstats.backlog); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); q->qdisc = child; q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 6f74a426f159..dd29de1418b7 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -392,7 +392,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, if (child) { qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen, q->qdisc->qstats.backlog); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); q->qdisc = child; } q->limit = qopt->limit; @@ -438,7 +438,7 @@ static void tbf_destroy(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); qdisc_watchdog_cancel(&q->watchdog); - qdisc_destroy(q->qdisc); + qdisc_put(q->qdisc); } static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb) -- Gitee From f64d220928345d65f7969eee1bb215fd985a42ea Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 10 Dec 2021 10:47:25 +0000 Subject: [PATCH 24/37] net: core: netlink: add helper refcount dec and lock function stable inclusion from stable-v4.19.221 commit cd25f1099284a0cbe916344fc1e6c1ffed6c5306 category: bugfix issue:#I728NE CVE: CVE-2021-39713 Signed-off-by: wanxiaoqing40281 --------------------------------------- [ Upstream commit 6f99528e9797794b91b43321fbbc93fe772b0803 ] Rtnl lock is encapsulated in netlink and cannot be accessed by other modules directly. This means that reference counted objects that rely on rtnl lock cannot use it with a refcount helper function that atomically decrements the reference and obtains the mutex. This patch implements a simple wrapper function around refcount_dec_and_lock that obtains rtnl lock if reference counter value reached 0. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S.
Miller [Lee: Sent to Stable] Link: https://syzkaller.appspot.com/bug?id=d7e411c5472dd5da33d8cc921ccadc747743a568 Reported-by: syzbot+5f229e48cccc804062c0@syzkaller.appspotmail.com Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- include/linux/rtnetlink.h | 2 ++ net/core/rtnetlink.c | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 5225832bd6ff..9cdd76348d9a 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -6,6 +6,7 @@ #include #include #include +#include #include extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); @@ -34,6 +35,7 @@ extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); extern int rtnl_lock_killable(void); +extern bool refcount_dec_and_rtnl_lock(refcount_t *r); extern wait_queue_head_t netdev_unregistering_wq; extern struct rw_semaphore pernet_ops_rwsem; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 935053ee7765..3e1163ece352 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -130,6 +130,12 @@ int rtnl_is_locked(void) } EXPORT_SYMBOL(rtnl_is_locked); +bool refcount_dec_and_rtnl_lock(refcount_t *r) +{ + return refcount_dec_and_mutex_lock(r, &rtnl_mutex); +} +EXPORT_SYMBOL(refcount_dec_and_rtnl_lock); + #ifdef CONFIG_PROVE_LOCKING bool lockdep_rtnl_is_held(void) { -- Gitee From 539dc53f1fa956ea30da4346627be3b23c880e76 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 10 Dec 2021 10:47:27 +0000 Subject: [PATCH 25/37] net: sched: extend Qdisc with rcu stable inclusion from stable-v4.19.221 commit f602ed9f8574512e7ea1ab65c3db7ba71053bf27 category: bugfix issue:#I728NE CVE: CVE-2021-39713 Signed-off-by: wanxiaoqing40281 --------------------------------------- [ Upstream commit 3a7d0d07a386716b459b00783b11a8211cefcc0f ] Currently, Qdisc API functions assume that users have rtnl lock taken. To implement rtnl unlocked classifiers update interface, Qdisc API must be extended with functions that do not require rtnl lock. Extend Qdisc structure with rcu. Implement special version of put function qdisc_put_unlocked() that is called without rtnl lock taken. This function only takes rtnl lock if Qdisc reference counter reached zero and is intended to be used as optimization. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller [Lee: Sent to Stable] Link: https://syzkaller.appspot.com/bug?id=d7e411c5472dd5da33d8cc921ccadc747743a568 Reported-by: syzbot+5f229e48cccc804062c0@syzkaller.appspotmail.com Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- include/linux/rtnetlink.h | 5 +++++ include/net/pkt_sched.h | 1 + include/net/sch_generic.h | 2 ++ net/sched/sch_api.c | 18 ++++++++++++++++++ net/sched/sch_generic.c | 25 ++++++++++++++++++++++++- 5 files changed, 50 insertions(+), 1 deletion(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 9cdd76348d9a..bb9cb84114c1 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -85,6 +85,11 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) return rtnl_dereference(dev->ingress_queue); } +static inline struct netdev_queue *dev_ingress_queue_rcu(struct net_device *dev) +{ + return rcu_dereference(dev->ingress_queue); +} + struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); #ifdef CONFIG_NET_INGRESS diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 5e99771a5dcc..670ee4fc0c34 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -102,6 +102,7 @@ int qdisc_set_default(const char *id); void qdisc_hash_add(struct Qdisc *q, bool invisible); void qdisc_hash_del(struct Qdisc *q); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); +struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle); struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab, struct netlink_ext_ack *extack); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index de82cb7622b5..e172e9ebe643 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -108,6 +108,7 @@ struct Qdisc { spinlock_t busylock ____cacheline_aligned_in_smp; spinlock_t seqlock; + struct rcu_head rcu; }; static inline void qdisc_refcount_inc(struct Qdisc *qdisc) @@ -569,6 +570,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc); void qdisc_reset(struct Qdisc *qdisc); void qdisc_put(struct Qdisc *qdisc); +void qdisc_put_unlocked(struct Qdisc *qdisc); void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n, unsigned int len); struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 69c8ee58b162..e26eac610169 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -315,6 +315,24 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) return q; } +struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle) +{ + struct netdev_queue *nq; + struct Qdisc *q; + + if (!handle) + return NULL; + q = qdisc_match_from_root(dev->qdisc, handle); + if (q) + goto out; + + nq = dev_ingress_queue_rcu(dev); + if (nq) + q = qdisc_match_from_root(nq->qdisc_sleeping, handle); +out: + return q; +} + static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { unsigned long cl; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 322a8126fa04..67e79cdf4893 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -958,6 +958,13 @@ void qdisc_free(struct Qdisc *qdisc) kfree((char *) qdisc - qdisc->padded); } +void qdisc_free_cb(struct rcu_head *head) +{ + struct Qdisc *q = container_of(head, struct Qdisc, rcu); + + qdisc_free(q); +} + static void qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops; @@ -991,7 +998,7 @@ static void 
qdisc_destroy(struct Qdisc *qdisc) kfree_skb_list(skb); } - qdisc_free(qdisc); + call_rcu(&qdisc->rcu, qdisc_free_cb); } void qdisc_put(struct Qdisc *qdisc) @@ -1004,6 +1011,22 @@ void qdisc_put(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_put); +/* Version of qdisc_put() that is called with rtnl mutex unlocked. + * Intended to be used as optimization, this function only takes rtnl lock if + * qdisc reference counter reached zero. + */ + +void qdisc_put_unlocked(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN || + !refcount_dec_and_rtnl_lock(&qdisc->refcnt)) + return; + + qdisc_destroy(qdisc); + rtnl_unlock(); +} +EXPORT_SYMBOL(qdisc_put_unlocked); + /* Attach toplevel qdisc to device queue. */ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc) -- Gitee From 06261ee280c87221b1c0d3afd37b61686abc32e4 Mon Sep 17 00:00:00 2001 From: Zekun Shen Date: Sat, 13 Nov 2021 22:24:40 -0500 Subject: [PATCH 26/37] atlantic: Fix OOB read and write in hw_atl_utils_fw_rpc_wait stable inclusion from stable-v4.19.220 commit 0275fcd9b54f0364f66f2f3f6a0f3748648f3d35 category: bugfix issue:#I728NS CVE: CVE-2021-43975 Signed-off-by: wanxiaoqing40281 --------------------------------------- [ Upstream commit b922f622592af76b57cbc566eaeccda0b31a3496 ] This bug report shows up when running our research tools. The report is SOOB read, but it seems SOOB write is also possible a few lines below. In detail, fw.len and sw.len are inputs coming from io. A len over the size of self->rpc triggers SOOB. The patch fixes the bugs by adding sanity checks. The bugs are triggerable with compromised/malfunctioning devices. They are potentially exploitable given they first leak up to 0xffff bytes and are able to overwrite the region later. The patch is tested with QEMU emulator. This is NOT tested with a real device. Attached is the log we found by fuzzing. BUG: KASAN: slab-out-of-bounds in hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic] Read of size 4 at addr ffff888016260b08 by task modprobe/213 CPU: 0 PID: 213 Comm: modprobe Not tainted 5.6.0 #1 Call Trace: dump_stack+0x76/0xa0 print_address_description.constprop.0+0x16/0x200 ? hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic] ? hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic] __kasan_report.cold+0x37/0x7c ? aq_hw_read_reg_bit+0x60/0x70 [atlantic] ? hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic] kasan_report+0xe/0x20 hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic] hw_atl_utils_fw_rpc_call+0x95/0x130 [atlantic] hw_atl_utils_fw_rpc_wait+0x176/0x210 [atlantic] hw_atl_utils_mpi_create+0x229/0x2e0 [atlantic] ? hw_atl_utils_fw_rpc_wait+0x210/0x210 [atlantic] ? hw_atl_utils_initfw+0x9f/0x1c8 [atlantic] hw_atl_utils_initfw+0x12a/0x1c8 [atlantic] aq_nic_ndev_register+0x88/0x650 [atlantic] ? aq_nic_ndev_init+0x235/0x3c0 [atlantic] aq_pci_probe+0x731/0x9b0 [atlantic] ? aq_pci_func_init+0xc0/0xc0 [atlantic] local_pci_probe+0xd3/0x160 pci_device_probe+0x23f/0x3e0 Reported-by: Brendan Dolan-Gavitt Signed-off-by: Zekun Shen Signed-off-by: David S.
Miller Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing40281 --- .../ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index 096ec18e8f15..49c80bac9ce2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -459,6 +459,11 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self, goto err_exit; if (fw.len == 0xFFFFU) { + if (sw.len > sizeof(self->rpc)) { + printk(KERN_INFO "Invalid sw len: %x\n", sw.len); + err = -EINVAL; + goto err_exit; + } err = hw_atl_utils_fw_rpc_call(self, sw.len); if (err < 0) goto err_exit; @@ -469,6 +474,11 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self, if (rpc) { if (fw.len) { + if (fw.len > sizeof(self->rpc)) { + printk(KERN_INFO "Invalid fw len: %x\n", fw.len); + err = -EINVAL; + goto err_exit; + } err = hw_atl_utils_fw_downld_dwords(self, self->rpc_addr, -- Gitee From 6d2f7306ac07cb83acdfdeaee84d91eea3713f28 Mon Sep 17 00:00:00 2001 From: Zekun Shen Date: Sat, 30 Oct 2021 22:42:50 -0400 Subject: [PATCH 27/37] mwifiex: Fix skb_over_panic in mwifiex_usb_recv() stable inclusion from stable-v4.19.226 commit 2f4b037bf6e8c663a593b8149263c5b6940c7afd category: bugfix issue:#I728NW CVE: CVE-2021-43976 Signed-off-by: wanxiaoqing40281 --------------------------------------- [ Upstream commit 04d80663f67ccef893061b49ec8a42ff7045ae84 ] Currently, with an unknown recv_type, mwifiex_usb_recv just return -1 without restoring the skb. Next time mwifiex_usb_rx_complete is invoked with the same skb, calling skb_put causes skb_over_panic. The bug is triggerable with a compromised/malfunctioning usb device. After applying the patch, skb_over_panic no longer shows up with the same input. Attached is the panic report from fuzzing. skbuff: skb_over_panic: text:000000003bf1b5fa len:2048 put:4 head:00000000dd6a115b data:000000000a9445d8 tail:0x844 end:0x840 dev: kernel BUG at net/core/skbuff.c:109! invalid opcode: 0000 [#1] SMP KASAN NOPTI CPU: 0 PID: 198 Comm: in:imklog Not tainted 5.6.0 #60 RIP: 0010:skb_panic+0x15f/0x161 Call Trace: ? mwifiex_usb_rx_complete+0x26b/0xfcd [mwifiex_usb] skb_put.cold+0x24/0x24 mwifiex_usb_rx_complete+0x26b/0xfcd [mwifiex_usb] __usb_hcd_giveback_urb+0x1e4/0x380 usb_giveback_urb_bh+0x241/0x4f0 ? __hrtimer_run_queues+0x316/0x740 ? 
__usb_hcd_giveback_urb+0x380/0x380 tasklet_action_common.isra.0+0x135/0x330 __do_softirq+0x18c/0x634 irq_exit+0x114/0x140 smp_apic_timer_interrupt+0xde/0x380 apic_timer_interrupt+0xf/0x20 Reported-by: Brendan Dolan-Gavitt Signed-off-by: Zekun Shen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/YX4CqjfRcTa6bVL+@Zekuns-MBP-16.fios-router.home Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing40281 --- drivers/net/wireless/marvell/mwifiex/usb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index 2a8d40ce463d..e7ba3f353006 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -130,7 +130,8 @@ static int mwifiex_usb_recv(struct mwifiex_adapter *adapter, default: mwifiex_dbg(adapter, ERROR, "unknown recv_type %#x\n", recv_type); - return -1; + ret = -1; + goto exit_restore_skb; } break; case MWIFIEX_USB_EP_DATA: -- Gitee From 27820ff4823ef515658127443102568418215359 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Thu, 9 Dec 2021 16:28:39 +0800 Subject: [PATCH 28/37] phonet: refcount leak in pep_sock_accept mainline inclusion from mainline-v5.16-rc6 commit bcd0f93353326954817a4f9fa55ec57fb38acbb0 category: bugfix issue: #I728O0 CVE: CVE-2021-44733,CVE-2021-45095 Signed-off-by: wanxiaoqing --------------------------------------- sock_hold(sk) is invoked in pep_sock_accept(), but __sock_put(sk) is not invoked in subsequent failure branches (pep_accept_conn() != 0). Signed-off-by: Hangyu Hua Link: https://lore.kernel.org/r/20211209082839.33985-1-hbh25y@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: wanxiaoqing40281 --- net/phonet/pep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/phonet/pep.c b/net/phonet/pep.c index db3473540303..63d34e702df5 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -881,6 +881,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, err = pep_accept_conn(newsk, skb); if (err) { + __sock_put(sk); sock_put(newsk); newsk = NULL; goto drop; -- Gitee From 68dd9ea4787d42dcfd4b019f1aa6d60ac3747eee Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 12 Dec 2021 17:16:30 +0800 Subject: [PATCH 29/37] f2fs: fix to do sanity check on last xattr entry in __f2fs_setxattr() stable inclusion from stable-v4.19.223 commit f9dfa44be0fb5e8426183a70f69a246cf5827f49 category: bugfix issue:#I728O9 CVE: CVE-2021-45469 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit 5598b24efaf4892741c798b425d543e4bed357a1 upstream. As Wenqing Liu reported in bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=215235 - Overview page fault in f2fs_setxattr() when mount and operate on corrupted image - Reproduce tested on kernel 5.16-rc3, 5.15.X under root 1. unzip tmp7.zip 2.
./single.sh f2fs 7 Sometimes need to run the script several times - Kernel dump loop0: detected capacity change from 0 to 131072 F2FS-fs (loop0): Found nat_bits in checkpoint F2FS-fs (loop0): Mounted with checkpoint version = 7548c2ee BUG: unable to handle page fault for address: ffffe47bc7123f48 RIP: 0010:kfree+0x66/0x320 Call Trace: __f2fs_setxattr+0x2aa/0xc00 [f2fs] f2fs_setxattr+0xfa/0x480 [f2fs] __f2fs_set_acl+0x19b/0x330 [f2fs] __vfs_removexattr+0x52/0x70 __vfs_removexattr_locked+0xb1/0x140 vfs_removexattr+0x56/0x100 removexattr+0x57/0x80 path_removexattr+0xa3/0xc0 __x64_sys_removexattr+0x17/0x20 do_syscall_64+0x37/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xae The root cause is in __f2fs_setxattr(), we missed to do sanity check on last xattr entry, result in out-of-bound memory access during updating inconsistent xattr data of target inode. After the fix, it can detect such xattr inconsistency as below: F2FS-fs (loop11): inode (7) has invalid last xattr entry, entry_size: 60676 F2FS-fs (loop11): inode (8) has corrupted xattr F2FS-fs (loop11): inode (8) has corrupted xattr F2FS-fs (loop11): inode (8) has invalid last xattr entry, entry_size: 47736 Cc: stable@vger.kernel.org Reported-by: Wenqing Liu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim [delete f2fs_err() call as it's not in older kernels - gregkh] Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- fs/f2fs/xattr.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 201e9da1692a..64352d2833e2 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -658,8 +658,15 @@ static int __f2fs_setxattr(struct inode *inode, int index, } last = here; - while (!IS_XATTR_LAST_ENTRY(last)) + while (!IS_XATTR_LAST_ENTRY(last)) { + if ((void *)(last) + sizeof(__u32) > last_base_addr || + (void *)XATTR_NEXT_ENTRY(last) > last_base_addr) { + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); + error = -EFSCORRUPTED; + goto exit; + } last = XATTR_NEXT_ENTRY(last); + } newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size); -- Gitee From 63c633831f3aa51c458c990b5e18f5a7ae93d1e8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 9 Feb 2022 16:37:53 +0100 Subject: [PATCH 30/37] usb: gadget: rndis: check size of RNDIS_MSG_SET command stable inclusion from stable-v4.19.230 commit db9aaa3026298d652e98f777bc0f5756e2455dda category: bugfix issue:#I72GAW CVE: CVE-2022-25375 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit 38ea1eac7d88072bbffb630e2b3db83ca649b826 upstream. Check the size of the RNDIS_MSG_SET command given to us before attempting to respond to an invalid message size. 
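The underlying rule is the usual one for attacker-controlled length/offset pairs: bound both against the fixed message size before any allocation or copy. A sketch of the check (field and constant names as in the driver; the offset comparison is written with a subtraction here, which as a side effect cannot wrap for offsets near U32_MAX):

    u32 len = le32_to_cpu(buf->InformationBufferLength);
    u32 off = le32_to_cpu(buf->InformationBufferOffset);

    if (len > RNDIS_MAX_TOTAL_SIZE || off >= RNDIS_MAX_TOTAL_SIZE - 8)
            return -EINVAL;         /* reject before allocating the response */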
Reported-by: Szymon Heidrich Cc: stable@kernel.org Tested-by: Szymon Heidrich Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- drivers/usb/gadget/function/rndis.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c index 04c142c13075..ab827c1badc5 100644 --- a/drivers/usb/gadget/function/rndis.c +++ b/drivers/usb/gadget/function/rndis.c @@ -637,14 +637,17 @@ static int rndis_set_response(struct rndis_params *params, rndis_set_cmplt_type *resp; rndis_resp_t *r; + BufLength = le32_to_cpu(buf->InformationBufferLength); + BufOffset = le32_to_cpu(buf->InformationBufferOffset); + if ((BufLength > RNDIS_MAX_TOTAL_SIZE) || + (BufOffset + 8 >= RNDIS_MAX_TOTAL_SIZE)) + return -EINVAL; + r = rndis_add_response(params, sizeof(rndis_set_cmplt_type)); if (!r) return -ENOMEM; resp = (rndis_set_cmplt_type *)r->buf; - BufLength = le32_to_cpu(buf->InformationBufferLength); - BufOffset = le32_to_cpu(buf->InformationBufferOffset); - #ifdef VERBOSE_DEBUG pr_debug("%s: Length: %d\n", __func__, BufLength); pr_debug("%s: Offset: %d\n", __func__, BufOffset); -- Gitee From 72ce8a33305ddcbd3a55018a3845f357a4369c94 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Mon, 24 Jan 2022 11:29:54 +0800 Subject: [PATCH 31/37] yam: fix a memory leak in yam_siocdevprivate() stable inclusion from stable-v4.19.228 commit 4bd197ce18329e3725fe3af5bd27daa4256d3ac7 category: bugfix issue:#I72GAT CVE: CVE-2022-24959 Signed-off-by: wanxiaoqing40281 --------------------------------------- [ Upstream commit 29eb31542787e1019208a2e1047bb7c76c069536 ] ym needs to be free when ym->cmd != SIOCYAMSMCS. Fixes: 0781168e23a2 ("yam: fix a missing-check bug") Signed-off-by: Hangyu Hua Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: wanxiaoqing40281 --- drivers/net/hamradio/yam.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index fdab49872587..3db86f247bf4 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -966,9 +966,7 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) sizeof(struct yamdrv_ioctl_mcs)); if (IS_ERR(ym)) return PTR_ERR(ym); - if (ym->cmd != SIOCYAMSMCS) - return -EINVAL; - if (ym->bitrate > YAM_MAXBITRATE) { + if (ym->cmd != SIOCYAMSMCS || ym->bitrate > YAM_MAXBITRATE) { kfree(ym); return -EINVAL; } -- Gitee From ae7a770bce1b938a08c800f2beb72558c3066fa3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 6 Jan 2022 18:24:02 -0500 Subject: [PATCH 32/37] NFSv4: Handle case where the lookup of a directory fails stable inclusion from stable-v4.19.228 commit b00b4c6faad0f21e443fb1584f7a8ea222beb0de category: bugfix issue:#I72GAQ CVE: CVE-2022-24448 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit ac795161c93699d600db16c1a8cc23a65a1eceaf upstream. If the application sets the O_DIRECTORY flag, and tries to open a regular file, nfs_atomic_open() will punt to doing a regular lookup. If the server then returns a regular file, we will happily return a file descriptor with uninitialised open state. The fix is to return the expected ENOTDIR error in these cases. 
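From userspace the contract being restored is simple. A tiny reproducer shape (the path is hypothetical; any regular file on an NFS mount will do) that must fail with ENOTDIR once the fix is applied:

    #define _GNU_SOURCE
    #include <errno.h>
    #include <fcntl.h>
    #include <stdio.h>

    int main(void)
    {
            int fd = open("/mnt/nfs/regular-file", O_RDONLY | O_DIRECTORY);

            if (fd < 0 && errno == ENOTDIR)
                    printf("open() correctly rejected O_DIRECTORY on a regular file\n");
            return 0;
    }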
Reported-by: Lyu Tao Fixes: 0dd2b474d0b6 ("nfs: implement i_op->atomic_open()") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- fs/nfs/dir.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 733fd9e4f0a1..f8d805c80464 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1626,6 +1626,19 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, no_open: res = nfs_lookup(dir, dentry, lookup_flags); + if (!res) { + inode = d_inode(dentry); + if ((lookup_flags & LOOKUP_DIRECTORY) && inode && + !S_ISDIR(inode->i_mode)) + res = ERR_PTR(-ENOTDIR); + } else if (!IS_ERR(res)) { + inode = d_inode(res); + if ((lookup_flags & LOOKUP_DIRECTORY) && inode && + !S_ISDIR(inode->i_mode)) { + dput(res); + res = ERR_PTR(-ENOTDIR); + } + } if (switched) { d_lookup_done(dentry); if (!res) -- Gitee From 30599aa8e75e4ce3c4db5b5c7d9f1628975ff3f2 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 27 Jan 2022 18:34:19 +1000 Subject: [PATCH 33/37] drm/vmwgfx: Fix stale file descriptors on failed usercopy stable inclusion from stable-v4.19.227 commit 0008a0c78fc33a84e2212a7c04e6b21a36ca6f4d category: bugfix issue:#I72GAN CVE: CVE-2022-22942 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit a0f90c8815706981c483a652a6aefca51a5e191c upstream. A failing usercopy of the fence_rep object will lead to a stale entry in the file descriptor table as put_unused_fd() won't release it. This enables userland to refer to a dangling 'file' object through that still valid file descriptor, leading to all kinds of use-after-free exploitation scenarios. Fix this by deferring the call to fd_install() until after the usercopy has succeeded. 
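The rule being applied generalizes beyond vmwgfx: fd_install() publishes the descriptor to userspace and cannot be revoked, so it must come after every step that can still fail. A sketch of the safe ordering (variable names are placeholders, not driver code):

    int fd = get_unused_fd_flags(O_CLOEXEC);
    if (fd < 0)
            return fd;

    if (copy_to_user(user_ptr, &rep, sizeof(rep))) {
            put_unused_fd(fd);      /* nothing published yet, the slot can be recycled */
            fput(file);             /* drop the reference that was meant for the fd table */
            return -EFAULT;
    }
    fd_install(fd, file);           /* point of no return */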
Fixes: c906965dee22 ("drm/vmwgfx: Add export fence to file descriptor support") Signed-off-by: Mathias Krause Signed-off-by: Zack Rusin Signed-off-by: Dave Airlie Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 5 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 34 ++++++++++++------------- drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 2 +- 4 files changed, 21 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 1abe21758b0d..bca0b8980c0e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -855,15 +855,14 @@ extern int vmw_execbuf_fence_commands(struct drm_file *file_priv, struct vmw_private *dev_priv, struct vmw_fence_obj **p_fence, uint32_t *p_handle); -extern void vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, +extern int vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, struct vmw_fpriv *vmw_fp, int ret, struct drm_vmw_fence_rep __user *user_fence_rep, struct vmw_fence_obj *fence, uint32_t fence_handle, - int32_t out_fence_fd, - struct sync_file *sync_file); + int32_t out_fence_fd); extern int vmw_validate_single_buffer(struct vmw_private *dev_priv, struct ttm_buffer_object *bo, bool interruptible, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 3834aa71c9c4..e65554f5a89d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -3873,20 +3873,19 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv, * object so we wait for it immediately, and then unreference the * user-space reference. */ -void +int vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, struct vmw_fpriv *vmw_fp, int ret, struct drm_vmw_fence_rep __user *user_fence_rep, struct vmw_fence_obj *fence, uint32_t fence_handle, - int32_t out_fence_fd, - struct sync_file *sync_file) + int32_t out_fence_fd) { struct drm_vmw_fence_rep fence_rep; if (user_fence_rep == NULL) - return; + return 0; memset(&fence_rep, 0, sizeof(fence_rep)); @@ -3914,20 +3913,14 @@ vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, * and unreference the handle. */ if (unlikely(ret != 0) && (fence_rep.error == 0)) { - if (sync_file) - fput(sync_file->file); - - if (fence_rep.fd != -1) { - put_unused_fd(fence_rep.fd); - fence_rep.fd = -1; - } - ttm_ref_object_base_unref(vmw_fp->tfile, fence_handle, TTM_REF_USAGE); DRM_ERROR("Fence copy error. Syncing.\n"); (void) vmw_fence_obj_wait(fence, false, false, VMW_FENCE_WAIT_TIMEOUT); } + + return ret ? 
-EFAULT : 0; } /** @@ -4287,16 +4280,23 @@ int vmw_execbuf_process(struct drm_file *file_priv, (void) vmw_fence_obj_wait(fence, false, false, VMW_FENCE_WAIT_TIMEOUT); + } + } + + ret = vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret, + user_fence_rep, fence, handle, out_fence_fd); + + if (sync_file) { + if (ret) { + /* usercopy of fence failed, put the file object */ + fput(sync_file->file); + put_unused_fd(out_fence_fd); } else { /* Link the fence with the FD created earlier */ fd_install(out_fence_fd, sync_file->file); } } - vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret, - user_fence_rep, fence, handle, - out_fence_fd, sync_file); - /* Don't unreference when handing fence out */ if (unlikely(out_fence != NULL)) { *out_fence = fence; @@ -4315,7 +4315,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, */ vmw_resource_list_unreference(sw_context, &resource_list); - return 0; + return ret; out_unlock_binding: mutex_unlock(&dev_priv->binding_mutex); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 3d546d409334..72a75316d472 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -1169,7 +1169,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, } vmw_execbuf_copy_fence_user(dev_priv, vmw_fp, 0, user_fence_rep, fence, - handle, -1, NULL); + handle, -1); vmw_fence_obj_unreference(&fence); return 0; out_no_create: diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 1554b1d84a63..bf8c721ebfe9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -2663,7 +2663,7 @@ void vmw_kms_helper_buffer_finish(struct vmw_private *dev_priv, if (file_priv) vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret, user_fence_rep, fence, - handle, -1, NULL); + handle, -1); if (out_fence) *out_fence = fence; else -- Gitee From c8755ee067bcafabd71821e569f3f5363f7cf260 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 17 Jan 2022 18:22:13 +0100 Subject: [PATCH 34/37] udf: Fix NULL ptr deref when converting from inline format stable inclusion from stable-v4.19.228 commit a23a59717f9f01a49394488f515550f9382fbada category: bugfix issue:#I72GAK CVE: CVE-2022-0617 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit 7fc3b7c2981bbd1047916ade327beccb90994eee upstream. udf_expand_file_adinicb() calls directly ->writepage to write data expanded into a page. This however misses to setup inode for writeback properly and so we can crash on inode->i_wb dereference when submitting page for IO like: BUG: kernel NULL pointer dereference, address: 0000000000000158 #PF: supervisor read access in kernel mode ... __folio_start_writeback+0x2ac/0x350 __block_write_full_page+0x37d/0x490 udf_expand_file_adinicb+0x255/0x400 [udf] udf_file_write_iter+0xbe/0x1b0 [udf] new_sync_write+0x125/0x1c0 vfs_write+0x28e/0x400 Fix the problem by marking the page dirty and going through the standard writeback path to write the page. Strictly speaking we would not even have to write the page but we want to catch e.g. ENOSPC errors early. 
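In short, the conversion swaps a hand-rolled ->writepage invocation for the regular dirty-and-flush sequence. The shape of the fix, mirroring the hunk below:

    /* old, crash-prone: drives writeback by hand, bypassing per-inode
     * writeback setup:
     *         err = inode->i_data.a_ops->writepage(page, &udf_wbc);
     * new: mark the page dirty and let the core writeback path run */
    set_page_dirty(page);
    unlock_page(page);
    err = filemap_fdatawrite(inode->i_mapping);     /* also surfaces ENOSPC early */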
Reported-by: butt3rflyh4ck CC: stable@vger.kernel.org Fixes: 52ebea749aae ("writeback: make backing_dev_info host cgroup-specific bdi_writebacks") Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- fs/udf/inode.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 4fcd2126605a..d18c6b34b43e 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -251,10 +251,6 @@ int udf_expand_file_adinicb(struct inode *inode) char *kaddr; struct udf_inode_info *iinfo = UDF_I(inode); int err; - struct writeback_control udf_wbc = { - .sync_mode = WB_SYNC_NONE, - .nr_to_write = 1, - }; WARN_ON_ONCE(!inode_is_locked(inode)); if (!iinfo->i_lenAlloc) { @@ -298,8 +294,10 @@ int udf_expand_file_adinicb(struct inode *inode) iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; /* from now on we have normal address_space methods */ inode->i_data.a_ops = &udf_aops; + set_page_dirty(page); + unlock_page(page); up_write(&iinfo->i_data_sem); - err = inode->i_data.a_ops->writepage(page, &udf_wbc); + err = filemap_fdatawrite(inode->i_mapping); if (err) { /* Restore everything back so that we don't lose data... */ lock_page(page); -- Gitee From bd53400eae3bcb033eb34aa278776456d4d7dfb6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 18 Jan 2022 09:57:25 +0100 Subject: [PATCH 35/37] udf: Restore i_lenAlloc when inode expansion fails stable inclusion from stable-v4.19.228 commit 3740d41e7363374182a42f1621e06d5029c837d5 category: bugfix issue:#I72GAK CVE: CVE-2022-0617 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit ea8569194b43f0f01f0a84c689388542c7254a1f upstream. When we fail to expand an inode from inline format to normal format, we restore the inode to contain the original inline formatting, but we forgot to set i_lenAlloc back. The mismatch between i_lenAlloc and i_size was then causing further problems such as warnings and lost data down the line. Reported-by: butt3rflyh4ck CC: stable@vger.kernel.org Fixes: 7e49b6f2480c ("udf: Convert UDF to new truncate calling sequence") Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- fs/udf/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index d18c6b34b43e..af53e2fd1b15 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -309,6 +309,7 @@ int udf_expand_file_adinicb(struct inode *inode) unlock_page(page); iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; inode->i_data.a_ops = &udf_adinicb_aops; + iinfo->i_lenAlloc = inode->i_size; up_write(&iinfo->i_data_sem); } put_page(page); -- Gitee From 2cb3de90ad6239899776cbb01da354c6ce13e12c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 20 Jan 2022 11:04:01 -0600 Subject: [PATCH 36/37] cgroup-v1: Require capabilities to set release_agent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-v4.19.229 commit 939f8b491887c27585933ea7dc5ad4123de58ff3 category: bugfix issue:#I72GAH CVE: CVE-2022-0492 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit 24f6008564183aa120d07c03d9289519c2fe02af upstream. The cgroup release_agent is called with call_usermodehelper. The function call_usermodehelper starts the release_agent with a full set of capabilities. Therefore require capabilities when setting the release_agent.
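The check added in all three places (the release_agent write handler, remount, and mount) follows a single pattern; as a sketch, with a hypothetical helper name:

#include <linux/capability.h>
#include <linux/user_namespace.h>

/*
 * Hypothetical helper summarizing the added checks: the release agent is
 * later executed by call_usermodehelper() with full capabilities, so only
 * a CAP_SYS_ADMIN-capable task in the initial user namespace may set it.
 */
static bool may_set_release_agent(struct user_namespace *user_ns)
{
	return user_ns == &init_user_ns && capable(CAP_SYS_ADMIN);
}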
Reported-by: Tabitha Sable Tested-by: Tabitha Sable Fixes: 81a6a5cdd2c5 ("Task Control Groups: automatic userspace notification of idle cgroups") Cc: stable@vger.kernel.org # v2.6.24+ Signed-off-by: "Eric W. Biederman" Signed-off-by: Tejun Heo [mkoutny: Adjust for pre-fs_context, duplicate mount/remount check, drop log messages.] Acked-by: Michal Koutný Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- kernel/cgroup/cgroup-v1.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 75389bccc1a8..cc740091082b 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -578,6 +578,14 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); + /* + * Release agent gets called with all capabilities, + * require capabilities to set release agent. + */ + if ((of->file->f_cred->user_ns != &init_user_ns) || + !capable(CAP_SYS_ADMIN)) + return -EPERM; + cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) return -ENODEV; @@ -1045,6 +1053,7 @@ static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data) { int ret = 0; struct cgroup_root *root = cgroup_root_from_kf(kf_root); + struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup_sb_opts opts; u16 added_mask, removed_mask; @@ -1058,6 +1067,12 @@ static int cgroup1_remount(struct kernfs_root *kf_root, int *flags, char *data) if (opts.subsys_mask != root->subsys_mask || opts.release_agent) pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n", task_tgid_nr(current), current->comm); + /* See cgroup1_mount release_agent handling */ + if (opts.release_agent && + ((ns->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN))) { + ret = -EINVAL; + goto out_unlock; + } added_mask = opts.subsys_mask & ~root->subsys_mask; removed_mask = root->subsys_mask & ~opts.subsys_mask; @@ -1221,6 +1236,15 @@ struct dentry *cgroup1_mount(struct file_system_type *fs_type, int flags, ret = -EPERM; goto out_unlock; } + /* + * Release agent gets called with all capabilities, + * require capabilities to set release agent. + */ + if (opts.release_agent && + ((ns->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN))) { + ret = -EINVAL; + goto out_unlock; + } root = kzalloc(sizeof(*root), GFP_KERNEL); if (!root) { -- Gitee From 0dbc94a2fbda2a849ca577ba186fb1f723f14b86 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 27 Jan 2022 08:16:38 +0100 Subject: [PATCH 37/37] moxart: fix potential use-after-free on remove path stable inclusion from stable-v4.19.229 commit 9c25d5ff1856b91bd4365e813f566cb59aaa9552 category: bugfix issue:#I72GAE CVE: CVE-2022-0487 Signed-off-by: wanxiaoqing40281 --------------------------------------- commit bd2db32e7c3e35bd4d9b8bbff689434a50893546 upstream. It was reported that the mmc host structure could be accessed after it was freed in moxart_remove(), so fix this by moving mmc_free_host() to the end of the remove path, after the last access to the host's register base.
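As a simplified sketch of the remove-path ordering the fix establishes (not the literal diff; the DMA teardown and clock-off write are elided, the helper name is hypothetical, and the register offsets are the driver's own):

#include <linux/io.h>
#include <linux/mmc/host.h>

/*
 * Simplified from moxart_remove(): `host` is embedded in the mmc_host
 * allocation, so mmc_free_host() must come last; any host->base access
 * after it would be a use-after-free.
 */
static void moxart_teardown(struct mmc_host *mmc, struct moxart_host *host)
{
	mmc_remove_host(mmc);
	writel(0, host->base + REG_INTERRUPT_MASK);	/* host still valid */
	writel(0, host->base + REG_POWER_CONTROL);
	mmc_free_host(mmc);				/* also frees host */
}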
Cc: Ulf Hansson Cc: Xiyu Yang Cc: Xin Xiong Cc: Xin Tan Cc: Tony Lindgren Cc: Yang Li Cc: linux-mmc@vger.kernel.org Cc: stable Reported-by: whitehat002 Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20220127071638.4057899-1-gregkh@linuxfoundation.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman Signed-off-by: wanxiaoqing40281 --- drivers/mmc/host/moxart-mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c index a0670e9cd012..f483211c5cc6 100644 --- a/drivers/mmc/host/moxart-mmc.c +++ b/drivers/mmc/host/moxart-mmc.c @@ -695,12 +695,12 @@ static int moxart_remove(struct platform_device *pdev) if (!IS_ERR(host->dma_chan_rx)) dma_release_channel(host->dma_chan_rx); mmc_remove_host(mmc); - mmc_free_host(mmc); writel(0, host->base + REG_INTERRUPT_MASK); writel(0, host->base + REG_POWER_CONTROL); writel(readl(host->base + REG_CLOCK_CONTROL) | CLK_OFF, host->base + REG_CLOCK_CONTROL); + mmc_free_host(mmc); } return 0; } -- Gitee