From 8fb8ddaca48564ca5617d9965aa3cd2a24fe5b6e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 28 Jan 2022 20:36:37 +0800 Subject: [PATCH 01/17] sctp: use call_rcu to free endpoint stable inclusion from stable-v5.10.90 commit 769d14abd35e0e153b5149c3e1e989a9d719e3ff bugzilla: 186168 https://gitee.com/openeuler/kernel/issues/I4SHY1 CVE: CVE-2022-20154 Signed-off-by: Fang Minjuan Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=769d14abd35e0e153b5149c3e1e989a9d719e3ff -------------------------------- [ Upstream commit 5ec7d18d1813a5bead0b495045606c93873aecbb ] This patch is to delay the endpoint free by calling call_rcu() to fix another use-after-free issue in sctp_sock_dump(): BUG: KASAN: use-after-free in __lock_acquire+0x36d9/0x4c20 Call Trace: __lock_acquire+0x36d9/0x4c20 kernel/locking/lockdep.c:3218 lock_acquire+0x1ed/0x520 kernel/locking/lockdep.c:3844 __raw_spin_lock_bh include/linux/spinlock_api_smp.h:135 [inline] _raw_spin_lock_bh+0x31/0x40 kernel/locking/spinlock.c:168 spin_lock_bh include/linux/spinlock.h:334 [inline] __lock_sock+0x203/0x350 net/core/sock.c:2253 lock_sock_nested+0xfe/0x120 net/core/sock.c:2774 lock_sock include/net/sock.h:1492 [inline] sctp_sock_dump+0x122/0xb20 net/sctp/diag.c:324 sctp_for_each_transport+0x2b5/0x370 net/sctp/socket.c:5091 sctp_diag_dump+0x3ac/0x660 net/sctp/diag.c:527 __inet_diag_dump+0xa8/0x140 net/ipv4/inet_diag.c:1049 inet_diag_dump+0x9b/0x110 net/ipv4/inet_diag.c:1065 netlink_dump+0x606/0x1080 net/netlink/af_netlink.c:2244 __netlink_dump_start+0x59a/0x7c0 net/netlink/af_netlink.c:2352 netlink_dump_start include/linux/netlink.h:216 [inline] inet_diag_handler_cmd+0x2ce/0x3f0 net/ipv4/inet_diag.c:1170 __sock_diag_cmd net/core/sock_diag.c:232 [inline] sock_diag_rcv_msg+0x31d/0x410 net/core/sock_diag.c:263 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2477 sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:274 This issue occurs when asoc is peeled off and the old sk is freed after getting it by asoc->base.sk and before calling lock_sock(sk). To prevent the sk free, as a holder of the sk, ep should be alive when calling lock_sock(). This patch uses call_rcu() and moves sock_put and ep free into sctp_endpoint_destroy_rcu(), so that it's safe to try to hold the ep under rcu_read_lock in sctp_transport_traverse_process(). If sctp_endpoint_hold() returns true, it means this ep is still alive and we have held it and can continue to dump it; If it returns false, it means this ep is dead and can be freed after rcu_read_unlock, and we should skip it. In sctp_sock_dump(), after locking the sk, if this ep is different from tsp->asoc->ep, it means during this dumping, this asoc was peeled off before calling lock_sock(), and the sk should be skipped; If this ep is the same with tsp->asoc->ep, it means no peeloff happens on this asoc, and due to lock_sock, no peeloff will happen either until release_sock. Note that delaying endpoint free won't delay the port release, as the port release happens in sctp_endpoint_destroy() before calling call_rcu(). Also, freeing endpoint by call_rcu() makes it safe to access the sk by asoc->base.sk in sctp_assocs_seq_show() and sctp_rcv(). Thanks Jones to bring this issue up. v1->v2: - improve the changelog. - add kfree(ep) into sctp_endpoint_destroy_rcu(), as Jakub noticed. Reported-by: syzbot+9276d76e83e3bcde6c99@syzkaller.appspotmail.com Reported-by: Lee Jones Fixes: d25adbeb0cdb ("sctp: fix an use-after-free issue in sctp_sock_dump") Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Chen Jun Signed-off-by: Zheng Zengkai --- include/net/sctp/sctp.h | 6 +++--- include/net/sctp/structs.h | 3 ++- net/sctp/diag.c | 12 ++++++------ net/sctp/endpointola.c | 23 +++++++++++++++-------- net/sctp/socket.c | 23 +++++++++++++++-------- 5 files changed, 41 insertions(+), 26 deletions(-) diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 4fc747b778eb..33475d061823 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -103,6 +103,7 @@ extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); +typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *); void sctp_transport_walk_start(struct rhashtable_iter *iter); void sctp_transport_walk_stop(struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_next(struct net *net, @@ -113,9 +114,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, void *p); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p); +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, struct sctp_info *info); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 51d698f2656f..be9ff0422c16 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1339,6 +1339,7 @@ struct sctp_endpoint { u32 secid; u32 peer_secid; + struct rcu_head rcu; }; /* Recover the outter endpoint structure. */ @@ -1354,7 +1355,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t); void sctp_endpoint_free(struct sctp_endpoint *); void sctp_endpoint_put(struct sctp_endpoint *); -void sctp_endpoint_hold(struct sctp_endpoint *); +int sctp_endpoint_hold(struct sctp_endpoint *ep); void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *); struct sctp_association *sctp_endpoint_lookup_assoc( const struct sctp_endpoint *ep, diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 493fc01e5d2b..babadd6720a2 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -292,9 +292,8 @@ static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p) return err; } -static int sctp_sock_dump(struct sctp_transport *tsp, void *p) +static int sctp_sock_dump(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; struct sk_buff *skb = commp->skb; @@ -304,6 +303,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p) int err = 0; lock_sock(sk); + if (ep != tsp->asoc->ep) + goto release; list_for_each_entry(assoc, &ep->asocs, asocs) { if (cb->args[4] < cb->args[1]) goto next; @@ -346,9 +347,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p) return err; } -static int sctp_sock_filter(struct sctp_transport *tsp, void *p) +static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; const struct inet_diag_req_v2 *r = commp->r; @@ -507,8 +507,8 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; - sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump, - net, &pos, &commp); + sctp_transport_traverse_process(sctp_sock_filter, sctp_sock_dump, + net, &pos, &commp); cb->args[2] = pos; done: diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 48c9c2c7602f..efffde7f2328 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -184,6 +184,18 @@ void sctp_endpoint_free(struct sctp_endpoint *ep) } /* Final destructor for endpoint. */ +static void sctp_endpoint_destroy_rcu(struct rcu_head *head) +{ + struct sctp_endpoint *ep = container_of(head, struct sctp_endpoint, rcu); + struct sock *sk = ep->base.sk; + + sctp_sk(sk)->ep = NULL; + sock_put(sk); + + kfree(ep); + SCTP_DBG_OBJCNT_DEC(ep); +} + static void sctp_endpoint_destroy(struct sctp_endpoint *ep) { struct sock *sk; @@ -213,18 +225,13 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) if (sctp_sk(sk)->bind_hash) sctp_put_port(sk); - sctp_sk(sk)->ep = NULL; - /* Give up our hold on the sock */ - sock_put(sk); - - kfree(ep); - SCTP_DBG_OBJCNT_DEC(ep); + call_rcu(&ep->rcu, sctp_endpoint_destroy_rcu); } /* Hold a reference to an endpoint. */ -void sctp_endpoint_hold(struct sctp_endpoint *ep) +int sctp_endpoint_hold(struct sctp_endpoint *ep) { - refcount_inc(&ep->base.refcnt); + return refcount_inc_not_zero(&ep->base.refcnt); } /* Release a reference to an endpoint and clean up if there are diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e872bc50bbe6..0a9e2c7d8e5f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5223,11 +5223,12 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), } EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p) { +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p) +{ struct rhashtable_iter hti; struct sctp_transport *tsp; + struct sctp_endpoint *ep; int ret; again: @@ -5236,26 +5237,32 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), tsp = sctp_transport_get_idx(net, &hti, *pos + 1); for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { - ret = cb(tsp, p); - if (ret) - break; + ep = tsp->asoc->ep; + if (sctp_endpoint_hold(ep)) { /* asoc can be peeled off */ + ret = cb(ep, tsp, p); + if (ret) + break; + sctp_endpoint_put(ep); + } (*pos)++; sctp_transport_put(tsp); } sctp_transport_walk_stop(&hti); if (ret) { - if (cb_done && !cb_done(tsp, p)) { + if (cb_done && !cb_done(ep, tsp, p)) { (*pos)++; + sctp_endpoint_put(ep); sctp_transport_put(tsp); goto again; } + sctp_endpoint_put(ep); sctp_transport_put(tsp); } return ret; } -EXPORT_SYMBOL_GPL(sctp_for_each_transport); +EXPORT_SYMBOL_GPL(sctp_transport_traverse_process); /* 7.2.1 Association Status (SCTP_STATUS) -- Gitee From e731f4857ab1e05d8af269bcc527bc5ecc5d5466 Mon Sep 17 00:00:00 2001 From: Liu Jian Date: Thu, 14 Oct 2021 16:57:39 +0800 Subject: [PATCH 02/17] igmp: Add ip_mc_list lock in ip_check_mc_rcu mainline inclusion from mainline-net-next commit 23d2b94043ca8835bd1e67749020e839f396a1c2 category: bugfix bugzilla: 175179 https://gitee.com/openeuler/kernel/issues/I4DDEL CVE: CVE-2022-20141 Signed-off-by: Fang Minjuan Reference: https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git/commit/?id=23d2b94043ca8835bd1e67749020e839f396a1c2 -------------------------------- I got below panic when doing fuzz test: Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 4056 Comm: syz-executor.3 Tainted: G B 5.14.0-rc1-00195-gcff5c4254439-dirty #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-59-gc9ba5276e321-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x7a/0x9b panic+0x2cd/0x5af end_report.cold+0x5a/0x5a kasan_report+0xec/0x110 ip_check_mc_rcu+0x556/0x5d0 __mkroute_output+0x895/0x1740 ip_route_output_key_hash_rcu+0x2d0/0x1050 ip_route_output_key_hash+0x182/0x2e0 ip_route_output_flow+0x28/0x130 udp_sendmsg+0x165d/0x2280 udpv6_sendmsg+0x121e/0x24f0 inet6_sendmsg+0xf7/0x140 sock_sendmsg+0xe9/0x180 ____sys_sendmsg+0x2b8/0x7a0 ___sys_sendmsg+0xf0/0x160 __sys_sendmmsg+0x17e/0x3c0 __x64_sys_sendmmsg+0x9e/0x100 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x462eb9 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f3df5af1c58 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462eb9 RDX: 0000000000000312 RSI: 0000000020001700 RDI: 0000000000000007 RBP: 0000000000000004 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f3df5af26bc R13: 00000000004c372d R14: 0000000000700b10 R15: 00000000ffffffff It is one use-after-free in ip_check_mc_rcu. In ip_mc_del_src, the ip_sf_list of pmc has been freed under pmc->lock protection. But access to ip_sf_list in ip_check_mc_rcu is not protected by the lock. Signed-off-by: Liu Jian Signed-off-by: David S. Miller Signed-off-by: Liu Jian Reviewed-by: Yue Haibing Signed-off-by: Chen Jun Signed-off-by: Zheng Zengkai --- net/ipv4/igmp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6b3c558a4f23..03589a04f9aa 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2713,6 +2713,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u rv = 1; } else if (im) { if (src_addr) { + spin_lock_bh(&im->lock); for (psf = im->sources; psf; psf = psf->sf_next) { if (psf->sf_inaddr == src_addr) break; @@ -2723,6 +2724,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u im->sfcount[MCAST_EXCLUDE]; else rv = im->sfcount[MCAST_EXCLUDE] != 0; + spin_unlock_bh(&im->lock); } else rv = 1; /* unspecified source; tentatively allow */ } -- Gitee From 9f595f76c2f74e78f670b4a25ab71eb0ef981590 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 14 Jan 2022 19:29:06 +0800 Subject: [PATCH 03/17] io_uring: return back safer resurrect mainline inclusion from mainline-v5.13-rc1 commit f70865db5ff35f5ed0c7e9ef63e7cca3d4947f04 category: bugfix bugzilla: 185824 https://gitee.com/openeuler/kernel/issues/I4DDEL CVE: CVE-2022-20153 Signed-off-by: Fang Minjuan Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=f70865db5ff35f5ed0c7e9ef63e7cca3d4947f04 ----------------------------------------------- Revert of revert of "io_uring: wait potential ->release() on resurrect", which adds a helper for resurrect not racing completion reinit, as was removed because of a strange bug with no clear root or link to the patch. Was improved, instead of rcu_synchronize(), just wait_for_completion() because we're at 0 refs and it will happen very shortly. Specifically use non-interruptible version to ignore all pending signals that may have ended prior interruptible wait. This reverts commit cb5e1b81304e089ee3ca948db4d29f71902eb575. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/7a080c20f686d026efade810b116b72f88abaff9.1618101759.git.asml.silence@gmail.com Signed-off-by: Jens Axboe conflicts: fs/io_uring.c Signed-off-by: Ye Bin Reviewed-by: Zhang Yi Signed-off-by: Chen Jun Signed-off-by: Zheng Zengkai --- fs/io_uring.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index c64d65650855..545b1d8f15db 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1008,6 +1008,18 @@ static inline bool __io_match_files(struct io_kiocb *req, req->work.identity->files == files; } +static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl) +{ + bool got = percpu_ref_tryget(ref); + + /* already at zero, wait for ->release() */ + if (!got) + wait_for_completion(compl); + percpu_ref_resurrect(ref); + if (got) + percpu_ref_put(ref); +} + static bool io_match_task(struct io_kiocb *head, struct task_struct *task, struct files_struct *files) @@ -9700,12 +9712,11 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, if (ret < 0) break; } while (1); - mutex_lock(&ctx->uring_lock); if (ret) { - percpu_ref_resurrect(&ctx->refs); - goto out_quiesce; + io_refs_resurrect(&ctx->refs, &ctx->ref_comp); + return ret; } } @@ -9798,7 +9809,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, if (io_register_op_must_quiesce(opcode)) { /* bring the ctx back to life */ percpu_ref_reinit(&ctx->refs); -out_quiesce: reinit_completion(&ctx->ref_comp); } return ret; -- Gitee From c25f107f8c2eb614409f381fd0747f14d2b37e93 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 6 Jul 2022 18:04:49 +0800 Subject: [PATCH 04/17] x86/speculation/mmio: Print SMT warning stable inclusion from stable-v5.10.123 commit aa238a92cc94a15812c0de4adade86ba8f22707a category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5D5RS CVE: CVE-2022-21123,CVE-2022-21125,CVE-2022-21166 Signed-off-by: Fang Minjuan Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-5.10.y&id=aa238a92cc94a15812c0de4adade86ba8f22707a -------------------------------- commit 1dc6ff02c8bf77d71b9b5d11cbc9df77cfb28626 upstream Similar to MDS and TAA, print a warning if SMT is enabled for the MMIO Stale Data vulnerability. Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yipeng Zou Reviewed-by: Zhang Jianhua Reviewed-by: Xiu Jianfeng Signed-off-by: Zheng Zengkai --- arch/x86/kernel/cpu/bugs.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 78b9514a3844..de39f73e0051 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1091,6 +1091,7 @@ static void update_mds_branch_idle(void) #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" #define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" +#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" void cpu_bugs_smt_update(void) { @@ -1135,6 +1136,16 @@ void cpu_bugs_smt_update(void) break; } + switch (mmio_mitigation) { + case MMIO_MITIGATION_VERW: + case MMIO_MITIGATION_UCODE_NEEDED: + if (sched_smt_active()) + pr_warn_once(MMIO_MSG_SMT); + break; + case MMIO_MITIGATION_OFF: + break; + } + mutex_unlock(&spec_ctrl_mutex); } -- Gitee From 6e8167ceecce2bfbfcc2dc5b7c1efd6c6cfe81e6 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 6 Jul 2022 18:04:39 +0800 Subject: [PATCH 05/17] Documentation: Add documentation for Processor MMIO Stale Data stable inclusion from stable-v5.10.123 commit f8a85334a57e7842320476ff27be3a5f151da364 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5D5RS CVE: CVE-2022-21123,CVE-2022-21125,CVE-2022-21166 Signed-off-by: Fang Minjuan Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-5.10.y&id=f8a85334a57e7842320476ff27be3a5f151da364 -------------------------------- commit 4419470191386456e0b8ed4eb06a70b0021798a6 upstream Add the admin guide for Processor MMIO stale data vulnerabilities. Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yipeng Zou Reviewed-by: Zhang Jianhua Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Zheng Zengkai --- Documentation/admin-guide/hw-vuln/index.rst | 1 + .../hw-vuln/processor_mmio_stale_data.rst | 246 ++++++++++++++++++ 2 files changed, 247 insertions(+) create mode 100644 Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index ca4dbdd9016d..2adec1e6520a 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -15,3 +15,4 @@ are configurable at compile, boot or run time. tsx_async_abort multihit.rst special-register-buffer-data-sampling.rst + processor_mmio_stale_data.rst diff --git a/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst new file mode 100644 index 000000000000..9393c50b5afc --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst @@ -0,0 +1,246 @@ +========================================= +Processor MMIO Stale Data Vulnerabilities +========================================= + +Processor MMIO Stale Data Vulnerabilities are a class of memory-mapped I/O +(MMIO) vulnerabilities that can expose data. The sequences of operations for +exposing data range from simple to very complex. Because most of the +vulnerabilities require the attacker to have access to MMIO, many environments +are not affected. System environments using virtualization where MMIO access is +provided to untrusted guests may need mitigation. These vulnerabilities are +not transient execution attacks. However, these vulnerabilities may propagate +stale data into core fill buffers where the data can subsequently be inferred +by an unmitigated transient execution attack. Mitigation for these +vulnerabilities includes a combination of microcode update and software +changes, depending on the platform and usage model. Some of these mitigations +are similar to those used to mitigate Microarchitectural Data Sampling (MDS) or +those used to mitigate Special Register Buffer Data Sampling (SRBDS). + +Data Propagators +================ +Propagators are operations that result in stale data being copied or moved from +one microarchitectural buffer or register to another. Processor MMIO Stale Data +Vulnerabilities are operations that may result in stale data being directly +read into an architectural, software-visible state or sampled from a buffer or +register. + +Fill Buffer Stale Data Propagator (FBSDP) +----------------------------------------- +Stale data may propagate from fill buffers (FB) into the non-coherent portion +of the uncore on some non-coherent writes. Fill buffer propagation by itself +does not make stale data architecturally visible. Stale data must be propagated +to a location where it is subject to reading or sampling. + +Sideband Stale Data Propagator (SSDP) +------------------------------------- +The sideband stale data propagator (SSDP) is limited to the client (including +Intel Xeon server E3) uncore implementation. The sideband response buffer is +shared by all client cores. For non-coherent reads that go to sideband +destinations, the uncore logic returns 64 bytes of data to the core, including +both requested data and unrequested stale data, from a transaction buffer and +the sideband response buffer. As a result, stale data from the sideband +response and transaction buffers may now reside in a core fill buffer. + +Primary Stale Data Propagator (PSDP) +------------------------------------ +The primary stale data propagator (PSDP) is limited to the client (including +Intel Xeon server E3) uncore implementation. Similar to the sideband response +buffer, the primary response buffer is shared by all client cores. For some +processors, MMIO primary reads will return 64 bytes of data to the core fill +buffer including both requested data and unrequested stale data. This is +similar to the sideband stale data propagator. + +Vulnerabilities +=============== +Device Register Partial Write (DRPW) (CVE-2022-21166) +----------------------------------------------------- +Some endpoint MMIO registers incorrectly handle writes that are smaller than +the register size. Instead of aborting the write or only copying the correct +subset of bytes (for example, 2 bytes for a 2-byte write), more bytes than +specified by the write transaction may be written to the register. On +processors affected by FBSDP, this may expose stale data from the fill buffers +of the core that created the write transaction. + +Shared Buffers Data Sampling (SBDS) (CVE-2022-21125) +---------------------------------------------------- +After propagators may have moved data around the uncore and copied stale data +into client core fill buffers, processors affected by MFBDS can leak data from +the fill buffer. It is limited to the client (including Intel Xeon server E3) +uncore implementation. + +Shared Buffers Data Read (SBDR) (CVE-2022-21123) +------------------------------------------------ +It is similar to Shared Buffer Data Sampling (SBDS) except that the data is +directly read into the architectural software-visible state. It is limited to +the client (including Intel Xeon server E3) uncore implementation. + +Affected Processors +=================== +Not all the CPUs are affected by all the variants. For instance, most +processors for the server market (excluding Intel Xeon E3 processors) are +impacted by only Device Register Partial Write (DRPW). + +Below is the list of affected Intel processors [#f1]_: + + =================== ============ ========= + Common name Family_Model Steppings + =================== ============ ========= + HASWELL_X 06_3FH 2,4 + SKYLAKE_L 06_4EH 3 + BROADWELL_X 06_4FH All + SKYLAKE_X 06_55H 3,4,6,7,11 + BROADWELL_D 06_56H 3,4,5 + SKYLAKE 06_5EH 3 + ICELAKE_X 06_6AH 4,5,6 + ICELAKE_D 06_6CH 1 + ICELAKE_L 06_7EH 5 + ATOM_TREMONT_D 06_86H All + LAKEFIELD 06_8AH 1 + KABYLAKE_L 06_8EH 9 to 12 + ATOM_TREMONT 06_96H 1 + ATOM_TREMONT_L 06_9CH 0 + KABYLAKE 06_9EH 9 to 13 + COMETLAKE 06_A5H 2,3,5 + COMETLAKE_L 06_A6H 0,1 + ROCKETLAKE 06_A7H 1 + =================== ============ ========= + +If a CPU is in the affected processor list, but not affected by a variant, it +is indicated by new bits in MSR IA32_ARCH_CAPABILITIES. As described in a later +section, mitigation largely remains the same for all the variants, i.e. to +clear the CPU fill buffers via VERW instruction. + +New bits in MSRs +================ +Newer processors and microcode update on existing affected processors added new +bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate +specific variants of Processor MMIO Stale Data vulnerabilities and mitigation +capability. + +MSR IA32_ARCH_CAPABILITIES +-------------------------- +Bit 13 - SBDR_SSDP_NO - When set, processor is not affected by either the + Shared Buffers Data Read (SBDR) vulnerability or the sideband stale + data propagator (SSDP). +Bit 14 - FBSDP_NO - When set, processor is not affected by the Fill Buffer + Stale Data Propagator (FBSDP). +Bit 15 - PSDP_NO - When set, processor is not affected by Primary Stale Data + Propagator (PSDP). +Bit 17 - FB_CLEAR - When set, VERW instruction will overwrite CPU fill buffer + values as part of MD_CLEAR operations. Processors that do not + enumerate MDS_NO (meaning they are affected by MDS) but that do + enumerate support for both L1D_FLUSH and MD_CLEAR implicitly enumerate + FB_CLEAR as part of their MD_CLEAR support. +Bit 18 - FB_CLEAR_CTRL - Processor supports read and write to MSR + IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]. On such processors, the FB_CLEAR_DIS + bit can be set to cause the VERW instruction to not perform the + FB_CLEAR action. Not all processors that support FB_CLEAR will support + FB_CLEAR_CTRL. + +MSR IA32_MCU_OPT_CTRL +--------------------- +Bit 3 - FB_CLEAR_DIS - When set, VERW instruction does not perform the FB_CLEAR +action. This may be useful to reduce the performance impact of FB_CLEAR in +cases where system software deems it warranted (for example, when performance +is more critical, or the untrusted software has no MMIO access). Note that +FB_CLEAR_DIS has no impact on enumeration (for example, it does not change +FB_CLEAR or MD_CLEAR enumeration) and it may not be supported on all processors +that enumerate FB_CLEAR. + +Mitigation +========== +Like MDS, all variants of Processor MMIO Stale Data vulnerabilities have the +same mitigation strategy to force the CPU to clear the affected buffers before +an attacker can extract the secrets. + +This is achieved by using the otherwise unused and obsolete VERW instruction in +combination with a microcode update. The microcode clears the affected CPU +buffers when the VERW instruction is executed. + +Kernel reuses the MDS function to invoke the buffer clearing: + + mds_clear_cpu_buffers() + +On MDS affected CPUs, the kernel already invokes CPU buffer clear on +kernel/userspace, hypervisor/guest and C-state (idle) transitions. No +additional mitigation is needed on such CPUs. + +For CPUs not affected by MDS or TAA, mitigation is needed only for the attacker +with MMIO capability. Therefore, VERW is not required for kernel/userspace. For +virtualization case, VERW is only needed at VMENTER for a guest with MMIO +capability. + +Mitigation points +----------------- +Return to user space +^^^^^^^^^^^^^^^^^^^^ +Same mitigation as MDS when affected by MDS/TAA, otherwise no mitigation +needed. + +C-State transition +^^^^^^^^^^^^^^^^^^ +Control register writes by CPU during C-state transition can propagate data +from fill buffer to uncore buffers. Execute VERW before C-state transition to +clear CPU fill buffers. + +Guest entry point +^^^^^^^^^^^^^^^^^ +Same mitigation as MDS when processor is also affected by MDS/TAA, otherwise +execute VERW at VMENTER only for MMIO capable guests. On CPUs not affected by +MDS/TAA, guest without MMIO access cannot extract secrets using Processor MMIO +Stale Data vulnerabilities, so there is no need to execute VERW for such guests. + +Mitigation control on the kernel command line +--------------------------------------------- +The kernel command line allows to control the Processor MMIO Stale Data +mitigations at boot time with the option "mmio_stale_data=". The valid +arguments for this option are: + + ========== ================================================================= + full If the CPU is vulnerable, enable mitigation; CPU buffer clearing + on exit to userspace and when entering a VM. Idle transitions are + protected as well. It does not automatically disable SMT. + full,nosmt Same as full, with SMT disabled on vulnerable CPUs. This is the + complete mitigation. + off Disables mitigation completely. + ========== ================================================================= + +If the CPU is affected and mmio_stale_data=off is not supplied on the kernel +command line, then the kernel selects the appropriate mitigation. + +Mitigation status information +----------------------------- +The Linux kernel provides a sysfs interface to enumerate the current +vulnerability status of the system: whether the system is vulnerable, and +which mitigations are active. The relevant sysfs file is: + + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data + +The possible values in this file are: + + .. list-table:: + + * - 'Not affected' + - The processor is not vulnerable + * - 'Vulnerable' + - The processor is vulnerable, but no mitigation enabled + * - 'Vulnerable: Clear CPU buffers attempted, no microcode' + - The processor is vulnerable, but microcode is not updated. The + mitigation is enabled on a best effort basis. + * - 'Mitigation: Clear CPU buffers' + - The processor is vulnerable and the CPU buffer clearing mitigation is + enabled. + +If the processor is vulnerable then the following information is appended to +the above information: + + ======================== =========================================== + 'SMT vulnerable' SMT is enabled + 'SMT disabled' SMT is disabled + 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown + ======================== =========================================== + +References +---------- +.. [#f1] Affected Processors + https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html -- Gitee From 8cba49b61ae02534e2d63866147b43a45a036c7c Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 6 Jul 2022 18:04:40 +0800 Subject: [PATCH 06/17] x86/speculation/mmio: Enumerate Processor MMIO Stale Data bug stable inclusion from stable-v5.10.123 commit e66310bc96b74ed3df9993e5d835ef3084d62048 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5D5RS CVE: CVE-2022-21123,CVE-2022-21125,CVE-2022-21166 Signed-off-by: Fang Minjuan Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-5.10.y&id=e66310bc96b74ed3df9993e5d835ef3084d62048 -------------------------------- commit 51802186158c74a0304f51ab963e7c2b3a2b046f upstream Processor MMIO Stale Data is a class of vulnerabilities that may expose data after an MMIO operation. For more details please refer to Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst Add the Processor MMIO Stale Data bug enumeration. A microcode update adds new bits to the MSR IA32_ARCH_CAPABILITIES, define them. Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yipeng Zou Reviewed-by: Zhang Jianhua Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Zheng Zengkai --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 19 +++++++++++ arch/x86/kernel/cpu/common.c | 43 ++++++++++++++++++++++-- tools/arch/x86/include/asm/cpufeatures.h | 1 + tools/arch/x86/include/asm/msr-index.h | 19 +++++++++++ 5 files changed, 81 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 3b407f46f1a0..f6a6ac0b3bcd 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -417,5 +417,6 @@ #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 972a34d93505..37a7b1ec1ac0 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -114,6 +114,25 @@ * Not susceptible to * TSX Async Abort (TAA) vulnerabilities. */ +#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /* + * Not susceptible to SBDR and SSDP + * variants of Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FBSDP_NO BIT(14) /* + * Not susceptible to FBSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_PSDP_NO BIT(15) /* + * Not susceptible to PSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FB_CLEAR BIT(17) /* + * VERW clears CPU fill buffer + * even on MDS_NO CPUs. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 25148ebd3634..df87feb21e21 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1097,18 +1097,39 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { X86_FEATURE_ANY, issues) #define SRBDS BIT(0) +/* CPU is affected by X86_BUG_MMIO_STALE_DATA */ +#define MMIO BIT(1) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) | + BIT(7) | BIT(0xB), MMIO), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0xC), SRBDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xD), SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO), + VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x1), MMIO), + VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO), + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO), {} }; @@ -1129,6 +1150,13 @@ u64 x86_read_arch_cap_msr(void) return ia32_cap; } +static bool arch_cap_mmio_immune(u64 ia32_cap) +{ + return (ia32_cap & ARCH_CAP_FBSDP_NO && + ia32_cap & ARCH_CAP_PSDP_NO && + ia32_cap & ARCH_CAP_SBDR_SSDP_NO); +} + static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = x86_read_arch_cap_msr(); @@ -1188,6 +1216,17 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) cpu_matches(cpu_vuln_blacklist, SRBDS)) setup_force_cpu_bug(X86_BUG_SRBDS); + /* + * Processor MMIO Stale Data bug enumeration + * + * Affected CPU list is generally enough to enumerate the vulnerability, + * but for virtualization case check for ARCH_CAP MSR bits also, VMM may + * not want the guest to enumerate the bug. + */ + if (cpu_matches(cpu_vuln_blacklist, MMIO) && + !arch_cap_mmio_immune(ia32_cap)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index b58730cc12e8..a7b5c5efcf3b 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -417,5 +417,6 @@ #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 972a34d93505..37a7b1ec1ac0 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -114,6 +114,25 @@ * Not susceptible to * TSX Async Abort (TAA) vulnerabilities. */ +#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /* + * Not susceptible to SBDR and SSDP + * variants of Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FBSDP_NO BIT(14) /* + * Not susceptible to FBSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_PSDP_NO BIT(15) /* + * Not susceptible to PSDP variant of + * Processor MMIO stale data + * vulnerabilities. + */ +#define ARCH_CAP_FB_CLEAR BIT(17) /* + * VERW clears CPU fill buffer + * even on MDS_NO CPUs. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* -- Gitee From 821185580d039b215da1befe15c22b4035663c6e Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 6 Jul 2022 18:04:41 +0800 Subject: [PATCH 07/17] x86/speculation: Add a common function for MD_CLEAR mitigation update stable inclusion from stable-v5.10.123 commit f83d4e5be4a3955a6c8af61ecec0934d0ece40c0 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5D5RS CVE: CVE-2022-21123,CVE-2022-21125,CVE-2022-21166 Signed-off-by: Fang Minjuan Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-5.10.y&id=f83d4e5be4a3955a6c8af61ecec0934d0ece40c0 -------------------------------- commit f52ea6c26953fed339aa4eae717ee5c2133c7ff2 upstream Processor MMIO Stale Data mitigation uses similar mitigation as MDS and TAA. In preparation for adding its mitigation, add a common function to update all mitigations that depend on MD_CLEAR. [ bp: Add a newline in md_clear_update_mitigation() to separate statements better. ] Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yipeng Zou Reviewed-by: Zhang Jianhua Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Zheng Zengkai --- arch/x86/kernel/cpu/bugs.c | 59 +++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index de39f73e0051..490809aa8f0e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -41,7 +41,7 @@ static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); -static void __init mds_print_mitigation(void); +static void __init md_clear_update_mitigation(void); static void __init taa_select_mitigation(void); static void __init srbds_select_mitigation(void); @@ -114,10 +114,10 @@ void __init check_bugs(void) srbds_select_mitigation(); /* - * As MDS and TAA mitigations are inter-related, print MDS - * mitigation until after TAA mitigation selection is done. + * As MDS and TAA mitigations are inter-related, update and print their + * mitigation after TAA mitigation selection is done. */ - mds_print_mitigation(); + md_clear_update_mitigation(); arch_smt_update(); @@ -258,14 +258,6 @@ static void __init mds_select_mitigation(void) } } -static void __init mds_print_mitigation(void) -{ - if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) - return; - - pr_info("%s\n", mds_strings[mds_mitigation]); -} - static int __init mds_cmdline(char *str) { if (!boot_cpu_has_bug(X86_BUG_MDS)) @@ -320,7 +312,7 @@ static void __init taa_select_mitigation(void) /* TSX previously disabled by tsx=off */ if (!boot_cpu_has(X86_FEATURE_RTM)) { taa_mitigation = TAA_MITIGATION_TSX_DISABLED; - goto out; + return; } if (cpu_mitigations_off()) { @@ -334,7 +326,7 @@ static void __init taa_select_mitigation(void) */ if (taa_mitigation == TAA_MITIGATION_OFF && mds_mitigation == MDS_MITIGATION_OFF) - goto out; + return; if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) taa_mitigation = TAA_MITIGATION_VERW; @@ -366,18 +358,6 @@ static void __init taa_select_mitigation(void) if (taa_nosmt || cpu_mitigations_auto_nosmt()) cpu_smt_disable(false); - - /* - * Update MDS mitigation, if necessary, as the mds_user_clear is - * now enabled for TAA mitigation. - */ - if (mds_mitigation == MDS_MITIGATION_OFF && - boot_cpu_has_bug(X86_BUG_MDS)) { - mds_mitigation = MDS_MITIGATION_FULL; - mds_select_mitigation(); - } -out: - pr_info("%s\n", taa_strings[taa_mitigation]); } static int __init tsx_async_abort_parse_cmdline(char *str) @@ -401,6 +381,33 @@ static int __init tsx_async_abort_parse_cmdline(char *str) } early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "" fmt + +static void __init md_clear_update_mitigation(void) +{ + if (cpu_mitigations_off()) + return; + + if (!static_key_enabled(&mds_user_clear)) + goto out; + + /* + * mds_user_clear is now enabled. Update MDS mitigation, if + * necessary. + */ + if (mds_mitigation == MDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MDS)) { + mds_mitigation = MDS_MITIGATION_FULL; + mds_select_mitigation(); + } +out: + if (boot_cpu_has_bug(X86_BUG_MDS)) + pr_info("MDS: %s\n", mds_strings[mds_mitigation]); + if (boot_cpu_has_bug(X86_BUG_TAA)) + pr_info("TAA: %s\n", taa_strings[taa_mitigation]); +} + #undef pr_fmt #define pr_fmt(fmt) "SRBDS: " fmt -- Gitee From bc1bee7ed5703a2c50914a82e5a1d85b89567645 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 6 Jul 2022 18:04:42 +0800 Subject: [PATCH 08/17] x86/speculation/mmio: Add mitigation for Processor MMIO Stale Data stable inclusion from stable-v5.10.123 commit 26f6f231f6a5a79ccc274967939b22602dec76e8 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5D5RS CVE: CVE-2022-21123,CVE-2022-21125,CVE-2022-21166 Signed-off-by: Fang Minjuan Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-5.10.y&id=26f6f231f6a5a79ccc274967939b22602dec76e8 -------------------------------- commit 8cb861e9e3c9a55099ad3d08e1a3b653d29c33ca upstream Processor MMIO Stale Data is a class of vulnerabilities that may expose data after an MMIO operation. For details please refer to Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst. These vulnerabilities are broadly categorized as: Device Register Partial Write (DRPW): Some endpoint MMIO registers incorrectly handle writes that are smaller than the register size. Instead of aborting the write or only copying the correct subset of bytes (for example, 2 bytes for a 2-byte write), more bytes than specified by the write transaction may be written to the register. On some processors, this may expose stale data from the fill buffers of the core that created the write transaction. Shared Buffers Data Sampling (SBDS): After propagators may have moved data around the uncore and copied stale data into client core fill buffers, processors affected by MFBDS can leak data from the fill buffer. Shared Buffers Data Read (SBDR): It is similar to Shared Buffer Data Sampling (SBDS) except that the data is directly read into the architectural software-visible state. An attacker can use these vulnerabilities to extract data from CPU fill buffers using MDS and TAA methods. Mitigate it by clearing the CPU fill buffers using the VERW instruction before returning to a user or a guest. On CPUs not affected by MDS and TAA, user application cannot sample data from CPU fill buffers using MDS or TAA. A guest with MMIO access can still use DRPW or SBDR to extract data architecturally. Mitigate it with VERW instruction to clear fill buffers before VMENTER for MMIO capable guests. Add a kernel parameter mmio_stale_data={off|full|full,nosmt} to control the mitigation. Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yipeng Zou Reviewed-by: Zhang Jianhua Reviewed-by: Xiu Jianfeng Signed-off-by: Zheng Zengkai --- .../admin-guide/kernel-parameters.txt | 36 ++++++ arch/x86/include/asm/nospec-branch.h | 2 + arch/x86/kernel/cpu/bugs.c | 111 +++++++++++++++++- arch/x86/kvm/vmx/vmx.c | 3 + 4 files changed, 148 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 7ad296e902ae..162c2fb23866 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2866,6 +2866,7 @@ kvm.nx_huge_pages=off [X86] no_entry_flush [PPC] no_uaccess_flush [PPC] + mmio_stale_data=off [X86] Exceptions: This does not have any effect on @@ -2887,6 +2888,7 @@ Equivalent to: l1tf=flush,nosmt [X86] mds=full,nosmt [X86] tsx_async_abort=full,nosmt [X86] + mmio_stale_data=full,nosmt [X86] mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this @@ -2896,6 +2898,40 @@ log everything. Information is printed at KERN_DEBUG so loglevel=8 may also need to be specified. + mmio_stale_data= + [X86,INTEL] Control mitigation for the Processor + MMIO Stale Data vulnerabilities. + + Processor MMIO Stale Data is a class of + vulnerabilities that may expose data after an MMIO + operation. Exposed data could originate or end in + the same CPU buffers as affected by MDS and TAA. + Therefore, similar to MDS and TAA, the mitigation + is to clear the affected CPU buffers. + + This parameter controls the mitigation. The + options are: + + full - Enable mitigation on vulnerable CPUs + + full,nosmt - Enable mitigation and disable SMT on + vulnerable CPUs. + + off - Unconditionally disable mitigation + + On MDS or TAA affected machines, + mmio_stale_data=off can be prevented by an active + MDS or TAA mitigation as these vulnerabilities are + mitigated with the same mechanism so in order to + disable this mitigation, you need to specify + mds=off and tsx_async_abort=off too. + + Not specifying this option is equivalent to + mmio_stale_data=full. + + For details see: + Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst + module.sig_enforce [KNL] When CONFIG_MODULE_SIG is set, this means that modules without (valid) signatures will fail to load. diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 4d0f5386e637..e247151c3dcf 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -255,6 +255,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); DECLARE_STATIC_KEY_FALSE(mds_user_clear); DECLARE_STATIC_KEY_FALSE(mds_idle_clear); +DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); + #include /** diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 490809aa8f0e..b703ccbe3e83 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -43,6 +43,7 @@ static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); static void __init md_clear_update_mitigation(void); static void __init taa_select_mitigation(void); +static void __init mmio_select_mitigation(void); static void __init srbds_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ @@ -77,6 +78,10 @@ EXPORT_SYMBOL_GPL(mds_user_clear); DEFINE_STATIC_KEY_FALSE(mds_idle_clear); EXPORT_SYMBOL_GPL(mds_idle_clear); +/* Controls CPU Fill buffer clear before KVM guest MMIO accesses */ +DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear); +EXPORT_SYMBOL_GPL(mmio_stale_data_clear); + void __init check_bugs(void) { identify_boot_cpu(); @@ -111,11 +116,13 @@ void __init check_bugs(void) l1tf_select_mitigation(); mds_select_mitigation(); taa_select_mitigation(); + mmio_select_mitigation(); srbds_select_mitigation(); /* - * As MDS and TAA mitigations are inter-related, update and print their - * mitigation after TAA mitigation selection is done. + * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update + * and print their mitigation after MDS, TAA and MMIO Stale Data + * mitigation selection is done. */ md_clear_update_mitigation(); @@ -381,6 +388,90 @@ static int __init tsx_async_abort_parse_cmdline(char *str) } early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "MMIO Stale Data: " fmt + +enum mmio_mitigations { + MMIO_MITIGATION_OFF, + MMIO_MITIGATION_UCODE_NEEDED, + MMIO_MITIGATION_VERW, +}; + +/* Default mitigation for Processor MMIO Stale Data vulnerabilities */ +static enum mmio_mitigations mmio_mitigation __ro_after_init = MMIO_MITIGATION_VERW; +static bool mmio_nosmt __ro_after_init = false; + +static const char * const mmio_strings[] = { + [MMIO_MITIGATION_OFF] = "Vulnerable", + [MMIO_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode", + [MMIO_MITIGATION_VERW] = "Mitigation: Clear CPU buffers", +}; + +static void __init mmio_select_mitigation(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || + cpu_mitigations_off()) { + mmio_mitigation = MMIO_MITIGATION_OFF; + return; + } + + if (mmio_mitigation == MMIO_MITIGATION_OFF) + return; + + ia32_cap = x86_read_arch_cap_msr(); + + /* + * Enable CPU buffer clear mitigation for host and VMM, if also affected + * by MDS or TAA. Otherwise, enable mitigation for VMM only. + */ + if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && + boot_cpu_has(X86_FEATURE_RTM))) + static_branch_enable(&mds_user_clear); + else + static_branch_enable(&mmio_stale_data_clear); + + /* + * Check if the system has the right microcode. + * + * CPU Fill buffer clear mitigation is enumerated by either an explicit + * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS + * affected systems. + */ + if ((ia32_cap & ARCH_CAP_FB_CLEAR) || + (boot_cpu_has(X86_FEATURE_MD_CLEAR) && + boot_cpu_has(X86_FEATURE_FLUSH_L1D) && + !(ia32_cap & ARCH_CAP_MDS_NO))) + mmio_mitigation = MMIO_MITIGATION_VERW; + else + mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED; + + if (mmio_nosmt || cpu_mitigations_auto_nosmt()) + cpu_smt_disable(false); +} + +static int __init mmio_stale_data_parse_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) { + mmio_mitigation = MMIO_MITIGATION_OFF; + } else if (!strcmp(str, "full")) { + mmio_mitigation = MMIO_MITIGATION_VERW; + } else if (!strcmp(str, "full,nosmt")) { + mmio_mitigation = MMIO_MITIGATION_VERW; + mmio_nosmt = true; + } + + return 0; +} +early_param("mmio_stale_data", mmio_stale_data_parse_cmdline); + #undef pr_fmt #define pr_fmt(fmt) "" fmt @@ -393,19 +484,31 @@ static void __init md_clear_update_mitigation(void) goto out; /* - * mds_user_clear is now enabled. Update MDS mitigation, if - * necessary. + * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data + * mitigation, if necessary. */ if (mds_mitigation == MDS_MITIGATION_OFF && boot_cpu_has_bug(X86_BUG_MDS)) { mds_mitigation = MDS_MITIGATION_FULL; mds_select_mitigation(); } + if (taa_mitigation == TAA_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_TAA)) { + taa_mitigation = TAA_MITIGATION_VERW; + taa_select_mitigation(); + } + if (mmio_mitigation == MMIO_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { + mmio_mitigation = MMIO_MITIGATION_VERW; + mmio_select_mitigation(); + } out: if (boot_cpu_has_bug(X86_BUG_MDS)) pr_info("MDS: %s\n", mds_strings[mds_mitigation]); if (boot_cpu_has_bug(X86_BUG_TAA)) pr_info("TAA: %s\n", taa_strings[taa_mitigation]); + if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) + pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); } #undef pr_fmt diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index de24d3826788..3229a5574a96 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6626,6 +6626,9 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vmx_l1d_flush(vcpu); else if (static_branch_unlikely(&mds_user_clear)) mds_clear_cpu_buffers(); + else if (static_branch_unlikely(&mmio_stale_data_clear) && + kvm_arch_has_assigned_device(vcpu->kvm)) + mds_clear_cpu_buffers(); if (vcpu->arch.cr2 != native_read_cr2()) native_write_cr2(vcpu->arch.cr2); -- Gitee From c2a323a56574ed20493bc4ba6c2830c7cef43136 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 6 Jul 2022 18:04:43 +0800 Subject: [PATCH 09/17] x86/bugs: Group MDS, TAA & Processor MMIO Stale Data mitigations stable inclusion from stable-v5.10.123 commit 56f0bca5e9c8456b7bb7089cbb6de866a9ba6da9 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5D5RS CVE: CVE-2022-21123,CVE-2022-21125,CVE-2022-21166 Signed-off-by: Fang Minjuan Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-5.10.y&id=56f0bca5e9c8456b7bb7089cbb6de866a9ba6da9 -------------------------------- commit e5925fb867290ee924fcf2fe3ca887b792714366 upstream MDS, TAA and Processor MMIO Stale Data mitigations rely on clearing CPU buffers. Moreover, status of these mitigations affects each other. During boot, it is important to maintain the order in which these mitigations are selected. This is especially true for md_clear_update_mitigation() that needs to be called after MDS, TAA and Processor MMIO Stale Data mitigation selection is done. Introduce md_clear_select_mitigation(), and select all these mitigations from there. This reflects relationships between these mitigations and ensures proper ordering. Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yipeng Zou Reviewed-by: Zhang Jianhua Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Zheng Zengkai --- arch/x86/kernel/cpu/bugs.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b703ccbe3e83..d4c8da792712 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -42,6 +42,7 @@ static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); static void __init md_clear_update_mitigation(void); +static void __init md_clear_select_mitigation(void); static void __init taa_select_mitigation(void); static void __init mmio_select_mitigation(void); static void __init srbds_select_mitigation(void); @@ -114,18 +115,9 @@ void __init check_bugs(void) spectre_v2_select_mitigation(); ssb_select_mitigation(); l1tf_select_mitigation(); - mds_select_mitigation(); - taa_select_mitigation(); - mmio_select_mitigation(); + md_clear_select_mitigation(); srbds_select_mitigation(); - /* - * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update - * and print their mitigation after MDS, TAA and MMIO Stale Data - * mitigation selection is done. - */ - md_clear_update_mitigation(); - arch_smt_update(); #ifdef CONFIG_X86_32 @@ -511,6 +503,20 @@ static void __init md_clear_update_mitigation(void) pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); } +static void __init md_clear_select_mitigation(void) +{ + mds_select_mitigation(); + taa_select_mitigation(); + mmio_select_mitigation(); + + /* + * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update + * and print their mitigation after MDS, TAA and MMIO Stale Data + * mitigation selection is done. + */ + md_clear_update_mitigation(); +} + #undef pr_fmt #define pr_fmt(fmt) "SRBDS: " fmt -- Gitee From 1034c95e0fee3e00a906e388eab49ffd5158740d Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 6 Jul 2022 18:04:44 +0800 Subject: [PATCH 10/17] x86/speculation/mmio: Enable CPU Fill buffer clearing on idle stable inclusion from stable-v5.10.123 commit 3eb1180564fa0ecedc33b44029da7687c0a9fbf5 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5D5RS CVE: CVE-2022-21123,CVE-2022-21125,CVE-2022-21166 Signed-off-by: Fang Minjuan Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-5.10.y&id=3eb1180564fa0ecedc33b44029da7687c0a9fbf5 -------------------------------- commit 99a83db5a605137424e1efe29dc0573d6a5b6316 upstream When the CPU is affected by Processor MMIO Stale Data vulnerabilities, Fill Buffer Stale Data Propagator (FBSDP) can propagate stale data out of Fill buffer to uncore buffer when CPU goes idle. Stale data can then be exploited with other variants using MMIO operations. Mitigate it by clearing the Fill buffer before entering idle state. Signed-off-by: Pawan Gupta Signed-off-by: Thomas Gleixner Co-developed-by: Josh Poimboeuf Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yipeng Zou Reviewed-by: Zhang Jianhua Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Zheng Zengkai --- arch/x86/kernel/cpu/bugs.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d4c8da792712..685fc693fbab 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -424,6 +424,14 @@ static void __init mmio_select_mitigation(void) else static_branch_enable(&mmio_stale_data_clear); + /* + * If Processor-MMIO-Stale-Data bug is present and Fill Buffer data can + * be propagated to uncore buffers, clearing the Fill buffers on idle + * is required irrespective of SMT state. + */ + if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) + static_branch_enable(&mds_idle_clear); + /* * Check if the system has the right microcode. * @@ -1188,6 +1196,8 @@ static void update_indir_branch_cond(void) /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { + u64 ia32_cap = x86_read_arch_cap_msr(); + /* * Enable the idle clearing if SMT is active on CPUs which are * affected only by MSBDS and not any other MDS variant. @@ -1199,10 +1209,12 @@ static void update_mds_branch_idle(void) if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) return; - if (sched_smt_active()) + if (sched_smt_active()) { static_branch_enable(&mds_idle_clear); - else + } else if (mmio_mitigation == MMIO_MITIGATION_OFF || + (ia32_cap & ARCH_CAP_FBSDP_NO)) { static_branch_disable(&mds_idle_clear); + } } #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" -- Gitee From 8c53ca45fa52c9e633cae1721af7837b3914d86a Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 6 Jul 2022 18:04:45 +0800 Subject: [PATCH 11/17] x86/speculation/mmio: Add sysfs reporting for Processor MMIO Stale Data stable inclusion from stable-v5.10.123 commit 001415e4e626403c9ff35f2498feb0021d0c8328 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5D5RS CVE: CVE-2022-21123,CVE-2022-21125,CVE-2022-21166 Signed-off-by: Fang Minjuan Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-5.10.y&id=001415e4e626403c9ff35f2498feb0021d0c8328 -------------------------------- commit 8d50cdf8b8341770bc6367bce40c0c1bb0e1d5b3 upstream Add the sysfs reporting file for Processor MMIO Stale Data vulnerability. It exposes the vulnerability and mitigation state similar to the existing files for the other hardware vulnerabilities. Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yipeng Zou Reviewed-by: Zhang Jianhua Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Zheng Zengkai --- .../ABI/testing/sysfs-devices-system-cpu | 1 + arch/x86/kernel/cpu/bugs.c | 22 +++++++++++++++++++ drivers/base/cpu.c | 8 +++++++ include/linux/cpu.h | 3 +++ 4 files changed, 34 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 1a04ca8162ad..44c6e5730398 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -510,6 +510,7 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/srbds /sys/devices/system/cpu/vulnerabilities/tsx_async_abort /sys/devices/system/cpu/vulnerabilities/itlb_multihit + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 685fc693fbab..374cfbe72b6c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1843,6 +1843,20 @@ static ssize_t tsx_async_abort_show_state(char *buf) sched_smt_active() ? "vulnerable" : "disabled"); } +static ssize_t mmio_stale_data_show_state(char *buf) +{ + if (mmio_mitigation == MMIO_MITIGATION_OFF) + return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]); + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + return sysfs_emit(buf, "%s; SMT Host state unknown\n", + mmio_strings[mmio_mitigation]); + } + + return sysfs_emit(buf, "%s; SMT %s\n", mmio_strings[mmio_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); +} + static char *stibp_state(void) { if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) @@ -1943,6 +1957,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SRBDS: return srbds_show_state(buf); + case X86_BUG_MMIO_STALE_DATA: + return mmio_stale_data_show_state(buf); + default: break; } @@ -1994,4 +2011,9 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char * { return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS); } + +ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); +} #endif diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 8f1d6569564c..8ecb9f90f467 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -566,6 +566,12 @@ ssize_t __weak cpu_show_srbds(struct device *dev, return sysfs_emit(buf, "Not affected\n"); } +ssize_t __weak cpu_show_mmio_stale_data(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); @@ -575,6 +581,7 @@ static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); +static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -586,6 +593,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_tsx_async_abort.attr, &dev_attr_itlb_multihit.attr, &dev_attr_srbds.attr, + &dev_attr_mmio_stale_data.attr, NULL }; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index d6428aaf67e7..d63b8f70d123 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -65,6 +65,9 @@ extern ssize_t cpu_show_tsx_async_abort(struct device *dev, extern ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_mmio_stale_data(struct device *dev, + struct device_attribute *attr, + char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- Gitee From 3efa8e0f5f114a17f02e18b8edd8d2f6855eda11 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 6 Jul 2022 18:04:46 +0800 Subject: [PATCH 12/17] x86/speculation/srbds: Update SRBDS mitigation selection stable inclusion from stable-v5.10.123 commit cf1c01a5e4c3e269b9211ae2ef0a57f8c9474bfc category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5D5RS CVE: CVE-2022-21123,CVE-2022-21125,CVE-2022-21166 Signed-off-by: Fang Minjuan Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-5.10.y&id=cf1c01a5e4c3e269b9211ae2ef0a57f8c9474bfc -------------------------------- commit 22cac9c677c95f3ac5c9244f8ca0afdc7c8afb19 upstream Currently, Linux disables SRBDS mitigation on CPUs not affected by MDS and have the TSX feature disabled. On such CPUs, secrets cannot be extracted from CPU fill buffers using MDS or TAA. Without SRBDS mitigation, Processor MMIO Stale Data vulnerabilities can be used to extract RDRAND, RDSEED, and EGETKEY data. Do not disable SRBDS mitigation by default when CPU is also affected by Processor MMIO Stale Data vulnerabilities. Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yipeng Zou Reviewed-by: Zhang Jianhua Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Zheng Zengkai --- arch/x86/kernel/cpu/bugs.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 374cfbe72b6c..2a21046846b6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -586,11 +586,13 @@ static void __init srbds_select_mitigation(void) return; /* - * Check to see if this is one of the MDS_NO systems supporting - * TSX that are only exposed to SRBDS when TSX is enabled. + * Check to see if this is one of the MDS_NO systems supporting TSX that + * are only exposed to SRBDS when TSX is enabled or when CPU is affected + * by Processor MMIO Stale Data vulnerability. */ ia32_cap = x86_read_arch_cap_msr(); - if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM)) + if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && + !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR; -- Gitee From 8f0ebec126d087b127713ce17f042cf1182af5c6 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 6 Jul 2022 18:04:47 +0800 Subject: [PATCH 13/17] x86/speculation/mmio: Reuse SRBDS mitigation for SBDS stable inclusion from stable-v5.10.123 commit 6df693dca31218f76c63b6fd4aa7b7db3bd6e049 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5D5RS CVE: CVE-2022-21123,CVE-2022-21125,CVE-2022-21166 Signed-off-by: Fang Minjuan Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-5.10.y&id=6df693dca31218f76c63b6fd4aa7b7db3bd6e049 -------------------------------- commit a992b8a4682f119ae035a01b40d4d0665c4a2875 upstream The Shared Buffers Data Sampling (SBDS) variant of Processor MMIO Stale Data vulnerabilities may expose RDRAND, RDSEED and SGX EGETKEY data. Mitigation for this is added by a microcode update. As some of the implications of SBDS are similar to SRBDS, SRBDS mitigation infrastructure can be leveraged by SBDS. Set X86_BUG_SRBDS and use SRBDS mitigation. Mitigation is enabled by default; use srbds=off to opt-out. Mitigation status can be checked from below file: /sys/devices/system/cpu/vulnerabilities/srbds Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yipeng Zou Reviewed-by: Zhang Jianhua Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Zheng Zengkai --- arch/x86/kernel/cpu/common.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index df87feb21e21..2825678c101c 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1099,6 +1099,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define SRBDS BIT(0) /* CPU is affected by X86_BUG_MMIO_STALE_DATA */ #define MMIO BIT(1) +/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */ +#define MMIO_SBDS BIT(2) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1120,16 +1122,17 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS), VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO), VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS), - VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS), VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO), VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO), - VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO), - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x1), MMIO), - VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO), + VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO), + VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), {} }; @@ -1210,10 +1213,14 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) /* * SRBDS affects CPUs which support RDRAND or RDSEED and are listed * in the vulnerability blacklist. + * + * Some of the implications and mitigation of Shared Buffers Data + * Sampling (SBDS) are similar to SRBDS. Give SBDS same treatment as + * SRBDS. */ if ((cpu_has(c, X86_FEATURE_RDRAND) || cpu_has(c, X86_FEATURE_RDSEED)) && - cpu_matches(cpu_vuln_blacklist, SRBDS)) + cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS)) setup_force_cpu_bug(X86_BUG_SRBDS); /* -- Gitee From fa41192861943497399dbdebdc97f40cad1d25aa Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Wed, 6 Jul 2022 18:04:48 +0800 Subject: [PATCH 14/17] KVM: x86/speculation: Disable Fill buffer clear within guests stable inclusion from stable-v5.10.123 commit bde15fdcce44956278b4f50680b7363ca126ffb9 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5D5RS CVE: CVE-2022-21123,CVE-2022-21125,CVE-2022-21166 Signed-off-by: Fang Minjuan Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-5.10.y&id=bde15fdcce44956278b4f50680b7363ca126ffb9 -------------------------------- commit 027bbb884be006b05d9c577d6401686053aa789e upstream The enumeration of MD_CLEAR in CPUID(EAX=7,ECX=0).EDX{bit 10} is not an accurate indicator on all CPUs of whether the VERW instruction will overwrite fill buffers. FB_CLEAR enumeration in IA32_ARCH_CAPABILITIES{bit 17} covers the case of CPUs that are not vulnerable to MDS/TAA, indicating that microcode does overwrite fill buffers. Guests running in VMM environments may not be aware of all the capabilities/vulnerabilities of the host CPU. Specifically, a guest may apply MDS/TAA mitigations when a virtual CPU is enumerated as vulnerable to MDS/TAA even when the physical CPU is not. On CPUs that enumerate FB_CLEAR_CTRL the VMM may set FB_CLEAR_DIS to skip overwriting of fill buffers by the VERW instruction. This is done by setting FB_CLEAR_DIS during VMENTER and resetting on VMEXIT. For guests that enumerate FB_CLEAR (explicitly asking for fill buffer clear capability) the VMM will not use FB_CLEAR_DIS. Irrespective of guest state, host overwrites CPU buffers before VMENTER to protect itself from an MMIO capable guest, as part of mitigation for MMIO Stale Data vulnerabilities. Signed-off-by: Pawan Gupta Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman Conflicts: arch/x86/kvm/vmx/vmx.h Signed-off-by: Yipeng Zou Reviewed-by: Zhang Jianhua Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Zheng Zengkai --- arch/x86/include/asm/msr-index.h | 6 +++ arch/x86/kvm/vmx/vmx.c | 69 ++++++++++++++++++++++++++ arch/x86/kvm/vmx/vmx.h | 2 + arch/x86/kvm/x86.c | 3 ++ tools/arch/x86/include/asm/msr-index.h | 6 +++ 5 files changed, 86 insertions(+) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 37a7b1ec1ac0..96973d197972 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -133,6 +133,11 @@ * VERW clears CPU fill buffer * even on MDS_NO CPUs. */ +#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /* + * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS] + * bit available to control VERW + * behavior. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -150,6 +155,7 @@ /* SRBDS support */ #define MSR_IA32_MCU_OPT_CTRL 0x00000123 #define RNGDS_MITG_DIS BIT(0) +#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 3229a5574a96..040f2ebf12e0 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -226,6 +226,9 @@ static const struct { #define L1D_CACHE_ORDER 4 static void *vmx_l1d_flush_pages; +/* Control for disabling CPU Fill buffer clear */ +static bool __read_mostly vmx_fb_clear_ctrl_available; + static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) { struct page *page; @@ -357,6 +360,60 @@ static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp) return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); } +static void vmx_setup_fb_clear_ctrl(void) +{ + u64 msr; + + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) && + !boot_cpu_has_bug(X86_BUG_MDS) && + !boot_cpu_has_bug(X86_BUG_TAA)) { + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); + if (msr & ARCH_CAP_FB_CLEAR_CTRL) + vmx_fb_clear_ctrl_available = true; + } +} + +static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx) +{ + u64 msr; + + if (!vmx->disable_fb_clear) + return; + + rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + msr |= FB_CLEAR_DIS; + wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + /* Cache the MSR value to avoid reading it later */ + vmx->msr_ia32_mcu_opt_ctrl = msr; +} + +static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) +{ + if (!vmx->disable_fb_clear) + return; + + vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS; + wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); +} + +static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) +{ + vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; + + /* + * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS + * at VMEntry. Skip the MSR read/write when a guest has no use case to + * execute VERW. + */ + if ((vcpu->arch.arch_capabilities & ARCH_CAP_FB_CLEAR) || + ((vcpu->arch.arch_capabilities & ARCH_CAP_MDS_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_TAA_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_PSDP_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_FBSDP_NO) && + (vcpu->arch.arch_capabilities & ARCH_CAP_SBDR_SSDP_NO))) + vmx->disable_fb_clear = false; +} + static const struct kernel_param_ops vmentry_l1d_flush_ops = { .set = vmentry_l1d_flush_set, .get = vmentry_l1d_flush_get, @@ -2210,6 +2267,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ret = kvm_set_msr_common(vcpu, msr_info); } + /* FB_CLEAR may have changed, also update the FB_CLEAR_DIS behavior */ + if (msr_index == MSR_IA32_ARCH_CAPABILITIES) + vmx_update_fb_clear_dis(vcpu, vmx); + return ret; } @@ -4478,6 +4539,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vpid_sync_context(vmx->vpid); if (init_event) vmx_clear_hlt(vcpu); + + vmx_update_fb_clear_dis(vcpu, vmx); } static void enable_irq_window(struct kvm_vcpu *vcpu) @@ -6630,6 +6693,8 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, kvm_arch_has_assigned_device(vcpu->kvm)) mds_clear_cpu_buffers(); + vmx_disable_fb_clear(vmx); + if (vcpu->arch.cr2 != native_read_cr2()) native_write_cr2(vcpu->arch.cr2); @@ -6638,6 +6703,8 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vcpu->arch.cr2 = native_read_cr2(); + vmx_enable_fb_clear(vmx); + /* * VMEXIT disables interrupts (host state), but tracing and lockdep * have them in state 'on' as recorded before entering guest mode. @@ -8018,6 +8085,8 @@ static int __init vmx_init(void) return r; } + vmx_setup_fb_clear_ctrl(); + for_each_possible_cpu(cpu) { INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 73d87d44b657..0d90c620f6b4 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -300,6 +300,8 @@ struct vcpu_vmx { u64 msr_ia32_feature_control; u64 msr_ia32_feature_control_valid_bits; u64 ept_pointer; + u64 msr_ia32_mcu_opt_ctrl; + bool disable_fb_clear; struct pt_desc pt_desc; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b49a121c50fe..ef02d23a8114 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1408,6 +1408,9 @@ static u64 kvm_get_arch_capabilities(void) */ } + /* Guests don't need to know "Fill buffer clear control" exists */ + data &= ~ARCH_CAP_FB_CLEAR_CTRL; + return data; } diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 37a7b1ec1ac0..96973d197972 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -133,6 +133,11 @@ * VERW clears CPU fill buffer * even on MDS_NO CPUs. */ +#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /* + * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS] + * bit available to control VERW + * behavior. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -150,6 +155,7 @@ /* SRBDS support */ #define MSR_IA32_MCU_OPT_CTRL 0x00000123 #define RNGDS_MITG_DIS BIT(0) +#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 -- Gitee From 2b7af6c49af69c1da12fc0056db92ef5aec2f646 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Jun 2022 14:41:11 +0800 Subject: [PATCH 15/17] tcp: change source port randomizarion at connect() time stable inclusion from stable-v5.10.119 commit 33f1b4a27abced7ae0f740d2ec3040debf7c4b3c category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5C3A9 CVE: CVE-2022-32296 Signed-off-by: Fang Minjuan Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=33f1b4a27abced7ae0f740d2ec3040debf7c4b3c -------------------------------- commit 190cc82489f46f9d88e73c81a47e14f80a791e1a upstream. RFC 6056 (Recommendations for Transport-Protocol Port Randomization) provides good summary of why source selection needs extra care. David Dworken reminded us that linux implements Algorithm 3 as described in RFC 6056 3.3.3 Quoting David : In the context of the web, this creates an interesting info leak where websites can count how many TCP connections a user's computer is establishing over time. For example, this allows a website to count exactly how many subresources a third party website loaded. This also allows: - Distinguishing between different users behind a VPN based on distinct source port ranges. - Tracking users over time across multiple networks. - Covert communication channels between different browsers/browser profiles running on the same computer - Tracking what applications are running on a computer based on the pattern of how fast source ports are getting incremented. Section 3.3.4 describes an enhancement, that reduces attackers ability to use the basic information currently stored into the shared 'u32 hint'. This change also decreases collision rate when multiple applications need to connect() to different destinations. Signed-off-by: Eric Dumazet Reported-by: David Dworken Cc: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Stefan Ghinea Signed-off-by: Greg Kroah-Hartman Conflicts: net/ipv4/inet_hashtables.c Signed-off-by: Baisong Zhong Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Zheng Zengkai --- net/ipv4/inet_hashtables.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 990af426aaa8..581acf8ef2c1 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -709,6 +709,17 @@ void inet_unhash(struct sock *sk) } EXPORT_SYMBOL_GPL(inet_unhash); +/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm + * Note that we use 32bit integers (vs RFC 'short integers') + * because 2^16 is not a multiple of num_ephemeral and this + * property might be used by clever attacker. + * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, + * we use 256 instead to really give more isolation and + * privacy, this only consumes 1 KB of kernel memory. + */ +#define INET_TABLE_PERTURB_SHIFT 8 +static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT]; + int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u64 port_offset, int (*check_established)(struct inet_timewait_death_row *, @@ -722,8 +733,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct inet_bind_bucket *tb; u32 remaining, offset; int ret, i, low, high; - static u32 hint; int l3mdev; + u32 index; if (port) { head = &hinfo->bhash[inet_bhashfn(net, port, @@ -750,7 +761,10 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, if (likely(remaining > 1)) remaining &= ~1U; - offset = hint + port_offset; + net_get_random_once(table_perturb, sizeof(table_perturb)); + index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT); + + offset = READ_ONCE(table_perturb[index]) + port_offset; offset %= remaining; /* In first pass we try ports of @low parity. * inet_csk_get_port() does the opposite choice. @@ -805,7 +819,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, return -EADDRNOTAVAIL; ok: - hint += i + 2; + WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2); /* Head lock still held and bh's disabled */ inet_bind_hash(sk, tb, port); -- Gitee From 20a6fb76f17219b49048e9960f85a95db20b6bf2 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Wed, 22 Jun 2022 14:41:12 +0800 Subject: [PATCH 16/17] tcp: increase source port perturb table to 2^16 mainline inclusion from mainline-v5.18-rc6 commit 4c2c8f03a5ab7cb04ec64724d7d176d00bcc91e5 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5C3A9 CVE: CVE-2022-32296 Signed-off-by: Fang Minjuan Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4c2c8f03a5ab7cb04ec64724d7d176d00bcc91e5 -------------------------------- Moshe Kol, Amit Klein, and Yossi Gilad reported being able to accurately identify a client by forcing it to emit only 40 times more connections than there are entries in the table_perturb[] table. The previous two improvements consisting in resalting the secret every 10s and adding randomness to each port selection only slightly improved the situation, and the current value of 2^8 was too small as it's not very difficult to make a client emit 10k connections in less than 10 seconds. Thus we're increasing the perturb table from 2^8 to 2^16 so that the same precision now requires 2.6M connections, which is more difficult in this time frame and harder to hide as a background activity. The impact is that the table now uses 256 kB instead of 1 kB, which could mostly affect devices making frequent outgoing connections. However such components usually target a small set of destinations (load balancers, database clients, perf assessment tools), and in practice only a few entries will be visited, like before. A live test at 1 million connections per second showed no performance difference from the previous value. Reported-by: Moshe Kol Reported-by: Yossi Gilad Reported-by: Amit Klein Reviewed-by: Eric Dumazet Signed-off-by: Willy Tarreau Signed-off-by: Jakub Kicinski Conflicts: net/ipv4/inet_hashtables.c Signed-off-by: Baisong Zhong Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Zheng Zengkai --- net/ipv4/inet_hashtables.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 581acf8ef2c1..f7a3679d8867 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -713,11 +713,12 @@ EXPORT_SYMBOL_GPL(inet_unhash); * Note that we use 32bit integers (vs RFC 'short integers') * because 2^16 is not a multiple of num_ephemeral and this * property might be used by clever attacker. - * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, - * we use 256 instead to really give more isolation and - * privacy, this only consumes 1 KB of kernel memory. + * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though + * attacks were since demonstrated, thus we use 65536 instead to really + * give more isolation and privacy, at the expense of 256kB of kernel + * memory. */ -#define INET_TABLE_PERTURB_SHIFT 8 +#define INET_TABLE_PERTURB_SHIFT 16 static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT]; int __inet_hash_connect(struct inet_timewait_death_row *death_row, -- Gitee From 88126125b5048fb6cc2263475b96be8be49a1fc9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 31 May 2022 23:03:54 +0800 Subject: [PATCH 17/17] KVM: x86/mmu: fix NULL pointer dereference on guest INVPCID mainline inclusion from mainline-v5.18 commit 9f46c187e2e680ecd9de7983e4d081c3391acc76 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I59I19 CVE: CVE-2022-1789 Signed-off-by: Fang Minjuan Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9f46c187e2e680ecd9de7983e4d081c3391acc76 -------------------------------- With shadow paging enabled, the INVPCID instruction results in a call to kvm_mmu_invpcid_gva. If INVPCID is executed with CR0.PG=0, the invlpg callback is not set and the result is a NULL pointer dereference. Fix it trivially by checking for mmu->invlpg before every call. There are other possibilities: - check for CR0.PG, because KVM (like all Intel processors after P5) flushes guest TLB on CR0.PG changes so that INVPCID/INVLPG are a nop with paging disabled - check for EFER.LMA, because KVM syncs and flushes when switching MMU contexts outside of 64-bit mode All of these are tricky, go for the simple solution. This is CVE-2022-1789. Reported-by: Yongkang Jia Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Yipeng Zou Reviewed-by: Zhang Jianhua Signed-off-by: Zheng Zengkai --- arch/x86/kvm/mmu/mmu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 7ca2da902829..5c9284c7a949 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5171,14 +5171,16 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) uint i; if (pcid == kvm_get_active_pcid(vcpu)) { - mmu->invlpg(vcpu, gva, mmu->root_hpa); + if (mmu->invlpg) + mmu->invlpg(vcpu, gva, mmu->root_hpa); tlb_flush = true; } for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { if (VALID_PAGE(mmu->prev_roots[i].hpa) && pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) { - mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); + if (mmu->invlpg) + mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); tlb_flush = true; } } -- Gitee