From fa5786c16f98dba7578bcb41170861c1d635b155 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 1 Aug 2023 19:14:24 +0200 Subject: [PATCH 1/3] libceph: fix potential hang in ceph_osdc_notify() ANBZ: #11912 commit e6e2843230799230fc5deb8279728a7218b0d63c upstream. If the cluster becomes unavailable, ceph_osdc_notify() may hang even with osd_request_timeout option set because linger_notify_finish_wait() waits for MWatchNotify NOTIFY_COMPLETE message with no associated OSD request in flight -- it's completely asynchronous. Introduce an additional timeout, derived from the specified notify timeout. While at it, switch both waits to killable which is more correct. Cc: stable@vger.kernel.org Signed-off-by: Ilya Dryomov Reviewed-by: Dongsheng Yang Reviewed-by: Xiubo Li Signed-off-by: Greg Kroah-Hartman Signed-off-by: Xiuqiao Wu --- net/ceph/osd_client.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 1e9fab79e245..d594cd501861 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -3330,17 +3330,24 @@ static int linger_reg_commit_wait(struct ceph_osd_linger_request *lreq) int ret; dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id); - ret = wait_for_completion_interruptible(&lreq->reg_commit_wait); + ret = wait_for_completion_killable(&lreq->reg_commit_wait); return ret ?: lreq->reg_commit_error; } -static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq) +static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq, + unsigned long timeout) { - int ret; + long left; dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id); - ret = wait_for_completion_interruptible(&lreq->notify_finish_wait); - return ret ?: lreq->notify_finish_error; + left = wait_for_completion_killable_timeout(&lreq->notify_finish_wait, + ceph_timeout_jiffies(timeout)); + if (left <= 0) + left = left ?: -ETIMEDOUT; + else + left = lreq->notify_finish_error; /* completed */ + + return left; } /* @@ -4888,7 +4895,8 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc, linger_submit(lreq); ret = linger_reg_commit_wait(lreq); if (!ret) - ret = linger_notify_finish_wait(lreq); + ret = linger_notify_finish_wait(lreq, + msecs_to_jiffies(2 * timeout * MSEC_PER_SEC)); else dout("lreq %p failed to initiate notify %d\n", lreq, ret); -- Gitee From f571c9cb17c2119027afdba637e85c5324dc8ed2 Mon Sep 17 00:00:00 2001 From: Jordan Rife Date: Wed, 4 Oct 2023 18:38:27 -0500 Subject: [PATCH 2/3] libceph: use kernel_connect() ANBZ: #11912 commit 7563cf17dce0a875ba3d872acdc63a78ea344019 upstream. Direct calls to ops->connect() can overwrite the address parameter when used in conjunction with BPF SOCK_ADDR hooks. Recent changes to kernel_connect() ensure that callers are insulated from such side effects. This patch wraps the direct call to ops->connect() with kernel_connect() to prevent unexpected changes to the address passed to ceph_tcp_connect(). This change was originally part of a larger patch targeting the net tree addressing all instances of unprotected calls to ops->connect() throughout the kernel, but this change was split up into several patches targeting various trees. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/netdev/20230821100007.559638-1-jrife@google.com/ Link: https://lore.kernel.org/netdev/9944248dba1bce861375fcce9de663934d933ba9.camel@redhat.com/ Fixes: d74bad4e74ee ("bpf: Hooks for sys_connect") Signed-off-by: Jordan Rife Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman Signed-off-by: Xiuqiao Wu --- net/ceph/messenger.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index af0f1fa24937..192a43f8efa2 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -477,8 +477,8 @@ static int ceph_tcp_connect(struct ceph_connection *con) dout("connect %s\n", ceph_pr_addr(&con->peer_addr)); con_sock_state_connecting(con); - ret = sock->ops->connect(sock, (struct sockaddr *)&ss, sizeof(ss), - O_NONBLOCK); + ret = kernel_connect(sock, (struct sockaddr *)&ss, sizeof(ss), + O_NONBLOCK); if (ret == -EINPROGRESS) { dout("connect %s EINPROGRESS sk_state = %u\n", ceph_pr_addr(&con->peer_addr), -- Gitee From dd624e03d2be820e14cf350849ec2c7ea8c615fc Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 23 Jul 2024 18:08:08 +0200 Subject: [PATCH 3/3] rbd: don't assume rbd_is_lock_owner() for exclusive mappings ANBZ: #11912 commit 3ceccb14f5576e02b81cc8b105ab81f224bd87f6 upstream. Expanding on the previous commit, assuming that rbd_is_lock_owner() always returns true (i.e. that we are either in RBD_LOCK_STATE_LOCKED or RBD_LOCK_STATE_QUIESCING) if the mapping is exclusive is wrong too. In case ceph_cls_set_cookie() fails, the lock would be temporarily released even if the mapping is exclusive, meaning that we can end up even in RBD_LOCK_STATE_UNLOCKED. IOW, exclusive mappings are really "just" about disabling automatic lock transitions (as documented in the man page), not about grabbing the lock and holding on to it whatever it takes. Cc: stable@vger.kernel.org Fixes: 637cd060537d ("rbd: new exclusive lock wait/wake code") Signed-off-by: Ilya Dryomov Reviewed-by: Dongsheng Yang Signed-off-by: Greg Kroah-Hartman Signed-off-by: Xiuqiao Wu --- drivers/block/rbd.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 6800e2d7f54b..6a596d395b4d 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -6660,11 +6660,6 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev) if (ret) return ret; - /* - * The lock may have been released by now, unless automatic lock - * transitions are disabled. - */ - rbd_assert(!rbd_dev->opts->exclusive || rbd_is_lock_owner(rbd_dev)); return 0; } -- Gitee