From d39cf05a232dbd169920c58acb43a8a0137212ce Mon Sep 17 00:00:00 2001 From: bizhiyuan Date: Tue, 29 Oct 2024 18:49:59 +0800 Subject: [PATCH] Filesystem: only use $umount_force after sending kill_signals Filesystem: on stop, try umount directly, before scanning for users --- ...op-try-umount-directly-before-scanni.patch | 49 +++++++++++++++ ...use-umount_force-after-sending-kill_.patch | 60 +++++++++++++++++++ resource-agents.spec | 8 ++- 3 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 backport-Filesystem-on-stop-try-umount-directly-before-scanni.patch create mode 100644 backport-Filesystem-only-use-umount_force-after-sending-kill_.patch diff --git a/backport-Filesystem-on-stop-try-umount-directly-before-scanni.patch b/backport-Filesystem-on-stop-try-umount-directly-before-scanni.patch new file mode 100644 index 0000000..c7a1395 --- /dev/null +++ b/backport-Filesystem-on-stop-try-umount-directly-before-scanni.patch @@ -0,0 +1,49 @@ +From f02afd0fadb581ca0fc9798beaf28044cf211200 Mon Sep 17 00:00:00 2001 +From: Lars Ellenberg +Date: Wed, 18 Sep 2024 11:53:52 +0200 +Subject: [PATCH] Filesystem: on stop, try umount directly, before scanning for + users + +48ed6e6d (Filesystem: improve stop-action and allow setting term/kill signals and signal_delay for large filesystems, 2023-07-04) +changed the logic from +"try umount; if that fails, find and kill users; repeat" to +"try to find and kill users; then try umount; repeat" + +But even just walking /proc may take "a long time" on busy systems, +and may still turn up with "no users found". + +It will take even longer for "force_umount=safe" +(observed 8 to 10 seconds just for "get_pids() with "safe" to return nothing) +than for "force_umount=yes" (still ~ 2 to 3 seconds), +but it will take "a long time" in any case. +(BTW, that may be longer than the hardcoded default of 6 seconds for "fast_stop", +which is also the default on many systems now) + +If the dependencies are properly configured, +there should be no users left, +and the umount should just work. + +Revert back to "try umount first", and only then try to find "rogue" users. +--- + heartbeat/Filesystem | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem +index 4dd962fd..99bddaf6 100755 +--- a/heartbeat/Filesystem ++++ b/heartbeat/Filesystem +@@ -732,6 +732,11 @@ fs_stop() { + local SUB="$1" timeout=$2 grace_time ret + grace_time=$((timeout/2)) + ++ # Just walking /proc may take "a long time", even if we don't find any users of this FS. ++ # If dependencies are properly configured, umount should just work. ++ # Only if that fails, try to find and kill processes that still use it. ++ try_umount "" "$SUB" && return $OCF_SUCCESS ++ + # try gracefully terminating processes for up to half of the configured timeout + fs_stop_loop "" "$SUB" "$OCF_RESKEY_term_signals" & + timeout_child $! $grace_time +-- +2.25.1 + diff --git a/backport-Filesystem-only-use-umount_force-after-sending-kill_.patch b/backport-Filesystem-only-use-umount_force-after-sending-kill_.patch new file mode 100644 index 0000000..f3c1b1f --- /dev/null +++ b/backport-Filesystem-only-use-umount_force-after-sending-kill_.patch @@ -0,0 +1,60 @@ +From cb6aaffc260eea0f0fee6fab44393c6cf12b8a83 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Mon, 9 Sep 2024 10:58:12 +0200 +Subject: [PATCH] Filesystem: only use $umount_force after sending kill_signals + +--- + heartbeat/Filesystem | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem +index f54969f2..4dd962fd 100755 +--- a/heartbeat/Filesystem ++++ b/heartbeat/Filesystem +@@ -694,8 +694,8 @@ signal_processes() { + return 0 + } + try_umount() { +- local SUB="$1" +- $UMOUNT $umount_force "$SUB" ++ local force_arg="$1" SUB="$2" ++ $UMOUNT $force_arg "$SUB" + list_mounts | grep "${TAB}${SUB}${TAB}" >/dev/null 2>&1 || { + ocf_log info "unmounted $SUB successfully" + return $OCF_SUCCESS +@@ -718,14 +718,14 @@ timeout_child() { + return $ret + } + fs_stop_loop() { +- local SUB="$1" signals="$2" sig send_signal ++ local force_arg="$1" SUB="$2" signals="$3" sig send_signal + while true; do + send_signal=false + for sig in $signals; do + signal_processes "$SUB" $sig && send_signal=true + done + $send_signal && sleep $OCF_RESKEY_signal_delay +- try_umount "$SUB" && return $OCF_SUCCESS ++ try_umount "$force_arg" "$SUB" && return $OCF_SUCCESS + done + } + fs_stop() { +@@ -733,13 +733,13 @@ fs_stop() { + grace_time=$((timeout/2)) + + # try gracefully terminating processes for up to half of the configured timeout +- fs_stop_loop "$SUB" "$OCF_RESKEY_term_signals" & ++ fs_stop_loop "" "$SUB" "$OCF_RESKEY_term_signals" & + timeout_child $! $grace_time + ret=$? + [ $ret -eq $OCF_SUCCESS ] && return $ret + + # try killing them for the rest of the timeout +- fs_stop_loop "$SUB" "$OCF_RESKEY_kill_signals" & ++ fs_stop_loop "$umount_force" "$SUB" "$OCF_RESKEY_kill_signals" & + timeout_child $! $grace_time + ret=$? + [ $ret -eq $OCF_SUCCESS ] && return $ret +-- +2.25.1 + diff --git a/resource-agents.spec b/resource-agents.spec index 3d0bc69..47d25ba 100644 --- a/resource-agents.spec +++ b/resource-agents.spec @@ -1,7 +1,7 @@ Name: resource-agents Summary: Open Source HA Reusable Cluster Resource Scripts Version: 4.15.1 -Release: 8 +Release: 9 License: GPLv2+ and LGPLv2+ URL: https://github.com/ClusterLabs/resource-agents Source0: https://github.com/ClusterLabs/resource-agents/archive/v%{version}.tar.gz @@ -20,6 +20,8 @@ Patch10: backport-IPaddr2-add-proto-parameter-to-be-able-to-match-a- Patch11: backport-podman-Improve-handling-of-stopping-container-remova.patch Patch12: backport-apache-http-mon_sh-change-curl-opts-to-match-wget.patch Patch13: backport-apache-http-mon_sh-doc-curl-as-preferred-client.patch +Patch14: backport-Filesystem-only-use-umount_force-after-sending-kill_.patch +Patch15: backport-Filesystem-on-stop-try-umount-directly-before-scanni.patch Obsoletes: heartbeat-resources <= %{version} Provides: heartbeat-resources = %{version} @@ -118,6 +120,10 @@ export CFLAGS="$(echo '%{optflags}')" %{_mandir}/man8/{ocf-tester.8*,ldirectord.8*} %changelog +* Tue Oct 29 2024 bizhiyuan - 4.15.1-9 +- Filesystem: only use $umount_force after sending kill_signals +- Filesystem: on stop, try umount directly, before scanning for users + * Fri Oct 25 2024 liupei - 4.15.1-8 - apache/http-mon.sh: change curl opts to match wget - apache/http-mon.sh: doc curl as preferred client -- Gitee