diff --git a/backport-Filesystem-on-stop-try-umount-directly-before-scanni.patch b/backport-Filesystem-on-stop-try-umount-directly-before-scanni.patch new file mode 100644 index 0000000000000000000000000000000000000000..c7a13951a06b768bc48dfe65beeb0b95b0333fbe --- /dev/null +++ b/backport-Filesystem-on-stop-try-umount-directly-before-scanni.patch @@ -0,0 +1,49 @@ +From f02afd0fadb581ca0fc9798beaf28044cf211200 Mon Sep 17 00:00:00 2001 +From: Lars Ellenberg +Date: Wed, 18 Sep 2024 11:53:52 +0200 +Subject: [PATCH] Filesystem: on stop, try umount directly, before scanning for + users + +48ed6e6d (Filesystem: improve stop-action and allow setting term/kill signals and signal_delay for large filesystems, 2023-07-04) +changed the logic from +"try umount; if that fails, find and kill users; repeat" to +"try to find and kill users; then try umount; repeat" + +But even just walking /proc may take "a long time" on busy systems, +and may still turn up with "no users found". + +It will take even longer for "force_umount=safe" +(observed 8 to 10 seconds just for "get_pids() with "safe" to return nothing) +than for "force_umount=yes" (still ~ 2 to 3 seconds), +but it will take "a long time" in any case. +(BTW, that may be longer than the hardcoded default of 6 seconds for "fast_stop", +which is also the default on many systems now) + +If the dependencies are properly configured, +there should be no users left, +and the umount should just work. + +Revert back to "try umount first", and only then try to find "rogue" users. +--- + heartbeat/Filesystem | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem +index 4dd962fd..99bddaf6 100755 +--- a/heartbeat/Filesystem ++++ b/heartbeat/Filesystem +@@ -732,6 +732,11 @@ fs_stop() { + local SUB="$1" timeout=$2 grace_time ret + grace_time=$((timeout/2)) + ++ # Just walking /proc may take "a long time", even if we don't find any users of this FS. ++ # If dependencies are properly configured, umount should just work. ++ # Only if that fails, try to find and kill processes that still use it. ++ try_umount "" "$SUB" && return $OCF_SUCCESS ++ + # try gracefully terminating processes for up to half of the configured timeout + fs_stop_loop "" "$SUB" "$OCF_RESKEY_term_signals" & + timeout_child $! $grace_time +-- +2.25.1 + diff --git a/backport-Filesystem-only-use-umount_force-after-sending-kill_.patch b/backport-Filesystem-only-use-umount_force-after-sending-kill_.patch new file mode 100644 index 0000000000000000000000000000000000000000..f3c1b1f000af7c35e8132330a16fcc4347d0ef74 --- /dev/null +++ b/backport-Filesystem-only-use-umount_force-after-sending-kill_.patch @@ -0,0 +1,60 @@ +From cb6aaffc260eea0f0fee6fab44393c6cf12b8a83 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Mon, 9 Sep 2024 10:58:12 +0200 +Subject: [PATCH] Filesystem: only use $umount_force after sending kill_signals + +--- + heartbeat/Filesystem | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem +index f54969f2..4dd962fd 100755 +--- a/heartbeat/Filesystem ++++ b/heartbeat/Filesystem +@@ -694,8 +694,8 @@ signal_processes() { + return 0 + } + try_umount() { +- local SUB="$1" +- $UMOUNT $umount_force "$SUB" ++ local force_arg="$1" SUB="$2" ++ $UMOUNT $force_arg "$SUB" + list_mounts | grep "${TAB}${SUB}${TAB}" >/dev/null 2>&1 || { + ocf_log info "unmounted $SUB successfully" + return $OCF_SUCCESS +@@ -718,14 +718,14 @@ timeout_child() { + return $ret + } + fs_stop_loop() { +- local SUB="$1" signals="$2" sig send_signal ++ local force_arg="$1" SUB="$2" signals="$3" sig send_signal + while true; do + send_signal=false + for sig in $signals; do + signal_processes "$SUB" $sig && send_signal=true + done + $send_signal && sleep $OCF_RESKEY_signal_delay +- try_umount "$SUB" && return $OCF_SUCCESS ++ try_umount "$force_arg" "$SUB" && return $OCF_SUCCESS + done + } + fs_stop() { +@@ -733,13 +733,13 @@ fs_stop() { + grace_time=$((timeout/2)) + + # try gracefully terminating processes for up to half of the configured timeout +- fs_stop_loop "$SUB" "$OCF_RESKEY_term_signals" & ++ fs_stop_loop "" "$SUB" "$OCF_RESKEY_term_signals" & + timeout_child $! $grace_time + ret=$? + [ $ret -eq $OCF_SUCCESS ] && return $ret + + # try killing them for the rest of the timeout +- fs_stop_loop "$SUB" "$OCF_RESKEY_kill_signals" & ++ fs_stop_loop "$umount_force" "$SUB" "$OCF_RESKEY_kill_signals" & + timeout_child $! $grace_time + ret=$? + [ $ret -eq $OCF_SUCCESS ] && return $ret +-- +2.25.1 + diff --git a/resource-agents.spec b/resource-agents.spec index 0ec932f4cb85070113ba1e097d92e22383a5184b..c1088ee7dc4eebdb4f8785ed1ea74e4fccec16c5 100644 --- a/resource-agents.spec +++ b/resource-agents.spec @@ -1,7 +1,7 @@ Name: resource-agents Summary: Open Source HA Reusable Cluster Resource Scripts Version: 4.15.1 -Release: 9 +Release: 10 License: GPLv2+ and LGPLv2+ URL: https://github.com/ClusterLabs/resource-agents Source0: https://github.com/ClusterLabs/resource-agents/archive/v%{version}.tar.gz @@ -22,7 +22,8 @@ Patch12: backport-apache-http-mon_sh-change-curl-opts-to-match-wget. Patch13: backport-apache-http-mon_sh-doc-curl-as-preferred-client.patch Patch14: backport-IPaddr2-improve-fail-logic-and-check-ip_status-after.patch Patch15: backport-findif.sh-remove-local-proto-to-use-global-proto-fro.patch - +Patch16: backport-Filesystem-only-use-umount_force-after-sending-kill_.patch +Patch17: backport-Filesystem-on-stop-try-umount-directly-before-scanni.patch Obsoletes: heartbeat-resources <= %{version} Provides: heartbeat-resources = %{version} BuildRequires: automake autoconf pkgconfig gcc perl-interpreter perl-generators python3-devel @@ -120,6 +121,10 @@ export CFLAGS="$(echo '%{optflags}')" %{_mandir}/man8/{ocf-tester.8*,ldirectord.8*} %changelog +* Tue Oct 29 2024 bizhiyuan - 4.15.1-10 +- Filesystem: only use $umount_force after sending kill_signals +- Filesystem: on stop, try umount directly, before scanning for users + * Tue Oct 29 2024 bixiaoyan - 4.15.1-9 - IPaddr2: improve fail logic and check ip_status after adding IP - findif.sh: remove "local proto" to use global proto from the agents