From 1b017c15246e235971ed890d269208bada17469f Mon Sep 17 00:00:00 2001 From: cenhuilin Date: Thu, 11 Apr 2024 09:38:29 +0800 Subject: [PATCH] fix mds metadata lost in one case --- 0016-fix-mds-metadata-lost-in-one-case.patch | 42 ++++++++++++++++++++ ceph.spec | 6 ++- 2 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 0016-fix-mds-metadata-lost-in-one-case.patch diff --git a/0016-fix-mds-metadata-lost-in-one-case.patch b/0016-fix-mds-metadata-lost-in-one-case.patch new file mode 100644 index 0000000..59fc8fa --- /dev/null +++ b/0016-fix-mds-metadata-lost-in-one-case.patch @@ -0,0 +1,42 @@ +From 56cbf3f0716b556c815487d719abe86021125925 Mon Sep 17 00:00:00 2001 +From: shimin +Date: Wed, 10 Apr 2024 13:47:16 +0800 +Subject: [PATCH] mon:fix mds metadata lost in one case +In most cases, peon's pending_metadata is inconsistent with mon's db. +When a peon turns into leader, and at the same time an active mds stops, +the new leader may flush wrong mds metadata into db. So we need to +update mds metadata from db at every fsmap change. + +This phenomenon can be reproduced like this: +A Cluster with 3 mon and 3 mds (one active, other two standby), 6 osd. +step 1. stop two standby mds; +step 2. restart all mon; (make pending_metadata consistent with db) +step 3. start other two mds +step 4. stop leader mon +step 5. run "ceph mds metadata" command to check mds metadata +step 6. stop active mds +step 7. run "ceph mds metadata" command to check mds metadata again + +In step 7, we would find mds metadata lost. 
+ +Fixes: https://tracker.ceph.com/issues/63166 +Signed-off-by: shimin +--- + src/mon/MDSMonitor.cc | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc +index 0e30a1461..1c7a2340e 100644 +--- a/src/mon/MDSMonitor.cc ++++ b/src/mon/MDSMonitor.cc +@@ -136,6 +136,7 @@ void MDSMonitor::update_from_paxos(bool *need_bootstrap) + << ", my e " << get_fsmap().epoch << dendl; + ceph_assert(version > get_fsmap().epoch); + ++ load_metadata(pending_metadata); + load_health(); + + // read and decode +-- +2.27.0 + diff --git a/ceph.spec b/ceph.spec index 3f851de..5ac63af 100644 --- a/ceph.spec +++ b/ceph.spec @@ -125,7 +125,7 @@ ################################################################################# Name: ceph Version: 16.2.7 -Release: 19 +Release: 20 %if 0%{?fedora} || 0%{?rhel} || 0%{?openEuler} Epoch: 2 %endif @@ -158,6 +158,7 @@ Patch12: 0012-add-missing-includes-when-compiling-with-boost-1.75.patch Patch13: 0013-add-atomic-library-for-loongarch64.patch Patch14: 0014-fix-CVE-2022-3854.patch Patch15: 0015-ceph-volume-add-judgment-for-ceph-volume-lvm-activat.patch +Patch16: 0016-fix-mds-metadata-lost-in-one-case.patch %if 0%{?suse_version} # _insert_obs_source_lines_here ExclusiveArch: x86_64 aarch64 ppc64le s390x @@ -2502,6 +2503,9 @@ exit 0 %config %{_sysconfdir}/prometheus/ceph/ceph_default_alerts.yml %changelog +* Wed Apr 10 2024 cenhuilin - 2:16.2.7-20 +- mon: fix mds metadata lost in one case + * Mon Aug 28 2023 liuqinfei - 2:16.2.7-19 - add judgment for ceph-volume lvm activate - sync from #PR 187 -- Gitee