From 283ee02fb7a56b4e65e1ed7fce98f0a239a68d62 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Mon, 28 Oct 2024 14:58:02 +0100 Subject: [PATCH 1/3] tests: decom before waiting for cluster health in admin_uuid_operations_test The cluster can't become healthy until ghost nodes are decommissioned. (cherry picked from commit 12bb4ecd43b0dbb3601c390b01f895a1faea0801) --- .../rptest/tests/admin_uuid_operations_test.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/tests/rptest/tests/admin_uuid_operations_test.py b/tests/rptest/tests/admin_uuid_operations_test.py index 80261467b976e..49a9fb5785e9a 100644 --- a/tests/rptest/tests/admin_uuid_operations_test.py +++ b/tests/rptest/tests/admin_uuid_operations_test.py @@ -276,14 +276,12 @@ def test_force_uuid_override(self, mode): backoff_sec=2, err_msg=f"{to_stop.name} did not take the UUID override") - self.logger.debug(f"Wait for the cluster to become healthy...") + self.logger.debug(f"Decommission ghost node [{ghost_node_id}]...") + self._decommission(ghost_node_id) + self.logger.debug(f"...and wait for the cluster to become healthy.") self.wait_until_cluster_healthy(timeout_sec=30) - self.logger.debug( - f".. and decommission ghost node [{ghost_node_id}]...") - self._decommission(ghost_node_id) - self.logger.debug( "Check that all this state sticks across a rolling restart") @@ -373,14 +371,11 @@ def test_force_uuid_override_multinode(self, mode): auto_assign_node_id=True, ) - self.logger.debug("Wait for the cluster to become healthy...") + self.logger.debug(f"Decommission ghost node [{ghost_node_id}]...") + self._decommission(ghost_node_id) + self.logger.debug("...and wait for the cluster to become healthy.") controller_leader = self.wait_until_cluster_healthy(timeout_sec=30) assert controller_leader is not None, "Didn't elect a controller leader" assert controller_leader not in to_stop, f"Unexpected controller leader {controller_leader.account.hostname}" - - self.logger.debug( - f"...and decommission ghost node [{ghost_node_id}]...") - - self._decommission(ghost_node_id) From 29996ff7eab7905b75b0b2416e8de41019e568e5 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Fri, 25 Oct 2024 13:29:46 +0200 Subject: [PATCH 2/3] raft: reply to heartbeats with group_unavailable if addressed to wrong node Previously, we replied with "failure", which updated the last heartbeat timestamp and gave the leader an impression that the group is actually there. Reply with group_unavailable, which is more appropriate here. (cherry picked from commit 25541ce99da197e8dbc048562301b5d9d0128433) --- src/v/raft/consensus.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/v/raft/consensus.cc b/src/v/raft/consensus.cc index dfbbcec98539f..0d3e59d8426ba 100644 --- a/src/v/raft/consensus.cc +++ b/src/v/raft/consensus.cc @@ -3955,7 +3955,7 @@ reply_result consensus::lightweight_heartbeat( target_node, _self, source_node); - return reply_result::failure; + return reply_result::group_unavailable; } /** @@ -4010,7 +4010,7 @@ ss::future consensus::full_heartbeat( target_vnode, _self, source_vnode); - reply.result = reply_result::failure; + reply.result = reply_result::group_unavailable; co_return reply; } /** From 88a682e119bd155ba238851ddb3d2716f8938e55 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Fri, 25 Oct 2024 13:34:02 +0200 Subject: [PATCH 3/3] raft: ignore heartbeat replies from unexpected node ids Usually this is a new node replying to heartbeats addressed to the corresponding ghost node. (cherry picked from commit 1c23c3566c317316bc7e46b69f41bd137c395e5e) --- src/v/raft/heartbeat_manager.cc | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/v/raft/heartbeat_manager.cc b/src/v/raft/heartbeat_manager.cc index 34b6f77ee31e7..45b8d82bc6ada 100644 --- a/src/v/raft/heartbeat_manager.cc +++ b/src/v/raft/heartbeat_manager.cc @@ -340,6 +340,17 @@ void heartbeat_manager::process_reply( return; } auto& reply = r.value(); + + if (reply.source() != n) { + vlog( + raftlog.warn, + "got heartbeat reply from a different node id {} (expected {}), " + "ignoring", + reply.source(), + n); + return; + } + reply.for_each_lw_reply([this, n, target = reply.target(), &groups]( group_id group, reply_result result) { auto it = _consensus_groups.find(group);