From 3587225a88d1ca85b1b7111430f8a301f423b291 Mon Sep 17 00:00:00 2001 From: Kristoffer Dalby Date: Wed, 8 Apr 2026 12:27:04 +0000 Subject: [PATCH] mapper: fix phantom updateSentPeers on disconnected nodes When send() is called on a node with zero active connections (disconnected but kept for rapid reconnection), it returns nil (success). handleNodeChange then calls updateSentPeers, recording peers as delivered when no client received the data. This corrupts lastSentPeers: future computePeerDiff calculations produce wrong results because they compare against phantom state. After reconnection, the node's initial map resets lastSentPeers, but any changes processed during the disconnect window leave stale entries that cause asymmetric peer visibility. Return errNoActiveConnections from send() when there are no connections. handleNodeChange treats this as a no-op (the change was generated but not deliverable) and skips updateSentPeers, keeping lastSentPeers consistent with what clients actually received. --- hscontrol/mapper/batcher.go | 12 +++++++++++- hscontrol/mapper/batcher_unit_test.go | 5 +++-- hscontrol/mapper/node_conn.go | 9 ++++++++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/hscontrol/mapper/batcher.go b/hscontrol/mapper/batcher.go index 6fbefdbe..ce569732 100644 --- a/hscontrol/mapper/batcher.go +++ b/hscontrol/mapper/batcher.go @@ -178,10 +178,20 @@ func handleNodeChange(nc nodeConnection, mapper *mapper, r change.Change) error // Send the map response err = nc.send(data) if err != nil { + // If the node has no active connections, the data was not + // delivered. Do not update lastSentPeers — recording phantom + // peer state would corrupt future computePeerDiff calculations, + // causing the node to miss peer additions or removals after + // reconnection. + if errors.Is(err, errNoActiveConnections) { + return nil + } + return fmt.Errorf("sending map response to node %d: %w", nodeID, err) } - // Update peer tracking after successful send + // Update peer tracking only after confirmed delivery to at + // least one active connection. nc.updateSentPeers(data) return nil diff --git a/hscontrol/mapper/batcher_unit_test.go b/hscontrol/mapper/batcher_unit_test.go index 54172522..bef897fe 100644 --- a/hscontrol/mapper/batcher_unit_test.go +++ b/hscontrol/mapper/batcher_unit_test.go @@ -339,8 +339,9 @@ func TestMultiChannelSend_ZeroConnections(t *testing.T) { err := mc.send(testMapResponse()) - require.NoError(t, err, - "sending to node with 0 connections should succeed silently (rapid reconnection scenario)") + require.ErrorIs(t, err, errNoActiveConnections, + "sending to node with 0 connections should return errNoActiveConnections "+ + "so callers skip updateSentPeers (prevents phantom peer state)") } func TestMultiChannelSend_NilData(t *testing.T) { diff --git a/hscontrol/mapper/node_conn.go b/hscontrol/mapper/node_conn.go index 87d61544..faa893fa 100644 --- a/hscontrol/mapper/node_conn.go +++ b/hscontrol/mapper/node_conn.go @@ -1,6 +1,7 @@ package mapper import ( + "errors" "fmt" "strconv" "sync" @@ -16,6 +17,12 @@ import ( "tailscale.com/tailcfg" ) +// errNoActiveConnections is returned by send when a node has no active +// connections (disconnected but kept in the batcher for rapid reconnection). +// Callers must not update peer tracking state (lastSentPeers) after this +// error because the data was never delivered to any client. +var errNoActiveConnections = errors.New("no active connections") + // connectionEntry represents a single connection to a node. type connectionEntry struct { id string // unique connection ID @@ -243,7 +250,7 @@ func (mc *multiChannelNodeConn) send(data *tailcfg.MapResponse) error { mc.log.Trace(). Msg("send: no active connections, skipping") - return nil + return errNoActiveConnections } // Copy the slice so we can release the read lock before sending.