diff --git a/hscontrol/mapper/batcher.go b/hscontrol/mapper/batcher.go index 6fbefdbe..ce569732 100644 --- a/hscontrol/mapper/batcher.go +++ b/hscontrol/mapper/batcher.go @@ -178,10 +178,20 @@ func handleNodeChange(nc nodeConnection, mapper *mapper, r change.Change) error // Send the map response err = nc.send(data) if err != nil { + // If the node has no active connections, the data was not + // delivered. Do not update lastSentPeers — recording phantom + // peer state would corrupt future computePeerDiff calculations, + // causing the node to miss peer additions or removals after + // reconnection. + if errors.Is(err, errNoActiveConnections) { + return nil + } + return fmt.Errorf("sending map response to node %d: %w", nodeID, err) } - // Update peer tracking after successful send + // Update peer tracking only after confirmed delivery to at + // least one active connection. nc.updateSentPeers(data) return nil diff --git a/hscontrol/mapper/batcher_unit_test.go b/hscontrol/mapper/batcher_unit_test.go index 54172522..bef897fe 100644 --- a/hscontrol/mapper/batcher_unit_test.go +++ b/hscontrol/mapper/batcher_unit_test.go @@ -339,8 +339,9 @@ func TestMultiChannelSend_ZeroConnections(t *testing.T) { err := mc.send(testMapResponse()) - require.NoError(t, err, - "sending to node with 0 connections should succeed silently (rapid reconnection scenario)") + require.ErrorIs(t, err, errNoActiveConnections, + "sending to node with 0 connections should return errNoActiveConnections "+ + "so callers skip updateSentPeers (prevents phantom peer state)") } func TestMultiChannelSend_NilData(t *testing.T) { diff --git a/hscontrol/mapper/node_conn.go b/hscontrol/mapper/node_conn.go index 87d61544..faa893fa 100644 --- a/hscontrol/mapper/node_conn.go +++ b/hscontrol/mapper/node_conn.go @@ -1,6 +1,7 @@ package mapper import ( + "errors" "fmt" "strconv" "sync" @@ -16,6 +17,12 @@ import ( "tailscale.com/tailcfg" ) +// errNoActiveConnections is returned by send when a node has no active +// connections (disconnected but kept in the batcher for rapid reconnection). +// Callers must not update peer tracking state (lastSentPeers) after this +// error because the data was never delivered to any client. +var errNoActiveConnections = errors.New("no active connections") + // connectionEntry represents a single connection to a node. type connectionEntry struct { id string // unique connection ID @@ -243,7 +250,7 @@ func (mc *multiChannelNodeConn) send(data *tailcfg.MapResponse) error { mc.log.Trace(). Msg("send: no active connections, skipping") - return nil + return errNoActiveConnections } // Copy the slice so we can release the read lock before sending.