mapper: fix phantom updateSentPeers on disconnected nodes

When send() is called on a node with zero active connections
(disconnected but kept for rapid reconnection), it returns nil
(success). handleNodeChange then calls updateSentPeers, recording
peers as delivered when no client received the data.

This corrupts lastSentPeers: future computePeerDiff calculations
produce wrong results because they compare against phantom state.
After reconnection, the node's initial map resets lastSentPeers,
but any changes processed during the disconnect window leave
stale entries that cause asymmetric peer visibility.

Return errNoActiveConnections from send() when there are no
connections. handleNodeChange treats this as a no-op (the change
was generated but not deliverable) and skips updateSentPeers,
keeping lastSentPeers consistent with what clients actually
received.
This commit is contained in:
Kristoffer Dalby
2026-04-08 12:27:04 +00:00
committed by Kristoffer Dalby
parent 9371b4ee28
commit 3587225a88
3 changed files with 22 additions and 4 deletions

View File

@@ -178,10 +178,20 @@ func handleNodeChange(nc nodeConnection, mapper *mapper, r change.Change) error
// Send the map response
err = nc.send(data)
if err != nil {
// If the node has no active connections, the data was not
// delivered. Do not update lastSentPeers — recording phantom
// peer state would corrupt future computePeerDiff calculations,
// causing the node to miss peer additions or removals after
// reconnection.
if errors.Is(err, errNoActiveConnections) {
return nil
}
return fmt.Errorf("sending map response to node %d: %w", nodeID, err)
}
// Update peer tracking after successful send
// Update peer tracking only after confirmed delivery to at
// least one active connection.
nc.updateSentPeers(data)
return nil

View File

@@ -339,8 +339,9 @@ func TestMultiChannelSend_ZeroConnections(t *testing.T) {
err := mc.send(testMapResponse())
require.NoError(t, err,
"sending to node with 0 connections should succeed silently (rapid reconnection scenario)")
require.ErrorIs(t, err, errNoActiveConnections,
"sending to node with 0 connections should return errNoActiveConnections "+
"so callers skip updateSentPeers (prevents phantom peer state)")
}
func TestMultiChannelSend_NilData(t *testing.T) {

View File

@@ -1,6 +1,7 @@
package mapper
import (
"errors"
"fmt"
"strconv"
"sync"
@@ -16,6 +17,12 @@ import (
"tailscale.com/tailcfg"
)
// errNoActiveConnections is returned by send when a node has no active
// connections (disconnected but kept in the batcher for rapid reconnection).
// Callers must not update peer tracking state (lastSentPeers) after this
// error because the data was never delivered to any client.
var errNoActiveConnections = errors.New("no active connections")
// connectionEntry represents a single connection to a node.
type connectionEntry struct {
id string // unique connection ID
@@ -243,7 +250,7 @@ func (mc *multiChannelNodeConn) send(data *tailcfg.MapResponse) error {
mc.log.Trace().
Msg("send: no active connections, skipping")
return nil
return errNoActiveConnections
}
// Copy the slice so we can release the read lock before sending.