mirror of
https://github.com/juanfont/headscale.git
synced 2026-04-11 03:27:20 +02:00
mapper: fix phantom updateSentPeers on disconnected nodes
When send() is called on a node with zero active connections (disconnected but kept for rapid reconnection), it returns nil (success). handleNodeChange then calls updateSentPeers, recording peers as delivered when no client received the data. This corrupts lastSentPeers: future computePeerDiff calculations produce wrong results because they compare against phantom state. After reconnection, the node's initial map resets lastSentPeers, but any changes processed during the disconnect window leave stale entries that cause asymmetric peer visibility. Return errNoActiveConnections from send() when there are no connections. handleNodeChange treats this as a no-op (the change was generated but not deliverable) and skips updateSentPeers, keeping lastSentPeers consistent with what clients actually received.
This commit is contained in:
committed by
Kristoffer Dalby
parent
9371b4ee28
commit
3587225a88
@@ -178,10 +178,20 @@ func handleNodeChange(nc nodeConnection, mapper *mapper, r change.Change) error
|
||||
// Send the map response
|
||||
err = nc.send(data)
|
||||
if err != nil {
|
||||
// If the node has no active connections, the data was not
|
||||
// delivered. Do not update lastSentPeers — recording phantom
|
||||
// peer state would corrupt future computePeerDiff calculations,
|
||||
// causing the node to miss peer additions or removals after
|
||||
// reconnection.
|
||||
if errors.Is(err, errNoActiveConnections) {
|
||||
return nil
|
||||
}
|
||||
|
||||
return fmt.Errorf("sending map response to node %d: %w", nodeID, err)
|
||||
}
|
||||
|
||||
// Update peer tracking after successful send
|
||||
// Update peer tracking only after confirmed delivery to at
|
||||
// least one active connection.
|
||||
nc.updateSentPeers(data)
|
||||
|
||||
return nil
|
||||
|
||||
@@ -339,8 +339,9 @@ func TestMultiChannelSend_ZeroConnections(t *testing.T) {
|
||||
|
||||
err := mc.send(testMapResponse())
|
||||
|
||||
require.NoError(t, err,
|
||||
"sending to node with 0 connections should succeed silently (rapid reconnection scenario)")
|
||||
require.ErrorIs(t, err, errNoActiveConnections,
|
||||
"sending to node with 0 connections should return errNoActiveConnections "+
|
||||
"so callers skip updateSentPeers (prevents phantom peer state)")
|
||||
}
|
||||
|
||||
func TestMultiChannelSend_NilData(t *testing.T) {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package mapper
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"sync"
|
||||
@@ -16,6 +17,12 @@ import (
|
||||
"tailscale.com/tailcfg"
|
||||
)
|
||||
|
||||
// errNoActiveConnections is returned by send when a node has no active
|
||||
// connections (disconnected but kept in the batcher for rapid reconnection).
|
||||
// Callers must not update peer tracking state (lastSentPeers) after this
|
||||
// error because the data was never delivered to any client.
|
||||
var errNoActiveConnections = errors.New("no active connections")
|
||||
|
||||
// connectionEntry represents a single connection to a node.
|
||||
type connectionEntry struct {
|
||||
id string // unique connection ID
|
||||
@@ -243,7 +250,7 @@ func (mc *multiChannelNodeConn) send(data *tailcfg.MapResponse) error {
|
||||
mc.log.Trace().
|
||||
Msg("send: no active connections, skipping")
|
||||
|
||||
return nil
|
||||
return errNoActiveConnections
|
||||
}
|
||||
|
||||
// Copy the slice so we can release the read lock before sending.
|
||||
|
||||
Reference in New Issue
Block a user