mapper/batcher: reduce hot-path log verbosity

Remove Caller(), channel pointer formatting (fmt.Sprintf("%p",...)),
and mutex timing from send(), addConnection(), and
removeConnectionByChannel(). Move per-broadcast summary and
no-connection logs from Debug to Trace. Remove per-connection
"attempting"/"succeeded" logs entirely; keep Warn for failures.

These methods run on every MapResponse delivery, so the savings
compound quickly under load.

Updates #2545
This commit is contained in:
Kristoffer Dalby
2026-03-13 15:36:12 +00:00
parent 051a38a4c4
commit 57a38b5678

View File

@@ -819,22 +819,13 @@ func (mc *multiChannelNodeConn) removeConnectionAtIndexLocked(i int, stopConnect
// addConnection adds a new connection. // addConnection adds a new connection.
func (mc *multiChannelNodeConn) addConnection(entry *connectionEntry) { func (mc *multiChannelNodeConn) addConnection(entry *connectionEntry) {
mutexWaitStart := time.Now()
mc.log.Debug().Caller().Str(zf.Chan, fmt.Sprintf("%p", entry.c)).Str(zf.ConnID, entry.id).
Msg("addConnection: waiting for mutex - POTENTIAL CONTENTION POINT")
mc.mutex.Lock() mc.mutex.Lock()
mutexWaitDur := time.Since(mutexWaitStart)
defer mc.mutex.Unlock() defer mc.mutex.Unlock()
mc.connections = append(mc.connections, entry) mc.connections = append(mc.connections, entry)
mc.log.Debug().Caller().Str(zf.Chan, fmt.Sprintf("%p", entry.c)).Str(zf.ConnID, entry.id). mc.log.Debug().Str(zf.ConnID, entry.id).
Int("total_connections", len(mc.connections)). Int("total_connections", len(mc.connections)).
Dur("mutex_wait_time", mutexWaitDur). Msg("connection added")
Msg("successfully added connection after mutex wait")
} }
// removeConnectionByChannel removes a connection by matching channel pointer. // removeConnectionByChannel removes a connection by matching channel pointer.
@@ -845,9 +836,9 @@ func (mc *multiChannelNodeConn) removeConnectionByChannel(c chan<- *tailcfg.MapR
for i, entry := range mc.connections { for i, entry := range mc.connections {
if entry.c == c { if entry.c == c {
mc.removeConnectionAtIndexLocked(i, false) mc.removeConnectionAtIndexLocked(i, false)
mc.log.Debug().Caller().Str(zf.Chan, fmt.Sprintf("%p", c)). mc.log.Debug().Str(zf.ConnID, entry.id).
Int("remaining_connections", len(mc.connections)). Int("remaining_connections", len(mc.connections)).
Msg("successfully removed connection") Msg("connection removed")
return true return true
} }
@@ -909,8 +900,8 @@ func (mc *multiChannelNodeConn) send(data *tailcfg.MapResponse) error {
if len(mc.connections) == 0 { if len(mc.connections) == 0 {
mc.mutex.RUnlock() mc.mutex.RUnlock()
mc.log.Debug().Caller(). mc.log.Trace().
Msg("send: skipping send to node with no active connections (likely rapid reconnection)") Msg("send: no active connections, skipping")
return nil return nil
} }
@@ -920,9 +911,9 @@ func (mc *multiChannelNodeConn) send(data *tailcfg.MapResponse) error {
copy(snapshot, mc.connections) copy(snapshot, mc.connections)
mc.mutex.RUnlock() mc.mutex.RUnlock()
mc.log.Debug().Caller(). mc.log.Trace().
Int("total_connections", len(snapshot)). Int("total_connections", len(snapshot)).
Msg("send: broadcasting to all connections") Msg("send: broadcasting")
// Send to all connections without holding any lock. // Send to all connections without holding any lock.
// Stale connection timeouts (50ms each) happen here without blocking // Stale connection timeouts (50ms each) happen here without blocking
@@ -934,25 +925,17 @@ func (mc *multiChannelNodeConn) send(data *tailcfg.MapResponse) error {
) )
for _, conn := range snapshot { for _, conn := range snapshot {
mc.log.Debug().Caller().Str(zf.Chan, fmt.Sprintf("%p", conn.c)).
Str(zf.ConnID, conn.id).
Msg("send: attempting to send to connection")
err := conn.send(data) err := conn.send(data)
if err != nil { if err != nil {
lastErr = err lastErr = err
failed = append(failed, conn) failed = append(failed, conn)
mc.log.Warn().Err(err).Str(zf.Chan, fmt.Sprintf("%p", conn.c)). mc.log.Warn().Err(err).
Str(zf.ConnID, conn.id). Str(zf.ConnID, conn.id).
Msg("send: connection send failed") Msg("send: connection failed")
} else { } else {
successCount++ successCount++
mc.log.Debug().Caller().Str(zf.Chan, fmt.Sprintf("%p", conn.c)).
Str(zf.ConnID, conn.id).
Msg("send: successfully sent to connection")
} }
} }
@@ -972,7 +955,7 @@ func (mc *multiChannelNodeConn) send(data *tailcfg.MapResponse) error {
if _, isFailed := failedSet[conn]; !isFailed { if _, isFailed := failedSet[conn]; !isFailed {
clean = append(clean, conn) clean = append(clean, conn)
} else { } else {
mc.log.Debug().Caller(). mc.log.Debug().
Str(zf.ConnID, conn.id). Str(zf.ConnID, conn.id).
Msg("send: removing failed connection") Msg("send: removing failed connection")
// Tear down the owning session so the old serveLongPoll // Tear down the owning session so the old serveLongPoll
@@ -987,10 +970,10 @@ func (mc *multiChannelNodeConn) send(data *tailcfg.MapResponse) error {
mc.updateCount.Add(1) mc.updateCount.Add(1)
mc.log.Debug(). mc.log.Trace().
Int("successful_sends", successCount). Int("successful_sends", successCount).
Int("failed_connections", len(failed)). Int("failed_connections", len(failed)).
Msg("send: completed broadcast") Msg("send: broadcast complete")
// Success if at least one send succeeded // Success if at least one send succeeded
if successCount > 0 { if successCount > 0 {