package mapper

// Benchmarks for batcher components and full pipeline.
//
// Organized into three tiers:
// - Component benchmarks: individual functions (connectionEntry.send, computePeerDiff, etc.)
// - System benchmarks: batching mechanics (addToBatch, processBatchedChanges, broadcast)
// - Full pipeline benchmarks: end-to-end with real DB (gated behind !testing.Short())
//
// Most benchmarks run sub-benchmarks at several sizes (typically 10/100/1000
// nodes or peers; the full pipeline tier uses 10/100) for scaling analysis.

import (
	"fmt"
	"sync"
	"testing"
	"time"

	"github.com/juanfont/headscale/hscontrol/types"
	"github.com/juanfont/headscale/hscontrol/types/change"
	"github.com/puzpuzpuz/xsync/v4"
	"github.com/rs/zerolog"
	"tailscale.com/tailcfg"
)

// ============================================================================
// Component Benchmarks
// ============================================================================

// BenchmarkConnectionEntry_Send times repeated connectionEntry.send calls on
// a single connection. Nothing reads from the channel during the run; it is
// buffered for b.N+1 responses so the loop does not need a consumer.
func BenchmarkConnectionEntry_Send(b *testing.B) {
	sink := make(chan *tailcfg.MapResponse, b.N+1)
	conn := makeConnectionEntry("bench-conn", sink)
	resp := testMapResponse()

	b.ResetTimer()

	for range b.N {
		// The send result is deliberately discarded; only the call cost is of interest.
		_ = conn.send(resp)
	}
}

// BenchmarkMultiChannelSend times multiChannelNodeConn.send for a node that
// has 1, 3 or 10 connections attached.
func BenchmarkMultiChannelSend(b *testing.B) {
	for _, fanout := range []int{1, 3, 10} {
		name := fmt.Sprintf("%dconn", fanout)

		b.Run(name, func(b *testing.B) {
			nodeConn := newMultiChannelNodeConn(1, nil)

			// Each connection gets its own channel buffered for b.N+1
			// responses; nothing reads from them during the run.
			for idx := range fanout {
				sink := make(chan *tailcfg.MapResponse, b.N+1)
				nodeConn.addConnection(makeConnectionEntry(fmt.Sprintf("conn-%d", idx), sink))
			}

			resp := testMapResponse()

			b.ResetTimer()

			for range b.N {
				_ = nodeConn.send(resp)
			}
		})
	}
}

// BenchmarkComputePeerDiff times computePeerDiff with 10, 100 and 1000
// tracked peers, where the current peer list omits every 10th tracked peer.
func BenchmarkComputePeerDiff(b *testing.B) {
	for _, peerCount := range []int{10, 100, 1000} {
		name := fmt.Sprintf("%dpeers", peerCount)

		b.Run(name, func(b *testing.B) {
			nodeConn := newMultiChannelNodeConn(1, nil)
			stillPresent := make([]tailcfg.NodeID, 0, peerCount)

			for n := 1; n <= peerCount; n++ {
				peerID := tailcfg.NodeID(n)

				// Every peer 1..peerCount is recorded as previously sent.
				nodeConn.lastSentPeers.Store(peerID, struct{}{})

				// Multiples of 10 are left out of the current list (~10% missing).
				if n%10 == 0 {
					continue
				}

				stillPresent = append(stillPresent, peerID)
			}

			b.ResetTimer()

			for range b.N {
				_ = nodeConn.computePeerDiff(stillPresent)
			}
		})
	}
}

// BenchmarkUpdateSentPeers times updateSentPeers in two shapes per size:
//   - "_full": the response is built by testMapResponseWithPeers from peer
//     IDs 1..peerCount and applied to a fresh connection.
//   - "_incremental": the connection already tracks 1..peerCount and the
//     response's PeersChanged holds peerCount/10 (at least one) new IDs.
func BenchmarkUpdateSentPeers(b *testing.B) {
	for _, peerCount := range []int{10, 100, 1000} {
		fullName := fmt.Sprintf("%dpeers_full", peerCount)
		incrementalName := fmt.Sprintf("%dpeers_incremental", peerCount)

		b.Run(fullName, func(b *testing.B) {
			nodeConn := newMultiChannelNodeConn(1, nil)

			// The response is assembled once, outside the timed loop.
			ids := make([]tailcfg.NodeID, 0, peerCount)
			for n := 1; n <= peerCount; n++ {
				ids = append(ids, tailcfg.NodeID(n))
			}

			full := testMapResponseWithPeers(ids...)

			b.ResetTimer()

			for range b.N {
				nodeConn.updateSentPeers(full)
			}
		})

		b.Run(incrementalName, func(b *testing.B) {
			nodeConn := newMultiChannelNodeConn(1, nil)

			// Start from a connection that already tracks 1..peerCount.
			for n := 1; n <= peerCount; n++ {
				nodeConn.lastSentPeers.Store(tailcfg.NodeID(n), struct{}{})
			}

			// A tenth of the tracked count arrives as new peers, never fewer than one.
			extra := peerCount / 10
			if extra < 1 {
				extra = 1
			}

			delta := testMapResponse()
			delta.PeersChanged = make([]*tailcfg.Node, 0, extra)

			// New IDs continue directly after the tracked range.
			for offset := 1; offset <= extra; offset++ {
				delta.PeersChanged = append(delta.PeersChanged, &tailcfg.Node{ID: tailcfg.NodeID(peerCount + offset)})
			}

			b.ResetTimer()

			for range b.N {
				nodeConn.updateSentPeers(delta)
			}
		})
	}
}

// ============================================================================
// System Benchmarks (no DB, batcher mechanics only)
// ============================================================================

// benchBatcher assembles a minimal Batcher holding nodeCount nodes with IDs
// 1..nodeCount. Each node gets one connection whose channel is buffered to
// bufferSize, and is stored in the connected map with a nil timestamp.
//
// It returns the batcher and the per-node channels keyed by node ID. No
// cleanup is registered here: callers close done and stop tick themselves.
// Logging is not touched either; benchmarks silence zerolog on their own.
func benchBatcher(nodeCount, bufferSize int) (*Batcher, map[types.NodeID]chan *tailcfg.MapResponse) {
	const workerCount = 4

	batcher := &Batcher{
		// One-hour period, so the ticker does not fire during a normal benchmark run.
		tick:      time.NewTicker(time.Hour),
		workers:   workerCount,
		workCh:    make(chan work, workerCount*200),
		nodes:     xsync.NewMap[types.NodeID, *multiChannelNodeConn](),
		connected: xsync.NewMap[types.NodeID, *time.Time](),
		done:      make(chan struct{}),
	}

	sinks := make(map[types.NodeID]chan *tailcfg.MapResponse, nodeCount)

	for n := 1; n <= nodeCount; n++ {
		nodeID := types.NodeID(n) //nolint:gosec // benchmark with small controlled values
		sink := make(chan *tailcfg.MapResponse, bufferSize)

		conn := &connectionEntry{
			id:      fmt.Sprintf("conn-%d", n),
			c:       sink,
			version: tailcfg.CapabilityVersion(100),
			created: time.Now(),
		}
		conn.lastUsed.Store(time.Now().Unix())

		nodeConn := newMultiChannelNodeConn(nodeID, nil)
		nodeConn.addConnection(conn)

		batcher.nodes.Store(nodeID, nodeConn)
		batcher.connected.Store(nodeID, nil)

		sinks[nodeID] = sink
	}

	batcher.totalNodes.Store(int64(nodeCount))

	return batcher, sinks
}

// BenchmarkAddToBatch_Broadcast times addToBatch with a DERPMap change
// against batchers holding 10, 100 and 1000 nodes. benchBatcher starts no
// workers, so each iteration also clears every node's pending changes to
// keep them from accumulating; that clearing is part of the measured time.
func BenchmarkAddToBatch_Broadcast(b *testing.B) {
	zerolog.SetGlobalLevel(zerolog.Disabled)
	defer zerolog.SetGlobalLevel(zerolog.DebugLevel)

	// Range callback that clears one node's pending changes.
	drain := func(_ types.NodeID, nc *multiChannelNodeConn) bool {
		nc.drainPending()

		return true
	}

	for _, nodeCount := range []int{10, 100, 1000} {
		name := fmt.Sprintf("%dnodes", nodeCount)

		b.Run(name, func(b *testing.B) {
			batcher, _ := benchBatcher(nodeCount, 10)

			// Deferred calls run last-in-first-out: done is closed, then the ticker stops.
			defer batcher.tick.Stop()
			defer close(batcher.done)

			derpChange := change.DERPMap()

			b.ResetTimer()

			for range b.N {
				batcher.addToBatch(derpChange)
				batcher.nodes.Range(drain)
			}
		})
	}
}

// BenchmarkAddToBatch_Targeted times addToBatch for a change aimed at one
// node. Targets rotate through node IDs 1..nodeCount, and every 100th
// iteration clears all pending changes so they do not accumulate.
func BenchmarkAddToBatch_Targeted(b *testing.B) {
	zerolog.SetGlobalLevel(zerolog.Disabled)
	defer zerolog.SetGlobalLevel(zerolog.DebugLevel)

	// Range callback that clears one node's pending changes.
	drain := func(_ types.NodeID, nc *multiChannelNodeConn) bool {
		nc.drainPending()

		return true
	}

	for _, nodeCount := range []int{10, 100, 1000} {
		name := fmt.Sprintf("%dnodes", nodeCount)

		b.Run(name, func(b *testing.B) {
			batcher, _ := benchBatcher(nodeCount, 10)

			// Deferred calls run last-in-first-out: done is closed, then the ticker stops.
			defer batcher.tick.Stop()
			defer close(batcher.done)

			b.ResetTimer()

			for i := range b.N {
				target := types.NodeID(1 + (i % nodeCount)) //nolint:gosec // benchmark

				// The change is rebuilt on every iteration, inside the timed region.
				batcher.addToBatch(change.Change{
					Reason:     "bench-targeted",
					TargetNode: target,
					PeerPatches: []*tailcfg.PeerChange{
						{NodeID: tailcfg.NodeID(target)}, //nolint:gosec // benchmark
					},
				})

				// Only iterations 99, 199, 299, ... clear the pending changes.
				if (i+1)%100 != 0 {
					continue
				}

				batcher.nodes.Range(drain)
			}
		})
	}
}

// BenchmarkAddToBatch_FullUpdate times addToBatch with a FullUpdate change
// against batchers holding 10, 100 and 1000 nodes. The change is built anew
// on every iteration and pending changes are not cleared between iterations.
func BenchmarkAddToBatch_FullUpdate(b *testing.B) {
	zerolog.SetGlobalLevel(zerolog.Disabled)
	defer zerolog.SetGlobalLevel(zerolog.DebugLevel)

	for _, nodeCount := range []int{10, 100, 1000} {
		name := fmt.Sprintf("%dnodes", nodeCount)

		b.Run(name, func(b *testing.B) {
			batcher, _ := benchBatcher(nodeCount, 10)

			// Deferred calls run last-in-first-out: done is closed, then the ticker stops.
			defer batcher.tick.Stop()
			defer close(batcher.done)

			b.ResetTimer()

			for range b.N {
				full := change.FullUpdate()
				batcher.addToBatch(full)
			}
		})
	}
}

// BenchmarkProcessBatchedChanges times processBatchedChanges when every node
// has one pending DERPMap change. Seeding the pending changes happens with
// the timer stopped, so only processBatchedChanges itself is measured.
func BenchmarkProcessBatchedChanges(b *testing.B) {
	zerolog.SetGlobalLevel(zerolog.Disabled)
	defer zerolog.SetGlobalLevel(zerolog.DebugLevel)

	for _, nodeCount := range []int{10, 100, 1000} {
		name := fmt.Sprintf("%dpending", nodeCount)

		b.Run(name, func(b *testing.B) {
			batcher, _ := benchBatcher(nodeCount, 10)
			// Nothing consumes workCh here, so it is sized for one item per
			// node per iteration to keep the queueing side from blocking.
			batcher.workCh = make(chan work, nodeCount*b.N+1)

			// Deferred calls run last-in-first-out: done is closed, then the ticker stops.
			defer batcher.tick.Stop()
			defer close(batcher.done)

			// seed appends one pending DERPMap change to every node.
			seed := func() {
				for n := 1; n <= nodeCount; n++ {
					nc, found := batcher.nodes.Load(types.NodeID(n)) //nolint:gosec // benchmark
					if !found {
						continue
					}

					nc.appendPending(change.DERPMap())
				}
			}

			b.ResetTimer()

			for range b.N {
				b.StopTimer()
				seed()
				b.StartTimer()

				batcher.processBatchedChanges()
			}
		})
	}
}

// BenchmarkBroadcastToN times one addToBatch call followed by one
// processBatchedChanges call for a DERPMap change, against 10, 100 and 1000
// nodes. benchBatcher starts no workers, so whatever is queued on workCh is
// never consumed and MapResponse generation is not part of the measurement.
func BenchmarkBroadcastToN(b *testing.B) {
	zerolog.SetGlobalLevel(zerolog.Disabled)
	defer zerolog.SetGlobalLevel(zerolog.DebugLevel)

	for _, nodeCount := range []int{10, 100, 1000} {
		name := fmt.Sprintf("%dnodes", nodeCount)

		b.Run(name, func(b *testing.B) {
			batcher, _ := benchBatcher(nodeCount, 10)
			// Sized for one queued item per node per iteration, as nothing drains it.
			batcher.workCh = make(chan work, nodeCount*b.N+1)

			// Deferred calls run last-in-first-out: done is closed, then the ticker stops.
			defer batcher.tick.Stop()
			defer close(batcher.done)

			derpChange := change.DERPMap()

			b.ResetTimer()

			for range b.N {
				batcher.addToBatch(derpChange)
				batcher.processBatchedChanges()
			}
		})
	}
}

// BenchmarkMultiChannelBroadcast times sending one MapResponse to every node
// of a 10, 100 or 1000 node batcher. Every third node carries three
// connections (the one from benchBatcher plus two added here); the rest have
// one. All channels are buffered for b.N+1 responses and are never read.
func BenchmarkMultiChannelBroadcast(b *testing.B) {
	zerolog.SetGlobalLevel(zerolog.Disabled)
	defer zerolog.SetGlobalLevel(zerolog.DebugLevel)

	for _, nodeCount := range []int{10, 100, 1000} {
		name := fmt.Sprintf("%dnodes", nodeCount)

		b.Run(name, func(b *testing.B) {
			batcher, _ := benchBatcher(nodeCount, b.N+1)

			// Deferred calls run last-in-first-out: done is closed, then the ticker stops.
			defer batcher.tick.Stop()
			defer close(batcher.done)

			// Nodes 3, 6, 9, ... each receive two additional connections.
			for n := 3; n <= nodeCount; n += 3 {
				nodeConn, found := batcher.nodes.Load(types.NodeID(n)) //nolint:gosec // benchmark
				if !found {
					continue
				}

				for extra := range 2 {
					conn := &connectionEntry{
						id:      fmt.Sprintf("extra-%d-%d", n, extra),
						c:       make(chan *tailcfg.MapResponse, b.N+1),
						version: tailcfg.CapabilityVersion(100),
						created: time.Now(),
					}
					conn.lastUsed.Store(time.Now().Unix())
					nodeConn.addConnection(conn)
				}
			}

			resp := testMapResponse()

			// Range callback that sends the shared response to one node.
			sendTo := func(_ types.NodeID, nodeConn *multiChannelNodeConn) bool {
				_ = nodeConn.send(resp)

				return true
			}

			b.ResetTimer()

			for range b.N {
				batcher.nodes.Range(sendTo)
			}
		})
	}
}

// BenchmarkConcurrentAddToBatch measures addToBatch throughput when it is
// called from multiple goroutines via b.RunParallel, against batchers holding
// 10, 100 and 1000 nodes.
//
// A background goroutine clears every node's pending changes roughly once per
// millisecond so they do not accumulate during the run. It is controlled by
// its own stop channel rather than batcher.done, so batcher.done is closed
// exactly once and never has to be re-created.
func BenchmarkConcurrentAddToBatch(b *testing.B) {
	zerolog.SetGlobalLevel(zerolog.Disabled)
	defer zerolog.SetGlobalLevel(zerolog.DebugLevel)

	for _, nodeCount := range []int{10, 100, 1000} {
		b.Run(fmt.Sprintf("%dnodes", nodeCount), func(b *testing.B) {
			batcher, _ := benchBatcher(nodeCount, 10)

			defer func() {
				close(batcher.done)
				batcher.tick.Stop()
			}()

			stopDrain := make(chan struct{})
			drainDone := make(chan struct{})

			// Registered after the batcher cleanup, so it runs first: the
			// drain goroutine is stopped and joined before the batcher is
			// torn down, including when the benchmark exits early.
			defer func() {
				close(stopDrain)
				<-drainDone
			}()

			// Background goroutine to drain pending periodically.
			go func() {
				defer close(drainDone)

				for {
					select {
					case <-stopDrain:
						return
					default:
						batcher.nodes.Range(func(_ types.NodeID, nc *multiChannelNodeConn) bool {
							nc.drainPending()
							return true
						})
						time.Sleep(time.Millisecond) //nolint:forbidigo // benchmark drain loop
					}
				}
			}()

			ch := change.DERPMap()

			b.ResetTimer()
			b.RunParallel(func(pb *testing.PB) {
				for pb.Next() {
					batcher.addToBatch(ch)
				}
			})
			// Keep the deferred teardown out of the measurement.
			b.StopTimer()
		})
	}
}

// BenchmarkIsConnected times IsConnected lookups, cycling through node IDs
// 1..nodeCount on batchers holding 10, 100 and 1000 nodes.
func BenchmarkIsConnected(b *testing.B) {
	zerolog.SetGlobalLevel(zerolog.Disabled)
	defer zerolog.SetGlobalLevel(zerolog.DebugLevel)

	for _, nodeCount := range []int{10, 100, 1000} {
		name := fmt.Sprintf("%dnodes", nodeCount)

		b.Run(name, func(b *testing.B) {
			batcher, _ := benchBatcher(nodeCount, 1)

			// Deferred calls run last-in-first-out: done is closed, then the ticker stops.
			defer batcher.tick.Stop()
			defer close(batcher.done)

			b.ResetTimer()

			for i := range b.N {
				target := types.NodeID(1 + (i % nodeCount)) //nolint:gosec // benchmark
				_ = batcher.IsConnected(target)
			}
		})
	}
}

// BenchmarkConnectedMap times ConnectedMap on batchers holding 10, 100 and
// 1000 nodes, where every 10th node has a timestamp stored in the connected
// map and the others keep the nil entry written by benchBatcher.
func BenchmarkConnectedMap(b *testing.B) {
	zerolog.SetGlobalLevel(zerolog.Disabled)
	defer zerolog.SetGlobalLevel(zerolog.DebugLevel)

	for _, nodeCount := range []int{10, 100, 1000} {
		name := fmt.Sprintf("%dnodes", nodeCount)

		b.Run(name, func(b *testing.B) {
			batcher, _ := benchBatcher(nodeCount, 1)

			// Deferred calls run last-in-first-out: done is closed, then the ticker stops.
			defer batcher.tick.Stop()
			defer close(batcher.done)

			// Nodes 10, 20, 30, ... get a timestamp, intended to model a
			// 10% share of disconnected nodes.
			for n := 10; n <= nodeCount; n += 10 {
				stamp := time.Now()
				batcher.connected.Store(types.NodeID(n), &stamp) //nolint:gosec // benchmark
			}

			b.ResetTimer()

			for range b.N {
				_ = batcher.ConnectedMap()
			}
		})
	}
}

// BenchmarkConnectionChurn times one remove-then-add connection cycle per
// iteration, rotating through node IDs 1..nodeCount, as a stand-in for
// clients reconnecting. Building the replacement connection (channel, entry,
// timestamps) is part of the measured time.
func BenchmarkConnectionChurn(b *testing.B) {
	zerolog.SetGlobalLevel(zerolog.Disabled)
	defer zerolog.SetGlobalLevel(zerolog.DebugLevel)

	for _, nodeCount := range []int{10, 100, 1000} {
		name := fmt.Sprintf("%dnodes", nodeCount)

		b.Run(name, func(b *testing.B) {
			batcher, channels := benchBatcher(nodeCount, 10)

			// Deferred calls run last-in-first-out: done is closed, then the ticker stops.
			defer batcher.tick.Stop()
			defer close(batcher.done)

			b.ResetTimer()

			for i := range b.N {
				id := types.NodeID(1 + (i % nodeCount)) //nolint:gosec // benchmark

				nodeConn, found := batcher.nodes.Load(id)
				if !found {
					continue
				}

				// Drop whichever connection is currently recorded for this node.
				nodeConn.removeConnectionByChannel(channels[id])

				// Attach a replacement and record its channel for the next cycle.
				replacement := make(chan *tailcfg.MapResponse, 10)
				conn := &connectionEntry{
					id:      fmt.Sprintf("churn-%d", i),
					c:       replacement,
					version: tailcfg.CapabilityVersion(100),
					created: time.Now(),
				}
				conn.lastUsed.Store(time.Now().Unix())
				nodeConn.addConnection(conn)

				channels[id] = replacement
			}
		})
	}
}

// BenchmarkConcurrentSendAndChurn measures sending one MapResponse to every
// node while a background goroutine continuously replaces connections on a
// subset of nodes (those whose rotating index is a multiple of 10).
//
// Teardown matches the other system benchmarks (done closed, ticker stopped),
// and the churn goroutine is stopped and joined before the sub-benchmark
// returns so it cannot overlap with the next one.
func BenchmarkConcurrentSendAndChurn(b *testing.B) {
	zerolog.SetGlobalLevel(zerolog.Disabled)
	defer zerolog.SetGlobalLevel(zerolog.DebugLevel)

	for _, nodeCount := range []int{10, 100} {
		b.Run(fmt.Sprintf("%dnodes", nodeCount), func(b *testing.B) {
			batcher, channels := benchBatcher(nodeCount, 100)

			defer func() {
				close(batcher.done)
				batcher.tick.Stop()
			}()

			var mu sync.Mutex // protect channels map

			stopChurn := make(chan struct{})
			churnDone := make(chan struct{})

			// Registered after the batcher cleanup, so it runs first: the
			// churn goroutine is stopped and joined before teardown.
			defer func() {
				close(stopChurn)
				<-churnDone
			}()

			// Background churn on 10% of nodes.
			go func() {
				defer close(churnDone)

				for i := 0; ; i++ {
					select {
					case <-stopChurn:
						return
					default:
					}

					// Only every 10th index churns; the others just advance i.
					if i%10 != 0 {
						continue
					}

					id := types.NodeID(1 + (i % nodeCount)) //nolint:gosec // benchmark

					mc, ok := batcher.nodes.Load(id)
					if !ok {
						continue
					}

					mu.Lock()
					oldCh := channels[id]
					mu.Unlock()

					mc.removeConnectionByChannel(oldCh)

					newCh := make(chan *tailcfg.MapResponse, 100)
					entry := &connectionEntry{
						id:      fmt.Sprintf("churn-%d", i),
						c:       newCh,
						version: tailcfg.CapabilityVersion(100),
						created: time.Now(),
					}
					entry.lastUsed.Store(time.Now().Unix())
					mc.addConnection(entry)

					mu.Lock()
					channels[id] = newCh
					mu.Unlock()
				}
			}()

			data := testMapResponse()

			b.ResetTimer()

			for range b.N {
				batcher.nodes.Range(func(_ types.NodeID, mc *multiChannelNodeConn) bool {
					_ = mc.send(data)
					return true
				})
			}

			// Keep the deferred teardown (joining the churn goroutine) out
			// of the measurement.
			b.StopTimer()
		})
	}
}

// ============================================================================
// Full Pipeline Benchmarks (with DB)
// ============================================================================

// BenchmarkAddNode measures the cost of connecting every test node to the
// batcher via AddNode, using the batcher and nodes produced by
// setupBatcherWithTestData. Skipped in short mode.
//
// Each iteration times only the AddNode calls. Removing the nodes again and
// emptying their channels happens with the timer stopped. A failing AddNode
// aborts the benchmark instead of being timed as if it had succeeded.
func BenchmarkAddNode(b *testing.B) {
	if testing.Short() {
		b.Skip("skipping full pipeline benchmark in short mode")
	}

	zerolog.SetGlobalLevel(zerolog.Disabled)
	defer zerolog.SetGlobalLevel(zerolog.DebugLevel)

	for _, nodeCount := range []int{10, 100} {
		b.Run(fmt.Sprintf("%dnodes", nodeCount), func(b *testing.B) {
			testData, cleanup := setupBatcherWithTestData(b, NewBatcherAndMapper, 1, nodeCount, largeBufferSize)
			defer cleanup()

			batcher := testData.Batcher
			allNodes := testData.Nodes

			// Start consumers
			for i := range allNodes {
				allNodes[i].start()
			}

			defer func() {
				for i := range allNodes {
					allNodes[i].cleanup()
				}
			}()

			// drainNode discards whatever is currently buffered on one
			// node's channel and returns as soon as a receive would block.
			drainNode := func(idx int) {
				for {
					select {
					case <-allNodes[idx].ch:
					default:
						return
					}
				}
			}

			b.ResetTimer()

			for range b.N {
				// Connect all nodes (measuring AddNode cost)
				for i := range allNodes {
					node := &allNodes[i]

					err := batcher.AddNode(node.n.ID, node.ch, tailcfg.CapabilityVersion(100), nil)
					if err != nil {
						b.Fatalf("failed to add node %d: %v", i, err)
					}
				}

				b.StopTimer()

				// Disconnect for next iteration
				for i := range allNodes {
					node := &allNodes[i]
					batcher.RemoveNode(node.n.ID, node.ch)
				}

				// Drain channels
				for i := range allNodes {
					drainNode(i)
				}

				b.StartTimer()
			}
		})
	}
}

// BenchmarkFullPipeline exercises the batcher end to end: each iteration
// submits a DERPMap change through AddWork and then sleeps for 20ms to give
// the batcher time to process it. Skipped in short mode.
//
// Because the sleep sits inside the timed loop, the reported time per
// operation can never be lower than 20ms.
func BenchmarkFullPipeline(b *testing.B) {
	if testing.Short() {
		b.Skip("skipping full pipeline benchmark in short mode")
	}

	zerolog.SetGlobalLevel(zerolog.Disabled)
	defer zerolog.SetGlobalLevel(zerolog.DebugLevel)

	const (
		settleDelay  = 200 * time.Millisecond // pause after connecting all nodes
		processDelay = 20 * time.Millisecond  // pause after each submitted change
	)

	for _, nodeCount := range []int{10, 100} {
		name := fmt.Sprintf("%dnodes", nodeCount)

		b.Run(name, func(b *testing.B) {
			testData, cleanup := setupBatcherWithTestData(b, NewBatcherAndMapper, 1, nodeCount, largeBufferSize)
			defer cleanup()

			batcher, nodes := testData.Batcher, testData.Nodes

			// Start each node's consumer before anything is sent to it.
			for i := range nodes {
				nodes[i].start()
			}

			defer func() {
				for i := range nodes {
					nodes[i].cleanup()
				}
			}()

			// Register every node with the batcher; a failure aborts the run.
			for i := range nodes {
				current := &nodes[i]

				if err := batcher.AddNode(current.n.ID, current.ch, tailcfg.CapabilityVersion(100), nil); err != nil {
					b.Fatalf("failed to add node %d: %v", i, err)
				}
			}

			// Let the initial maps settle before timing starts.
			time.Sleep(settleDelay) //nolint:forbidigo // benchmark coordination

			b.ResetTimer()

			for range b.N {
				batcher.AddWork(change.DERPMap())
				// The benchmark does not drive processing itself, so it
				// waits a fixed interval for the batcher to do so.
				time.Sleep(processDelay) //nolint:forbidigo // benchmark coordination
			}
		})
	}
}

// BenchmarkMapResponseFromChange times direct MapResponseFromChange calls for
// a DERPMap change, rotating through the connected test nodes. Skipped in
// short mode. Both return values are discarded.
func BenchmarkMapResponseFromChange(b *testing.B) {
	if testing.Short() {
		b.Skip("skipping full pipeline benchmark in short mode")
	}

	zerolog.SetGlobalLevel(zerolog.Disabled)
	defer zerolog.SetGlobalLevel(zerolog.DebugLevel)

	// Pause after connecting all nodes, before timing starts.
	const settleDelay = 200 * time.Millisecond

	for _, nodeCount := range []int{10, 100} {
		name := fmt.Sprintf("%dnodes", nodeCount)

		b.Run(name, func(b *testing.B) {
			testData, cleanup := setupBatcherWithTestData(b, NewBatcherAndMapper, 1, nodeCount, largeBufferSize)
			defer cleanup()

			batcher, nodes := testData.Batcher, testData.Nodes

			// Start each node's consumer before anything is sent to it.
			for i := range nodes {
				nodes[i].start()
			}

			defer func() {
				for i := range nodes {
					nodes[i].cleanup()
				}
			}()

			// Register every node with the batcher; a failure aborts the run.
			for i := range nodes {
				current := &nodes[i]

				if err := batcher.AddNode(current.n.ID, current.ch, tailcfg.CapabilityVersion(100), nil); err != nil {
					b.Fatalf("failed to add node %d: %v", i, err)
				}
			}

			time.Sleep(settleDelay) //nolint:forbidigo // benchmark coordination

			derpChange := change.DERPMap()

			b.ResetTimer()

			for i := range b.N {
				target := &nodes[i%len(nodes)]
				_, _ = batcher.MapResponseFromChange(target.n.ID, derpChange)
			}
		})
	}
}