policy: more accurate node change

This commit changes so that node changes to the policy is
calculated if any of the nodes has changed in a way that might
affect the policy.

Previously we just checked if the number of nodes had changed,
which meant that if a node was added and removed, we would be
in a bad state.

Signed-off-by: Kristoffer Dalby <kristoffer@dalby.cc>
This commit is contained in:
Kristoffer Dalby
2025-12-15 14:33:36 +00:00
parent daf9f36c78
commit 506bd8c8eb
4 changed files with 258 additions and 8 deletions

View File

@@ -498,8 +498,7 @@ func (pm *PolicyManager) SetNodes(nodes views.Slice[types.NodeView]) (bool, erro
pm.mu.Lock()
defer pm.mu.Unlock()
oldNodeCount := pm.nodes.Len()
newNodeCount := nodes.Len()
policyChanged := pm.nodesHavePolicyAffectingChanges(nodes)
// Invalidate cache entries for nodes that changed.
// For autogroup:self: invalidate all nodes belonging to affected users (peer changes).
@@ -508,19 +507,17 @@ func (pm *PolicyManager) SetNodes(nodes views.Slice[types.NodeView]) (bool, erro
pm.nodes = nodes
nodesChanged := oldNodeCount != newNodeCount
// When nodes are added/removed, we must recompile filters because:
// When policy-affecting node properties change, we must recompile filters because:
// 1. User/group aliases (like "user1@") resolve to node IPs
// 2. Filter compilation needs nodes to generate rules
// 3. Without nodes, filters compile to empty (0 rules)
// 2. Tag aliases (like "tag:server") match nodes based on their tags
// 3. Filter compilation needs nodes to generate rules
//
// For autogroup:self: return true when nodes change even if the global filter
// hash didn't change. The global filter is empty for autogroup:self (each node
// has its own filter), so the hash never changes. But peer relationships DO
// change when nodes are added/removed, so we must signal this to trigger updates.
// For global policies: the filter must be recompiled to include the new nodes.
if nodesChanged {
if policyChanged {
// Recompile filter with the new node list
needsUpdate, err := pm.updateLocked()
if err != nil {
@@ -541,6 +538,30 @@ func (pm *PolicyManager) SetNodes(nodes views.Slice[types.NodeView]) (bool, erro
return false, nil
}
func (pm *PolicyManager) nodesHavePolicyAffectingChanges(newNodes views.Slice[types.NodeView]) bool {
if pm.nodes.Len() != newNodes.Len() {
return true
}
oldNodes := make(map[types.NodeID]types.NodeView, pm.nodes.Len())
for _, node := range pm.nodes.All() {
oldNodes[node.ID()] = node
}
for _, newNode := range newNodes.All() {
oldNode, exists := oldNodes[newNode.ID()]
if !exists {
return true
}
if newNode.HasPolicyChange(oldNode) {
return true
}
}
return false
}
// NodeCanHaveTag checks if a node can have the specified tag during client-initiated
// registration or reauth flows (e.g., tailscale up --advertise-tags).
//

View File

@@ -606,3 +606,126 @@ func TestAutogroupSelfPolicyUpdateTriggersMapResponse(t *testing.T) {
require.NoError(t, err)
require.False(t, policyChanged2, "SetPolicy should return false when policy content hasn't changed")
}
// TestTagPropagationToPeerMap tests that when a node's tags change,
// the peer map is correctly updated. This is a regression test for
// https://github.com/juanfont/headscale/issues/2389
func TestTagPropagationToPeerMap(t *testing.T) {
users := types.Users{
{Model: gorm.Model{ID: 1}, Name: "user1", Email: "user1@headscale.net"},
{Model: gorm.Model{ID: 2}, Name: "user2", Email: "user2@headscale.net"},
}
// Policy: user2 can access tag:web nodes
policy := `{
"tagOwners": {
"tag:web": ["user1@headscale.net"],
"tag:internal": ["user1@headscale.net"]
},
"acls": [
{
"action": "accept",
"src": ["user2@headscale.net"],
"dst": ["user2@headscale.net:*"]
},
{
"action": "accept",
"src": ["user2@headscale.net"],
"dst": ["tag:web:*"]
},
{
"action": "accept",
"src": ["tag:web"],
"dst": ["user2@headscale.net:*"]
}
]
}`
// user1's node starts with tag:web and tag:internal
user1Node := &types.Node{
ID: 1,
Hostname: "user1-node",
IPv4: ap("100.64.0.1"),
IPv6: ap("fd7a:115c:a1e0::1"),
User: ptr.To(users[0]),
UserID: ptr.To(users[0].ID),
Tags: []string{"tag:web", "tag:internal"},
}
// user2's node (no tags)
user2Node := &types.Node{
ID: 2,
Hostname: "user2-node",
IPv4: ap("100.64.0.2"),
IPv6: ap("fd7a:115c:a1e0::2"),
User: ptr.To(users[1]),
UserID: ptr.To(users[1].ID),
}
initialNodes := types.Nodes{user1Node, user2Node}
pm, err := NewPolicyManager([]byte(policy), users, initialNodes.ViewSlice())
require.NoError(t, err)
// Initial state: user2 should see user1 as a peer (user1 has tag:web)
initialPeerMap := pm.BuildPeerMap(initialNodes.ViewSlice())
// Check user2's peers - should include user1
user2Peers := initialPeerMap[user2Node.ID]
require.Len(t, user2Peers, 1, "user2 should have 1 peer initially (user1 with tag:web)")
require.Equal(t, user1Node.ID, user2Peers[0].ID(), "user2's peer should be user1")
// Check user1's peers - should include user2 (bidirectional ACL)
user1Peers := initialPeerMap[user1Node.ID]
require.Len(t, user1Peers, 1, "user1 should have 1 peer initially (user2)")
require.Equal(t, user2Node.ID, user1Peers[0].ID(), "user1's peer should be user2")
// Now change user1's tags: remove tag:web, keep only tag:internal
user1NodeUpdated := &types.Node{
ID: 1,
Hostname: "user1-node",
IPv4: ap("100.64.0.1"),
IPv6: ap("fd7a:115c:a1e0::1"),
User: ptr.To(users[0]),
UserID: ptr.To(users[0].ID),
Tags: []string{"tag:internal"}, // tag:web removed!
}
updatedNodes := types.Nodes{user1NodeUpdated, user2Node}
// SetNodes should detect the tag change
changed, err := pm.SetNodes(updatedNodes.ViewSlice())
require.NoError(t, err)
require.True(t, changed, "SetNodes should return true when tags change")
// After tag change: user2 should NOT see user1 as a peer anymore
// (no ACL allows user2 to access tag:internal)
updatedPeerMap := pm.BuildPeerMap(updatedNodes.ViewSlice())
// Check user2's peers - should be empty now
user2PeersAfter := updatedPeerMap[user2Node.ID]
require.Empty(t, user2PeersAfter, "user2 should have no peers after tag:web is removed from user1")
// Check user1's peers - should also be empty
user1PeersAfter := updatedPeerMap[user1Node.ID]
require.Empty(t, user1PeersAfter, "user1 should have no peers after tag:web is removed")
// Also verify MatchersForNode returns non-empty matchers and ReduceNodes filters correctly
// This simulates what buildTailPeers does in the mapper
matchersForUser2, err := pm.MatchersForNode(user2Node.View())
require.NoError(t, err)
require.NotEmpty(t, matchersForUser2, "MatchersForNode should return non-empty matchers (at least self-access rule)")
// Test ReduceNodes logic with the updated nodes and matchers
// This is what buildTailPeers does - it takes peers from ListPeers (which might include user1)
// and filters them using ReduceNodes with the updated matchers
// Inline the ReduceNodes logic to avoid import cycle
user2View := user2Node.View()
user1UpdatedView := user1NodeUpdated.View()
// Check if user2 can access user1 OR user1 can access user2
canAccess := user2View.CanAccess(matchersForUser2, user1UpdatedView) ||
user1UpdatedView.CanAccess(matchersForUser2, user2View)
require.False(t, canAccess, "user2 should NOT be able to access user1 after tag:web is removed (ReduceNodes should filter out)")
}

View File

@@ -1152,3 +1152,92 @@ func TestNodeStoreAllocationStats(t *testing.T) {
allocs := res.AllocsPerOp()
t.Logf("NodeStore allocations per op: %.2f", float64(allocs))
}
// TestRebuildPeerMapsWithChangedPeersFunc tests that RebuildPeerMaps correctly
// rebuilds the peer map when the peersFunc behavior changes.
// This simulates what happens when SetNodeTags changes node tags and the
// PolicyManager's matchers are updated, requiring the peer map to be rebuilt.
func TestRebuildPeerMapsWithChangedPeersFunc(t *testing.T) {
// Create a peersFunc that can be controlled via a channel
// Initially it returns all nodes as peers, then we change it to return no peers
allowPeers := true
// This simulates how PolicyManager.BuildPeerMap works - it reads state
// that can change between calls
dynamicPeersFunc := func(nodes []types.NodeView) map[types.NodeID][]types.NodeView {
ret := make(map[types.NodeID][]types.NodeView, len(nodes))
if allowPeers {
// Allow all peers
for _, node := range nodes {
var peers []types.NodeView
for _, n := range nodes {
if n.ID() != node.ID() {
peers = append(peers, n)
}
}
ret[node.ID()] = peers
}
} else {
// Allow no peers
for _, node := range nodes {
ret[node.ID()] = []types.NodeView{}
}
}
return ret
}
// Create nodes
node1 := createTestNode(1, 1, "user1", "node1")
node2 := createTestNode(2, 2, "user2", "node2")
initialNodes := types.Nodes{&node1, &node2}
// Create store with dynamic peersFunc
store := NewNodeStore(initialNodes, dynamicPeersFunc, TestBatchSize, TestBatchTimeout)
store.Start()
defer store.Stop()
// Initially, nodes should see each other as peers
snapshot := store.data.Load()
require.Len(t, snapshot.peersByNode[1], 1, "node1 should have 1 peer initially")
require.Len(t, snapshot.peersByNode[2], 1, "node2 should have 1 peer initially")
require.Equal(t, types.NodeID(2), snapshot.peersByNode[1][0].ID())
require.Equal(t, types.NodeID(1), snapshot.peersByNode[2][0].ID())
// Now "change the policy" by disabling peers
allowPeers = false
// Call RebuildPeerMaps to rebuild with the new behavior
store.RebuildPeerMaps()
// After rebuild, nodes should have no peers
snapshot = store.data.Load()
assert.Empty(t, snapshot.peersByNode[1], "node1 should have no peers after rebuild")
assert.Empty(t, snapshot.peersByNode[2], "node2 should have no peers after rebuild")
// Verify that ListPeers returns the correct result
peers1 := store.ListPeers(1)
peers2 := store.ListPeers(2)
assert.Equal(t, 0, peers1.Len(), "ListPeers for node1 should return empty")
assert.Equal(t, 0, peers2.Len(), "ListPeers for node2 should return empty")
// Now re-enable peers and rebuild again
allowPeers = true
store.RebuildPeerMaps()
// Nodes should see each other again
snapshot = store.data.Load()
require.Len(t, snapshot.peersByNode[1], 1, "node1 should have 1 peer after re-enabling")
require.Len(t, snapshot.peersByNode[2], 1, "node2 should have 1 peer after re-enabling")
peers1 = store.ListPeers(1)
peers2 = store.ListPeers(2)
assert.Equal(t, 1, peers1.Len(), "ListPeers for node1 should return 1")
assert.Equal(t, 1, peers2.Len(), "ListPeers for node2 should return 1")
}

View File

@@ -973,6 +973,23 @@ func (nv NodeView) HasNetworkChanges(other NodeView) bool {
return false
}
// HasPolicyChange reports whether the node has changes that affect policy evaluation.
func (nv NodeView) HasPolicyChange(other NodeView) bool {
if nv.UserID() != other.UserID() {
return true
}
if !views.SliceEqual(nv.Tags(), other.Tags()) {
return true
}
if !slices.Equal(nv.IPs(), other.IPs()) {
return true
}
return false
}
// TailNodes converts a slice of NodeViews into Tailscale tailcfg.Nodes.
func TailNodes(
nodes views.Slice[NodeView],