diff --git a/hscontrol/policy/v2/policy.go b/hscontrol/policy/v2/policy.go
index 27cf70b4..2b7e7589 100644
--- a/hscontrol/policy/v2/policy.go
+++ b/hscontrol/policy/v2/policy.go
@@ -47,6 +47,14 @@ type PolicyManager struct {
 	usesAutogroupSelf bool
 }
 
+// filterAndPolicy combines the compiled filter rules with policy content for hashing.
+// This ensures filterHash changes when policy changes, even for autogroup:self where
+// the compiled filter is always empty.
+type filterAndPolicy struct {
+	Filter []tailcfg.FilterRule
+	Policy *Policy
+}
+
 // NewPolicyManager creates a new PolicyManager from a policy file and a list of users and nodes.
 // It returns an error if the policy file is invalid.
 // The policy manager will update the filter rules based on the users and nodes.
@@ -77,14 +85,6 @@ func NewPolicyManager(b []byte, users []types.User, nodes views.Slice[types.Node
 // updateLocked updates the filter rules based on the current policy and nodes.
 // It must be called with the lock held.
 func (pm *PolicyManager) updateLocked() (bool, error) {
-	// Clear the SSH policy map to ensure it's recalculated with the new policy.
-	// TODO(kradalby): This could potentially be optimized by only clearing the
-	// policies for nodes that have changed. Particularly if the only difference is
-	// that nodes has been added or removed.
-	clear(pm.sshPolicyMap)
-	clear(pm.compiledFilterRulesMap)
-	clear(pm.filterRulesMap)
-
 	// Check if policy uses autogroup:self
 	pm.usesAutogroupSelf = pm.pol.usesAutogroupSelf()
 
@@ -98,7 +98,14 @@ func (pm *PolicyManager) updateLocked() (bool, error) {
 		return false, fmt.Errorf("compiling filter rules: %w", err)
 	}
 
-	filterHash := deephash.Hash(&filter)
+	// Hash both the compiled filter AND the policy content together.
+	// This ensures filterHash changes when policy changes, even for autogroup:self
+	// where the compiled filter is always empty. This eliminates the need for
+	// a separate policyHash field.
+	filterHash := deephash.Hash(&filterAndPolicy{
+		Filter: filter,
+		Policy: pm.pol,
+	})
 	filterChanged := filterHash != pm.filterHash
 	if filterChanged {
 		log.Debug().
@@ -164,8 +171,27 @@ func (pm *PolicyManager) updateLocked() (bool, error) {
 	pm.exitSet = exitSet
 	pm.exitSetHash = exitSetHash
 
-	// If neither of the calculated values changed, no need to update nodes
-	if !filterChanged && !tagOwnerChanged && !autoApproveChanged && !exitSetChanged {
+	// Determine if we need to send updates to nodes
+	// filterChanged now includes policy content changes (via combined hash),
+	// so it will detect changes even for autogroup:self where compiled filter is empty
+	needsUpdate := filterChanged || tagOwnerChanged || autoApproveChanged || exitSetChanged
+
+	// Only clear caches if we're actually going to send updates
+	// This prevents clearing caches when nothing changed, which would leave nodes
+	// with stale filters until they reconnect. This is critical for autogroup:self
+	// where even reloading the same policy would clear caches but not send updates.
+	if needsUpdate {
+		// Clear the SSH policy map to ensure it's recalculated with the new policy.
+		// TODO(kradalby): This could potentially be optimized by only clearing the
+		// policies for nodes that have changed. Particularly if the only difference is
+		// that nodes has been added or removed.
+		clear(pm.sshPolicyMap)
+		clear(pm.compiledFilterRulesMap)
+		clear(pm.filterRulesMap)
+	}
+
+	// If nothing changed, no need to update nodes
+	if !needsUpdate {
 		log.Trace().
 			Msg("Policy evaluation detected no changes - all hashes match")
 		return false, nil
@@ -491,10 +517,17 @@ func (pm *PolicyManager) SetNodes(nodes views.Slice[types.NodeView]) (bool, erro
 	// For global policies: the filter must be recompiled to include the new nodes.
 	if nodesChanged {
 		// Recompile filter with the new node list
-		_, err := pm.updateLocked()
+		needsUpdate, err := pm.updateLocked()
 		if err != nil {
 			return false, err
 		}
+
+		if !needsUpdate {
+			// This ensures fresh filter rules are generated for all nodes
+			clear(pm.sshPolicyMap)
+			clear(pm.compiledFilterRulesMap)
+			clear(pm.filterRulesMap)
+		}
 		// Always return true when nodes changed, even if filter hash didn't change
 		// (can happen with autogroup:self or when nodes are added but don't affect rules)
 		return true, nil
diff --git a/hscontrol/policy/v2/policy_test.go b/hscontrol/policy/v2/policy_test.go
index bbde136e..5b36b79e 100644
--- a/hscontrol/policy/v2/policy_test.go
+++ b/hscontrol/policy/v2/policy_test.go
@@ -519,3 +519,89 @@ func TestAutogroupSelfWithOtherRules(t *testing.T) {
 	require.NoError(t, err)
 	require.NotEmpty(t, rules, "test-1 should have filter rules from both ACL rules")
 }
+
+// TestAutogroupSelfPolicyUpdateTriggersMapResponse verifies that when a policy with
+// autogroup:self is updated, SetPolicy returns true to trigger MapResponse updates,
+// even if the global filter hash didn't change (which is always empty for autogroup:self).
+// This fixes the issue where policy updates would clear caches but not trigger updates,
+// leaving nodes with stale filter rules until reconnect.
+func TestAutogroupSelfPolicyUpdateTriggersMapResponse(t *testing.T) {
+	users := types.Users{
+		{Model: gorm.Model{ID: 1}, Name: "test-1", Email: "test-1@example.com"},
+		{Model: gorm.Model{ID: 2}, Name: "test-2", Email: "test-2@example.com"},
+	}
+
+	test1Node := &types.Node{
+		ID:       1,
+		Hostname: "test-1-device",
+		IPv4:     ap("100.64.0.1"),
+		IPv6:     ap("fd7a:115c:a1e0::1"),
+		User:     users[0],
+		UserID:   users[0].ID,
+		Hostinfo: &tailcfg.Hostinfo{},
+	}
+
+	test2Node := &types.Node{
+		ID:       2,
+		Hostname: "test-2-device",
+		IPv4:     ap("100.64.0.2"),
+		IPv6:     ap("fd7a:115c:a1e0::2"),
+		User:     users[1],
+		UserID:   users[1].ID,
+		Hostinfo: &tailcfg.Hostinfo{},
+	}
+
+	nodes := types.Nodes{test1Node, test2Node}
+
+	// Initial policy with autogroup:self
+	initialPolicy := `{
+		"acls": [
+			{
+				"action": "accept",
+				"src": ["autogroup:member"],
+				"dst": ["autogroup:self:*"]
+			}
+		]
+	}`
+
+	pm, err := NewPolicyManager([]byte(initialPolicy), users, nodes.ViewSlice())
+	require.NoError(t, err)
+	require.True(t, pm.usesAutogroupSelf, "policy should use autogroup:self")
+
+	// Get initial filter rules for test-1 (should be cached)
+	rules1, err := pm.FilterForNode(test1Node.View())
+	require.NoError(t, err)
+	require.NotEmpty(t, rules1, "test-1 should have filter rules")
+
+	// Update policy with a different ACL that still results in empty global filter
+	// (only autogroup:self rules, which compile to empty global filter)
+	// We add a comment/description change by adding groups (which don't affect filter compilation)
+	updatedPolicy := `{
+		"groups": {
+			"group:test": ["test-1@example.com"]
+		},
+		"acls": [
+			{
+				"action": "accept",
+				"src": ["autogroup:member"],
+				"dst": ["autogroup:self:*"]
+			}
+		]
+	}`
+
+	// SetPolicy should return true even though global filter hash didn't change
+	policyChanged, err := pm.SetPolicy([]byte(updatedPolicy))
+	require.NoError(t, err)
+	require.True(t, policyChanged, "SetPolicy should return true when policy content changes, even if global filter hash unchanged (autogroup:self)")
+
+	// Verify that caches were cleared and new rules are generated
+	// The cache should be empty, so FilterForNode will recompile
+	rules2, err := pm.FilterForNode(test1Node.View())
+	require.NoError(t, err)
+	require.NotEmpty(t, rules2, "test-1 should have filter rules after policy update")
+
+	// Verify that the policy hash tracking works - a second identical update should return false
+	policyChanged2, err := pm.SetPolicy([]byte(updatedPolicy))
+	require.NoError(t, err)
+	require.False(t, policyChanged2, "SetPolicy should return false when policy content hasn't changed")
+}
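
For readers outside the headscale codebase, a minimal sketch of the combined-hash idea this patch introduces is shown below. It is an illustration only: `policyStandIn` is a hypothetical stand-in for headscale's `*Policy` type, and the filter is left empty to mimic autogroup:self. The point is that hashing the compiled filter together with the policy content makes a policy-only change visible even when the compiled global filter is identical.

```go
// Standalone illustration (not headscale code): hash the compiled filter and
// the policy content together so that a policy-only change still changes the
// hash, even when the compiled filter is unchanged (or empty).
package main

import (
	"fmt"

	"tailscale.com/tailcfg"
	"tailscale.com/util/deephash"
)

// policyStandIn is a hypothetical stand-in for headscale's *Policy type,
// used only to keep this example self-contained.
type policyStandIn struct {
	ACLs []string
}

// filterAndPolicy mirrors the struct added in the patch: both fields feed
// into a single deephash sum.
type filterAndPolicy struct {
	Filter []tailcfg.FilterRule
	Policy *policyStandIn
}

func main() {
	// autogroup:self compiles to an empty global filter, so the filter alone
	// would hash identically for both policies below.
	emptyFilter := []tailcfg.FilterRule{}

	h1 := deephash.Hash(&filterAndPolicy{
		Filter: emptyFilter,
		Policy: &policyStandIn{ACLs: []string{"accept member -> self"}},
	})
	h2 := deephash.Hash(&filterAndPolicy{
		Filter: emptyFilter,
		Policy: &policyStandIn{ACLs: []string{"accept group:test -> self"}},
	})

	// Including the policy content makes the change detectable.
	fmt.Println("hashes differ:", h1 != h2)
}
```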