policy, noise: implement SSH check action

Implement the SSH "check" action which requires additional
verification before allowing SSH access. The policy compiler generates
a HoldAndDelegate URL that the Tailscale client calls back to
headscale. The SSHActionHandler creates an auth session and waits for
approval via the generalised auth flow.

Sort check (HoldAndDelegate) rules before accept rules to match
Tailscale's first-match-wins evaluation order.

Updates #1850
This commit is contained in:
Kristoffer Dalby
2026-02-24 18:50:18 +00:00
parent 4a7e1475c0
commit 107c2f2f70
10 changed files with 500 additions and 71 deletions

View File

@@ -19,7 +19,7 @@ type PolicyManager interface {
MatchersForNode(node types.NodeView) ([]matcher.Match, error)
// BuildPeerMap constructs peer relationship maps for the given nodes
BuildPeerMap(nodes views.Slice[types.NodeView]) map[types.NodeID][]types.NodeView
SSHPolicy(node types.NodeView) (*tailcfg.SSHPolicy, error)
SSHPolicy(baseURL string, node types.NodeView) (*tailcfg.SSHPolicy, error)
SetPolicy(pol []byte) (bool, error)
SetUsers(users []types.User) (bool, error)
SetNodes(nodes views.Slice[types.NodeView]) (bool, error)

View File

@@ -1188,8 +1188,9 @@ func TestSSHPolicyRules(t *testing.T) {
"root": "",
},
Action: &tailcfg.SSHAction{
Accept: true,
Accept: false,
SessionDuration: 24 * time.Hour,
HoldAndDelegate: "unused-url/machine/ssh/action/from/$SRC_NODE_ID/to/$DST_NODE_ID?ssh_user=$SSH_USER&local_user=$LOCAL_USER",
AllowAgentForwarding: true,
AllowLocalPortForwarding: true,
AllowRemotePortForwarding: true,
@@ -1476,7 +1477,7 @@ func TestSSHPolicyRules(t *testing.T) {
require.NoError(t, err)
got, err := pm.SSHPolicy(tt.targetNode.View())
got, err := pm.SSHPolicy("unused-url", tt.targetNode.View())
require.NoError(t, err)
if diff := cmp.Diff(tt.wantSSH, got); diff != "" {

View File

@@ -319,11 +319,27 @@ func (pol *Policy) compileACLWithAutogroupSelf(
return rules, nil
}
func sshAction(accept bool, duration time.Duration) tailcfg.SSHAction {
var sshAccept = tailcfg.SSHAction{
Reject: false,
Accept: true,
AllowAgentForwarding: true,
AllowLocalPortForwarding: true,
AllowRemotePortForwarding: true,
}
func sshCheck(baseURL string, duration time.Duration) tailcfg.SSHAction {
return tailcfg.SSHAction{
Reject: !accept,
Accept: accept,
SessionDuration: duration,
Reject: false,
Accept: false,
SessionDuration: duration,
// Replaced in the client:
// * $SRC_NODE_IP (URL escaped)
// * $SRC_NODE_ID (Node.ID as int64 string)
// * $DST_NODE_IP (URL escaped)
// * $DST_NODE_ID (Node.ID as int64 string)
// * $SSH_USER (URL escaped, ssh user requested)
// * $LOCAL_USER (URL escaped, local user mapped)
HoldAndDelegate: baseURL + "/machine/ssh/action/from/$SRC_NODE_ID/to/$DST_NODE_ID?ssh_user=$SSH_USER&local_user=$LOCAL_USER",
AllowAgentForwarding: true,
AllowLocalPortForwarding: true,
AllowRemotePortForwarding: true,
@@ -332,6 +348,7 @@ func sshAction(accept bool, duration time.Duration) tailcfg.SSHAction {
//nolint:gocyclo // complex SSH policy compilation logic
func (pol *Policy) compileSSHPolicy(
baseURL string,
users types.Users,
node types.NodeView,
nodes views.Slice[types.NodeView],
@@ -377,9 +394,9 @@ func (pol *Policy) compileSSHPolicy(
switch rule.Action {
case SSHActionAccept:
action = sshAction(true, 0)
action = sshAccept
case SSHActionCheck:
action = sshAction(true, time.Duration(rule.CheckPeriod))
action = sshCheck(baseURL, time.Duration(rule.CheckPeriod))
default:
return nil, fmt.Errorf("parsing SSH policy, unknown action %q, index: %d: %w", rule.Action, index, err)
}
@@ -503,6 +520,23 @@ func (pol *Policy) compileSSHPolicy(
}
}
// Sort rules: check (HoldAndDelegate) before accept, per Tailscale
// evaluation order (most-restrictive first).
slices.SortStableFunc(rules, func(a, b *tailcfg.SSHRule) int {
aIsCheck := a.Action != nil && a.Action.HoldAndDelegate != ""
bIsCheck := b.Action != nil && b.Action.HoldAndDelegate != ""
if aIsCheck == bIsCheck {
return 0
}
if aIsCheck {
return -1
}
return 1
})
return &tailcfg.SSHPolicy{
Rules: rules,
}, nil

View File

@@ -615,7 +615,7 @@ func TestCompileSSHPolicy_UserMapping(t *testing.T) {
require.NoError(t, err)
// Compile SSH policy
sshPolicy, err := tt.policy.compileSSHPolicy(users, tt.targetNode.View(), nodes.ViewSlice())
sshPolicy, err := tt.policy.compileSSHPolicy("unused-server-url", users, tt.targetNode.View(), nodes.ViewSlice())
require.NoError(t, err)
if tt.wantEmpty {
@@ -691,7 +691,7 @@ func TestCompileSSHPolicy_CheckAction(t *testing.T) {
err := policy.validate()
require.NoError(t, err)
sshPolicy, err := policy.compileSSHPolicy(users, nodeTaggedServer.View(), nodes.ViewSlice())
sshPolicy, err := policy.compileSSHPolicy("unused-server-url", users, nodeTaggedServer.View(), nodes.ViewSlice())
require.NoError(t, err)
require.NotNil(t, sshPolicy)
require.Len(t, sshPolicy.Rules, 1)
@@ -704,11 +704,90 @@ func TestCompileSSHPolicy_CheckAction(t *testing.T) {
}
assert.Equal(t, expectedUsers, rule.SSHUsers)
// Verify check action with session duration
assert.True(t, rule.Action.Accept)
// Verify check action: Accept is false, HoldAndDelegate is set
assert.False(t, rule.Action.Accept)
assert.False(t, rule.Action.Reject)
assert.NotEmpty(t, rule.Action.HoldAndDelegate)
assert.Contains(t, rule.Action.HoldAndDelegate, "/machine/ssh/action/")
assert.Equal(t, 24*time.Hour, rule.Action.SessionDuration)
}
// TestCompileSSHPolicy_CheckBeforeAcceptOrdering verifies that check
// (HoldAndDelegate) rules are sorted before accept rules, even when
// the accept rule appears first in the policy definition.
func TestCompileSSHPolicy_CheckBeforeAcceptOrdering(t *testing.T) {
users := types.Users{
{Name: "user1", Model: gorm.Model{ID: 1}},
{Name: "user2", Model: gorm.Model{ID: 2}},
}
nodeTaggedServer := types.Node{
Hostname: "tagged-server",
IPv4: createAddr("100.64.0.1"),
UserID: new(users[0].ID),
User: new(users[0]),
Tags: []string{"tag:server"},
}
nodeUser2 := types.Node{
Hostname: "user2-device",
IPv4: createAddr("100.64.0.2"),
UserID: new(users[1].ID),
User: new(users[1]),
}
nodes := types.Nodes{&nodeTaggedServer, &nodeUser2}
// Accept rule appears BEFORE check rule in policy definition.
policy := &Policy{
TagOwners: TagOwners{
Tag("tag:server"): Owners{up("user1@")},
},
Groups: Groups{
Group("group:admins"): []Username{Username("user2@")},
},
SSHs: []SSH{
{
Action: "accept",
Sources: SSHSrcAliases{gp("group:admins")},
Destinations: SSHDstAliases{tp("tag:server")},
Users: []SSHUser{"root"},
},
{
Action: "check",
CheckPeriod: model.Duration(24 * time.Hour),
Sources: SSHSrcAliases{gp("group:admins")},
Destinations: SSHDstAliases{tp("tag:server")},
Users: []SSHUser{"ssh-it-user"},
},
},
}
err := policy.validate()
require.NoError(t, err)
sshPolicy, err := policy.compileSSHPolicy(
"unused-server-url",
users,
nodeTaggedServer.View(),
nodes.ViewSlice(),
)
require.NoError(t, err)
require.NotNil(t, sshPolicy)
require.Len(t, sshPolicy.Rules, 2)
// First rule must be the check rule (HoldAndDelegate set).
assert.NotEmpty(t, sshPolicy.Rules[0].Action.HoldAndDelegate,
"first rule should be check (HoldAndDelegate)")
assert.False(t, sshPolicy.Rules[0].Action.Accept,
"first rule should not be accept")
// Second rule must be the accept rule.
assert.True(t, sshPolicy.Rules[1].Action.Accept,
"second rule should be accept")
assert.Empty(t, sshPolicy.Rules[1].Action.HoldAndDelegate,
"second rule should not have HoldAndDelegate")
}
// TestSSHIntegrationReproduction reproduces the exact scenario from the integration test
// TestSSHOneUserToAll that was failing with empty sshUsers.
func TestSSHIntegrationReproduction(t *testing.T) {
@@ -756,7 +835,7 @@ func TestSSHIntegrationReproduction(t *testing.T) {
require.NoError(t, err)
// Test SSH policy compilation for node2 (owned by user2, who is in the group)
sshPolicy, err := policy.compileSSHPolicy(users, node2.View(), nodes.ViewSlice())
sshPolicy, err := policy.compileSSHPolicy("unused-server-url", users, node2.View(), nodes.ViewSlice())
require.NoError(t, err)
require.NotNil(t, sshPolicy)
require.Len(t, sshPolicy.Rules, 1)
@@ -806,7 +885,7 @@ func TestSSHJSONSerialization(t *testing.T) {
err := policy.validate()
require.NoError(t, err)
sshPolicy, err := policy.compileSSHPolicy(users, node.View(), nodes.ViewSlice())
sshPolicy, err := policy.compileSSHPolicy("unused-server-url", users, node.View(), nodes.ViewSlice())
require.NoError(t, err)
require.NotNil(t, sshPolicy)
@@ -1413,7 +1492,7 @@ func TestSSHWithAutogroupSelfInDestination(t *testing.T) {
// Test for user1's first node
node1 := nodes[0].View()
sshPolicy, err := policy.compileSSHPolicy(users, node1, nodes.ViewSlice())
sshPolicy, err := policy.compileSSHPolicy("unused-server-url", users, node1, nodes.ViewSlice())
require.NoError(t, err)
require.NotNil(t, sshPolicy)
require.Len(t, sshPolicy.Rules, 1)
@@ -1432,7 +1511,7 @@ func TestSSHWithAutogroupSelfInDestination(t *testing.T) {
// Test for user2's first node
node3 := nodes[2].View()
sshPolicy2, err := policy.compileSSHPolicy(users, node3, nodes.ViewSlice())
sshPolicy2, err := policy.compileSSHPolicy("unused-server-url", users, node3, nodes.ViewSlice())
require.NoError(t, err)
require.NotNil(t, sshPolicy2)
require.Len(t, sshPolicy2.Rules, 1)
@@ -1451,7 +1530,7 @@ func TestSSHWithAutogroupSelfInDestination(t *testing.T) {
// Test for tagged node (should have no SSH rules)
node5 := nodes[4].View()
sshPolicy3, err := policy.compileSSHPolicy(users, node5, nodes.ViewSlice())
sshPolicy3, err := policy.compileSSHPolicy("unused-server-url", users, node5, nodes.ViewSlice())
require.NoError(t, err)
if sshPolicy3 != nil {
@@ -1491,7 +1570,7 @@ func TestSSHWithAutogroupSelfAndSpecificUser(t *testing.T) {
// For user1's node: should allow SSH from user1's devices
node1 := nodes[0].View()
sshPolicy, err := policy.compileSSHPolicy(users, node1, nodes.ViewSlice())
sshPolicy, err := policy.compileSSHPolicy("unused-server-url", users, node1, nodes.ViewSlice())
require.NoError(t, err)
require.NotNil(t, sshPolicy)
require.Len(t, sshPolicy.Rules, 1)
@@ -1508,7 +1587,7 @@ func TestSSHWithAutogroupSelfAndSpecificUser(t *testing.T) {
// For user2's node: should have no rules (user1's devices can't match user2's self)
node3 := nodes[2].View()
sshPolicy2, err := policy.compileSSHPolicy(users, node3, nodes.ViewSlice())
sshPolicy2, err := policy.compileSSHPolicy("unused-server-url", users, node3, nodes.ViewSlice())
require.NoError(t, err)
if sshPolicy2 != nil {
@@ -1551,7 +1630,7 @@ func TestSSHWithAutogroupSelfAndGroup(t *testing.T) {
// For user1's node: should allow SSH from user1's devices only (not user2's)
node1 := nodes[0].View()
sshPolicy, err := policy.compileSSHPolicy(users, node1, nodes.ViewSlice())
sshPolicy, err := policy.compileSSHPolicy("unused-server-url", users, node1, nodes.ViewSlice())
require.NoError(t, err)
require.NotNil(t, sshPolicy)
require.Len(t, sshPolicy.Rules, 1)
@@ -1568,7 +1647,7 @@ func TestSSHWithAutogroupSelfAndGroup(t *testing.T) {
// For user3's node: should have no rules (not in group:admins)
node5 := nodes[4].View()
sshPolicy2, err := policy.compileSSHPolicy(users, node5, nodes.ViewSlice())
sshPolicy2, err := policy.compileSSHPolicy("unused-server-url", users, node5, nodes.ViewSlice())
require.NoError(t, err)
if sshPolicy2 != nil {
@@ -1610,7 +1689,7 @@ func TestSSHWithAutogroupSelfExcludesTaggedDevices(t *testing.T) {
// For untagged node: should only get principals from other untagged nodes
node1 := nodes[0].View()
sshPolicy, err := policy.compileSSHPolicy(users, node1, nodes.ViewSlice())
sshPolicy, err := policy.compileSSHPolicy("unused-server-url", users, node1, nodes.ViewSlice())
require.NoError(t, err)
require.NotNil(t, sshPolicy)
require.Len(t, sshPolicy.Rules, 1)
@@ -1628,7 +1707,7 @@ func TestSSHWithAutogroupSelfExcludesTaggedDevices(t *testing.T) {
// For tagged node: should get no SSH rules
node3 := nodes[2].View()
sshPolicy2, err := policy.compileSSHPolicy(users, node3, nodes.ViewSlice())
sshPolicy2, err := policy.compileSSHPolicy("unused-server-url", users, node3, nodes.ViewSlice())
require.NoError(t, err)
if sshPolicy2 != nil {
@@ -1671,7 +1750,7 @@ func TestSSHWithAutogroupSelfAndMixedDestinations(t *testing.T) {
// Test 1: Compile for user1's device (should only match autogroup:self destination)
node1 := nodes[0].View()
sshPolicy1, err := policy.compileSSHPolicy(users, node1, nodes.ViewSlice())
sshPolicy1, err := policy.compileSSHPolicy("unused-server-url", users, node1, nodes.ViewSlice())
require.NoError(t, err)
require.NotNil(t, sshPolicy1)
require.Len(t, sshPolicy1.Rules, 1, "user1's device should have 1 SSH rule (autogroup:self)")
@@ -1690,7 +1769,7 @@ func TestSSHWithAutogroupSelfAndMixedDestinations(t *testing.T) {
// Test 2: Compile for router (should only match tag:router destination)
routerNode := nodes[3].View() // user2-router
sshPolicyRouter, err := policy.compileSSHPolicy(users, routerNode, nodes.ViewSlice())
sshPolicyRouter, err := policy.compileSSHPolicy("unused-server-url", users, routerNode, nodes.ViewSlice())
require.NoError(t, err)
require.NotNil(t, sshPolicyRouter)
require.Len(t, sshPolicyRouter.Rules, 1, "router should have 1 SSH rule (tag:router)")

View File

@@ -222,7 +222,7 @@ func (pm *PolicyManager) updateLocked() (bool, error) {
return true, nil
}
func (pm *PolicyManager) SSHPolicy(node types.NodeView) (*tailcfg.SSHPolicy, error) {
func (pm *PolicyManager) SSHPolicy(baseURL string, node types.NodeView) (*tailcfg.SSHPolicy, error) {
pm.mu.Lock()
defer pm.mu.Unlock()
@@ -230,7 +230,7 @@ func (pm *PolicyManager) SSHPolicy(node types.NodeView) (*tailcfg.SSHPolicy, err
return sshPol, nil
}
sshPol, err := pm.pol.compileSSHPolicy(pm.users, node, pm.nodes)
sshPol, err := pm.pol.compileSSHPolicy(baseURL, pm.users, node, pm.nodes)
if err != nil {
return nil, fmt.Errorf("compiling SSH policy: %w", err)
}