# Partition and account lookups used to build IAM policy ARNs below
data "aws_partition" "current" {}
data "aws_caller_identity" "current" {}

################################################################################
# User Data
################################################################################

# Renders the node user data; the result is consumed by the launch template
# through `module.user_data.user_data`
module "user_data" {
  source = "../_user_data"

  create       = var.create
  platform     = var.platform
  ami_type     = var.ami_type
  cluster_name = var.cluster_name

  cluster_endpoint    = var.cluster_endpoint
  cluster_auth_base64 = var.cluster_auth_base64
  cluster_ip_family   = var.cluster_ip_family

  # Falls back to an empty string when neither CIDR variable is set
  cluster_service_cidr = try(coalesce(var.cluster_service_cidr, var.cluster_service_ipv4_cidr), "")

  enable_bootstrap_user_data = var.enable_bootstrap_user_data
  pre_bootstrap_user_data    = var.pre_bootstrap_user_data
  post_bootstrap_user_data   = var.post_bootstrap_user_data
  bootstrap_extra_args       = var.bootstrap_extra_args
  user_data_template_path    = var.user_data_template_path

  cloudinit_pre_nodeadm  = var.cloudinit_pre_nodeadm
  cloudinit_post_nodeadm = var.cloudinit_post_nodeadm
}

################################################################################
# EFA Support
################################################################################

# Looks up the first instance type so the number of network cards is known
data "aws_ec2_instance_type" "this" {
  count = var.create && var.enable_efa_support ? 1 : 0

  instance_type = local.efa_instance_type
}

locals {
  enable_efa_support = var.create && var.enable_efa_support

  # Only the first instance type is inspected; empty string when none is provided
  efa_instance_type = try(element(var.instance_types, 0), "")
  # 0 when the lookup above was not created
  num_network_cards = try(data.aws_ec2_instance_type.this[0].maximum_network_cards, 0)

  # Primary network interface must be EFA, remaining can be EFA or EFA-only
  efa_network_interfaces = [
    for i in range(local.num_network_cards) : {
      associate_public_ip_address = false
      delete_on_termination       = true
      device_index                = i == 0 ? 0 : 1
      network_card_index          = i
      # Index 0 is always "efa"; other indices are "efa" only when listed in `var.efa_indices`
      interface_type = var.enable_efa_only ? contains(concat([0], var.efa_indices), i) ? "efa" : "efa-only" : "efa"
    }
  ]

  # EFA-generated interfaces replace any user-supplied interfaces when EFA is enabled
  network_interfaces = local.enable_efa_support ? local.efa_network_interfaces : var.network_interfaces
}

################################################################################
# Launch template
################################################################################

locals {
  launch_template_name = coalesce(var.launch_template_name, "${var.name}-eks-node-group")
  # `compact` drops empty/null entries, e.g. when no primary security group ID is given
  security_group_ids = compact(concat([var.cluster_primary_security_group_id], var.vpc_security_group_ids))

  # A placement group created by this module takes precedence over `var.placement`
  placement = local.create_placement_group ? { group_name = aws_placement_group.this[0].name } : var.placement
}

resource "aws_launch_template" "this" {
  count = var.create && var.create_launch_template && var.use_custom_launch_template ? 1 : 0

  dynamic "block_device_mappings" {
    for_each = var.block_device_mappings

    content {
      device_name = try(block_device_mappings.value.device_name, null)

      dynamic "ebs" {
        for_each = try([block_device_mappings.value.ebs], [])

        content {
          delete_on_termination = try(ebs.value.delete_on_termination, null)
          encrypted             = try(ebs.value.encrypted, null)
          iops                  = try(ebs.value.iops, null)
          kms_key_id            = try(ebs.value.kms_key_id, null)
          snapshot_id           = try(ebs.value.snapshot_id, null)
          throughput            = try(ebs.value.throughput, null)
          volume_size           = try(ebs.value.volume_size, null)
          volume_type           = try(ebs.value.volume_type, null)
        }
      }

      no_device    = try(block_device_mappings.value.no_device, null)
      virtual_name = try(block_device_mappings.value.virtual_name, null)
    }
  }

  dynamic "capacity_reservation_specification" {
    for_each = length(var.capacity_reservation_specification) > 0 ? [var.capacity_reservation_specification] : []

    content {
      capacity_reservation_preference = try(capacity_reservation_specification.value.capacity_reservation_preference, null)

      dynamic "capacity_reservation_target" {
        for_each = try([capacity_reservation_specification.value.capacity_reservation_target], [])

        content {
          capacity_reservation_id                 = try(capacity_reservation_target.value.capacity_reservation_id, null)
          capacity_reservation_resource_group_arn = try(capacity_reservation_target.value.capacity_reservation_resource_group_arn, null)
        }
      }
    }
  }

  dynamic "cpu_options" {
    for_each = length(var.cpu_options) > 0 ? [var.cpu_options] : []

    content {
      core_count       = try(cpu_options.value.core_count, null)
      threads_per_core = try(cpu_options.value.threads_per_core, null)
    }
  }

  dynamic "credit_specification" {
    for_each = length(var.credit_specification) > 0 ? [var.credit_specification] : []

    content {
      cpu_credits = try(credit_specification.value.cpu_credits, null)
    }
  }

  default_version         = var.launch_template_default_version
  description             = var.launch_template_description
  disable_api_termination = var.disable_api_termination
  ebs_optimized           = var.ebs_optimized

  dynamic "elastic_gpu_specifications" {
    for_each = var.elastic_gpu_specifications

    content {
      type = elastic_gpu_specifications.value.type
    }
  }

  dynamic "elastic_inference_accelerator" {
    for_each = length(var.elastic_inference_accelerator) > 0 ? [var.elastic_inference_accelerator] : []

    content {
      type = elastic_inference_accelerator.value.type
    }
  }

  dynamic "enclave_options" {
    for_each = length(var.enclave_options) > 0 ? [var.enclave_options] : []

    content {
      enabled = enclave_options.value.enabled
    }
  }

  # Set on EKS managed node group, will fail if set here
  # https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html#launch-template-basics
  # dynamic "hibernation_options" {
  #   for_each = length(var.hibernation_options) > 0 ? [var.hibernation_options] : []
  #   content {
  #     configured = hibernation_options.value.configured
  #   }
  # }

  # Set on EKS managed node group, will fail if set here
  # https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html#launch-template-basics
  # dynamic "iam_instance_profile" {
  #   for_each = [var.iam_instance_profile]
  #   content {
  #     name = lookup(var.iam_instance_profile, "name", null)
  #     arn  = lookup(var.iam_instance_profile, "arn", null)
  #   }
  # }

  image_id = var.ami_id

  # Set on EKS managed node group, will fail if set here
  # https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html#launch-template-basics
  # instance_initiated_shutdown_behavior = var.instance_initiated_shutdown_behavior

  dynamic "instance_market_options" {
    for_each = length(var.instance_market_options) > 0 ? [var.instance_market_options] : []

    content {
      market_type = try(instance_market_options.value.market_type, null)

      dynamic "spot_options" {
        for_each = try([instance_market_options.value.spot_options], [])

        content {
          block_duration_minutes         = try(spot_options.value.block_duration_minutes, null)
          instance_interruption_behavior = try(spot_options.value.instance_interruption_behavior, null)
          max_price                      = try(spot_options.value.max_price, null)
          spot_instance_type             = try(spot_options.value.spot_instance_type, null)
          valid_until                    = try(spot_options.value.valid_until, null)
        }
      }
    }
  }

  # Instance type(s) are generally set on the node group,
  # except when a ML capacity block reservation is used
  instance_type = var.capacity_type == "CAPACITY_BLOCK" ? element(var.instance_types, 0) : null
  kernel_id     = var.kernel_id
  key_name      = var.key_name

  dynamic "license_specification" {
    for_each = length(var.license_specifications) > 0 ? var.license_specifications : {}

    content {
      license_configuration_arn = license_specification.value.license_configuration_arn
    }
  }

  dynamic "maintenance_options" {
    for_each = length(var.maintenance_options) > 0 ? [var.maintenance_options] : []

    content {
      auto_recovery = try(maintenance_options.value.auto_recovery, null)
    }
  }

  dynamic "metadata_options" {
    for_each = length(var.metadata_options) > 0 ? [var.metadata_options] : []

    content {
      http_endpoint               = try(metadata_options.value.http_endpoint, null)
      http_protocol_ipv6          = try(metadata_options.value.http_protocol_ipv6, null)
      http_put_response_hop_limit = try(metadata_options.value.http_put_response_hop_limit, null)
      http_tokens                 = try(metadata_options.value.http_tokens, null)
      instance_metadata_tags      = try(metadata_options.value.instance_metadata_tags, null)
    }
  }

  dynamic "monitoring" {
    for_each = var.enable_monitoring ? [1] : []

    content {
      enabled = var.enable_monitoring
    }
  }

  # Exactly one of `name` / `name_prefix` is set
  name        = var.launch_template_use_name_prefix ? null : local.launch_template_name
  name_prefix = var.launch_template_use_name_prefix ? "${local.launch_template_name}-" : null

  dynamic "network_interfaces" {
    for_each = local.network_interfaces

    content {
      associate_carrier_ip_address = try(network_interfaces.value.associate_carrier_ip_address, null)
      associate_public_ip_address  = try(network_interfaces.value.associate_public_ip_address, null)
      delete_on_termination        = try(network_interfaces.value.delete_on_termination, null)
      description                  = try(network_interfaces.value.description, null)
      device_index                 = try(network_interfaces.value.device_index, null)
      interface_type               = try(network_interfaces.value.interface_type, null)
      ipv4_address_count           = try(network_interfaces.value.ipv4_address_count, null)
      ipv4_addresses               = try(network_interfaces.value.ipv4_addresses, [])
      ipv4_prefix_count            = try(network_interfaces.value.ipv4_prefix_count, null)
      ipv4_prefixes                = try(network_interfaces.value.ipv4_prefixes, null)
      ipv6_address_count           = try(network_interfaces.value.ipv6_address_count, null)
      ipv6_addresses               = try(network_interfaces.value.ipv6_addresses, [])
      ipv6_prefix_count            = try(network_interfaces.value.ipv6_prefix_count, null)
      ipv6_prefixes                = try(network_interfaces.value.ipv6_prefixes, [])
      network_card_index           = try(network_interfaces.value.network_card_index, null)
      network_interface_id         = try(network_interfaces.value.network_interface_id, null)
      primary_ipv6                 = try(network_interfaces.value.primary_ipv6, null)
      private_ip_address           = try(network_interfaces.value.private_ip_address, null)
      # Ref: https://github.com/hashicorp/terraform-provider-aws/issues/4570
      security_groups = compact(concat(try(network_interfaces.value.security_groups, []), local.security_group_ids))
      # Set on EKS managed node group, will fail if set here
      # https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html#launch-template-basics
      # subnet_id       = try(network_interfaces.value.subnet_id, null)
    }
  }

  dynamic "placement" {
    for_each = length(local.placement) > 0 ? [local.placement] : []

    content {
      affinity                = try(placement.value.affinity, null)
      availability_zone       = lookup(placement.value, "availability_zone", null)
      group_name              = lookup(placement.value, "group_name", null)
      host_id                 = lookup(placement.value, "host_id", null)
      host_resource_group_arn = lookup(placement.value, "host_resource_group_arn", null)
      partition_number        = try(placement.value.partition_number, null)
      spread_domain           = try(placement.value.spread_domain, null)
      tenancy                 = try(placement.value.tenancy, null)
    }
  }

  dynamic "private_dns_name_options" {
    for_each = length(var.private_dns_name_options) > 0 ? [var.private_dns_name_options] : []

    content {
      enable_resource_name_dns_aaaa_record = try(private_dns_name_options.value.enable_resource_name_dns_aaaa_record, null)
      enable_resource_name_dns_a_record    = try(private_dns_name_options.value.enable_resource_name_dns_a_record, null)
      hostname_type                        = try(private_dns_name_options.value.hostname_type, null)
    }
  }

  ram_disk_id = var.ram_disk_id

  # One block per resource type listed in `var.tag_specifications`
  dynamic "tag_specifications" {
    for_each = toset(var.tag_specifications)

    content {
      resource_type = tag_specifications.key
      tags          = merge(var.tags, { Name = var.name }, var.launch_template_tags)
    }
  }

  update_default_version = var.update_launch_template_default_version
  user_data              = module.user_data.user_data

  # Security groups are attached on the network interfaces instead when any are defined
  vpc_security_group_ids = length(local.network_interfaces) > 0 ? [] : local.security_group_ids

  tags = merge(
    var.tags,
    var.launch_template_tags,
  )

  # Prevent premature access of policies by pods that
  # require permissions on create/destroy that depend on nodes
  depends_on = [
    aws_iam_role_policy_attachment.this,
    aws_iam_role_policy_attachment.additional,
  ]

  lifecycle {
    create_before_destroy = true
  }
}

################################################################################
# AMI SSM Parameter
################################################################################

locals {
  # Just to ensure templating doesn't fail when values are not provided
  ssm_cluster_version = var.cluster_version != null ? var.cluster_version : ""
  ssm_ami_type        = var.ami_type != null ? var.ami_type : ""

  # Map the AMI type to the respective SSM param path
  ssm_ami_type_to_ssm_param = {
    AL2_x86_64                 = "/aws/service/eks/optimized-ami/${local.ssm_cluster_version}/amazon-linux-2/recommended/release_version"
    AL2_x86_64_GPU             = "/aws/service/eks/optimized-ami/${local.ssm_cluster_version}/amazon-linux-2-gpu/recommended/release_version"
    AL2_ARM_64                 = "/aws/service/eks/optimized-ami/${local.ssm_cluster_version}/amazon-linux-2-arm64/recommended/release_version"
    CUSTOM                     = "NONE"
    BOTTLEROCKET_ARM_64        = "/aws/service/bottlerocket/aws-k8s-${local.ssm_cluster_version}/arm64/latest/image_version"
    BOTTLEROCKET_x86_64        = "/aws/service/bottlerocket/aws-k8s-${local.ssm_cluster_version}/x86_64/latest/image_version"
    BOTTLEROCKET_ARM_64_FIPS   = "/aws/service/bottlerocket/aws-k8s-${local.ssm_cluster_version}-fips/arm64/latest/image_version"
    BOTTLEROCKET_x86_64_FIPS   = "/aws/service/bottlerocket/aws-k8s-${local.ssm_cluster_version}-fips/x86_64/latest/image_version"
    BOTTLEROCKET_ARM_64_NVIDIA = "/aws/service/bottlerocket/aws-k8s-${local.ssm_cluster_version}-nvidia/arm64/latest/image_version"
    BOTTLEROCKET_x86_64_NVIDIA = "/aws/service/bottlerocket/aws-k8s-${local.ssm_cluster_version}-nvidia/x86_64/latest/image_version"
    # Windows: the Core/Full AMI type must point at the matching Core/Full SSM parameter
    WINDOWS_CORE_2019_x86_64 = "/aws/service/ami-windows-latest/Windows_Server-2019-English-Core-EKS_Optimized-${local.ssm_cluster_version}"
    WINDOWS_FULL_2019_x86_64 = "/aws/service/ami-windows-latest/Windows_Server-2019-English-Full-EKS_Optimized-${local.ssm_cluster_version}"
    WINDOWS_CORE_2022_x86_64 = "/aws/service/ami-windows-latest/Windows_Server-2022-English-Core-EKS_Optimized-${local.ssm_cluster_version}"
    WINDOWS_FULL_2022_x86_64 = "/aws/service/ami-windows-latest/Windows_Server-2022-English-Full-EKS_Optimized-${local.ssm_cluster_version}"
    AL2023_x86_64_STANDARD   = "/aws/service/eks/optimized-ami/${local.ssm_cluster_version}/amazon-linux-2023/x86_64/standard/recommended/release_version"
    AL2023_ARM_64_STANDARD   = "/aws/service/eks/optimized-ami/${local.ssm_cluster_version}/amazon-linux-2023/arm64/standard/recommended/release_version"
    AL2023_x86_64_NEURON     = "/aws/service/eks/optimized-ami/${local.ssm_cluster_version}/amazon-linux-2023/x86_64/neuron/recommended/release_version"
    AL2023_x86_64_NVIDIA     = "/aws/service/eks/optimized-ami/${local.ssm_cluster_version}/amazon-linux-2023/x86_64/nvidia/recommended/release_version"
    AL2023_ARM_64_NVIDIA     = "/aws/service/eks/optimized-ami/${local.ssm_cluster_version}/amazon-linux-2023/arm64/nvidia/recommended/release_version"
  }

  # The Windows SSM params currently do not have a release version, so we have to get the full output JSON blob and parse out the release version
  windows_latest_ami_release_version = var.create && var.use_latest_ami_release_version && startswith(local.ssm_ami_type, "WINDOWS") ? nonsensitive(jsondecode(data.aws_ssm_parameter.ami[0].value)["release_version"]) : null
  # Based on the steps above, try to get an AMI release version - if not, `null` is returned
  latest_ami_release_version = startswith(local.ssm_ami_type, "WINDOWS") ? local.windows_latest_ami_release_version : try(nonsensitive(data.aws_ssm_parameter.ami[0].value), null)
}

# Reads the SSM parameter selected by `var.ami_type` from the map above
data "aws_ssm_parameter" "ami" {
  count = var.create && var.use_latest_ami_release_version ? 1 : 0

  name = local.ssm_ami_type_to_ssm_param[var.ami_type]
}

################################################################################
# Node Group
################################################################################

locals {
  launch_template_id = var.create && var.create_launch_template ? try(aws_launch_template.this[0].id, null) : var.launch_template_id
  # Change order to allow users to set version priority before using defaults
  launch_template_version = coalesce(var.launch_template_version, try(aws_launch_template.this[0].default_version, "$Default"))
}

resource "aws_eks_node_group" "this" {
  count = var.create ? 1 : 0

  # Required
  cluster_name  = var.cluster_name
  node_role_arn = var.create_iam_role ? aws_iam_role.this[0].arn : var.iam_role_arn
  # With a module-created placement group, only the filtered subnets are used
  subnet_ids = local.create_placement_group ? data.aws_subnets.placement_group[0].ids : var.subnet_ids

  scaling_config {
    min_size     = var.min_size
    max_size     = var.max_size
    desired_size = var.desired_size
  }

  # Optional
  node_group_name        = var.use_name_prefix ? null : var.name
  node_group_name_prefix = var.use_name_prefix ? "${var.name}-" : null

  # https://docs.aws.amazon.com/eks/latest/userguide/launch-templates.html#launch-template-custom-ami
  ami_type        = var.ami_id != "" ? null : var.ami_type
  release_version = var.ami_id != "" ? null : var.use_latest_ami_release_version ? local.latest_ami_release_version : var.ami_release_version
  version         = var.ami_id != "" ? null : var.cluster_version

  capacity_type        = var.capacity_type
  disk_size            = var.use_custom_launch_template ? null : var.disk_size # if using a custom LT, set disk size on custom LT or else it will error here
  force_update_version = var.force_update_version
  # ML capacity block reservation requires instance type to be set on the launch template
  instance_types = var.capacity_type == "CAPACITY_BLOCK" ? null : var.instance_types
  labels         = var.labels

  dynamic "launch_template" {
    for_each = var.use_custom_launch_template ? [1] : []

    content {
      id      = local.launch_template_id
      version = local.launch_template_version
    }
  }

  dynamic "remote_access" {
    for_each = length(var.remote_access) > 0 ? [var.remote_access] : []

    content {
      ec2_ssh_key               = try(remote_access.value.ec2_ssh_key, null)
      source_security_group_ids = try(remote_access.value.source_security_group_ids, [])
    }
  }

  dynamic "taint" {
    for_each = var.taints

    content {
      key    = taint.value.key
      value  = try(taint.value.value, null)
      effect = taint.value.effect
    }
  }

  dynamic "update_config" {
    for_each = length(var.update_config) > 0 ? [var.update_config] : []

    content {
      max_unavailable_percentage = try(update_config.value.max_unavailable_percentage, null)
      max_unavailable            = try(update_config.value.max_unavailable, null)
    }
  }

  dynamic "node_repair_config" {
    for_each = var.node_repair_config != null ? [var.node_repair_config] : []

    content {
      enabled = node_repair_config.value.enabled
    }
  }

  timeouts {
    create = lookup(var.timeouts, "create", null)
    update = lookup(var.timeouts, "update", null)
    delete = lookup(var.timeouts, "delete", null)
  }

  lifecycle {
    create_before_destroy = true
    # Changes to the desired size made outside of Terraform are not reverted
    ignore_changes = [
      scaling_config[0].desired_size,
    ]
  }

  tags = merge(
    var.tags,
    { Name = var.name }
  )
}

################################################################################
# IAM Role
################################################################################

locals {
  create_iam_role = var.create && var.create_iam_role

  iam_role_name          = coalesce(var.iam_role_name, "${var.name}-eks-node-group")
  iam_role_policy_prefix = "arn:${data.aws_partition.current.partition}:iam::aws:policy"

  # Each map is empty unless CNI policy attachment is enabled for that IP family
  ipv4_cni_policy = { for k, v in {
    AmazonEKS_CNI_Policy = "${local.iam_role_policy_prefix}/AmazonEKS_CNI_Policy"
  } : k => v if var.iam_role_attach_cni_policy && var.cluster_ip_family == "ipv4" }
  # The IPv6 policy ARN is built from the caller's own account ID rather than the `aws` managed prefix
  ipv6_cni_policy = { for k, v in {
    AmazonEKS_CNI_IPv6_Policy = "arn:${data.aws_partition.current.partition}:iam::${data.aws_caller_identity.current.account_id}:policy/AmazonEKS_CNI_IPv6_Policy"
  } : k => v if var.iam_role_attach_cni_policy && var.cluster_ip_family == "ipv6" }
}

# Trust policy allowing the EC2 service to assume the node role
data "aws_iam_policy_document" "assume_role_policy" {
  count = local.create_iam_role ? 1 : 0

  statement {
    sid     = "EKSNodeAssumeRole"
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["ec2.amazonaws.com"]
    }
  }
}

resource "aws_iam_role" "this" {
  count = local.create_iam_role ? 1 : 0

  name        = var.iam_role_use_name_prefix ? null : local.iam_role_name
  name_prefix = var.iam_role_use_name_prefix ? "${local.iam_role_name}-" : null
  path        = var.iam_role_path
  description = var.iam_role_description

  assume_role_policy    = data.aws_iam_policy_document.assume_role_policy[0].json
  permissions_boundary  = var.iam_role_permissions_boundary
  force_detach_policies = true

  tags = merge(var.tags, var.iam_role_tags)
}

# Policies attached ref https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/eks_node_group
resource "aws_iam_role_policy_attachment" "this" {
  # The trailing `if` empties the map when the role is not created by this module
  for_each = { for k, v in merge(
    {
      AmazonEKSWorkerNodePolicy          = "${local.iam_role_policy_prefix}/AmazonEKSWorkerNodePolicy"
      AmazonEC2ContainerRegistryReadOnly = "${local.iam_role_policy_prefix}/AmazonEC2ContainerRegistryReadOnly"
    },
    local.ipv4_cni_policy,
    local.ipv6_cni_policy
  ) : k => v if local.create_iam_role }

  policy_arn = each.value
  role       = aws_iam_role.this[0].name
}

resource "aws_iam_role_policy_attachment" "additional" {
  for_each = { for k, v in var.iam_role_additional_policies : k => v if local.create_iam_role }

  policy_arn = each.value
  role       = aws_iam_role.this[0].name
}

################################################################################
# IAM Role Policy
################################################################################

locals {
  # Only created when the role is created here and at least one statement is supplied
  create_iam_role_policy = local.create_iam_role && var.create_iam_role_policy && length(var.iam_role_policy_statements) > 0
}

# Builds the inline policy document from `var.iam_role_policy_statements`
data "aws_iam_policy_document" "role" {
  count = local.create_iam_role_policy ? 1 : 0

  dynamic "statement" {
    for_each = var.iam_role_policy_statements

    content {
      sid           = try(statement.value.sid, null)
      actions       = try(statement.value.actions, null)
      not_actions   = try(statement.value.not_actions, null)
      effect        = try(statement.value.effect, null)
      resources     = try(statement.value.resources, null)
      not_resources = try(statement.value.not_resources, null)

      dynamic "principals" {
        for_each = try(statement.value.principals, [])

        content {
          type        = principals.value.type
          identifiers = principals.value.identifiers
        }
      }

      dynamic "not_principals" {
        for_each = try(statement.value.not_principals, [])

        content {
          type        = not_principals.value.type
          identifiers = not_principals.value.identifiers
        }
      }

      # Note: the input key is `conditions` (plural) while the block is `condition`
      dynamic "condition" {
        for_each = try(statement.value.conditions, [])

        content {
          test     = condition.value.test
          values   = condition.value.values
          variable = condition.value.variable
        }
      }
    }
  }
}

resource "aws_iam_role_policy" "this" {
  count = local.create_iam_role_policy ? 1 : 0

  name        = var.iam_role_use_name_prefix ? null : local.iam_role_name
  name_prefix = var.iam_role_use_name_prefix ? "${local.iam_role_name}-" : null
  policy      = data.aws_iam_policy_document.role[0].json
  role        = aws_iam_role.this[0].id
}

################################################################################
# Placement Group
################################################################################

locals {
  # Enabling EFA support implies a placement group
  create_placement_group = var.create && (local.enable_efa_support || var.create_placement_group)
}

resource "aws_placement_group" "this" {
  count = local.create_placement_group ? 1 : 0

  name     = "${var.cluster_name}-${var.name}"
  strategy = var.placement_group_strategy

  tags = var.tags
}

################################################################################
# Instance AZ Lookup

# Instances usually used in placement groups w/ EFA are only available in
# select availability zones. These data sources will cross reference the availability
# zones supported by the instance type with the subnets provided to ensure only
# AZs/subnets that are supported are used.
################################################################################

# Find the availability zones supported by the instance type
# TODO - remove at next breaking change
# Force users to be explicit about which AZ to use when using placement groups,
# with or without EFA support
data "aws_ec2_instance_type_offerings" "this" {
  count = local.enable_efa_support ? 1 : 0

  filter {
    name   = "instance-type"
    values = [local.efa_instance_type]
  }

  location_type = "availability-zone-id"
}

# Reverse the lookup to find one of the subnets provided based on the
# availability zone ID of the queried instance type (supported)
data "aws_subnets" "placement_group" {
  count = local.create_placement_group ? 1 : 0

  filter {
    name   = "subnet-id"
    values = var.subnet_ids
  }

  # The data source can lookup the first available AZ or you can specify an AZ (next filter)
  dynamic "filter" {
    for_each = var.enable_efa_support && var.placement_group_az == null ? [1] : []

    content {
      name   = "availability-zone-id"
      values = data.aws_ec2_instance_type_offerings.this[0].locations
    }
  }

  dynamic "filter" {
    for_each = var.placement_group_az != null ? [var.placement_group_az] : []

    content {
      name   = "availability-zone"
      values = [filter.value]
    }
  }
}

################################################################################
# Autoscaling Group Schedule
################################################################################

resource "aws_autoscaling_schedule" "this" {
  for_each = { for k, v in var.schedules : k => v if var.create && var.create_schedule }

  scheduled_action_name = each.key
  # Targets the first autoscaling group reported by the node group
  autoscaling_group_name = aws_eks_node_group.this[0].resources[0].autoscaling_groups[0].name

  # Sizes fall back to -1 when not provided in the schedule entry
  min_size         = try(each.value.min_size, -1)
  max_size         = try(each.value.max_size, -1)
  desired_capacity = try(each.value.desired_size, -1)
  start_time       = try(each.value.start_time, null)
  end_time         = try(each.value.end_time, null)
  time_zone        = try(each.value.time_zone, null)

  # [Minute] [Hour] [Day_of_Month] [Month_of_Year] [Day_of_Week]
  # Cron examples: https://crontab.guru/examples.html
  recurrence = try(each.value.recurrence, null)
}