diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f00fbfe..ba45548 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -23,7 +23,7 @@ repos:
         - '--args=--only=terraform_standard_module_structure'
         - '--args=--only=terraform_workspace_remote'
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.1.0
+    rev: v4.2.0
     hooks:
       - id: check-merge-conflict
       - id: end-of-file-fixer
diff --git a/README.md b/README.md
index 8d51747..dfa75c6 100644
--- a/README.md
+++ b/README.md
@@ -162,7 +162,7 @@ module "eks" {
 - [Complete](https://github.com/terraform-aws-modules/terraform-aws-eks/tree/master/examples/complete): EKS Cluster using all available node group types in various combinations demonstrating many of the supported features and configurations
 - [EKS Managed Node Group](https://github.com/terraform-aws-modules/terraform-aws-eks/tree/master/examples/eks_managed_node_group): EKS Cluster using EKS managed node groups
 - [Fargate Profile](https://github.com/terraform-aws-modules/terraform-aws-eks/tree/master/examples/fargate_profile): EKS cluster using [Fargate Profiles](https://docs.aws.amazon.com/eks/latest/userguide/fargate.html)
-- [IRSA, Node Autoscaler, Instance Refresh](https://github.com/terraform-aws-modules/terraform-aws-eks/tree/master/examples/irsa_autoscale_refresh): EKS Cluster using self-managed node group demonstrating how to enable/utilize instance refresh configuration along with node termination handler
+- [Karpenter](https://github.com/terraform-aws-modules/terraform-aws-eks/tree/master/examples/karpenter): EKS Cluster with [Karpenter](https://karpenter.sh/) provisioned for managing compute resource scaling
 - [Self Managed Node Group](https://github.com/terraform-aws-modules/terraform-aws-eks/tree/master/examples/self_managed_node_group): EKS Cluster using self-managed node groups
 - [User Data](https://github.com/terraform-aws-modules/terraform-aws-eks/tree/master/examples/user_data): Various supported methods of providing necessary bootstrap scripts and configuration settings via user data
diff --git a/docs/faq.md b/docs/faq.md
index d805a5b..33e8ad7 100644
--- a/docs/faq.md
+++ b/docs/faq.md
@@ -52,8 +52,6 @@ TL;DR - Terraform resource passed into the modules map definition _must_ be know
 - `node_security_group_additional_rules` (i.e. - referencing an external security group resource in a rule)
 - `iam_role_additional_policies` (i.e. - referencing an external policy resource)
 
-- Setting `instance_refresh_enabled = true` will recreate your worker nodes without draining them first. It is recommended to install [aws-node-termination-handler](https://github.com/aws/aws-node-termination-handler) for proper node draining. See the [instance_refresh](https://github.com/terraform-aws-modules/terraform-aws-eks/tree/master/examples/irsa_autoscale_refresh) example provided.
-
 ### Why are nodes not being registered?
 
 Nodes not being able to register with the EKS control plane is generally due to networking mis-configurations.
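The FAQ entry above turns on values Terraform must know at plan time. One hedged workaround, not part of this patch or the FAQ itself, is a two-phase apply that materializes the externally referenced resource first so its attributes are known when the module map is evaluated; the resource address below is hypothetical:

```bash
# Hypothetical two-phase apply: create the external security group first,
# then run the full apply with its ID now known to the module map
terraform apply -target=aws_security_group.additional
terraform apply
```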
diff --git a/examples/eks_managed_node_group/main.tf b/examples/eks_managed_node_group/main.tf
index 225c514..8fc40db 100644
--- a/examples/eks_managed_node_group/main.tf
+++ b/examples/eks_managed_node_group/main.tf
@@ -4,7 +4,7 @@ provider "aws" {
 
 locals {
   name            = "ex-${replace(basename(path.cwd), "_", "-")}"
-  cluster_version = "1.21"
+  cluster_version = "1.22"
   region          = "eu-west-1"
 
   tags = {
diff --git a/examples/fargate_profile/main.tf b/examples/fargate_profile/main.tf
index 6106a27..6c80e22 100644
--- a/examples/fargate_profile/main.tf
+++ b/examples/fargate_profile/main.tf
@@ -4,7 +4,7 @@ provider "aws" {
 
 locals {
   name            = "ex-${replace(basename(path.cwd), "_", "-")}"
-  cluster_version = "1.21"
+  cluster_version = "1.22"
   region          = "eu-west-1"
 
   tags = {
diff --git a/examples/irsa_autoscale_refresh/charts.tf b/examples/irsa_autoscale_refresh/charts.tf
deleted file mode 100644
index 956b8de..0000000
--- a/examples/irsa_autoscale_refresh/charts.tf
+++ /dev/null
@@ -1,227 +0,0 @@
-provider "helm" {
-  kubernetes {
-    host                   = module.eks.cluster_endpoint
-    cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
-    token                  = data.aws_eks_cluster_auth.cluster.token
-  }
-}
-
-################################################################################
-# Cluster Autoscaler
-# Based on the official docs at
-# https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler
-################################################################################
-
-resource "helm_release" "cluster_autoscaler" {
-  name             = "cluster-autoscaler"
-  namespace        = "kube-system"
-  repository       = "https://kubernetes.github.io/autoscaler"
-  chart            = "cluster-autoscaler"
-  version          = "9.10.8"
-  create_namespace = false
-
-  set {
-    name  = "awsRegion"
-    value = local.region
-  }
-
-  set {
-    name  = "rbac.serviceAccount.name"
-    value = "cluster-autoscaler-aws"
-  }
-
-  set {
-    name  = "rbac.serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn"
-    value = module.cluster_autoscaler_irsa.iam_role_arn
-    type  = "string"
-  }
-
-  set {
-    name  = "autoDiscovery.clusterName"
-    value = local.name
-  }
-
-  set {
-    name  = "autoDiscovery.enabled"
-    value = "true"
-  }
-
-  set {
-    name  = "rbac.create"
-    value = "true"
-  }
-
-  depends_on = [
-    module.eks.cluster_id,
-    null_resource.apply,
-  ]
-}
-
-module "cluster_autoscaler_irsa" {
-  source  = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks"
-  version = "~> 4.12"
-
-  role_name_prefix = "cluster-autoscaler"
-  role_description = "IRSA role for cluster autoscaler"
-
-  attach_cluster_autoscaler_policy = true
-  cluster_autoscaler_cluster_ids   = [module.eks.cluster_id]
-
-  oidc_providers = {
-    main = {
-      provider_arn               = module.eks.oidc_provider_arn
-      namespace_service_accounts = ["kube-system:cluster-autoscaler-aws"]
-    }
-  }
-
-  tags = local.tags
-}
-
-################################################################################
-# Node Termination Handler
-# Based on the official docs at
-# https://github.com/aws/aws-node-termination-handler
-################################################################################
-
-resource "helm_release" "aws_node_termination_handler" {
-  name             = "aws-node-termination-handler"
-  namespace        = "kube-system"
-  repository       = "https://aws.github.io/eks-charts"
-  chart            = "aws-node-termination-handler"
-  version          = "0.16.0"
-  create_namespace = false
-
-  set {
-    name  = "awsRegion"
-    value = local.region
-  }
-
-  set {
-    name  = "serviceAccount.name"
-    value = "aws-node-termination-handler"
-  }
-
= "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn" - value = module.node_termination_handler_irsa.iam_role_arn - type = "string" - } - - set { - name = "enableSqsTerminationDraining" - value = "true" - } - - set { - name = "enableSpotInterruptionDraining" - value = "true" - } - - set { - name = "queueURL" - value = module.aws_node_termination_handler_sqs.sqs_queue_id - } - - set { - name = "logLevel" - value = "debug" - } - - depends_on = [ - module.eks.cluster_id, - null_resource.apply, - ] -} - -module "node_termination_handler_irsa" { - source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" - version = "~> 4.12" - - role_name_prefix = "node-termination-handler" - role_description = "IRSA role for node termination handler" - - attach_node_termination_handler_policy = true - node_termination_handler_sqs_queue_arns = [module.aws_node_termination_handler_sqs.sqs_queue_arn] - - oidc_providers = { - main = { - provider_arn = module.eks.oidc_provider_arn - namespace_service_accounts = ["kube-system:aws-node-termination-handler"] - } - } - - tags = local.tags -} - -module "aws_node_termination_handler_sqs" { - source = "terraform-aws-modules/sqs/aws" - version = "~> 3.0" - - name = local.name - message_retention_seconds = 300 - policy = data.aws_iam_policy_document.aws_node_termination_handler_sqs.json - - tags = local.tags -} - -data "aws_iam_policy_document" "aws_node_termination_handler_sqs" { - statement { - actions = ["sqs:SendMessage"] - resources = ["arn:aws:sqs:${local.region}:${data.aws_caller_identity.current.account_id}:${local.name}"] - - principals { - type = "Service" - identifiers = [ - "events.amazonaws.com", - "sqs.amazonaws.com", - ] - } - } -} - -resource "aws_cloudwatch_event_rule" "aws_node_termination_handler_asg" { - name = "${local.name}-asg-termination" - description = "Node termination event rule" - - event_pattern = jsonencode({ - "source" : ["aws.autoscaling"], - "detail-type" : ["EC2 Instance-terminate Lifecycle Action"] - "resources" : [for group in module.eks.self_managed_node_groups : group.autoscaling_group_arn] - }) - - tags = local.tags -} - -resource "aws_cloudwatch_event_target" "aws_node_termination_handler_asg" { - target_id = "${local.name}-asg-termination" - rule = aws_cloudwatch_event_rule.aws_node_termination_handler_asg.name - arn = module.aws_node_termination_handler_sqs.sqs_queue_arn -} - -resource "aws_cloudwatch_event_rule" "aws_node_termination_handler_spot" { - name = "${local.name}-spot-termination" - description = "Node termination event rule" - event_pattern = jsonencode({ - "source" : ["aws.ec2"], - "detail-type" : ["EC2 Spot Instance Interruption Warning"] - }) -} - -resource "aws_cloudwatch_event_target" "aws_node_termination_handler_spot" { - target_id = "${local.name}-spot-termination" - rule = aws_cloudwatch_event_rule.aws_node_termination_handler_spot.name - arn = module.aws_node_termination_handler_sqs.sqs_queue_arn -} - -# Creating the lifecycle-hook outside of the ASG resource's `initial_lifecycle_hook` -# ensures that node termination does not require the lifecycle action to be completed, -# and thus allows the ASG to be destroyed cleanly. 
-resource "aws_autoscaling_lifecycle_hook" "aws_node_termination_handler" { - for_each = module.eks.self_managed_node_groups - - name = "aws-node-termination-handler-${each.value.autoscaling_group_name}" - autoscaling_group_name = each.value.autoscaling_group_name - lifecycle_transition = "autoscaling:EC2_INSTANCE_TERMINATING" - heartbeat_timeout = 300 - default_result = "CONTINUE" -} diff --git a/examples/irsa_autoscale_refresh/main.tf b/examples/irsa_autoscale_refresh/main.tf deleted file mode 100644 index faf16dc..0000000 --- a/examples/irsa_autoscale_refresh/main.tf +++ /dev/null @@ -1,188 +0,0 @@ -provider "aws" { - region = local.region -} - -locals { - name = "ex-${replace(basename(path.cwd), "_", "-")}" - cluster_version = "1.21" - region = "eu-west-1" - - tags = { - Example = local.name - GithubRepo = "terraform-aws-eks" - GithubOrg = "terraform-aws-modules" - } -} - -data "aws_caller_identity" "current" {} - -data "aws_eks_cluster_auth" "cluster" { - name = module.eks.cluster_id -} - -################################################################################ -# EKS Module -################################################################################ - -module "eks" { - source = "../.." - - cluster_name = local.name - cluster_version = local.cluster_version - cluster_endpoint_private_access = true - cluster_endpoint_public_access = true - - vpc_id = module.vpc.vpc_id - subnet_ids = module.vpc.private_subnets - - # Self Managed Node Group(s) - self_managed_node_groups = { - refresh = { - max_size = 5 - desired_size = 1 - - instance_type = "m5.large" - - instance_refresh = { - strategy = "Rolling" - preferences = { - checkpoint_delay = 600 - checkpoint_percentages = [35, 70, 100] - instance_warmup = 300 - min_healthy_percentage = 50 - } - triggers = ["tag"] - } - - tags = { "aws-node-termination-handler/managed" = "true" } - } - - mixed_instance = { - use_mixed_instances_policy = true - mixed_instances_policy = { - instances_distribution = { - on_demand_base_capacity = 0 - on_demand_percentage_above_base_capacity = 10 - spot_allocation_strategy = "capacity-optimized" - } - - override = [ - { - instance_type = "m5.large" - weighted_capacity = "1" - }, - { - instance_type = "m6i.large" - weighted_capacity = "2" - }, - ] - } - - tags = { "aws-node-termination-handler/managed" = "true" } - } - - spot = { - instance_type = "m5.large" - instance_market_options = { - market_type = "spot" - } - - bootstrap_extra_args = "--kubelet-extra-args '--node-labels=node.kubernetes.io/lifecycle=spot'" - tags = { "aws-node-termination-handler/managed" = "true" } - } - } - - tags = merge(local.tags, { Foo = "bar" }) -} - -################################################################################ -# aws-auth configmap -# Only EKS managed node groups automatically add roles to aws-auth configmap -# so we need to ensure fargate profiles and self-managed node roles are added -################################################################################ - -data "aws_eks_cluster_auth" "this" { - name = module.eks.cluster_id -} - -locals { - kubeconfig = yamlencode({ - apiVersion = "v1" - kind = "Config" - current-context = "terraform" - clusters = [{ - name = module.eks.cluster_id - cluster = { - certificate-authority-data = module.eks.cluster_certificate_authority_data - server = module.eks.cluster_endpoint - } - }] - contexts = [{ - name = "terraform" - context = { - cluster = module.eks.cluster_id - user = "terraform" - } - }] - users = [{ - name = "terraform" - user = { - token = 
-        token = data.aws_eks_cluster_auth.this.token
-      }
-    }]
-  })
-}
-
-resource "null_resource" "apply" {
-  triggers = {
-    kubeconfig = base64encode(local.kubeconfig)
-    cmd_patch  = <<-EOT
-      kubectl create configmap aws-auth -n kube-system --kubeconfig <(echo $KUBECONFIG | base64 --decode)
-      kubectl patch configmap/aws-auth --patch "${module.eks.aws_auth_configmap_yaml}" -n kube-system --kubeconfig <(echo $KUBECONFIG | base64 --decode)
-    EOT
-  }
-
-  provisioner "local-exec" {
-    interpreter = ["/bin/bash", "-c"]
-    environment = {
-      KUBECONFIG = self.triggers.kubeconfig
-    }
-    command = self.triggers.cmd_patch
-  }
-}
-
-################################################################################
-# Supporting Resources
-################################################################################
-
-module "vpc" {
-  source  = "terraform-aws-modules/vpc/aws"
-  version = "~> 3.0"
-
-  name = local.name
-  cidr = "10.0.0.0/16"
-
-  azs             = ["${local.region}a", "${local.region}b", "${local.region}c"]
-  private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"]
-  public_subnets  = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"]
-
-  enable_nat_gateway   = true
-  single_nat_gateway   = true
-  enable_dns_hostnames = true
-
-  enable_flow_log                      = true
-  create_flow_log_cloudwatch_iam_role  = true
-  create_flow_log_cloudwatch_log_group = true
-
-  public_subnet_tags = {
-    "kubernetes.io/cluster/${local.name}" = "shared"
-    "kubernetes.io/role/elb"              = 1
-  }
-
-  private_subnet_tags = {
-    "kubernetes.io/cluster/${local.name}" = "shared"
-    "kubernetes.io/role/internal-elb"     = 1
-  }
-
-  tags = local.tags
-}
diff --git a/examples/irsa_autoscale_refresh/README.md b/examples/karpenter/README.md
similarity index 63%
rename from examples/irsa_autoscale_refresh/README.md
rename to examples/karpenter/README.md
index a331d69..53e6bca 100644
--- a/examples/irsa_autoscale_refresh/README.md
+++ b/examples/karpenter/README.md
@@ -1,10 +1,6 @@
-# IRSA, Cluster Autoscaler, and Instance Refresh example
+# Karpenter Example
 
-Configuration in this directory creates an AWS EKS cluster with:
-- [IAM Roles for Service Accounts (IRSA)](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html) enabled
-- [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/README.md) provisioned via a Helm Chart manifest
-- [Instance Refresh](https://aws.amazon.com/blogs/compute/introducing-instance-refresh-for-ec2-auto-scaling/) feature for self managed node groups
-- [Node Termination Handler](https://github.com/aws/aws-node-termination-handler) provisioned via a Helm Chart manifest
+Configuration in this directory creates an AWS EKS cluster with [Karpenter](https://karpenter.sh/) provisioned for managing compute resource scaling.
 
 ## Usage
 
@@ -16,6 +12,38 @@
 $ terraform plan
 $ terraform apply
 ```
 
+Once the cluster is up and running, you can check that Karpenter is functioning as intended with the following commands:
+
+```bash
+# First, make sure you have updated your local kubeconfig
+aws eks --region eu-west-1 update-kubeconfig --name ex-karpenter
+
+# Second, scale the example deployment
+kubectl scale deployment inflate --replicas 5
+
+# You can watch Karpenter's controller logs with
+kubectl logs -f -n karpenter -l app.kubernetes.io/name=karpenter -c controller
+```
+
+You should eventually see a new node, labeled `karpenter.sh/provisioner-name=default`, come up in the console; it was provisioned by Karpenter in response to the scaled deployment above.
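If the console is not handy, the same check can be run from the CLI. A small sketch, not part of the patch, using the provisioner label Karpenter applies to the nodes it creates (`-L` simply adds a column showing each node's capacity type):

```bash
# List only the nodes created by the default Provisioner, with their capacity type
kubectl get nodes -l karpenter.sh/provisioner-name=default -L karpenter.sh/capacity-type
```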
+
+### Tear Down & Clean-Up
+
+Because Karpenter manages the state of node resources outside of Terraform, Karpenter-created resources need to be de-provisioned before the remaining resources can be removed with Terraform.
+
+1. Remove the example deployment created above and any nodes created by Karpenter
+
+```bash
+kubectl delete deployment inflate
+kubectl delete node -l karpenter.sh/provisioner-name=default
+```
+
+2. Remove the resources created by Terraform
+
+```bash
+terraform destroy
+```
+
 Note that this example may create resources which cost money. Run `terraform destroy` when you don't need these resources.
 
@@ -25,43 +53,33 @@ Note that this example may create resources which cost money. Run `terraform des
 |------|---------|
 | [terraform](#requirement\_terraform) | >= 0.13.1 |
 | [aws](#requirement\_aws) | >= 3.72 |
-| [helm](#requirement\_helm) | >= 2.0 |
-| [null](#requirement\_null) | >= 3.0 |
+| [helm](#requirement\_helm) | >= 2.4 |
+| [kubectl](#requirement\_kubectl) | >= 1.14 |
 
 ## Providers
 
 | Name | Version |
 |------|---------|
 | [aws](#provider\_aws) | >= 3.72 |
-| [helm](#provider\_helm) | >= 2.0 |
-| [null](#provider\_null) | >= 3.0 |
+| [helm](#provider\_helm) | >= 2.4 |
+| [kubectl](#provider\_kubectl) | >= 1.14 |
 
 ## Modules
 
 | Name | Source | Version |
 |------|--------|---------|
-| [aws\_node\_termination\_handler\_sqs](#module\_aws\_node\_termination\_handler\_sqs) | terraform-aws-modules/sqs/aws | ~> 3.0 |
-| [cluster\_autoscaler\_irsa](#module\_cluster\_autoscaler\_irsa) | terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks | ~> 4.12 |
 | [eks](#module\_eks) | ../.. | n/a |
-| [node\_termination\_handler\_irsa](#module\_node\_termination\_handler\_irsa) | terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks | ~> 4.12 |
+| [karpenter\_irsa](#module\_karpenter\_irsa) | terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks | ~> 4.15 |
 | [vpc](#module\_vpc) | terraform-aws-modules/vpc/aws | ~> 3.0 |
 
 ## Resources
 
 | Name | Type |
 |------|------|
-| [aws_autoscaling_lifecycle_hook.aws_node_termination_handler](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/autoscaling_lifecycle_hook) | resource |
-| [aws_cloudwatch_event_rule.aws_node_termination_handler_asg](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_rule) | resource |
-| [aws_cloudwatch_event_rule.aws_node_termination_handler_spot](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_rule) | resource |
-| [aws_cloudwatch_event_target.aws_node_termination_handler_asg](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_target) | resource |
-| [aws_cloudwatch_event_target.aws_node_termination_handler_spot](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/cloudwatch_event_target) | resource |
-| [helm_release.aws_node_termination_handler](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
-| [helm_release.cluster_autoscaler](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
-| [null_resource.apply](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource |
-| [aws_caller_identity.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/caller_identity) | data source |
-| [aws_eks_cluster_auth.cluster](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/eks_cluster_auth) | data source |
-| [aws_eks_cluster_auth.this](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/eks_cluster_auth) | data source |
-| [aws_iam_policy_document.aws_node_termination_handler_sqs](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
+| [aws_iam_instance_profile.karpenter](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_instance_profile) | resource |
+| [helm_release.karpenter](https://registry.terraform.io/providers/hashicorp/helm/latest/docs/resources/release) | resource |
+| [kubectl_manifest.karpenter_example_deployment](https://registry.terraform.io/providers/gavinbunney/kubectl/latest/docs/resources/manifest) | resource |
+| [kubectl_manifest.karpenter_provisioner](https://registry.terraform.io/providers/gavinbunney/kubectl/latest/docs/resources/manifest) | resource |
 
 ## Inputs
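Both providers in the `main.tf` that follows authenticate through an `exec` plugin rather than a static token. A hedged sanity check, not part of the patch, assuming the cluster name `ex-karpenter` from the example's locals: if this command returns a token in your shell, the providers should be able to authenticate the same way.

```bash
# The helm and kubectl providers below shell out to exactly this command
aws eks get-token --cluster-name ex-karpenter
```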
diff --git a/examples/karpenter/main.tf b/examples/karpenter/main.tf
new file mode 100644
index 0000000..fd98f22
--- /dev/null
+++ b/examples/karpenter/main.tf
@@ -0,0 +1,248 @@
+provider "aws" {
+  region = local.region
+}
+
+locals {
+  name            = "ex-${replace(basename(path.cwd), "_", "-")}"
+  cluster_version = "1.22"
+  region          = "eu-west-1"
+
+  tags = {
+    Example    = local.name
+    GithubRepo = "terraform-aws-eks"
+    GithubOrg  = "terraform-aws-modules"
+  }
+}
+
+################################################################################
+# EKS Module
+################################################################################
+
+module "eks" {
+  source = "../.."
+
+  cluster_name                    = local.name
+  cluster_version                 = local.cluster_version
+  cluster_endpoint_private_access = true
+  cluster_endpoint_public_access  = true
+
+  vpc_id     = module.vpc.vpc_id
+  subnet_ids = module.vpc.private_subnets
+
+  node_security_group_additional_rules = {
+    # Control plane invoke Karpenter webhook
+    ingress_karpenter_webhook_tcp = {
+      description                   = "Control plane invoke Karpenter webhook"
+      protocol                      = "tcp"
+      from_port                     = 8443
+      to_port                       = 8443
+      type                          = "ingress"
+      source_cluster_security_group = true
+    }
+  }
+
+  eks_managed_node_groups = {
+    karpenter = {
+      instance_types = ["t3.medium"]
+
+      min_size     = 1
+      max_size     = 2
+      desired_size = 1
+
+      iam_role_additional_policies = [
+        # Required by Karpenter
+        "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
+      ]
+    }
+  }
+
+  tags = merge(local.tags, {
+    # NOTE - if creating multiple security groups with this module, only tag the
+    # security group that Karpenter should utilize with the following tag
+    # (i.e. - at most, only one security group should have this tag in your account)
+    "karpenter.sh/discovery" = local.name
+  })
+}
+
+################################################################################
+# Karpenter
+################################################################################
+
+provider "helm" {
+  kubernetes {
+    host                   = module.eks.cluster_endpoint
+    cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
+
+    exec {
+      api_version = "client.authentication.k8s.io/v1alpha1"
+      command     = "aws"
+      # This requires the awscli to be installed locally where Terraform is executed
+      args = ["eks", "get-token", "--cluster-name", module.eks.cluster_id]
+    }
+  }
+}
+
+provider "kubectl" {
+  apply_retry_count      = 5
+  host                   = module.eks.cluster_endpoint
+  cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
+  load_config_file       = false
+
+  exec {
+    api_version = "client.authentication.k8s.io/v1alpha1"
+    command     = "aws"
+    # This requires the awscli to be installed locally where Terraform is executed
+    args = ["eks", "get-token", "--cluster-name", module.eks.cluster_id]
+  }
+}
+
+module "karpenter_irsa" {
+  source  = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks"
+  version = "~> 4.15"
+
+  role_name                          = "karpenter-controller-${local.name}"
+  attach_karpenter_controller_policy = true
+
+  karpenter_controller_cluster_id = module.eks.cluster_id
+  karpenter_controller_node_iam_role_arns = [
+    module.eks.eks_managed_node_groups["karpenter"].iam_role_arn
+  ]
+
+  oidc_providers = {
+    ex = {
+      provider_arn               = module.eks.oidc_provider_arn
+      namespace_service_accounts = ["karpenter:karpenter"]
+    }
+  }
+}
+
+resource "aws_iam_instance_profile" "karpenter" {
+  name = "KarpenterNodeInstanceProfile-${local.name}"
+  role = module.eks.eks_managed_node_groups["karpenter"].iam_role_name
+}
+
+resource "helm_release" "karpenter" {
+  namespace        = "karpenter"
+  create_namespace = true
+
+  name       = "karpenter"
+  repository = "https://charts.karpenter.sh"
+  chart      = "karpenter"
+  version    = "0.8.1"
+
+  set {
+    name  = "serviceAccount.annotations.eks\\.amazonaws\\.com/role-arn"
+    value = module.karpenter_irsa.iam_role_arn
+  }
+
+  set {
+    name  = "clusterName"
+    value = module.eks.cluster_id
+  }
+
+  set {
+    name  = "clusterEndpoint"
+    value = module.eks.cluster_endpoint
+  }
+
+  set {
+    name  = "aws.defaultInstanceProfile"
+    value = aws_iam_instance_profile.karpenter.name
+  }
+}
+
+# Workaround - https://github.com/hashicorp/terraform-provider-kubernetes/issues/1380#issuecomment-967022975
+resource "kubectl_manifest" "karpenter_provisioner" {
+  yaml_body = <<-YAML
+    apiVersion: karpenter.sh/v1alpha5
+    kind: Provisioner
+    metadata:
+      name: default
+    spec:
+      requirements:
+        - key: karpenter.sh/capacity-type
+          operator: In
+          values: ["spot"]
+      limits:
+        resources:
+          cpu: 1000
+      provider:
+        subnetSelector:
+          karpenter.sh/discovery: ${local.name}
+        securityGroupSelector:
+          karpenter.sh/discovery: ${local.name}
+        tags:
+          karpenter.sh/discovery: ${local.name}
+      ttlSecondsAfterEmpty: 30
+  YAML
+
+  depends_on = [
+    helm_release.karpenter
+  ]
+}
+
+# Example deployment using the [pause image](https://www.ianlewis.org/en/almighty-pause-container),
+# which starts with zero replicas
+resource "kubectl_manifest" "karpenter_example_deployment" {
+  yaml_body = <<-YAML
+    apiVersion: apps/v1
+    kind: Deployment
+    metadata:
+      name: inflate
+    spec:
+      replicas: 0
+      selector:
+        matchLabels:
+          app: inflate
+      template:
+        metadata:
+          labels:
+            app: inflate
+        spec:
+          terminationGracePeriodSeconds: 0
+          containers:
+            - name: inflate
+              image: public.ecr.aws/eks-distro/kubernetes/pause:3.2
+              resources:
+                requests:
+                  cpu: 1
+  YAML
+
+  depends_on = [
+    helm_release.karpenter
+  ]
+}
+
+################################################################################
+# Supporting Resources
+################################################################################
+
+module "vpc" {
+  source  = "terraform-aws-modules/vpc/aws"
+  version = "~> 3.0"
+
+  name = local.name
+  cidr = "10.0.0.0/16"
+
+  azs             = ["${local.region}a", "${local.region}b", "${local.region}c"]
+  private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"]
+  public_subnets  = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"]
+
+  enable_nat_gateway   = true
+  single_nat_gateway   = true
+  enable_dns_hostnames = true
+
+  public_subnet_tags = {
+    "kubernetes.io/cluster/${local.name}" = "shared"
+    "kubernetes.io/role/elb"              = 1
+  }
+
+  private_subnet_tags = {
+    "kubernetes.io/cluster/${local.name}" = "shared"
+    "kubernetes.io/role/internal-elb"     = 1
+    # Tags subnets for Karpenter auto-discovery
+    "karpenter.sh/discovery" = local.name
+  }
+
+  tags = local.tags
+}
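Karpenter resolves the `subnetSelector` and `securityGroupSelector` above through the `karpenter.sh/discovery` tag, so a useful post-apply check, not part of the patch, is to confirm the tag matches exactly the resources intended. A sketch with the AWS CLI, assuming the example's cluster name:

```bash
# Expect the private subnets tagged by the VPC module above, and exactly one
# security group (see the NOTE in the eks module's tags)
aws ec2 describe-subnets \
  --filters Name=tag:karpenter.sh/discovery,Values=ex-karpenter \
  --query 'Subnets[].SubnetId'
aws ec2 describe-security-groups \
  --filters Name=tag:karpenter.sh/discovery,Values=ex-karpenter \
  --query 'SecurityGroups[].GroupId'
```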
diff --git a/examples/irsa_autoscale_refresh/outputs.tf b/examples/karpenter/outputs.tf
similarity index 100%
rename from examples/irsa_autoscale_refresh/outputs.tf
rename to examples/karpenter/outputs.tf
diff --git a/examples/irsa_autoscale_refresh/variables.tf b/examples/karpenter/variables.tf
similarity index 100%
rename from examples/irsa_autoscale_refresh/variables.tf
rename to examples/karpenter/variables.tf
diff --git a/examples/irsa_autoscale_refresh/versions.tf b/examples/karpenter/versions.tf
similarity index 66%
rename from examples/irsa_autoscale_refresh/versions.tf
rename to examples/karpenter/versions.tf
index 20ac24f..fe18aba 100644
--- a/examples/irsa_autoscale_refresh/versions.tf
+++ b/examples/karpenter/versions.tf
@@ -6,13 +6,13 @@ terraform {
       source  = "hashicorp/aws"
       version = ">= 3.72"
     }
-    null = {
-      source  = "hashicorp/null"
-      version = ">= 3.0"
-    }
     helm = {
       source  = "hashicorp/helm"
-      version = ">= 2.0"
+      version = ">= 2.4"
+    }
+    kubectl = {
+      source  = "gavinbunney/kubectl"
+      version = ">= 1.14"
     }
   }
 }
diff --git a/examples/self_managed_node_group/main.tf b/examples/self_managed_node_group/main.tf
index 373567b..a7ad36c 100644
--- a/examples/self_managed_node_group/main.tf
+++ b/examples/self_managed_node_group/main.tf
@@ -4,7 +4,7 @@ provider "aws" {
 
 locals {
   name            = "ex-${replace(basename(path.cwd), "_", "-")}"
-  cluster_version = "1.21"
+  cluster_version = "1.22"
   region          = "eu-west-1"
 
   tags = {
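The `1.21` to `1.22` bumps in the example locals are mechanical, but when bumping a real cluster it can be worth first confirming what EKS publishes for the target version. A hedged sketch, not part of the patch:

```bash
# List the add-on builds EKS publishes for Kubernetes 1.22
aws eks describe-addon-versions \
  --kubernetes-version 1.22 \
  --query 'addons[].{addon:addonName,latest:addonVersions[0].addonVersion}' \
  --output table
```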