chore(scaletest): add tls to infrastructure (#19412)

Closes https://github.com/coder/internal/issues/850

This PR has the scaletest infrastructure retrieve and use TLS certificates from the persistent observability cluster.

To support creating multiple instances of the infrastructure simultaneously, `var.name` can be set to `alpha`, `bravo` or `charlie`, which retrieves the corresponding certificates.

Also:
- Adds support for wildcard apps.
- Retrieves the Cloudflare API token from Google Secret Manager.
This commit is contained in:
Ethan
2025-08-25 12:25:09 +10:00
committed by GitHub
parent 236844e5cc
commit 5145cd002d
10 changed files with 270 additions and 133 deletions

View File

@@ -7,7 +7,7 @@ trim_trailing_whitespace = true
insert_final_newline = true
indent_style = tab
[*.{yaml,yml,tf,tfvars,nix}]
[*.{yaml,yml,tf,tftpl,tfvars,nix}]
indent_style = space
indent_size = 2

View File

@@ -5,8 +5,17 @@ data "cloudflare_zone" "domain" {
resource "cloudflare_record" "coder" {
for_each = local.deployments
zone_id = data.cloudflare_zone.domain.zone_id
name = each.value.subdomain
name = "${each.value.subdomain}.${var.cloudflare_domain}"
content = google_compute_address.coder[each.key].address
type = "A"
ttl = 3600
}
# Wildcard DNS record for workspace apps: CNAMEs "*.<deployment>.<name>" onto
# the per-deployment A record so app subdomains resolve to the same address.
resource "cloudflare_record" "coder_wildcard" {
for_each = local.deployments
# Read `zone_id` (not `id`) for consistency with cloudflare_record.coder above,
# which uses data.cloudflare_zone.domain.zone_id.
zone_id = data.cloudflare_zone.domain.zone_id
name = each.value.wildcard_subdomain
content = cloudflare_record.coder[each.key].name
type = "CNAME"
ttl = 3600
}

View File

@@ -22,6 +22,8 @@ coder:
%{~ if workspace_proxy ~}
- name: "CODER_ACCESS_URL"
value: "${access_url}"
- name: "CODER_WILDCARD_ACCESS_URL"
value: "${wildcard_access_url}"
- name: CODER_PRIMARY_ACCESS_URL
value: "${primary_url}"
- name: CODER_PROXY_SESSION_TOKEN
@@ -45,6 +47,8 @@ coder:
%{~ if !workspace_proxy && !provisionerd ~}
- name: "CODER_ACCESS_URL"
value: "${access_url}"
- name: "CODER_WILDCARD_ACCESS_URL"
value: "${wildcard_access_url}"
- name: "CODER_PG_CONNECTION_URL"
valueFrom:
secretKeyRef:
@@ -109,3 +113,8 @@ coder:
- emptyDir:
sizeLimit: 1024Mi
name: cache
%{~ if !provisionerd ~}
tls:
secretNames:
- "${tls_secret_name}"
%{~ endif ~}

View File

@@ -6,25 +6,31 @@ data "google_compute_default_service_account" "default" {
locals {
deployments = {
primary = {
subdomain = "${var.name}-scaletest"
url = "http://${var.name}-scaletest.${var.cloudflare_domain}"
region = "us-east1"
zone = "us-east1-c"
subnet = "scaletest"
subdomain = "primary.${var.name}"
wildcard_subdomain = "*.primary.${var.name}"
url = "https://primary.${var.name}.${var.cloudflare_domain}"
wildcard_access_url = "*.primary.${var.name}.${var.cloudflare_domain}"
region = "us-east1"
zone = "us-east1-c"
subnet = "scaletest"
}
europe = {
subdomain = "${var.name}-europe-scaletest"
url = "http://${var.name}-europe-scaletest.${var.cloudflare_domain}"
region = "europe-west1"
zone = "europe-west1-b"
subnet = "scaletest"
subdomain = "europe.${var.name}"
wildcard_subdomain = "*.europe.${var.name}"
url = "https://europe.${var.name}.${var.cloudflare_domain}"
wildcard_access_url = "*.europe.${var.name}.${var.cloudflare_domain}"
region = "europe-west1"
zone = "europe-west1-b"
subnet = "scaletest"
}
asia = {
subdomain = "${var.name}-asia-scaletest"
url = "http://${var.name}-asia-scaletest.${var.cloudflare_domain}"
region = "asia-southeast1"
zone = "asia-southeast1-a"
subnet = "scaletest"
subdomain = "asia.${var.name}"
wildcard_subdomain = "*.asia.${var.name}"
url = "https://asia.${var.name}.${var.cloudflare_domain}"
wildcard_access_url = "*.asia.${var.name}.${var.cloudflare_domain}"
region = "asia-southeast1"
zone = "asia-southeast1-a"
subnet = "scaletest"
}
}
node_pools = {
@@ -146,6 +152,11 @@ resource "google_container_node_pool" "node_pool" {
}
}
lifecycle {
ignore_changes = [management[0].auto_repair, management[0].auto_upgrade, timeouts]
ignore_changes = [
management[0].auto_repair,
management[0].auto_upgrade,
timeouts,
node_config[0].resource_labels
]
}
}

View File

@@ -43,6 +43,23 @@ resource "kubernetes_secret" "proxy_token_asia" {
}
}
# TLS certificate for the asia deployment. The cert/key pair is read from the
# observability cluster (data.kubernetes_secret.coder_tls) and materialized as
# a kubernetes.io/tls secret in this cluster for the Coder Helm release to use.
resource "kubernetes_secret" "coder_tls_asia" {
provider = kubernetes.asia
type = "kubernetes.io/tls"
metadata {
name = "coder-tls"
namespace = kubernetes_namespace.coder_asia.metadata.0.name
}
data = {
"tls.crt" = data.kubernetes_secret.coder_tls["asia"].data["tls.crt"]
"tls.key" = data.kubernetes_secret.coder_tls["asia"].data["tls.key"]
}
lifecycle {
# NOTE(review): presumably suppresses spurious plan diffs on these
# provider-managed attributes — confirm still needed on current provider.
ignore_changes = [timeouts, wait_for_service_account_token]
}
}
resource "helm_release" "coder_asia" {
provider = helm.asia
@@ -52,25 +69,27 @@ resource "helm_release" "coder_asia" {
version = var.coder_chart_version
namespace = kubernetes_namespace.coder_asia.metadata.0.name
values = [templatefile("${path.module}/coder_helm_values.tftpl", {
workspace_proxy = true,
provisionerd = false,
primary_url = local.deployments.primary.url,
proxy_token = kubernetes_secret.proxy_token_asia.metadata.0.name,
db_secret = null,
ip_address = google_compute_address.coder["asia"].address,
provisionerd_psk = null,
access_url = local.deployments.asia.url,
node_pool = google_container_node_pool.node_pool["asia_coder"].name,
release_name = local.coder_release_name,
experiments = var.coder_experiments,
image_repo = var.coder_image_repo,
image_tag = var.coder_image_tag,
replicas = local.scenarios[var.scenario].coder.replicas,
cpu_request = local.scenarios[var.scenario].coder.cpu_request,
mem_request = local.scenarios[var.scenario].coder.mem_request,
cpu_limit = local.scenarios[var.scenario].coder.cpu_limit,
mem_limit = local.scenarios[var.scenario].coder.mem_limit,
deployment = "asia",
workspace_proxy = true,
provisionerd = false,
primary_url = local.deployments.primary.url,
proxy_token = kubernetes_secret.proxy_token_asia.metadata.0.name,
db_secret = null,
ip_address = google_compute_address.coder["asia"].address,
provisionerd_psk = null,
access_url = local.deployments.asia.url,
wildcard_access_url = local.deployments.asia.wildcard_access_url,
node_pool = google_container_node_pool.node_pool["asia_coder"].name,
release_name = local.coder_release_name,
experiments = var.coder_experiments,
image_repo = var.coder_image_repo,
image_tag = var.coder_image_tag,
replicas = local.scenarios[var.scenario].coder.replicas,
cpu_request = local.scenarios[var.scenario].coder.cpu_request,
mem_request = local.scenarios[var.scenario].coder.mem_request,
cpu_limit = local.scenarios[var.scenario].coder.cpu_limit,
mem_limit = local.scenarios[var.scenario].coder.mem_limit,
deployment = "asia",
tls_secret_name = kubernetes_secret.coder_tls_asia.metadata.0.name,
})]
depends_on = [null_resource.license]
@@ -85,25 +104,27 @@ resource "helm_release" "provisionerd_asia" {
version = var.provisionerd_chart_version
namespace = kubernetes_namespace.coder_asia.metadata.0.name
values = [templatefile("${path.module}/coder_helm_values.tftpl", {
workspace_proxy = false,
provisionerd = true,
primary_url = null,
proxy_token = null,
db_secret = null,
ip_address = null,
provisionerd_psk = kubernetes_secret.provisionerd_psk_asia.metadata.0.name,
access_url = local.deployments.primary.url,
node_pool = google_container_node_pool.node_pool["asia_coder"].name,
release_name = local.coder_release_name,
experiments = var.coder_experiments,
image_repo = var.coder_image_repo,
image_tag = var.coder_image_tag,
replicas = local.scenarios[var.scenario].provisionerd.replicas,
cpu_request = local.scenarios[var.scenario].provisionerd.cpu_request,
mem_request = local.scenarios[var.scenario].provisionerd.mem_request,
cpu_limit = local.scenarios[var.scenario].provisionerd.cpu_limit,
mem_limit = local.scenarios[var.scenario].provisionerd.mem_limit,
deployment = "asia",
workspace_proxy = false,
provisionerd = true,
primary_url = null,
proxy_token = null,
db_secret = null,
ip_address = null,
provisionerd_psk = kubernetes_secret.provisionerd_psk_asia.metadata.0.name,
access_url = local.deployments.primary.url,
wildcard_access_url = null,
node_pool = google_container_node_pool.node_pool["asia_coder"].name,
release_name = local.coder_release_name,
experiments = var.coder_experiments,
image_repo = var.coder_image_repo,
image_tag = var.coder_image_tag,
replicas = local.scenarios[var.scenario].provisionerd.replicas,
cpu_request = local.scenarios[var.scenario].provisionerd.cpu_request,
mem_request = local.scenarios[var.scenario].provisionerd.mem_request,
cpu_limit = local.scenarios[var.scenario].provisionerd.cpu_limit,
mem_limit = local.scenarios[var.scenario].provisionerd.mem_limit,
deployment = "asia",
tls_secret_name = null,
})]
depends_on = [null_resource.license]

View File

@@ -43,6 +43,23 @@ resource "kubernetes_secret" "proxy_token_europe" {
}
}
# TLS certificate for the europe deployment. The cert/key pair is read from the
# observability cluster (data.kubernetes_secret.coder_tls) and materialized as
# a kubernetes.io/tls secret in this cluster for the Coder Helm release to use.
resource "kubernetes_secret" "coder_tls_europe" {
provider = kubernetes.europe
type = "kubernetes.io/tls"
metadata {
name = "coder-tls"
namespace = kubernetes_namespace.coder_europe.metadata.0.name
}
data = {
"tls.crt" = data.kubernetes_secret.coder_tls["europe"].data["tls.crt"]
"tls.key" = data.kubernetes_secret.coder_tls["europe"].data["tls.key"]
}
lifecycle {
# NOTE(review): presumably suppresses spurious plan diffs on these
# provider-managed attributes — confirm still needed on current provider.
ignore_changes = [timeouts, wait_for_service_account_token]
}
}
resource "helm_release" "coder_europe" {
provider = helm.europe
@@ -52,25 +69,27 @@ resource "helm_release" "coder_europe" {
version = var.coder_chart_version
namespace = kubernetes_namespace.coder_europe.metadata.0.name
values = [templatefile("${path.module}/coder_helm_values.tftpl", {
workspace_proxy = true,
provisionerd = false,
primary_url = local.deployments.primary.url,
proxy_token = kubernetes_secret.proxy_token_europe.metadata.0.name,
db_secret = null,
ip_address = google_compute_address.coder["europe"].address,
provisionerd_psk = null,
access_url = local.deployments.europe.url,
node_pool = google_container_node_pool.node_pool["europe_coder"].name,
release_name = local.coder_release_name,
experiments = var.coder_experiments,
image_repo = var.coder_image_repo,
image_tag = var.coder_image_tag,
replicas = local.scenarios[var.scenario].coder.replicas,
cpu_request = local.scenarios[var.scenario].coder.cpu_request,
mem_request = local.scenarios[var.scenario].coder.mem_request,
cpu_limit = local.scenarios[var.scenario].coder.cpu_limit,
mem_limit = local.scenarios[var.scenario].coder.mem_limit,
deployment = "europe",
workspace_proxy = true,
provisionerd = false,
primary_url = local.deployments.primary.url,
proxy_token = kubernetes_secret.proxy_token_europe.metadata.0.name,
db_secret = null,
ip_address = google_compute_address.coder["europe"].address,
provisionerd_psk = null,
access_url = local.deployments.europe.url,
wildcard_access_url = local.deployments.europe.wildcard_access_url,
node_pool = google_container_node_pool.node_pool["europe_coder"].name,
release_name = local.coder_release_name,
experiments = var.coder_experiments,
image_repo = var.coder_image_repo,
image_tag = var.coder_image_tag,
replicas = local.scenarios[var.scenario].coder.replicas,
cpu_request = local.scenarios[var.scenario].coder.cpu_request,
mem_request = local.scenarios[var.scenario].coder.mem_request,
cpu_limit = local.scenarios[var.scenario].coder.cpu_limit,
mem_limit = local.scenarios[var.scenario].coder.mem_limit,
deployment = "europe",
tls_secret_name = kubernetes_secret.coder_tls_europe.metadata.0.name,
})]
depends_on = [null_resource.license]
@@ -85,25 +104,27 @@ resource "helm_release" "provisionerd_europe" {
version = var.provisionerd_chart_version
namespace = kubernetes_namespace.coder_europe.metadata.0.name
values = [templatefile("${path.module}/coder_helm_values.tftpl", {
workspace_proxy = false,
provisionerd = true,
primary_url = null,
proxy_token = null,
db_secret = null,
ip_address = null,
provisionerd_psk = kubernetes_secret.provisionerd_psk_europe.metadata.0.name,
access_url = local.deployments.primary.url,
node_pool = google_container_node_pool.node_pool["europe_coder"].name,
release_name = local.coder_release_name,
experiments = var.coder_experiments,
image_repo = var.coder_image_repo,
image_tag = var.coder_image_tag,
replicas = local.scenarios[var.scenario].provisionerd.replicas,
cpu_request = local.scenarios[var.scenario].provisionerd.cpu_request,
mem_request = local.scenarios[var.scenario].provisionerd.mem_request,
cpu_limit = local.scenarios[var.scenario].provisionerd.cpu_limit,
mem_limit = local.scenarios[var.scenario].provisionerd.mem_limit,
deployment = "europe",
workspace_proxy = false,
provisionerd = true,
primary_url = null,
proxy_token = null,
db_secret = null,
ip_address = null,
provisionerd_psk = kubernetes_secret.provisionerd_psk_europe.metadata.0.name,
access_url = local.deployments.primary.url,
wildcard_access_url = null,
node_pool = google_container_node_pool.node_pool["europe_coder"].name,
release_name = local.coder_release_name,
experiments = var.coder_experiments,
image_repo = var.coder_image_repo,
image_tag = var.coder_image_tag,
replicas = local.scenarios[var.scenario].provisionerd.replicas,
cpu_request = local.scenarios[var.scenario].provisionerd.cpu_request,
mem_request = local.scenarios[var.scenario].provisionerd.mem_request,
cpu_limit = local.scenarios[var.scenario].provisionerd.cpu_limit,
mem_limit = local.scenarios[var.scenario].provisionerd.mem_limit,
deployment = "europe",
tls_secret_name = null,
})]
depends_on = [null_resource.license]

View File

@@ -63,6 +63,23 @@ resource "kubernetes_secret" "provisionerd_psk_primary" {
}
}
# TLS certificate for the primary deployment. The cert/key pair is read from
# the observability cluster (data.kubernetes_secret.coder_tls) and materialized
# as a kubernetes.io/tls secret in this cluster for the Coder Helm release.
resource "kubernetes_secret" "coder_tls_primary" {
provider = kubernetes.primary
type = "kubernetes.io/tls"
metadata {
name = "coder-tls"
namespace = kubernetes_namespace.coder_primary.metadata.0.name
}
data = {
"tls.crt" = data.kubernetes_secret.coder_tls["primary"].data["tls.crt"]
"tls.key" = data.kubernetes_secret.coder_tls["primary"].data["tls.key"]
}
lifecycle {
# NOTE(review): presumably suppresses spurious plan diffs on these
# provider-managed attributes — confirm still needed on current provider.
ignore_changes = [timeouts, wait_for_service_account_token]
}
}
resource "helm_release" "coder_primary" {
provider = helm.primary
@@ -72,25 +89,27 @@ resource "helm_release" "coder_primary" {
version = var.coder_chart_version
namespace = kubernetes_namespace.coder_primary.metadata.0.name
values = [templatefile("${path.module}/coder_helm_values.tftpl", {
workspace_proxy = false,
provisionerd = false,
primary_url = null,
proxy_token = null,
db_secret = kubernetes_secret.coder_db.metadata.0.name,
ip_address = google_compute_address.coder["primary"].address,
provisionerd_psk = kubernetes_secret.provisionerd_psk_primary.metadata.0.name,
access_url = local.deployments.primary.url,
node_pool = google_container_node_pool.node_pool["primary_coder"].name,
release_name = local.coder_release_name,
experiments = var.coder_experiments,
image_repo = var.coder_image_repo,
image_tag = var.coder_image_tag,
replicas = local.scenarios[var.scenario].coder.replicas,
cpu_request = local.scenarios[var.scenario].coder.cpu_request,
mem_request = local.scenarios[var.scenario].coder.mem_request,
cpu_limit = local.scenarios[var.scenario].coder.cpu_limit,
mem_limit = local.scenarios[var.scenario].coder.mem_limit,
deployment = "primary",
workspace_proxy = false,
provisionerd = false,
primary_url = null,
proxy_token = null,
db_secret = kubernetes_secret.coder_db.metadata.0.name,
ip_address = google_compute_address.coder["primary"].address,
provisionerd_psk = kubernetes_secret.provisionerd_psk_primary.metadata.0.name,
access_url = local.deployments.primary.url,
wildcard_access_url = local.deployments.primary.wildcard_access_url,
node_pool = google_container_node_pool.node_pool["primary_coder"].name,
release_name = local.coder_release_name,
experiments = var.coder_experiments,
image_repo = var.coder_image_repo,
image_tag = var.coder_image_tag,
replicas = local.scenarios[var.scenario].coder.replicas,
cpu_request = local.scenarios[var.scenario].coder.cpu_request,
mem_request = local.scenarios[var.scenario].coder.mem_request,
cpu_limit = local.scenarios[var.scenario].coder.cpu_limit,
mem_limit = local.scenarios[var.scenario].coder.mem_limit,
deployment = "primary",
tls_secret_name = kubernetes_secret.coder_tls_primary.metadata.0.name,
})]
}
@@ -103,25 +122,27 @@ resource "helm_release" "provisionerd_primary" {
version = var.provisionerd_chart_version
namespace = kubernetes_namespace.coder_primary.metadata.0.name
values = [templatefile("${path.module}/coder_helm_values.tftpl", {
workspace_proxy = false,
provisionerd = true,
primary_url = null,
proxy_token = null,
db_secret = null,
ip_address = null,
provisionerd_psk = kubernetes_secret.provisionerd_psk_primary.metadata.0.name,
access_url = local.deployments.primary.url,
node_pool = google_container_node_pool.node_pool["primary_coder"].name,
release_name = local.coder_release_name,
experiments = var.coder_experiments,
image_repo = var.coder_image_repo,
image_tag = var.coder_image_tag,
replicas = local.scenarios[var.scenario].provisionerd.replicas,
cpu_request = local.scenarios[var.scenario].provisionerd.cpu_request,
mem_request = local.scenarios[var.scenario].provisionerd.mem_request,
cpu_limit = local.scenarios[var.scenario].provisionerd.cpu_limit,
mem_limit = local.scenarios[var.scenario].provisionerd.mem_limit,
deployment = "primary",
workspace_proxy = false,
provisionerd = true,
primary_url = null,
proxy_token = null,
db_secret = null,
ip_address = null,
provisionerd_psk = kubernetes_secret.provisionerd_psk_primary.metadata.0.name,
access_url = local.deployments.primary.url,
wildcard_access_url = null,
node_pool = google_container_node_pool.node_pool["primary_coder"].name,
release_name = local.coder_release_name,
experiments = var.coder_experiments,
image_repo = var.coder_image_repo,
image_tag = var.coder_image_tag,
replicas = local.scenarios[var.scenario].provisionerd.replicas,
cpu_request = local.scenarios[var.scenario].provisionerd.cpu_request,
mem_request = local.scenarios[var.scenario].provisionerd.mem_request,
cpu_limit = local.scenarios[var.scenario].provisionerd.cpu_limit,
mem_limit = local.scenarios[var.scenario].provisionerd.mem_limit,
deployment = "primary",
tls_secret_name = null,
})]
depends_on = [null_resource.license]

View File

@@ -55,6 +55,12 @@ provider "cloudflare" {
api_token = coalesce(var.cloudflare_api_token, data.google_secret_manager_secret_version_access.cloudflare_api_token_dns.secret_data)
}
# Look up the persistent observability GKE cluster so a Kubernetes provider can
# be pointed at it (to read the pre-provisioned TLS certificate secrets).
data "google_container_cluster" "observability" {
name = var.observability_cluster_name
location = var.observability_cluster_location
project = var.project_id
}
provider "kubernetes" {
alias = "primary"
host = "https://${google_container_cluster.cluster["primary"].endpoint}"
@@ -76,6 +82,13 @@ provider "kubernetes" {
token = data.google_client_config.default.access_token
}
# Kubernetes provider aliased to the observability cluster; used to read the
# coder TLS certificate secrets managed there.
provider "kubernetes" {
alias = "observability"
host = "https://${data.google_container_cluster.observability.endpoint}"
cluster_ca_certificate = base64decode(data.google_container_cluster.observability.master_auth.0.cluster_ca_certificate)
token = data.google_client_config.default.access_token
}
provider "kubectl" {
alias = "primary"
host = "https://${google_container_cluster.cluster["primary"].endpoint}"

View File

@@ -0,0 +1,13 @@
locals {
# Namespace in the observability cluster where the per-deployment TLS
# secrets live.
coder_certs_namespace = "coder-certs"
}
# These certificates are managed by flux and cert-manager.
# One secret per deployment (keyed by local.deployments), named
# "coder-<name>-<deployment>-tls", is read from the observability cluster and
# copied into the corresponding workload cluster.
data "kubernetes_secret" "coder_tls" {
for_each = local.deployments
provider = kubernetes.observability
metadata {
name = "coder-${var.name}-${each.key}-tls"
namespace = local.coder_certs_namespace
}
}

View File

@@ -1,5 +1,9 @@
variable "name" {
description = "The name all resources will be prefixed with"
description = "The name all resources will be prefixed with. Must be one of alpha, bravo, or charlie."
validation {
condition = contains(["alpha", "bravo", "charlie"], var.name)
error_message = "Name must be one of alpha, bravo, or charlie."
}
}
variable "scenario" {
@@ -82,6 +86,21 @@ variable "provisionerd_image_tag" {
default = "latest"
}
# Observability cluster settings: identifies the persistent GKE cluster that
# holds the pre-provisioned TLS certificate secrets.
variable "observability_cluster_name" {
description = "Name of the observability GKE cluster."
default = "observability"
}
variable "observability_cluster_location" {
description = "Location of the observability GKE cluster."
default = "us-east1-b"
}
# Used when var.cloudflare_api_token is unset; the token is then fetched from
# Google Secret Manager.
variable "cloudflare_api_token_secret" {
description = "Name of the Google Secret Manager secret containing the Cloudflare API token."
default = "cloudflare-api-token-dns"
}
// Prometheus
variable "prometheus_remote_write_url" {
description = "URL to push prometheus metrics to."