* Add ephemeral_storage_local_ssd_config support to modules/gke-nodepool Adds ephemeral_storage_local_ssd_count to node_config variable and the corresponding dynamic ephemeral_storage_local_ssd_config block in the node pool resource, enabling use of local SSDs as ephemeral storage. * feat(gke-nodepool): add flex_start support to node_config Add `flex_start` as an optional bool to the `node_config` variable type and wire it through to the `google_container_node_pool` resource's node_config block. This enables DWS (Dynamic Workload Scheduler) flex-start mode for node pools, used for on-demand capacity access without requiring ProvisioningRequest objects (e.g. spot TPU pools). * feat(gke-nodepool): add flex_start support to node_config Add `flex_start` as an optional bool to the `node_config` variable type and wire it through to the `google_container_node_pool` resource's node_config block. This enables DWS (Dynamic Workload Scheduler) flex-start mode for node pools, which allows the Cluster Autoscaler to request capacity on-demand without requiring ProvisioningRequest objects (unlike queued_provisioning). Typical use case is spot TPU node pools. * feat(gke-nodepool): add advanced_machine_features support to node_config Add `advanced_machine_features` as an optional object to the `node_config` variable type and wire it through to the `google_container_node_pool` resource via a dynamic block. This allows callers to configure `threads_per_core` (e.g. set to 1 to disable hyperthreading) and `enable_nested_virtualization` for node pools that require fine-grained CPU threading control or nested hypervisor support. GKE auto-sets `advanced_machine_features` (threads_per_core=1) on ct6e/TPU machine types; exposing this field also lets consumers add it to ignore_changes in their own lifecycle blocks to avoid forced replacements. 
* feat(gke-nodepool): add containerd_config support to node_config Add `containerd_config` as an optional object to the `node_config` variable and wire it through to the `google_container_node_pool` resource via a dynamic block. This allows callers to configure private registry mirrors or custom containerd registry hosts per node pool — useful for air-gapped environments and internal registry proxies. The `registry_hosts` list maps each upstream server to one or more mirror hosts, with optional `capabilities`, `override_path`, and `dial_timeout` fields (all defaulting to sensible values). * refactor(gke-nodepool): use maps for containerd_config registry_hosts and hosts Convert registry_hosts and hosts from lists to maps so that the registry server and host URLs serve as stable keys, avoiding index-shifting issues with for_each. Add default values for capabilities, override_path, and dial_timeout. Update README example and test inventory accordingly. * Remove default values from containerd_config hosts fields Leave capabilities, override_path, and dial_timeout without defaults so the provider/API picks them rather than the module imposing values. 
* Refine containerd_config variable interface - Simplify header to optional(map(list(string))) - Flatten ca, client cert/key to strings with descriptive names - Derive private_registry_access_config enabled from ca domain config list - Simplify writable_cgroups to optional(bool) - Flatten gcp_secret_manager_certificate_config to string - Remove redundant defaults where try() handles null in main.tf - Fix long lines in main.tf to stay within 79-char limit - Update copyright year to 2026 in inventory files * fix(gke-nodepool): run terraform fmt to fix attribute alignment in containerd_config * docs(gke-nodepool): regenerate README with updated variable line numbers * fix(gke-nodepool): use coalesce instead of try for null header map in for_each * tests(gke-nodepool): update containerd-config inventory to match actual plan output --------- Co-authored-by: Julio Castillo <jccb@google.com>
312 lines
10 KiB
HCL
312 lines
10 KiB
HCL
/**
 * Copyright 2024 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
# Optional cluster id; left null when the caller does not provide it.
variable "cluster_id" {
  description = "Cluster id. Optional, but providing cluster_id is recommended to prevent cluster misconfiguration in some of the edge cases."
  type        = string
  default     = null
}
|
|
|
|
# Required: no default is declared, so callers must always set it.
variable "cluster_name" {
  description = "Cluster name."
  type        = string
}
|
|
|
|
# Optional node version; null when unset.
variable "gke_version" {
  description = "Kubernetes nodes version. Ignored if auto_upgrade is set in management_config."
  type        = string
  default     = null
}
|
|
|
|
# Kubernetes labels map; non-nullable, so an explicit null resolves to the
# empty-map default.
variable "k8s_labels" {
  description = "Kubernetes labels applied to each node."
  type        = map(string)
  default     = {}
  nullable    = false
}
|
|
|
|
# Resource labels map; non-nullable, so an explicit null resolves to the
# empty-map default.
variable "labels" {
  description = "The resource labels to be applied each node (vm)."
  type        = map(string)
  default     = {}
  nullable    = false
}
|
|
|
|
# Required: no default is declared, so callers must always set it.
variable "location" {
  description = "Cluster location."
  type        = string
}
|
|
|
|
# Optional per-node pod limit; null when unset.
variable "max_pods_per_node" {
  description = "Maximum number of pods per node."
  type        = number
  default     = null
}
|
|
|
|
# Optional nodepool name; null when unset.
variable "name" {
  description = "Optional nodepool name."
  type        = string
  default     = null
}
|
|
|
|
# Nodepool networking options. The whole object is optional (default null);
# when it is set, every attribute is optional and the nested collections
# fall back to empty values.
variable "network_config" {
  description = "Network configuration."
  type = object({
    enable_private_nodes = optional(bool)
    # Pod secondary range settings; `create` defaults to false.
    pod_range = optional(object({
      cidr   = optional(string)
      create = optional(bool, false)
      name   = optional(string)
    }), {})
    # Extra node networks, one entry per network/subnetwork pair.
    additional_node_network_configs = optional(list(object({
      network    = string
      subnetwork = string
    })), [])
    # Extra pod networks; note max_pods_per_node is typed as string here.
    additional_pod_network_configs = optional(list(object({
      subnetwork          = string
      secondary_pod_range = string
      max_pods_per_node   = string
    })), [])
    total_egress_bandwidth_tier        = optional(string)
    pod_cidr_overprovisioning_disabled = optional(bool, false)
  })
  default = null
}
|
|
|
|
# Node-level settings for the nodepool. Every attribute is optional; the
# variable itself is non-nullable and defaults to an empty object, so the
# attribute-level defaults (disk_type, gcfs, gvnic) always apply.
variable "node_config" {
  description = "Node-level configuration."
  type = object({
    boot_disk = optional(object({
      kms_key                = optional(string)
      size_gb                = optional(number)
      type                   = optional(string)
      provisioned_iops       = optional(number)
      provisioned_throughput = optional(number)
    }))
    boot_disk_kms_key = optional(string)                # usage of this is discouraged
    disk_size_gb      = optional(number)                # usage of this is discouraged
    disk_type         = optional(string, "pd-balanced") # usage of this is discouraged
    ephemeral_ssd_count = optional(number)
    # Number of local SSDs to use as ephemeral storage.
    ephemeral_storage_local_ssd_count = optional(number)
    gcfs                              = optional(bool, false)
    # Single accelerator definition (an object, not a list).
    guest_accelerator = optional(object({
      count = number
      type  = string
      gpu_driver = optional(object({
        version                    = string
        partition_size             = optional(string)
        max_shared_clients_per_gpu = optional(number)
      }))
    }))
    local_nvme_ssd_count = optional(number)
    gvnic                = optional(bool, false)
    image_type           = optional(string)
    kubelet_config = optional(object({
      cpu_manager_policy                     = string
      cpu_cfs_quota                          = optional(bool)
      cpu_cfs_quota_period                   = optional(string)
      insecure_kubelet_readonly_port_enabled = optional(string)
      pod_pids_limit                         = optional(number)
      container_log_max_size                 = optional(string)
      container_log_max_files                = optional(number)
      image_gc_low_threshold_percent         = optional(number)
      image_gc_high_threshold_percent        = optional(number)
      image_minimum_gc_age                   = optional(string)
      image_maximum_gc_age                   = optional(string)
      allowed_unsafe_sysctls                 = optional(list(string), [])
    }))
    linux_node_config = optional(object({
      sysctls     = optional(map(string))
      cgroup_mode = optional(string)
    }))
    local_ssd_count       = optional(number)
    machine_type          = optional(string)
    metadata              = optional(map(string))
    min_cpu_platform      = optional(string)
    preemptible           = optional(bool)
    sandbox_config_gvisor = optional(bool)
    shielded_instance_config = optional(object({
      enable_integrity_monitoring = optional(bool)
      enable_secure_boot          = optional(bool)
    }))
    spot = optional(bool)
    # DWS (Dynamic Workload Scheduler) flex-start mode.
    flex_start                    = optional(bool)
    workload_metadata_config_mode = optional(string)
    # CPU threading control and nested virtualization.
    advanced_machine_features = optional(object({
      enable_nested_virtualization = optional(bool)
      threads_per_core             = optional(number)
    }))
    # containerd settings; registry_hosts and hosts are maps keyed by the
    # registry server and the host URL respectively.
    containerd_config = optional(object({
      private_registry_access_config = optional(object({
        certificate_authority_domain_config = optional(list(object({
          fqdns                                            = list(string)
          gcp_secret_manager_certificate_config_secret_uri = string
        })))
      }))
      writable_cgroups = optional(bool)
      registry_hosts = optional(map(object({
        hosts = optional(map(object({
          capabilities                     = optional(list(string))
          override_path                    = optional(bool)
          dial_timeout                     = optional(string)
          header                           = optional(map(list(string)))
          ca_gcp_secret_manager_secret_uri = optional(string)
          client = optional(object({
            cert_gcp_secret_manager_secret_uri = string
            key_gcp_secret_manager_secret_uri  = optional(string)
          }))
        })), {})
      })), {})
    }))
  })
  default  = {}
  nullable = false
  validation {
    # guest_accelerator is a single object, so the version is read directly
    # rather than through a list index (indexing it with [0] always errors,
    # which made try() fall back to an empty collection and the check pass
    # unconditionally). try() yields true when guest_accelerator or
    # gpu_driver is not set, so only a version that is actually provided
    # is validated.
    condition = try(
      contains(
        [
          "GPU_DRIVER_VERSION_UNSPECIFIED", "INSTALLATION_DISABLED",
          "DEFAULT", "LATEST"
        ],
        var.node_config.guest_accelerator.gpu_driver.version
      ),
      true
    )
    error_message = "Invalid GPU driver version."
  }
  validation {
    # The literal "null" string is only a sentinel so that an unset mode
    # passes the membership check.
    condition = contains(
      ["GCE_METADATA", "GKE_METADATA", "null"],
      coalesce(var.node_config.workload_metadata_config_mode, "null")
    )
    error_message = "node_config.workload_metadata_config_mode must be GCE_METADATA or GKE_METADATA."
  }
}
|
|
|
|
# Node counts. `initial` is mandatory inside the object, `current` is
# optional; the variable is non-nullable and defaults to one initial node.
variable "node_count" {
  description = "Number of nodes per instance group. Initial value can only be changed by recreation, current is ignored when autoscaling is used."
  type = object({
    current = optional(number)
    initial = number
  })
  default  = { initial = 1 }
  nullable = false
}
|
|
|
|
# Optional list of node locations; null when unset.
variable "node_locations" {
  description = "Node locations."
  type        = list(string)
  default     = null
}
|
|
|
|
# Nodepool-level options. The whole object is optional (default null); each
# nested section is itself optional.
variable "nodepool_config" {
  description = "Nodepool-level configuration."
  type = object({
    # Autoscaling limits; use_total_nodes defaults to false.
    autoscaling = optional(object({
      location_policy = optional(string)
      max_node_count  = optional(number)
      min_node_count  = optional(number)
      use_total_nodes = optional(bool, false)
    }))
    management = optional(object({
      auto_repair  = optional(bool)
      auto_upgrade = optional(bool)
    }))
    # `type` is mandatory whenever a placement policy is given.
    placement_policy = optional(object({
      type         = string
      policy_name  = optional(string)
      tpu_topology = optional(string)
    }))
    queued_provisioning = optional(bool, false)
    # max_surge and max_unavailable are mandatory whenever upgrade
    # settings are given.
    upgrade_settings = optional(object({
      max_surge       = number
      max_unavailable = number
      strategy        = optional(string)
      blue_green_settings = optional(object({
        node_pool_soak_duration = optional(string)
        standard_rollout_policy = optional(object({
          batch_percentage    = optional(number)
          batch_node_count    = optional(number)
          batch_soak_duration = optional(string)
        }))
      }))
    }))
  })
  default = null
}
|
|
|
|
# Required: no default is declared, so callers must always set it.
variable "project_id" {
  description = "Cluster project id."
  type        = string
}
|
|
|
|
# Optional reservation affinity (default null). When set,
# consume_reservation_type is mandatory; key and values are optional.
variable "reservation_affinity" {
  description = "Configuration of the desired reservation which instances could take capacity from."
  type = object({
    consume_reservation_type = string
    key                      = optional(string)
    values                   = optional(list(string))
  })
  default = null
}
|
|
|
|
# Optional map of resource manager tags; null when unset.
variable "resource_manager_tags" {
  description = "A map of resource manager tag keys and values to be attached to the nodes for managing Compute Engine firewalls using Network Firewall Policies."
  type        = map(string)
  default     = null
}
|
|
|
|
# Service account settings. All attributes are optional and `create`
# defaults to false; the variable is non-nullable and defaults to an empty
# object.
variable "service_account" {
  description = "Nodepool service account. If this variable is set to null, the default GCE service account will be used. If set and email is null, a service account will be created. If scopes are null a default will be used."
  type = object({
    create       = optional(bool, false)
    email        = optional(string)
    oauth_scopes = optional(list(string))
    display_name = optional(string)
  })
  default  = {}
  nullable = false
}
|
|
|
|
# Optional sole tenant node group; null when unset.
variable "sole_tenant_nodegroup" {
  description = "Sole tenant node group."
  type        = string
  default     = null
}
|
|
|
|
# Optional list of network tags; null when unset.
variable "tags" {
  description = "Network tags applied to nodes."
  type        = list(string)
  default     = null
}
|
|
|
|
# Map of taints, each entry carrying a value and an effect. The variable is
# non-nullable and defaults to an empty map.
variable "taints" {
  description = "Kubernetes taints applied to all nodes."
  type = map(object({
    value  = string
    effect = string
  }))
  default  = {}
  nullable = false
  validation {
    # Every taint's effect must be one of the three accepted constants.
    condition = alltrue([
      for taint in values(var.taints) : contains(
        ["NO_SCHEDULE", "PREFER_NO_SCHEDULE", "NO_EXECUTE"],
        taint.effect
      )
    ])
    error_message = "Invalid taint effect."
  }
}
|