[cloud-run-v2] Add ability to deploy OpenTelemetry Collector sidecar (#3071)
* [cloud-run-v2] Add ability to deploy OpenTelemetry Collector sidecar
  - Adds `depends_on` flag to container definition
  - Adds `port` to HTTP liveness & startup probes
* fix: add port to unmanaged resource's startup & liveness probes
* fix: add copyright boilerplate
* Fix README
---------
Co-authored-by: Julio Castillo <jccb@google.com>
This commit is contained in:
@@ -10,6 +10,7 @@ Cloud Run Services and Jobs, with support for IAM roles and Eventarc trigger cre
|
||||
- [Direct VPC Egress](#direct-vpc-egress)
|
||||
- [VPC Access Connector](#vpc-access-connector)
|
||||
- [Using Customer-Managed Encryption Key](#using-customer-managed-encryption-key)
|
||||
- [Deploying OpenTelemetry Collector sidecar](#deploying-opentelemetry-collector-sidecar)
|
||||
- [Eventarc triggers](#eventarc-triggers)
|
||||
- [PubSub](#pubsub)
|
||||
- [Audit logs](#audit-logs)
|
||||
@@ -308,6 +309,252 @@ module "cloud_run" {
|
||||
# tftest modules=3 resources=11 e2e
|
||||
```
|
||||
|
||||
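## Deploying OpenTelemetry Collector sidecar

This example runs the Google-built OpenTelemetry Collector as a sidecar next to the application container. The collector configuration shown below is stored in a Secret Manager secret and mounted into the `collector` container as `/etc/otelcol-google/config.yaml`. The `depends_on` attribute on the `hello` container makes it start only after the `collector` container, whose startup and liveness probes target the collector's health check endpoint on port 13133.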
|
||||
|
||||
```yaml
|
||||
# Reference: https://cloud.google.com/stackdriver/docs/instrumentation/opentelemetry-collector-cloud-run#gotc-provided-config
|
||||
|
||||
receivers:
|
||||
# Open two OTLP servers:
|
||||
# - On port 4317, open an OTLP GRPC server
|
||||
# - On port 4318, open an OTLP HTTP server
|
||||
#
|
||||
# Docs:
|
||||
# https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver/otlpreceiver
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: localhost:4317
|
||||
http:
|
||||
cors:
|
||||
# This effectively allows any origin
|
||||
# to make requests to the HTTP server.
|
||||
allowed_origins:
|
||||
- http://*
|
||||
- https://*
|
||||
endpoint: localhost:4318
|
||||
|
||||
# Using the prometheus scraper, scrape the Collector's self metrics.
|
||||
#
|
||||
# Docs:
|
||||
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/receiver/prometheusreceiver
|
||||
# https://opentelemetry.io/docs/collector/internal-telemetry/
|
||||
prometheus/self-metrics:
|
||||
config:
|
||||
scrape_configs:
|
||||
- job_name: otel-self-metrics
|
||||
scrape_interval: 1m
|
||||
static_configs:
|
||||
- targets:
|
||||
- localhost:8888
|
||||
|
||||
processors:
|
||||
# The batch processor is in place to regulate both the number of requests
|
||||
# being made and the size of those requests.
|
||||
#
|
||||
# Docs:
|
||||
# https://github.com/open-telemetry/opentelemetry-collector/tree/main/processor/batchprocessor
|
||||
batch:
|
||||
send_batch_max_size: 200
|
||||
send_batch_size: 200
|
||||
timeout: 5s
|
||||
|
||||
# The memorylimiter will check the memory usage of the collector process.
|
||||
#
|
||||
# Docs:
|
||||
# https://github.com/open-telemetry/opentelemetry-collector/tree/main/processor/memorylimiterprocessor
|
||||
memory_limiter:
|
||||
check_interval: 1s
|
||||
limit_percentage: 65
|
||||
spike_limit_percentage: 20
|
||||
|
||||
# The resourcedetection processor is configured to detect GCP resources.
|
||||
# Resource attributes that represent the GCP resource the collector is
|
||||
# running on will be attached to all telemetry that goes through this
|
||||
# processor.
|
||||
#
|
||||
# Docs:
|
||||
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourcedetectionprocessor
|
||||
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/resourcedetectionprocessor#gcp-metadata
|
||||
resourcedetection:
|
||||
detectors: [gcp]
|
||||
timeout: 10s
|
||||
|
||||
# The transform/collision processor ensures that any attributes that may
|
||||
# collide with the googlemanagedprometheus exporter's monitored resource
|
||||
# construction are moved to a similar name that is not reserved.
|
||||
transform/collision:
|
||||
metric_statements:
|
||||
- context: datapoint
|
||||
statements:
|
||||
- set(attributes["exported_location"], attributes["location"])
|
||||
- delete_key(attributes, "location")
|
||||
- set(attributes["exported_cluster"], attributes["cluster"])
|
||||
- delete_key(attributes, "cluster")
|
||||
- set(attributes["exported_namespace"], attributes["namespace"])
|
||||
- delete_key(attributes, "namespace")
|
||||
- set(attributes["exported_job"], attributes["job"])
|
||||
- delete_key(attributes, "job")
|
||||
- set(attributes["exported_instance"], attributes["instance"])
|
||||
- delete_key(attributes, "instance")
|
||||
- set(attributes["exported_project_id"], attributes["project_id"])
|
||||
- delete_key(attributes, "project_id")
|
||||
|
||||
exporters:
|
||||
# The googlecloud exporter will export telemetry to different
|
||||
# Google Cloud services:
|
||||
# Logs -> Cloud Logging
|
||||
# Metrics -> Cloud Monitoring
|
||||
# Traces -> Cloud Trace
|
||||
#
|
||||
# Docs:
|
||||
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/googlecloudexporter
|
||||
googlecloud:
|
||||
log:
|
||||
default_log_name: opentelemetry-collector
|
||||
|
||||
# The googlemanagedprometheus exporter will send metrics to
|
||||
# Google Managed Service for Prometheus.
|
||||
#
|
||||
# Docs:
|
||||
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/googlemanagedprometheusexporter
|
||||
googlemanagedprometheus:
|
||||
|
||||
extensions:
|
||||
# Opens an endpoint on 13133 that can be used to check the
|
||||
# status of the collector. Since this does not configure the
|
||||
# `path` config value, the endpoint will default to `/`.
|
||||
#
|
||||
# When running on Cloud Run, this extension is required.
|
||||
# In other environments it is recommended but may not be required for operation
|
||||
# (i.e. in Container-Optimized OS or other GCE environments).
|
||||
#
|
||||
# Docs:
|
||||
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/extension/healthcheckextension
|
||||
health_check:
|
||||
endpoint: 0.0.0.0:13133
|
||||
|
||||
service:
|
||||
extensions:
|
||||
- health_check
|
||||
pipelines:
|
||||
logs:
|
||||
receivers:
|
||||
- otlp
|
||||
processors:
|
||||
- resourcedetection
|
||||
- memory_limiter
|
||||
- batch
|
||||
exporters:
|
||||
- googlecloud
|
||||
metrics/otlp:
|
||||
receivers:
|
||||
- otlp
|
||||
processors:
|
||||
- transform/collision
|
||||
- resourcedetection
|
||||
- memory_limiter
|
||||
- batch
|
||||
exporters:
|
||||
- googlemanagedprometheus
|
||||
metrics/self-metrics:
|
||||
receivers:
|
||||
- prometheus/self-metrics
|
||||
processors:
|
||||
- resourcedetection
|
||||
- memory_limiter
|
||||
- batch
|
||||
exporters:
|
||||
- googlemanagedprometheus
|
||||
traces:
|
||||
receivers:
|
||||
- otlp
|
||||
processors:
|
||||
- resourcedetection
|
||||
- memory_limiter
|
||||
- batch
|
||||
exporters:
|
||||
- googlecloud
|
||||
telemetry:
|
||||
metrics:
|
||||
address: localhost:8888
|
||||
|
||||
# tftest-file id=otel-config path=config/otel-config.yaml
|
||||
```
|
||||
|
||||
```hcl
|
||||
module "secrets" {
|
||||
source = "./fabric/modules/secret-manager"
|
||||
project_id = var.project_id
|
||||
secrets = {
|
||||
otel-config = {}
|
||||
}
|
||||
iam = {
|
||||
otel-config = {
|
||||
"roles/secretmanager.secretAccessor" = [
|
||||
"serviceAccount:${var.project_number}-compute@developer.gserviceaccount.com",
|
||||
]
|
||||
}
|
||||
}
|
||||
versions = {
|
||||
otel-config = {
|
||||
v1 = { enabled = true, data = file("${path.module}/config/otel-config.yaml") }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
module "cloud_run" {
|
||||
source = "./fabric/modules/cloud-run-v2"
|
||||
project_id = var.project_id
|
||||
region = var.region
|
||||
name = "hello"
|
||||
containers = {
|
||||
hello = {
|
||||
image = "us-docker.pkg.dev/cloudrun/container/hello"
|
||||
ports = {
|
||||
default = {
|
||||
container_port = 3000
|
||||
}
|
||||
}
|
||||
depends_on = ["collector"]
|
||||
}
|
||||
collector = {
|
||||
image = "us-docker.pkg.dev/cloud-ops-agents-artifacts/google-cloud-opentelemetry-collector/otelcol-google:0.122.1"
|
||||
startup_probe = {
|
||||
http_get = {
|
||||
path = "/"
|
||||
port = 13133
|
||||
}
|
||||
timeout_seconds = 30
|
||||
period_seconds = 30
|
||||
}
|
||||
liveness_probe = {
|
||||
http_get = {
|
||||
path = "/"
|
||||
port = 13133
|
||||
}
|
||||
timeout_seconds = 30
|
||||
period_seconds = 30
|
||||
}
|
||||
volume_mounts = {
|
||||
"otel-config" = "/etc/otelcol-google/"
|
||||
}
|
||||
}
|
||||
}
|
||||
volumes = {
|
||||
otel-config = {
|
||||
secret = {
|
||||
name = "otel-config"
|
||||
version = "1"
|
||||
path = "config.yaml"
|
||||
}
|
||||
}
|
||||
}
|
||||
deletion_protection = false
|
||||
}
|
||||
# tftest modules=2 resources=4 files=otel-config inventory=service-otel-sidecar.yaml e2e
|
||||
```
|
||||
|
||||
## Eventarc triggers
|
||||
|
||||
### PubSub
|
||||
@@ -566,27 +813,27 @@ module "cloud_run" {
|
||||
|
||||
| name | description | type | required | default |
|
||||
|---|---|:---:|:---:|:---:|
|
||||
| [name](variables.tf#L178) | Name used for Cloud Run service. | <code>string</code> | ✓ | |
|
||||
| [project_id](variables.tf#L193) | Project id used for all resources. | <code>string</code> | ✓ | |
|
||||
| [region](variables.tf#L198) | Region used for all resources. | <code>string</code> | ✓ | |
|
||||
| [containers](variables.tf#L17) | Containers in name => attributes format. | <code title="map(object({ image = string command = optional(list(string)) args = optional(list(string)) env = optional(map(string)) env_from_key = optional(map(object({ secret = string version = string }))) liveness_probe = optional(object({ grpc = optional(object({ port = optional(number) service = optional(string) })) http_get = optional(object({ http_headers = optional(map(string)) path = optional(string) })) failure_threshold = optional(number) initial_delay_seconds = optional(number) period_seconds = optional(number) timeout_seconds = optional(number) })) ports = optional(map(object({ container_port = optional(number) name = optional(string) }))) resources = optional(object({ limits = optional(object({ cpu = string memory = string })) cpu_idle = optional(bool) startup_cpu_boost = optional(bool) })) startup_probe = optional(object({ grpc = optional(object({ port = optional(number) service = optional(string) })) http_get = optional(object({ http_headers = optional(map(string)) path = optional(string) })) tcp_socket = optional(object({ port = optional(number) })) failure_threshold = optional(number) initial_delay_seconds = optional(number) period_seconds = optional(number) timeout_seconds = optional(number) })) volume_mounts = optional(map(string)) }))">map(object({…}))</code> | | <code>{}</code> |
|
||||
| [create_job](variables.tf#L77) | Create Cloud Run Job instead of Service. | <code>bool</code> | | <code>false</code> |
|
||||
| [custom_audiences](variables.tf#L83) | Custom audiences for service. | <code>list(string)</code> | | <code>null</code> |
|
||||
| [deletion_protection](variables.tf#L89) | Deletion protection setting for this Cloud Run service. | <code>string</code> | | <code>null</code> |
|
||||
| [encryption_key](variables.tf#L95) | The full resource name of the Cloud KMS CryptoKey. | <code>string</code> | | <code>null</code> |
|
||||
| [eventarc_triggers](variables.tf#L101) | Event arc triggers for different sources. | <code title="object({ audit_log = optional(map(object({ method = string service = string }))) pubsub = optional(map(string)) service_account_email = optional(string) service_account_create = optional(bool, false) })">object({…})</code> | | <code>{}</code> |
|
||||
| [iam](variables.tf#L119) | IAM bindings for Cloud Run service in {ROLE => [MEMBERS]} format. | <code>map(list(string))</code> | | <code>{}</code> |
|
||||
| [ingress](variables.tf#L125) | Ingress settings. | <code>string</code> | | <code>null</code> |
|
||||
| [invoker_iam_disabled](variables.tf#L142) | Disables IAM permission check for run.routes.invoke for callers of this service. | <code>bool</code> | | <code>false</code> |
|
||||
| [labels](variables.tf#L148) | Resource labels. | <code>map(string)</code> | | <code>{}</code> |
|
||||
| [launch_stage](variables.tf#L154) | The launch stage as defined by Google Cloud Platform Launch Stages. | <code>string</code> | | <code>null</code> |
|
||||
| [managed_revision](variables.tf#L171) | Whether the Terraform module should control the deployment of revisions. | <code>bool</code> | | <code>true</code> |
|
||||
| [prefix](variables.tf#L183) | Optional prefix used for resource names. | <code>string</code> | | <code>null</code> |
|
||||
| [revision](variables.tf#L203) | Revision template configurations. | <code title="object({ name = optional(string) gen2_execution_environment = optional(bool) max_concurrency = optional(number) max_instance_count = optional(number) min_instance_count = optional(number) job = optional(object({ max_retries = optional(number) task_count = optional(number) }), {}) vpc_access = optional(object({ connector = optional(string) egress = optional(string) network = optional(string) subnet = optional(string) tags = optional(list(string)) }), {}) timeout = optional(string) })">object({…})</code> | | <code>{}</code> |
|
||||
| [service_account](variables.tf#L241) | Service account email. Unused if service account is auto-created. | <code>string</code> | | <code>null</code> |
|
||||
| [service_account_create](variables.tf#L247) | Auto-create service account. | <code>bool</code> | | <code>false</code> |
|
||||
| [tag_bindings](variables.tf#L253) | Tag bindings for this service, in key => tag value id format. | <code>map(string)</code> | | <code>{}</code> |
|
||||
| [volumes](variables.tf#L260) | Named volumes in containers in name => attributes format. | <code title="map(object({ secret = optional(object({ name = string default_mode = optional(string) path = optional(string) version = optional(string) mode = optional(string) })) cloud_sql_instances = optional(list(string)) empty_dir_size = optional(string) gcs = optional(object({ bucket = string is_read_only = optional(bool) })) nfs = optional(object({ server = string path = optional(string) is_read_only = optional(bool) })) }))">map(object({…}))</code> | | <code>{}</code> |
|
||||
| [name](variables.tf#L181) | Name used for Cloud Run service. | <code>string</code> | ✓ | |
|
||||
| [project_id](variables.tf#L196) | Project id used for all resources. | <code>string</code> | ✓ | |
|
||||
| [region](variables.tf#L201) | Region used for all resources. | <code>string</code> | ✓ | |
|
||||
| [containers](variables.tf#L17) | Containers in name => attributes format. | <code title="map(object({ image = string depends_on = optional(list(string)) command = optional(list(string)) args = optional(list(string)) env = optional(map(string)) env_from_key = optional(map(object({ secret = string version = string }))) liveness_probe = optional(object({ grpc = optional(object({ port = optional(number) service = optional(string) })) http_get = optional(object({ http_headers = optional(map(string)) path = optional(string) port = optional(number) })) failure_threshold = optional(number) initial_delay_seconds = optional(number) period_seconds = optional(number) timeout_seconds = optional(number) })) ports = optional(map(object({ container_port = optional(number) name = optional(string) }))) resources = optional(object({ limits = optional(object({ cpu = string memory = string })) cpu_idle = optional(bool) startup_cpu_boost = optional(bool) })) startup_probe = optional(object({ grpc = optional(object({ port = optional(number) service = optional(string) })) http_get = optional(object({ http_headers = optional(map(string)) path = optional(string) port = optional(number) })) tcp_socket = optional(object({ port = optional(number) })) failure_threshold = optional(number) initial_delay_seconds = optional(number) period_seconds = optional(number) timeout_seconds = optional(number) })) volume_mounts = optional(map(string)) }))">map(object({…}))</code> | | <code>{}</code> |
|
||||
| [create_job](variables.tf#L80) | Create Cloud Run Job instead of Service. | <code>bool</code> | | <code>false</code> |
|
||||
| [custom_audiences](variables.tf#L86) | Custom audiences for service. | <code>list(string)</code> | | <code>null</code> |
|
||||
| [deletion_protection](variables.tf#L92) | Deletion protection setting for this Cloud Run service. | <code>string</code> | | <code>null</code> |
|
||||
| [encryption_key](variables.tf#L98) | The full resource name of the Cloud KMS CryptoKey. | <code>string</code> | | <code>null</code> |
|
||||
| [eventarc_triggers](variables.tf#L104) | Event arc triggers for different sources. | <code title="object({ audit_log = optional(map(object({ method = string service = string }))) pubsub = optional(map(string)) service_account_email = optional(string) service_account_create = optional(bool, false) })">object({…})</code> | | <code>{}</code> |
|
||||
| [iam](variables.tf#L122) | IAM bindings for Cloud Run service in {ROLE => [MEMBERS]} format. | <code>map(list(string))</code> | | <code>{}</code> |
|
||||
| [ingress](variables.tf#L128) | Ingress settings. | <code>string</code> | | <code>null</code> |
|
||||
| [invoker_iam_disabled](variables.tf#L145) | Disables IAM permission check for run.routes.invoke for callers of this service. | <code>bool</code> | | <code>false</code> |
|
||||
| [labels](variables.tf#L151) | Resource labels. | <code>map(string)</code> | | <code>{}</code> |
|
||||
| [launch_stage](variables.tf#L157) | The launch stage as defined by Google Cloud Platform Launch Stages. | <code>string</code> | | <code>null</code> |
|
||||
| [managed_revision](variables.tf#L174) | Whether the Terraform module should control the deployment of revisions. | <code>bool</code> | | <code>true</code> |
|
||||
| [prefix](variables.tf#L186) | Optional prefix used for resource names. | <code>string</code> | | <code>null</code> |
|
||||
| [revision](variables.tf#L206) | Revision template configurations. | <code title="object({ name = optional(string) gen2_execution_environment = optional(bool) max_concurrency = optional(number) max_instance_count = optional(number) min_instance_count = optional(number) job = optional(object({ max_retries = optional(number) task_count = optional(number) }), {}) vpc_access = optional(object({ connector = optional(string) egress = optional(string) network = optional(string) subnet = optional(string) tags = optional(list(string)) }), {}) timeout = optional(string) })">object({…})</code> | | <code>{}</code> |
|
||||
| [service_account](variables.tf#L244) | Service account email. Unused if service account is auto-created. | <code>string</code> | | <code>null</code> |
|
||||
| [service_account_create](variables.tf#L250) | Auto-create service account. | <code>bool</code> | | <code>false</code> |
|
||||
| [tag_bindings](variables.tf#L256) | Tag bindings for this service, in key => tag value id format. | <code>map(string)</code> | | <code>{}</code> |
|
||||
| [volumes](variables.tf#L263) | Named volumes in containers in name => attributes format. | <code title="map(object({ secret = optional(object({ name = string default_mode = optional(string) path = optional(string) version = optional(string) mode = optional(string) })) cloud_sql_instances = optional(list(string)) empty_dir_size = optional(string) gcs = optional(object({ bucket = string is_read_only = optional(bool) })) nfs = optional(object({ server = string path = optional(string) is_read_only = optional(bool) })) }))">map(object({…}))</code> | | <code>{}</code> |
|
||||
| [vpc_connector_create](variables-vpcconnector.tf#L17) | Populate this to create a Serverless VPC Access connector. | <code title="object({ ip_cidr_range = optional(string) machine_type = optional(string) name = optional(string) network = optional(string) instances = optional(object({ max = optional(number) min = optional(number) }), {} ) throughput = optional(object({ max = optional(number) min = optional(number) }), {} ) subnet = optional(object({ name = optional(string) project_id = optional(string) }), {}) })">object({…})</code> | | <code>null</code> |
|
||||
|
||||
## Outputs
|
||||
|
||||
Reference in New Issue
Block a user