diff --git a/examples/data-solutions/gcs-to-bq-with-dataflow/README.md b/examples/data-solutions/gcs-to-bq-with-dataflow/README.md index 701e6b7bf..aa6a6eb70 100644 --- a/examples/data-solutions/gcs-to-bq-with-dataflow/README.md +++ b/examples/data-solutions/gcs-to-bq-with-dataflow/README.md @@ -112,21 +112,18 @@ You can check data imported into Google BigQuery from the Google Cloud Console U + ## Variables | name | description | type | required | default | |---|---|:---:|:---:|:---:| -| kms_project_id | Name for the new KMS Project. | string | ✓ | | -| service_project_id | Name for the new Service Project. | string | ✓ | | -| billing_account | Billing account id used as default for new projects. | string | | null | -| project_create | Set to true to create projects, will use existing ones by default. | bool | | false | +| prefix | Unique prefix used for resource names. Not used for project if 'project_create' is null. | string | ✓ | | +| project_id | Project id, references existing project if `project_create` is null. | string | ✓ | | +| project_create | Provide values if project creation is needed, uses existing project if null. Parent is in 'folders/nnn' or 'organizations/nnn' format | object({…}) | | null | | region | The region where resources will be deployed. | string | | "europe-west1" | -| root_node | The resource name of the parent Folder or Organization. Must be of the form folders/folder_id or organizations/org_id. | string | | null | -| vpc_ip_cidr_range | Ip range used in the subnet deployef in the Service Project. | string | | "10.0.0.0/20" | -| vpc_name | Name of the VPC created in the Service Project. | string | | "local" | -| vpc_subnet_name | Name of the subnet created in the Service Project. | string | | "subnet" | +| vpc_subnet_range | Ip range used for the VPC subnet created for the example. 
| string | | "10.0.0.0/20" | ## Outputs @@ -135,9 +132,10 @@ You can check data imported into Google BigQuery from the Google Cloud Console U | bq_tables | Bigquery Tables. | | | buckets | GCS Bucket Cloud KMS crypto keys. | | | data_ingestion_command | | | -| projects | Project ids. | | +| project_id | Project id. | | | vm | GCE VM. | | + diff --git a/examples/data-solutions/gcs-to-bq-with-dataflow/bq.tf b/examples/data-solutions/gcs-to-bq-with-dataflow/bq.tf new file mode 100644 index 000000000..da2f956b9 --- /dev/null +++ b/examples/data-solutions/gcs-to-bq-with-dataflow/bq.tf @@ -0,0 +1,65 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +module "bigquery-dataset" { + source = "../../../modules/bigquery-dataset" + project_id = module.project.project_id + id = "example_dataset" + location = var.region + access = { + reader-group = { role = "READER", type = "user" } + owner = { role = "OWNER", type = "user" } + } + access_identities = { + reader-group = module.service-account-bq.email + owner = module.service-account-bq.email + } + encryption_key = module.kms.keys.key-bq.id + tables = { + bq_import = { + friendly_name = "BQ import" + labels = {} + options = null + partitioning = { + field = null + range = null # use start/end/interval for range + time = null + } + schema = file("${path.module}/schema_bq_import.json") + options = { + clustering = null + expiration_time = null + encryption_key = module.kms.keys.key-bq.id + } + deletion_protection = false + }, + df_import = { + friendly_name = "Dataflow import" + labels = {} + options = null + partitioning = { + field = null + range = null # use start/end/interval for range + time = null + } + schema = file("${path.module}/schema_df_import.json") + options = { + clustering = null + expiration_time = null + encryption_key = module.kms.keys.key-bq.id + } + deletion_protection = false + } + } +} diff --git a/examples/data-solutions/gcs-to-bq-with-dataflow/gce.tf b/examples/data-solutions/gcs-to-bq-with-dataflow/gce.tf new file mode 100644 index 000000000..ce2ae6175 --- /dev/null +++ b/examples/data-solutions/gcs-to-bq-with-dataflow/gce.tf @@ -0,0 +1,54 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +locals { + vm-startup-script = join("\n", [ + "#! /bin/bash", + "apt-get update && apt-get install -y bash-completion git python3-venv gcc build-essential python-dev python3-dev", + "pip3 install --upgrade setuptools pip" + ]) +} + +module "vm" { + source = "../../../modules/compute-vm" + project_id = module.project.project_id + zone = "${var.region}-b" + name = "${var.prefix}-vm-0" + network_interfaces = [{ + network = module.vpc.self_link, + subnetwork = local.subnet_self_link, + nat = false, + addresses = null + }] + attached_disks = [{ + name = "data", size = 10, source = null, source_type = null, options = null + }] + boot_disk = { + image = "projects/debian-cloud/global/images/family/debian-10" + type = "pd-ssd" + size = 10 + encrypt_disk = true + } + encryption = { + encrypt_boot = true + disk_encryption_key_raw = null + kms_key_self_link = module.kms.key_ids.key-gce + } + metadata = { + startup-script = local.vm-startup-script + } + service_account = module.service-account-gce.email + service_account_scopes = ["https://www.googleapis.com/auth/cloud-platform"] + tags = ["ssh"] +} diff --git a/examples/data-solutions/gcs-to-bq-with-dataflow/gcs.tf b/examples/data-solutions/gcs-to-bq-with-dataflow/gcs.tf new file mode 100644 index 000000000..1bce29d37 --- /dev/null +++ b/examples/data-solutions/gcs-to-bq-with-dataflow/gcs.tf @@ -0,0 +1,49 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +module "gcs-data" { + source = "../../../modules/gcs" + project_id = module.project.project_id + prefix = var.prefix + name = "data" + location = var.region + storage_class = "REGIONAL" + iam = { + "roles/storage.admin" = [ + "serviceAccount:${module.service-account-gce.email}", + ], + "roles/storage.objectViewer" = [ + "serviceAccount:${module.service-account-df.email}", + ] + } + encryption_key = module.kms.keys.key-gcs.id + force_destroy = true +} + +module "gcs-df-tmp" { + source = "../../../modules/gcs" + project_id = module.project.project_id + prefix = var.prefix + name = "df-tmp" + location = var.region + storage_class = "REGIONAL" + iam = { + "roles/storage.admin" = [ + "serviceAccount:${module.service-account-gce.email}", + "serviceAccount:${module.service-account-df.email}", + ] + } + encryption_key = module.kms.keys.key-gcs.id + force_destroy = true +} diff --git a/examples/data-solutions/gcs-to-bq-with-dataflow/iam-sa.tf b/examples/data-solutions/gcs-to-bq-with-dataflow/iam-sa.tf new file mode 100644 index 000000000..2ea85f986 --- /dev/null +++ b/examples/data-solutions/gcs-to-bq-with-dataflow/iam-sa.tf @@ -0,0 +1,60 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +module "service-account-bq" { + source = "../../../modules/iam-service-account" + project_id = module.project.project_id + name = "bq-test" + prefix = var.prefix + iam_project_roles = { + (module.project.project_id) = [ + "roles/bigquery.admin", + "roles/logging.logWriter", + "roles/monitoring.metricWriter", + ] + } +} + +module "service-account-df" { + source = "../../../modules/iam-service-account" + project_id = module.project.project_id + name = "df-test" + prefix = var.prefix + iam_project_roles = { + (module.project.project_id) = [ + "roles/bigquery.dataOwner", + "roles/bigquery.jobUser", + "roles/bigquery.metadataViewer", + "roles/dataflow.worker", + "roles/storage.objectViewer", + ] + } +} + +module "service-account-gce" { + source = "../../../modules/iam-service-account" + project_id = module.project.project_id + name = "gce-test" + prefix = var.prefix + iam_project_roles = { + (module.project.project_id) = [ + "roles/bigquery.dataOwner", + "roles/bigquery.jobUser", + "roles/dataflow.admin", + "roles/iam.serviceAccountUser", + "roles/logging.logWriter", + "roles/monitoring.metricWriter", + ] + } +} diff --git a/examples/data-solutions/gcs-to-bq-with-dataflow/kms.tf b/examples/data-solutions/gcs-to-bq-with-dataflow/kms.tf new file mode 100644 index 000000000..b522d78b3 --- /dev/null +++ b/examples/data-solutions/gcs-to-bq-with-dataflow/kms.tf @@ -0,0 +1,63 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +module "kms" { + source = "../../../modules/kms" + project_id = module.project.project_id + keyring = { + name = "${var.prefix}-keyring", + location = var.region + } + keys = { + key-df = null + key-gce = null + key-gcs = null + key-bq = null + } + key_iam = { + key-gce = { + "roles/cloudkms.cryptoKeyEncrypterDecrypter" = [ + "serviceAccount:${module.project.service_accounts.robots.compute}" + ] + }, + key-gcs = { + "roles/cloudkms.cryptoKeyEncrypterDecrypter" = [ + "serviceAccount:${module.project.service_accounts.robots.storage}" + ] + }, + key-bq = { + "roles/cloudkms.cryptoKeyEncrypterDecrypter" = [ + "serviceAccount:${module.project.service_accounts.robots.bq}" + ] + }, + key-df = { + "roles/cloudkms.cryptoKeyEncrypterDecrypter" = [ + "serviceAccount:${module.project.service_accounts.robots.dataflow}", + "serviceAccount:${module.project.service_accounts.robots.compute}", + ] + } + } +} + +# module "kms-regional" { +# source = "../../../modules/kms" +# project_id = module.project.project_id +# keyring = { +# name = "my-keyring-regional", +# location = var.region +# } +# keys = { key-df = null } +# key_iam = { +# } +# } diff --git a/examples/data-solutions/gcs-to-bq-with-dataflow/main.tf b/examples/data-solutions/gcs-to-bq-with-dataflow/main.tf index 43768f412..e6562e2de 100644 --- a/examples/data-solutions/gcs-to-bq-with-dataflow/main.tf +++ b/examples/data-solutions/gcs-to-bq-with-dataflow/main.tf @@ -13,23 +13,17 @@ # limitations under the License. locals { - vm-startup-script = join("\n", [ - "#! 
/bin/bash", - "apt-get update && apt-get install -y bash-completion git python3-venv gcc build-essential python-dev python3-dev", - "pip3 install --upgrade setuptools pip" - ]) + subnet_name = module.vpc.subnets["${var.region}/${var.prefix}-subnet-0"].name + subnet_self_link = module.vpc.subnets["${var.region}/${var.prefix}-subnet-0"].self_link } -############################################################################### -# Projects - Centralized # -############################################################################### - -module "project-service" { +module "project" { source = "../../../modules/project" - name = var.service_project_id - parent = var.root_node - billing_account = var.billing_account - project_create = var.project_create + name = var.project_id + parent = try(var.project_create.parent, null) + billing_account = try(var.project_create.billing_account_id, null) + project_create = var.project_create != null + prefix = var.project_create == null ? null : var.prefix services = [ "bigquery.googleapis.com", "bigqueryreservation.googleapis.com", @@ -40,135 +34,19 @@ module "project-service" { "servicenetworking.googleapis.com", "storage.googleapis.com", ] - # TODO(jccb): doesn't work when project_create=false - # oslogin = true -} - -module "project-kms" { - source = "../../../modules/project" - name = var.kms_project_id - parent = var.root_node - billing_account = var.billing_account - project_create = var.project_create - services = [ - "cloudkms.googleapis.com", - ] -} - -############################################################################### -# Project Service Accounts # -############################################################################### - -module "service-account-bq" { - source = "../../../modules/iam-service-account" - project_id = module.project-service.project_id - name = "bq-test" - iam_project_roles = { - (var.service_project_id) = [ - "roles/logging.logWriter", - "roles/monitoring.metricWriter", - 
"roles/bigquery.admin" - ] + service_config = { + disable_on_destroy = false, disable_dependent_services = false } } -module "service-account-gce" { - source = "../../../modules/iam-service-account" - project_id = module.project-service.project_id - name = "gce-test" - iam_project_roles = { - (var.service_project_id) = [ - "roles/logging.logWriter", - "roles/monitoring.metricWriter", - "roles/dataflow.admin", - "roles/iam.serviceAccountUser", - "roles/bigquery.dataOwner", - "roles/bigquery.jobUser" # Needed to import data using 'bq' command - ] - } -} - -module "service-account-df" { - source = "../../../modules/iam-service-account" - project_id = module.project-service.project_id - name = "df-test" - iam_project_roles = { - (var.service_project_id) = [ - "roles/dataflow.worker", - "roles/bigquery.dataOwner", - "roles/bigquery.metadataViewer", - "roles/storage.objectViewer", - "roles/bigquery.jobUser" - ] - } -} - -############################################################################### -# KMS # -############################################################################### - -module "kms" { - source = "../../../modules/kms" - project_id = module.project-kms.project_id - keyring = { - name = "my-keyring", - location = var.region - } - keys = { - key-df = null - key-gce = null - key-gcs = null - key-bq = null - } - key_iam = { - key-gce = { - "roles/cloudkms.cryptoKeyEncrypterDecrypter" = [ - "serviceAccount:${module.project-service.service_accounts.robots.compute}" - ] - }, - key-gcs = { - "roles/cloudkms.cryptoKeyEncrypterDecrypter" = [ - "serviceAccount:${module.project-service.service_accounts.robots.storage}" - ] - }, - key-bq = { - "roles/cloudkms.cryptoKeyEncrypterDecrypter" = [ - "serviceAccount:${module.project-service.service_accounts.robots.bq}" - ] - }, - key-df = { - "roles/cloudkms.cryptoKeyEncrypterDecrypter" = [ - "serviceAccount:${module.project-service.service_accounts.robots.dataflow}", - 
"serviceAccount:${module.project-service.service_accounts.robots.compute}", - ] - } - } -} - -# module "kms-regional" { -# source = "../../../modules/kms" -# project_id = module.project-kms.project_id -# keyring = { -# name = "my-keyring-regional", -# location = var.region -# } -# keys = { key-df = null } -# key_iam = { -# } -# } - -############################################################################### -# Networking # -############################################################################### - module "vpc" { source = "../../../modules/net-vpc" - project_id = module.project-service.project_id - name = var.vpc_name + project_id = module.project.project_id + name = "${var.prefix}-vpc" subnets = [ { - ip_cidr_range = var.vpc_ip_cidr_range - name = var.vpc_subnet_name + ip_cidr_range = var.vpc_subnet_range + name = "${var.prefix}-subnet-0" region = var.region secondary_ip_range = {} } @@ -177,150 +55,15 @@ module "vpc" { module "vpc-firewall" { source = "../../../modules/net-vpc-firewall" - project_id = module.project-service.project_id + project_id = module.project.project_id network = module.vpc.name - admin_ranges = [var.vpc_ip_cidr_range] + admin_ranges = [var.vpc_subnet_range] } module "nat" { source = "../../../modules/net-cloudnat" - project_id = module.project-service.project_id + project_id = module.project.project_id region = var.region - name = "default" + name = "${var.prefix}-default" router_network = module.vpc.name } - -############################################################################### -# GCE # -############################################################################### - -module "vm" { - source = "../../../modules/compute-vm" - project_id = module.project-service.project_id - zone = "${var.region}-b" - name = "vm-example" - network_interfaces = [{ - network = module.vpc.self_link, - subnetwork = module.vpc.subnet_self_links["${var.region}/${var.vpc_subnet_name}"], - nat = false, - addresses = null - }] - attached_disks = [ 
- { - name = "data" - size = 10 - source = null - source_type = null - options = null - } - ] - boot_disk = { - image = "projects/debian-cloud/global/images/family/debian-10" - type = "pd-ssd" - size = 10 - encrypt_disk = true - } - encryption = { - encrypt_boot = true - disk_encryption_key_raw = null - kms_key_self_link = module.kms.key_ids.key-gce - } - metadata = { - startup-script = local.vm-startup-script - } - service_account = module.service-account-gce.email - service_account_scopes = ["https://www.googleapis.com/auth/cloud-platform"] - tags = ["ssh"] -} - -############################################################################### -# GCS # -############################################################################### - -module "kms-gcs" { - source = "../../../modules/gcs" - for_each = { - data = { - members = { - "roles/storage.admin" = [ - "serviceAccount:${module.service-account-gce.email}", - ], - "roles/storage.objectViewer" = [ - "serviceAccount:${module.service-account-df.email}", - ] - } - } - df-tmplocation = { - members = { - "roles/storage.admin" = [ - "serviceAccount:${module.service-account-gce.email}", - "serviceAccount:${module.service-account-df.email}", - ] - } - } - } - project_id = module.project-service.project_id - prefix = module.project-service.project_id - name = each.key - storage_class = "REGIONAL" - iam = each.value.members - location = var.region - encryption_key = module.kms.keys.key-gcs.id - force_destroy = true -} - -############################################################################### -# BQ # -############################################################################### - -module "bigquery-dataset" { - source = "../../../modules/bigquery-dataset" - project_id = module.project-service.project_id - id = "bq_dataset" - location = var.region - access = { - reader-group = { role = "READER", type = "user" } - owner = { role = "OWNER", type = "user" } - } - access_identities = { - reader-group = 
module.service-account-bq.email - owner = module.service-account-bq.email - } - encryption_key = module.kms.keys.key-bq.id - tables = { - bq_import = { - friendly_name = "BQ import" - labels = {} - options = null - partitioning = { - field = null - range = null # use start/end/interval for range - time = null - } - schema = file("${path.module}/schema_bq_import.json") - options = { - clustering = null - expiration_time = null - encryption_key = module.kms.keys.key-bq.id - } - deletion_protection = true - }, - df_import = { - friendly_name = "Dataflow import" - labels = {} - options = null - partitioning = { - field = null - range = null # use start/end/interval for range - time = null - } - schema = file("${path.module}/schema_df_import.json") - options = { - clustering = null - expiration_time = null - encryption_key = module.kms.keys.key-bq.id - } - deletion_protection = true - } - } -} diff --git a/examples/data-solutions/gcs-to-bq-with-dataflow/outputs.tf b/examples/data-solutions/gcs-to-bq-with-dataflow/outputs.tf index 1ae4a9fee..4f6547e92 100644 --- a/examples/data-solutions/gcs-to-bq-with-dataflow/outputs.tf +++ b/examples/data-solutions/gcs-to-bq-with-dataflow/outputs.tf @@ -20,8 +20,8 @@ output "bq_tables" { output "buckets" { description = "GCS Bucket Cloud KMS crypto keys." 
value = { - for name, bucket in module.kms-gcs : - bucket.name => bucket.url + data = module.gcs-data.name + df-tmp = module.gcs-df-tmp.name } } @@ -32,25 +32,22 @@ output "data_ingestion_command" { --max_num_workers=10 \ --autoscaling_algorithm=THROUGHPUT_BASED \ --region=${var.region} \ - --staging_location=${module.kms-gcs["df-tmplocation"].url} \ - --temp_location=${module.kms-gcs["df-tmplocation"].url}/ \ - --project=${var.service_project_id} \ - --input=${module.kms-gcs["data"].url}/### FILE NAME ###.csv \ + --staging_location=${module.gcs-df-tmp.url} \ + --temp_location=${module.gcs-df-tmp.url}/ \ + --project=${module.project.project_id} \ + --input=${module.gcs-data.url}/### FILE NAME ###.csv \ --output=${module.bigquery-dataset.dataset_id}.${module.bigquery-dataset.table_ids.df_import} \ --service_account_email=${module.service-account-df.email} \ - --network=${var.vpc_name} \ - --subnetwork=${var.vpc_subnet_name} \ + --network=${module.vpc.name} \ + --subnetwork=${local.subnet_name} \ --dataflow_kms_key=${module.kms.key_ids.key-df} \ --no_use_public_ips EOF } -output "projects" { - description = "Project ids." - value = { - service-project = module.project-service.project_id - kms-project = module.project-kms.project_id - } +output "project_id" { + description = "Project id." + value = module.project.project_id } output "vm" { diff --git a/examples/data-solutions/gcs-to-bq-with-dataflow/variables.tf b/examples/data-solutions/gcs-to-bq-with-dataflow/variables.tf index a81599a31..78a6f4bfc 100644 --- a/examples/data-solutions/gcs-to-bq-with-dataflow/variables.tf +++ b/examples/data-solutions/gcs-to-bq-with-dataflow/variables.tf @@ -13,25 +13,22 @@ # limitations under the License. -variable "billing_account" { - description = "Billing account id used as default for new projects." +variable "prefix" { + description = "Unique prefix used for resource names. Not used for project if 'project_create' is null." 
type = string - default = null } variable "project_create" { - description = "Set to true to create projects, will use existing ones by default." - type = bool - default = false + description = "Provide values if project creation is needed, uses existing project if null. Parent is in 'folders/nnn' or 'organizations/nnn' format" + type = object({ + billing_account_id = string + parent = string + }) + default = null } -variable "kms_project_id" { - description = "Name for the new KMS Project." - type = string -} - -variable "service_project_id" { - description = "Name for the new Service Project." +variable "project_id" { + description = "Project id, references existing project if `project_create` is null." type = string } @@ -41,33 +38,8 @@ variable "region" { default = "europe-west1" } -variable "root_node" { - description = "The resource name of the parent Folder or Organization. Must be of the form folders/folder_id or organizations/org_id." - type = string - default = null -} - -# FIXME(jccb): this is not used -# variable "ssh_source_ranges" { -# description = "IP CIDR ranges that will be allowed to connect via SSH to the onprem instance." -# type = list(string) -# default = ["0.0.0.0/0"] -# } - -variable "vpc_ip_cidr_range" { - description = "Ip range used in the subnet deployef in the Service Project." +variable "vpc_subnet_range" { + description = "Ip range used for the VPC subnet created for the example." type = string default = "10.0.0.0/20" } - -variable "vpc_name" { - description = "Name of the VPC created in the Service Project." - type = string - default = "local" -} - -variable "vpc_subnet_name" { - description = "Name of the subnet created in the Service Project." 
- type = string - default = "subnet" -} diff --git a/modules/bigquery-dataset/main.tf b/modules/bigquery-dataset/main.tf index fc8f8706d..47f8fcb53 100644 --- a/modules/bigquery-dataset/main.tf +++ b/modules/bigquery-dataset/main.tf @@ -191,7 +191,6 @@ resource "google_bigquery_table" "default" { type = each.value.partitioning.time.type } } - } resource "google_bigquery_table" "views" {