diff --git a/FACTORIES.md b/FACTORIES.md index 35e492bc3..1a52b505c 100644 --- a/FACTORIES.md +++ b/FACTORIES.md @@ -90,8 +90,6 @@ The following table details how FAST stages implement factory patterns. | **2-security** | `projects` | Module-Backed (Factory) | `project-factory` | | **2-security** | `certificate_authorities` | Stage-Implemented (Module) | `certificate-authority-service` | | **2-security** | `keyrings` (KMS) | Stage-Implemented (Module) | `kms` | -| **3-data-platform-dev** | `aspect_types` | Module-Backed (Factory) | `dataplex-aspect-types` | -| **3-data-platform-dev** | `data_domains` | Native (Complex) | Multiple | | **3-secops-dev** | `rules`, `reference_lists` | Module-Backed (Factory) | `secops-rules` | ## Maintenance Guide diff --git a/fast/stages/0-org-setup/datasets/classic-gcd/defaults.yaml b/fast/stages/0-org-setup/datasets/classic-gcd/defaults.yaml index f8ac4c1ea..f1e401c26 100644 --- a/fast/stages/0-org-setup/datasets/classic-gcd/defaults.yaml +++ b/fast/stages/0-org-setup/datasets/classic-gcd/defaults.yaml @@ -93,11 +93,3 @@ output_files: bucket: $storage_buckets:iac-0/iac-stage-state prefix: 2-project-factory service_account: $iam_principals:service_accounts/iac-0/iac-pf-ro - 3-data-platform-dev: - bucket: $storage_buckets:iac-0/iac-stage-state - prefix: 3-data-platform-dev - service_account: $iam_principals:service_accounts/iac-0/iac-dp-dev-rw - 3-data-platform-dev-ro: - bucket: $storage_buckets:iac-0/iac-stage-state - prefix: 3-data-platform-dev - service_account: $iam_principals:service_accounts/iac-0/iac-dp-dev-ro diff --git a/fast/stages/0-org-setup/datasets/classic-gcd/projects/core/iac-0.yaml b/fast/stages/0-org-setup/datasets/classic-gcd/projects/core/iac-0.yaml index 18029513e..3fe001a5c 100644 --- a/fast/stages/0-org-setup/datasets/classic-gcd/projects/core/iac-0.yaml +++ b/fast/stages/0-org-setup/datasets/classic-gcd/projects/core/iac-0.yaml @@ -130,12 +130,6 @@ buckets: - $iam_principals:service_accounts/iac-0/iac-pf-rw $custom_roles:storage_viewer: - $iam_principals:service_accounts/iac-0/iac-pf-ro - 3-data-platform-dev: - iam: - roles/storage.admin: - - $iam_principals:service_accounts/iac-0/iac-dp-dev-rw - $custom_roles:storage_viewer: - - $iam_principals:service_accounts/iac-0/iac-dp-dev-ro # Terraform state bucket for FAST outputs iac-outputs: description: Terraform state for the org-level automation. @@ -143,14 +137,12 @@ buckets: iam: roles/storage.admin: - $iam_principals:service_accounts/iac-0/iac-org-rw - - $iam_principals:service_accounts/iac-0/iac-dp-dev-rw - $iam_principals:service_accounts/iac-0/iac-networking-rw - $iam_principals:service_accounts/iac-0/iac-security-rw - $iam_principals:service_accounts/iac-0/iac-pf-rw - $iam_principals:service_accounts/iac-0/iac-vpcsc-rw $custom_roles:storage_viewer: - $iam_principals:service_accounts/iac-0/iac-org-ro - - $iam_principals:service_accounts/iac-0/iac-dp-dev-ro - $iam_principals:service_accounts/iac-0/iac-networking-ro - $iam_principals:service_accounts/iac-0/iac-security-ro - $iam_principals:service_accounts/iac-0/iac-pf-ro @@ -196,11 +188,6 @@ service_accounts: display_name: IaC service account for project factory (read-only). iac-pf-rw: display_name: IaC service account for project factory (read-write). - # IaC service accounts for data platform (dev) stage - iac-dp-dev-ro: - display_name: IaC service account for data platform dev (read-only). - iac-dp-dev-rw: - display_name: IaC service account for data platform dev (read-write). # workload_identity_pools: # default: # display_name: Default pool for CI/CD. diff --git a/fast/stages/0-org-setup/datasets/classic/defaults.yaml b/fast/stages/0-org-setup/datasets/classic/defaults.yaml index ae1c16a48..32247348c 100644 --- a/fast/stages/0-org-setup/datasets/classic/defaults.yaml +++ b/fast/stages/0-org-setup/datasets/classic/defaults.yaml @@ -86,11 +86,3 @@ output_files: bucket: $storage_buckets:iac-0/iac-stage-state prefix: 2-project-factory service_account: $iam_principals:service_accounts/iac-0/iac-pf-ro - 3-data-platform-dev: - bucket: $storage_buckets:iac-0/iac-stage-state - prefix: 3-data-platform-dev - service_account: $iam_principals:service_accounts/iac-0/iac-dp-dev-rw - 3-data-platform-dev-ro: - bucket: $storage_buckets:iac-0/iac-stage-state - prefix: 3-data-platform-dev - service_account: $iam_principals:service_accounts/iac-0/iac-dp-dev-ro diff --git a/fast/stages/0-org-setup/datasets/classic/projects/core/iac-0.yaml b/fast/stages/0-org-setup/datasets/classic/projects/core/iac-0.yaml index f648827e0..91e49195a 100644 --- a/fast/stages/0-org-setup/datasets/classic/projects/core/iac-0.yaml +++ b/fast/stages/0-org-setup/datasets/classic/projects/core/iac-0.yaml @@ -135,12 +135,6 @@ buckets: - $iam_principals:service_accounts/iac-0/iac-pf-rw $custom_roles:storage_viewer: - $iam_principals:service_accounts/iac-0/iac-pf-ro - 3-data-platform-dev: - iam: - roles/storage.admin: - - $iam_principals:service_accounts/iac-0/iac-dp-dev-rw - $custom_roles:storage_viewer: - - $iam_principals:service_accounts/iac-0/iac-dp-dev-ro # Terraform state bucket for FAST outputs iac-outputs: description: Terraform state for the org-level automation. @@ -148,14 +142,12 @@ buckets: iam: roles/storage.admin: - $iam_principals:service_accounts/iac-0/iac-org-rw - - $iam_principals:service_accounts/iac-0/iac-dp-dev-rw - $iam_principals:service_accounts/iac-0/iac-networking-rw - $iam_principals:service_accounts/iac-0/iac-security-rw - $iam_principals:service_accounts/iac-0/iac-pf-rw - $iam_principals:service_accounts/iac-0/iac-vpcsc-rw $custom_roles:storage_viewer: - $iam_principals:service_accounts/iac-0/iac-org-ro - - $iam_principals:service_accounts/iac-0/iac-dp-dev-ro - $iam_principals:service_accounts/iac-0/iac-networking-ro - $iam_principals:service_accounts/iac-0/iac-security-ro - $iam_principals:service_accounts/iac-0/iac-pf-ro @@ -201,11 +193,6 @@ service_accounts: display_name: IaC service account for project factory (read-only). iac-pf-rw: display_name: IaC service account for project factory (read-write). - # IaC service accounts for data platform (dev) stage - iac-dp-dev-ro: - display_name: IaC service account for data platform dev (read-only). - iac-dp-dev-rw: - display_name: IaC service account for data platform dev (read-write). # workload_identity_pools: # default: # display_name: Default pool for CI/CD. diff --git a/fast/stages/0-org-setup/datasets/hardened/defaults.yaml b/fast/stages/0-org-setup/datasets/hardened/defaults.yaml index ae1c16a48..32247348c 100644 --- a/fast/stages/0-org-setup/datasets/hardened/defaults.yaml +++ b/fast/stages/0-org-setup/datasets/hardened/defaults.yaml @@ -86,11 +86,3 @@ output_files: bucket: $storage_buckets:iac-0/iac-stage-state prefix: 2-project-factory service_account: $iam_principals:service_accounts/iac-0/iac-pf-ro - 3-data-platform-dev: - bucket: $storage_buckets:iac-0/iac-stage-state - prefix: 3-data-platform-dev - service_account: $iam_principals:service_accounts/iac-0/iac-dp-dev-rw - 3-data-platform-dev-ro: - bucket: $storage_buckets:iac-0/iac-stage-state - prefix: 3-data-platform-dev - service_account: $iam_principals:service_accounts/iac-0/iac-dp-dev-ro diff --git a/fast/stages/0-org-setup/datasets/hardened/projects/core/iac-0.yaml b/fast/stages/0-org-setup/datasets/hardened/projects/core/iac-0.yaml index 5fb24b960..a0a302f62 100644 --- a/fast/stages/0-org-setup/datasets/hardened/projects/core/iac-0.yaml +++ b/fast/stages/0-org-setup/datasets/hardened/projects/core/iac-0.yaml @@ -220,12 +220,6 @@ buckets: - $iam_principals:service_accounts/iac-0/iac-pf-rw $custom_roles:storage_viewer: - $iam_principals:service_accounts/iac-0/iac-pf-ro - 3-data-platform-dev: - iam: - roles/storage.admin: - - $iam_principals:service_accounts/iac-0/iac-dp-dev-rw - $custom_roles:storage_viewer: - - $iam_principals:service_accounts/iac-0/iac-dp-dev-ro # Terraform state bucket for FAST outputs iac-outputs: description: Terraform state for the org-level automation. @@ -259,14 +253,12 @@ buckets: iam: roles/storage.admin: - $iam_principals:service_accounts/iac-0/iac-org-rw - - $iam_principals:service_accounts/iac-0/iac-dp-dev-rw - $iam_principals:service_accounts/iac-0/iac-networking-rw - $iam_principals:service_accounts/iac-0/iac-security-rw - $iam_principals:service_accounts/iac-0/iac-pf-rw - $iam_principals:service_accounts/iac-0/iac-vpcsc-rw $custom_roles:storage_viewer: - $iam_principals:service_accounts/iac-0/iac-org-ro - - $iam_principals:service_accounts/iac-0/iac-dp-dev-ro - $iam_principals:service_accounts/iac-0/iac-networking-ro - $iam_principals:service_accounts/iac-0/iac-security-ro - $iam_principals:service_accounts/iac-0/iac-pf-ro @@ -312,11 +304,6 @@ service_accounts: display_name: IaC service account for project factory (read-only). iac-pf-rw: display_name: IaC service account for project factory (read-write). - # IaC service accounts for data platform (dev) stage - iac-dp-dev-ro: - display_name: IaC service account for data platform dev (read-only). - iac-dp-dev-rw: - display_name: IaC service account for data platform dev (read-write). # workload_identity_pools: # default: # display_name: Default pool for CI/CD. diff --git a/fast/stages/2-security/datasets/classic/projects/dev-sec-core-0.yaml b/fast/stages/2-security/datasets/classic/projects/dev-sec-core-0.yaml index 82a1d77b1..75321bc22 100644 --- a/fast/stages/2-security/datasets/classic/projects/dev-sec-core-0.yaml +++ b/fast/stages/2-security/datasets/classic/projects/dev-sec-core-0.yaml @@ -28,7 +28,6 @@ iam_bindings: key_delegated: members: - $iam_principals:service_accounts/iac-0/iac-pf-rw - - $iam_principals:service_accounts/iac-0/iac-dp-dev-rw role: roles/cloudkms.admin condition: title: Delegated IAM grant on keys. diff --git a/fast/stages/3-data-platform-dev/.fast-stage.env b/fast/stages/3-data-platform-dev/.fast-stage.env deleted file mode 100644 index 470f66550..000000000 --- a/fast/stages/3-data-platform-dev/.fast-stage.env +++ /dev/null @@ -1,5 +0,0 @@ -FAST_STAGE_DESCRIPTION="Data Platform (dev)" -FAST_STAGE_LEVEL=3 -FAST_STAGE_NAME=data-platform-dev -FAST_STAGE_DEPS="0-globals 0-org-setup" -FAST_STAGE_OPTIONAL="2-networking 2-security" diff --git a/fast/stages/3-data-platform-dev/README.md b/fast/stages/3-data-platform-dev/README.md deleted file mode 100644 index 6182510c8..000000000 --- a/fast/stages/3-data-platform-dev/README.md +++ /dev/null @@ -1,376 +0,0 @@ -# Data Platform - -This stage focuses on the creation and management of an opinionated Data Platform architecture based on Google Cloud best practices. Its architecture is designed to be reliable, robust, and scalable, facilitating the continuous onboarding of new Data Products (or data workloads). - -The Data Platform's foundation, established in this stage, provides core capabilities without prescribing specific data handling, computation, or processing for individual workloads or Data Products. This allows flexibility in the technology choices for individual data domains, product and teams. The platform's approach is to encourage shared patterns, aiming to optimize, standardize, accelerate adoption, and ultimately reduce implementation costs and management overhead across Data Products. - -While our solution is conceptually guided by [Data Mesh principles on Google Cloud Platform](https://cloud.google.com/architecture/data-mesh), its adoption doesn't strictly require adherence to every Data Mesh concept. However, the 'Data as a Product' principle that treats data as a first-class citizen is fundamental to our implementation as well. For foundational elements like resource hierarchy, networking, and security, this stage can integrate with established [FAST stages](../README.md). Nevertheless, FAST is not a mandatory prerequisite; the solution can also be deployed independently, as long as all necessary dependencies are satisfied. - - -- [Design Overview and Choices](#design-overview-and-choices) - - [Data Platform Architecture](#data-platform-architecture) - - [Folder and Project Structure](#folder-and-project-structure) - - [Central Shared Services (Federated Governance)](#central-shared-services-federated-governance) - - [Data Domains (Domain-Driven Ownership)](#data-domains-domain-driven-ownership) - - [Data Products (DaaP)](#data-products-daap) - - [Teams and Personas](#teams-and-personas) - - [Central Data Platform Team](#central-data-platform-team) - - [Data Domain Team](#data-domain-team) - - [Data Product Team](#data-product-team) -- [How to run this stage](#how-to-run-this-stage) - - [FAST prerequisites](#fast-prerequisites) - - [Provider and Terraform variables](#provider-and-terraform-variables) - - [Variable Configuration](#variable-configuration) - - [CMEK Configuration](#cmek-configuration) - - [Data Domain and Product Data Files](#data-domain-and-product-data-files) - - [Context replacements](#context-replacements) -- [Files](#files) -- [Variables](#variables) -- [Outputs](#outputs) - - -## Design Overview and Choices - -### Data Platform Architecture - -The following diagram represent the high-level architecture of the Data Platform related projects and their associated resources managed by this stage: - -
-
-
-
-
gcs · iam-service-account | |
-| [data-domains-composer.tf](./data-domains-composer.tf) | None | iam-service-account | google_composer_environment |
-| [data-domains.tf](./data-domains.tf) | None | folder · iam-service-account · project | |
-| [data-products-automation.tf](./data-products-automation.tf) | Data product automation resources. | gcs · iam-service-account | |
-| [data-products-exposure.tf](./data-products-exposure.tf) | Data product exposure layer resources. | bigquery-dataset · gcs | |
-| [data-products.tf](./data-products.tf) | Data product project, service account and exposed resources. | iam-service-account · project | |
-| [factory.tf](./factory.tf) | None | | |
-| [main.tf](./main.tf) | Locals and project-level resources. | data-catalog-policy-tag · dataplex-aspect-types · project | |
-| [outputs.tf](./outputs.tf) | Stage outputs. | | google_storage_bucket_object · local_file |
-| [variables-fast.tf](./variables-fast.tf) | None | | |
-| [variables.tf](./variables.tf) | Module variables. | | |
-
-## Variables
-
-| name | description | type | required | default | producer |
-|---|---|:---:|:---:|:---:|:---:|
-| [automation](variables-fast.tf#L17) | Automation resources created by the bootstrap stage. | object({…}) | ✓ | | 0-org-setup |
-| [billing_account](variables-fast.tf#L26) | Billing account id. If billing account is not part of the same org set `is_org_level` to false. | object({…}) | ✓ | | 0-org-setup |
-| [environments](variables-fast.tf#L34) | Environment names. | object({…}) | ✓ | | 0-org-setup |
-| [prefix](variables-fast.tf#L69) | Prefix used for resources that need unique names. Use a maximum of 9 chars for organizations, and 11 chars for tenants. | string | ✓ | | 0-org-setup |
-| [aspect_types](variables.tf#L17) | Aspect templates. Merged with those defined via the factory. | map(object({…})) | | {} | |
-| [central_project_config](variables.tf#L48) | Configuration for the top-level central project. | object({…}) | | {} | |
-| [encryption_keys](variables.tf#L90) | Default encryption keys for services, in service => { region => key id } format. Overridable on a per-object basis. | object({…}) | | {} | |
-| [exposure_config](variables.tf#L101) | Data exposure configuration. | object({…}) | | {} | |
-| [factories_config](variables.tf#L119) | Configuration for the resource factories. | object({…}) | | {} | |
-| [folder_ids](variables-fast.tf#L45) | Folder name => id mappings. | map(string) | | {} | 0-org-setup |
-| [host_project_ids](variables-fast.tf#L53) | Shared VPC host project name => id mappings. | map(string) | | {} | 2-networking |
-| [kms_keys](variables-fast.tf#L61) | KMS key ids. | map(string) | | {} | 2-security |
-| [location](variables.tf#L134) | Default location used when no location is specified. | string | | "europe-west1" | |
-| [outputs_location](variables.tf#L141) | Enable writing provider, tfvars and CI/CD workflow files to local filesystem. Leave null to disable. | string | | null | |
-| [regions](variables-fast.tf#L79) | Region mappings. | map(string) | | {} | 2-networking |
-| [secure_tags](variables.tf#L147) | Resource manager tags created in the central project. | map(object({…})) | | {} | |
-| [stage_config](variables.tf#L168) | Stage configuration used to find environment and resource ids, and to generate names. | object({…}) | | {…} | |
-| [subnet_self_links](variables-fast.tf#L87) | Subnet VPC name => { name => self link } mappings. | map(map(string)) | | {} | 2-networking |
-| [tag_values](variables-fast.tf#L95) | FAST-managed resource manager tag values. | map(string) | | {} | 0-org-setup |
-| [vpc_self_links](variables-fast.tf#L103) | Shared VPC name => self link mappings. | map(string) | | {} | 2-networking |
-
-## Outputs
-
-| name | description | sensitive | consumers |
-|---|---|:---:|---|
-| [aspect_types](outputs.tf#L201) | Aspect types defined in central project. | | |
-| [central_project](outputs.tf#L206) | Central project attributes. | | |
-| [data_domains](outputs.tf#L211) | Data domain attributes. | | |
-| [policy_tags](outputs.tf#L216) | Policy tags defined in central project. | | |
-| [secure_tags](outputs.tf#L221) | Secure tags defined in central project. | | |
-
diff --git a/fast/stages/3-data-platform-dev/data-domains-automation.tf b/fast/stages/3-data-platform-dev/data-domains-automation.tf
deleted file mode 100644
index f149ed323..000000000
--- a/fast/stages/3-data-platform-dev/data-domains-automation.tf
+++ /dev/null
@@ -1,78 +0,0 @@
-/**
- * Copyright 2025 Google LLC
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-# tfdoc:file:description Data product automation resources.
-
-locals {
- dd_automation = {
- for k, v in local.data_domains :
- k => v if v.automation != null
- }
- dd_automation_keys = {
- for k, v in local.dd_automation : k => try(
- v.automation.encryption_key,
- var.encryption_keys.storage[try(
- v.automation.location,
- var.location
- )],
- null
- )
- }
-}
-
-module "dd-automation-bucket" {
- source = "../../../modules/gcs"
- for_each = local.dd_automation
- project_id = module.dd-projects[each.key].project_id
- prefix = local.prefix
- name = "${each.value.short_name}-state"
- location = try(
- each.value.automation.location,
- var.location
- )
- encryption_key = local.dd_automation_keys[each.key]
- iam = {
- "roles/storage.admin" = [
- module.dd-automation-sa["${each.key}/rw"].iam_email
- ]
- "roles/storage.objectViewer" = concat(
- [
- module.dd-automation-sa["${each.key}/ro"].iam_email
- ],
- [
- for m in each.value.automation.impersonation_principals : lookup(
- var.factories_config.context.iam_principals, m, m
- )
- ]
- )
- }
-}
-
-module "dd-automation-sa" {
- source = "../../../modules/iam-service-account"
- for_each = { for v in local.dd_automation_sa : v.key => v }
- project_id = module.dd-projects[each.value.dd].project_id
- prefix = each.value.prefix
- name = each.value.name
- description = each.value.description
- iam = {
- "roles/iam.serviceAccountTokenCreator" = [
- for m in each.value.impersonation_principals : lookup(
- var.factories_config.context.iam_principals, m, m
- )
- ]
- }
-}
diff --git a/fast/stages/3-data-platform-dev/data-domains-composer.tf b/fast/stages/3-data-platform-dev/data-domains-composer.tf
deleted file mode 100644
index 79fc675ff..000000000
--- a/fast/stages/3-data-platform-dev/data-domains-composer.tf
+++ /dev/null
@@ -1,119 +0,0 @@
-/**
- * Copyright 2025 Google LLC
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-locals {
- dd_composer = {
- for k, v in local.data_domains : k => merge(
- { region = var.location, short_name = v.short_name },
- try(v.deploy_config.composer, {})
- ) if(v.deploy_config.composer != null)
- }
- dd_composer_keys = {
- for k, v in local.dd_composer : k => try(
- v.encryption_key,
- var.encryption_keys.composer[v.region],
- null
- )
- }
-}
-
-module "dd-composer-sa" {
- source = "../../../modules/iam-service-account"
- for_each = local.dd_composer
- project_id = module.dd-projects[each.key].project_id
- prefix = local.prefix
- name = "${each.value.short_name}-cmp-sa"
- description = "Composer Service Account."
-}
-
-resource "google_composer_environment" "default" {
- for_each = local.dd_composer
- project = module.dd-projects-iam[each.key].project_id
- name = "${var.prefix}-${each.key}"
- region = each.value.region
- config {
- enable_private_builds_only = try(each.value.private_builds, true)
- enable_private_environment = try(each.value.private_environment, true)
- environment_size = try(
- each.value.environment_size,
- "ENVIRONMENT_SIZE_SMALL"
- )
- dynamic "encryption_config" {
- for_each = local.dd_composer_keys[each.key] == null ? [] : [""]
- content {
- kms_key_name = lookup(
- local.kms_keys,
- local.dd_composer_keys[each.key],
- local.dd_composer_keys[each.key]
- )
- }
- }
- # TODO: implement the same context fail mode used in the project factory
- node_config {
- service_account = try(
- each.value.node_config.service_account,
- module.dd-composer-sa[each.key].email
- )
- network = try(
- var.vpc_self_links[each.value.node_config.network],
- each.value.node_config.network,
- null
- )
- subnetwork = try(
- var.subnet_self_links[each.value.node_config.network][each.value.node_config.subnetwork],
- each.value.node_config.subnetwork,
- null
- )
- }
- software_config {
- image_version = "composer-3-airflow-2"
- cloud_data_lineage_integration {
- enabled = true
- }
- }
- workloads_config {
- dag_processor {
- cpu = try(each.value.workloads_config.dag_processor.cpu, 0.5)
- memory_gb = try(each.value.workloads_config.dag_processor.memory_gb, 2)
- storage_gb = try(each.value.workloads_config.dag_processor.storage_gb, 1)
- count = try(each.value.workloads_config.dag_processor.count, 1)
- }
- scheduler {
- cpu = try(each.value.workloads_config.scheduler.cpu, 0.5)
- memory_gb = try(each.value.workloads_config.scheduler.memory_gb, 2)
- storage_gb = try(each.value.workloads_config.scheduler.storage_gb, 1)
- count = try(each.value.workloads_config.scheduler.count, 1)
- }
- triggerer {
- cpu = try(each.value.workloads_config.triggerer.cpu, 0.5)
- memory_gb = try(each.value.workloads_config.triggerer.memory_gb, 2)
- count = try(each.value.workloads_config.triggerer.count, 1)
- }
- web_server {
- cpu = try(each.value.workloads_config.web_server.cpu, 0.5)
- memory_gb = try(each.value.workloads_config.web_server.memory_gb, 2)
- storage_gb = try(each.value.workloads_config.web_server.storage_gb, 1)
- }
- worker {
- cpu = try(each.value.workloads_config.worker.cpu, 0.5)
- memory_gb = try(each.value.workloads_config.worker.memory_gb, 2)
- storage_gb = try(each.value.workloads_config.worker.storage_gb, 1)
- min_count = try(each.value.workloads_config.worker.min_count, 1)
- max_count = try(each.value.workloads_config.worker.max_count, 1)
- }
- }
- }
-}
diff --git a/fast/stages/3-data-platform-dev/data-domains.tf b/fast/stages/3-data-platform-dev/data-domains.tf
deleted file mode 100644
index 4d70747f9..000000000
--- a/fast/stages/3-data-platform-dev/data-domains.tf
+++ /dev/null
@@ -1,263 +0,0 @@
-/**
- * Copyright 2025 Google LLC
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-locals {
- dd_services = {
- for k, v in local.data_domains : k => distinct(concat(
- v.project_config.services,
- lookup(local.dd_composer, k, null) == null ? [] : [
- "composer.googleapis.com",
- "storage.googleapis.com"
- ]
- ))
- }
-}
-
-module "dd-folders" {
- source = "../../../modules/folder"
- for_each = local.data_domains
- parent = var.folder_ids[var.stage_config.name]
- name = each.value.name
- iam = {
- for k, v in each.value.folder_config.iam : k => [
- for m in v : lookup(
- var.factories_config.context.iam_principals, m, m
- )
- ]
- }
- iam_bindings = {
- for k, v in each.value.folder_config.iam_bindings : k => merge(v, {
- members = [
- for m in v.members : lookup(
- var.factories_config.context.iam_principals, m, m
- )
- ]
- condition = try(v.condition, null) == null ? null : {
- title = v.condition.title
- description = try(v.condition.description, null)
- expression = templatestring(v.condition.expression, {
- tag_values = local.tag_values
- })
- }
- })
- }
- iam_bindings_additive = {
- for k, v in each.value.folder_config.iam_bindings_additive : k => merge(v, {
- member = lookup(
- var.factories_config.context.iam_principals, v.member, v.member
- )
- condition = try(v.condition, null) == null ? null : {
- title = v.condition.title
- description = try(v.condition.description, null)
- expression = templatestring(v.condition.expression, {
- tag_values = local.tag_values
- })
- }
- })
- }
- iam_by_principals = {
- for principal, roles_list in {
- for k, v in each.value.folder_config.iam_by_principals :
- lookup(var.factories_config.context.iam_principals, k, k) => v...
- } :
- principal => flatten(roles_list)
- }
-}
-
-module "dd-dp-folders" {
- source = "../../../modules/folder"
- for_each = local.data_domains
- parent = module.dd-folders[each.key].id
- name = "Data Products"
- iam = try(each.value.deploy_config.composer, null) == null ? {} : {
- "roles/iam.serviceAccountTokenCreator" = [
- module.dd-composer-sa[each.key].iam_email
- ]
- }
-}
-
-module "dd-projects" {
- source = "../../../modules/project"
- for_each = local.data_domains
- billing_account = var.billing_account.id
- name = "${each.value.short_name}-shared-0"
- parent = module.dd-folders[each.key].id
- prefix = local.prefix
- labels = {
- data_domain = each.key
- }
- services = local.dd_services[each.key]
- service_encryption_key_ids = merge(
- lookup(local.dd_composer, each.key, null) == null ? {} : {
- "composer.googleapis.com" = compact([
- try(local.dd_composer_keys[each.key], null) == null
- ? null
- : lookup(
- local.kms_keys,
- local.dd_composer_keys[each.key],
- local.dd_composer_keys[each.key]
- )
- ])
- },
- lookup(local.dd_automation_keys, each.key, null) == null ? {} : {
- "storage.googleapis.com" = compact([
- try(local.dd_automation_keys[each.key], null) == null
- ? null
- : lookup(
- local.kms_keys,
- local.dd_automation_keys[each.key],
- local.dd_automation_keys[each.key]
- )
- ])
- },
- )
-}
-
-module "dd-projects-iam" {
- source = "../../../modules/project"
- for_each = local.data_domains
- name = module.dd-projects[each.key].project_id
- project_reuse = {
- use_data_source = false
- attributes = {
- name = module.dd-projects[each.key].name
- number = module.dd-projects[each.key].number
- services_enabled = local.dd_services[each.key]
- }
- }
- iam = {
- for k, v in each.value.project_config.iam : k => [
- for m in v : try(
- var.factories_config.context.iam_principals[m],
- module.dd-automation-sa["${each.key}/${m}"].iam_email,
- module.dd-service-accounts["${each.key}/${m}"].iam_email,
- m
- )
- ]
- }
- iam_bindings = {
- for k, v in each.value.project_config.iam_bindings : k => merge(v, {
- members = [
- for m in v.members : try(
- var.factories_config.context.iam_principals[m],
- module.dd-automation-sa["${each.key}/${m}"].iam_email,
- module.dd-service-accounts["${each.key}/${m}"].iam_email,
- m
- )
- ]
- condition = try(v.condition, null) == null ? null : {
- title = v.condition.title
- description = try(v.condition.description, null)
- expression = templatestring(v.condition.expression, {
- tag_values = local.tag_values
- })
- }
- })
- }
- iam_bindings_additive = merge(
- {
- for k, v in each.value.project_config.iam_bindings_additive : k => merge(v, {
- member = try(
- var.factories_config.context.iam_principals[v.member],
- module.dd-automation-sa["${each.key}/${v.member}"].iam_email,
- module.dd-service-accounts["${each.key}/${v.member}"].iam_email,
- v.member
- )
- condition = try(v.condition, null) == null ? null : {
- title = v.condition.title
- description = try(v.condition.description, null)
- expression = templatestring(v.condition.expression, {
- tag_values = local.tag_values
- })
- }
- })
- },
- try(each.value.deploy_config.composer, null) == null ? {} : {
- composer_worker = {
- member = module.dd-composer-sa[each.key].iam_email
- role = "roles/composer.worker"
- }
- }
- )
- iam_by_principals = {
- for principal, roles_list in {
- for k, v in each.value.project_config.iam_by_principals :
- lookup(var.factories_config.context.iam_principals, k, k) => v...
- } :
- principal => flatten(roles_list)
- }
- shared_vpc_service_config = (
- each.value.project_config.shared_vpc_service_config == null
- ? null
- : {
- host_project = lookup(
- var.host_project_ids,
- each.value.project_config.shared_vpc_service_config.host_project,
- each.value.project_config.shared_vpc_service_config.host_project
- )
- network_users = [
- for m in try(each.value.project_config.shared_vpc_service_config.network_users, []) :
- try(
- var.factories_config.context.iam_principals[m],
- module.dd-automation-sa["${each.key}/${m}"].iam_email,
- module.dd-service-accounts["${each.key}/${m}"].iam_email,
- m
- )
- ]
- service_agent_iam = try(
- each.value.project_config.shared_vpc_service_config.service_agent_iam,
- {}
- )
- service_iam_grants = try(
- each.value.project_config.shared_vpc_service_config.service_iam_grants,
- []
- )
- }
- )
-}
-
-module "dd-service-accounts" {
- source = "../../../modules/iam-service-account"
- for_each = { for v in local.dd_service_accounts : v.key => v }
- project_id = module.dd-projects[each.value.dd].project_id
- prefix = local.prefix
- name = each.value.name
- description = each.value.description
- iam = {
- for k, v in each.value.iam : k => [
- for m in v : lookup(
- var.factories_config.context.iam_principals, m, m
- )
- ]
- }
- iam_bindings = {
- for k, v in each.value.iam_bindings : k => merge(v, {
- members = [
- for m in v.members : lookup(
- var.factories_config.context.iam_principals, m, m
- )
- ]
- })
- }
- iam_bindings_additive = {
- for k, v in each.value.iam_bindings_additive : k => merge(v, {
- member = lookup(
- var.factories_config.context.iam_principals, v.member, v.member
- )
- })
- }
- iam_storage_roles = each.value.iam_storage_roles
-}
diff --git a/fast/stages/3-data-platform-dev/data-products-automation.tf b/fast/stages/3-data-platform-dev/data-products-automation.tf
deleted file mode 100644
index eadf61c18..000000000
--- a/fast/stages/3-data-platform-dev/data-products-automation.tf
+++ /dev/null
@@ -1,78 +0,0 @@
-/**
- * Copyright 2025 Google LLC
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-# tfdoc:file:description Data product automation resources.
-
-locals {
- dp_automation_products = {
- for k, v in local.data_products :
- k => v if v.automation != null
- }
- dp_automation_products_keys = {
- for k, v in local.dp_automation_products : k => try(
- v.automation.encryption_key,
- var.encryption_keys.storage[try(
- v.automation.location,
- var.location
- )],
- null
- )
- }
-}
-
-module "dp-automation-bucket" {
- source = "../../../modules/gcs"
- for_each = local.dp_automation_products
- project_id = module.dd-projects[each.value.dd].project_id
- prefix = local.prefix
- name = "${each.value.short_name}-state"
- location = try(
- each.value.automation.location,
- var.location
- )
- encryption_key = local.dp_automation_products_keys[each.key]
- iam = {
- "roles/storage.admin" = [
- module.dp-automation-sa["${each.key}/rw"].iam_email
- ]
- "roles/storage.objectViewer" = concat(
- [
- module.dp-automation-sa["${each.key}/ro"].iam_email
- ],
- [
- for m in each.value.automation.impersonation_principals : lookup(
- var.factories_config.context.iam_principals, m, m
- )
- ]
- )
- }
-}
-
-module "dp-automation-sa" {
- source = "../../../modules/iam-service-account"
- for_each = { for v in local.dp_automation_sa : v.key => v }
- project_id = module.dp-projects[each.value.dp].project_id
- prefix = each.value.prefix
- name = each.value.name
- description = each.value.description
- iam = {
- "roles/iam.serviceAccountTokenCreator" = [
- for m in each.value.impersonation_principals : lookup(
- var.factories_config.context.iam_principals, m, m
- )
- ]
- }
-}
diff --git a/fast/stages/3-data-platform-dev/data-products-exposure.tf b/fast/stages/3-data-platform-dev/data-products-exposure.tf
deleted file mode 100644
index f5c2e0956..000000000
--- a/fast/stages/3-data-platform-dev/data-products-exposure.tf
+++ /dev/null
@@ -1,86 +0,0 @@
-/**
- * Copyright 2025 Google LLC
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-# tfdoc:file:description Data product exposure layer resources.
-
-module "dp-buckets" {
- source = "../../../modules/gcs"
- for_each = {
- for v in local.dp_buckets : "${v.dp}/${v.key}" => v
- }
- project_id = module.dp-projects[each.value.dp].project_id
- prefix = local.prefix
- name = "${each.value.dps}-${each.value.short_name}-0"
- location = each.value.location
- encryption_key = (
- local.dp_bucket_keys[each.key] == null
- ? null
- : lookup(
- local.kms_keys,
- local.dp_bucket_keys[each.key],
- local.dp_bucket_keys[each.key]
- )
- )
- iam = {
- for k, v in each.value.iam : k => [
- for m in v : try(
- var.factories_config.context.iam_principals[m],
- module.dp-automation-sa["${each.key}/${m}"].iam_email,
- module.dp-service-accounts["${each.key}/${m}"].iam_email,
- m
- )
- ]
- }
- tag_bindings = {
- exposure = (
- module.central-project.tag_values[var.exposure_config.tag_name].id
- )
- }
-}
-
-module "dp-datasets" {
- source = "../../../modules/bigquery-dataset"
- for_each = {
- for v in local.dp_datasets : "${v.dp}/${v.key}" => v
- }
- project_id = module.dp-projects[each.value.dp].project_id
- id = "${local.prefix_bq}_${each.value.dps}_${each.value.short_name}_0"
- location = each.value.location
- encryption_key = (
- local.dp_dataset_keys[each.key] == null
- ? null
- : lookup(
- local.kms_keys,
- local.dp_dataset_keys[each.key],
- local.dp_dataset_keys[each.key]
- )
- )
- iam = {
- for k, v in each.value.iam : k => [
- for m in v : try(
- var.factories_config.context.iam_principals[m],
- module.dp-automation-sa["${each.key}/${m}"].iam_email,
- module.dp-service-accounts["${each.key}/${m}"].iam_email,
- m
- )
- ]
- }
- tag_bindings = {
- exposure = (
- module.central-project.tag_values[var.exposure_config.tag_name].id
- )
- }
-}
diff --git a/fast/stages/3-data-platform-dev/data-products.tf b/fast/stages/3-data-platform-dev/data-products.tf
deleted file mode 100644
index 1a2ce5c76..000000000
--- a/fast/stages/3-data-platform-dev/data-products.tf
+++ /dev/null
@@ -1,172 +0,0 @@
-/**
- * Copyright 2025 Google LLC
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-# tfdoc:file:description Data product project, service account and exposed resources.
-
-module "dp-projects" {
- source = "../../../modules/project"
- for_each = local.data_products
- billing_account = var.billing_account.id
- name = "${each.value.dds}-${each.value.short_name}-0"
- parent = module.dd-dp-folders[each.value.dd].id
- prefix = local.prefix
- labels = {
- data_domain = each.value.dd
- data_product = replace(each.key, "/", "_")
- }
- services = each.value.services
- service_encryption_key_ids = {
- "bigquery.googleapis.com" = distinct([
- for k, v in local.dp_dataset_keys :
- lookup(local.kms_keys, v, v)
- if startswith(k, each.key) && v != null
- ])
- "storage.googleapis.com" = distinct([
- for k, v in local.dp_bucket_keys :
- lookup(local.kms_keys, v, v)
- if startswith(k, each.key) && v != null
- ])
- }
-}
-
-module "dp-projects-iam" {
- source = "../../../modules/project"
- for_each = local.data_products
- name = module.dp-projects[each.key].project_id
- project_reuse = {
- use_data_source = false
- attributes = {
- name = module.dp-projects[each.key].name
- number = module.dp-projects[each.key].number
- services_enabled = each.value.services
- }
- }
- iam = {
- for k, v in each.value.iam : k => [
- for m in v : try(
- var.factories_config.context.iam_principals[m],
- module.dp-automation-sa["${each.key}/${m}"].iam_email,
- module.dp-service-accounts["${each.key}/${m}"].iam_email,
- m
- )
- ]
- }
- iam_bindings = {
- for k, v in each.value.iam_bindings : k => merge(v, {
- members = [
- for m in v.members : try(
- var.factories_config.context.iam_principals[m],
- module.dp-automation-sa["${each.key}/${m}"].iam_email,
- module.dp-service-accounts["${each.key}/${m}"].iam_email,
- m
- )
- ]
- condition = try(v.condition, null) == null ? null : {
- title = v.condition.title
- description = try(v.condition.description, null)
- expression = templatestring(v.condition.expression, {
- tag_values = local.tag_values
- })
- }
- })
- }
- iam_bindings_additive = {
- for k, v in each.value.iam_bindings_additive : k => merge(v, {
- member = try(
- var.factories_config.context.iam_principals[v.member],
- module.dp-automation-sa["${each.key}/${v.member}"].iam_email,
- module.dp-service-accounts["${each.key}/${v.member}"].iam_email,
- v.member
- )
- condition = try(v.condition, null) == null ? null : {
- title = v.condition.title
- description = try(v.condition.description, null)
- expression = templatestring(v.condition.expression, {
- tag_values = local.tag_values
- })
- }
- })
- }
- iam_by_principals = {
- for k, v in each.value.iam_by_principals : try(
- var.factories_config.context.iam_principals[k],
- module.dp-automation-sa["${each.key}/${k}"].iam_email,
- module.dp-service-accounts["${each.key}/${k}"].iam_email,
- k
- ) => v
- }
- shared_vpc_service_config = (
- each.value.shared_vpc_service_config == null
- ? null
- : {
- host_project = lookup(
- var.host_project_ids,
- each.value.shared_vpc_service_config.host_project,
- each.value.shared_vpc_service_config.host_project
- )
- network_users = [
- for m in try(each.value.shared_vpc_service_config.network_users, []) :
- try(
- var.factories_config.context.iam_principals[m],
- module.dp-automation-sa["${each.key}/${m}"].iam_email,
- module.dp-service-accounts["${each.key}/${m}"].iam_email,
- m
- )
- ]
- service_agent_iam = try(
- each.value.shared_vpc_service_config.service_agent_iam,
- {}
- )
- service_iam_grants = try(
- each.value.shared_vpc_service_config.service_iam_grants,
- []
- )
- }
- )
-}
-
-module "dp-service-accounts" {
- source = "../../../modules/iam-service-account"
- for_each = { for v in local.dp_service_accounts : v.key => v }
- project_id = module.dp-projects[each.value.dp].project_id
- prefix = each.value.prefix
- name = each.value.name
- description = each.value.description
- iam = {
- for k, v in each.value.iam : k => [
- for m in v : lookup(
- var.factories_config.context.iam_principals, m, m
- )
- ]
- }
- iam_bindings = {
- for k, v in each.value.iam_bindings : k => merge(v, {
- members = [
- for m in v.members : lookup(
- var.factories_config.context.iam_principals, m, m
- )
- ]
- })
- }
- iam_bindings_additive = {
- for k, v in each.value.iam_bindings_additive : k => merge(v, {
- member = lookup(
- var.factories_config.context.iam_principals, v.member, v.member
- )
- })
- }
- iam_storage_roles = each.value.iam_storage_roles
-}
diff --git a/fast/stages/3-data-platform-dev/data/aspect-types/test-0.yaml b/fast/stages/3-data-platform-dev/data/aspect-types/test-0.yaml
deleted file mode 100644
index d81db5678..000000000
--- a/fast/stages/3-data-platform-dev/data/aspect-types/test-0.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright 2025 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# yaml-language-server: $schema=../../schemas/aspect-type.schema.json
-
-display_name: "Basic template"
-metadata_template: |
- {
- "name": "tf-basic-template",
- "type": "record",
- "recordFields": [
- {
- "name": "source",
- "type": "string",
- "annotations": {
- "displayName": "Source",
- "description": "Specifies the source of data."
- },
- "index": 1,
- "constraints": {
- "required": true
- }
- },
- {
- "name": "owner",
- "type": "string",
- "annotations": {
- "displayName": "Owner",
- "description": "Specifies the data owner."
- },
- "index": 2,
- "constraints": {}
- }
- ]
- }
diff --git a/fast/stages/3-data-platform-dev/data/data-domains/domain-0/_config.yaml b/fast/stages/3-data-platform-dev/data/data-domains/domain-0/_config.yaml
deleted file mode 100644
index df5a0581b..000000000
--- a/fast/stages/3-data-platform-dev/data/data-domains/domain-0/_config.yaml
+++ /dev/null
@@ -1,90 +0,0 @@
-# Copyright 2025 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# yaml-language-server: $schema=../../../schemas/data-domain.schema.json
-
-name: Domain 0
-short_name: d0
-
-automation:
- impersonation_principals:
- - dp-product-a-0
-
-deploy_config:
- composer:
- {}
- # Uncomment for VPC Network Connectivity
- # region defaults to var.location
- # node_config:
- # network: dev-net-spoke-0
- # subnetwork: europe-west1/dev-dataplatform
-
-project_config:
- iam:
- roles/owner:
- - rw
- roles/viewer:
- - ro
- roles/composer.environmentAndStorageObjectAdmin:
- - dp-product-a-0
- iam_by_principals:
- dp-platform:
- - roles/composer.environmentAndStorageObjectUser
- - roles/monitoring.viewer
- - roles/logging.viewer
- dp-product-a-0:
- - roles/composer.environmentAndStorageObjectAdmin
- - roles/monitoring.viewer
- - roles/logging.viewer
- dp-domain-a:
- - roles/composer.environmentAndStorageObjectAdmin
- - roles/monitoring.viewer
- - roles/logging.viewer
-
- services:
- - composer.googleapis.com
- - datacatalog.googleapis.com
- - dataplex.googleapis.com
- - datalineage.googleapis.com
- # Uncomment for shared VPC Network configuration
- # shared_vpc_service_config:
- # host_project: dev-net-spoke-0
- # service_agent_iam:
- # roles/composer.sharedVpcAgent:
- # - composer
-
-folder_config:
- iam_bindings:
- bigquery_metadata_viewer:
- members:
- - dp-platform
- - dp-domain-a
- - dp-product-a-0
- - data-consumer-bi
- role: roles/bigquery.metadataViewer
- condition:
- title: exposure
- description: Expose via secure tag.
- expression: resource.matchTag('exposure', 'allow')
- dataplex_catalog_viewer:
- members:
- - dp-platform
- - dp-domain-a
- - dp-product-a-0
- - data-consumer-bi
- role: roles/dataplex.catalogViewer
- condition:
- title: exposure
- description: Expose via secure tag.
- expression: resource.matchTag('exposure', 'allow')
diff --git a/fast/stages/3-data-platform-dev/data/data-domains/domain-0/product-0.yaml b/fast/stages/3-data-platform-dev/data/data-domains/domain-0/product-0.yaml
deleted file mode 100644
index 94dcc436f..000000000
--- a/fast/stages/3-data-platform-dev/data/data-domains/domain-0/product-0.yaml
+++ /dev/null
@@ -1,77 +0,0 @@
-# Copyright 2025 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# yaml-language-server: $schema=../../../schemas/data-product.schema.json
-
-short_name: p0
-
-services:
- - bigquery.googleapis.com
- - cloudaicompanion.googleapis.com
- - cloudresourcemanager.googleapis.com
- - composer.googleapis.com
- - datacatalog.googleapis.com
- - dataplex.googleapis.com
- - datalineage.googleapis.com
- - storage.googleapis.com
-
-automation:
- impersonation_principals:
- - dp-product-a-0
-
-exposure_layer:
- bigquery:
- datasets:
- exposure: {}
- iam:
- "roles/bigquery.dataViewer":
- - data-consumer-bi
- storage:
- buckets:
- exposed: {}
- iam:
- "roles/storage.objectViewer":
- - data-consumer-bi
- "roles/storage.bucketViewer":
- - data-consumer-bi
-
-iam_by_principals:
- rw:
- - roles/editor
- ro:
- - roles/viewer
- dp-product-a-0:
- - "roles/dataplex.catalogEditor"
- - "roles/bigquery.dataOwner"
- - "roles/bigquery.jobUser"
- - "roles/datalineage.viewer"
- - "roles/dataplex.dataScanCreator"
- - "roles/logging.viewer"
- - "roles/monitoring.viewer"
- - "roles/serviceusage.serviceUsageViewer"
- - "roles/storage.bucketViewer"
- - "roles/storage.objectAdmin"
- processing:
- - "roles/bigquery.dataEditor"
- - "roles/bigquery.jobUser"
- - "roles/dataflow.admin"
- - "roles/dataproc.editor"
- - "roles/dataproc.worker"
- - "roles/iam.serviceAccountUser"
- - "roles/storage.bucketViewer"
- - "roles/storage.objectAdmin"
-
-service_accounts:
- processing:
- description: Processing service account.
diff --git a/fast/stages/3-data-platform-dev/demo/.gitignore b/fast/stages/3-data-platform-dev/demo/.gitignore
deleted file mode 100644
index c6077b91e..000000000
--- a/fast/stages/3-data-platform-dev/demo/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-composer/variables.json
diff --git a/fast/stages/3-data-platform-dev/demo/README.md b/fast/stages/3-data-platform-dev/demo/README.md
deleted file mode 100644
index 297f6f785..000000000
--- a/fast/stages/3-data-platform-dev/demo/README.md
+++ /dev/null
@@ -1,190 +0,0 @@
-# Data Product Reference Example
-
-This folder contains a reference implementation of a Data Product showcasing the complete lifecycle from raw data ingestion to curated analytics-ready datasets. The example demonstrates how to create Data Products within the [Data Platform stage](../README.md) of Fabric FAST. It utilizes the automation service account and shared services created by the Data Platform stage.
-
-Our example consists of a batch ELT pipeline that processes and joins individual CSV data files from Cloud Storage to BigQuery using the publicly available theLook eCommerce dataset:
-
-## Components
-
-This reference implementation includes:
-
-- **Infrastructure as Code**: Terraform modules for deploying GCP resources
-- **Data Schemas**: BigQuery table schemas in JSON format for structured data validation
-- **Orchestration**: Cloud Composer (Apache Airflow) DAGs for automated pipeline execution
-- **Sample Data**: Utility script to download theLook eCommerce reference tables
-
-## Getting Started
-
-### Prerequisites
-
-- Google Cloud SDK installed and configured
-- Terraform >= 1.9.0
-- `jq` command-line JSON processor
-- Access to the automation service account from the previous stage
-
-Ensure that you are authenticated with the `gcloud` CLI using the user that has the relevant access to
-both the Domain Shared Resources as well as the Data Product GCP projects:
-
-```bash
-gcloud auth login
-gcloud auth application-default login
-```
-
-### 1. Infrastructure Setup
-
-**1. Configure Terraform Variables**
-
- ```bash
- cp terraform.tfvars.sample terraform.tfvars
- # Edit terraform.tfvars with your specific values
- ```
-
-**2. Deploy Infrastructure**
-
- ```bash
- terraform init
- terraform apply
- ```
-
-### 2. Data Pipeline Setup
-
-**1. Set Environment Variables**
-
- ```bash
- export LANDING_BUCKET=$(terraform output -raw landing_gcs_bucket)
- export COMPOSER_PROJECT_ID=$(terraform output -raw composer_project_id)
- export COMPOSER_ENV_NAME=$(terraform output -raw composer_environment_name)
- export LOCATION=$(terraform output -raw location)
- ```
-
-**2. Deploy Data Schemas**
-
- ```bash
- gcloud storage cp -r data/schemas/* gs://$LANDING_BUCKET/schemas
- ```
-
-**3. Source Sample Data**
-
- ```bash
- ./data/get_thelook_data.sh gs://$LANDING_BUCKET
- ```
-
-**4. Configure Composer Environment**
-
- Update Composer environment variables from `composer/variables.json`:
-
- > **Note**: This step may take several minutes to complete.
-
- ```bash
- # Copy Airflow JSON variable file into Composer data folder
- gcloud composer environments storage data import \
- --project $COMPOSER_PROJECT_ID \
- --environment=$COMPOSER_ENV_NAME \
- --location $LOCATION \
- --source="composer/variables.json"
-
- # Import Airflow variables
- gcloud composer environments run $COMPOSER_ENV_NAME \
- --project $COMPOSER_PROJECT_ID \
- --location $LOCATION \
- variables \
- -- import /home/airflow/gcs/data/variables.json
- ```
-
-**5. Deploy Airflow DAGs**
-
- ```bash
- gcloud composer environments storage dags import \
- --project=$COMPOSER_PROJECT_ID \
- --environment=$COMPOSER_ENV_NAME \
- --location=$LOCATION \
- --source="composer/DAG-dp0"
- ```
-
- > **Note**: It may take several minutes for the DAGs to be parsed and become available in Composer.
-
-### 3. Pipeline Execution
-
-**1. Verify DAG Import**
-
- Navigate to the Composer UI in the Domain Shared Resources project and confirm that the DAGs have been successfully imported.
-
-**2. Execute Pipeline**
-
- Trigger the DAGs in the following sequence (wait for each to complete):
-
- 1. **`gcs2bq_table_create`** - Creates BigQuery tables with proper schemas
- 2. **`gcs2bq_table_elt`** - Executes the ELT pipeline to process data
-
-## Architecture Overview
-
-The data product implements a three-tier architecture:
-
-
-
-
bigquery-dataset · gcs | |
-| [outputs.tf](./outputs.tf) | Module outputs. | | local_file |
-| [variables.tf](./variables.tf) | Module variables. | | |
-
-## Variables
-
-| name | description | type | required | default | producer |
-|---|---|:---:|:---:|:---:|:---:|
-| [authorized_dataset_on_curated](variables.tf#L16) | Authorized Dataset. | string | ✓ | | |
-| [composer_config](variables.tf#L21) | Composer environment configuration. | object({…}) | ✓ | | |
-| [dp_processing_service_account](variables.tf#L30) | Service account for data processing via Composer impersonation. | string | ✓ | | |
-| [impersonate_service_account](variables.tf#L47) | Service account to impersonate for Google Cloud providers. | string | ✓ | | |
-| [prefix](variables.tf#L60) | Prefix used for resources that need unique names. Use a maximum of 9 chars for organizations, and 11 chars for tenants. | string | ✓ | | |
-| [project_id](variables.tf#L69) | Project ID to deploy resources. | string | ✓ | | |
-| [encryption_keys](variables.tf#L36) | Default encryption keys for services, in service => { region => key id } format. Overridable on a per-object basis. | object({…}) | | {} | |
-| [location](variables.tf#L53) | Default location used when no location is specified. | string | | "europe-west8" | |
-
-## Outputs
-
-| name | description | sensitive | consumers |
-|---|---|:---:|---|
-| [composer_environment_name](outputs.tf#L17) | The name of the Composer environment. | | |
-| [composer_project_id](outputs.tf#L22) | The project ID where the Composer environment is located. | | |
-| [dp_processing_service_account](outputs.tf#L27) | Service account for data processing. | | |
-| [landing_gcs_bucket](outputs.tf#L32) | The name of the landing GCS bucket. | | |
-| [location](outputs.tf#L37) | The location/region used for resources. | | |
-
diff --git a/fast/stages/3-data-platform-dev/demo/composer/DAG-dp0/gcs2bq_elt.py b/fast/stages/3-data-platform-dev/demo/composer/DAG-dp0/gcs2bq_elt.py
deleted file mode 100644
index c80663ace..000000000
--- a/fast/stages/3-data-platform-dev/demo/composer/DAG-dp0/gcs2bq_elt.py
+++ /dev/null
@@ -1,308 +0,0 @@
-# Copyright 2025 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-BigQuery ELT Pipeline DAG
-
-This DAG implements a comprehensive customer purchases ELT pipeline that:
-1. Loads data from GCS to BigQuery landing tables (users, orders, order_items, products)
-2. Performs a 4-table join to create a comprehensive customer_purchases table
-3. Creates an exposure view for analytics consumption
-
-Dependencies: Requires gcs2bq_table_create DAG to complete first
-"""
-
-import datetime
-import logging
-import os
-
-from airflow import models
-from airflow.decorators import task
-from airflow.models import Variable
-from airflow.operators import empty
-from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook
-from airflow.providers.google.cloud.operators.bigquery import (
- BigQueryInsertJobOperator,)
-from airflow.providers.google.cloud.sensors.bigquery import (
- BigQueryTableExistenceSensor,)
-from airflow.providers.google.cloud.transfers.gcs_to_bigquery import (
- GCSToBigQueryOperator,)
-from airflow.utils.task_group import TaskGroup
-
-# Configuration
-LANDING_TABLES = ["users", "orders", "order_items", "products"]
-
-# Environment variables (set from composer/variables.json)
-DP_PROJECT = Variable.get("DP_PROJECT")
-LAND_BQ_DATASET = Variable.get("LAND_BQ_DATASET")
-CURATED_BQ_DATASET = Variable.get("CURATED_BQ_DATASET")
-LAND_GCS = Variable.get("LAND_GCS")
-DP_PROCESSING_SERVICE_ACCOUNT = Variable.get("DP_PROCESSING_SERVICE_ACCOUNT")
-LOCATION = Variable.get("LOCATION")
-
-# Validate required environment variables
-required_vars = {
- "DP_PROJECT": DP_PROJECT,
- "LAND_BQ_DATASET": LAND_BQ_DATASET,
- "CURATED_BQ_DATASET": CURATED_BQ_DATASET,
- "LAND_GCS": LAND_GCS,
- "DP_PROCESSING_SERVICE_ACCOUNT": DP_PROCESSING_SERVICE_ACCOUNT,
- "LOCATION": LOCATION,
-}
-
-missing_vars = [var for var, value in required_vars.items() if not value]
-if missing_vars:
- raise ValueError(f"Missing required environment variables: {missing_vars}")
-
-logger = logging.getLogger(__name__)
-
-
-def create_gcs_to_bq_task(table_name: str) -> GCSToBigQueryOperator:
- """
- Factory function to create GCS to BigQuery load tasks.
-
- Args:
- table_name: Name of the table to load
-
- Returns:
- GCSToBigQueryOperator instance
- """
- return GCSToBigQueryOperator(
- task_id=f"{table_name}_load",
- bucket=LAND_GCS,
- source_objects=f"data/{table_name}/{table_name}_*.csv",
- destination_project_dataset_table=
- f"{DP_PROJECT}.{LAND_BQ_DATASET}.{table_name}",
- source_format="CSV",
- create_disposition="CREATE_IF_NEEDED",
- write_disposition="WRITE_TRUNCATE",
- schema_object=f"schemas/landing/{table_name}.json",
- schema_object_bucket=LAND_GCS,
- autodetect=False,
- max_bad_records=1,
- project_id=DP_PROJECT,
- impersonation_chain=[DP_PROCESSING_SERVICE_ACCOUNT],
- )
-
-
-def create_table_validation_task(
- table_name: str, dataset_name: str,
- task_prefix: str = "validate") -> BigQueryTableExistenceSensor:
- """
- Factory function to create table validation tasks using sensor.
-
- Args:
- table_name: Name of the table to validate
- dataset_name: Name of the dataset
- task_prefix: Prefix for task ID
-
- Returns:
- BigQueryTableExistenceSensor instance
- """
- return BigQueryTableExistenceSensor(
- task_id=f"{task_prefix}_{table_name}_exists",
- project_id=DP_PROJECT,
- dataset_id=dataset_name,
- table_id=table_name,
- poke_interval=30, # Check every 30 seconds
- timeout=600, # Timeout after 10 minutes
- mode="reschedule", # Release worker slot between checks
- impersonation_chain=[DP_PROCESSING_SERVICE_ACCOUNT],
- )
-
-
-# DAG Definition
-yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
-
-default_args = {
- "owner": "data-platform-team",
- "start_date": yesterday,
- "depends_on_past": False,
- "email": Variable.get("alert_email_list", default_var="").split(","),
- "email_on_failure": True,
- "email_on_retry": False,
- "retries": 2,
- "retry_delay": datetime.timedelta(minutes=5),
- "sla": datetime.timedelta(hours=2),
-}
-
-with models.DAG(
- "gcs2bq_elt",
- default_args=default_args,
- schedule_interval=None,
- catchup=False,
- max_active_runs=1,
- tags=["bigquery", "elt", "data-platform", "customer-purchases"],
- doc_md=__doc__,
- dagrun_timeout=datetime.timedelta(hours=3),
-) as dag:
- # Start and end markers
- start = empty.EmptyOperator(task_id="start", trigger_rule="all_success")
- end = empty.EmptyOperator(task_id="end", trigger_rule="all_done")
-
- # Validate that all required tables exist before starting data load
- with TaskGroup(
- "validate_prerequisites",
- tooltip="Validate all landing tables exist before data load",
- ) as prerequisites_group:
- prerequisite_validations = [
- create_table_validation_task(
- table_name=table,
- dataset_name=LAND_BQ_DATASET,
- task_prefix="validate_landing",
- ) for table in LANDING_TABLES
- ]
- # Validate that the curated customer_purchases table exists from a previous run
- validate_customer_purchases_prereq = create_table_validation_task(
- table_name="customer_purchases",
- dataset_name=CURATED_BQ_DATASET,
- task_prefix="validate_curated",
- )
-
- # Load data from GCS to BigQuery landing tables
- with TaskGroup("load_landing_data",
- tooltip="Load all data files to landing tables") as load_group:
- load_tasks = [
- create_gcs_to_bq_task(table_name=table) for table in LANDING_TABLES
- ]
-
- # Create comprehensive customer purchases join
- customer_purchases_join = BigQueryInsertJobOperator(
- task_id="create_customer_purchases",
- project_id=DP_PROJECT,
- configuration={
- "jobType": "QUERY",
- "query": {
- "query":
- f"""
- SELECT
- -- User information
- u.id as user_id,
- u.first_name,
- u.last_name,
- u.email,
- u.age,
- u.gender,
- u.state,
- u.street_address,
- u.postal_code,
- u.city,
- u.country,
- u.latitude,
- u.longitude,
- u.traffic_source,
- u.created_at as user_created_at,
- u.user_geom,
-
- -- Order information
- o.order_id,
- o.status as order_status,
- o.created_at as order_created_at,
- o.returned_at as order_returned_at,
- o.shipped_at as order_shipped_at,
- o.delivered_at as order_delivered_at,
- o.num_of_item,
-
- -- Order item information
- oi.id as order_item_id,
- oi.product_id,
- oi.inventory_item_id,
- oi.status as order_item_status,
- oi.sale_price,
- oi.created_at as order_item_created_at,
- oi.shipped_at as order_item_shipped_at,
- oi.delivered_at as order_item_delivered_at,
- oi.returned_at as order_item_returned_at,
-
- -- Product information
- p.cost,
- p.category,
- p.name,
- p.brand,
- p.retail_price,
- p.department,
- p.sku,
- p.distribution_center_id
-
- FROM `{DP_PROJECT}.{LAND_BQ_DATASET}.users` u
- JOIN `{DP_PROJECT}.{LAND_BQ_DATASET}.orders` o
- ON u.id = o.user_id
- JOIN `{DP_PROJECT}.{LAND_BQ_DATASET}.order_items` oi
- ON o.order_id = oi.order_id
- JOIN `{DP_PROJECT}.{LAND_BQ_DATASET}.products` p
- ON oi.product_id = p.id
- """,
- "destinationTable": {
- "projectId": DP_PROJECT,
- "datasetId": CURATED_BQ_DATASET,
- "tableId": "customer_purchases",
- },
- "writeDisposition":
- "WRITE_TRUNCATE",
- "useLegacySql":
- False,
- },
- },
- impersonation_chain=[DP_PROCESSING_SERVICE_ACCOUNT],
- )
-
- @task(task_id="validate_customer_purchases_data")
- def validate_customer_purchases_data_python():
- """
- Checks if the customer_purchases table has data using BigQueryHook
- for robust cross-project execution.
- """
- project_id = DP_PROJECT
- dataset_id = CURATED_BQ_DATASET
- table_id = "customer_purchases"
- impersonation_account = DP_PROCESSING_SERVICE_ACCOUNT
-
- logging.info(
- f"Executing data validation check on table: {project_id}.{dataset_id}.{table_id}"
- )
-
- # The hook will use the impersonation chain for all interactions
- hook = BigQueryHook(
- gcp_conn_id="google_cloud_default", # Assumes default connection
- impersonation_chain=[impersonation_account],
- location=LOCATION,
- )
-
- sql = f"SELECT COUNT(*) FROM `{project_id}.{dataset_id}.{table_id}`"
-
- # Use insert_job for cross-project execution with explicit project_id
- job_config = {"query": {"query": sql, "useLegacySql": False}}
-
- job = hook.insert_job(configuration=job_config, project_id=project_id)
-
- # Extract results from the completed job
- results = job.result()
- records = [list(row) for row in results]
-
- if not records or not records[0] or records[0][0] == 0:
- raise ValueError(
- f"Data quality check failed: Table {project_id}.{dataset_id}.{table_id} is empty or has no rows."
- )
- else:
- row_count = records[0][0]
- logging.info(
- f"Data quality check passed: Table {project_id}.{dataset_id}.{table_id} contains {row_count} rows."
- )
-
- validate_customer_purchases_data = validate_customer_purchases_data_python()
-
- # Define dependencies
- start >> prerequisites_group
- prerequisites_group >> load_group
- load_group >> customer_purchases_join
- customer_purchases_join >> validate_customer_purchases_data >> end
diff --git a/fast/stages/3-data-platform-dev/demo/composer/DAG-dp0/gcs2bq_table_create.py b/fast/stages/3-data-platform-dev/demo/composer/DAG-dp0/gcs2bq_table_create.py
deleted file mode 100644
index 1d0aacf60..000000000
--- a/fast/stages/3-data-platform-dev/demo/composer/DAG-dp0/gcs2bq_table_create.py
+++ /dev/null
@@ -1,224 +0,0 @@
-# Copyright 2025 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-BigQuery Table Creation DAG
-
-This DAG creates BigQuery tables based on configuration stored in GCS.
-It creates landing tables, curated tables, and an exposure view.
-"""
-
-import datetime
-import logging
-import os
-
-from airflow import models
-from airflow.models import Variable
-from airflow.operators import empty
-from airflow.providers.google.cloud.operators.bigquery import (
- BigQueryCreateTableOperator,)
-from airflow.providers.google.cloud.sensors.bigquery import (
- BigQueryTableExistenceSensor,)
-from airflow.utils.task_group import TaskGroup
-
-# Configuration
-LANDING_TABLES = ["users", "orders", "order_items", "products"]
-CURATED_TABLES = ["customer_purchases"]
-
-# Environment variables (set from Composer variables.json)
-DP_PROJECT = Variable.get("DP_PROJECT")
-LAND_BQ_DATASET = Variable.get("LAND_BQ_DATASET")
-CURATED_BQ_DATASET = Variable.get("CURATED_BQ_DATASET")
-EXPOSURE_BQ_DATASET = Variable.get("EXPOSURE_BQ_DATASET")
-LAND_GCS = Variable.get("LAND_GCS")
-DP_PROCESSING_SERVICE_ACCOUNT = Variable.get("DP_PROCESSING_SERVICE_ACCOUNT")
-
-# Validate required environment variables
-required_vars = {
- "DP_PROJECT": DP_PROJECT,
- "LAND_BQ_DATASET": LAND_BQ_DATASET,
- "CURATED_BQ_DATASET": CURATED_BQ_DATASET,
- "EXPOSURE_BQ_DATASET": EXPOSURE_BQ_DATASET,
- "LAND_GCS": LAND_GCS,
- "DP_PROCESSING_SERVICE_ACCOUNT": DP_PROCESSING_SERVICE_ACCOUNT,
-}
-
-missing_vars = [var for var, value in required_vars.items() if not value]
-if missing_vars:
- raise ValueError(f"Missing required environment variables: {missing_vars}")
-
-logger = logging.getLogger(__name__)
-
-
-def create_bq_table_task(table_name: str, dataset_name: str, schema_path: str,
- task_prefix: str = "") -> BigQueryCreateTableOperator:
- """
- Factory function to create BigQuery table tasks.
-
- Args:
- table_name: Name of the table to create
- dataset_name: Name of the dataset
- schema_path: Path to schema files in GCS
- task_prefix: Prefix for task ID
-
- Returns:
- BigQueryCreateTableOperator instance
- """
- task_id = (f"{task_prefix}_{table_name}_create"
- if task_prefix else f"{table_name}_create")
-
- return BigQueryCreateTableOperator(
- task_id=task_id,
- project_id=DP_PROJECT,
- dataset_id=dataset_name,
- table_id=table_name,
- table_resource={},
- if_exists="log",
- gcs_schema_object=f"gs://{LAND_GCS}/{schema_path}/{table_name}.json",
- impersonation_chain=[DP_PROCESSING_SERVICE_ACCOUNT],
- )
-
-
-def create_table_validation_task(
- table_name: str, dataset_name: str,
- task_prefix: str = "validate") -> BigQueryTableExistenceSensor:
- """
- Factory function to create table validation tasks using sensor.
-
- Args:
- table_name: Name of the table to validate
- dataset_name: Name of the dataset
- task_prefix: Prefix for task ID
-
- Returns:
- BigQueryTableExistenceSensor instance
- """
- return BigQueryTableExistenceSensor(
- task_id=f"{task_prefix}_{table_name}_exists",
- project_id=DP_PROJECT,
- dataset_id=dataset_name,
- table_id=table_name,
- poke_interval=30, # Check every 30 seconds
- timeout=600, # Timeout after 10 minutes
- mode="reschedule", # Release worker slot between checks
- impersonation_chain=[DP_PROCESSING_SERVICE_ACCOUNT],
- )
-
-
-# DAG Definition
-yesterday = datetime.datetime.now() - datetime.timedelta(days=1)
-
-default_args = {
- "owner": "data-platform-team",
- "start_date": yesterday,
- "depends_on_past": False,
- "email": Variable.get("alert_email_list", default_var="").split(","),
- "email_on_failure": True,
- "email_on_retry": False,
- "retries": 2,
- "retry_delay": datetime.timedelta(minutes=5),
- "sla": datetime.timedelta(hours=1),
-}
-
-with models.DAG(
- "gcs2bq_table_create",
- default_args=default_args,
- schedule_interval=None,
- catchup=False,
- max_active_runs=1,
- tags=["bigquery", "table-creation", "data-platform"],
- doc_md=__doc__,
- dagrun_timeout=datetime.timedelta(hours=2),
-) as dag:
- # Start and end markers
- start = empty.EmptyOperator(task_id="start", trigger_rule="all_success")
- end = empty.EmptyOperator(task_id="end", trigger_rule="all_done")
-
- # Create landing tables
- with TaskGroup("create_landing_tables",
- tooltip="Create all landing layer tables") as landing_group:
- landing_tasks = []
- for table in LANDING_TABLES:
- task = create_bq_table_task(
- table_name=table,
- dataset_name=LAND_BQ_DATASET,
- schema_path="schemas/landing",
- task_prefix="land",
- )
- landing_tasks.append(task)
-
- # Create curated tables
- with TaskGroup("create_curated_tables",
- tooltip="Create all curated layer tables") as curated_group:
- curated_tasks = []
- for table in CURATED_TABLES:
- task = create_bq_table_task(
- table_name=table,
- dataset_name=CURATED_BQ_DATASET,
- schema_path="schemas/curated",
- task_prefix="curated",
- )
- curated_tasks.append(task)
-
- # Validate all tables exist
- with TaskGroup(
- "validate_tables",
- tooltip="Validate all tables were created") as validation_group:
- # Create validation tasks for landing tables
- landing_validations = [
- create_table_validation_task(
- table_name=table,
- dataset_name=LAND_BQ_DATASET,
- task_prefix="validate_landing",
- ) for table in LANDING_TABLES
- ]
-
- # Create validation tasks for curated tables
- curated_validations = [
- create_table_validation_task(
- table_name=table,
- dataset_name=CURATED_BQ_DATASET,
- task_prefix="validate_curated",
- ) for table in CURATED_TABLES
- ]
-
- # Create exposure view
- exposure_view = BigQueryCreateTableOperator(
- task_id="exposure_view_create",
- project_id=DP_PROJECT,
- dataset_id=EXPOSURE_BQ_DATASET,
- table_id="customer_purchases",
- table_resource={
- "view": {
- "query":
- f"SELECT * FROM `{DP_PROJECT}.{CURATED_BQ_DATASET}.customer_purchases`",
- "useLegacySql":
- False,
- },
- },
- if_exists="log",
- impersonation_chain=[DP_PROCESSING_SERVICE_ACCOUNT],
- )
-
- # Validate exposure view exists
- validate_exposure_view = create_table_validation_task(
- table_name="customer_purchases",
- dataset_name=EXPOSURE_BQ_DATASET,
- task_prefix="validate_exposure",
- )
-
- # Define dependencies
- start >> [landing_group, curated_group]
- [landing_group, curated_group] >> validation_group
- validation_group >> exposure_view
- exposure_view >> validate_exposure_view >> end
diff --git a/fast/stages/3-data-platform-dev/demo/composer/variables.tf.tpl b/fast/stages/3-data-platform-dev/demo/composer/variables.tf.tpl
deleted file mode 100644
index 46ef77b90..000000000
--- a/fast/stages/3-data-platform-dev/demo/composer/variables.tf.tpl
+++ /dev/null
@@ -1,9 +0,0 @@
-{
- "DP_PROJECT": "${dp_project}",
- "LOCATION": "${location}",
- "DP_PROCESSING_SERVICE_ACCOUNT": "${dp_processing_service_account}",
- "LAND_GCS": "${land_gcs}",
- "LAND_BQ_DATASET": "${land_bq_dataset}",
- "CURATED_BQ_DATASET": "${curated_bq_dataset}",
- "EXPOSURE_BQ_DATASET": "${exposure_bq_dataset}"
-}
diff --git a/fast/stages/3-data-platform-dev/demo/data/get_thelook_data.sh b/fast/stages/3-data-platform-dev/demo/data/get_thelook_data.sh
deleted file mode 100755
index b16c83c49..000000000
--- a/fast/stages/3-data-platform-dev/demo/data/get_thelook_data.sh
+++ /dev/null
@@ -1,84 +0,0 @@
-#!/bin/bash
-# Copyright 2025 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Script to export data from BigQuery public dataset to GCS bucket
-# Usage: ./export_bigquery_data.sh gs://your-bucket-name
-
-set -e # Exit on error
-
-# Check if argument is provided
-if [ $# -eq 0 ]; then
- echo "Error: No GCS bucket provided"
- echo "Usage: $0 gs://your-bucket-name"
- exit 1
-fi
-
-GCS_BUCKET=$1
-
-# Validate that the bucket starts with gs://
-if [[ ! "$GCS_BUCKET" =~ ^gs:// ]]; then
- echo "Error: GCS bucket must start with gs://"
- echo "Example: gs://your-bucket-name"
- exit 1
-fi
-
-# Check if bq command is available
-if ! command -v bq &>/dev/null; then
- echo "Error: bq command not found. Please install Google Cloud SDK."
- exit 1
-fi
-
-# Remove trailing slash if present
-GCS_BUCKET=${GCS_BUCKET%/}
-
-# Source project and dataset
-SOURCE_PROJECT="bigquery-public-data"
-SOURCE_DATASET="thelook_ecommerce"
-
-# Tables to export
-TABLES=("users" "orders" "order_items" "products")
-
-echo "Starting export from ${SOURCE_PROJECT}.${SOURCE_DATASET} to $GCS_BUCKET"
-echo "================================================"
-
-# Export each table
-for table in "${TABLES[@]}"; do
- echo -n "Exporting $table..."
-
- # Create destination path
- DESTINATION="${GCS_BUCKET}/data/${table}/${table}_*.csv"
-
- # Execute bq extract command
- if bq extract \
- --destination_format CSV \
- --field_delimiter=',' \
- --print_header=true \
- "bigquery-public-data:thelook_ecommerce.${table}" \
- "${DESTINATION}"; then
- echo " SUCCESS"
- else
- echo " FAILED"
- echo "Error: Failed to export $table"
- exit 1
- fi
-done
-
-echo "================================================"
-echo "All tables exported successfully!"
-echo ""
-echo "Exported tables:"
-for table in "${TABLES[@]}"; do
- echo " - ${GCS_BUCKET}/data/${table}/"
-done
diff --git a/fast/stages/3-data-platform-dev/demo/data/schemas/curated/customer_purchases.json b/fast/stages/3-data-platform-dev/demo/data/schemas/curated/customer_purchases.json
deleted file mode 100644
index bcaa28a6c..000000000
--- a/fast/stages/3-data-platform-dev/demo/data/schemas/curated/customer_purchases.json
+++ /dev/null
@@ -1,201 +0,0 @@
-[
- {
- "mode": "NULLABLE",
- "name": "user_id",
- "type": "INTEGER"
- },
- {
- "mode": "NULLABLE",
- "name": "first_name",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "last_name",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "email",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "age",
- "type": "INTEGER"
- },
- {
- "mode": "NULLABLE",
- "name": "gender",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "state",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "street_address",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "postal_code",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "city",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "country",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "latitude",
- "type": "FLOAT"
- },
- {
- "mode": "NULLABLE",
- "name": "longitude",
- "type": "FLOAT"
- },
- {
- "mode": "NULLABLE",
- "name": "traffic_source",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "user_created_at",
- "type": "TIMESTAMP"
- },
- {
- "name": "user_geom",
- "type": "GEOGRAPHY"
- },
- {
- "mode": "NULLABLE",
- "name": "order_id",
- "type": "INTEGER"
- },
- {
- "mode": "NULLABLE",
- "name": "order_status",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "order_created_at",
- "type": "TIMESTAMP"
- },
- {
- "mode": "NULLABLE",
- "name": "order_returned_at",
- "type": "TIMESTAMP"
- },
- {
- "mode": "NULLABLE",
- "name": "order_shipped_at",
- "type": "TIMESTAMP"
- },
- {
- "mode": "NULLABLE",
- "name": "order_delivered_at",
- "type": "TIMESTAMP"
- },
- {
- "mode": "NULLABLE",
- "name": "num_of_item",
- "type": "INTEGER"
- },
- {
- "mode": "NULLABLE",
- "name": "order_item_id",
- "type": "INTEGER"
- },
- {
- "mode": "NULLABLE",
- "name": "product_id",
- "type": "INTEGER"
- },
- {
- "mode": "NULLABLE",
- "name": "inventory_item_id",
- "type": "INTEGER"
- },
- {
- "mode": "NULLABLE",
- "name": "order_item_status",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "order_item_created_at",
- "type": "TIMESTAMP"
- },
- {
- "mode": "NULLABLE",
- "name": "order_item_shipped_at",
- "type": "TIMESTAMP"
- },
- {
- "mode": "NULLABLE",
- "name": "order_item_delivered_at",
- "type": "TIMESTAMP"
- },
- {
- "mode": "NULLABLE",
- "name": "order_item_returned_at",
- "type": "TIMESTAMP"
- },
- {
- "mode": "NULLABLE",
- "name": "sale_price",
- "type": "FLOAT"
- },
- {
- "mode": "NULLABLE",
- "name": "cost",
- "type": "FLOAT"
- },
- {
- "mode": "NULLABLE",
- "name": "category",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "name",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "brand",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "retail_price",
- "type": "FLOAT"
- },
- {
- "mode": "NULLABLE",
- "name": "department",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "sku",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "distribution_center_id",
- "type": "INTEGER"
- }
-]
diff --git a/fast/stages/3-data-platform-dev/demo/data/schemas/landing/order_items.json b/fast/stages/3-data-platform-dev/demo/data/schemas/landing/order_items.json
deleted file mode 100644
index 9b0d6829a..000000000
--- a/fast/stages/3-data-platform-dev/demo/data/schemas/landing/order_items.json
+++ /dev/null
@@ -1,57 +0,0 @@
-[
- {
- "mode": "NULLABLE",
- "name": "id",
- "type": "INTEGER"
- },
- {
- "mode": "NULLABLE",
- "name": "order_id",
- "type": "INTEGER"
- },
- {
- "mode": "NULLABLE",
- "name": "user_id",
- "type": "INTEGER"
- },
- {
- "mode": "NULLABLE",
- "name": "product_id",
- "type": "INTEGER"
- },
- {
- "mode": "NULLABLE",
- "name": "inventory_item_id",
- "type": "INTEGER"
- },
- {
- "mode": "NULLABLE",
- "name": "status",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "created_at",
- "type": "TIMESTAMP"
- },
- {
- "mode": "NULLABLE",
- "name": "shipped_at",
- "type": "TIMESTAMP"
- },
- {
- "mode": "NULLABLE",
- "name": "delivered_at",
- "type": "TIMESTAMP"
- },
- {
- "mode": "NULLABLE",
- "name": "returned_at",
- "type": "TIMESTAMP"
- },
- {
- "mode": "NULLABLE",
- "name": "sale_price",
- "type": "FLOAT"
- }
-]
diff --git a/fast/stages/3-data-platform-dev/demo/data/schemas/landing/orders.json b/fast/stages/3-data-platform-dev/demo/data/schemas/landing/orders.json
deleted file mode 100644
index bb872ca55..000000000
--- a/fast/stages/3-data-platform-dev/demo/data/schemas/landing/orders.json
+++ /dev/null
@@ -1,47 +0,0 @@
-[
- {
- "mode": "NULLABLE",
- "name": "order_id",
- "type": "INTEGER"
- },
- {
- "mode": "NULLABLE",
- "name": "user_id",
- "type": "INTEGER"
- },
- {
- "mode": "NULLABLE",
- "name": "status",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "gender",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "created_at",
- "type": "TIMESTAMP"
- },
- {
- "mode": "NULLABLE",
- "name": "returned_at",
- "type": "TIMESTAMP"
- },
- {
- "mode": "NULLABLE",
- "name": "shipped_at",
- "type": "TIMESTAMP"
- },
- {
- "mode": "NULLABLE",
- "name": "delivered_at",
- "type": "TIMESTAMP"
- },
- {
- "mode": "NULLABLE",
- "name": "num_of_item",
- "type": "INTEGER"
- }
-]
diff --git a/fast/stages/3-data-platform-dev/demo/data/schemas/landing/products.json b/fast/stages/3-data-platform-dev/demo/data/schemas/landing/products.json
deleted file mode 100644
index da9182209..000000000
--- a/fast/stages/3-data-platform-dev/demo/data/schemas/landing/products.json
+++ /dev/null
@@ -1,47 +0,0 @@
-[
- {
- "mode": "NULLABLE",
- "name": "id",
- "type": "INTEGER"
- },
- {
- "mode": "NULLABLE",
- "name": "cost",
- "type": "FLOAT"
- },
- {
- "mode": "NULLABLE",
- "name": "category",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "name",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "brand",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "retail_price",
- "type": "FLOAT"
- },
- {
- "mode": "NULLABLE",
- "name": "department",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "sku",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "distribution_center_id",
- "type": "INTEGER"
- }
-]
diff --git a/fast/stages/3-data-platform-dev/demo/data/schemas/landing/users.json b/fast/stages/3-data-platform-dev/demo/data/schemas/landing/users.json
deleted file mode 100644
index d4a86c28d..000000000
--- a/fast/stages/3-data-platform-dev/demo/data/schemas/landing/users.json
+++ /dev/null
@@ -1,81 +0,0 @@
-[
- {
- "mode": "NULLABLE",
- "name": "id",
- "type": "INTEGER"
- },
- {
- "mode": "NULLABLE",
- "name": "first_name",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "last_name",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "email",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "age",
- "type": "INTEGER"
- },
- {
- "mode": "NULLABLE",
- "name": "gender",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "state",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "street_address",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "postal_code",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "city",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "country",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "latitude",
- "type": "FLOAT"
- },
- {
- "mode": "NULLABLE",
- "name": "longitude",
- "type": "FLOAT"
- },
- {
- "mode": "NULLABLE",
- "name": "traffic_source",
- "type": "STRING"
- },
- {
- "mode": "NULLABLE",
- "name": "created_at",
- "type": "TIMESTAMP"
- },
- {
- "name": "user_geom",
- "type": "GEOGRAPHY"
- }
-]
diff --git a/fast/stages/3-data-platform-dev/demo/diagram.png b/fast/stages/3-data-platform-dev/demo/diagram.png
deleted file mode 100644
index 5f4921df3..000000000
Binary files a/fast/stages/3-data-platform-dev/demo/diagram.png and /dev/null differ
diff --git a/fast/stages/3-data-platform-dev/demo/main.tf b/fast/stages/3-data-platform-dev/demo/main.tf
deleted file mode 100644
index 79bd45918..000000000
--- a/fast/stages/3-data-platform-dev/demo/main.tf
+++ /dev/null
@@ -1,48 +0,0 @@
-/**
- * Copyright 2025 Google LLC
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-module "land-cs-0" {
- source = "../../../../modules/gcs"
- project_id = var.project_id
- prefix = var.prefix
- name = "lnd-cs-0"
- encryption_key = try(var.encryption_keys[var.location].storage, null)
- location = var.location
- storage_class = "REGIONAL"
- force_destroy = true
-}
-
-module "land-bq-0" {
- source = "../../../../modules/bigquery-dataset"
- project_id = var.project_id
- id = "${replace(var.prefix, "-", "_")}_lnd_bq_0"
- encryption_key = try(var.encryption_keys[var.location].bigquery, null)
- location = var.location
-}
-
-module "cur-bq-0" {
- source = "../../../../modules/bigquery-dataset"
- project_id = var.project_id
- id = "${replace(var.prefix, "-", "_")}_cur_bq_0"
- encryption_key = try(var.encryption_keys[var.location].bigquery, null)
- location = var.location
- authorized_datasets = [
- {
- project_id = var.project_id,
- dataset_id = var.authorized_dataset_on_curated
- }
- ]
-}
diff --git a/fast/stages/3-data-platform-dev/demo/outputs.tf b/fast/stages/3-data-platform-dev/demo/outputs.tf
deleted file mode 100644
index e40a5e5e1..000000000
--- a/fast/stages/3-data-platform-dev/demo/outputs.tf
+++ /dev/null
@@ -1,53 +0,0 @@
-/**
- * Copyright 2025 Google LLC
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-output "composer_environment_name" {
- description = "The name of the Composer environment."
- value = var.composer_config.environment_name
-}
-
-output "composer_project_id" {
- description = "The project ID where the Composer environment is located."
- value = var.composer_config.project_id
-}
-
-output "dp_processing_service_account" {
- description = "Service account for data processing."
- value = var.dp_processing_service_account
-}
-
-output "landing_gcs_bucket" {
- description = "The name of the landing GCS bucket."
- value = module.land-cs-0.name
-}
-
-output "location" {
- description = "The location/region used for resources."
- value = var.location
-}
-
-resource "local_file" "composer_variables" {
- content = templatefile("composer/variables.tf.tpl", {
- dp_project = var.project_id
- location = var.location
- dp_processing_service_account = var.dp_processing_service_account
- land_gcs = module.land-cs-0.bucket.name
- land_bq_dataset = module.land-bq-0.dataset_id
- curated_bq_dataset = module.cur-bq-0.dataset_id
- exposure_bq_dataset = var.authorized_dataset_on_curated
- })
- filename = "${path.module}/composer/variables.json"
-}
diff --git a/fast/stages/3-data-platform-dev/demo/providers.tf b/fast/stages/3-data-platform-dev/demo/providers.tf
deleted file mode 100644
index dd56a321b..000000000
--- a/fast/stages/3-data-platform-dev/demo/providers.tf
+++ /dev/null
@@ -1,24 +0,0 @@
-/**
- * Copyright 2025 Google LLC
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-provider "google" {
- impersonate_service_account = var.impersonate_service_account
-}
-provider "google-beta" {
- impersonate_service_account = var.impersonate_service_account
-}
-
-# end provider.tf for data-product
diff --git a/fast/stages/3-data-platform-dev/demo/terraform.tfvars.sample b/fast/stages/3-data-platform-dev/demo/terraform.tfvars.sample
deleted file mode 100644
index f405bf3e1..000000000
--- a/fast/stages/3-data-platform-dev/demo/terraform.tfvars.sample
+++ /dev/null
@@ -1,10 +0,0 @@
-authorized_dataset_on_curated = "