diff --git a/FACTORIES.md b/FACTORIES.md index 35e492bc3..1a52b505c 100644 --- a/FACTORIES.md +++ b/FACTORIES.md @@ -90,8 +90,6 @@ The following table details how FAST stages implement factory patterns. | **2-security** | `projects` | Module-Backed (Factory) | `project-factory` | | **2-security** | `certificate_authorities` | Stage-Implemented (Module) | `certificate-authority-service` | | **2-security** | `keyrings` (KMS) | Stage-Implemented (Module) | `kms` | -| **3-data-platform-dev** | `aspect_types` | Module-Backed (Factory) | `dataplex-aspect-types` | -| **3-data-platform-dev** | `data_domains` | Native (Complex) | Multiple | | **3-secops-dev** | `rules`, `reference_lists` | Module-Backed (Factory) | `secops-rules` | ## Maintenance Guide diff --git a/fast/stages/0-org-setup/datasets/classic-gcd/defaults.yaml b/fast/stages/0-org-setup/datasets/classic-gcd/defaults.yaml index f8ac4c1ea..f1e401c26 100644 --- a/fast/stages/0-org-setup/datasets/classic-gcd/defaults.yaml +++ b/fast/stages/0-org-setup/datasets/classic-gcd/defaults.yaml @@ -93,11 +93,3 @@ output_files: bucket: $storage_buckets:iac-0/iac-stage-state prefix: 2-project-factory service_account: $iam_principals:service_accounts/iac-0/iac-pf-ro - 3-data-platform-dev: - bucket: $storage_buckets:iac-0/iac-stage-state - prefix: 3-data-platform-dev - service_account: $iam_principals:service_accounts/iac-0/iac-dp-dev-rw - 3-data-platform-dev-ro: - bucket: $storage_buckets:iac-0/iac-stage-state - prefix: 3-data-platform-dev - service_account: $iam_principals:service_accounts/iac-0/iac-dp-dev-ro diff --git a/fast/stages/0-org-setup/datasets/classic-gcd/projects/core/iac-0.yaml b/fast/stages/0-org-setup/datasets/classic-gcd/projects/core/iac-0.yaml index 18029513e..3fe001a5c 100644 --- a/fast/stages/0-org-setup/datasets/classic-gcd/projects/core/iac-0.yaml +++ b/fast/stages/0-org-setup/datasets/classic-gcd/projects/core/iac-0.yaml @@ -130,12 +130,6 @@ buckets: - $iam_principals:service_accounts/iac-0/iac-pf-rw 
$custom_roles:storage_viewer: - $iam_principals:service_accounts/iac-0/iac-pf-ro - 3-data-platform-dev: - iam: - roles/storage.admin: - - $iam_principals:service_accounts/iac-0/iac-dp-dev-rw - $custom_roles:storage_viewer: - - $iam_principals:service_accounts/iac-0/iac-dp-dev-ro # Terraform state bucket for FAST outputs iac-outputs: description: Terraform state for the org-level automation. @@ -143,14 +137,12 @@ buckets: iam: roles/storage.admin: - $iam_principals:service_accounts/iac-0/iac-org-rw - - $iam_principals:service_accounts/iac-0/iac-dp-dev-rw - $iam_principals:service_accounts/iac-0/iac-networking-rw - $iam_principals:service_accounts/iac-0/iac-security-rw - $iam_principals:service_accounts/iac-0/iac-pf-rw - $iam_principals:service_accounts/iac-0/iac-vpcsc-rw $custom_roles:storage_viewer: - $iam_principals:service_accounts/iac-0/iac-org-ro - - $iam_principals:service_accounts/iac-0/iac-dp-dev-ro - $iam_principals:service_accounts/iac-0/iac-networking-ro - $iam_principals:service_accounts/iac-0/iac-security-ro - $iam_principals:service_accounts/iac-0/iac-pf-ro @@ -196,11 +188,6 @@ service_accounts: display_name: IaC service account for project factory (read-only). iac-pf-rw: display_name: IaC service account for project factory (read-write). - # IaC service accounts for data platform (dev) stage - iac-dp-dev-ro: - display_name: IaC service account for data platform dev (read-only). - iac-dp-dev-rw: - display_name: IaC service account for data platform dev (read-write). # workload_identity_pools: # default: # display_name: Default pool for CI/CD. 
diff --git a/fast/stages/0-org-setup/datasets/classic/defaults.yaml b/fast/stages/0-org-setup/datasets/classic/defaults.yaml index ae1c16a48..32247348c 100644 --- a/fast/stages/0-org-setup/datasets/classic/defaults.yaml +++ b/fast/stages/0-org-setup/datasets/classic/defaults.yaml @@ -86,11 +86,3 @@ output_files: bucket: $storage_buckets:iac-0/iac-stage-state prefix: 2-project-factory service_account: $iam_principals:service_accounts/iac-0/iac-pf-ro - 3-data-platform-dev: - bucket: $storage_buckets:iac-0/iac-stage-state - prefix: 3-data-platform-dev - service_account: $iam_principals:service_accounts/iac-0/iac-dp-dev-rw - 3-data-platform-dev-ro: - bucket: $storage_buckets:iac-0/iac-stage-state - prefix: 3-data-platform-dev - service_account: $iam_principals:service_accounts/iac-0/iac-dp-dev-ro diff --git a/fast/stages/0-org-setup/datasets/classic/projects/core/iac-0.yaml b/fast/stages/0-org-setup/datasets/classic/projects/core/iac-0.yaml index f648827e0..91e49195a 100644 --- a/fast/stages/0-org-setup/datasets/classic/projects/core/iac-0.yaml +++ b/fast/stages/0-org-setup/datasets/classic/projects/core/iac-0.yaml @@ -135,12 +135,6 @@ buckets: - $iam_principals:service_accounts/iac-0/iac-pf-rw $custom_roles:storage_viewer: - $iam_principals:service_accounts/iac-0/iac-pf-ro - 3-data-platform-dev: - iam: - roles/storage.admin: - - $iam_principals:service_accounts/iac-0/iac-dp-dev-rw - $custom_roles:storage_viewer: - - $iam_principals:service_accounts/iac-0/iac-dp-dev-ro # Terraform state bucket for FAST outputs iac-outputs: description: Terraform state for the org-level automation. 
@@ -148,14 +142,12 @@ buckets: iam: roles/storage.admin: - $iam_principals:service_accounts/iac-0/iac-org-rw - - $iam_principals:service_accounts/iac-0/iac-dp-dev-rw - $iam_principals:service_accounts/iac-0/iac-networking-rw - $iam_principals:service_accounts/iac-0/iac-security-rw - $iam_principals:service_accounts/iac-0/iac-pf-rw - $iam_principals:service_accounts/iac-0/iac-vpcsc-rw $custom_roles:storage_viewer: - $iam_principals:service_accounts/iac-0/iac-org-ro - - $iam_principals:service_accounts/iac-0/iac-dp-dev-ro - $iam_principals:service_accounts/iac-0/iac-networking-ro - $iam_principals:service_accounts/iac-0/iac-security-ro - $iam_principals:service_accounts/iac-0/iac-pf-ro @@ -201,11 +193,6 @@ service_accounts: display_name: IaC service account for project factory (read-only). iac-pf-rw: display_name: IaC service account for project factory (read-write). - # IaC service accounts for data platform (dev) stage - iac-dp-dev-ro: - display_name: IaC service account for data platform dev (read-only). - iac-dp-dev-rw: - display_name: IaC service account for data platform dev (read-write). # workload_identity_pools: # default: # display_name: Default pool for CI/CD. 
diff --git a/fast/stages/0-org-setup/datasets/hardened/defaults.yaml b/fast/stages/0-org-setup/datasets/hardened/defaults.yaml index ae1c16a48..32247348c 100644 --- a/fast/stages/0-org-setup/datasets/hardened/defaults.yaml +++ b/fast/stages/0-org-setup/datasets/hardened/defaults.yaml @@ -86,11 +86,3 @@ output_files: bucket: $storage_buckets:iac-0/iac-stage-state prefix: 2-project-factory service_account: $iam_principals:service_accounts/iac-0/iac-pf-ro - 3-data-platform-dev: - bucket: $storage_buckets:iac-0/iac-stage-state - prefix: 3-data-platform-dev - service_account: $iam_principals:service_accounts/iac-0/iac-dp-dev-rw - 3-data-platform-dev-ro: - bucket: $storage_buckets:iac-0/iac-stage-state - prefix: 3-data-platform-dev - service_account: $iam_principals:service_accounts/iac-0/iac-dp-dev-ro diff --git a/fast/stages/0-org-setup/datasets/hardened/projects/core/iac-0.yaml b/fast/stages/0-org-setup/datasets/hardened/projects/core/iac-0.yaml index 5fb24b960..a0a302f62 100644 --- a/fast/stages/0-org-setup/datasets/hardened/projects/core/iac-0.yaml +++ b/fast/stages/0-org-setup/datasets/hardened/projects/core/iac-0.yaml @@ -220,12 +220,6 @@ buckets: - $iam_principals:service_accounts/iac-0/iac-pf-rw $custom_roles:storage_viewer: - $iam_principals:service_accounts/iac-0/iac-pf-ro - 3-data-platform-dev: - iam: - roles/storage.admin: - - $iam_principals:service_accounts/iac-0/iac-dp-dev-rw - $custom_roles:storage_viewer: - - $iam_principals:service_accounts/iac-0/iac-dp-dev-ro # Terraform state bucket for FAST outputs iac-outputs: description: Terraform state for the org-level automation. 
@@ -259,14 +253,12 @@ buckets: iam: roles/storage.admin: - $iam_principals:service_accounts/iac-0/iac-org-rw - - $iam_principals:service_accounts/iac-0/iac-dp-dev-rw - $iam_principals:service_accounts/iac-0/iac-networking-rw - $iam_principals:service_accounts/iac-0/iac-security-rw - $iam_principals:service_accounts/iac-0/iac-pf-rw - $iam_principals:service_accounts/iac-0/iac-vpcsc-rw $custom_roles:storage_viewer: - $iam_principals:service_accounts/iac-0/iac-org-ro - - $iam_principals:service_accounts/iac-0/iac-dp-dev-ro - $iam_principals:service_accounts/iac-0/iac-networking-ro - $iam_principals:service_accounts/iac-0/iac-security-ro - $iam_principals:service_accounts/iac-0/iac-pf-ro @@ -312,11 +304,6 @@ service_accounts: display_name: IaC service account for project factory (read-only). iac-pf-rw: display_name: IaC service account for project factory (read-write). - # IaC service accounts for data platform (dev) stage - iac-dp-dev-ro: - display_name: IaC service account for data platform dev (read-only). - iac-dp-dev-rw: - display_name: IaC service account for data platform dev (read-write). # workload_identity_pools: # default: # display_name: Default pool for CI/CD. diff --git a/fast/stages/2-security/datasets/classic/projects/dev-sec-core-0.yaml b/fast/stages/2-security/datasets/classic/projects/dev-sec-core-0.yaml index 82a1d77b1..75321bc22 100644 --- a/fast/stages/2-security/datasets/classic/projects/dev-sec-core-0.yaml +++ b/fast/stages/2-security/datasets/classic/projects/dev-sec-core-0.yaml @@ -28,7 +28,6 @@ iam_bindings: key_delegated: members: - $iam_principals:service_accounts/iac-0/iac-pf-rw - - $iam_principals:service_accounts/iac-0/iac-dp-dev-rw role: roles/cloudkms.admin condition: title: Delegated IAM grant on keys. 
diff --git a/fast/stages/3-data-platform-dev/.fast-stage.env b/fast/stages/3-data-platform-dev/.fast-stage.env deleted file mode 100644 index 470f66550..000000000 --- a/fast/stages/3-data-platform-dev/.fast-stage.env +++ /dev/null @@ -1,5 +0,0 @@ -FAST_STAGE_DESCRIPTION="Data Platform (dev)" -FAST_STAGE_LEVEL=3 -FAST_STAGE_NAME=data-platform-dev -FAST_STAGE_DEPS="0-globals 0-org-setup" -FAST_STAGE_OPTIONAL="2-networking 2-security" diff --git a/fast/stages/3-data-platform-dev/README.md b/fast/stages/3-data-platform-dev/README.md deleted file mode 100644 index 6182510c8..000000000 --- a/fast/stages/3-data-platform-dev/README.md +++ /dev/null @@ -1,376 +0,0 @@ -# Data Platform - -This stage focuses on the creation and management of an opinionated Data Platform architecture based on Google Cloud best practices. Its architecture is designed to be reliable, robust, and scalable, facilitating the continuous onboarding of new Data Products (or data workloads). - -The Data Platform's foundation, established in this stage, provides core capabilities without prescribing specific data handling, computation, or processing for individual workloads or Data Products. This allows flexibility in the technology choices for individual data domains, product and teams. The platform's approach is to encourage shared patterns, aiming to optimize, standardize, accelerate adoption, and ultimately reduce implementation costs and management overhead across Data Products. - -While our solution is conceptually guided by [Data Mesh principles on Google Cloud Platform](https://cloud.google.com/architecture/data-mesh), its adoption doesn't strictly require adherence to every Data Mesh concept. However, the 'Data as a Product' principle that treats data as a first-class citizen is fundamental to our implementation as well. For foundational elements like resource hierarchy, networking, and security, this stage can integrate with established [FAST stages](../README.md). 
Nevertheless, FAST is not a mandatory prerequisite; the solution can also be deployed independently, as long as all necessary dependencies are satisfied. - - -- [Design Overview and Choices](#design-overview-and-choices) - - [Data Platform Architecture](#data-platform-architecture) - - [Folder and Project Structure](#folder-and-project-structure) - - [Central Shared Services (Federated Governance)](#central-shared-services-federated-governance) - - [Data Domains (Domain-Driven Ownership)](#data-domains-domain-driven-ownership) - - [Data Products (DaaP)](#data-products-daap) - - [Teams and Personas](#teams-and-personas) - - [Central Data Platform Team](#central-data-platform-team) - - [Data Domain Team](#data-domain-team) - - [Data Product Team](#data-product-team) -- [How to run this stage](#how-to-run-this-stage) - - [FAST prerequisites](#fast-prerequisites) - - [Provider and Terraform variables](#provider-and-terraform-variables) - - [Variable Configuration](#variable-configuration) - - [CMEK Configuration](#cmek-configuration) - - [Data Domain and Product Data Files](#data-domain-and-product-data-files) - - [Context replacements](#context-replacements) -- [Files](#files) -- [Variables](#variables) -- [Outputs](#outputs) - - -## Design Overview and Choices - -### Data Platform Architecture - -The following diagram represent the high-level architecture of the Data Platform related projects and their associated resources managed by this stage: - -

- High level diagram. -

- -### Folder and Project Structure - -The stage manages the following three high-level logical components implemented via GCP folders and projects: - -- "Central Shared Services", a single central project e.g. `dev-dp-0`, in which Dataplex Catalog Aspect Types, Policy Tags, and Resource Manager tags a.k.a. "Secure Tags" are defined -- one or more "Data Domains", each composed of a folder e.g. `Data Domain 0` with a top-level shared project e.g. `dev-dp-domain-0` hosting shared resources such as Composer at the domain level, and an additional sub-folder for hosting data products e.g. "Data Products" -- one or more "Data Products" per domain, each composed of a project e.g. `dev-dp-product-0-a`, and related resources that are optional - -

-Folder structure. -

- -#### Central Shared Services (Federated Governance) - -Central Shared Services Project provides the standardized central capabilities to foster federated governance processes. These are implemented via established foundations that enable cross-domain data discovery, data sharing, self-service functionalities, and consistent governance. A key objective of these centrally managed services is to reduce the operational burden for data domains in producing and consuming data products, while also fostering the cross-domain collaboration necessary for the data mesh to operate efficiently. - -Core, platform-wide capabilities are delivered as shared services managed within a dedicated "Central Shared Services" project. These capabilities include: - -- [Dataplex Catalog Aspect Types](https://cloud.google.com/dataplex/docs/enrich-entries-metadata): Create a YAML file definition for each Aspect Type in the `data/aspect-types` directory. -- [Policy Tags](https://cloud.google.com/bigquery/docs/best-practices-policy-tags): Configure these via the `central_project_config.policy_tags` Terraform variable. - -Ensure the main `central_project_config` Terraform variable is configured according to your requirements. The `terraform.tfvars.sample` file provides a reference. - -#### Data Domains (Domain-Driven Ownership) - -Another foundational principle of a data mesh architecture is domain-driven ownership. A Data Domain, in this context, typically aligns with a business unit (BU) or a distinct function within an enterprise. For instance, Data Domains could represent a bank's mortgage department, or an enterprise's customer, distribution, finance, or HR departments. - -To support this ownership model and ensure clear separation, each logical Data Domain is provisioned with its own isolated GCP folder under the Data Platform parent with its collection of dedicated Google Cloud project(s). 
This structure establishes a distinct organizational boundary and resource separation, directly mapping to specific lines of business. - -Within each Data Domain, a corresponding Google Cloud "Data Domain" project serves as the primary container for all its specific services and resources. A dedicated Cloud Composer environment is provisioned within this project for orchestrating the domain's data workflows. To adhere to the principle of least privilege, this Composer environment operates with a dedicated IAM Service Account capable of impersonating the necessary Data Product-specific service accounts within that domain. - -Define data domains by creating individual sub-folders within the `data/data-domains` directory. Each domain's configuration, including IAM permissions, services to enable in its shared folder, and settings for its Cloud Composer instance, should be specified in a `_config.yaml` file within its respective subfolder. Refer to the - -We recommend granting data consumers access to exposed data product metadata through IAM Secure Tags created in the central project. - -```yaml -folder_config: - iam_bindings: - bigquery_metadata_viewer: - members: - - data-consumer-bi - role: roles/bigquery.metadataViewer - condition: - title: exposure - description: Expose via secure tag. - expression: resource.matchTag('exposure', 'allow') - dataplex_catalog_viewer: - members: - - data-consumer-bi - role: roles/dataplex.catalogViewer - condition: - title: exposure - description: Expose via secure tag. - expression: resource.matchTag('exposure', 'allow') -``` - -Refer to the ["domain-0"](./data/data-domains/domain-0) directory's [_config.yaml](./data/data-domains/domain-0/_config.yaml) as a starting point for setting up and configuring an example Data Domain. - -#### Data Products (DaaP) - -Each Data Product within a Data Domain (which is organized under the "Data Products" GCP Folder per domain) is encapsulated in its own dedicated Google Cloud Project. 
This separation is key to achieving modularity, scalability, flexibility, and distinct ownership for each product. - -For every Data Product project created, its exposure layer (e.g. specific BigQuery datasets or Cloud Storage buckets) is carefully configured and deployed. This involves assigning the relevant "Secure Tags" that were established in the central Shared Services project. Applying these tags is crucial as it allows for the implementation of precise IAM bindings based on IAM conditions, thereby ensuring fine-grained and secure data access in line with least privilege principles. - -Following a data-as-a-product approach, individual Data Product Owners are responsible for defining and deploying resources essential for data ingestion and processing (e.g., intermediate datasets, Dataproc instances etc.). The Central Data Platform Team provides a self-service platform and foundational building blocks to enable cross-domain sharing; it does not implement domain-specific resources or dictate how data products are built, including their ingestion and transformation pipelines. Reflecting this distributed ownership, each data product's infrastructure is managed in its own Terraform state. - -Within each Domain, you can define a new data product by creating a `data-product-{x}.yaml` file in the Data Domain's folder. In this YAML file, you can configure IAM permissions, services to enable, and specific resources or settings for the product's exposure layer. - -To grant data consumers access to exposed data, we recommend configuring IAM bindings within the `exposure_layer` variable in the data product's YAML configuration file. 
- -```yaml -exposure_layer: - bigquery: - datasets: - exposure: {} - iam: - "roles/bigquery.dataViewer": - - data-consumer-bi -``` - -Refer to the ["domain-0"](./data/data-domains/domain-0) directory's [product-0.yaml](./data/data-domains/domain-0/product-0.yaml) data product definition as a starting point for setting up and configuring an example Data Product. - -### Teams and Personas - -Effective data mesh operation relies on well-defined roles and responsibilities. Ownership is typically assigned to team archetypes, also referred to as functions. These functions represent the core user journeys of individual roles interacting with the data mesh. To clearly describe these journeys, specific user roles are defined within these functions. These user roles can be split or combined bases on specific needs and the scale of each enterprise. - -This stage provides four predefined role profiles discussed in this section, designed as initial examples that you can tailor to your needs. To simplify references to IAM principals, leverage the [context replacements](#context-replacements) logic. This involves configuring a mapping between concise short names and full group values in the `factories_config.context.iam_principals` Terraform variable. Once configured, you can use these short names to refer to the groups in your YAML files. - -|Group|Central Shared Services Project|Data Domain Folder|Data Product Project| -|-|:-:|:-:|:-:| -|Central Data Platform Team|`ADMIN`|`Log and Metrics Viewer`|`Log and Metrics Viewer`| -|Data Domain Team|`READ/USAGE`|`ADMIN`|`Log and Metrics Viewer`| -|Data Product Team|`READ/USAGE`|`READ/USAGE`|`ADMIN`| - -Please note that the above access scopes and the example configurations provided as part of this stage are for development purposes and therefore the IAM permissions and roles assigned to each team / user personas would have to be reviewed to follow least privilege principles in a production deployment. 
- -Refer to the [terraform.tfvars.sample](terraform.tfvars.sample), ["domain-0" _config.yaml](./data/data-domains/domain-0/_config.yaml) and [."domain-0" product-0.yaml](./data/data-domains/domain-0/product-0.yaml) files as a starting point for managing IAM. - -#### Central Data Platform Team - -This team defines the overall data platform architecture, establishes shared infrastructure, and enforces central data governance policies and standards across the data mesh. It empowers Data Producers with building blocks and best practices, ensuring high data quality, security, and trustworthiness for consumers. The primary focus is on providing the foundations for a self-serve data platform and universal governance standards for all users. The Central Data Platform team often collaborates with Data Governance functions within the enterprise. - -Typically, this group has `ADMIN` access to resources in the "Central Shared Services" project. While the team usually doesn't have access to the underlying data stored in each Data Domain and Data Product, it can access log and metrics information to monitor the data platform's health and performance. - -The team is also generally responsible for configuring IAM bindings on Data Domains and Data Products. - -#### Data Domain Team - -Aligned with specific business areas (e.g., customer, finance, distribution), this team holds clearly defined ownership of data within that domain. Key responsibilities include establishing and upholding the purpose, scope, and boundaries for data products within their domain. This is achieved through ongoing activities such as: - -- Creating and maintaining the domain-wide data product roadmap. -- Implementing robust data security measures specific to the domain. -- Ensuring adherence to all relevant compliance obligations for their data. -- Continuously monitoring the usage and performance of their data products. 
- -Typically, this team has `ADMIN` access to resources in their top-level shared project for the Data Domain and `READ/USAGE` access to relevant resources created in the Central Shared Services (e.g., Aspect Types). While the team usually does not have access to the underlying data stored in each individual Data Product (unless they are also the Product Owner), they can access log and metrics information to monitor the health and usage of their domain's data products and resources. This team is typically not the primary owner for configuring IAM bindings on individual data products, as this responsibility often lies with Data Product Owners or the Central Data Platform team. - -#### Data Product Team - -This team is responsible for the end-to-end lifecycle of a specific Data Product. Data Product Teams, which can be part of or work closely with a Data Domain Team, develop, operate, and maintain their assigned Data Product. Their tasks include defining the Data Product's schema and interfaces, implementing data ingestion and transformation pipelines, ensuring data quality and security for their product, managing its end-to-end lifecycle, and supporting its data consumers. - -Typically, this group has `ADMIN` access to resources within their Data Product project(s). They also usually have `READ/USAGE` access to relevant resources in the Central Shared Services project (e.g., Aspect Types) and the Data Domain's top-level shared project (e.g., Cloud Composer). This team is generally not the primary owner for configuring overarching IAM bindings, as that responsibility often lies elsewhere. - -Key responsibilities for the Data Product Team include: - -- Identifying and configuring the necessary resources within their data product project to perform ETL operations for the exposure layer. These resources should be deployed in a separate Terraform state, using the dedicated automation service account created for each data product. 
-- Configuring Policy Tags to protect PII and sensitive data. -- Defining and managing metadata for aspects related to tables and other resources within their data product's exposure layer. - -When using BigQuery in the exposure layer, we recommend using [authorized datasets](https://cloud.google.com/bigquery/docs/authorized-datasets). This can be achieved by configuring the exposure dataset to authorize access to the underlying dataset that hosts the curated data. - -## How to run this stage - -If this stage is deployed within a FAST-based GCP organization, we recommend executing it after foundational FAST `stage-2` components like `networking` and `security`. This is the recommended flow as specific data platform features in this stage might depend on configurations from these earlier stages. Although this stage can be run independently, instructions for such a standalone setup are beyond the scope of this document. - -### FAST prerequisites - -This stage needs specific automation resources, and permissions granted on those that allow control of selective IAM roles on specific networking and security resources. - -Network permissions are needed to associate data domain or product projects to Shared VPC hosts and grant network permissions to data platform managed service accounts. They are mandatory when deploying Composer. - -Security permissions are only needed when using CMEK encryption, to grant the relevant IAM roles to data platform service agents on the encryption keys used. - -The ["Classic FAST" dataset](../0-org-setup/README.md#classic-fast-dataset) in the bootstrap stage already contains the configuration for a development Data Platform. Adapting it to multiple environments, or for a multi-environment setup is relatively trivial and left as an exercise to the user. - -What is missing from the default dataset are IAM grants on security resources. 
They can be added from the security stage by populating the `stage_configs.security.iam_admin_delegated` and `stage_configs.security.iam_viewer` variables with the identities of the Data Platform service account. - -### Provider and Terraform variables - -As all other FAST stages, the [mechanism used to pass variable values and pre-built provider files from one stage to the next](../0-org-setup/README.md#output-files-and-cross-stage-variables) is also leveraged here. - -The commands to link or copy the provider and terraform variable files can be easily derived from the `fast-links.sh` script in the FAST stages folder, passing it a single argument with the local output files folder (if configured) or the GCS output bucket in the automation project (derived from stage 0 outputs). The following examples demonstrate both cases, and the resulting commands that then need to be copy/pasted and run. - -```bash -# File linking commands for Data Platform (dev) stage - -# provider file -ln -s ~/fast-config/providers/3-data-platform-dev-providers.tf ./ - -# input files from other stages -ln -s ~/fast-config/tfvars/0-globals.auto.tfvars.json ./ -ln -s ~/fast-config/tfvars/0-org-setup.auto.tfvars.json ./ - -# conventional location for this stage terraform.tfvars (manually managed) -ln -s ~/fast-config/3-data-platform-dev.auto.tfvars ./ - -# optional files -ln -s ~/fast-config/tfvars/2-networking.auto.tfvars.json ./ -ln -s ~/fast-config/tfvars/2-security.auto.tfvars.json ./ -``` - -```bash -../fast-links.sh gs://xxx-prod-iac-core-outputs-0 - -# File linking commands for Data Platform (dev) stage - -# provider file -gcloud storage cp gs://xxx-prod-iac-core-outputs-0/providers/3-data-platform-dev-providers.tf ./ - -# input files from other stages -gcloud storage cp gs://xxx-prod-iac-core-outputs-0/tfvars/0-globals.auto.tfvars.json ./ -gcloud storage cp gs://xxx-prod-iac-core-outputs-0/tfvars/0-org-setup.auto.tfvars.json ./ - -# conventional location for this stage terraform.tfvars 
(manually managed) -gcloud storage cp gs://xxx-prod-iac-core-outputs-0/3-data-platform-dev.auto.tfvars ./ - -# optional files -gcloud storage cp gs://xxx-prod-iac-core-outputs-0/tfvars/2-networking.auto.tfvars.json ./ -gcloud storage cp gs://xxx-prod-iac-core-outputs-0/tfvars/2-security.auto.tfvars.json ./ -``` - -### Variable Configuration - -The default data files provided as an example makes a few assumptions that needs to be matched by corresponding Terraform variables configured for the stage: - -- the `location` variable needs to be explicitly configured, as it's used as a default location for buckets, datasets, and Composer; locations can be individually overridden but a default needs to be in place -- the domain `deploy_config.composer.node_config.subnetwork` attribute needs to match the location defined above; Composer network and subnetwork use interpolation from FAST networking outputs, explicit IDs can be used instead if needed -- IAM roles for the domain and product refer to generic `dp-product-a-0` and `data-consumer-bi` groups, these need to be defined via the `factories_config.context.iam_principals` variable, or changed to explicit IAM principals (e.g. 
`group:foo@example.com`) - -### CMEK Configuration - -The stage can be provisioned with CMEK keys configured for composer, bigquery datasets and storage bucket by using this configuration for the `encryption_keys` variable: - -```hcl -encryption_keys = { - bigquery = { - "europe-west1" = "projects/myproject/locations/europe-west1/keyRings/dev-primary-default/cryptoKeys/bigquery" - } - composer = { - "europe-west1" = "projects/myproject/locations/europe-west1/keyRings/dev-primary-default/cryptoKeys/composer" - } - storage = { - "europe-west1" = "projects/myproject/locations/europe-west1/keyRings/dev-primary-default/cryptoKeys/storage" - } -} -``` - -### Data Domain and Product Data Files - -The formats for both types of data files are controlled via [schemas](./schemas/), which can generally be used directly in development environments to provide error checking and autocompletion. - -### Context replacements - -This stage is designed so that factory files are as much as possible organization and resource agnostic, so that they can be portable across installations (e.g. for different environments, or partner/customer organizations). - -This is mostly achieved via context replacements in factory files, where IAM principals and a few other attributes can use short names from the `factories_config.context` variable or from internally managed resources, which are then expanded to full principals at runtime. 
- -For example, configuring the `factories_config.context` variable: - -```hcl -factories_config = { - context = { - iam_principals = { - data-consumer-bi = "group:data-consumer-bi@example.com" - } - } -} -``` - -Allows using the group short name in templates: - -```yaml -folder_config: - iam_by_principals: - data-consumer-bi: - - roles/datacatalog.viewer - - roles/dataplex.catalogViewer - - roles/datalineage.viewer -``` - -Or within a data domain definition, service accounts can be referenced in project-level IAM via their short name: - -```yaml -service_accounts: - rw: - description: Automation (rw). -project_config: - iam: - roles/owner: - - rw -``` - -The following table lists the available substitutions. - -| resource | attributes | context expansions | -| --------------- | ----------------------- | -------------------------------------------------------------------------------------- | -| central project | IAM principals | `var.factories_config.context.iam_principals` | -| central project | tag IAM principals | `var.factories_config.context.iam_principals` | -| domain folder | IAM principals | `var.factories_config.context.iam_principals` | -| domain project | shared VPC host project | FAST VPC hosts | -| domain project | IAM principals | `var.factories_config.context.iam_principals` | -| domain sa | IAM principals | `var.factories_config.context.iam_principals`
domain service accounts | -| product project | shared VPC host project | FAST VPC hosts | -| product project | IAM principals | `var.factories_config.context.iam_principals`
product service accounts | -| product project | IAM conditions | `var.factories_config.context.iam_tag_values`
FAST tag values
exposure tag value | -| product sa | IAM principals | `var.factories_config.context.iam_principals` | -| composer | shared VPC network | FAST VPCs | -| composer | shared VPC subnetwork | FAST subnets | -| composer | encryption key | `var.factories_config.context.encryption_keys`
FAST KMS keys | -| exposed bucket | encryption key | `var.factories_config.context.encryption_keys`
FAST KMS keys | -| exposed dataset | encryption key | `var.factories_config.context.encryption_keys`
FAST KMS keys | - - - -## Files - -| name | description | modules | resources | -|---|---|---|---| -| [data-domains-automation.tf](./data-domains-automation.tf) | Data product automation resources. | gcs · iam-service-account | | -| [data-domains-composer.tf](./data-domains-composer.tf) | None | iam-service-account | google_composer_environment | -| [data-domains.tf](./data-domains.tf) | None | folder · iam-service-account · project | | -| [data-products-automation.tf](./data-products-automation.tf) | Data product automation resources. | gcs · iam-service-account | | -| [data-products-exposure.tf](./data-products-exposure.tf) | Data product exposure layer resources. | bigquery-dataset · gcs | | -| [data-products.tf](./data-products.tf) | Data product project, service account and exposed resources. | iam-service-account · project | | -| [factory.tf](./factory.tf) | None | | | -| [main.tf](./main.tf) | Locals and project-level resources. | data-catalog-policy-tag · dataplex-aspect-types · project | | -| [outputs.tf](./outputs.tf) | Stage outputs. | | google_storage_bucket_object · local_file | -| [variables-fast.tf](./variables-fast.tf) | None | | | -| [variables.tf](./variables.tf) | Module variables. | | | - -## Variables - -| name | description | type | required | default | producer | -|---|---|:---:|:---:|:---:|:---:| -| [automation](variables-fast.tf#L17) | Automation resources created by the bootstrap stage. | object({…}) | ✓ | | 0-org-setup | -| [billing_account](variables-fast.tf#L26) | Billing account id. If billing account is not part of the same org set `is_org_level` to false. | object({…}) | ✓ | | 0-org-setup | -| [environments](variables-fast.tf#L34) | Environment names. | object({…}) | ✓ | | 0-org-setup | -| [prefix](variables-fast.tf#L69) | Prefix used for resources that need unique names. Use a maximum of 9 chars for organizations, and 11 chars for tenants. | string | ✓ | | 0-org-setup | -| [aspect_types](variables.tf#L17) | Aspect templates. 
Merged with those defined via the factory. | map(object({…})) | | {} | | -| [central_project_config](variables.tf#L48) | Configuration for the top-level central project. | object({…}) | | {} | | -| [encryption_keys](variables.tf#L90) | Default encryption keys for services, in service => { region => key id } format. Overridable on a per-object basis. | object({…}) | | {} | | -| [exposure_config](variables.tf#L101) | Data exposure configuration. | object({…}) | | {} | | -| [factories_config](variables.tf#L119) | Configuration for the resource factories. | object({…}) | | {} | | -| [folder_ids](variables-fast.tf#L45) | Folder name => id mappings. | map(string) | | {} | 0-org-setup | -| [host_project_ids](variables-fast.tf#L53) | Shared VPC host project name => id mappings. | map(string) | | {} | 2-networking | -| [kms_keys](variables-fast.tf#L61) | KMS key ids. | map(string) | | {} | 2-security | -| [location](variables.tf#L134) | Default location used when no location is specified. | string | | "europe-west1" | | -| [outputs_location](variables.tf#L141) | Enable writing provider, tfvars and CI/CD workflow files to local filesystem. Leave null to disable. | string | | null | | -| [regions](variables-fast.tf#L79) | Region mappings. | map(string) | | {} | 2-networking | -| [secure_tags](variables.tf#L147) | Resource manager tags created in the central project. | map(object({…})) | | {} | | -| [stage_config](variables.tf#L168) | Stage configuration used to find environment and resource ids, and to generate names. | object({…}) | | {…} | | -| [subnet_self_links](variables-fast.tf#L87) | Subnet VPC name => { name => self link } mappings. | map(map(string)) | | {} | 2-networking | -| [tag_values](variables-fast.tf#L95) | FAST-managed resource manager tag values. | map(string) | | {} | 0-org-setup | -| [vpc_self_links](variables-fast.tf#L103) | Shared VPC name => self link mappings. 
| map(string) | | {} | 2-networking | - -## Outputs - -| name | description | sensitive | consumers | -|---|---|:---:|---| -| [aspect_types](outputs.tf#L201) | Aspect types defined in central project. | | | -| [central_project](outputs.tf#L206) | Central project attributes. | | | -| [data_domains](outputs.tf#L211) | Data domain attributes. | | | -| [policy_tags](outputs.tf#L216) | Policy tags defined in central project. | | | -| [secure_tags](outputs.tf#L221) | Secure tags defined in central project. | | | - diff --git a/fast/stages/3-data-platform-dev/data-domains-automation.tf b/fast/stages/3-data-platform-dev/data-domains-automation.tf deleted file mode 100644 index f149ed323..000000000 --- a/fast/stages/3-data-platform-dev/data-domains-automation.tf +++ /dev/null @@ -1,78 +0,0 @@ -/** - * Copyright 2025 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -# tfdoc:file:description Data product automation resources. 
- -locals { - dd_automation = { - for k, v in local.data_domains : - k => v if v.automation != null - } - dd_automation_keys = { - for k, v in local.dd_automation : k => try( - v.automation.encryption_key, - var.encryption_keys.storage[try( - v.automation.location, - var.location - )], - null - ) - } -} - -module "dd-automation-bucket" { - source = "../../../modules/gcs" - for_each = local.dd_automation - project_id = module.dd-projects[each.key].project_id - prefix = local.prefix - name = "${each.value.short_name}-state" - location = try( - each.value.automation.location, - var.location - ) - encryption_key = local.dd_automation_keys[each.key] - iam = { - "roles/storage.admin" = [ - module.dd-automation-sa["${each.key}/rw"].iam_email - ] - "roles/storage.objectViewer" = concat( - [ - module.dd-automation-sa["${each.key}/ro"].iam_email - ], - [ - for m in each.value.automation.impersonation_principals : lookup( - var.factories_config.context.iam_principals, m, m - ) - ] - ) - } -} - -module "dd-automation-sa" { - source = "../../../modules/iam-service-account" - for_each = { for v in local.dd_automation_sa : v.key => v } - project_id = module.dd-projects[each.value.dd].project_id - prefix = each.value.prefix - name = each.value.name - description = each.value.description - iam = { - "roles/iam.serviceAccountTokenCreator" = [ - for m in each.value.impersonation_principals : lookup( - var.factories_config.context.iam_principals, m, m - ) - ] - } -} diff --git a/fast/stages/3-data-platform-dev/data-domains-composer.tf b/fast/stages/3-data-platform-dev/data-domains-composer.tf deleted file mode 100644 index 79fc675ff..000000000 --- a/fast/stages/3-data-platform-dev/data-domains-composer.tf +++ /dev/null @@ -1,119 +0,0 @@ -/** - * Copyright 2025 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -locals { - dd_composer = { - for k, v in local.data_domains : k => merge( - { region = var.location, short_name = v.short_name }, - try(v.deploy_config.composer, {}) - ) if(v.deploy_config.composer != null) - } - dd_composer_keys = { - for k, v in local.dd_composer : k => try( - v.encryption_key, - var.encryption_keys.composer[v.region], - null - ) - } -} - -module "dd-composer-sa" { - source = "../../../modules/iam-service-account" - for_each = local.dd_composer - project_id = module.dd-projects[each.key].project_id - prefix = local.prefix - name = "${each.value.short_name}-cmp-sa" - description = "Composer Service Account." -} - -resource "google_composer_environment" "default" { - for_each = local.dd_composer - project = module.dd-projects-iam[each.key].project_id - name = "${var.prefix}-${each.key}" - region = each.value.region - config { - enable_private_builds_only = try(each.value.private_builds, true) - enable_private_environment = try(each.value.private_environment, true) - environment_size = try( - each.value.environment_size, - "ENVIRONMENT_SIZE_SMALL" - ) - dynamic "encryption_config" { - for_each = local.dd_composer_keys[each.key] == null ? 
[] : [""] - content { - kms_key_name = lookup( - local.kms_keys, - local.dd_composer_keys[each.key], - local.dd_composer_keys[each.key] - ) - } - } - # TODO: implement the same context fail mode used in the project factory - node_config { - service_account = try( - each.value.node_config.service_account, - module.dd-composer-sa[each.key].email - ) - network = try( - var.vpc_self_links[each.value.node_config.network], - each.value.node_config.network, - null - ) - subnetwork = try( - var.subnet_self_links[each.value.node_config.network][each.value.node_config.subnetwork], - each.value.node_config.subnetwork, - null - ) - } - software_config { - image_version = "composer-3-airflow-2" - cloud_data_lineage_integration { - enabled = true - } - } - workloads_config { - dag_processor { - cpu = try(each.value.workloads_config.dag_processor.cpu, 0.5) - memory_gb = try(each.value.workloads_config.dag_processor.memory_gb, 2) - storage_gb = try(each.value.workloads_config.dag_processor.storage_gb, 1) - count = try(each.value.workloads_config.dag_processor.count, 1) - } - scheduler { - cpu = try(each.value.workloads_config.scheduler.cpu, 0.5) - memory_gb = try(each.value.workloads_config.scheduler.memory_gb, 2) - storage_gb = try(each.value.workloads_config.scheduler.storage_gb, 1) - count = try(each.value.workloads_config.scheduler.count, 1) - } - triggerer { - cpu = try(each.value.workloads_config.triggerer.cpu, 0.5) - memory_gb = try(each.value.workloads_config.triggerer.memory_gb, 2) - count = try(each.value.workloads_config.triggerer.count, 1) - } - web_server { - cpu = try(each.value.workloads_config.web_server.cpu, 0.5) - memory_gb = try(each.value.workloads_config.web_server.memory_gb, 2) - storage_gb = try(each.value.workloads_config.web_server.storage_gb, 1) - } - worker { - cpu = try(each.value.workloads_config.worker.cpu, 0.5) - memory_gb = try(each.value.workloads_config.worker.memory_gb, 2) - storage_gb = try(each.value.workloads_config.worker.storage_gb, 1) - 
min_count = try(each.value.workloads_config.worker.min_count, 1) - max_count = try(each.value.workloads_config.worker.max_count, 1) - } - } - } -} diff --git a/fast/stages/3-data-platform-dev/data-domains.tf b/fast/stages/3-data-platform-dev/data-domains.tf deleted file mode 100644 index 4d70747f9..000000000 --- a/fast/stages/3-data-platform-dev/data-domains.tf +++ /dev/null @@ -1,263 +0,0 @@ -/** - * Copyright 2025 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -locals { - dd_services = { - for k, v in local.data_domains : k => distinct(concat( - v.project_config.services, - lookup(local.dd_composer, k, null) == null ? [] : [ - "composer.googleapis.com", - "storage.googleapis.com" - ] - )) - } -} - -module "dd-folders" { - source = "../../../modules/folder" - for_each = local.data_domains - parent = var.folder_ids[var.stage_config.name] - name = each.value.name - iam = { - for k, v in each.value.folder_config.iam : k => [ - for m in v : lookup( - var.factories_config.context.iam_principals, m, m - ) - ] - } - iam_bindings = { - for k, v in each.value.folder_config.iam_bindings : k => merge(v, { - members = [ - for m in v.members : lookup( - var.factories_config.context.iam_principals, m, m - ) - ] - condition = try(v.condition, null) == null ? 
null : { - title = v.condition.title - description = try(v.condition.description, null) - expression = templatestring(v.condition.expression, { - tag_values = local.tag_values - }) - } - }) - } - iam_bindings_additive = { - for k, v in each.value.folder_config.iam_bindings_additive : k => merge(v, { - member = lookup( - var.factories_config.context.iam_principals, v.member, v.member - ) - condition = try(v.condition, null) == null ? null : { - title = v.condition.title - description = try(v.condition.description, null) - expression = templatestring(v.condition.expression, { - tag_values = local.tag_values - }) - } - }) - } - iam_by_principals = { - for principal, roles_list in { - for k, v in each.value.folder_config.iam_by_principals : - lookup(var.factories_config.context.iam_principals, k, k) => v... - } : - principal => flatten(roles_list) - } -} - -module "dd-dp-folders" { - source = "../../../modules/folder" - for_each = local.data_domains - parent = module.dd-folders[each.key].id - name = "Data Products" - iam = try(each.value.deploy_config.composer, null) == null ? {} : { - "roles/iam.serviceAccountTokenCreator" = [ - module.dd-composer-sa[each.key].iam_email - ] - } -} - -module "dd-projects" { - source = "../../../modules/project" - for_each = local.data_domains - billing_account = var.billing_account.id - name = "${each.value.short_name}-shared-0" - parent = module.dd-folders[each.key].id - prefix = local.prefix - labels = { - data_domain = each.key - } - services = local.dd_services[each.key] - service_encryption_key_ids = merge( - lookup(local.dd_composer, each.key, null) == null ? {} : { - "composer.googleapis.com" = compact([ - try(local.dd_composer_keys[each.key], null) == null - ? null - : lookup( - local.kms_keys, - local.dd_composer_keys[each.key], - local.dd_composer_keys[each.key] - ) - ]) - }, - lookup(local.dd_automation_keys, each.key, null) == null ? 
{} : { - "storage.googleapis.com" = compact([ - try(local.dd_automation_keys[each.key], null) == null - ? null - : lookup( - local.kms_keys, - local.dd_automation_keys[each.key], - local.dd_automation_keys[each.key] - ) - ]) - }, - ) -} - -module "dd-projects-iam" { - source = "../../../modules/project" - for_each = local.data_domains - name = module.dd-projects[each.key].project_id - project_reuse = { - use_data_source = false - attributes = { - name = module.dd-projects[each.key].name - number = module.dd-projects[each.key].number - services_enabled = local.dd_services[each.key] - } - } - iam = { - for k, v in each.value.project_config.iam : k => [ - for m in v : try( - var.factories_config.context.iam_principals[m], - module.dd-automation-sa["${each.key}/${m}"].iam_email, - module.dd-service-accounts["${each.key}/${m}"].iam_email, - m - ) - ] - } - iam_bindings = { - for k, v in each.value.project_config.iam_bindings : k => merge(v, { - members = [ - for m in v.members : try( - var.factories_config.context.iam_principals[m], - module.dd-automation-sa["${each.key}/${m}"].iam_email, - module.dd-service-accounts["${each.key}/${m}"].iam_email, - m - ) - ] - condition = try(v.condition, null) == null ? null : { - title = v.condition.title - description = try(v.condition.description, null) - expression = templatestring(v.condition.expression, { - tag_values = local.tag_values - }) - } - }) - } - iam_bindings_additive = merge( - { - for k, v in each.value.project_config.iam_bindings_additive : k => merge(v, { - member = try( - var.factories_config.context.iam_principals[v.member], - module.dd-automation-sa["${each.key}/${v.member}"].iam_email, - module.dd-service-accounts["${each.key}/${v.member}"].iam_email, - v.member - ) - condition = try(v.condition, null) == null ? 
null : { - title = v.condition.title - description = try(v.condition.description, null) - expression = templatestring(v.condition.expression, { - tag_values = local.tag_values - }) - } - }) - }, - try(each.value.deploy_config.composer, null) == null ? {} : { - composer_worker = { - member = module.dd-composer-sa[each.key].iam_email - role = "roles/composer.worker" - } - } - ) - iam_by_principals = { - for principal, roles_list in { - for k, v in each.value.project_config.iam_by_principals : - lookup(var.factories_config.context.iam_principals, k, k) => v... - } : - principal => flatten(roles_list) - } - shared_vpc_service_config = ( - each.value.project_config.shared_vpc_service_config == null - ? null - : { - host_project = lookup( - var.host_project_ids, - each.value.project_config.shared_vpc_service_config.host_project, - each.value.project_config.shared_vpc_service_config.host_project - ) - network_users = [ - for m in try(each.value.project_config.shared_vpc_service_config.network_users, []) : - try( - var.factories_config.context.iam_principals[m], - module.dd-automation-sa["${each.key}/${m}"].iam_email, - module.dd-service-accounts["${each.key}/${m}"].iam_email, - m - ) - ] - service_agent_iam = try( - each.value.project_config.shared_vpc_service_config.service_agent_iam, - {} - ) - service_iam_grants = try( - each.value.project_config.shared_vpc_service_config.service_iam_grants, - [] - ) - } - ) -} - -module "dd-service-accounts" { - source = "../../../modules/iam-service-account" - for_each = { for v in local.dd_service_accounts : v.key => v } - project_id = module.dd-projects[each.value.dd].project_id - prefix = local.prefix - name = each.value.name - description = each.value.description - iam = { - for k, v in each.value.iam : k => [ - for m in v : lookup( - var.factories_config.context.iam_principals, m, m - ) - ] - } - iam_bindings = { - for k, v in each.value.iam_bindings : k => merge(v, { - members = [ - for m in v.members : lookup( - 
var.factories_config.context.iam_principals, m, m - ) - ] - }) - } - iam_bindings_additive = { - for k, v in each.value.iam_bindings_additive : k => merge(v, { - member = lookup( - var.factories_config.context.iam_principals, v.member, v.member - ) - }) - } - iam_storage_roles = each.value.iam_storage_roles -} diff --git a/fast/stages/3-data-platform-dev/data-products-automation.tf b/fast/stages/3-data-platform-dev/data-products-automation.tf deleted file mode 100644 index eadf61c18..000000000 --- a/fast/stages/3-data-platform-dev/data-products-automation.tf +++ /dev/null @@ -1,78 +0,0 @@ -/** - * Copyright 2025 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -# tfdoc:file:description Data product automation resources. 
- -locals { - dp_automation_products = { - for k, v in local.data_products : - k => v if v.automation != null - } - dp_automation_products_keys = { - for k, v in local.dp_automation_products : k => try( - v.automation.encryption_key, - var.encryption_keys.storage[try( - v.automation.location, - var.location - )], - null - ) - } -} - -module "dp-automation-bucket" { - source = "../../../modules/gcs" - for_each = local.dp_automation_products - project_id = module.dd-projects[each.value.dd].project_id - prefix = local.prefix - name = "${each.value.short_name}-state" - location = try( - each.value.automation.location, - var.location - ) - encryption_key = local.dp_automation_products_keys[each.key] - iam = { - "roles/storage.admin" = [ - module.dp-automation-sa["${each.key}/rw"].iam_email - ] - "roles/storage.objectViewer" = concat( - [ - module.dp-automation-sa["${each.key}/ro"].iam_email - ], - [ - for m in each.value.automation.impersonation_principals : lookup( - var.factories_config.context.iam_principals, m, m - ) - ] - ) - } -} - -module "dp-automation-sa" { - source = "../../../modules/iam-service-account" - for_each = { for v in local.dp_automation_sa : v.key => v } - project_id = module.dp-projects[each.value.dp].project_id - prefix = each.value.prefix - name = each.value.name - description = each.value.description - iam = { - "roles/iam.serviceAccountTokenCreator" = [ - for m in each.value.impersonation_principals : lookup( - var.factories_config.context.iam_principals, m, m - ) - ] - } -} diff --git a/fast/stages/3-data-platform-dev/data-products-exposure.tf b/fast/stages/3-data-platform-dev/data-products-exposure.tf deleted file mode 100644 index f5c2e0956..000000000 --- a/fast/stages/3-data-platform-dev/data-products-exposure.tf +++ /dev/null @@ -1,86 +0,0 @@ -/** - * Copyright 2025 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -# tfdoc:file:description Data product exposure layer resources. - -module "dp-buckets" { - source = "../../../modules/gcs" - for_each = { - for v in local.dp_buckets : "${v.dp}/${v.key}" => v - } - project_id = module.dp-projects[each.value.dp].project_id - prefix = local.prefix - name = "${each.value.dps}-${each.value.short_name}-0" - location = each.value.location - encryption_key = ( - local.dp_bucket_keys[each.key] == null - ? null - : lookup( - local.kms_keys, - local.dp_bucket_keys[each.key], - local.dp_bucket_keys[each.key] - ) - ) - iam = { - for k, v in each.value.iam : k => [ - for m in v : try( - var.factories_config.context.iam_principals[m], - module.dp-automation-sa["${each.key}/${m}"].iam_email, - module.dp-service-accounts["${each.key}/${m}"].iam_email, - m - ) - ] - } - tag_bindings = { - exposure = ( - module.central-project.tag_values[var.exposure_config.tag_name].id - ) - } -} - -module "dp-datasets" { - source = "../../../modules/bigquery-dataset" - for_each = { - for v in local.dp_datasets : "${v.dp}/${v.key}" => v - } - project_id = module.dp-projects[each.value.dp].project_id - id = "${local.prefix_bq}_${each.value.dps}_${each.value.short_name}_0" - location = each.value.location - encryption_key = ( - local.dp_dataset_keys[each.key] == null - ? 
null - : lookup( - local.kms_keys, - local.dp_dataset_keys[each.key], - local.dp_dataset_keys[each.key] - ) - ) - iam = { - for k, v in each.value.iam : k => [ - for m in v : try( - var.factories_config.context.iam_principals[m], - module.dp-automation-sa["${each.key}/${m}"].iam_email, - module.dp-service-accounts["${each.key}/${m}"].iam_email, - m - ) - ] - } - tag_bindings = { - exposure = ( - module.central-project.tag_values[var.exposure_config.tag_name].id - ) - } -} diff --git a/fast/stages/3-data-platform-dev/data-products.tf b/fast/stages/3-data-platform-dev/data-products.tf deleted file mode 100644 index 1a2ce5c76..000000000 --- a/fast/stages/3-data-platform-dev/data-products.tf +++ /dev/null @@ -1,172 +0,0 @@ -/** - * Copyright 2025 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -# tfdoc:file:description Data product project, service account and exposed resources. 
- -module "dp-projects" { - source = "../../../modules/project" - for_each = local.data_products - billing_account = var.billing_account.id - name = "${each.value.dds}-${each.value.short_name}-0" - parent = module.dd-dp-folders[each.value.dd].id - prefix = local.prefix - labels = { - data_domain = each.value.dd - data_product = replace(each.key, "/", "_") - } - services = each.value.services - service_encryption_key_ids = { - "bigquery.googleapis.com" = distinct([ - for k, v in local.dp_dataset_keys : - lookup(local.kms_keys, v, v) - if startswith(k, each.key) && v != null - ]) - "storage.googleapis.com" = distinct([ - for k, v in local.dp_bucket_keys : - lookup(local.kms_keys, v, v) - if startswith(k, each.key) && v != null - ]) - } -} - -module "dp-projects-iam" { - source = "../../../modules/project" - for_each = local.data_products - name = module.dp-projects[each.key].project_id - project_reuse = { - use_data_source = false - attributes = { - name = module.dp-projects[each.key].name - number = module.dp-projects[each.key].number - services_enabled = each.value.services - } - } - iam = { - for k, v in each.value.iam : k => [ - for m in v : try( - var.factories_config.context.iam_principals[m], - module.dp-automation-sa["${each.key}/${m}"].iam_email, - module.dp-service-accounts["${each.key}/${m}"].iam_email, - m - ) - ] - } - iam_bindings = { - for k, v in each.value.iam_bindings : k => merge(v, { - members = [ - for m in v.members : try( - var.factories_config.context.iam_principals[m], - module.dp-automation-sa["${each.key}/${m}"].iam_email, - module.dp-service-accounts["${each.key}/${m}"].iam_email, - m - ) - ] - condition = try(v.condition, null) == null ? 
null : { - title = v.condition.title - description = try(v.condition.description, null) - expression = templatestring(v.condition.expression, { - tag_values = local.tag_values - }) - } - }) - } - iam_bindings_additive = { - for k, v in each.value.iam_bindings_additive : k => merge(v, { - member = try( - var.factories_config.context.iam_principals[v.member], - module.dp-automation-sa["${each.key}/${v.member}"].iam_email, - module.dp-service-accounts["${each.key}/${v.member}"].iam_email, - v.member - ) - condition = try(v.condition, null) == null ? null : { - title = v.condition.title - description = try(v.condition.description, null) - expression = templatestring(v.condition.expression, { - tag_values = local.tag_values - }) - } - }) - } - iam_by_principals = { - for k, v in each.value.iam_by_principals : try( - var.factories_config.context.iam_principals[k], - module.dp-automation-sa["${each.key}/${k}"].iam_email, - module.dp-service-accounts["${each.key}/${k}"].iam_email, - k - ) => v - } - shared_vpc_service_config = ( - each.value.shared_vpc_service_config == null - ? 
null - : { - host_project = lookup( - var.host_project_ids, - each.value.shared_vpc_service_config.host_project, - each.value.shared_vpc_service_config.host_project - ) - network_users = [ - for m in try(each.value.shared_vpc_service_config.network_users, []) : - try( - var.factories_config.context.iam_principals[m], - module.dp-automation-sa["${each.key}/${m}"].iam_email, - module.dp-service-accounts["${each.key}/${m}"].iam_email, - m - ) - ] - service_agent_iam = try( - each.value.shared_vpc_service_config.service_agent_iam, - {} - ) - service_iam_grants = try( - each.value.shared_vpc_service_config.service_iam_grants, - [] - ) - } - ) -} - -module "dp-service-accounts" { - source = "../../../modules/iam-service-account" - for_each = { for v in local.dp_service_accounts : v.key => v } - project_id = module.dp-projects[each.value.dp].project_id - prefix = each.value.prefix - name = each.value.name - description = each.value.description - iam = { - for k, v in each.value.iam : k => [ - for m in v : lookup( - var.factories_config.context.iam_principals, m, m - ) - ] - } - iam_bindings = { - for k, v in each.value.iam_bindings : k => merge(v, { - members = [ - for m in v.members : lookup( - var.factories_config.context.iam_principals, m, m - ) - ] - }) - } - iam_bindings_additive = { - for k, v in each.value.iam_bindings_additive : k => merge(v, { - member = lookup( - var.factories_config.context.iam_principals, v.member, v.member - ) - }) - } - iam_storage_roles = each.value.iam_storage_roles -} diff --git a/fast/stages/3-data-platform-dev/data/aspect-types/test-0.yaml b/fast/stages/3-data-platform-dev/data/aspect-types/test-0.yaml deleted file mode 100644 index d81db5678..000000000 --- a/fast/stages/3-data-platform-dev/data/aspect-types/test-0.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# yaml-language-server: $schema=../../schemas/aspect-type.schema.json - -display_name: "Basic template" -metadata_template: | - { - "name": "tf-basic-template", - "type": "record", - "recordFields": [ - { - "name": "source", - "type": "string", - "annotations": { - "displayName": "Source", - "description": "Specifies the source of data." - }, - "index": 1, - "constraints": { - "required": true - } - }, - { - "name": "owner", - "type": "string", - "annotations": { - "displayName": "Owner", - "description": "Specifies the data owner." - }, - "index": 2, - "constraints": {} - } - ] - } diff --git a/fast/stages/3-data-platform-dev/data/data-domains/domain-0/_config.yaml b/fast/stages/3-data-platform-dev/data/data-domains/domain-0/_config.yaml deleted file mode 100644 index df5a0581b..000000000 --- a/fast/stages/3-data-platform-dev/data/data-domains/domain-0/_config.yaml +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# yaml-language-server: $schema=../../../schemas/data-domain.schema.json - -name: Domain 0 -short_name: d0 - -automation: - impersonation_principals: - - dp-product-a-0 - -deploy_config: - composer: - {} - # Uncomment for VPC Network Connectivity - # region defaults to var.location - # node_config: - # network: dev-net-spoke-0 - # subnetwork: europe-west1/dev-dataplatform - -project_config: - iam: - roles/owner: - - rw - roles/viewer: - - ro - roles/composer.environmentAndStorageObjectAdmin: - - dp-product-a-0 - iam_by_principals: - dp-platform: - - roles/composer.environmentAndStorageObjectUser - - roles/monitoring.viewer - - roles/logging.viewer - dp-product-a-0: - - roles/composer.environmentAndStorageObjectAdmin - - roles/monitoring.viewer - - roles/logging.viewer - dp-domain-a: - - roles/composer.environmentAndStorageObjectAdmin - - roles/monitoring.viewer - - roles/logging.viewer - - services: - - composer.googleapis.com - - datacatalog.googleapis.com - - dataplex.googleapis.com - - datalineage.googleapis.com - # Uncomment for shared VPC Network configuration - # shared_vpc_service_config: - # host_project: dev-net-spoke-0 - # service_agent_iam: - # roles/composer.sharedVpcAgent: - # - composer - -folder_config: - iam_bindings: - bigquery_metadata_viewer: - members: - - dp-platform - - dp-domain-a - - dp-product-a-0 - - data-consumer-bi - role: roles/bigquery.metadataViewer - condition: - title: exposure - description: Expose via secure tag. - expression: resource.matchTag('exposure', 'allow') - dataplex_catalog_viewer: - members: - - dp-platform - - dp-domain-a - - dp-product-a-0 - - data-consumer-bi - role: roles/dataplex.catalogViewer - condition: - title: exposure - description: Expose via secure tag. 
- expression: resource.matchTag('exposure', 'allow') diff --git a/fast/stages/3-data-platform-dev/data/data-domains/domain-0/product-0.yaml b/fast/stages/3-data-platform-dev/data/data-domains/domain-0/product-0.yaml deleted file mode 100644 index 94dcc436f..000000000 --- a/fast/stages/3-data-platform-dev/data/data-domains/domain-0/product-0.yaml +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# yaml-language-server: $schema=../../../schemas/data-product.schema.json - -short_name: p0 - -services: - - bigquery.googleapis.com - - cloudaicompanion.googleapis.com - - cloudresourcemanager.googleapis.com - - composer.googleapis.com - - datacatalog.googleapis.com - - dataplex.googleapis.com - - datalineage.googleapis.com - - storage.googleapis.com - -automation: - impersonation_principals: - - dp-product-a-0 - -exposure_layer: - bigquery: - datasets: - exposure: {} - iam: - "roles/bigquery.dataViewer": - - data-consumer-bi - storage: - buckets: - exposed: {} - iam: - "roles/storage.objectViewer": - - data-consumer-bi - "roles/storage.bucketViewer": - - data-consumer-bi - -iam_by_principals: - rw: - - roles/editor - ro: - - roles/viewer - dp-product-a-0: - - "roles/dataplex.catalogEditor" - - "roles/bigquery.dataOwner" - - "roles/bigquery.jobUser" - - "roles/datalineage.viewer" - - "roles/dataplex.dataScanCreator" - - "roles/logging.viewer" - - "roles/monitoring.viewer" - - "roles/serviceusage.serviceUsageViewer" - 
- "roles/storage.bucketViewer" - - "roles/storage.objectAdmin" - processing: - - "roles/bigquery.dataEditor" - - "roles/bigquery.jobUser" - - "roles/dataflow.admin" - - "roles/dataproc.editor" - - "roles/dataproc.worker" - - "roles/iam.serviceAccountUser" - - "roles/storage.bucketViewer" - - "roles/storage.objectAdmin" - -service_accounts: - processing: - description: Processing service account. diff --git a/fast/stages/3-data-platform-dev/demo/.gitignore b/fast/stages/3-data-platform-dev/demo/.gitignore deleted file mode 100644 index c6077b91e..000000000 --- a/fast/stages/3-data-platform-dev/demo/.gitignore +++ /dev/null @@ -1 +0,0 @@ -composer/variables.json diff --git a/fast/stages/3-data-platform-dev/demo/README.md b/fast/stages/3-data-platform-dev/demo/README.md deleted file mode 100644 index 297f6f785..000000000 --- a/fast/stages/3-data-platform-dev/demo/README.md +++ /dev/null @@ -1,190 +0,0 @@ -# Data Product Reference Example - -This folder contains a reference implementation of a Data Product showcasing the complete lifecycle from raw data ingestion to curated analytics-ready datasets. The example demonstrates how to create Data Products within the [Data Platform stage](../README.md) of Fabric FAST. It utilizes the automation service account and shared services created by the Data Platform stage. 
- -Our example consists of a batch ELT pipeline that processes and joins individual CSV data files from Cloud Storage to BigQuery using the publicly available theLook eCommerce dataset: - -## Components - -This reference implementation includes: - -- **Infrastructure as Code**: Terraform modules for deploying GCP resources -- **Data Schemas**: BigQuery table schemas in JSON format for structured data validation -- **Orchestration**: Cloud Composer (Apache Airflow) DAGs for automated pipeline execution -- **Sample Data**: Utility script to download theLook eCommerce reference tables - -## Getting Started - -### Prerequisites - -- Google Cloud SDK installed and configured -- Terraform >= 1.9.0 -- `jq` command-line JSON processor -- Access to the automation service account from the previous stage - -Ensure that you are authenticated with the `gcloud` CLI using the user that has the relevant access to -both the Domain Shared Resources as well as the Data Product GCP projects: - -```bash -gcloud auth login -gcloud auth application-default login -``` - -### 1. Infrastructure Setup - -**1. Configure Terraform Variables** - - ```bash - cp terraform.tfvars.sample terraform.tfvars - # Edit terraform.tfvars with your specific values - ``` - -**2. Deploy Infrastructure** - - ```bash - terraform init - terraform apply - ``` - -### 2. Data Pipeline Setup - -**1. Set Environment Variables** - - ```bash - export LANDING_BUCKET=$(terraform output -raw landing_gcs_bucket) - export COMPOSER_PROJECT_ID=$(terraform output -raw composer_project_id) - export COMPOSER_ENV_NAME=$(terraform output -raw composer_environment_name) - export LOCATION=$(terraform output -raw location) - ``` - -**2. Deploy Data Schemas** - - ```bash - gcloud storage cp -r data/schemas/* gs://$LANDING_BUCKET/schemas - ``` - -**3. Source Sample Data** - - ```bash - ./data/get_thelook_data.sh gs://$LANDING_BUCKET - ``` - -**4. 
Configure Composer Environment** - - Update Composer environment variables from `composer/variables.json`: - - > **Note**: This step may take several minutes to complete. - - ```bash - # Copy Airflow JSON variable file into Composer data folder - gcloud composer environments storage data import \ - --project $COMPOSER_PROJECT_ID \ - --environment=$COMPOSER_ENV_NAME \ - --location $LOCATION \ - --source="composer/variables.json" - - # Import Airflow variables - gcloud composer environments run $COMPOSER_ENV_NAME \ - --project $COMPOSER_PROJECT_ID \ - --location $LOCATION \ - variables \ - -- import /home/airflow/gcs/data/variables.json - ``` - -**5. Deploy Airflow DAGs** - - ```bash - gcloud composer environments storage dags import \ - --project=$COMPOSER_PROJECT_ID \ - --environment=$COMPOSER_ENV_NAME \ - --location=$LOCATION \ - --source="composer/DAG-dp0" - ``` - - > **Note**: It may take several minutes for the DAGs to be parsed and become available in Composer. - -### 3. Pipeline Execution - -**1. Verify DAG Import** - - Navigate to the Composer UI in the Domain Shared Resources project and confirm that the DAGs have been successfully imported. - -**2. Execute Pipeline** - - Trigger the DAGs in the following sequence (wait for each to complete): - - 1. **`gcs2bq_table_create`** - Creates BigQuery tables with proper schemas - 2. **`gcs2bq_table_elt`** - Executes the ELT pipeline to process data - -## Architecture Overview - -The data product implements a three-tier architecture: - -

- High level diagram. -

- -Curated data will be made accessible through authorized views within the `exposure` dataset. - -### Data Storage Layers - -- **Landing Zone** (`{prefix}-land-cs-0`): Raw CSV files stored in Cloud Storage -- **Raw Layer** (`{prefix}_lnd_bq_0`): Raw data loaded into BigQuery for processing -- **Curated Layer** (`{prefix}_cur_bq_0`): Processed, analytics-ready datasets - -## Troubleshooting - -### Common Issues - -- **DAG Import Failures**: Ensure the Composer environment is fully initialized before importing DAGs -- **Permission Errors**: Verify that the user that you authenticated with via the `gcloud` CLI has the relevant permissions -- **Variable Configuration**: Double-check that `terraform.tfvars` is properly configured - -### Useful Commands - -```bash -# Check Terraform outputs -terraform output - -# Verify bucket contents -gcloud storage ls gs://$LANDING_BUCKET --recursive - -# Check Composer environment status -gcloud composer environments describe $COMPOSER_ENV_NAME \ - --project $COMPOSER_PROJECT_ID \ - --location $LOCATION -``` - - - -## Files - -| name | description | modules | resources | -|---|---|---|---| -| [main.tf](./main.tf) | Module-level locals and resources. | bigquery-dataset · gcs | | -| [outputs.tf](./outputs.tf) | Module outputs. | | local_file | -| [variables.tf](./variables.tf) | Module variables. | | | - -## Variables - -| name | description | type | required | default | producer | -|---|---|:---:|:---:|:---:|:---:| -| [authorized_dataset_on_curated](variables.tf#L16) | Authorized Dataset. | string | ✓ | | | -| [composer_config](variables.tf#L21) | Composer environment configuration. | object({…}) | ✓ | | | -| [dp_processing_service_account](variables.tf#L30) | Service account for data processing via Composer impersonation. | string | ✓ | | | -| [impersonate_service_account](variables.tf#L47) | Service account to impersonate for Google Cloud providers. 
| string | ✓ | | | -| [prefix](variables.tf#L60) | Prefix used for resources that need unique names. Use a maximum of 9 chars for organizations, and 11 chars for tenants. | string | ✓ | | | -| [project_id](variables.tf#L69) | Project ID to deploy resources. | string | ✓ | | | -| [encryption_keys](variables.tf#L36) | Default encryption keys for services, in service => { region => key id } format. Overridable on a per-object basis. | object({…}) | | {} | | -| [location](variables.tf#L53) | Default location used when no location is specified. | string | | "europe-west8" | | - -## Outputs - -| name | description | sensitive | consumers | -|---|---|:---:|---| -| [composer_environment_name](outputs.tf#L17) | The name of the Composer environment. | | | -| [composer_project_id](outputs.tf#L22) | The project ID where the Composer environment is located. | | | -| [dp_processing_service_account](outputs.tf#L27) | Service account for data processing. | | | -| [landing_gcs_bucket](outputs.tf#L32) | The name of the landing GCS bucket. | | | -| [location](outputs.tf#L37) | The location/region used for resources. | | | - diff --git a/fast/stages/3-data-platform-dev/demo/composer/DAG-dp0/gcs2bq_elt.py b/fast/stages/3-data-platform-dev/demo/composer/DAG-dp0/gcs2bq_elt.py deleted file mode 100644 index c80663ace..000000000 --- a/fast/stages/3-data-platform-dev/demo/composer/DAG-dp0/gcs2bq_elt.py +++ /dev/null @@ -1,308 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -""" -BigQuery ELT Pipeline DAG - -This DAG implements a comprehensive customer purchases ELT pipeline that: -1. Loads data from GCS to BigQuery landing tables (users, orders, order_items, products) -2. Performs a 4-table join to create a comprehensive customer_purchases table -3. Creates an exposure view for analytics consumption - -Dependencies: Requires gcs2bq_table_create DAG to complete first -""" - -import datetime -import logging -import os - -from airflow import models -from airflow.decorators import task -from airflow.models import Variable -from airflow.operators import empty -from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook -from airflow.providers.google.cloud.operators.bigquery import ( - BigQueryInsertJobOperator,) -from airflow.providers.google.cloud.sensors.bigquery import ( - BigQueryTableExistenceSensor,) -from airflow.providers.google.cloud.transfers.gcs_to_bigquery import ( - GCSToBigQueryOperator,) -from airflow.utils.task_group import TaskGroup - -# Configuration -LANDING_TABLES = ["users", "orders", "order_items", "products"] - -# Environment variables (set from composer/variables.json) -DP_PROJECT = Variable.get("DP_PROJECT") -LAND_BQ_DATASET = Variable.get("LAND_BQ_DATASET") -CURATED_BQ_DATASET = Variable.get("CURATED_BQ_DATASET") -LAND_GCS = Variable.get("LAND_GCS") -DP_PROCESSING_SERVICE_ACCOUNT = Variable.get("DP_PROCESSING_SERVICE_ACCOUNT") -LOCATION = Variable.get("LOCATION") - -# Validate required environment variables -required_vars = { - "DP_PROJECT": DP_PROJECT, - "LAND_BQ_DATASET": LAND_BQ_DATASET, - "CURATED_BQ_DATASET": CURATED_BQ_DATASET, - "LAND_GCS": LAND_GCS, - "DP_PROCESSING_SERVICE_ACCOUNT": DP_PROCESSING_SERVICE_ACCOUNT, - "LOCATION": LOCATION, -} - -missing_vars = [var for var, value in required_vars.items() if not value] -if missing_vars: - raise ValueError(f"Missing required environment 
variables: {missing_vars}") - -logger = logging.getLogger(__name__) - - -def create_gcs_to_bq_task(table_name: str) -> GCSToBigQueryOperator: - """ - Factory function to create GCS to BigQuery load tasks. - - Args: - table_name: Name of the table to load - - Returns: - GCSToBigQueryOperator instance - """ - return GCSToBigQueryOperator( - task_id=f"{table_name}_load", - bucket=LAND_GCS, - source_objects=f"data/{table_name}/{table_name}_*.csv", - destination_project_dataset_table= - f"{DP_PROJECT}.{LAND_BQ_DATASET}.{table_name}", - source_format="CSV", - create_disposition="CREATE_IF_NEEDED", - write_disposition="WRITE_TRUNCATE", - schema_object=f"schemas/landing/{table_name}.json", - schema_object_bucket=LAND_GCS, - autodetect=False, - max_bad_records=1, - project_id=DP_PROJECT, - impersonation_chain=[DP_PROCESSING_SERVICE_ACCOUNT], - ) - - -def create_table_validation_task( - table_name: str, dataset_name: str, - task_prefix: str = "validate") -> BigQueryTableExistenceSensor: - """ - Factory function to create table validation tasks using sensor. 
- - Args: - table_name: Name of the table to validate - dataset_name: Name of the dataset - task_prefix: Prefix for task ID - - Returns: - BigQueryTableExistenceSensor instance - """ - return BigQueryTableExistenceSensor( - task_id=f"{task_prefix}_{table_name}_exists", - project_id=DP_PROJECT, - dataset_id=dataset_name, - table_id=table_name, - poke_interval=30, # Check every 30 seconds - timeout=600, # Timeout after 10 minutes - mode="reschedule", # Release worker slot between checks - impersonation_chain=[DP_PROCESSING_SERVICE_ACCOUNT], - ) - - -# DAG Definition -yesterday = datetime.datetime.now() - datetime.timedelta(days=1) - -default_args = { - "owner": "data-platform-team", - "start_date": yesterday, - "depends_on_past": False, - "email": Variable.get("alert_email_list", default_var="").split(","), - "email_on_failure": True, - "email_on_retry": False, - "retries": 2, - "retry_delay": datetime.timedelta(minutes=5), - "sla": datetime.timedelta(hours=2), -} - -with models.DAG( - "gcs2bq_elt", - default_args=default_args, - schedule_interval=None, - catchup=False, - max_active_runs=1, - tags=["bigquery", "elt", "data-platform", "customer-purchases"], - doc_md=__doc__, - dagrun_timeout=datetime.timedelta(hours=3), -) as dag: - # Start and end markers - start = empty.EmptyOperator(task_id="start", trigger_rule="all_success") - end = empty.EmptyOperator(task_id="end", trigger_rule="all_done") - - # Validate that all required tables exist before starting data load - with TaskGroup( - "validate_prerequisites", - tooltip="Validate all landing tables exist before data load", - ) as prerequisites_group: - prerequisite_validations = [ - create_table_validation_task( - table_name=table, - dataset_name=LAND_BQ_DATASET, - task_prefix="validate_landing", - ) for table in LANDING_TABLES - ] - # Validate that the curated customer_purchases table exists from a previous run - validate_customer_purchases_prereq = create_table_validation_task( - table_name="customer_purchases", - 
dataset_name=CURATED_BQ_DATASET, - task_prefix="validate_curated", - ) - - # Load data from GCS to BigQuery landing tables - with TaskGroup("load_landing_data", - tooltip="Load all data files to landing tables") as load_group: - load_tasks = [ - create_gcs_to_bq_task(table_name=table) for table in LANDING_TABLES - ] - - # Create comprehensive customer purchases join - customer_purchases_join = BigQueryInsertJobOperator( - task_id="create_customer_purchases", - project_id=DP_PROJECT, - configuration={ - "jobType": "QUERY", - "query": { - "query": - f""" - SELECT - -- User information - u.id as user_id, - u.first_name, - u.last_name, - u.email, - u.age, - u.gender, - u.state, - u.street_address, - u.postal_code, - u.city, - u.country, - u.latitude, - u.longitude, - u.traffic_source, - u.created_at as user_created_at, - u.user_geom, - - -- Order information - o.order_id, - o.status as order_status, - o.created_at as order_created_at, - o.returned_at as order_returned_at, - o.shipped_at as order_shipped_at, - o.delivered_at as order_delivered_at, - o.num_of_item, - - -- Order item information - oi.id as order_item_id, - oi.product_id, - oi.inventory_item_id, - oi.status as order_item_status, - oi.sale_price, - oi.created_at as order_item_created_at, - oi.shipped_at as order_item_shipped_at, - oi.delivered_at as order_item_delivered_at, - oi.returned_at as order_item_returned_at, - - -- Product information - p.cost, - p.category, - p.name, - p.brand, - p.retail_price, - p.department, - p.sku, - p.distribution_center_id - - FROM `{DP_PROJECT}.{LAND_BQ_DATASET}.users` u - JOIN `{DP_PROJECT}.{LAND_BQ_DATASET}.orders` o - ON u.id = o.user_id - JOIN `{DP_PROJECT}.{LAND_BQ_DATASET}.order_items` oi - ON o.order_id = oi.order_id - JOIN `{DP_PROJECT}.{LAND_BQ_DATASET}.products` p - ON oi.product_id = p.id - """, - "destinationTable": { - "projectId": DP_PROJECT, - "datasetId": CURATED_BQ_DATASET, - "tableId": "customer_purchases", - }, - "writeDisposition": - "WRITE_TRUNCATE", - 
"useLegacySql": - False, - }, - }, - impersonation_chain=[DP_PROCESSING_SERVICE_ACCOUNT], - ) - - @task(task_id="validate_customer_purchases_data") - def validate_customer_purchases_data_python(): - """ - Checks if the customer_purchases table has data using BigQueryHook - for robust cross-project execution. - """ - project_id = DP_PROJECT - dataset_id = CURATED_BQ_DATASET - table_id = "customer_purchases" - impersonation_account = DP_PROCESSING_SERVICE_ACCOUNT - - logging.info( - f"Executing data validation check on table: {project_id}.{dataset_id}.{table_id}" - ) - - # The hook will use the impersonation chain for all interactions - hook = BigQueryHook( - gcp_conn_id="google_cloud_default", # Assumes default connection - impersonation_chain=[impersonation_account], - location=LOCATION, - ) - - sql = f"SELECT COUNT(*) FROM `{project_id}.{dataset_id}.{table_id}`" - - # Use insert_job for cross-project execution with explicit project_id - job_config = {"query": {"query": sql, "useLegacySql": False}} - - job = hook.insert_job(configuration=job_config, project_id=project_id) - - # Extract results from the completed job - results = job.result() - records = [list(row) for row in results] - - if not records or not records[0] or records[0][0] == 0: - raise ValueError( - f"Data quality check failed: Table {project_id}.{dataset_id}.{table_id} is empty or has no rows." - ) - else: - row_count = records[0][0] - logging.info( - f"Data quality check passed: Table {project_id}.{dataset_id}.{table_id} contains {row_count} rows." 
- ) - - validate_customer_purchases_data = validate_customer_purchases_data_python() - - # Define dependencies - start >> prerequisites_group - prerequisites_group >> load_group - load_group >> customer_purchases_join - customer_purchases_join >> validate_customer_purchases_data >> end diff --git a/fast/stages/3-data-platform-dev/demo/composer/DAG-dp0/gcs2bq_table_create.py b/fast/stages/3-data-platform-dev/demo/composer/DAG-dp0/gcs2bq_table_create.py deleted file mode 100644 index 1d0aacf60..000000000 --- a/fast/stages/3-data-platform-dev/demo/composer/DAG-dp0/gcs2bq_table_create.py +++ /dev/null @@ -1,224 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -BigQuery Table Creation DAG - -This DAG creates BigQuery tables based on configuration stored in GCS. -It creates landing tables, curated tables, and an exposure view. 
-""" - -import datetime -import logging -import os - -from airflow import models -from airflow.models import Variable -from airflow.operators import empty -from airflow.providers.google.cloud.operators.bigquery import ( - BigQueryCreateTableOperator,) -from airflow.providers.google.cloud.sensors.bigquery import ( - BigQueryTableExistenceSensor,) -from airflow.utils.task_group import TaskGroup - -# Configuration -LANDING_TABLES = ["users", "orders", "order_items", "products"] -CURATED_TABLES = ["customer_purchases"] - -# Environment variables (set from Composer variables.json) -DP_PROJECT = Variable.get("DP_PROJECT") -LAND_BQ_DATASET = Variable.get("LAND_BQ_DATASET") -CURATED_BQ_DATASET = Variable.get("CURATED_BQ_DATASET") -EXPOSURE_BQ_DATASET = Variable.get("EXPOSURE_BQ_DATASET") -LAND_GCS = Variable.get("LAND_GCS") -DP_PROCESSING_SERVICE_ACCOUNT = Variable.get("DP_PROCESSING_SERVICE_ACCOUNT") - -# Validate required environment variables -required_vars = { - "DP_PROJECT": DP_PROJECT, - "LAND_BQ_DATASET": LAND_BQ_DATASET, - "CURATED_BQ_DATASET": CURATED_BQ_DATASET, - "EXPOSURE_BQ_DATASET": EXPOSURE_BQ_DATASET, - "LAND_GCS": LAND_GCS, - "DP_PROCESSING_SERVICE_ACCOUNT": DP_PROCESSING_SERVICE_ACCOUNT, -} - -missing_vars = [var for var, value in required_vars.items() if not value] -if missing_vars: - raise ValueError(f"Missing required environment variables: {missing_vars}") - -logger = logging.getLogger(__name__) - - -def create_bq_table_task(table_name: str, dataset_name: str, schema_path: str, - task_prefix: str = "") -> BigQueryCreateTableOperator: - """ - Factory function to create BigQuery table tasks. 
- - Args: - table_name: Name of the table to create - dataset_name: Name of the dataset - schema_path: Path to schema files in GCS - task_prefix: Prefix for task ID - - Returns: - BigQueryCreateTableOperator instance - """ - task_id = (f"{task_prefix}_{table_name}_create" - if task_prefix else f"{table_name}_create") - - return BigQueryCreateTableOperator( - task_id=task_id, - project_id=DP_PROJECT, - dataset_id=dataset_name, - table_id=table_name, - table_resource={}, - if_exists="log", - gcs_schema_object=f"gs://{LAND_GCS}/{schema_path}/{table_name}.json", - impersonation_chain=[DP_PROCESSING_SERVICE_ACCOUNT], - ) - - -def create_table_validation_task( - table_name: str, dataset_name: str, - task_prefix: str = "validate") -> BigQueryTableExistenceSensor: - """ - Factory function to create table validation tasks using sensor. - - Args: - table_name: Name of the table to validate - dataset_name: Name of the dataset - task_prefix: Prefix for task ID - - Returns: - BigQueryTableExistenceSensor instance - """ - return BigQueryTableExistenceSensor( - task_id=f"{task_prefix}_{table_name}_exists", - project_id=DP_PROJECT, - dataset_id=dataset_name, - table_id=table_name, - poke_interval=30, # Check every 30 seconds - timeout=600, # Timeout after 10 minutes - mode="reschedule", # Release worker slot between checks - impersonation_chain=[DP_PROCESSING_SERVICE_ACCOUNT], - ) - - -# DAG Definition -yesterday = datetime.datetime.now() - datetime.timedelta(days=1) - -default_args = { - "owner": "data-platform-team", - "start_date": yesterday, - "depends_on_past": False, - "email": Variable.get("alert_email_list", default_var="").split(","), - "email_on_failure": True, - "email_on_retry": False, - "retries": 2, - "retry_delay": datetime.timedelta(minutes=5), - "sla": datetime.timedelta(hours=1), -} - -with models.DAG( - "gcs2bq_table_create", - default_args=default_args, - schedule_interval=None, - catchup=False, - max_active_runs=1, - tags=["bigquery", "table-creation", 
"data-platform"], - doc_md=__doc__, - dagrun_timeout=datetime.timedelta(hours=2), -) as dag: - # Start and end markers - start = empty.EmptyOperator(task_id="start", trigger_rule="all_success") - end = empty.EmptyOperator(task_id="end", trigger_rule="all_done") - - # Create landing tables - with TaskGroup("create_landing_tables", - tooltip="Create all landing layer tables") as landing_group: - landing_tasks = [] - for table in LANDING_TABLES: - task = create_bq_table_task( - table_name=table, - dataset_name=LAND_BQ_DATASET, - schema_path="schemas/landing", - task_prefix="land", - ) - landing_tasks.append(task) - - # Create curated tables - with TaskGroup("create_curated_tables", - tooltip="Create all curated layer tables") as curated_group: - curated_tasks = [] - for table in CURATED_TABLES: - task = create_bq_table_task( - table_name=table, - dataset_name=CURATED_BQ_DATASET, - schema_path="schemas/curated", - task_prefix="curated", - ) - curated_tasks.append(task) - - # Validate all tables exist - with TaskGroup( - "validate_tables", - tooltip="Validate all tables were created") as validation_group: - # Create validation tasks for landing tables - landing_validations = [ - create_table_validation_task( - table_name=table, - dataset_name=LAND_BQ_DATASET, - task_prefix="validate_landing", - ) for table in LANDING_TABLES - ] - - # Create validation tasks for curated tables - curated_validations = [ - create_table_validation_task( - table_name=table, - dataset_name=CURATED_BQ_DATASET, - task_prefix="validate_curated", - ) for table in CURATED_TABLES - ] - - # Create exposure view - exposure_view = BigQueryCreateTableOperator( - task_id="exposure_view_create", - project_id=DP_PROJECT, - dataset_id=EXPOSURE_BQ_DATASET, - table_id="customer_purchases", - table_resource={ - "view": { - "query": - f"SELECT * FROM `{DP_PROJECT}.{CURATED_BQ_DATASET}.customer_purchases`", - "useLegacySql": - False, - }, - }, - if_exists="log", - 
impersonation_chain=[DP_PROCESSING_SERVICE_ACCOUNT], - ) - - # Validate exposure view exists - validate_exposure_view = create_table_validation_task( - table_name="customer_purchases", - dataset_name=EXPOSURE_BQ_DATASET, - task_prefix="validate_exposure", - ) - - # Define dependencies - start >> [landing_group, curated_group] - [landing_group, curated_group] >> validation_group - validation_group >> exposure_view - exposure_view >> validate_exposure_view >> end diff --git a/fast/stages/3-data-platform-dev/demo/composer/variables.tf.tpl b/fast/stages/3-data-platform-dev/demo/composer/variables.tf.tpl deleted file mode 100644 index 46ef77b90..000000000 --- a/fast/stages/3-data-platform-dev/demo/composer/variables.tf.tpl +++ /dev/null @@ -1,9 +0,0 @@ -{ - "DP_PROJECT": "${dp_project}", - "LOCATION": "${location}", - "DP_PROCESSING_SERVICE_ACCOUNT": "${dp_processing_service_account}", - "LAND_GCS": "${land_gcs}", - "LAND_BQ_DATASET": "${land_bq_dataset}", - "CURATED_BQ_DATASET": "${curated_bq_dataset}", - "EXPOSURE_BQ_DATASET": "${exposure_bq_dataset}" -} diff --git a/fast/stages/3-data-platform-dev/demo/data/get_thelook_data.sh b/fast/stages/3-data-platform-dev/demo/data/get_thelook_data.sh deleted file mode 100755 index b16c83c49..000000000 --- a/fast/stages/3-data-platform-dev/demo/data/get_thelook_data.sh +++ /dev/null @@ -1,84 +0,0 @@ -#!/bin/bash -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# Script to export data from BigQuery public dataset to GCS bucket -# Usage: ./export_bigquery_data.sh gs://your-bucket-name - -set -e # Exit on error - -# Check if argument is provided -if [ $# -eq 0 ]; then - echo "Error: No GCS bucket provided" - echo "Usage: $0 gs://your-bucket-name" - exit 1 -fi - -GCS_BUCKET=$1 - -# Validate that the bucket starts with gs:// -if [[ ! "$GCS_BUCKET" =~ ^gs:// ]]; then - echo "Error: GCS bucket must start with gs://" - echo "Example: gs://your-bucket-name" - exit 1 -fi - -# Check if bq command is available -if ! command -v bq &>/dev/null; then - echo "Error: bq command not found. Please install Google Cloud SDK." - exit 1 -fi - -# Remove trailing slash if present -GCS_BUCKET=${GCS_BUCKET%/} - -# Source project and dataset -SOURCE_PROJECT="bigquery-public-data" -SOURCE_DATASET="thelook_ecommerce" - -# Tables to export -TABLES=("users" "orders" "order_items" "products") - -echo "Starting export from ${SOURCE_PROJECT}.${SOURCE_DATASET} to $GCS_BUCKET" -echo "================================================" - -# Export each table -for table in "${TABLES[@]}"; do - echo -n "Exporting $table..." - - # Create destination path - DESTINATION="${GCS_BUCKET}/data/${table}/${table}_*.csv" - - # Execute bq extract command - if bq extract \ - --destination_format CSV \ - --field_delimiter=',' \ - --print_header=true \ - "bigquery-public-data:thelook_ecommerce.${table}" \ - "${DESTINATION}"; then - echo " SUCCESS" - else - echo " FAILED" - echo "Error: Failed to export $table" - exit 1 - fi -done - -echo "================================================" -echo "All tables exported successfully!" 
-echo "" -echo "Exported tables:" -for table in "${TABLES[@]}"; do - echo " - ${GCS_BUCKET}/data/${table}/" -done diff --git a/fast/stages/3-data-platform-dev/demo/data/schemas/curated/customer_purchases.json b/fast/stages/3-data-platform-dev/demo/data/schemas/curated/customer_purchases.json deleted file mode 100644 index bcaa28a6c..000000000 --- a/fast/stages/3-data-platform-dev/demo/data/schemas/curated/customer_purchases.json +++ /dev/null @@ -1,201 +0,0 @@ -[ - { - "mode": "NULLABLE", - "name": "user_id", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "first_name", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "last_name", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "email", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "age", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "gender", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "state", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "street_address", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "postal_code", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "city", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "country", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "latitude", - "type": "FLOAT" - }, - { - "mode": "NULLABLE", - "name": "longitude", - "type": "FLOAT" - }, - { - "mode": "NULLABLE", - "name": "traffic_source", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "user_created_at", - "type": "TIMESTAMP" - }, - { - "name": "user_geom", - "type": "GEOGRAPHY" - }, - { - "mode": "NULLABLE", - "name": "order_id", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "order_status", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "order_created_at", - "type": "TIMESTAMP" - }, - { - "mode": "NULLABLE", - "name": "order_returned_at", - "type": "TIMESTAMP" - }, - { - "mode": "NULLABLE", - "name": "order_shipped_at", - "type": 
"TIMESTAMP" - }, - { - "mode": "NULLABLE", - "name": "order_delivered_at", - "type": "TIMESTAMP" - }, - { - "mode": "NULLABLE", - "name": "num_of_item", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "order_item_id", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "product_id", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "inventory_item_id", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "order_item_status", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "order_item_created_at", - "type": "TIMESTAMP" - }, - { - "mode": "NULLABLE", - "name": "order_item_shipped_at", - "type": "TIMESTAMP" - }, - { - "mode": "NULLABLE", - "name": "order_item_delivered_at", - "type": "TIMESTAMP" - }, - { - "mode": "NULLABLE", - "name": "order_item_returned_at", - "type": "TIMESTAMP" - }, - { - "mode": "NULLABLE", - "name": "sale_price", - "type": "FLOAT" - }, - { - "mode": "NULLABLE", - "name": "cost", - "type": "FLOAT" - }, - { - "mode": "NULLABLE", - "name": "category", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "name", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "brand", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "retail_price", - "type": "FLOAT" - }, - { - "mode": "NULLABLE", - "name": "department", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "sku", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "distribution_center_id", - "type": "INTEGER" - } -] diff --git a/fast/stages/3-data-platform-dev/demo/data/schemas/landing/order_items.json b/fast/stages/3-data-platform-dev/demo/data/schemas/landing/order_items.json deleted file mode 100644 index 9b0d6829a..000000000 --- a/fast/stages/3-data-platform-dev/demo/data/schemas/landing/order_items.json +++ /dev/null @@ -1,57 +0,0 @@ -[ - { - "mode": "NULLABLE", - "name": "id", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "order_id", - "type": "INTEGER" - }, - { - "mode": 
"NULLABLE", - "name": "user_id", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "product_id", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "inventory_item_id", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "status", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "created_at", - "type": "TIMESTAMP" - }, - { - "mode": "NULLABLE", - "name": "shipped_at", - "type": "TIMESTAMP" - }, - { - "mode": "NULLABLE", - "name": "delivered_at", - "type": "TIMESTAMP" - }, - { - "mode": "NULLABLE", - "name": "returned_at", - "type": "TIMESTAMP" - }, - { - "mode": "NULLABLE", - "name": "sale_price", - "type": "FLOAT" - } -] diff --git a/fast/stages/3-data-platform-dev/demo/data/schemas/landing/orders.json b/fast/stages/3-data-platform-dev/demo/data/schemas/landing/orders.json deleted file mode 100644 index bb872ca55..000000000 --- a/fast/stages/3-data-platform-dev/demo/data/schemas/landing/orders.json +++ /dev/null @@ -1,47 +0,0 @@ -[ - { - "mode": "NULLABLE", - "name": "order_id", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "user_id", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "status", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "gender", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "created_at", - "type": "TIMESTAMP" - }, - { - "mode": "NULLABLE", - "name": "returned_at", - "type": "TIMESTAMP" - }, - { - "mode": "NULLABLE", - "name": "shipped_at", - "type": "TIMESTAMP" - }, - { - "mode": "NULLABLE", - "name": "delivered_at", - "type": "TIMESTAMP" - }, - { - "mode": "NULLABLE", - "name": "num_of_item", - "type": "INTEGER" - } -] diff --git a/fast/stages/3-data-platform-dev/demo/data/schemas/landing/products.json b/fast/stages/3-data-platform-dev/demo/data/schemas/landing/products.json deleted file mode 100644 index da9182209..000000000 --- a/fast/stages/3-data-platform-dev/demo/data/schemas/landing/products.json +++ /dev/null @@ -1,47 +0,0 @@ -[ - { - "mode": 
"NULLABLE", - "name": "id", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "cost", - "type": "FLOAT" - }, - { - "mode": "NULLABLE", - "name": "category", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "name", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "brand", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "retail_price", - "type": "FLOAT" - }, - { - "mode": "NULLABLE", - "name": "department", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "sku", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "distribution_center_id", - "type": "INTEGER" - } -] diff --git a/fast/stages/3-data-platform-dev/demo/data/schemas/landing/users.json b/fast/stages/3-data-platform-dev/demo/data/schemas/landing/users.json deleted file mode 100644 index d4a86c28d..000000000 --- a/fast/stages/3-data-platform-dev/demo/data/schemas/landing/users.json +++ /dev/null @@ -1,81 +0,0 @@ -[ - { - "mode": "NULLABLE", - "name": "id", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "first_name", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "last_name", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "email", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "age", - "type": "INTEGER" - }, - { - "mode": "NULLABLE", - "name": "gender", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "state", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "street_address", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "postal_code", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "city", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "country", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": "latitude", - "type": "FLOAT" - }, - { - "mode": "NULLABLE", - "name": "longitude", - "type": "FLOAT" - }, - { - "mode": "NULLABLE", - "name": "traffic_source", - "type": "STRING" - }, - { - "mode": "NULLABLE", - "name": 
"created_at", - "type": "TIMESTAMP" - }, - { - "name": "user_geom", - "type": "GEOGRAPHY" - } -] diff --git a/fast/stages/3-data-platform-dev/demo/diagram.png b/fast/stages/3-data-platform-dev/demo/diagram.png deleted file mode 100644 index 5f4921df3..000000000 Binary files a/fast/stages/3-data-platform-dev/demo/diagram.png and /dev/null differ diff --git a/fast/stages/3-data-platform-dev/demo/main.tf b/fast/stages/3-data-platform-dev/demo/main.tf deleted file mode 100644 index 79bd45918..000000000 --- a/fast/stages/3-data-platform-dev/demo/main.tf +++ /dev/null @@ -1,48 +0,0 @@ -/** - * Copyright 2025 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -module "land-cs-0" { - source = "../../../../modules/gcs" - project_id = var.project_id - prefix = var.prefix - name = "lnd-cs-0" - encryption_key = try(var.encryption_keys[var.location].storage, null) - location = var.location - storage_class = "REGIONAL" - force_destroy = true -} - -module "land-bq-0" { - source = "../../../../modules/bigquery-dataset" - project_id = var.project_id - id = "${replace(var.prefix, "-", "_")}_lnd_bq_0" - encryption_key = try(var.encryption_keys[var.location].bigquery, null) - location = var.location -} - -module "cur-bq-0" { - source = "../../../../modules/bigquery-dataset" - project_id = var.project_id - id = "${replace(var.prefix, "-", "_")}_cur_bq_0" - encryption_key = try(var.encryption_keys[var.location].bigquery, null) - location = var.location - authorized_datasets = [ - { - project_id = var.project_id, - dataset_id = var.authorized_dataset_on_curated - } - ] -} diff --git a/fast/stages/3-data-platform-dev/demo/outputs.tf b/fast/stages/3-data-platform-dev/demo/outputs.tf deleted file mode 100644 index e40a5e5e1..000000000 --- a/fast/stages/3-data-platform-dev/demo/outputs.tf +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Copyright 2025 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -output "composer_environment_name" { - description = "The name of the Composer environment." 
- value = var.composer_config.environment_name -} - -output "composer_project_id" { - description = "The project ID where the Composer environment is located." - value = var.composer_config.project_id -} - -output "dp_processing_service_account" { - description = "Service account for data processing." - value = var.dp_processing_service_account -} - -output "landing_gcs_bucket" { - description = "The name of the landing GCS bucket." - value = module.land-cs-0.name -} - -output "location" { - description = "The location/region used for resources." - value = var.location -} - -resource "local_file" "composer_variables" { - content = templatefile("composer/variables.tf.tpl", { - dp_project = var.project_id - location = var.location - dp_processing_service_account = var.dp_processing_service_account - land_gcs = module.land-cs-0.bucket.name - land_bq_dataset = module.land-bq-0.dataset_id - curated_bq_dataset = module.cur-bq-0.dataset_id - exposure_bq_dataset = var.authorized_dataset_on_curated - }) - filename = "${path.module}/composer/variables.json" -} diff --git a/fast/stages/3-data-platform-dev/demo/providers.tf b/fast/stages/3-data-platform-dev/demo/providers.tf deleted file mode 100644 index dd56a321b..000000000 --- a/fast/stages/3-data-platform-dev/demo/providers.tf +++ /dev/null @@ -1,24 +0,0 @@ -/** - * Copyright 2025 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -provider "google" { - impersonate_service_account = var.impersonate_service_account -} -provider "google-beta" { - impersonate_service_account = var.impersonate_service_account -} - -# end provider.tf for data-product diff --git a/fast/stages/3-data-platform-dev/demo/terraform.tfvars.sample b/fast/stages/3-data-platform-dev/demo/terraform.tfvars.sample deleted file mode 100644 index f405bf3e1..000000000 --- a/fast/stages/3-data-platform-dev/demo/terraform.tfvars.sample +++ /dev/null @@ -1,10 +0,0 @@ -authorized_dataset_on_curated = "" -composer_config = { - environment_name = " yamldecode(file("${local._dd_path}/${f}")) - } - _dp = flatten([ - for k, v in local.data_domains : [ - for f in try(fileset("${local._dd_path}/${k}", "**/*.yaml"), []) : merge( - yamldecode(file("${local._dd_path}/${k}/${f}")), - { - dd = k - dds = v.short_name - key = trimsuffix(basename(f), ".yaml") - } - ) if !endswith(f, "_config.yaml") - ] - ]) - data_domains = { - for k, v in local._dd_raw : k => { - name = v.name - short_name = lookup(v, "short_name", reverse(split("/", k))[0]) - automation = try(v.automation, null) - deploy_config = { - composer = try(v.deploy_config.composer, null) - } - folder_config = { - iam = try(v.folder_config.iam, {}) - iam_bindings = try(v.folder_config.iam_bindings, {}) - iam_bindings_additive = try(v.folder_config.iam_bindings_additive, {}) - iam_by_principals = try(v.folder_config.iam_by_principals, {}) - } - project_config = { - name = try(v.project_config.name, k) - deploy = merge( - { composer = null }, try(v.project_config.deploy, {}) - ) - services = try(v.project_config.services, []) - iam = try(v.project_config.iam, {}) - iam_bindings = try(v.project_config.iam_bindings, {}) - iam_bindings_additive = try(v.project_config.iam_bindings_additive, {}) - iam_by_principals = try(v.project_config.iam_by_principals, {}) - shared_vpc_service_config = try( - v.project_config.shared_vpc_service_config, null - ) - } - service_accounts = lookup(v, 
"service_accounts", {}) - } - } - data_products = { - for v in local._dp : "${v.dd}/${v.key}" => merge(v, { - short_name = lookup(v, "short_name", v.key) - services = distinct(concat( - lookup(v, "services", []), - try(v.exposed_resources.storage_buckets, null) == null ? [] : [ - "storage.googleapis.com" - ], - try(v.exposed_resources.bigquery_datasets, null) == null ? [] : [ - "bigquery.googleapis.com" - ] - )) - automation = try(v.automation, null) - exposure_layer = { - bigquery = { - datasets = try(v.exposure_layer.bigquery.datasets, {}) - iam = try(v.exposure_layer.bigquery.iam, {}) - } - storage = { - buckets = try(v.exposure_layer.storage.buckets, {}) - iam = try(v.exposure_layer.storage.iam, {}) - } - } - iam = lookup(v, "iam", {}) - iam_bindings = lookup(v, "iam_bindings", {}) - iam_bindings_additive = lookup(v, "iam_bindings_additive", {}) - iam_by_principals = lookup(v, "iam_by_principals", {}) - service_accounts = lookup(v, "service_accounts", {}) - shared_vpc_service_config = try( - v.shared_vpc_service_config, null - ) - }) - } - dd_automation_sa = flatten([ - for k, v in local.data_domains : [ - for n in ["ro", "rw"] : { - dd = k - key = "${k}/${n}" - name = "iac-${n}" - prefix = v.short_name - description = "Automation for ${v.short_name} (${n}.)" - impersonation_principals = lookup( - v.automation, "impersonation_principals", [] - ) - } - ] if v.automation != null - ]) - dd_service_accounts = flatten([ - for k, v in local.data_domains : [ - for sk, sv in v.service_accounts : { - dd = k - key = "${k}/${sk}" - name = lookup(sv, "name", "${v.short_name}-${sk}") - description = lookup(v, "description", null) - iam = lookup(sv, "iam", {}) - iam_bindings = lookup(sv, "iam_bindings", {}) - iam_bindings_additive = lookup(sv, "iam_bindings_additive", {}) - iam_storage_roles = lookup(sv, "iam_storage_roles", {}) - } - ] - ]) - dp_automation_sa = flatten([ - for k, v in local.data_products : [ - for n in ["ro", "rw"] : { - dp = k - key = "${k}/${n}" - name = 
"iac-${n}" - prefix = "${v.dds}-${v.short_name}" - description = "Automation for ${k} (${n}.)" - impersonation_principals = lookup( - v.automation, "impersonation_principals", [] - ) - } - ] if v.automation != null - ]) - dp_bucket_keys = { - for v in local.dp_buckets : "${v.dp}/${v.key}" => ( - v.encryption_key != null - ? v.encryption_key - : try(var.encryption_keys.storage[v.location], null) - ) - } - dp_buckets = flatten([ - for k, v in local.data_products : [ - for bk, bv in v.exposure_layer.storage.buckets : { - dp = k - dps = "${v.dds}-${v.short_name}" - iam = v.exposure_layer.storage.iam - key = bk - encryption_key = lookup(bv, "encryption_key", null) - short_name = lookup(bv, "short_name", bk) - location = lookup(bv, "location", var.location) - storage_class = lookup(bv, "storage_class", null) - } - ] - ]) - dp_dataset_keys = { - for v in local.dp_datasets : "${v.dp}/${v.key}" => ( - v.encryption_key != null - ? v.encryption_key - : try(var.encryption_keys.bigquery[v.location], null) - ) - } - dp_datasets = flatten([ - for k, v in local.data_products : [ - for dk, dv in v.exposure_layer.bigquery.datasets : { - dp = k - dps = replace("${v.dds}-${v.short_name}", "-", "_") - encryption_key = lookup(dv, "encryption_key", null) - iam = v.exposure_layer.bigquery.iam - key = dk - short_name = replace(lookup(dv, "short_name", dk), "-", "_") - location = lookup(dv, "location", var.location) - } - ] - ]) - dp_service_accounts = flatten([ - for k, v in local.data_products : [ - for sk, sv in v.service_accounts : { - dp = k - key = "${k}/${sk}" - name = lookup(sv, "name", sk) - prefix = "${v.dds}-${v.short_name}" - description = lookup(v, "description", null) - iam = lookup(sv, "iam", {}) - iam_bindings = lookup(sv, "iam_bindings", {}) - iam_bindings_additive = lookup(sv, "iam_bindings_additive", {}) - iam_storage_roles = lookup(sv, "iam_storage_roles", {}) - } - ] - ]) -} diff --git a/fast/stages/3-data-platform-dev/fast_version.txt 
b/fast/stages/3-data-platform-dev/fast_version.txt deleted file mode 100644 index 6a2f91bcd..000000000 --- a/fast/stages/3-data-platform-dev/fast_version.txt +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# FAST release: v54.3.0 \ No newline at end of file diff --git a/fast/stages/3-data-platform-dev/main.tf b/fast/stages/3-data-platform-dev/main.tf deleted file mode 100644 index 53c472f29..000000000 --- a/fast/stages/3-data-platform-dev/main.tf +++ /dev/null @@ -1,134 +0,0 @@ -/** - * Copyright 2025 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -# tfdoc:file:description Locals and project-level resources. 
- -locals { - environment = var.environments[var.stage_config.environment] - exp_tag = { - key = split("/", var.exposure_config.tag_name)[0] - value = split("/", var.exposure_config.tag_name)[1] - } - kms_keys = merge( - var.kms_keys, var.factories_config.context.kms_keys - ) - location = lookup(var.regions, var.location, var.location) - prefix = ( - "${var.prefix}-${local.environment.short_name}-${var.stage_config.short_name}" - ) - prefix_bq = replace(local.prefix, "-", "_") - tag_values = merge( - var.tag_values, - var.factories_config.context.tag_values, - { for k, v in module.central-project.tag_values : k => v.id } - ) -} - -module "central-project" { - source = "../../../modules/project" - billing_account = var.billing_account.id - name = var.central_project_config.short_name - parent = var.folder_ids[var.stage_config.name] - prefix = local.prefix - iam = { - for k, v in var.central_project_config.iam : k => [ - for m in v : lookup( - var.factories_config.context.iam_principals, m, m - ) - ] - } - iam_bindings = { - for k, v in var.central_project_config.iam_bindings : k => merge(v, { - members = [ - for m in v.members : lookup( - var.factories_config.context.iam_principals, m, m - ) - ] - }) - } - iam_bindings_additive = { - for k, v in var.central_project_config.iam_bindings_additive : k => merge(v, { - member = lookup( - var.factories_config.context.iam_principals, v.member, v.member - ) - }) - } - iam_by_principals = { - for principal, roles_list in { - for k, v in var.central_project_config.iam_by_principals : - lookup(var.factories_config.context.iam_principals, k, k) => v... - } : - principal => flatten(roles_list) - } - labels = { - environment = var.stage_config.environment - } - services = var.central_project_config.services - tags = merge(var.secure_tags, { - (local.exp_tag.key) = { - description = try( - var.secure_tags[local.exp_tag.key].description, - "Managed by the Terraform project module." 
- ) - iam = { - for k, v in try(var.secure_tags[local.exp_tag.key].iam, {}) : - k => [ - for m in v : lookup( - var.factories_config.context.iam_principals, m, m - ) - ] - } - values = merge( - try(var.secure_tags[local.exp_tag.key].values, {}), - { - (local.exp_tag.value) = { - description = try( - var.secure_tags[local.exp_tag.key].values[local.exp_tag.value].description, - "Managed by the Terraform project module." - ) - iam = { - for k, v in try(var.secure_tags[local.exp_tag.key].values[local.exp_tag.value].iam, {}) : - k => [ - for m in v : lookup( - var.factories_config.context.iam_principals, m, m - ) - ] - } - } - } - ) - } - }) -} - -module "central-aspect-types" { - source = "../../../modules/dataplex-aspect-types" - project_id = module.central-project.project_id - location = local.location - factories_config = { - aspect_types = var.factories_config.aspect_types - } - aspect_types = var.aspect_types -} - -# TODO: Migrate to new Policy Tag on BQ. -module "central-policy-tags" { - source = "../../../modules/data-catalog-policy-tag" - project_id = module.central-project.project_id - name = "tags" - location = var.location - tags = var.central_project_config.policy_tags -} diff --git a/fast/stages/3-data-platform-dev/outputs.tf b/fast/stages/3-data-platform-dev/outputs.tf deleted file mode 100644 index 74e7d983c..000000000 --- a/fast/stages/3-data-platform-dev/outputs.tf +++ /dev/null @@ -1,224 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# tfdoc:file:description Stage outputs. - -locals { - central_project = { - id = module.central-project.project_id - number = module.central-project.number - } - dd_attrs = { - for k, v in local.data_domains : k => { - automation = v.automation == null ? null : { - bucket = module.dd-automation-bucket[k].name - service_accounts = { - ro = module.dd-automation-sa["${k}/ro"].email - rw = module.dd-automation-sa["${k}/rw"].email - } - } - deployments = { - composer = lookup(local.dd_composer, k, null) == null ? null : { - airflow_uri = try( - google_composer_environment.default[k].config[0].airflow_uri, null - ) - dag_gcs_prefix = try( - google_composer_environment.default[k].config[0].dag_gcs_prefix, null - ) - } - } - data_products = { - for pk in lookup(local.dp_by_dd, k, []) : - split("/", pk)[1] => { - for kk, kv in local.dp_attrs[pk] : kk => kv if kk != "automation" - } - } - folder_ids = { - domain = module.dd-folders[k].id - products = module.dd-dp-folders[k].id - } - project = { - id = module.dd-projects[k].project_id - number = module.dd-projects[k].number - } - service_accounts = { - for sk in keys(v.service_accounts) : - sk => module.dd-service-accounts["${k}/${sk}"].email - } - } - } - dp_attrs = { - for k, v in local.data_products : k => { - automation = local.data_products[k].automation == null ? 
null : { - bucket = module.dp-automation-bucket[k].name - service_accounts = { - ro = module.dp-automation-sa["${k}/ro"].email - rw = module.dp-automation-sa["${k}/rw"].email - } - } - exposure = { - bigquery = { - for vv in lookup(local.exp_datasets_by_dp, k, []) : - split("/", vv)[2] => module.dp-datasets[vv].id - } - storage = { - for vv in lookup(local.exp_buckets_by_dp, k, []) : - split("/", vv)[2] => module.dp-buckets[vv].id - } - } - project = { - id = module.dp-projects[k].project_id - number = module.dp-projects[k].number - } - service_accounts = { - for sk in keys(v.service_accounts) : - sk => module.dp-service-accounts["${k}/${sk}"].email - } - } - } - dp_by_dd = { - for k, v in local.data_products : - v.dd => k... - } - exp_buckets_by_dp = { - for k, v in module.dp-buckets : - join("/", slice(split("/", k), 0, 2)) => k... - } - exp_datasets_by_dp = { - for k, v in module.dp-datasets : - join("/", slice(split("/", k), 0, 2)) => k... - } - files_prefix = "3-${var.stage_config.name}" - providers = merge( - { - for k, v in local.dd_attrs : - "${k}-providers.tf" => templatefile("templates/providers.tf.tpl", { - backend_extra = null - bucket = v.automation.bucket - name = k - sa = v.automation.service_accounts.rw - }) if v.automation != null - }, - { - for k, v in local.dd_attrs : - "${k}-r-providers.tf" => templatefile("templates/providers.tf.tpl", { - backend_extra = null - bucket = v.automation.bucket - name = k - sa = v.automation.service_accounts.ro - }) if v.automation != null - }, - { - for k, v in local.dp_attrs : - "${replace(k, "/", "-")}-providers.tf" => templatefile("templates/providers.tf.tpl", { - backend_extra = null - bucket = v.automation.bucket - name = k - sa = v.automation.service_accounts.rw - }) if v.automation != null - }, - { - for k, v in local.dp_attrs : - "${replace(k, "/", "-")}-r-providers.tf" => templatefile("templates/providers.tf.tpl", { - backend_extra = null - bucket = v.automation.bucket - name = k - sa = 
v.automation.service_accounts.ro - }) if v.automation != null - } - ) - tfvars = { - aspect_types = module.central-aspect-types.ids - central_project = local.central_project - policy_tags = module.central-policy-tags.tags - secure_tags = { - for k, v in module.central-project.tag_values : k => v.id - } - } - tfvars_dd = { - for k, v in local.data_domains : k => merge(local.tfvars, { - for kk, vv in local.dd_attrs[k] : - kk => vv if kk != "automation" - }) - } -} - -# tfvars files for data domains and products - -resource "local_file" "tfvars" { - for_each = var.outputs_location == null ? {} : local.tfvars_dd - file_permission = "0644" - filename = "${try(pathexpand(var.outputs_location), "")}/tfvars/${local.files_prefix}/${each.key}.auto.tfvars.json" - content = jsonencode(each.value) -} - -resource "google_storage_bucket_object" "tfvars" { - for_each = local.tfvars_dd - bucket = var.automation.outputs_bucket - name = "tfvars/${local.files_prefix}/${each.key}.auto.tfvars.json" - content = jsonencode(each.value) - source_md5hash = md5(jsonencode(each.value)) -} - -# provider files for data domains and products - -resource "local_file" "providers" { - for_each = var.outputs_location == null ? {} : local.providers - file_permission = "0644" - filename = "${try(pathexpand(var.outputs_location), "")}/providers/${local.files_prefix}/${each.key}" - content = each.value -} - -resource "google_storage_bucket_object" "providers" { - for_each = local.providers - bucket = var.automation.outputs_bucket - name = "providers/${local.files_prefix}/${each.key}" - content = each.value - source_md5hash = md5(each.value) -} - -resource "google_storage_bucket_object" "version" { - count = fileexists("fast_version.txt") ? 
1 : 0 - bucket = var.automation.outputs_bucket - name = "versions/3-${var.stage_config.name}-version.txt" - source = "fast_version.txt" - source_md5hash = filemd5("fast_version.txt") -} - -# regular outputs - -output "aspect_types" { - description = "Aspect types defined in central project." - value = local.tfvars.aspect_types -} - -output "central_project" { - description = "Central project attributes." - value = local.central_project -} - -output "data_domains" { - description = "Data domain attributes." - value = local.dd_attrs -} - -output "policy_tags" { - description = "Policy tags defined in central project." - value = local.tfvars.policy_tags -} - -output "secure_tags" { - description = "Secure tags defined in central project." - value = local.tfvars.secure_tags -} diff --git a/fast/stages/3-data-platform-dev/schemas/aspect-type.schema.json b/fast/stages/3-data-platform-dev/schemas/aspect-type.schema.json deleted file mode 120000 index 3813d7b7a..000000000 --- a/fast/stages/3-data-platform-dev/schemas/aspect-type.schema.json +++ /dev/null @@ -1 +0,0 @@ -../../../../modules/dataplex-aspect-types/schemas/aspect-type.schema.json \ No newline at end of file diff --git a/fast/stages/3-data-platform-dev/schemas/aspect-type.schema.md b/fast/stages/3-data-platform-dev/schemas/aspect-type.schema.md deleted file mode 100644 index f3da76a0d..000000000 --- a/fast/stages/3-data-platform-dev/schemas/aspect-type.schema.md +++ /dev/null @@ -1,50 +0,0 @@ -# Dataplex Aspect Type - - - -## Properties - -*additional properties: false* - -- **description**: *string* -- **display_name**: *string* -- **labels**: *object* -- **metadata_template**: *string* -- **iam**: *reference([iam](#refs-iam))* -- **iam_bindings**: *reference([iam_bindings](#refs-iam_bindings))* -- **iam_bindings_additive**: *reference([iam_bindings_additive](#refs-iam_bindings_additive))* - -## Definitions - -- **iam**: *object* -
*additional properties: false* - - **`^(?:roles/|\$custom_roles:)`**: *array* - - items: *string* -
*pattern: ^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:||\$iam_principals:[a-z0-9_-]+)* -- **iam_bindings**: *object* -
*additional properties: false* - - **`^[a-z0-9_-]+$`**: *object* -
*additional properties: false* - - **members**: *array* - - items: *string* -
*pattern: ^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:|\$iam_principals:[a-z0-9_-]+)* - - **role**: *string* -
*pattern: ^(?:roles/|\$custom_roles:)* - - **condition**: *object* -
*additional properties: false* - - ⁺**expression**: *string* - - ⁺**title**: *string* - - **description**: *string* -- **iam_bindings_additive**: *object* -
*additional properties: false* - - **`^[a-z0-9_-]+$`**: *object* -
*additional properties: false* - - **member**: *string* -
*pattern: ^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:|\$iam_principals:[a-z0-9_-]+)* - - **role**: *string* -
*pattern: ^(?:roles/|\$custom_roles:)* - - **condition**: *object* -
*additional properties: false* - - ⁺**expression**: *string* - - ⁺**title**: *string* - - **description**: *string* diff --git a/fast/stages/3-data-platform-dev/schemas/data-domain.schema.json b/fast/stages/3-data-platform-dev/schemas/data-domain.schema.json deleted file mode 100644 index c1d5632a5..000000000 --- a/fast/stages/3-data-platform-dev/schemas/data-domain.schema.json +++ /dev/null @@ -1,377 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Data Domain", - "type": "object", - "additionalProperties": false, - "required": [ - "name" - ], - "properties": { - "name": { - "type": "string" - }, - "short_name": { - "type": "string" - }, - "automation": { - "type": "object", - "additionalProperties": false, - "properties": { - "location": { - "type": "string" - }, - "impersonation_principals": { - "type": "array", - "items": { - "type": "string", - "pattern": "^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:|[a-z])" - } - } - } - }, - "deploy_config": { - "type": "object", - "additionalProperties": false, - "properties": { - "composer": { - "type": "object", - "additionalProperties": false, - "required": [], - "properties": { - "encryption_key": { - "type": "string" - }, - "environment_size": { - "type": "string", - "enum": [ - "ENVIRONMENT_SIZE_SMALL", - "ENVIRONMENT_SIZE_MEDIUM", - "ENVIRONMENT_SIZE_LARGE" - ], - "default": "ENVIRONMENT_SIZE_SMALL" - }, - "node_config": { - "type": "object", - "additionalProperties": false, - "required": [ - "network", - "subnetwork" - ], - "properties": { - "service_account": { - "type": "string" - }, - "network": { - "type": "string" - }, - "subnetwork": { - "type": "string" - } - } - }, - "private_builds": { - "type": "boolean", - "default": true - }, - "private_environment": { - "type": "boolean", - "default": true - }, - "region": { - "type": "string" - }, - "workloads_config": { - "type": "object", - "additionalProperties": false, - "properties": { - "dag_processor": { - 
"$ref": "#/$defs/composer_workload" - }, - "triggerer": { - "$ref": "#/$defs/composer_workload" - }, - "scheduler": { - "$ref": "#/$defs/composer_workload" - }, - "web_server": { - "$ref": "#/$defs/composer_workload" - }, - "worker": { - "type": "object", - "additionalProperties": false, - "properties": { - "cpu": { - "type": "number" - }, - "memory_gb": { - "type": "number" - }, - "storage_gb": { - "type": "number" - }, - "min_count": { - "type": "integer" - }, - "max_count": { - "type": "integer" - } - } - } - } - } - } - } - } - }, - "folder_config": { - "type": "object", - "additionalProperties": false, - "properties": { - "iam": { - "$ref": "#/$defs/iam" - }, - "iam_bindings": { - "$ref": "#/$defs/iam_bindings" - }, - "iam_bindings_additive": { - "$ref": "#/$defs/iam_bindings_additive" - }, - "iam_by_principals": { - "$ref": "#/$defs/iam_by_principals" - } - } - }, - "project_config": { - "type": "object", - "additionalProperties": false, - "properties": { - "name": { - "type": "string" - }, - "iam": { - "$ref": "#/$defs/iam" - }, - "iam_bindings": { - "$ref": "#/$defs/iam_bindings" - }, - "iam_bindings_additive": { - "$ref": "#/$defs/iam_bindings_additive" - }, - "iam_by_principals": { - "$ref": "#/$defs/iam_by_principals" - }, - "services": { - "type": "array", - "items": { - "type": "string" - } - }, - "shared_vpc_service_config": { - "type": "object", - "additionalProperties": false, - "required": [ - "host_project" - ], - "properties": { - "host_project": { - "type": "string" - }, - "network_users": { - "type": "array", - "items": { - "type": "string" - } - }, - "service_agent_iam": { - "type": "object", - "additionalItems": false, - "patternProperties": { - "^[a-z0-9_-]+$": { - "type": "array", - "items": { - "type": "string" - } - } - } - }, - "service_iam_grants": { - "type": "array", - "items": { - "type": "string" - } - } - } - } - } - }, - "service_accounts": { - "type": "object", - "additionalProperties": false, - "patternProperties": { - 
"^[a-z0-9-]+$": { - "type": "object", - "additionalProperties": false, - "properties": { - "description": { - "type": "string" - }, - "iam": { - "$ref": "#/$defs/iam" - }, - "iam_bindings": { - "$ref": "#/$defs/iam_bindings" - }, - "iam_bindings_additive": { - "$ref": "#/$defs/iam_bindings_additive" - }, - "iam_storage_roles": { - "$ref": "#/$defs/iam_storage_roles" - }, - "name": { - "type": "string" - } - } - } - } - } - }, - "$defs": { - "composer_workload": { - "type": "object", - "additionalProperties": false, - "properties": { - "cpu": { - "type": "number" - }, - "memory_gb": { - "type": "number" - }, - "storage_gb": { - "type": "number" - }, - "count": { - "type": "integer" - } - } - }, - "iam": { - "type": "object", - "additionalProperties": false, - "patternProperties": { - "^(?:roles/|[a-z_]+)": { - "type": "array", - "items": { - "type": "string", - "pattern": "^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:|[a-z])" - } - } - } - }, - "iam_bindings": { - "type": "object", - "additionalProperties": false, - "patternProperties": { - "^[a-z0-9_-]+$": { - "type": "object", - "additionalProperties": false, - "properties": { - "members": { - "type": "array", - "items": { - "type": "string", - "pattern": "^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:|[a-z])" - } - }, - "role": { - "type": "string", - "pattern": "^(?:roles/|[a-z])" - }, - "condition": { - "type": "object", - "additionalProperties": false, - "required": [ - "expression", - "title" - ], - "properties": { - "expression": { - "type": "string" - }, - "title": { - "type": "string" - }, - "description": { - "type": "string" - } - } - } - } - } - } - }, - "iam_bindings_additive": { - "type": "object", - "additionalProperties": false, - "patternProperties": { - "^[a-z0-9_-]+$": { - "type": "object", - "additionalProperties": false, - "properties": { - "member": { - "type": "string", - "pattern": 
"^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:|[a-z])" - }, - "role": { - "type": "string", - "pattern": "^(?:roles/|[a-z])" - }, - "condition": { - "type": "object", - "additionalProperties": false, - "required": [ - "expression", - "title" - ], - "properties": { - "expression": { - "type": "string" - }, - "title": { - "type": "string" - }, - "description": { - "type": "string" - } - } - } - } - } - } - }, - "iam_by_principals": { - "type": "object", - "additionalProperties": false, - "patternProperties": { - "^[a-z]+[a-z0-9-]+$": { - "type": "array", - "items": { - "type": "string", - "pattern": "^(?:roles/|[a-z_]+)" - } - } - } - }, - "iam_storage_roles": { - "type": "object", - "additionalProperties": false, - "patternProperties": { - "^[a-z0-9-]+$": { - "type": "array", - "items": { - "type": "string" - } - } - } - } - } -} \ No newline at end of file diff --git a/fast/stages/3-data-platform-dev/schemas/data-domain.schema.md b/fast/stages/3-data-platform-dev/schemas/data-domain.schema.md deleted file mode 100644 index cf401daa3..000000000 --- a/fast/stages/3-data-platform-dev/schemas/data-domain.schema.md +++ /dev/null @@ -1,129 +0,0 @@ -# Data Domain - - - -## Properties - -*additional properties: false* - -- ⁺**name**: *string* -- **short_name**: *string* -- **automation**: *object* -
*additional properties: false* - - **location**: *string* - - **impersonation_principals**: *array* - - items: *string* -
*pattern: ^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:|[a-z])* -- **deploy_config**: *object* -
*additional properties: false* - - **composer**: *object* -
*additional properties: false* - - **encryption_key**: *string* - - **environment_size**: *string* -
*default: ENVIRONMENT_SIZE_SMALL*, *enum: ['ENVIRONMENT_SIZE_SMALL', 'ENVIRONMENT_SIZE_MEDIUM', 'ENVIRONMENT_SIZE_LARGE']* - - **node_config**: *object* -
*additional properties: false* - - **service_account**: *string* - - ⁺**network**: *string* - - ⁺**subnetwork**: *string* - - **private_builds**: *boolean* - - **private_environment**: *boolean* - - **region**: *string* - - **workloads_config**: *object* -
*additional properties: false* - - **dag_processor**: *reference([composer_workload](#refs-composer_workload))* - - **triggerer**: *reference([composer_workload](#refs-composer_workload))* - - **scheduler**: *reference([composer_workload](#refs-composer_workload))* - - **web_server**: *reference([composer_workload](#refs-composer_workload))* - - **worker**: *object* -
*additional properties: false* - - **cpu**: *number* - - **memory_gb**: *number* - - **storage_gb**: *number* - - **min_count**: *integer* - - **max_count**: *integer* -- **folder_config**: *object* -
*additional properties: false* - - **iam**: *reference([iam](#refs-iam))* - - **iam_bindings**: *reference([iam_bindings](#refs-iam_bindings))* - - **iam_bindings_additive**: *reference([iam_bindings_additive](#refs-iam_bindings_additive))* - - **iam_by_principals**: *reference([iam_by_principals](#refs-iam_by_principals))* -- **project_config**: *object* -
*additional properties: false* - - **name**: *string* - - **iam**: *reference([iam](#refs-iam))* - - **iam_bindings**: *reference([iam_bindings](#refs-iam_bindings))* - - **iam_bindings_additive**: *reference([iam_bindings_additive](#refs-iam_bindings_additive))* - - **iam_by_principals**: *reference([iam_by_principals](#refs-iam_by_principals))* - - **services**: *array* - - items: *string* - - **shared_vpc_service_config**: *object* -
*additional properties: false* - - ⁺**host_project**: *string* - - **network_users**: *array* - - items: *string* - - **service_agent_iam**: *object* - - **`^[a-z0-9_-]+$`**: *array* - - items: *string* - - **service_iam_grants**: *array* - - items: *string* -- **service_accounts**: *object* -
*additional properties: false* - - **`^[a-z0-9-]+$`**: *object* -
*additional properties: false* - - **description**: *string* - - **iam**: *reference([iam](#refs-iam))* - - **iam_bindings**: *reference([iam_bindings](#refs-iam_bindings))* - - **iam_bindings_additive**: *reference([iam_bindings_additive](#refs-iam_bindings_additive))* - - **iam_storage_roles**: *reference([iam_storage_roles](#refs-iam_storage_roles))* - - **name**: *string* - -## Definitions - -- **composer_workload**: *object* -
*additional properties: false* - - **cpu**: *number* - - **memory_gb**: *number* - - **storage_gb**: *number* - - **count**: *integer* -- **iam**: *object* -
*additional properties: false* - - **`^(?:roles/|[a-z_]+)`**: *array* - - items: *string* -
*pattern: ^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:|[a-z])* -- **iam_bindings**: *object* -
*additional properties: false* - - **`^[a-z0-9_-]+$`**: *object* -
*additional properties: false* - - **members**: *array* - - items: *string* -
*pattern: ^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:|[a-z])* - - **role**: *string* -
*pattern: ^(?:roles/|[a-z])* - - **condition**: *object* -
*additional properties: false* - - ⁺**expression**: *string* - - ⁺**title**: *string* - - **description**: *string* -- **iam_bindings_additive**: *object* -
*additional properties: false* - - **`^[a-z0-9_-]+$`**: *object* -
*additional properties: false* - - **member**: *string* -
*pattern: ^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:|[a-z])* - - **role**: *string* -
*pattern: ^(?:roles/|[a-z])* - - **condition**: *object* -
*additional properties: false* - - ⁺**expression**: *string* - - ⁺**title**: *string* - - **description**: *string* -- **iam_by_principals**: *object* -
*additional properties: false* - - **`^[a-z]+[a-z0-9-]+$`**: *array* - - items: *string* -
*pattern: ^(?:roles/|[a-z_]+)* -- **iam_storage_roles**: *object* -
*additional properties: false* - - **`^[a-z0-9-]+$`**: *array* - - items: *string* diff --git a/fast/stages/3-data-platform-dev/schemas/data-product.schema.json b/fast/stages/3-data-platform-dev/schemas/data-product.schema.json deleted file mode 100644 index 49bec5e1e..000000000 --- a/fast/stages/3-data-platform-dev/schemas/data-product.schema.json +++ /dev/null @@ -1,293 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "title": "Data Product", - "type": "object", - "additionalProperties": false, - "properties": { - "automation": { - "type": "object", - "additionalProperties": false, - "properties": { - "location": { - "type": "string" - }, - "impersonation_principals": { - "type": "array", - "items": { - "type": "string", - "pattern": "^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:|[a-z])" - } - } - } - }, - "exposure_layer": { - "type": "object", - "additionalProperties": false, - "properties": { - "bigquery": { - "type": "object", - "additionalProperties": false, - "properties": { - "datasets": { - "type": "object", - "patternProperties": { - "^[a-z][a-z0-9_]+$": { - "type": "object", - "additionalProperties": false, - "properties": { - "encryption_key": { - "type": "string" - }, - "location": { - "type": "string" - } - } - } - } - }, - "iam": { - "$ref": "#/$defs/iam" - } - } - }, - "storage": { - "type": "object", - "additionalProperties": false, - "properties": { - "buckets": { - "type": "object", - "patternProperties": { - "^[a-z][a-z0-9-]+$": { - "type": "object", - "additionalProperties": false, - "properties": { - "encryption_key": { - "type": "string" - }, - "location": { - "type": "string" - }, - "storage_class": { - "type": "string" - } - } - } - } - }, - "iam": { - "$ref": "#/$defs/iam" - } - } - } - } - }, - "iam": { - "$ref": "#/$defs/iam" - }, - "iam_bindings": { - "$ref": "#/$defs/iam_bindings" - }, - "iam_bindings_additive": { - "$ref": "#/$defs/iam_bindings_additive" - }, - "iam_by_principals": { - "$ref": 
"#/$defs/iam_by_principals" - }, - "service_accounts": { - "type": "object", - "additionalProperties": false, - "patternProperties": { - "^[a-z0-9-]+$": { - "type": "object", - "additionalProperties": false, - "properties": { - "description": { - "type": "string" - }, - "iam": { - "$ref": "#/$defs/iam" - }, - "iam_bindings": { - "$ref": "#/$defs/iam_bindings" - }, - "iam_bindings_additive": { - "$ref": "#/$defs/iam_bindings_additive" - }, - "iam_storage_roles": { - "$ref": "#/$defs/iam_storage_roles" - }, - "name": { - "type": "string" - } - } - } - } - }, - "services": { - "type": "array", - "items": { - "type": "string" - } - }, - "shared_vpc_service_config": { - "type": "object", - "additionalProperties": false, - "required": [ - "host_project" - ], - "properties": { - "host_project": { - "type": "string" - }, - "network_users": { - "type": "array", - "items": { - "type": "string" - } - }, - "service_agent_iam": { - "type": "object", - "additionalItems": false, - "patternProperties": { - "^[a-z0-9_-]+$": { - "type": "array", - "items": { - "type": "string" - } - } - } - }, - "service_iam_grants": { - "type": "array", - "items": { - "type": "string" - } - } - } - }, - "short_name": { - "type": "string" - } - }, - "$defs": { - "iam": { - "type": "object", - "additionalProperties": false, - "patternProperties": { - "^(?:roles/|[a-z_]+)": { - "type": "array", - "items": { - "type": "string", - "pattern": "^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:|[a-z])" - } - } - } - }, - "iam_bindings": { - "type": "object", - "additionalProperties": false, - "patternProperties": { - "^[a-z0-9_-]+$": { - "type": "object", - "additionalProperties": false, - "properties": { - "members": { - "type": "array", - "items": { - "type": "string", - "pattern": "^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:|[a-z])" - } - }, - "role": { - "type": "string", - "pattern": "^(?:roles/|[a-z])" - }, - "condition": { - "type": "object", - 
"additionalProperties": false, - "required": [ - "expression", - "title" - ], - "properties": { - "expression": { - "type": "string" - }, - "title": { - "type": "string" - }, - "description": { - "type": "string" - } - } - } - } - } - } - }, - "iam_bindings_additive": { - "type": "object", - "additionalProperties": false, - "patternProperties": { - "^[a-z0-9_-]+$": { - "type": "object", - "additionalProperties": false, - "properties": { - "member": { - "type": "string", - "pattern": "^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:|[a-z])" - }, - "role": { - "type": "string", - "pattern": "^(?:roles/|[a-z])" - }, - "condition": { - "type": "object", - "additionalProperties": false, - "required": [ - "expression", - "title" - ], - "properties": { - "expression": { - "type": "string" - }, - "title": { - "type": "string" - }, - "description": { - "type": "string" - } - } - } - } - } - } - }, - "iam_by_principals": { - "type": "object", - "additionalProperties": false, - "patternProperties": { - "^[a-z]+[a-z0-9-]+$": { - "type": "array", - "items": { - "type": "string", - "pattern": "^(?:roles/|[a-z_]+)" - } - } - } - }, - "iam_storage_roles": { - "type": "object", - "additionalProperties": false, - "patternProperties": { - "^[a-z0-9-]+$": { - "type": "array", - "items": { - "type": "string" - } - } - } - } - } -} \ No newline at end of file diff --git a/fast/stages/3-data-platform-dev/schemas/data-product.schema.md b/fast/stages/3-data-platform-dev/schemas/data-product.schema.md deleted file mode 100644 index 32f09f86e..000000000 --- a/fast/stages/3-data-platform-dev/schemas/data-product.schema.md +++ /dev/null @@ -1,104 +0,0 @@ -# Data Product - - - -## Properties - -*additional properties: false* - -- **automation**: *object* -
*additional properties: false* - - **location**: *string* - - **impersonation_principals**: *array* - - items: *string* -
*pattern: ^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:|[a-z])* -- **exposure_layer**: *object* -
*additional properties: false* - - **bigquery**: *object* -
*additional properties: false* - - **datasets**: *object* - - **`^[a-z][a-z0-9_]+$`**: *object* -
*additional properties: false* - - **encryption_key**: *string* - - **location**: *string* - - **iam**: *reference([iam](#refs-iam))* - - **storage**: *object* -
*additional properties: false* - - **buckets**: *object* - - **`^[a-z][a-z0-9-]+$`**: *object* -
*additional properties: false* - - **encryption_key**: *string* - - **location**: *string* - - **storage_class**: *string* - - **iam**: *reference([iam](#refs-iam))* -- **iam**: *reference([iam](#refs-iam))* -- **iam_bindings**: *reference([iam_bindings](#refs-iam_bindings))* -- **iam_bindings_additive**: *reference([iam_bindings_additive](#refs-iam_bindings_additive))* -- **iam_by_principals**: *reference([iam_by_principals](#refs-iam_by_principals))* -- **service_accounts**: *object* -
*additional properties: false* - - **`^[a-z0-9-]+$`**: *object* -
*additional properties: false* - - **description**: *string* - - **iam**: *reference([iam](#refs-iam))* - - **iam_bindings**: *reference([iam_bindings](#refs-iam_bindings))* - - **iam_bindings_additive**: *reference([iam_bindings_additive](#refs-iam_bindings_additive))* - - **iam_storage_roles**: *reference([iam_storage_roles](#refs-iam_storage_roles))* - - **name**: *string* -- **services**: *array* - - items: *string* -- **shared_vpc_service_config**: *object* -
*additional properties: false* - - ⁺**host_project**: *string* - - **network_users**: *array* - - items: *string* - - **service_agent_iam**: *object* - - **`^[a-z0-9_-]+$`**: *array* - - items: *string* - - **service_iam_grants**: *array* - - items: *string* -- **short_name**: *string* - -## Definitions - -- **iam**: *object* -
*additional properties: false* - - **`^(?:roles/|[a-z_]+)`**: *array* - - items: *string* -
*pattern: ^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:|[a-z])* -- **iam_bindings**: *object* -
*additional properties: false* - - **`^[a-z0-9_-]+$`**: *object* -
*additional properties: false* - - **members**: *array* - - items: *string* -
*pattern: ^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:|[a-z])* - - **role**: *string* -
*pattern: ^(?:roles/|[a-z])* - - **condition**: *object* -
*additional properties: false* - - ⁺**expression**: *string* - - ⁺**title**: *string* - - **description**: *string* -- **iam_bindings_additive**: *object* -
*additional properties: false* - - **`^[a-z0-9_-]+$`**: *object* -
*additional properties: false* - - **member**: *string* -
*pattern: ^(?:domain:|group:|serviceAccount:|user:|principal:|principalSet:|[a-z])* - - **role**: *string* -
*pattern: ^(?:roles/|[a-z])* - - **condition**: *object* -
*additional properties: false* - - ⁺**expression**: *string* - - ⁺**title**: *string* - - **description**: *string* -- **iam_by_principals**: *object* -
*additional properties: false* - - **`^[a-z]+[a-z0-9-]+$`**: *array* - - items: *string* -
*pattern: ^(?:roles/|[a-z_]+)* -- **iam_storage_roles**: *object* -
*additional properties: false* - - **`^[a-z0-9-]+$`**: *array* - - items: *string* diff --git a/fast/stages/3-data-platform-dev/templates/providers.tf.tpl b/fast/stages/3-data-platform-dev/templates/providers.tf.tpl deleted file mode 100644 index d1c224c5c..000000000 --- a/fast/stages/3-data-platform-dev/templates/providers.tf.tpl +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Copyright 2022 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -terraform { - backend "gcs" { - bucket = "${bucket}" - impersonate_service_account = "${sa}" - %{~ if backend_extra != null ~} - ${indent(4, backend_extra)} - %{~ endif ~} - } -} -provider "google" { - impersonate_service_account = "${sa}" -} -provider "google-beta" { - impersonate_service_account = "${sa}" -} - -# end provider.tf for ${name} diff --git a/fast/stages/3-data-platform-dev/terraform.tfvars.sample b/fast/stages/3-data-platform-dev/terraform.tfvars.sample deleted file mode 100644 index 5e217a3d7..000000000 --- a/fast/stages/3-data-platform-dev/terraform.tfvars.sample +++ /dev/null @@ -1,33 +0,0 @@ -location = "europe-west1" - -central_project_config = { - iam_by_principals = { - dp-platform = [ - "roles/datacatalog.categoryAdmin", - "roles/dataplex.catalogAdmin", - "roles/dataplex.aspectTypeOwner", - "roles/resourcemanager.tagViewer" - ] - dp-domain-a = [ - "roles/datacatalog.viewer", - "roles/dataplex.aspectTypeUser" - ] - dp-product-a-0 = [ - "roles/datacatalog.viewer", - 
"roles/dataplex.aspectTypeUser" - ] - } -} - -factories_config = { - context = { - iam_principals = { - dp-platform = "group:dp-platform-0@example.com" - dp-domain-a = "group:dp-domain-a@example.com" - dp-product-a-0 = "group:dp-product-a-0@example.com" - data-consumer-bi = "group:data-consumer-bi@example.com" - } - } - aspect_types = "data/aspect-types" - data_domains = "data/data-domains" -} diff --git a/fast/stages/3-data-platform-dev/variables-fast.tf b/fast/stages/3-data-platform-dev/variables-fast.tf deleted file mode 100644 index 82b096909..000000000 --- a/fast/stages/3-data-platform-dev/variables-fast.tf +++ /dev/null @@ -1,109 +0,0 @@ -/** - * Copyright 2024 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -variable "automation" { - # tfdoc:variable:source 0-org-setup - description = "Automation resources created by the bootstrap stage." - type = object({ - outputs_bucket = string - }) - nullable = false -} - -variable "billing_account" { - # tfdoc:variable:source 0-org-setup - description = "Billing account id. If billing account is not part of the same org set `is_org_level` to false." - type = object({ - id = string - }) -} - -variable "environments" { - # tfdoc:variable:source 0-org-setup - description = "Environment names." - type = object({ - dev = object({ - name = string - short_name = string - }) - }) -} - -variable "folder_ids" { - # tfdoc:variable:source 0-org-setup - description = "Folder name => id mappings." 
- type = map(string) - nullable = false - default = {} -} - -variable "host_project_ids" { - # tfdoc:variable:source 2-networking - description = "Shared VPC host project name => id mappings." - type = map(string) - nullable = false - default = {} -} - -variable "kms_keys" { - # tfdoc:variable:source 2-security - description = "KMS key ids." - type = map(string) - nullable = false - default = {} -} - -variable "prefix" { - # tfdoc:variable:source 0-org-setup - description = "Prefix used for resources that need unique names. Use a maximum of 9 chars for organizations, and 11 chars for tenants." - type = string - validation { - condition = try(length(var.prefix), 0) < 12 - error_message = "Use a maximum of 9 chars for organizations, and 11 chars for tenants." - } -} - -variable "regions" { - # tfdoc:variable:source 2-networking - description = "Region mappings." - type = map(string) - nullable = false - default = {} -} - -variable "subnet_self_links" { - # tfdoc:variable:source 2-networking - description = "Subnet VPC name => { name => self link } mappings." - type = map(map(string)) - nullable = false - default = {} -} - -variable "tag_values" { - # tfdoc:variable:source 0-org-setup - description = "FAST-managed resource manager tag values." - type = map(string) - nullable = false - default = {} -} - -variable "vpc_self_links" { - # tfdoc:variable:source 2-networking - description = "Shared VPC name => self link mappings." - type = map(string) - nullable = false - default = {} -} diff --git a/fast/stages/3-data-platform-dev/variables.tf b/fast/stages/3-data-platform-dev/variables.tf deleted file mode 100644 index 7d5301666..000000000 --- a/fast/stages/3-data-platform-dev/variables.tf +++ /dev/null @@ -1,179 +0,0 @@ -/** - * Copyright 2025 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -variable "aspect_types" { - description = "Aspect templates. Merged with those defined via the factory." - type = map(object({ - description = optional(string) - display_name = optional(string) - labels = optional(map(string), {}) - metadata_template = optional(string) - iam = optional(map(list(string)), {}) - iam_bindings = optional(map(object({ - members = list(string) - role = string - condition = optional(object({ - expression = string - title = string - description = optional(string) - })) - })), {}) - iam_bindings_additive = optional(map(object({ - member = string - role = string - condition = optional(object({ - expression = string - title = string - description = optional(string) - })) - })), {}) - })) - nullable = false - default = {} -} - -variable "central_project_config" { - description = "Configuration for the top-level central project." 
- type = object({ - iam = optional(map(list(string)), {}) - iam_bindings = optional(map(object({ - members = list(string) - role = string - condition = optional(object({ - expression = string - title = string - description = optional(string) - })) - })), {}) - iam_bindings_additive = optional(map(object({ - member = string - role = string - condition = optional(object({ - expression = string - title = string - description = optional(string) - })) - })), {}) - iam_by_principals = optional(map(list(string)), {}) - services = optional(list(string), [ - # TODO: define default list of services - "bigquery.googleapis.com", - "datacatalog.googleapis.com", - "logging.googleapis.com", - "monitoring.googleapis.com", - "storage.googleapis.com", - ]) - short_name = optional(string, "central-0") - policy_tags = optional(map(any), { - low = {} - medium = {} - high = {} - }) - }) - nullable = false - default = {} -} - -variable "encryption_keys" { - description = "Default encryption keys for services, in service => { region => key id } format. Overridable on a per-object basis." - type = object({ - bigquery = optional(map(string), {}) - composer = optional(map(string), {}) - storage = optional(map(string), {}) - }) - nullable = false - default = {} -} - -variable "exposure_config" { - description = "Data exposure configuration." - type = object({ - tag_name = optional(string, "exposure/allow") - }) - nullable = false - default = {} - validation { - condition = ( - var.exposure_config.tag_name != null && - length(regexall( - "^[a-z][a-z0-9-]+/[a-z][a-z0-9]+", var.exposure_config.tag_name - )) > 0 - ) - error_message = "Invalid tag name, required format is 'tag_key/tag_value'." - } -} - -variable "factories_config" { - description = "Configuration for the resource factories." 
- type = object({ - aspect_types = optional(string, "data/aspect-types") - data_domains = optional(string, "data/data-domains") - context = optional(object({ - iam_principals = optional(map(string), {}) - kms_keys = optional(map(string), {}) - tag_values = optional(map(string), {}) - }), {}) - }) - nullable = false - default = {} -} - -variable "location" { - description = "Default location used when no location is specified." - type = string - nullable = false - default = "europe-west1" -} - -variable "outputs_location" { - description = "Enable writing provider, tfvars and CI/CD workflow files to local filesystem. Leave null to disable." - type = string - default = null -} - -variable "secure_tags" { - description = "Resource manager tags created in the central project." - type = map(object({ - description = optional(string, "Managed by the Terraform project module.") - iam = optional(map(list(string)), {}) - values = optional(map(object({ - description = optional(string, "Managed by the Terraform project module.") - iam = optional(map(list(string)), {}) - id = optional(string) - })), {}) - })) - nullable = false - default = {} - validation { - condition = alltrue([ - for k, v in var.secure_tags : v != null - ]) - error_message = "Use an empty map instead of null as value." - } -} - -variable "stage_config" { - description = "Stage configuration used to find environment and resource ids, and to generate names." 
- type = object({ - environment = string - name = string - short_name = optional(string, "dp") - }) - default = { - environment = "dev" - name = "data-platform-dev" - } -} diff --git a/fast/stages/3-secops-dev/README.md b/fast/stages/3-secops-dev/README.md index e03fb028b..c48bb9f2b 100644 --- a/fast/stages/3-secops-dev/README.md +++ b/fast/stages/3-secops-dev/README.md @@ -40,9 +40,9 @@ If this stage is deployed within a FAST-based GCP organization, we recommend exe This stage needs specific automation resources, and permissions granted on those that allow control of selective IAM roles on specific networking and security resources. -Network permissions are needed to associate data domain or product projects to Shared VPC hosts and grant network permissions to data platform managed service accounts. They are mandatory when deploying Composer. +Network permissions are needed to associate projects to Shared VPC hosts and grant the Network User role to managed service accounts. -Security permissions are only needed when using CMEK encryption, to grant the relevant IAM roles to data platform service agents on the encryption keys used. +Security permissions are only needed when using CMEK encryption, to grant the relevant IAM roles to service agents on the encryption keys used. ## Customizations diff --git a/fast/stages/CLEANUP.md b/fast/stages/CLEANUP.md index 477469b01..8e0f06604 100644 --- a/fast/stages/CLEANUP.md +++ b/fast/stages/CLEANUP.md @@ -2,26 +2,7 @@ If you want to destroy a previous FAST deployment in your organization, follow these steps. -Destruction must be done in reverse order, from stage 3 to stage 0 - -## Stage 3 (Data Platform) - -Terraform refuses to delete non-empty GCS buckets and BigQuery datasets, so they need to be removed manually from the state. - -```bash -cd $FAST_PWD/3-data-platform/dev/ - -# remove GCS buckets and BQ dataset manually.
Projects will be destroyed anyway -for x in $(terraform state list | grep google_storage_bucket.bucket); do - terraform state rm "$x"; -done - -for x in $(terraform state list | grep google_bigquery_dataset); do - terraform state rm "$x"; -done - -terraform destroy -``` +Destruction must be done in reverse order, from stage 2 to stage 0 ## Stage 2 (Project Factory) diff --git a/fast/stages/README.md b/fast/stages/README.md index dee025ea7..ab41a4785 100644 --- a/fast/stages/README.md +++ b/fast/stages/README.md @@ -42,11 +42,6 @@ To destroy a previous FAST deployment follow the instructions detailed in [clean - [Project Factory](./2-project-factory/) YAML-based factory to create and configure application or team-level projects. Configuration includes VPC-level settings for Shared VPC, service-level configuration for CMEK encryption via centralized keys, and service account creation for workloads and applications. This stage can be cloned if an org-wide or dedicated per-environment factories are needed. -## Environment-level resources (3) - -- [Data Platform](./3-data-platform-dev/) - - ## Importing existing setup into FAST For brownfield implementations you may need to import existing setting in the organization, folders, etc. These snippets can help you add existing settings into the YAML file diff --git a/tests/fast/stages/s0_org_setup/hardened.yaml b/tests/fast/stages/s0_org_setup/hardened.yaml index ffc260555..67d33a194 100644 --- a/tests/fast/stages/s0_org_setup/hardened.yaml +++ b/tests/fast/stages/s0_org_setup/hardened.yaml @@ -985,11 +985,6 @@ values: force_destroy: false name: 2-security/ timeouts: null - module.factory.module.buckets["iac-0/iac-stage-state"].google_storage_managed_folder.folder["3-data-platform-dev/"]: - bucket: ft0-prod-iac-core-0-iac-stage-state - force_destroy: false - name: 3-data-platform-dev/ - timeouts: null ? 
module.factory.module.buckets["iac-0/iac-stage-state"].google_storage_managed_folder_iam_binding.authoritative["1-vpcsc/$custom_roles:storage_viewer"] : bucket: ft0-prod-iac-core-0-iac-stage-state condition: [] @@ -1030,16 +1025,6 @@ values: condition: [] managed_folder: 2-security/ role: roles/storage.admin - ? module.factory.module.buckets["iac-0/iac-stage-state"].google_storage_managed_folder_iam_binding.authoritative["3-data-platform-dev/$custom_roles:storage_viewer"] - : bucket: ft0-prod-iac-core-0-iac-stage-state - condition: [] - managed_folder: 3-data-platform-dev/ - role: organizations/1234567890/roles/storageViewer - ? module.factory.module.buckets["iac-0/iac-stage-state"].google_storage_managed_folder_iam_binding.authoritative["3-data-platform-dev/roles/storage.admin"] - : bucket: ft0-prod-iac-core-0-iac-stage-state - condition: [] - managed_folder: 3-data-platform-dev/ - role: roles/storage.admin ? module.factory.module.folder-1-iam["networking"].google_folder_iam_binding.authoritative["$custom_roles:project_iam_viewer"] : condition: [] role: organizations/1234567890/roles/projectIamViewer @@ -2267,26 +2252,6 @@ values: project: ft0-prod-audit-logs-0 service: pubsub.googleapis.com timeouts: null - module.factory.module.service-accounts["iac-0/iac-dp-dev-ro"].google_service_account.service_account[0]: - account_id: iac-dp-dev-ro - create_ignore_already_exists: null - description: null - disabled: false - display_name: IaC service account for data platform dev (read-only). - email: iac-dp-dev-ro@ft0-prod-iac-core-0.iam.gserviceaccount.com - member: serviceAccount:iac-dp-dev-ro@ft0-prod-iac-core-0.iam.gserviceaccount.com - project: ft0-prod-iac-core-0 - timeouts: null - module.factory.module.service-accounts["iac-0/iac-dp-dev-rw"].google_service_account.service_account[0]: - account_id: iac-dp-dev-rw - create_ignore_already_exists: null - description: null - disabled: false - display_name: IaC service account for data platform dev (read-write). 
- email: iac-dp-dev-rw@ft0-prod-iac-core-0.iam.gserviceaccount.com - member: serviceAccount:iac-dp-dev-rw@ft0-prod-iac-core-0.iam.gserviceaccount.com - project: ft0-prod-iac-core-0 - timeouts: null module.factory.module.service-accounts["iac-0/iac-networking-ro"].google_service_account.service_account[0]: account_id: iac-networking-ro create_ignore_already_exists: null @@ -8567,22 +8532,22 @@ counts: google_project_service: 37 google_project_service_identity: 11 google_scc_management_organization_security_health_analytics_custom_module: 18 - google_service_account: 14 + google_service_account: 12 google_service_account_iam_binding: 2 google_service_account_iam_member: 4 google_storage_bucket: 3 google_storage_bucket_iam_binding: 4 google_storage_bucket_object: 10 - google_storage_managed_folder: 5 - google_storage_managed_folder_iam_binding: 10 + google_storage_managed_folder: 4 + google_storage_managed_folder_iam_binding: 8 google_storage_project_service_account: 3 google_tags_tag_binding: 7 google_tags_tag_key: 3 google_tags_tag_value: 6 google_tags_tag_value_iam_binding: 4 local_file: 9 - modules: 58 - resources: 715 + modules: 56 + resources: 710 terraform_data: 4 outputs: diff --git a/tests/fast/stages/s0_org_setup/simple.yaml b/tests/fast/stages/s0_org_setup/simple.yaml index 70644fc48..0f08d1f96 100644 --- a/tests/fast/stages/s0_org_setup/simple.yaml +++ b/tests/fast/stages/s0_org_setup/simple.yaml @@ -810,11 +810,6 @@ values: force_destroy: false name: 2-security/ timeouts: null - module.factory.module.buckets["iac-0/iac-stage-state"].google_storage_managed_folder.folder["3-data-platform-dev/"]: - bucket: ft0-prod-iac-core-0-iac-stage-state - force_destroy: false - name: 3-data-platform-dev/ - timeouts: null ? 
module.factory.module.buckets["iac-0/iac-stage-state"].google_storage_managed_folder_iam_binding.authoritative["1-vpcsc/$custom_roles:storage_viewer"] : bucket: ft0-prod-iac-core-0-iac-stage-state condition: [] @@ -855,16 +850,6 @@ values: condition: [] managed_folder: 2-security/ role: roles/storage.admin - ? module.factory.module.buckets["iac-0/iac-stage-state"].google_storage_managed_folder_iam_binding.authoritative["3-data-platform-dev/$custom_roles:storage_viewer"] - : bucket: ft0-prod-iac-core-0-iac-stage-state - condition: [] - managed_folder: 3-data-platform-dev/ - role: organizations/1234567890/roles/storageViewer - ? module.factory.module.buckets["iac-0/iac-stage-state"].google_storage_managed_folder_iam_binding.authoritative["3-data-platform-dev/roles/storage.admin"] - : bucket: ft0-prod-iac-core-0-iac-stage-state - condition: [] - managed_folder: 3-data-platform-dev/ - role: roles/storage.admin ? module.factory.module.folder-1-iam["networking"].google_folder_iam_binding.authoritative["$custom_roles:project_iam_viewer"] : condition: [] role: organizations/1234567890/roles/projectIamViewer @@ -1571,26 +1556,6 @@ values: project: ft0-prod-audit-logs-0 service: pubsub.googleapis.com timeouts: null - module.factory.module.service-accounts["iac-0/iac-dp-dev-ro"].google_service_account.service_account[0]: - account_id: iac-dp-dev-ro - create_ignore_already_exists: null - description: null - disabled: false - display_name: IaC service account for data platform dev (read-only). - email: iac-dp-dev-ro@ft0-prod-iac-core-0.iam.gserviceaccount.com - member: serviceAccount:iac-dp-dev-ro@ft0-prod-iac-core-0.iam.gserviceaccount.com - project: ft0-prod-iac-core-0 - timeouts: null - module.factory.module.service-accounts["iac-0/iac-dp-dev-rw"].google_service_account.service_account[0]: - account_id: iac-dp-dev-rw - create_ignore_already_exists: null - description: null - disabled: false - display_name: IaC service account for data platform dev (read-write). 
- email: iac-dp-dev-rw@ft0-prod-iac-core-0.iam.gserviceaccount.com - member: serviceAccount:iac-dp-dev-rw@ft0-prod-iac-core-0.iam.gserviceaccount.com - project: ft0-prod-iac-core-0 - timeouts: null module.factory.module.service-accounts["iac-0/iac-networking-ro"].google_service_account.service_account[0]: account_id: iac-networking-ro create_ignore_already_exists: null @@ -2957,22 +2922,22 @@ counts: google_project_iam_member: 15 google_project_service: 33 google_project_service_identity: 9 - google_service_account: 14 + google_service_account: 12 google_service_account_iam_binding: 2 google_service_account_iam_member: 4 google_storage_bucket: 3 google_storage_bucket_iam_binding: 4 google_storage_bucket_object: 10 - google_storage_managed_folder: 5 - google_storage_managed_folder_iam_binding: 10 + google_storage_managed_folder: 4 + google_storage_managed_folder_iam_binding: 8 google_storage_project_service_account: 3 google_tags_tag_binding: 7 google_tags_tag_key: 3 google_tags_tag_value: 5 google_tags_tag_value_iam_binding: 4 local_file: 9 - modules: 50 - resources: 325 + modules: 48 + resources: 320 terraform_data: 4 outputs: diff --git a/tests/fast/stages/s2_security/simple.yaml b/tests/fast/stages/s2_security/simple.yaml index 40aa8c20d..3335c1040 100644 --- a/tests/fast/stages/s2_security/simple.yaml +++ b/tests/fast/stages/s2_security/simple.yaml @@ -117,7 +117,6 @@ values: \ [])\n .hasOnly(['roles/cloudkms.cryptoKeyEncrypterDecrypter']\n)\n" title: Delegated IAM grant on keys. 
members: - - serviceAccount:iac-dp-dev-rw@test.iam.gserviceaccount.com - serviceAccount:iac-pf-rw@test.iam.gserviceaccount.com project: fast-dev-sec-core-0 role: roles/cloudkms.admin diff --git a/tests/fast/stages/s3_data_platform_dev/__init__.py b/tests/fast/stages/s3_data_platform_dev/__init__.py deleted file mode 100644 index c37e93b74..000000000 --- a/tests/fast/stages/s3_data_platform_dev/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
diff --git a/tests/fast/stages/s3_data_platform_dev/hardened.tfvars b/tests/fast/stages/s3_data_platform_dev/hardened.tfvars deleted file mode 100644 index 94f2f4e9f..000000000 --- a/tests/fast/stages/s3_data_platform_dev/hardened.tfvars +++ /dev/null @@ -1,57 +0,0 @@ -automation = { - outputs_bucket = "fast2-prod-iac-core-outputs" -} -billing_account = { - id = "000000-111111-222222" -} -encryption_keys = { - bigquery = { - "europe-west1" = "projects/fast2-dev-net-spoke-0/locations/europe-west1/keyRings/dev-primary-default/cryptoKeys/bigquery" - } - composer = { - "europe-west1" = "projects/fast2-dev-net-spoke-0/locations/europe-west1/keyRings/dev-primary-default/cryptoKeys/composer" - } - storage = { - "europe-west1" = "projects/fast2-dev-net-spoke-0/locations/europe-west1/keyRings/dev-primary-default/cryptoKeys/storage" - } -} -environments = { - dev = { - is_default = false - name = "Development" - short_name = "dev" - tag_name = "development" - } -} -factories_config = { - context = { - iam_principals = { - data-consumer-bi = "group:gcp-consumer-bi@example.com" - dp-product-a-0 = "group:gcp-data-product-a-0@example.com" - dp-domain-a = "group:gcp-data-domain-a@example.com" - dp-platform = "group:dp-platform-0@example.com" - } - } -} -folder_ids = { - data-platform-dev = "folders/00000000000000" -} -host_project_ids = { - dev-spoke-0 = "fast2-dev-net-spoke-0" -} -organization = { - domain = "fast.example.com" - id = 123456789012 - customer_id = "C00000000" -} -prefix = "fast2" -subnet_self_links = { - dev-spoke-0 = { - "europe-west8/dev-dataplatform" = "projects/fast2-dev-net-spoke-0/regions/europe-west8/subnetworks/dev-dataplatform" - } -} -vpc_self_links = { - dev-spoke-0 = "projects/fast2-dev-net-spoke-0/global/networks/dev-spoke-0" -} - - diff --git a/tests/fast/stages/s3_data_platform_dev/hardened.yaml b/tests/fast/stages/s3_data_platform_dev/hardened.yaml deleted file mode 100644 index 049fc048f..000000000 --- 
a/tests/fast/stages/s3_data_platform_dev/hardened.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -counts: - google_bigquery_dataset: 1 - google_bigquery_dataset_iam_binding: 1 - google_bigquery_default_service_account: 2 - google_composer_environment: 1 - google_data_catalog_policy_tag: 3 - google_data_catalog_taxonomy: 1 - google_dataplex_aspect_type: 1 - google_folder: 2 - google_folder_iam_binding: 3 - google_kms_crypto_key_iam_member: 5 - google_logging_project_settings: 1 - google_project: 3 - google_project_iam_binding: 23 - google_project_iam_member: 12 - google_project_service: 18 - google_project_service_identity: 6 - google_service_account: 6 - google_service_account_iam_binding: 4 - google_storage_bucket: 3 - google_storage_bucket_iam_binding: 6 - google_storage_bucket_object: 6 - google_storage_project_service_account: 3 - google_tags_location_tag_binding: 2 - google_tags_tag_key: 1 - google_tags_tag_value: 1 - modules: 19 - resources: 115 diff --git a/tests/fast/stages/s3_data_platform_dev/simple.tfvars b/tests/fast/stages/s3_data_platform_dev/simple.tfvars deleted file mode 100644 index 14d0e9406..000000000 --- a/tests/fast/stages/s3_data_platform_dev/simple.tfvars +++ /dev/null @@ -1,46 +0,0 @@ -automation = { - outputs_bucket = "fast2-prod-iac-core-outputs" -} -billing_account = { - id = "000000-111111-222222" -} -environments = { - dev = { - is_default = false - name = "Development" - 
short_name = "dev" - tag_name = "development" - } -} -factories_config = { - context = { - iam_principals = { - data-consumer-bi = "group:gcp-consumer-bi@example.com" - dp-product-a-0 = "group:gcp-data-product-a-0@example.com" - dp-domain-a = "group:gcp-data-domain-a@example.com" - dp-platform = "group:dp-platform-0@example.com" - } - } -} -folder_ids = { - data-platform-dev = "folders/00000000000000" -} -host_project_ids = { - dev-spoke-0 = "fast2-dev-net-spoke-0" -} -organization = { - domain = "fast.example.com" - id = 123456789012 - customer_id = "C00000000" -} -prefix = "fast2" -subnet_self_links = { - dev-spoke-0 = { - "europe-west8/dev-dataplatform" = "projects/fast2-dev-net-spoke-0/regions/europe-west8/subnetworks/dev-dataplatform" - } -} -vpc_self_links = { - dev-spoke-0 = "projects/fast2-dev-net-spoke-0/global/networks/dev-spoke-0" -} - - diff --git a/tests/fast/stages/s3_data_platform_dev/simple.yaml b/tests/fast/stages/s3_data_platform_dev/simple.yaml deleted file mode 100644 index 43f6bd5fd..000000000 --- a/tests/fast/stages/s3_data_platform_dev/simple.yaml +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -counts: - google_bigquery_dataset: 1 - google_bigquery_dataset_iam_binding: 1 - google_bigquery_default_service_account: 2 - google_composer_environment: 1 - google_data_catalog_policy_tag: 3 - google_data_catalog_taxonomy: 1 - google_dataplex_aspect_type: 1 - google_folder: 2 - google_folder_iam_binding: 3 - google_logging_project_settings: 1 - google_project: 3 - google_project_iam_binding: 23 - google_project_iam_member: 12 - google_project_service: 18 - google_project_service_identity: 6 - google_service_account: 6 - google_service_account_iam_binding: 4 - google_storage_bucket: 3 - google_storage_bucket_iam_binding: 6 - google_storage_bucket_object: 6 - google_storage_project_service_account: 3 - google_tags_location_tag_binding: 2 - google_tags_tag_key: 1 - google_tags_tag_value: 1 - modules: 19 - resources: 110 diff --git a/tests/fast/stages/s3_data_platform_dev/tftest.yaml b/tests/fast/stages/s3_data_platform_dev/tftest.yaml deleted file mode 100644 index 22393e1b3..000000000 --- a/tests/fast/stages/s3_data_platform_dev/tftest.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -module: fast/stages/3-data-platform-dev - -tests: - simple: - inventory: - - simple.yaml - hardened: - inventory: - - hardened.yaml diff --git a/tools/duplicate-diff.py b/tools/duplicate-diff.py index 71b13250d..294f777e4 100755 --- a/tools/duplicate-diff.py +++ b/tools/duplicate-diff.py @@ -32,10 +32,6 @@ duplicates = [ "fast/stages/1-vpcsc/schemas/access-level.schema.json", "modules/vpc-sc/schemas/access-level.schema.json", ], - [ - "fast/stages/3-data-platform-dev/schemas/aspect-type.schema.json", - "modules/dataplex-aspect-types/schemas/aspect-type.schema.json", - ], [ "fast/stages/2-project-factory/schemas/budget.schema.json", "fast/stages/0-org-setup/schemas/budget.schema.json",