diff --git a/fast/stages/03-data-platform/dev/README.md b/fast/stages/03-data-platform/dev/README.md index 19adb0682..57010a9a9 100644 --- a/fast/stages/03-data-platform/dev/README.md +++ b/fast/stages/03-data-platform/dev/README.md @@ -50,6 +50,19 @@ Cloud KMS crypto keys can be configured wither from the [FAST security stage](.. To configure the use of Cloud KMS on resources, you have to specify the key id on the `service_encryption_keys` variable. Key locations should match resource locations. +## Data Catalog + +[Data Catalog](https://cloud.google.com/data-catalog) helps you document your data entries at scale. Data Catalog relies on [tags](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tags) and [tag templates](https://cloud.google.com/data-catalog/docs/tags-and-tag-templates#tag-templates) to manage metadata for all data entries in a unified and centralized service. To implement [column-level security](https://cloud.google.com/bigquery/docs/column-level-security-intro) on BigQuery, we suggest using `Tags` and `Tag templates`. + +The default configuration will implement 3 tags: + - `3_Confidential`: policy tag for columns that include very sensitive information, such as credit card numbers. + - `2_Private`: policy tag for columns that include sensitive personally identifiable information (PII), such as a person's first name. + - `1_Sensitive`: policy tag for columns that include data that cannot be made public, such as the credit limit. + +Anything that is not tagged is available to all users who have access to the data warehouse. + +You can configure your tags and their associated roles through the `data_catalog_tags` variable. We suggest using the "[Best practices for using policy tags in BigQuery](https://cloud.google.com/bigquery/docs/best-practices-policy-tags)" article as a guide to designing your tag structure and access patterns. By default, no group has access to tagged data. 
+ ### VPC-SC As is often the case in real-world configurations, [VPC-SC](https://cloud.google.com/vpc-service-controls) is needed to mitigate data exfiltration. VPC-SC can be configured from the [FAST security stage](../../02-security). This step is optional, but highly recomended, and depends on customer policies and security best practices. diff --git a/fast/stages/03-data-platform/dev/main.tf b/fast/stages/03-data-platform/dev/main.tf index c10380da7..536e18731 100644 --- a/fast/stages/03-data-platform/dev/main.tf +++ b/fast/stages/03-data-platform/dev/main.tf @@ -21,6 +21,7 @@ module "data-platform" { billing_account_id = var.billing_account.id composer_config = var.composer_config data_force_destroy = var.data_force_destroy + data_catalog_tags = var.data_catalog_tags folder_id = var.folder_ids.data-platform groups = var.groups network_config = { diff --git a/fast/stages/03-data-platform/dev/variables.tf b/fast/stages/03-data-platform/dev/variables.tf index 1f65cf773..3b1645e41 100644 --- a/fast/stages/03-data-platform/dev/variables.tf +++ b/fast/stages/03-data-platform/dev/variables.tf @@ -36,6 +36,17 @@ variable "composer_config" { } } +variable "data_catalog_tags" { + description = "List of Data Catalog Policy tags to be created with optional IAM binding configuration in {tag => {ROLE => [MEMBERS]}} format." + type = map(map(list(string))) + nullable = false + default = { + "3_Confidential" = null + "2_Private" = null + "1_Sensitive" = null + } +} + variable "data_force_destroy" { description = "Flag to set 'force_destroy' on data services like BigQery or Cloud Storage." type = bool