From 3fad96436c1203b3cf5de91889cee0a7d7661746 Mon Sep 17 00:00:00 2001 From: apichick Date: Tue, 24 Sep 2024 17:39:29 +0200 Subject: [PATCH] Added biglake-catalog module (#2572) --- README.md | 2 +- modules/README.md | 1 + modules/biglake-catalog/README.md | 99 +++++++++++++++++++ modules/biglake-catalog/main.tf | 60 +++++++++++ modules/biglake-catalog/outputs.tf | 47 +++++++++ modules/biglake-catalog/variables.tf | 51 ++++++++++ .../biglake_catalog/examples/basic.yaml | 54 ++++++++++ 7 files changed, 313 insertions(+), 1 deletion(-) create mode 100644 modules/biglake-catalog/README.md create mode 100644 modules/biglake-catalog/main.tf create mode 100644 modules/biglake-catalog/outputs.tf create mode 100644 modules/biglake-catalog/variables.tf create mode 100644 tests/modules/biglake_catalog/examples/basic.yaml diff --git a/README.md b/README.md index 0965be2c2..aa537ca80 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Currently available modules: - **process factories** - [project factory](./modules/project-factory/README.md) - **networking** - [DNS](./modules/dns), [DNS Response Policy](./modules/dns-response-policy/), [Cloud Endpoints](./modules/endpoints), [address reservation](./modules/net-address), [NAT](./modules/net-cloudnat), [VLAN Attachment](./modules/net-vlan-attachment/), [External Application LB](./modules/net-lb-app-ext/), [External Passthrough Network LB](./modules/net-lb-ext), [External Regional Application Load Balancer](./modules/net-lb-app-ext-regional/), [Firewall policy](./modules/net-firewall-policy), [Internal Application LB](./modules/net-lb-app-int), [Cross-region Internal Application LB](./modules/net-lb-app-int-cross-region), [Internal Passthrough Network LB](./modules/net-lb-int), [Internal Proxy Network LB](./modules/net-lb-proxy-int), [IPSec over Interconnect](./modules/net-ipsec-over-interconnect), [VPC](./modules/net-vpc), [VPC firewall](./modules/net-vpc-firewall), [VPC peering](./modules/net-vpc-peering), [VPN 
dynamic](./modules/net-vpn-dynamic), [HA VPN](./modules/net-vpn-ha), [VPN static](./modules/net-vpn-static), [Service Directory](./modules/service-directory), [Secure Web Proxy](./modules/net-swp) - **compute** - [VM/VM group](./modules/compute-vm), [MIG](./modules/compute-mig), [COS container](./modules/cloud-config-container/cos-generic-metadata/) (coredns, mysql, onprem, squid), [GKE cluster](./modules/gke-cluster-standard), [GKE hub](./modules/gke-hub), [GKE nodepool](./modules/gke-nodepool), [GCVE private cloud](./modules/gcve-private-cloud) -- **data** - [AlloyDB instance](./modules/alloydb), [Analytics Hub](./modules/analytics-hub), [BigQuery dataset](./modules/bigquery-dataset), [Bigtable instance](./modules/bigtable-instance), [Dataplex](./modules/dataplex), [Dataplex DataScan](./modules/dataplex-datascan), [Cloud SQL instance](./modules/cloudsql-instance), [Spanner instance](./modules/spanner-instance), [Firestore](./modules/firestore), [Data Catalog Policy Tag](./modules/data-catalog-policy-tag), [Data Catalog Tag](./modules/data-catalog-tag), [Data Catalog Tag Template](./modules/data-catalog-tag-template), [Datafusion](./modules/datafusion), [Dataproc](./modules/dataproc), [GCS](./modules/gcs), [Pub/Sub](./modules/pubsub), [Dataform Repository](./modules/dataform-repository/), [Looker Core](./modules/looker-core) +- **data** - [AlloyDB instance](./modules/alloydb), [Analytics Hub](./modules/analytics-hub), [BigQuery dataset](./modules/bigquery-dataset), [BigLake Catalog](./modules/biglake-catalog), [Bigtable instance](./modules/bigtable-instance), [Dataplex](./modules/dataplex), [Dataplex DataScan](./modules/dataplex-datascan), [Cloud SQL instance](./modules/cloudsql-instance), [Spanner instance](./modules/spanner-instance), [Firestore](./modules/firestore), [Data Catalog Policy Tag](./modules/data-catalog-policy-tag), [Data Catalog Tag](./modules/data-catalog-tag), [Data Catalog Tag Template](./modules/data-catalog-tag-template), 
[Datafusion](./modules/datafusion), [Dataproc](./modules/dataproc), [GCS](./modules/gcs), [Pub/Sub](./modules/pubsub), [Dataform Repository](./modules/dataform-repository/), [Looker Core](./modules/looker-core) - **development** - [API Gateway](./modules/api-gateway), [Apigee](./modules/apigee), [Artifact Registry](./modules/artifact-registry), [Container Registry](./modules/container-registry), [Cloud Source Repository](./modules/source-repository), [Secure Source Manager instance](./modules/secure-source-manager-instance), [Workstation cluster](./modules/workstation-cluster) - **security** - [Binauthz](./modules/binauthz/), [Certificate Authority Service (CAS)](./modules/certificate-authority-service), [KMS](./modules/kms), [SecretManager](./modules/secret-manager), [VPC Service Control](./modules/vpc-sc), [Certificate Manager](./modules/certificate-manager/) - **serverless** - [Cloud Function v1](./modules/cloud-function-v1), [Cloud Function v2](./modules/cloud-function-v2), [Cloud Run](./modules/cloud-run), [Cloud Run v2](./modules/cloud-run-v2) diff --git a/modules/README.md b/modules/README.md index db3c6cbbb..1cbca21b1 100644 --- a/modules/README.md +++ b/modules/README.md @@ -83,6 +83,7 @@ These modules are used in the examples included in this repository. If you are u - [Analytics Hub](./analytics-hub) - [BigQuery dataset](./bigquery-dataset) - [Bigtable instance](./bigtable-instance) +- [BigLake catalog](./biglake-catalog) - [Cloud SQL instance](./cloudsql-instance) - [Data Catalog Policy Tag](./data-catalog-policy-tag) - [Data Catalog Tag](./data-catalog-tag) diff --git a/modules/biglake-catalog/README.md b/modules/biglake-catalog/README.md new file mode 100644 index 000000000..efb391ead --- /dev/null +++ b/modules/biglake-catalog/README.md @@ -0,0 +1,99 @@ +# BigLake Catalog + +This module creates a BigLake Metastore catalog, its databases, and the tables in each database. 
+ +## Examples + + +- [Examples](#examples) + - [Basic example](#basic-example) +- [Variables](#variables) +- [Outputs](#outputs) + + +### Basic example + +```hcl +module "biglake_catalog" { + source = "./fabric/modules/biglake-catalog" + project_id = var.project_id + name = "my_catalog" + location = "US" + databases = { + my_database = { + type = "HIVE" + hive_options = { + location_uri = "gs://my-bucket/my-database-folder" + parameters = { + "owner" : "John Doe" + } + } + tables = { + my_table = { + type = "HIVE" + hive_options = { + table_type = "MANAGED_TABLE" + location_uri = "gs://my-bucket/my-table-folder" + input_format = "org.apache.hadoop.mapred.SequenceFileInputFormat" + output_format = "org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat" + parameters = { + "spark.sql.create.version" = "3.1.3" + "spark.sql.sources.schema.numParts" = "1" + "transient_lastDdlTime" = "1680894197" + "spark.sql.partitionProvider" = "catalog" + "owner" = "John Doe" + "spark.sql.sources.schema.part.0" = jsonencode({ + type = "struct" + fields = [ + { + name = "id" + type = "integer" + nullable = true + metadata = {} + }, + { + name = "name" + type = "string" + nullable = true + metadata = {} + }, + { + name = "age" + type = "integer" + nullable = true + metadata = {} + } + ] + }) + "spark.sql.sources.provider" = "iceberg" + "provider" = "iceberg" + } + } + } + } + } + } +} +# tftest modules=1 resources=3 inventory=basic.yaml +``` + +## Variables + +| name | description | type | required | default | +|---|---|:---:|:---:|:---:| +| [databases](variables.tf#L17) | Databases. | map(object({…})) | ✓ | | +| [location](variables.tf#L38) | Location. | string | ✓ | | +| [name](variables.tf#L43) | Name. | string | ✓ | | +| [project_id](variables.tf#L48) | Project ID. | string | ✓ | | + +## Outputs + +| name | description | sensitive | +|---|---|:---:| +| [catalog](outputs.tf#L17) | Catalog. | | +| [catalog_id](outputs.tf#L22) | Catalog ID. 
| | +| [database_ids](outputs.tf#L27) | Database IDs. | | +| [databases](outputs.tf#L32) | Databases. | | +| [table_ids](outputs.tf#L37) | Table ids. | | +| [tables](outputs.tf#L42) | Tables. | | + diff --git a/modules/biglake-catalog/main.tf b/modules/biglake-catalog/main.tf new file mode 100644 index 000000000..055b71056 --- /dev/null +++ b/modules/biglake-catalog/main.tf @@ -0,0 +1,60 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +locals { + tables = merge([ + for k1, v1 in var.databases : { + for k2, v2 in v1.tables : "${k1}-${k2}" => merge({ + database_name = k1 + table_name = k2 + }, v2) + } + ]...) 
+} + +resource "google_biglake_catalog" "catalog" { + name = var.name + location = var.location + project = var.project_id +} + +resource "google_biglake_database" "databases" { + for_each = var.databases + name = each.key + catalog = google_biglake_catalog.catalog.id + type = each.value.type + hive_options { + location_uri = each.value.hive_options.location_uri + parameters = each.value.hive_options.parameters + } +} + +resource "google_biglake_table" "tables" { + for_each = local.tables + name = each.value.table_name + database = google_biglake_database.databases[each.value.database_name].id + type = each.value.type + hive_options { + table_type = each.value.hive_options.table_type + storage_descriptor { + location_uri = each.value.hive_options.location_uri + input_format = each.value.hive_options.input_format + output_format = each.value.hive_options.output_format + } + # Hive table parameters, passed through as provided in var.databases. + parameters = each.value.hive_options.parameters + } +} \ No newline at end of file diff --git a/modules/biglake-catalog/outputs.tf b/modules/biglake-catalog/outputs.tf new file mode 100644 index 000000000..76d73242d --- /dev/null +++ b/modules/biglake-catalog/outputs.tf @@ -0,0 +1,47 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +output "catalog" { + description = "Catalog." + value = google_biglake_catalog.catalog +} + +output "catalog_id" { + description = "Catalog ID." 
+ value = google_biglake_catalog.catalog.id +} + +output "database_ids" { + description = "Database IDs." + value = { for k, v in google_biglake_database.databases : k => v.id } +} + +output "databases" { + description = "Databases." + value = google_biglake_database.databases +} + +output "table_ids" { + description = "Table ids." + value = { for k, v in google_biglake_table.tables : k => v.id } +} + +output "tables" { + description = "Tables." + value = google_biglake_table.tables +} + + diff --git a/modules/biglake-catalog/variables.tf b/modules/biglake-catalog/variables.tf new file mode 100644 index 000000000..131b0e4d9 --- /dev/null +++ b/modules/biglake-catalog/variables.tf @@ -0,0 +1,51 @@ +/** + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +variable "databases" { + description = "Databases." + type = map(object({ + type = string + hive_options = object({ + location_uri = string + parameters = optional(map(string), {}) + }) + tables = map(object({ + type = string + hive_options = object({ + table_type = string + location_uri = string + input_format = string + output_format = string + parameters = optional(map(string), {}) + }) + })) + })) +} + +variable "location" { + description = "Location." + type = string +} + +variable "name" { + description = "Name." + type = string +} + +variable "project_id" { + description = "Project ID." 
+ type = string +} diff --git a/tests/modules/biglake_catalog/examples/basic.yaml b/tests/modules/biglake_catalog/examples/basic.yaml new file mode 100644 index 000000000..3a175ff70 --- /dev/null +++ b/tests/modules/biglake_catalog/examples/basic.yaml @@ -0,0 +1,54 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +values: + module.biglake_catalog.google_biglake_catalog.catalog: + location: US + name: my_catalog + project: project-id + timeouts: null + module.biglake_catalog.google_biglake_database.databases["my_database"]: + hive_options: + - location_uri: gs://my-bucket/my-database-folder + parameters: + owner: John Doe + name: my_database + timeouts: null + type: HIVE + module.biglake_catalog.google_biglake_table.tables["my_database-my_table"]: + hive_options: + - parameters: + owner: John Doe + provider: iceberg + spark.sql.create.version: 3.1.3 + spark.sql.partitionProvider: catalog + spark.sql.sources.provider: iceberg + spark.sql.sources.schema.numParts: '1' + spark.sql.sources.schema.part.0: '{"fields":[{"metadata":{},"name":"id","nullable":true,"type":"integer"},{"metadata":{},"name":"name","nullable":true,"type":"string"},{"metadata":{},"name":"age","nullable":true,"type":"integer"}],"type":"struct"}' + transient_lastDdlTime: '1680894197' + storage_descriptor: + - input_format: org.apache.hadoop.mapred.SequenceFileInputFormat + location_uri: gs://my-bucket/my-table-folder + output_format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + table_type: MANAGED_TABLE + name: my_table + timeouts: null + type: HIVE + +counts: + google_biglake_catalog: 1 + google_biglake_database: 1 + google_biglake_table: 1 + modules: 1 + resources: 3 \ No newline at end of file