From 8e0dac84f9f70b55348b218d4c1c3e6d5c143acd Mon Sep 17 00:00:00 2001 From: apichick Date: Thu, 19 Sep 2024 11:13:32 +0200 Subject: [PATCH] Bigquery dataset routines (#2570) * Added routines to bigquery-dataset module * Fixes after review --- modules/bigquery-dataset/README.md | 55 ++++++++++++----- modules/bigquery-dataset/main.tf | 48 +++++++++++++++ modules/bigquery-dataset/outputs.tf | 10 +++ modules/bigquery-dataset/variables.tf | 40 +++++++++++- .../bigquery_dataset/examples/routines.yaml | 61 +++++++++++++++++++ 5 files changed, 199 insertions(+), 15 deletions(-) create mode 100644 tests/modules/bigquery_dataset/examples/routines.yaml diff --git a/modules/bigquery-dataset/README.md b/modules/bigquery-dataset/README.md index 0ef0773ca..6ab90184c 100644 --- a/modules/bigquery-dataset/README.md +++ b/modules/bigquery-dataset/README.md @@ -7,7 +7,7 @@ This module allows managing a single BigQuery dataset, including access configur - [IAM roles](#iam-roles) - [Authorized Views, Datasets, and Routines](#authorized-views-datasets-and-routines) - [Dataset options](#dataset-options) -- [Tables and views](#tables-and-views) +- [Tables, views and routines](#tables-views-and-routines) - [Tag bindings](#tag-bindings) - [TODO](#todo) - [Variables](#variables) @@ -192,9 +192,9 @@ module "bigquery-dataset" { # tftest modules=1 resources=1 inventory=options.yaml ``` -## Tables and views +## Tables, views and routines -Tables are created via the `tables` variable, or the `view` variable for views. Support for external tables will be added in a future release. +Tables are created via the `tables` variable. Support for external tables will be added in a future release. ```hcl locals { @@ -247,7 +247,7 @@ module "bigquery-dataset" { # tftest modules=1 resources=2 inventory=partitioning.yaml ``` -To create views use the `view` variable. 
If you're querying a table created by the same module `terraform apply` will initially fail and eventually succeed once the underlying table has been created. You can probably also use the module's output in the view's query to create a dependency on the table. +To create views, use the `views` variable. If a view queries a table created by the same module, `terraform apply` will initially fail and eventually succeed once the underlying table has been created. You can probably also use the module's output in the view's query to create a dependency on the table. ```hcl locals { @@ -277,10 +277,34 @@ module "bigquery-dataset" { } } } - # tftest modules=1 resources=3 inventory=views.yaml ``` +To create routines use the `routines` variable. + +```hcl +module "bigquery-dataset" { + source = "./fabric/modules/bigquery-dataset" + project_id = "my-project" + id = "my_dataset" + routines = { + custom_masking_routine = { + routine_type = "SCALAR_FUNCTION" + language = "SQL" + data_governance_type = "DATA_MASKING" + definition_body = "SAFE.REGEXP_REPLACE(ssn, '[0-9]', 'X')" + return_type = "{\"typeKind\" : \"STRING\"}" + arguments = { + ssn = { + data_type = "{\"typeKind\" : \"STRING\"}" + } + } + } + } +} +# tftest modules=1 resources=2 inventory=routines.yaml +``` + ## Tag bindings Refer to the [Creating and managing tags](https://cloud.google.com/resource-manager/docs/tags/tags-creating-and-managing) documentation for details on usage. @@ -326,7 +350,7 @@ module "bigquery-dataset" { | [access](variables.tf#L17) | Map of access rules with role and identity type. Keys are arbitrary and must match those in the `access_identities` variable, types are `domain`, `group`, `special_group`, `user`, `view`. | map(object({…})) | | {} | | [access_identities](variables.tf#L33) | Map of access identities used for basic access roles. View identities have the format 'project_id\|dataset_id\|table_id'. 
| map(string) | | {} | | [authorized_datasets](variables.tf#L39) | An array of datasets to be authorized on the dataset. | list(object({…})) | | [] | -| [authorized_routines](variables.tf#L48) | An array of authorized routine to be authorized on the dataset. | list(object({…})) | | [] | +| [authorized_routines](variables.tf#L48) | An array of routines to be authorized on the dataset. | list(object({…})) | | [] | | [authorized_views](variables.tf#L58) | An array of views to be authorized on the dataset. | list(object({…})) | | [] | | [dataset_access](variables.tf#L68) | Set access in the dataset resource instead of using separate resources. | bool | | false | | [description](variables.tf#L74) | Optional description. | string | | "Terraform managed." | @@ -337,9 +361,10 @@ module "bigquery-dataset" { | [location](variables.tf#L109) | Dataset location. | string | | "EU" | | [materialized_views](variables.tf#L115) | Materialized views definitions. | map(object({…})) | | {} | | [options](variables.tf#L148) | Dataset options. | object({…}) | | {} | -| [tables](variables.tf#L167) | Table definitions. Options and partitioning default to null. Partitioning can only use `range` or `time`, set the unused one to null. | map(object({…})) | | {} | -| [tag_bindings](variables.tf#L252) | Tag bindings for this dataset, in key => tag value id format. | map(string) | | {} | -| [views](variables.tf#L259) | View definitions. | map(object({…})) | | {} | +| [routines](variables.tf#L167) | Routine definitions. | map(object({…})) | | {} | +| [tables](variables.tf#L205) | Table definitions. Options and partitioning default to null. Partitioning can only use `range` or `time`, set the unused one to null. | map(object({…})) | | {} | +| [tag_bindings](variables.tf#L290) | Tag bindings for this dataset, in key => tag value id format. | map(string) | | {} | +| [views](variables.tf#L297) | View definitions. 
| map(object({…})) | | {} | ## Outputs @@ -350,9 +375,11 @@ module "bigquery-dataset" { | [id](outputs.tf#L37) | Fully qualified dataset id. | | | [materialized_view_ids](outputs.tf#L52) | Map of fully qualified materialized view ids keyed by view ids. | | | [materialized_views](outputs.tf#L57) | Materialized view resources. | | -| [self_link](outputs.tf#L62) | Dataset self link. | | -| [table_ids](outputs.tf#L77) | Map of fully qualified table ids keyed by table ids. | | -| [tables](outputs.tf#L82) | Table resources. | | -| [view_ids](outputs.tf#L87) | Map of fully qualified view ids keyed by view ids. | | -| [views](outputs.tf#L92) | View resources. | | +| [routine_ids](outputs.tf#L62) | Map of fully qualified routine ids keyed by routine ids. | | +| [routines](outputs.tf#L67) | Routine resources. | | +| [self_link](outputs.tf#L72) | Dataset self link. | | +| [table_ids](outputs.tf#L87) | Map of fully qualified table ids keyed by table ids. | | +| [tables](outputs.tf#L92) | Table resources. | | +| [view_ids](outputs.tf#L97) | Map of fully qualified view ids keyed by view ids. | | +| [views](outputs.tf#L102) | View resources. 
| | diff --git a/modules/bigquery-dataset/main.tf b/modules/bigquery-dataset/main.tf index c4a02ff55..85b5ba51e 100644 --- a/modules/bigquery-dataset/main.tf +++ b/modules/bigquery-dataset/main.tf @@ -403,3 +403,51 @@ resource "google_bigquery_table" "materialized_view" { allow_non_incremental_definition = each.value.allow_non_incremental_definition } } + +resource "google_bigquery_routine" "default" { + for_each = var.routines + project = var.project_id + dataset_id = google_bigquery_dataset.default.dataset_id + routine_id = each.key + description = each.value.description + routine_type = each.value.routine_type + language = each.value.language + definition_body = each.value.definition_body + imported_libraries = each.value.imported_libraries + determinism_level = each.value.determinism_level + data_governance_type = each.value.data_governance_type + return_table_type = each.value.return_table_type + dynamic "arguments" { + for_each = each.value.arguments + content { + name = arguments.key + argument_kind = arguments.value.argument_kind + mode = arguments.value.mode + data_type = arguments.value.data_type + } + } + dynamic "spark_options" { + for_each = each.value.spark_options == null ? [] : [""] + content { + connection = each.value.spark_options.connection + runtime_version = each.value.spark_options.runtime_version + container_image = each.value.spark_options.container_image + properties = each.value.spark_options.properties + main_file_uri = each.value.spark_options.main_file_uri + py_file_uris = each.value.spark_options.py_file_uris + jar_uris = each.value.spark_options.jar_uris + file_uris = each.value.spark_options.file_uris + archive_uris = each.value.spark_options.archive_uris + main_class = each.value.spark_options.main_class + } + } + dynamic "remote_function_options" { + for_each = each.value.remote_function_options == null ? 
[] : [""] + content { + endpoint = each.value.remote_function_options.endpoint + connection = each.value.remote_function_options.connection + max_batching_rows = each.value.remote_function_options.max_batching_rows + user_defined_context = each.value.remote_function_options.user_defined_context + } + } +} diff --git a/modules/bigquery-dataset/outputs.tf b/modules/bigquery-dataset/outputs.tf index 21b3fe41c..9d056d59c 100644 --- a/modules/bigquery-dataset/outputs.tf +++ b/modules/bigquery-dataset/outputs.tf @@ -59,6 +59,16 @@ output "materialized_views" { value = google_bigquery_table.materialized_view } +output "routine_ids" { + description = "Map of fully qualified routine ids keyed by routine ids." + value = { for k, v in google_bigquery_routine.default : v.routine_id => v.id } +} + +output "routines" { + description = "Routine resources." + value = google_bigquery_routine.default +} + output "self_link" { description = "Dataset self link." value = google_bigquery_dataset.default.self_link diff --git a/modules/bigquery-dataset/variables.tf b/modules/bigquery-dataset/variables.tf index 2eaf308f1..1c3579cea 100644 --- a/modules/bigquery-dataset/variables.tf +++ b/modules/bigquery-dataset/variables.tf @@ -46,7 +46,7 @@ variable "authorized_datasets" { } variable "authorized_routines" { - description = "An array of authorized routine to be authorized on the dataset." + description = "An array of routines to be authorized on the dataset." type = list(object({ project_id = string, dataset_id = string, @@ -164,6 +164,44 @@ variable "project_id" { type = string } +variable "routines" { + description = "Routine definitions." 
+ type = map(object({ + description = optional(string) + routine_type = string + language = optional(string) + definition_body = string + imported_libraries = optional(list(string)) + determinism_level = optional(string) + data_governance_type = optional(string) + return_table_type = optional(string) + arguments = optional(map(object({ + argument_kind = optional(string) + mode = optional(string) + data_type = optional(string) + })), {}) + spark_options = optional(object({ + archive_uris = optional(list(string), []) + connection = string + container_image = optional(string) + file_uris = optional(list(string), []) + jar_uris = optional(list(string), []) + main_file_uri = optional(string) + main_class = optional(string) + properties = optional(map(string), {}) + py_file_uris = optional(list(string), []) + runtime_version = optional(string) + })) + remote_function_options = optional(object({ + connection = string + endpoint = optional(string) + max_batching_rows = optional(string) + user_defined_context = optional(map(string), {}) + })) + })) + default = {} +} + variable "tables" { description = "Table definitions. Options and partitioning default to null. Partitioning can only use `range` or `time`, set the unused one to null." type = map(object({ diff --git a/tests/modules/bigquery_dataset/examples/routines.yaml b/tests/modules/bigquery_dataset/examples/routines.yaml new file mode 100644 index 000000000..03ae72cce --- /dev/null +++ b/tests/modules/bigquery_dataset/examples/routines.yaml @@ -0,0 +1,61 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +values: + module.bigquery-dataset.google_bigquery_dataset.default: + dataset_id: my_dataset + default_encryption_configuration: [] + default_partition_expiration_ms: null + default_table_expiration_ms: null + delete_contents_on_destroy: false + description: Terraform managed. + effective_labels: + goog-terraform-provisioned: 'true' + external_dataset_reference: [] + friendly_name: null + labels: null + location: EU + max_time_travel_hours: '168' + project: my-project + resource_tags: null + terraform_labels: + goog-terraform-provisioned: 'true' + timeouts: null + module.bigquery-dataset.google_bigquery_routine.default["custom_masking_routine"]: + arguments: + - argument_kind: FIXED_TYPE + data_type: '{"typeKind":"STRING"}' + mode: null + name: ssn + data_governance_type: DATA_MASKING + dataset_id: my_dataset + definition_body: SAFE.REGEXP_REPLACE(ssn, '[0-9]', 'X') + description: null + determinism_level: null + imported_libraries: null + language: SQL + project: my-project + remote_function_options: [] + return_table_type: null + return_type: null + routine_id: custom_masking_routine + routine_type: SCALAR_FUNCTION + spark_options: [] + timeouts: null + +counts: + google_bigquery_dataset: 1 + google_bigquery_routine: 1 + modules: 1 + resources: 2 \ No newline at end of file