From b36688ec78226b4a3692d23977cf33b4928c5b96 Mon Sep 17 00:00:00 2001
From: Arseny Chernov
Date: Thu, 16 Dec 2021 17:54:38 +0800
Subject: [PATCH] Add optional BQ table as a file export config

---
 .../cffile/main.py                            | 124 ++++++++++++++++++
 .../cffile/requirements.txt                   |   4 +
 .../main.tf                                   |  55 ++++++++++
 .../variables.tf                              |  26 ++++-
 .../fixture/bundle_cffile.zip                 | Bin 0 -> 131 bytes
 .../fixture/cffile/README                     |   0
 .../fixture/main.tf                           |   1 +
 .../fixture/variables.tf                      |  17 ++++
 .../test_plan.py                              |   4 +-
 9 files changed, 228 insertions(+), 3 deletions(-)
 create mode 100755 cloud-operations/scheduled-asset-inventory-export-bq/cffile/main.py
 create mode 100644 cloud-operations/scheduled-asset-inventory-export-bq/cffile/requirements.txt
 create mode 100644 tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/bundle_cffile.zip
 create mode 100644 tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/cffile/README

diff --git a/cloud-operations/scheduled-asset-inventory-export-bq/cffile/main.py b/cloud-operations/scheduled-asset-inventory-export-bq/cffile/main.py
new file mode 100755
index 000000000..ec8572ce9
--- /dev/null
+++ b/cloud-operations/scheduled-asset-inventory-export-bq/cffile/main.py
@@ -0,0 +1,124 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''Cloud Function module to export a BigQuery table to a file in GCS.
+
+This module is designed to be plugged in a Cloud Function, attached to a Cloud
+Scheduler trigger, to export a BigQuery table as a file (e.g. newline
+delimited JSON or CSV) to a Cloud Storage bucket.
+
+'''
+
+import base64
+import datetime
+import json
+import logging
+import os
+import warnings
+
+import click
+
+from google.api_core.exceptions import GoogleAPIError
+from google.cloud import bigquery
+
+import googleapiclient.discovery
+import googleapiclient.errors
+
+
+def _configure_logging(verbose=True):
+  '''Basic logging configuration.
+
+  Args:
+    verbose: enable verbose (DEBUG level) logging, INFO level otherwise
+  '''
+  level = logging.DEBUG if verbose else logging.INFO
+  logging.basicConfig(level=level)
+  warnings.filterwarnings('ignore', r'.*end user credentials.*', UserWarning)
+
+
+@click.command()
+@click.option('--bucket', required=True, help='GCS bucket for export')
+@click.option('--filename', required=True, help='Path and filename with extension to export e.g. folder/export.json .')
+@click.option('--format', required=True, help='The exported file format, e.g. NEWLINE_DELIMITED_JSON or CSV.')
+@click.option('--bq-dataset', required=True, help='Bigquery dataset where table for export is located.')
+@click.option('--bq-table', required=True, help='Bigquery table to export.')
+@click.option('--verbose', is_flag=True, help='Verbose output')
+def main_cli(bucket=None, filename=None, format=None, bq_dataset=None,
+             bq_table=None, verbose=False):
+  '''Trigger the export of a Bigquery table to a file in a GCS bucket.
+
+  The table specified is exported in the given format to
+  gs://<bucket>/<filename>.
+  '''
+  try:
+    _main(bucket, filename, format, bq_dataset, bq_table, verbose)
+  except RuntimeError:
+    logging.exception('exception raised')
+
+
+def main(event, context):
+  '''Cloud Function entry point, exceptions are logged and not propagated.
+
+  Args:
+    event: triggering event, its base64-encoded 'data' field is decoded as a
+      JSON object and passed to _main as keyword arguments
+    context: unused
+  '''
+  try:
+    data = json.loads(base64.b64decode(event['data']).decode('utf-8'))
+    print(data)
+    _main(**data)
+  # uncomment once https://issuetracker.google.com/issues/155215191 is fixed
+  # except RuntimeError:
+  #  raise
+  except Exception:
+    logging.exception('exception in cloud function entry point')
+
+
+def _main(bucket=None, filename=None, format=None, bq_dataset=None,
+          bq_table=None, verbose=False):
+  '''Module entry point used by cli and cloud function wrappers.
+
+  Args:
+    bucket: GCS bucket the file is exported to
+    filename: path and name of the exported file inside the bucket
+    format: export format name, e.g. NEWLINE_DELIMITED_JSON or CSV
+    bq_dataset: Bigquery dataset holding the table to export
+    bq_table: Bigquery table to export
+    verbose: enable verbose logging
+
+  Raises:
+    RuntimeError: if the extract job cannot be started or fails
+  '''
+  _configure_logging(verbose)
+  client = bigquery.Client()
+  destination_uri = 'gs://{}/{}'.format(bucket, filename)
+  dataset_ref = bigquery.DatasetReference(client.project, bq_dataset)
+  table_ref = dataset_ref.table(bq_table)
+  job_config = bigquery.job.ExtractJobConfig()
+  # destination_format takes the bare format name (the values defined in
+  # bigquery.DestinationFormat), not a dotted attribute path string
+  job_config.destination_format = format
+  try:
+    extract_job = client.extract_table(
+        table_ref, destination_uri, job_config=job_config)
+    extract_job.result()
+  except (GoogleAPIError, googleapiclient.errors.HttpError) as e:
+    logging.debug('API Error: %s', e, exc_info=True)
+    raise RuntimeError(
+        'Error exporting BQ table %s as a file' % bq_table) from e
+
+
+if __name__ == '__main__':
+  main_cli()
diff --git a/cloud-operations/scheduled-asset-inventory-export-bq/cffile/requirements.txt b/cloud-operations/scheduled-asset-inventory-export-bq/cffile/requirements.txt
new file mode 100644
index 000000000..d48ebb547
--- /dev/null
+++ b/cloud-operations/scheduled-asset-inventory-export-bq/cffile/requirements.txt
@@ -0,0 +1,4 @@
+click
+google-api-python-client>=1.10.1
+google-cloud-monitoring>=1.1.0
+google-cloud-bigquery
diff --git a/cloud-operations/scheduled-asset-inventory-export-bq/main.tf b/cloud-operations/scheduled-asset-inventory-export-bq/main.tf
index 6fb31afc0..e3752362d 100644
--- a/cloud-operations/scheduled-asset-inventory-export-bq/main.tf
+++ b/cloud-operations/scheduled-asset-inventory-export-bq/main.tf
@@ -66,6 +66,17 @@ module "pubsub" {
   # at the project level via roles/cloudscheduler.serviceAgent
 }
 
+module "pubsub_file" {
+  source     = "../../modules/pubsub"
+  project_id = module.project.project_id
+  name       = var.name_cffile
+  subscriptions = {
+    "${var.name_cffile}-default" = null
+  }
+  # the Cloud Scheduler robot service account already has pubsub.topics.publish
+  # at the project level via roles/cloudscheduler.serviceAgent
+}
+
 ###############################################################################
 #                                Cloud Function                               #
 ###############################################################################
@@ -93,6 +104,29 @@ module "cf" {
   }
 }
 
+module "cffile" {
+  source      = "../../modules/cloud-function"
+  project_id  = module.project.project_id
+  region      = var.region
+  name        = var.name_cffile
+  bucket_name = "${var.name_cffile}-${random_pet.random.id}"
+  bucket_config = {
+    location             = var.region
+    lifecycle_delete_age = null
+  }
+  bundle_config = {
+    source_dir  = "cffile"
+    output_path = var.bundle_path_cffile
+    excludes    = null
+  }
+  service_account = module.service-account.email
+  trigger_config = {
+    event    = "google.pubsub.topic.publish"
+    resource = module.pubsub_file.topic.id
+    retry    = null
+  }
+}
+
 resource "random_pet" "random" {
   length = 1
 }
@@ -128,6 +162,27 @@ resource "google_cloud_scheduler_job" "job" {
   }
 }
 
+resource "google_cloud_scheduler_job" "job_file" {
+  project     = google_app_engine_application.app.project
+  region      = var.region
+  name        = "file-export-job"
+  description = "File export from BQ Job"
+  schedule    = "0 9 * * 1"
+  time_zone   = "Etc/UTC"
+
+  pubsub_target {
+    attributes = {}
+    topic_name = module.pubsub_file.topic.id
+    data = base64encode(jsonencode({
+      bucket     = var.file_config.bucket
+      filename   = var.file_config.filename
+      format     = var.file_config.format
+      bq_dataset = var.file_config.bq_dataset
+      bq_table   = var.file_config.bq_table
+    }))
+  }
+}
+
 ###############################################################################
 #                                Bigquery                                     #
 ###############################################################################
diff --git a/cloud-operations/scheduled-asset-inventory-export-bq/variables.tf b/cloud-operations/scheduled-asset-inventory-export-bq/variables.tf
index 988674f30..83823b90c 100644
--- a/cloud-operations/scheduled-asset-inventory-export-bq/variables.tf
+++ b/cloud-operations/scheduled-asset-inventory-export-bq/variables.tf
@@ -26,8 +26,14 @@ variable "bundle_path" {
   default     = "./bundle.zip"
 }
 
+variable "bundle_path_cffile" {
+  description = "Path used to write the intermediate Cloud Function code bundle."
+  type        = string
+  default     = "./bundle_cffile.zip"
+}
+
 variable "cai_config" {
-  description = "Cloud Asset inventory export config."
+  description = "Cloud Asset Inventory export config."
   type = object({
     bq_dataset         = string
     bq_table           = string
@@ -36,6 +42,17 @@ variable "cai_config" {
   })
 }
 
+variable "file_config" {
+  description = "Config for the Cloud Function exporting a BQ table as a file to GCS."
+  type = object({
+    bucket     = string
+    filename   = string
+    format     = string
+    bq_dataset = string
+    bq_table   = string
+  })
+}
+
 variable "location" {
   description = "Appe Engine location used in the example."
   type        = string
@@ -49,6 +66,13 @@ variable "name" {
   default     = "asset-inventory"
 }
 
+
+variable "name_cffile" {
+  description = "Arbitrary string used to name created resources."
+  type        = string
+  default     = "cffile-exporter"
+}
+
 variable "project_create" {
   description = "Create project instead ofusing an existing one."
type = bool diff --git a/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/bundle_cffile.zip b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/bundle_cffile.zip new file mode 100644 index 0000000000000000000000000000000000000000..454bc1f7c7b1d7756e78f008aa5a098485a9de00 GIT binary patch literal 131 zcmWIWW@Zs#-~d7f2E{HQ0S9bAR*t<8 literal 0 HcmV?d00001 diff --git a/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/cffile/README b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/cffile/README new file mode 100644 index 000000000..e69de29bb diff --git a/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/main.tf b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/main.tf index b892dadb7..fd5e22d2e 100644 --- a/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/main.tf +++ b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/main.tf @@ -18,6 +18,7 @@ module "test" { source = "../../../../cloud-operations/scheduled-asset-inventory-export-bq" billing_account = var.billing_account cai_config = var.cai_config + file_config = var.file_config project_create = var.project_create project_id = var.project_id } diff --git a/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/variables.tf b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/variables.tf index 5c1e0ac57..8b5212f7c 100644 --- a/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/variables.tf +++ b/tests/cloud_operations/scheduled_asset_inventory_export_bq/fixture/variables.tf @@ -32,6 +32,23 @@ variable "cai_config" { } } +variable "file_config" { + type = object({ + bucket = string + filename = string + format = string + bq_dataset = string + bq_table = string + }) + default = { + bucket = "my-bucket" + filename = "my-folder/myfile.json" + format = "NEWLINE_DELIMITED_JSON" + bq_dataset = "my-dataset" + bq_table = "my_table" + } +} + 
variable "project_create" { type = bool diff --git a/tests/cloud_operations/scheduled_asset_inventory_export_bq/test_plan.py b/tests/cloud_operations/scheduled_asset_inventory_export_bq/test_plan.py index de94c82d5..f3d075098 100644 --- a/tests/cloud_operations/scheduled_asset_inventory_export_bq/test_plan.py +++ b/tests/cloud_operations/scheduled_asset_inventory_export_bq/test_plan.py @@ -23,5 +23,5 @@ FIXTURES_DIR = os.path.join(os.path.dirname(__file__), 'fixture') def test_resources(e2e_plan_runner): "Test that plan works and the numbers of resources is as expected." modules, resources = e2e_plan_runner(FIXTURES_DIR) - assert len(modules) == 5 - assert len(resources) == 23 + assert len(modules) == 7 + assert len(resources) == 28