Add support for sqlAssertion AutoDQ rule type in dataplex-datascan (#2416)

* Add sql_assertion rule type to Dataplex AutoDQ data_quality_spec

* Fix broken link to the public API reference documentation for the data quality spec

* Update README.md after linting

* Add example tests for dataplex-datascan

* Bump provider versions

* Bump provider versions everywhere
This commit is contained in:
Jay Bana
2024-07-09 22:29:45 +01:00
committed by GitHub
parent cb9945a83d
commit 2a2c4a96ce
100 changed files with 329 additions and 199 deletions

View File

@@ -111,6 +111,19 @@ data_quality_spec = {
table_condition_expectation = {
sql_expression = "COUNT(*) > 0"
}
},
{
# Example rule using the sql_assertion rule type; no column is set on this rule.
dimension = "VALIDITY"
sql_assertion = {
# "$$" escapes Terraform template interpolation inside the heredoc, so the
# rendered statement contains the literal ${data()} reference.
# The query selects (city_asset_number, council_district) pairs that occur
# more than once among rows with a non-null city_asset_number.
# NOTE(review): presumably the rule fails when the statement returns any
# rows — confirm against the Dataplex data quality rule documentation.
sql_statement = <<-EOT
SELECT
city_asset_number, council_district
FROM $${data()}
WHERE city_asset_number IS NOT NULL
GROUP BY 1,2
HAVING COUNT(*) > 1
EOT
}
}
]
}

View File

@@ -32,6 +32,7 @@ values:
regex_expectation: []
row_condition_expectation: []
set_expectation: []
sql_assertion: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: 0.99
@@ -50,6 +51,7 @@ values:
regex_expectation: []
row_condition_expectation: []
set_expectation: []
sql_assertion: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: 0.9
@@ -68,6 +70,7 @@ values:
regex_expectation: []
row_condition_expectation: []
set_expectation: []
sql_assertion: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: 0.8
@@ -83,6 +86,7 @@ values:
- regex: .*solar.*
row_condition_expectation: []
set_expectation: []
sql_assertion: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: null
@@ -100,6 +104,7 @@ values:
- values:
- sidewalk
- parkland
sql_assertion: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: null
@@ -114,6 +119,7 @@ values:
regex_expectation: []
row_condition_expectation: []
set_expectation: []
sql_assertion: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: null
@@ -129,6 +135,7 @@ values:
regex_expectation: []
row_condition_expectation: []
set_expectation: []
sql_assertion: []
statistic_range_expectation:
- max_value: '15'
min_value: '5'
@@ -149,6 +156,7 @@ values:
row_condition_expectation:
- sql_expression: footprint_length > 0 AND footprint_length <= 10
set_expectation: []
sql_assertion: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: null
@@ -163,11 +171,34 @@ values:
regex_expectation: []
row_condition_expectation: []
set_expectation: []
sql_assertion: []
statistic_range_expectation: []
table_condition_expectation:
- sql_expression: COUNT(*) > 0
threshold: null
uniqueness_expectation: []
- column: null
name: null
description: null
dimension: VALIDITY
ignore_null: null
non_null_expectation: []
range_expectation: []
regex_expectation: []
row_condition_expectation: []
set_expectation: []
sql_assertion:
- sql_statement: |
SELECT
city_asset_number, council_district
FROM ${data()}
WHERE city_asset_number IS NOT NULL
GROUP BY 1,2
HAVING COUNT(*) > 1
statistic_range_expectation: []
table_condition_expectation: []
threshold: null
uniqueness_expectation: []
sampling_percent: 100
data_scan_id: test-datascan
description: Terraform Managed. Custom description.

View File

@@ -32,6 +32,7 @@ values:
regex_expectation: []
row_condition_expectation: []
set_expectation: []
sql_assertion: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: 0.99
@@ -50,6 +51,7 @@ values:
regex_expectation: []
row_condition_expectation: []
set_expectation: []
sql_assertion: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: 0.9
@@ -68,6 +70,7 @@ values:
regex_expectation: []
row_condition_expectation: []
set_expectation: []
sql_assertion: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: 0.8
@@ -83,6 +86,7 @@ values:
- regex: .*solar.*
row_condition_expectation: []
set_expectation: []
sql_assertion: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: null
@@ -100,6 +104,7 @@ values:
- values:
- sidewalk
- parkland
sql_assertion: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: null
@@ -114,6 +119,7 @@ values:
regex_expectation: []
row_condition_expectation: []
set_expectation: []
sql_assertion: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: null
@@ -129,6 +135,7 @@ values:
regex_expectation: []
row_condition_expectation: []
set_expectation: []
sql_assertion: []
statistic_range_expectation:
- max_value: '15'
min_value: '5'
@@ -149,6 +156,7 @@ values:
row_condition_expectation:
- sql_expression: footprint_length > 0 AND footprint_length <= 10
set_expectation: []
sql_assertion: []
statistic_range_expectation: []
table_condition_expectation: []
threshold: null
@@ -163,11 +171,34 @@ values:
regex_expectation: []
row_condition_expectation: []
set_expectation: []
sql_assertion: []
statistic_range_expectation: []
table_condition_expectation:
- sql_expression: COUNT(*) > 0
threshold: null
uniqueness_expectation: []
- column: null
name: null
description: null
dimension: VALIDITY
ignore_null: null
non_null_expectation: []
range_expectation: []
regex_expectation: []
row_condition_expectation: []
set_expectation: []
sql_assertion:
- sql_statement: |
SELECT
city_asset_number, council_district
FROM ${data()}
WHERE city_asset_number IS NOT NULL
GROUP BY 1,2
HAVING COUNT(*) > 1
statistic_range_expectation: []
table_condition_expectation: []
threshold: null
uniqueness_expectation: []
sampling_percent: 100
data_scan_id: test-datascan
description: Terraform Managed.