Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
piotrczarnas committed Oct 31, 2024
2 parents 31a1ee3 + 4c82dd1 commit 4dfcc98
Show file tree
Hide file tree
Showing 36 changed files with 14,026 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
,DESKTOP-A6A380Q/lyftrondev,DESKTOP-A6A380Q,30.10.2024 09:22,file:///C:/Users/lyftrondev/AppData/Roaming/LibreOffice/4;
6,701 changes: 6,701 additions & 0 deletions dqops/sampledata/house_price_prediction_treated_dataset.csv

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is a marker file to identify a DQO_USER_HOME folder. Please check this file in to Git.
8 changes: 8 additions & 0 deletions examples/data-consistency/detect-data-type-changes/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
.credentials/
.data/
.index/
.logs/
bin/
jars/
.venv/
.localsettings.dqosettings.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/ColumnLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_column_checks
spec:
priority: 1000
description: Monitors the count of distinct values in a column and raises an issue
when an anomaly is detected.
monitoring_checks:
daily:
uniqueness:
daily_distinct_count_anomaly:
warning:
anomaly_percent: 0.1
partitioned_checks:
daily:
uniqueness:
daily_partition_distinct_count_anomaly:
warning:
anomaly_percent: 0.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/ColumnLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_column_checks
spec:
priority: 1000
description: Monitors the scale of null values in columns and raises an issue when
the day-to-day change is significant.
monitoring_checks:
daily:
nulls:
daily_nulls_percent_anomaly:
warning:
anomaly_percent: 0.1
partitioned_checks:
daily:
nulls:
daily_partition_nulls_percent_anomaly:
warning:
anomaly_percent: 0.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/ColumnLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_column_checks
spec:
priority: 1000
description: Monitors the sum and average (mean) aggregated values of numeric columns
and raises a data quality issue when the value changes too much between daily
partitions.
partitioned_checks:
daily:
anomaly:
daily_partition_sum_anomaly:
warning:
anomaly_percent: 0.05
daily_partition_mean_anomaly:
warning:
anomaly_percent: 0.05
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/ColumnLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_column_checks
spec:
priority: 1000
description: Monitors the sum and average (mean) aggregated values of numeric columns
and raises a data quality issue when the value changes too much day-to-day.
monitoring_checks:
daily:
anomaly:
daily_sum_anomaly:
warning:
anomaly_percent: 0.05
daily_mean_anomaly:
warning:
anomaly_percent: 0.05
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/TableLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_table_checks
spec:
priority: 1000
description: Monitors data volume of the whole table daily and raises an issue when
the volume has increased or decreased significantly.
monitoring_checks:
daily:
volume:
daily_row_count_change:
warning:
max_percent: 10.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/ColumnLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_column_checks
spec:
priority: 1000
description: Detects when the values stored in a text column change their type.
This policy should be activated on raw tables in the landing zones for tables that
store all values (also numeric and dates) in text columns.
monitoring_checks:
daily:
datatype:
daily_detected_datatype_in_text_changed:
warning: {}
partitioned_checks:
daily:
datatype:
daily_partition_detected_datatype_in_text_changed:
warning: {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/ColumnLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_column_checks
spec:
priority: 1000
description: "Monitors the schema of columns registered in DQOps. Raises a data\
\ quality issue when the column is missing, or its data type has changed."
monitoring_checks:
daily:
schema:
daily_column_exists:
warning: {}
daily_column_type_changed:
warning: {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/ColumnLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_column_checks
spec:
priority: 1000
disabled: true
description: Detects columns containing any null values using both monitoring checks
and daily partitioned checks.
monitoring_checks:
daily:
nulls:
daily_nulls_count:
warning:
max_count: 0
partitioned_checks:
daily:
nulls:
daily_partition_nulls_count:
warning:
max_count: 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/TableLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_table_checks
spec:
priority: 1000
description: Monitors data freshness anomalies daily.
monitoring_checks:
daily:
timeliness:
daily_data_freshness_anomaly:
warning:
anomaly_percent: 0.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/TableLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_table_checks
spec:
priority: 1000
description: "Monitors data volume of the whole table (using daily monitoring checks)\
\ and for each daily partition, using daily partition checks."
monitoring_checks:
daily:
volume:
daily_row_count_anomaly:
warning:
anomaly_percent: 0.1
partitioned_checks:
daily:
volume:
daily_partition_row_count_anomaly:
warning:
anomaly_percent: 0.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/ColumnLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_column_checks
spec:
priority: 1000
disabled: true
description: Detects empty columns using both monitoring checks and daily partitioned
checks.
monitoring_checks:
daily:
nulls:
daily_empty_column_found:
warning: {}
partitioned_checks:
daily:
nulls:
daily_partition_empty_column_found:
warning: {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/TableLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_table_checks
spec:
priority: 1000
description: Detects empty tables using daily monitoring checks.
monitoring_checks:
daily:
volume:
daily_row_count:
warning:
min_count: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/ColumnLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_column_checks
spec:
priority: 1000
disabled: true
description: Monitors numeric columns to detect new smallest (min) or biggest (max)
value for each daily partition. Raises a data quality issue when the partition
contains a big or small value that exceeds regular ranges.
partitioned_checks:
daily:
anomaly:
daily_partition_min_anomaly:
warning:
anomaly_percent: 0.1
daily_partition_max_anomaly:
warning:
anomaly_percent: 0.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/ColumnLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_column_checks
spec:
priority: 1000
description: "Monitors numeric columns to detect new smallest (min) or biggest (max)\
\ value, which must be an anomaly."
monitoring_checks:
daily:
anomaly:
daily_min_anomaly:
warning:
anomaly_percent: 0.1
daily_max_anomaly:
warning:
anomaly_percent: 0.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/ColumnLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_column_checks
spec:
priority: 1000
disabled: true
description: Monitors the percentage of null values in columns and raises an issue
when the day-to-day change is above a threshold.
monitoring_checks:
daily:
nulls:
daily_nulls_percent_change:
warning:
max_percent: 10.0
partitioned_checks:
daily:
nulls:
daily_partition_nulls_percent_change:
warning:
max_percent: 10.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/TableLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_table_checks
spec:
priority: 1000
description: Monitors table availability issues daily.
monitoring_checks:
daily:
availability:
daily_table_availability:
warning:
max_failures: 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/TableLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_table_checks
spec:
priority: 1000
description: Monitors the table schema and raises issues when the schema of the
table was changed.
monitoring_checks:
daily:
schema:
daily_column_count_changed:
warning: {}
daily_column_list_changed:
warning: {}
daily_column_list_or_order_changed:
warning: {}
daily_column_types_changed:
warning: {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/ColumnLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_column_checks
spec:
priority: 1000
description: "Activates data profiling checks on all text columns to detect if they\
\ contain sensitive data (emails, phone numbers). Enabling this policy allows\
\ the data quality rule miner to set up PII checks when sensitive values are identified."
profiling_checks:
pii:
profile_contains_usa_phone_percent: {}
profile_contains_email_percent: {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/ColumnLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_column_checks
spec:
priority: 2000
description: Monitors the count and the percentage of null values without raising
data quality issues.
monitoring_checks:
daily:
nulls:
daily_nulls_count: {}
daily_nulls_percent: {}
partitioned_checks:
daily:
nulls:
daily_partition_nulls_count: {}
daily_partition_nulls_percent: {}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# yaml-language-server: $schema=https://cloud.dqops.com/dqo-yaml-schema/TableLevelDataQualityPolicyYaml-schema.json
apiVersion: dqo/v1
kind: default_table_checks
spec:
priority: 1000
description: Monitors volume (row count) of daily partitions.
partitioned_checks:
daily:
volume:
daily_partition_row_count: {}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# packages in this file are installed when DQOps starts
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
@echo off
..\..\..\dqo.cmd
2 changes: 2 additions & 0 deletions examples/data-consistency/detect-data-type-changes/run_dqo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/sh
../../../dqo
Loading

0 comments on commit 4dfcc98

Please sign in to comment.