Skip to content

Commit

Permalink
Merge branch 'main' into fxci_fix_worker_cost
Browse files Browse the repository at this point in the history
  • Loading branch information
akkomar authored Jun 21, 2024
2 parents 13a3d58 + 72557a9 commit 3f2191e
Show file tree
Hide file tree
Showing 15 changed files with 74 additions and 25 deletions.
2 changes: 1 addition & 1 deletion CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# These datasets are subject to the additional change control procedures
# described in https://docs.google.com/document/d/1TTJi4ht7NuzX6BPG_KTr6omaZg70cEpxe9xlpfnHj9k/
# Active Users
/sql_generators/active_users/templates/ @mozilla/kpi_table_reviewers
/sql_generators/active_users_aggregates_v3/templates/ @mozilla/kpi_table_reviewers
/sql/moz-fx-data-shared-prod/fenix_derived/active_users_aggregates_v3/ @mozilla/kpi_table_reviewers
/sql/moz-fx-data-shared-prod/firefox_desktop_derived/active_users_aggregates_v1/ @mozilla/kpi_table_reviewers
/sql/moz-fx-data-shared-prod/firefox_ios_derived/active_users_aggregates_v3/ @mozilla/kpi_table_reviewers
Expand Down
4 changes: 2 additions & 2 deletions bigquery_etl/cli/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -1430,7 +1430,7 @@ def initialize(
else:
file_regex = re.compile(
r"^.*/([a-zA-Z0-9-]+)/([a-zA-Z0-9_]+)/([a-zA-Z0-9_]+(_v[0-9]+)?)/"
r"(?:query\.sql|init\.sql)$"
r"(?:query\.sql|init\.sql|materialized_view\.sql)$"
)
query_files = paths_matching_name_pattern(
name, sql_dir, project_id, file_regex=file_regex
Expand Down Expand Up @@ -1472,7 +1472,7 @@ def _initialize(query_file):
except NotFound:
# continue with creating the table
pass
else:
elif len(materialized_views) == 0:
return

try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ fields:
type: STRING
mode: NULLABLE
description: |-
Description of the localized site language and/or country based on `localized_site_language_code` (if any).
Description of the localized site language and/or country based on `localized_site_code` (if any).
This will be null for anonymized Discover impressions.
- name: localized_site_language_code
type: STRING
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

# Configs
brwsr_usg_configs = {
"timeout_limit": 2000,
"timeout_limit": 2400,
"device_types": ["DESKTOP", "MOBILE", "OTHER", "ALL"],
"max_limit": 20,
"operating_systems": [
Expand Down Expand Up @@ -77,7 +77,7 @@ def move_blob(bucket_name, blob_name, destination_bucket_name, destination_blob_
source_bucket = storage_client.bucket(bucket_name)
source_blob = source_bucket.blob(blob_name)
destination_bucket = storage_client.bucket(destination_bucket_name)
destination_generation_match_precondition = 0
destination_generation_match_precondition = None

blob_copy = source_bucket.copy_blob(
source_blob,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

# Configs
device_usg_configs = {
"timeout_limit": 2200,
"timeout_limit": 2400,
"locations": [
"ALL",
"BE",
Expand Down Expand Up @@ -64,7 +64,7 @@ def move_blob(bucket_name, blob_name, destination_bucket_name, destination_blob_
source_bucket = storage_client.bucket(bucket_name)
source_blob = source_bucket.blob(blob_name)
destination_bucket = storage_client.bucket(destination_bucket_name)
destination_generation_match_precondition = 0
destination_generation_match_precondition = None

blob_copy = source_bucket.copy_blob(
source_blob,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

# Configurations
os_usg_configs = {
"timeout_limit": 2000,
"timeout_limit": 2500,
"device_types": ["DESKTOP", "MOBILE", "OTHER", "ALL"],
"locations": [
"ALL",
Expand Down Expand Up @@ -65,7 +65,7 @@ def move_blob(bucket_name, blob_name, destination_bucket_name, destination_blob_
source_bucket = storage_client.bucket(bucket_name)
source_blob = source_bucket.blob(blob_name)
destination_bucket = storage_client.bucket(destination_bucket_name)
destination_generation_match_precondition = 0
destination_generation_match_precondition = None

blob_copy = source_bucket.copy_blob(
source_blob,
Expand All @@ -91,7 +91,7 @@ def generate_os_timeseries_api_call(strt_dt, end_dt, agg_int, location, device_t
if location == "ALL" and device_type == "ALL":
os_usage_api_url = f"https://api.cloudflare.com/client/v4/radar/http/timeseries_groups/os?dateStart={strt_dt}T00:00:00.000Z&dateEnd={end_dt}T00:00:00.000Z&format=json&aggInterval={agg_int}"
elif location != "ALL" and device_type == "ALL":
os_usage_api_url = f"https://api.cloudflare.com/client/v4/radar/http/timeseries_groups/os?dateStart={strt_dt}T00:00:00.000Z&dateEnd={strt_dt}T00:00:00.000Z&location={location}&format=json&aggInterval={agg_int}"
os_usage_api_url = f"https://api.cloudflare.com/client/v4/radar/http/timeseries_groups/os?dateStart={strt_dt}T00:00:00.000Z&dateEnd={end_dt}T00:00:00.000Z&location={location}&format=json&aggInterval={agg_int}"
elif location == "ALL" and device_type != "ALL":
os_usage_api_url = f"https://api.cloudflare.com/client/v4/radar/http/timeseries_groups/os?dateStart={strt_dt}T00:00:00.000Z&dateEnd={end_dt}T00:00:00.000Z&deviceType={device_type}&format=json&aggInterval={agg_int}"
else:
Expand Down Expand Up @@ -350,7 +350,7 @@ def main():
datetime.strptime(args.date, "%Y-%m-%d").date() - timedelta(days=4),
args.date,
)
result_archive_fpath = os_usg_configs["results_archive_gcs_fpath"] % (
result_archive_fpath = os_usg_configs["results_archive_gcs_fpth"] % (
datetime.strptime(args.date, "%Y-%m-%d").date() - timedelta(days=4),
args.date,
)
Expand All @@ -366,7 +366,7 @@ def main():
datetime.strptime(args.date, "%Y-%m-%d").date() - timedelta(days=4),
args.date,
)
error_archive_fpath = os_usg_configs["errors_archive_gcs_fpath"] % (
error_archive_fpath = os_usg_configs["errors_archive_gcs_fpth"] % (
datetime.strptime(args.date, "%Y-%m-%d").date() - timedelta(days=4),
args.date,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ def main():

project = args.project
dataset = args.dataset
table_name = "app_acquisitions"
table_name = "app_acquisitions_v1"

date = args.date
client_id = MS_CLIENT_ID
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def main():

project = args.project
dataset = args.dataset
table_name = "app_conversions"
table_name = "app_conversions_v1"

date = args.date
client_id = MS_CLIENT_ID
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def main():

project = args.project
dataset = args.dataset
table_name = "app_installs"
table_name = "app_installs_v1"

date = args.date
client_id = MS_CLIENT_ID
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,8 @@ bigquery:
fields:
- conversion_name
references: {}
workgroup_access:
- role: roles/bigquery.dataViewer
members:
- workgroup:dataops-managed/external-census
- workgroup:mozilla-confidential
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ SELECT
THEN "firefox_first_ad_click"
WHEN "did_returned_second_day"
THEN "firefox_second_run"
WHEN "first_wk_5_actv_days_and_1_or_more_search_w_ads"
THEN "first_wk_5_actv_days_and_1_or_more_search_w_ads"
WHEN "first_wk_3_actv_days_and_1_or_more_search_w_ads"
THEN "first_wk_3_actv_days_and_1_or_more_search_w_ads"
WHEN "first_wk_3_actv_days_and_24_active_minutes"
THEN "first_wk_3_actv_days_and_24_active_minutes"
ELSE NULL
END AS conversion_name,
FROM
Expand All @@ -23,7 +29,10 @@ FROM
did_firefox_first_run,
did_search,
did_click_ad,
did_returned_second_day
did_returned_second_day,
first_wk_5_actv_days_and_1_or_more_search_w_ads,
first_wk_3_actv_days_and_1_or_more_search_w_ads,
first_wk_3_actv_days_and_24_active_minutes
)
)
WHERE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@ labels:
owner1: [email protected]
scheduling:
dag_name: bqetl_google_analytics_derived_ga4
date_partition_parameter: activity_date
date_partition_parameter: submission_date
parameters: ["conversion_window:INT64:30"]
date_partition_offset: 0
bigquery:
time_partitioning:
type: day
field: 'activity_date'
field: activity_date
require_partition_filter: false
expiration_days: null
references: {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ WITH gclids_to_ga_ids AS (
CROSS JOIN
UNNEST(all_reported_stub_session_ids) AS stub_session_id
WHERE
session_date >= DATE_SUB(@activity_date, INTERVAL @conversion_window DAY)
session_date >= DATE_SUB(@submission_date, INTERVAL @conversion_window DAY)
-- Next line is needed for backfilling purposes
AND session_date <= @activity_date
AND session_date <= @submission_date
AND gclid IS NOT NULL
),
--Step 2: Get all the download tokens associated with a known GA client ID & stub session ID
Expand Down Expand Up @@ -50,7 +50,7 @@ telemetry_id_to_activity_staging AS (
FROM
`moz-fx-data-shared-prod.telemetry_derived.clients_daily_v6`
WHERE
submission_date = @activity_date
submission_date = @submission_date
UNION ALL
SELECT
client_id AS telemetry_client_id,
Expand All @@ -65,8 +65,8 @@ telemetry_id_to_activity_staging AS (
`moz-fx-data-shared-prod.google_ads_derived.conversion_event_categorization_v1`
WHERE
(event_1 IS TRUE OR event_2 IS TRUE OR event_3 IS TRUE)
AND report_date = @activity_date
AND first_seen_date < @activity_date --needed since this is a required partition filter
AND report_date = @submission_date
AND first_seen_date < @submission_date --needed since this is a required partition filter
),
telemetry_id_to_activity AS (
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ RETURNS STRING AS (
r'\bfirf',
r'f.r.f.x',
r'faiya-fokkusu', -- fire fox (Japanese)
r'foxfire',
r'huohu', -- fire fox (Chinese)
r'nightly',
r'quantum',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ CREATE TEMP TABLE
{% if not loop.first -%}
UNION ALL
{% endif %}
{% if app['bq_dataset_family'] not in ["telemetry"] %}
{% if app['bq_dataset_family'] not in ["telemetry", "accounts_frontend", "accounts_backend"] %}
SELECT DISTINCT
@submission_date AS submission_date,
ext.value AS flow_id,
Expand All @@ -40,6 +40,39 @@ CREATE TEMP TABLE
WHERE
DATE(submission_timestamp) = @submission_date
AND ext.key = "flow_id"
{% elif app['bq_dataset_family'] in ["accounts_frontend", "accounts_backend"] %}
(WITH events_unnested_with_metrics AS (
-- The events_unnested views do not include metrics, and accounts send flow_id in a string metric,
-- so we need to unnest the events here while keeping the metrics columns.
SELECT
e.* EXCEPT (events),
event.timestamp AS event_timestamp,
event.category AS event_category,
event.name AS event_name,
event.extra AS event_extra
FROM
`moz-fx-data-shared-prod.{{ app['app_name'] }}.events` e
CROSS JOIN
UNNEST(e.events) AS event
)
SELECT DISTINCT
@submission_date AS submission_date,
metrics.string.session_flow_id AS flow_id,
event_category AS category,
event_name AS name,
TIMESTAMP_ADD(
submission_timestamp,
-- limit event.timestamp, otherwise this will cause an overflow
INTERVAL LEAST(event_timestamp, 20000000000000) MILLISECOND
) AS timestamp,
"{{ app['canonical_app_name'] }}" AS normalized_app_name,
client_info.app_channel AS channel
FROM
events_unnested_with_metrics
WHERE
DATE(submission_timestamp) = @submission_date
AND metrics.string.session_flow_id IS NOT NULL
AND metrics.string.session_flow_id != "")
{% endif %}
{% endfor %}
),
Expand Down

1 comment on commit 3f2191e

@dataops-ci-bot
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Integration report for "Merge branch 'main' into fxci_fix_worker_cost"

sql.diff

Click to expand!
diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fxci_derived/worker_costs_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fxci_derived/worker_costs_v1/query.sql
--- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fxci_derived/worker_costs_v1/query.sql	2024-06-21 13:40:43.000000000 +0000
+++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fxci_derived/worker_costs_v1/query.sql	2024-06-21 13:40:41.000000000 +0000
@@ -1,6 +1,10 @@
 SELECT
   project.id AS project,
-  REGEXP_EXTRACT(resource.name, "/instances/(.+)$") AS name,
+  IF(
+    resource.name LIKE "%/instances/%",
+    REGEXP_EXTRACT(resource.name, "/instances/(.+)$"),
+    resource.name
+  ) AS name,
   REGEXP_EXTRACT(resource.global_name, "/zones/([^/]+)") AS zone,
   REGEXP_EXTRACT(resource.global_name, "/instances/([^/]+)") AS instance_id,
   DATE(usage_start_time) AS usage_start_date,

Link to full diff

Please sign in to comment.