mozilla / bigquery-etl

Bigquery ETL
https://mozilla.github.io/bigquery-etl
Mozilla Public License 2.0
241 stars 98 forks source link

Deng 2631 import microsoft store data #5781

Closed Marlene-M-Hirose closed 2 weeks ago

Marlene-M-Hirose commented 2 weeks ago

Checklist for reviewer:

For modifications to schemas in restricted namespaces (see CODEOWNERS):

┆Issue is synchronized with this Jira Task

dataops-ci-bot commented 2 weeks ago

Integration report for "remove scheduling from metadata.yaml files ass as the dag is in telemetry-airflow"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod: microsoft_derived diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/metadata.yaml 2024-06-17 20:02:20.000000000 +0000 @@ -0,0 +1,21 @@ +friendly_name: App Acquisitions +description: |- + This table represents acquisition data downloaded from the Microsoft Store for Firefox, FX Beta and FX Nightly + The link for this data is here: https://manage.devcenter.microsoft.com/v1.0/my/analytics/appacquisitions? +owners: +- mhirose@mozilla.com +labels: + owner1: mhirose +bigquery: + time_partitioning: + type: day + field: DATE + require_partition_filter: false + expiration_days: null + range_partitioning: null + clustering: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/query.py /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/query.py --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/query.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/query.py 2024-06-17 20:00:18.000000000 +0000 @@ -0,0 +1,245 @@ +"""microsoft_store data - download deliverables, clean and upload to BigQuery.""" + +import csv +import json +import tempfile +from argparse import ArgumentParser + +import requests +from google.cloud import bigquery + +API_URI = "https://manage.devcenter.microsoft.com" + +"""MICROSOFT_STORE_APP_LIST is a list of dictionaries. +Keys are the app_name, and app_tenant_id in the form: +{"app_name":" dict: + """Read JSON file.""" + with open(filename, "r") as f: + data = json.loads(f.read()) + return data + + +def write_dict_to_csv(json_data, filename): + """Write a dictionary to a csv.""" + with open(filename, "w") as out_file: + dict_writer = csv.DictWriter(out_file, CSV_FIELDS) + dict_writer.writeheader() + dict_writer.writerows(json_data) + + +def microsoft_authorization(tenant_id, client_id, client_secret, resource_url): + """Microsoft Store Authoriazation. Returns the bearer token required for data download.""" + url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token" + query_params = { + "grant_type": "client_credentials", + "client_id": client_id, + "client_secret": client_secret, + "resource": resource_url, + } + headers = {"Content-Type": "application/x-www-form-urlencoded"} + json_file = post_response(url, headers, query_params) + response_data = json.loads(json_file.text) + bearer_token = response_data["access_token"] + return bearer_token + + +def download_microsoft_store_data(date, application_id, bearer_token): + """Download data from Microsoft - application_id, bearer_token are called here.""" + start_date = date # + end_date = date + token = bearer_token + app_id = application_id + groupBy = [ + "date", + "applicationName", + "acquisitionType", + "ageGroup", + "storeClient", + "gender", + "market", + "osVersion", + "deviceType", + ] + # getting overview metrics for different kpis / Deliverables + url = f"https://manage.devcenter.microsoft.com/v1.0/my/analytics/appacquisitions?applicationId={app_id}" + url_params = f"aggregationLevel=day&startDate={start_date}&endDate={end_date}&skip=0&groupBy={','.join(groupBy)}" + headers = { + "Content-Type": "application/x-www-form-urlencoded", + "Authorization": f"Bearer {token}", + } + print(url) + response = get_response(url, headers, url_params) + return response + + +def check_json(microsoft_store_response_text): + """Script will return an empty dictionary for apps on days when there is no data. Check for that here.""" + with tempfile.NamedTemporaryFile() as tmp_json: + with open(tmp_json.name, "w") as f_json: + f_json.write(microsoft_store_response_text) + try: + query_export = read_json(f_json.name) + except ( + ValueError + ): # ex. json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0) + return None + return query_export + + +def clean_json(query_export, date): + """Turn the json file into a list to be input into a CSV for bq upload.""" + fields_list = [] + for val in query_export["Value"]: + field_dict = { + "date": val["date"], + "application_id": val["applicationId"], + "application_name": val["applicationName"], + "acquisition_type": val["acquisitionType"], + "store_client": val["storeClient"], + "gender": val["gender"], + "market": val["market"], + "os_version": val["osVersion"], + "device_type": val["deviceType"], + "acquisition_quantity": val["acquisitionQuantity"], + "purchase_price_usd_amount": val["purchasePriceUSDAmount"], + "purchase_price_local_amount": val["purchasePriceLocalAmount"], + "purchase_tax_usd_amount": val["purchaseTaxUSDAmount"], + "purchase_tax_local_amount": val["purchaseTaxLocalAmount"], + } + fields_list.append(field_dict) + return fields_list + + +def upload_to_bigquery(csv_data, project, dataset, table_name, date): + """Upload the data to bigquery.""" + date = date + print("writing json to csv") + partition = f"{date}".replace("-", "") + print(partition) + with tempfile.NamedTemporaryFile() as tmp_csv: + with open(tmp_csv.name, "w+b") as f_csv: + write_dict_to_csv(csv_data, f_csv.name) + client = bigquery.Client(project) + job_config = bigquery.LoadJobConfig( + create_disposition="CREATE_IF_NEEDED", + write_disposition="WRITE_TRUNCATE", + time_partitioning=bigquery.TimePartitioning( + type_=bigquery.TimePartitioningType.DAY, + field="date", + ), + skip_leading_rows=1, + schema=[ + bigquery.SchemaField("date", "DATE"), + bigquery.SchemaField("application_id", "STRING"), + bigquery.SchemaField("application_name", "STRING"), + bigquery.SchemaField("acquisition_type", "STRING"), + bigquery.SchemaField("store_client", "STRING"), + bigquery.SchemaField("gender", "STRING"), + bigquery.SchemaField("market", "STRING"), + bigquery.SchemaField("os_version", "STRING"), + bigquery.SchemaField("device_type", "STRING"), + bigquery.SchemaField("acquisition_quantity", "INT64"), + bigquery.SchemaField("purchase_price_usd_amount", "NUMERIC"), + bigquery.SchemaField("purchase_price_local_amount", "NUMERIC"), + bigquery.SchemaField("purchase_tax_usd_amount", "NUMERIC"), + bigquery.SchemaField("purchase_tax_local_amount", "NUMERIC"), + ], + ) + destination = f"{project}.{dataset}.{table_name}${partition}" + + job = client.load_table_from_file(f_csv, destination, job_config=job_config) + print( + f"Writing microsoft_store data for all apps to {destination}. BigQuery job ID: {job.job_id}" + ) + job.result() + + +def main(): + """Input data, call functions, get stuff done.""" + parser = ArgumentParser(description=__doc__) + parser.add_argument("--date", required=True) + parser.add_argument("--microsoft_store_client_id", required=True) + parser.add_argument("--microsoft_store_client_secret", required=True) + parser.add_argument("--microsoft_store_app_list", required=True) + parser.add_argument("--microsoft_store_tenant_id", required=True) + parser.add_argument("--project", default="moz-fx-data-shared-prod") + parser.add_argument("--dataset", default="microsoft_derived") + + args = parser.parse_args() + + app_list = json.loads(args.microsoft_store_app_list) + + project = args.project + dataset = args.dataset + table_name = "microsoft_app_acquisitions" + + date = args.date + client_id = args.microsoft_store_client_id + client_secret = args.microsoft_store_client_secret + app_list = args.microsoft_store_app_list + tenant_id = args.microsoft_store_tenant_id + resource_url = API_URI + + data = [] + + bearer_token = microsoft_authorization( + tenant_id, client_id, client_secret, resource_url + ) + + # Cycle through the apps to get the relevant data + for app in app_list: + print(f'This is data for {app["app_name"]} - {app["app_id"]} ') + # Ping the microsoft_store URL and get a response + json_file = download_microsoft_store_data(date, app["app_id"], bearer_token) + query_export = check_json(json_file.text) + if query_export is not None: + # This section writes the tmp json data into a temp CSV file which will then be put into a BigQuery table + microsoft_store_data = clean_json(query_export, date) + data.extend(microsoft_store_data) + else: + print("no data for today") + + upload_to_bigquery(data, project, dataset, table_name, date) + + +if __name__ == "__main__": + main() diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/schema.yaml 2024-06-17 20:00:18.000000000 +0000 @@ -0,0 +1,70 @@ +fields: + +- mode: NULLABLE + name: date + type: DATE + description: The first date in the date range for the acquisition data + +- mode: NULLABLE + name: application_id + type: STRING + description: The Store ID of the app for which you are retrieving acquisition data + +- mode: NULLABLE + name: application_name + type: STRING + description: The display name of the app + +- mode: NULLABLE + name: acquisition_type + type: STRING + description: The type of acquisition + +- mode: NULLABLE + name: store_client + type: STRING + description: The version of the Store where the acquisition occurred + +- mode: NULLABLE + name: gender + type: STRING + description: The gender of the user who made the acquisition + +- mode: NULLABLE + name: market + type: STRING + description: The ISO 3166 country code of the market where the acquisition occurred + +- mode: NULLABLE + name: os_version + type: STRING + description: the OS version on which the acquisition occurred + +- mode: NULLABLE + name: device_type + type: STRING + description: he type of device on which the acquisition occurred + +- mode: NULLABLE + name: acquisition_quantity + type: INT64 + description: The number of acquisitions that occurred during the specified aggregation level +- mode: NULLABLE + name: purchase_price_usd_amount + type: NUMERIC + description: Purchase price USD amount + +- mode: NULLABLE + name: purchase_price_local_amount + type: NUMERIC + description: Purchase price local currency amount + +- mode: NULLABLE + name: purchase_tax_usd_amount + type: NUMERIC + description: Purchase tax USD amount + +- mode: NULLABLE + name: purchase_tax_local_amount + type: NUMERIC + description: Purchase tax local currency amount diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/metadata.yaml 2024-06-17 20:02:20.000000000 +0000 @@ -0,0 +1,21 @@ +friendly_name: App Acquisitions +description: |- + This table represents acquisition data downloaded from the Microsoft Store for Firefox, FX Beta and FX Nightly + The link for this data is here: https://manage.devcenter.microsoft.com/v1.0/my/analytics/appchannelconversions? +owners: +- mhirose@mozilla.com +labels: + owner1: mhirose +bigquery: + time_partitioning: + type: day + field: DATE + require_partition_filter: false + expiration_days: null + range_partitioning: null + clustering: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/query.py /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/query.py --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/query.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/query.py 2024-06-17 20:00:18.000000000 +0000 @@ -0,0 +1,236 @@ +"""microsoft_store data - download deliverables, clean and upload to BigQuery.""" + +import csv +import json +import tempfile +from argparse import ArgumentParser + +import requests +from google.cloud import bigquery + +API_URI = "https://manage.devcenter.microsoft.com" + +"""MICROSOFT_STORE_APP_LIST is a list of dictionaries. +Keys are the app_name, and app_tenant_id in the form: +{"app_name":" dict: + """Read JSON file.""" + with open(filename, "r") as f: + data = json.loads(f.read()) + return data + + +def write_dict_to_csv(json_data, filename): + """Write a dictionary to a csv.""" + with open(filename, "w") as out_file: + dict_writer = csv.DictWriter(out_file, CSV_FIELDS) + dict_writer.writeheader() + dict_writer.writerows(json_data) + + +def microsoft_authorization(tenant_id, client_id, client_secret, resource_url): + """Microsoft Store Authoriazation. Returns the bearer token required for data download.""" + url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token" + query_params = { + "grant_type": "client_credentials", + "client_id": client_id, + "client_secret": client_secret, + "resource": resource_url, + } + headers = {"Content-Type": "application/x-www-form-urlencoded"} + json_file = post_response(url, headers, query_params) + response_data = json.loads(json_file.text) + bearer_token = response_data["access_token"] + return bearer_token + + +def download_microsoft_store_data(date, application_id, bearer_token): + """Download data from Microsoft - application_id, bearer_token are called here.""" + start_date = date # + end_date = date + token = bearer_token + app_id = application_id + groupBy = [ + "date", + "applicationId", + "applicationName", + "customCampaignId", + "channelType", + "storeClient", + "deviceType", + "market", + ] + # getting overview metrics for different kpis / Deliverables + url = f"https://manage.devcenter.microsoft.com/v1.0/my/analytics/appchannelconversions?applicationId={app_id}" + url_params = f"aggregationLevel=day&startDate={start_date}&endDate={end_date}&skip=0&groupBy={','.join(groupBy)}&orderby=date" + headers = { + "Content-Type": "application/x-www-form-urlencoded", + "Authorization": f"Bearer {token}", + } + print(url) + response = get_response(url, headers, url_params) + return response + + +def check_json(microsoft_store_response_text): + """Script will return an empty dictionary for apps on days when there is no data. Check for that here.""" + with tempfile.NamedTemporaryFile() as tmp_json: + with open(tmp_json.name, "w") as f_json: + f_json.write(microsoft_store_response_text) + try: + query_export = read_json(f_json.name) + except ( + ValueError + ): # ex. json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0) + return None + return query_export + + +def clean_json(query_export, date): + """Turn the json file into a list to be input into a CSV for bq upload.""" + fields_list = [] + for val in query_export["Value"]: + field_dict = { + "date": val["date"], + "application_id": val["applicationId"], + "application_name": val["applicationName"], + "custom_campaign_id": val["customCampaignId"], + "referrer_uri_domain": val["referrerUriDomain"], + "channel_type": val["channelType"], + "store_client": val["storeClient"], + "device_type": val["deviceType"], + "market": val["market"], + "click_count": val["clickCount"], + "conversion_count": val["conversionCount"], + } + fields_list.append(field_dict) + return fields_list + + +def upload_to_bigquery(csv_data, project, dataset, table_name, date): + """Upload the data to bigquery.""" + date = date + print("writing json to csv") + partition = f"{date}".replace("-", "") + print(partition) + with tempfile.NamedTemporaryFile() as tmp_csv: + with open(tmp_csv.name, "w+b") as f_csv: + write_dict_to_csv(csv_data, f_csv.name) + client = bigquery.Client(project) + job_config = bigquery.LoadJobConfig( + create_disposition="CREATE_IF_NEEDED", + write_disposition="WRITE_TRUNCATE", + time_partitioning=bigquery.TimePartitioning( + type_=bigquery.TimePartitioningType.DAY, + field="date", + ), + skip_leading_rows=1, + schema=[ + bigquery.SchemaField("date", "DATE"), + bigquery.SchemaField("application_id", "STRING"), + bigquery.SchemaField("application_name", "STRING"), + bigquery.SchemaField("custom_campaign_id", "STRING"), + bigquery.SchemaField("referrer_uri_domain", "STRING"), + bigquery.SchemaField("channel_type", "STRING"), + bigquery.SchemaField("store_client", "STRING"), + bigquery.SchemaField("device_type", "STRING"), + bigquery.SchemaField("market", "STRING"), + bigquery.SchemaField("click_count", "STRING"), + bigquery.SchemaField("conversion_count", "STRING"), + ], + ) + destination = f"{project}.{dataset}.{table_name}${partition}" + destination = f"{project}.analysis.mhirose_{table_name}${partition}" + + job = client.load_table_from_file(f_csv, destination, job_config=job_config) + print( + f"Writing microsoft_store data for all apps to {destination}. BigQuery job ID: {job.job_id}" + ) + job.result() + + +def main(): + """Input data, call functions, get stuff done.""" + parser = ArgumentParser(description=__doc__) + parser.add_argument("--date", required=True) + parser.add_argument("--microsoft_store_client_id", required=True) + parser.add_argument("--microsoft_store_client_id", required=True) + parser.add_argument("--microsoft_store_app_list", required=True) + parser.add_argument("--microsoft_store_tenant_id", required=True) + parser.add_argument("--project", default="moz-fx-data-shared-prod") + parser.add_argument("--dataset", default="microsoft_derived") + + args = parser.parse_args() + + app_list = json.loads(args.microsoft_store_app_list) + + project = args.project + dataset = args.dataset + table_name = "microsoft_app_conversions" + + date = args.date + client_id = args.microsoft_store_client_id + client_secret = args.microsoft_store_client_secret + app_list = args.microsoft_store_app_list + tenant_id = args.microsoft_store_tenant_id + resource_url = API_URI + + data = [] + + bearer_token = microsoft_authorization( + tenant_id, client_id, client_secret, resource_url + ) + + # Cycle through the apps to get the relevant data + for app in app_list: + print(f'This is data for {app["app_name"]} - {app["app_id"]} ') + # Ping the microsoft_store URL and get a response + json_file = download_microsoft_store_data(date, app["app_id"], bearer_token) + query_export = check_json(json_file.text) + if query_export is not None: + # This section writes the tmp json data into a temp CSV file which will then be put into a BigQuery table + microsoft_store_data = clean_json(query_export, date) + data.extend(microsoft_store_data) + else: + print("no data for today") + + upload_to_bigquery(data, project, dataset, table_name, date) + + +if __name__ == "__main__": + main() diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/schema.yaml 2024-06-17 20:00:18.000000000 +0000 @@ -0,0 +1,46 @@ +fields: + +- mode: NULLABLE + name: date + type: DATE + description: The first date in the date range for the acquisition data +- mode: NULLABLE + name: application_id + type: STRING + description: The Store ID of the app for which you are retrieving acquisition data +- mode: NULLABLE + name: application_name + type: STRING + description: The display name of the app +- mode: NULLABLE + name: custom_campaign_id + type: STRING + description: The ID string for a custom app promotion campaign that is associated with the app +- mode: NULLABLE + name: referrer_uri_domain + type: STRING + description: Specifies the domain where the app listing with the custom app promotion campaign ID was activated +- mode: NULLABLE + name: channel_type + type: STRING + description: The channel for the conversion +- mode: NULLABLE + name: store_client + type: STRING + description: The version of the Store where the conversion occurred. Currently, the only supported value is SFC +- mode: NULLABLE + name: device_type + type: STRING + description: Device Type +- mode: NULLABLE + name: market + type: STRING + description: The ISO 3166 country code of the market where the conversion occurred +- mode: NULLABLE + name: click_count + type: INT64 + description: The number of customer clicks on your app listing lin +- mode: NULLABLE + name: conversion_count + type: INT64 + description: The number of customer conversions diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/metadata.yaml 2024-06-17 20:02:20.000000000 +0000 @@ -0,0 +1,20 @@ +friendly_name: App Acquisitionss +description: | + This table represents acquisition data downloaded from the Microsoft Store for Firefox, FX Beta and FX Nightly The link for this data is here: https://manage.devcenter.microsoft.com/v1.0/my/analytics/installs? +owners: +- mhirose@mozilla.com +labels: + owner1: mhirose +bigquery: + time_partitioning: + type: day + field: DATE + require_partition_filter: false + expiration_days: null + range_partitioning: null + clustering: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/query.py /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/query.py --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/query.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/query.py 2024-06-17 20:00:18.000000000 +0000 @@ -0,0 +1,221 @@ +"""microsoft_store data - download deliverables, clean and upload to BigQuery.""" + +import csv +import json +import tempfile +from argparse import ArgumentParser + +import requests +from google.cloud import bigquery + +API_URI = "https://manage.devcenter.microsoft.com" + +"""MICROSOFT_STORE_APP_LIST is a list of dictionaries. +Keys are the app_name, and app_tenant_id in the form: +{"app_name":" dict: + """Read JSON file.""" + with open(filename, "r") as f: + data = json.loads(f.read()) + return data + + +def write_dict_to_csv(json_data, filename): + """Write a dictionary to a csv.""" + with open(filename, "w") as out_file: + dict_writer = csv.DictWriter(out_file, CSV_FIELDS) + dict_writer.writeheader() + dict_writer.writerows(json_data) + + +def microsoft_authorization(tenant_id, client_id, client_secret, resource_url): + """Microsoft Store Authoriazation. Returns the bearer token required for data download.""" + url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token" + query_params = { + "grant_type": "client_credentials", + "client_id": client_id, + "client_secret": client_secret, + "resource": resource_url, + } + headers = {"Content-Type": "application/x-www-form-urlencoded"} + json_file = post_response(url, headers, query_params) + response_data = json.loads(json_file.text) + bearer_token = response_data["access_token"] + return bearer_token + + +def download_microsoft_store_data(date, application_id, bearer_token): + """Download data from Microsoft - application_id, bearer_token are called here.""" + start_date = date # + end_date = date + token = bearer_token + app_id = application_id + groupBy = [ + "applicationName", + "date", + "deviceType", + "market", + "osVersion", + ] + # getting overview metrics for different kpis / Deliverables + url = f"https://manage.devcenter.microsoft.com/v1.0/my/analytics/installs?applicationId={app_id}" + url_params = f"aggregationLevel=day&startDate={start_date}&endDate={end_date}&skip=0&groupBy={','.join(groupBy)}&filter=date eq '{date}'&orderby=date" + headers = { + "Content-Type": "application/x-www-form-urlencoded", + "Authorization": f"Bearer {token}", + } + print(url) + response = get_response(url, headers, url_params) + return response + + +def check_json(microsoft_store_response_text): + """Script will return an empty dictionary for apps on days when there is no data. Check for that here.""" + with tempfile.NamedTemporaryFile() as tmp_json: + with open(tmp_json.name, "w") as f_json: + f_json.write(microsoft_store_response_text) + try: + query_export = read_json(f_json.name) + except ( + ValueError + ): # ex. json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0) + return None + return query_export + + +def clean_json(query_export, date): + """Turn the json file into a list to be input into a CSV for bq upload.""" + fields_list = [] + for val in query_export["Value"]: + field_dict = { + "date": val["date"], + "application_id": val["applicationId"], + "application_name": val["applicationName"], + "package_version": val["packageVersion"], + "device_type": val["deviceType"], + "market": val["market"], + "os_version": val["osVersion"], + "successful_install_count": val["successfulInstallCount"], + } + fields_list.append(field_dict) + return fields_list + + +def upload_to_bigquery(csv_data, project, dataset, table_name, date): + """Upload the data to bigquery.""" + print("writing json to csv") + partition = f"{date}".replace("-", "") + print(partition) + with tempfile.NamedTemporaryFile() as tmp_csv: + with open(tmp_csv.name, "w+b") as f_csv: + write_dict_to_csv(csv_data, f_csv.name) + client = bigquery.Client(project) + job_config = bigquery.LoadJobConfig( + create_disposition="CREATE_IF_NEEDED", + write_disposition="WRITE_TRUNCATE", + time_partitioning=bigquery.TimePartitioning( + type_=bigquery.TimePartitioningType.DAY, + field="date", + ), + skip_leading_rows=1, + schema=[ + bigquery.SchemaField("date", "DATE"), + bigquery.SchemaField("application_id", "STRING"), + bigquery.SchemaField("application_name", "STRING"), + bigquery.SchemaField("package_version", "STRING"), + bigquery.SchemaField("device_type", "STRING"), + bigquery.SchemaField("market", "STRING"), + bigquery.SchemaField("os_version", "STRING"), + bigquery.SchemaField("successful_install_count", "INT64"), + ], + ) + destination = f"{project}.{dataset}.{table_name}${partition}" + + job = client.load_table_from_file(f_csv, destination, job_config=job_config) + print( + f"Writing microsoft_store data for all apps to {destination}. BigQuery job ID: {job.job_id}" + ) + job.result() + + +def main(): + """Input data, call functions, get stuff done.""" + parser = ArgumentParser(description=__doc__) + parser.add_argument("--date", required=True) + parser.add_argument("--microsoft_store_client_id", required=True) + parser.add_argument("--microsoft_store_client_id", required=True) + parser.add_argument("--microsoft_store_app_list", required=True) + parser.add_argument("--microsoft_store_tenant_id", required=True) + parser.add_argument("--project", default="moz-fx-data-shared-prod") + parser.add_argument("--dataset", default="microsoft_derived") + + args = parser.parse_args() + + app_list = json.loads(args.microsoft_store_app_list) + + project = args.project + dataset = args.dataset + table_name = "microsoft_app_installs" + + date = args.date + client_id = args.microsoft_store_client_id + client_secret = args.microsoft_store_client_secret + app_list = args.microsoft_store_app_list + tenant_id = args.microsoft_store_tenant_id + resource_url = API_URI + + data = [] + + bearer_token = microsoft_authorization( + tenant_id, client_id, client_secret, resource_url + ) + + # Cycle through the apps to get the relevant data + for app in app_list: + print(f'This is data for {app["app_name"]} - {app["app_id"]} ') + # Ping the microsoft_store URL and get a response + json_file = download_microsoft_store_data(date, app["app_id"], bearer_token) + query_export = check_json(json_file.text) + if query_export is not None: + # This section writes the tmp json data into a temp CSV file which will then be put into a BigQuery table + microsoft_store_data = clean_json(query_export, date) + data.extend(microsoft_store_data) + else: + print("no data for today") + upload_to_bigquery(data, project, dataset, table_name, date) + + +if __name__ == "__main__": + main() diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/schema.yaml 2024-06-17 20:00:18.000000000 +0000 @@ -0,0 +1,34 @@ +fields: + +- mode: NULLABLE + name: date + type: DATE + description: The first date in the date range for the install data +- mode: NULLABLE + name: application_id + type: STRING + description: The Store ID of the app for which you are retrieving install data +- mode: NULLABLE + name: application_name + type: STRING + description: The display name of the app +- mode: NULLABLE + name: package_version + type: STRING + description: The version of the package that was installed +- mode: NULLABLE + name: device_type + type: STRING + description: Device Type +- mode: NULLABLE + name: market + type: STRING + description: The ISO 3166 country code of the market where the install occurred +- mode: NULLABLE + name: os_version + type: STRING + description: The OS version on which the install occurred +- mode: NULLABLE + name: successful_install_count + type: INT64 + description: The number of successful installs that occurred during the specified aggregation level diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/dataset_metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/dataset_metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/dataset_metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/dataset_metadata.yaml 2024-06-17 20:00:18.000000000 +0000 @@ -0,0 +1,10 @@ +friendly_name: Microsoft Derived +description: |- + Derived tables for Microsoft Store data +dataset_base_acl: derived +user_facing: false +labels: {} +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/newtab_visits_v1/query.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/newtab_visits_v1/query.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/newtab_visits_v1/query.sql 2024-06-17 20:00:16.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/newtab_visits_v1/query.sql 2024-06-17 20:00:18.000000000 +0000 @@ -20,6 +20,16 @@ WHERE DATE(submission_timestamp) = @submission_date AND category IN ('newtab', 'topsites', 'newtab.search', 'newtab.search.ad', 'pocket') + AND name IN ( + 'closed', + 'opened', + 'impression', + 'issued', + 'click', + 'save', + 'topic_click', + 'dismiss' + ) ), visit_metadata AS ( SELECT @@ -44,7 +54,6 @@ ANY_VALUE(metrics.string.newtab_homepage_category) AS newtab_homepage_category, ANY_VALUE(metrics.string.newtab_newtab_category) AS newtab_newtab_category, ANY_VALUE(metrics.boolean.newtab_search_enabled) AS newtab_search_enabled, - ANY_VALUE(metrics.boolean.newtab_weather_enabled) AS newtab_weather_widget_enabled, ANY_VALUE(metrics.quantity.topsites_rows) AS topsites_rows, ANY_VALUE(metrics.string_list.newtab_blocked_sponsors) AS newtab_blocked_sponsors, ANY_VALUE(ping_info.experiments) AS experiments, @@ -101,7 +110,6 @@ events_unnested WHERE event_category IN ('newtab.search', 'newtab.search.ad') - AND event_name IN ('click', 'impression', 'issued') GROUP BY newtab_visit_id, search_engine, @@ -172,7 +180,6 @@ ) WHERE event_category = 'topsites' - AND event_name IN ('dismiss', 'click', 'impression') GROUP BY newtab_visit_id, topsite_tile_position, @@ -211,8 +218,6 @@ SELECT mozfun.map.get_key(event_details, "newtab_visit_id") AS newtab_visit_id, SAFE_CAST(mozfun.map.get_key(event_details, "position") AS INT64) AS pocket_story_position, - mozfun.map.get_key(event_details, "tile_id") AS pocket_tile_id, - mozfun.map.get_key(event_details, "recommendation_id") AS pocket_recommendation_id, COUNTIF(event_name = 'save') AS pocket_saves, COUNTIF(event_name = 'click') AS pocket_clicks, COUNTIF(event_name = 'impression') AS pocket_impressions, @@ -244,12 +249,9 @@ events_unnested WHERE event_category = 'pocket' - AND event_name IN ('impression', 'click', 'save') GROUP BY newtab_visit_id, - pocket_story_position, - pocket_tile_id, - pocket_recommendation_id + pocket_story_position ), pocket_summary AS ( SELECT @@ -257,8 +259,6 @@ ARRAY_AGG( STRUCT( pocket_story_position, - pocket_tile_id, - pocket_recommendation_id, pocket_impressions, sponsored_pocket_impressions, organic_pocket_impressions, @@ -275,99 +275,6 @@ GROUP BY newtab_visit_id ), -wallpaper_events AS ( - SELECT - mozfun.map.get_key(event_details, "newtab_visit_id") AS newtab_visit_id, - mozfun.map.get_key(event_details, "selected_wallpaper") AS wallpaper_selected_wallpaper, - COUNTIF(event_name = 'wallpaper_click') AS wallpaper_clicks, - COUNTIF( - event_name = 'wallpaper_click' - AND mozfun.map.get_key(event_details, "had_previous_wallpaper") = "true" - ) AS wallpaper_clicks_had_previous_wallpaper, - COUNTIF( - event_name = 'wallpaper_click' - AND mozfun.map.get_key(event_details, "had_previous_wallpaper") = "false" - ) AS wallpaper_clicks_first_selected_wallpaper, - COUNTIF(event_name = 'wallpaper_category_click') AS wallpaper_category_clicks, - COUNTIF(event_name = 'wallpaper_highlight_dismissed') AS wallpaper_highlight_dismissals, - COUNTIF(event_name = 'wallpaper_highlight_cta_click') AS wallpaper_highlight_cta_clicks - FROM - events_unnested - WHERE - event_category = 'newtab' - AND event_name IN ( - 'wallpaper_click', - 'wallpaper_category_click', - 'wallpaper_highlight_cta_clicks', - 'wallpaper_highlight_dismissed' - ) - GROUP BY - newtab_visit_id, - wallpaper_selected_wallpaper -), -wallpaper_summary AS ( - SELECT - newtab_visit_id, - ARRAY_AGG( - STRUCT( - wallpaper_selected_wallpaper, - wallpaper_clicks, - wallpaper_clicks_had_previous_wallpaper, - wallpaper_clicks_first_selected_wallpaper, - wallpaper_category_clicks, - wallpaper_highlight_dismissals, - wallpaper_highlight_cta_clicks - ) - ) AS wallpaper_interactions - FROM - wallpaper_events - GROUP BY - newtab_visit_id -), -weather_events AS ( - SELECT - mozfun.map.get_key(event_details, "newtab_visit_id") AS newtab_visit_id, - COUNTIF(event_name = 'weather_impression') AS weather_widget_impressions, - COUNTIF(event_name = 'weather_open_provider_url') AS weather_widget_clicks, - COUNTIF(event_name = 'weather_load_error') AS weather_widget_load_errors, - COUNTIF( - event_name = 'weather_change_display' - AND mozfun.map.get_key(event_details, "weather_display_mode") = "detailed" - ) AS weather_widget_change_display_to_detailed, - COUNTIF( - event_name = 'weather_change_display' - AND mozfun.map.get_key(event_details, "weather_display_mode") = "simple" - ) AS weather_widget_change_display_to_simple - FROM - events_unnested - WHERE - event_category = 'newtab' - AND event_name IN ( - 'weather_impression', - 'weather_open_provider_url', - 'weather_load_error', - 'weather_change_display' - ) - GROUP BY - newtab_visit_id -), -weather_summary AS ( - SELECT - newtab_visit_id, - ARRAY_AGG( - STRUCT( - weather_widget_impressions, - weather_widget_clicks, - weather_widget_load_errors, - weather_widget_change_display_to_detailed, - weather_widget_change_display_to_simple - ) - ) AS weather_interactions - FROM - weather_events - GROUP BY - newtab_visit_id -), combined_newtab_activity AS ( SELECT * @@ -382,12 +289,6 @@ LEFT JOIN pocket_summary USING (newtab_visit_id) - LEFT JOIN - wallpaper_summary - USING (newtab_visit_id) - LEFT JOIN - weather_summary - USING (newtab_visit_id) WHERE -- Keep only rows with interactions, unless we receive a valid newtab.opened event. -- This is meant to drop only interactions that only have a newtab.closed event on the same partition @@ -396,8 +297,6 @@ OR search_interactions IS NOT NULL OR topsite_tile_interactions IS NOT NULL OR pocket_interactions IS NOT NULL - OR wallpaper_interactions IS NOT NULL - OR weather_interactions IS NOT NULL ), client_profile_info AS ( SELECT @@ -412,15 +311,7 @@ client_id ) SELECT - *, - CASE - WHEN ( - (newtab_open_source = "about:home" AND newtab_homepage_category = "enabled") - OR (newtab_open_source = "about:newtab" AND newtab_newtab_category = "enabled") - ) - THEN "default" - ELSE "non-default" - END AS newtab_default_ui, + * FROM combined_newtab_activity LEFT JOIN diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/newtab_visits_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/newtab_visits_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/newtab_visits_v1/schema.yaml 2024-06-17 20:00:16.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/telemetry_derived/newtab_visits_v1/schema.yaml 2024-06-17 20:00:18.000000000 +0000 @@ -215,12 +215,6 @@ - mode: NULLABLE name: organic_pocket_saves type: INTEGER - - name: pocket_tile_id - type: STRING - mode: NULLABLE - - name: pocket_recommendation_id - type: STRING - mode: NULLABLE mode: REPEATED name: pocket_interactions type: RECORD @@ -230,53 +224,3 @@ - mode: NULLABLE name: activity_segment type: STRING -- mode: NULLABLE - name: newtab_default_ui - type: STRING -- name: newtab_weather_widget_enabled - type: BOOLEAN - mode: NULLABLE -- name: wallpaper_interactions - type: RECORD - mode: REPEATED - fields: - - name: wallpaper_selected_wallpaper - type: STRING - mode: NULLABLE - - name: wallpaper_clicks - type: INTEGER - mode: NULLABLE - - name: wallpaper_clicks_had_previous_wallpaper - type: INTEGER - mode: NULLABLE - - name: wallpaper_clicks_first_selected_wallpaper - type: INTEGER - mode: NULLABLE - - name: wallpaper_category_clicks - type: INTEGER - mode: NULLABLE - - name: wallpaper_highlight_dismissals - type: INTEGER - mode: NULLABLE - - name: wallpaper_highlight_cta_clicks - type: INTEGER - mode: NULLABLE -- name: weather_interactions - type: RECORD - mode: REPEATED - fields: - - name: weather_widget_impressions - type: INTEGER - mode: NULLABLE - - name: weather_widget_clicks - type: INTEGER - mode: NULLABLE - - name: weather_widget_load_errors - type: INTEGER - mode: NULLABLE - - name: weather_widget_change_display_to_detailed - type: INTEGER - mode: NULLABLE - - name: weather_widget_change_display_to_simple - type: INTEGER - mode: NULLABLE ```

Link to full diff

dataops-ci-bot commented 2 weeks ago

Integration report for "Merge branch 'main' into deng_2631_import_microsoft_store_data"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod: microsoft_derived diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/metadata.yaml 2024-06-17 21:52:11.000000000 +0000 @@ -0,0 +1,21 @@ +friendly_name: App Acquisitions +description: |- + This table represents acquisition data downloaded from the Microsoft Store for Firefox, FX Beta and FX Nightly + The link for this data is here: https://manage.devcenter.microsoft.com/v1.0/my/analytics/appacquisitions? +owners: +- mhirose@mozilla.com +labels: + owner1: mhirose +bigquery: + time_partitioning: + type: day + field: DATE + require_partition_filter: false + expiration_days: null + range_partitioning: null + clustering: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/query.py /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/query.py --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/query.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/query.py 2024-06-17 21:50:21.000000000 +0000 @@ -0,0 +1,245 @@ +"""microsoft_store data - download deliverables, clean and upload to BigQuery.""" + +import csv +import json +import tempfile +from argparse import ArgumentParser + +import requests +from google.cloud import bigquery + +API_URI = "https://manage.devcenter.microsoft.com" + +"""MICROSOFT_STORE_APP_LIST is a list of dictionaries. +Keys are the app_name, and app_tenant_id in the form: +{"app_name":" dict: + """Read JSON file.""" + with open(filename, "r") as f: + data = json.loads(f.read()) + return data + + +def write_dict_to_csv(json_data, filename): + """Write a dictionary to a csv.""" + with open(filename, "w") as out_file: + dict_writer = csv.DictWriter(out_file, CSV_FIELDS) + dict_writer.writeheader() + dict_writer.writerows(json_data) + + +def microsoft_authorization(tenant_id, client_id, client_secret, resource_url): + """Microsoft Store Authoriazation. Returns the bearer token required for data download.""" + url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token" + query_params = { + "grant_type": "client_credentials", + "client_id": client_id, + "client_secret": client_secret, + "resource": resource_url, + } + headers = {"Content-Type": "application/x-www-form-urlencoded"} + json_file = post_response(url, headers, query_params) + response_data = json.loads(json_file.text) + bearer_token = response_data["access_token"] + return bearer_token + + +def download_microsoft_store_data(date, application_id, bearer_token): + """Download data from Microsoft - application_id, bearer_token are called here.""" + start_date = date # + end_date = date + token = bearer_token + app_id = application_id + groupBy = [ + "date", + "applicationName", + "acquisitionType", + "ageGroup", + "storeClient", + "gender", + "market", + "osVersion", + "deviceType", + ] + # getting overview metrics for different kpis / Deliverables + url = f"https://manage.devcenter.microsoft.com/v1.0/my/analytics/appacquisitions?applicationId={app_id}" + url_params = f"aggregationLevel=day&startDate={start_date}&endDate={end_date}&skip=0&groupBy={','.join(groupBy)}" + headers = { + "Content-Type": "application/x-www-form-urlencoded", + "Authorization": f"Bearer {token}", + } + print(url) + response = get_response(url, headers, url_params) + return response + + +def check_json(microsoft_store_response_text): + """Script will return an empty dictionary for apps on days when there is no data. Check for that here.""" + with tempfile.NamedTemporaryFile() as tmp_json: + with open(tmp_json.name, "w") as f_json: + f_json.write(microsoft_store_response_text) + try: + query_export = read_json(f_json.name) + except ( + ValueError + ): # ex. json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0) + return None + return query_export + + +def clean_json(query_export, date): + """Turn the json file into a list to be input into a CSV for bq upload.""" + fields_list = [] + for val in query_export["Value"]: + field_dict = { + "date": val["date"], + "application_id": val["applicationId"], + "application_name": val["applicationName"], + "acquisition_type": val["acquisitionType"], + "store_client": val["storeClient"], + "gender": val["gender"], + "market": val["market"], + "os_version": val["osVersion"], + "device_type": val["deviceType"], + "acquisition_quantity": val["acquisitionQuantity"], + "purchase_price_usd_amount": val["purchasePriceUSDAmount"], + "purchase_price_local_amount": val["purchasePriceLocalAmount"], + "purchase_tax_usd_amount": val["purchaseTaxUSDAmount"], + "purchase_tax_local_amount": val["purchaseTaxLocalAmount"], + } + fields_list.append(field_dict) + return fields_list + + +def upload_to_bigquery(csv_data, project, dataset, table_name, date): + """Upload the data to bigquery.""" + date = date + print("writing json to csv") + partition = f"{date}".replace("-", "") + print(partition) + with tempfile.NamedTemporaryFile() as tmp_csv: + with open(tmp_csv.name, "w+b") as f_csv: + write_dict_to_csv(csv_data, f_csv.name) + client = bigquery.Client(project) + job_config = bigquery.LoadJobConfig( + create_disposition="CREATE_IF_NEEDED", + write_disposition="WRITE_TRUNCATE", + time_partitioning=bigquery.TimePartitioning( + type_=bigquery.TimePartitioningType.DAY, + field="date", + ), + skip_leading_rows=1, + schema=[ + bigquery.SchemaField("date", "DATE"), + bigquery.SchemaField("application_id", "STRING"), + bigquery.SchemaField("application_name", "STRING"), + bigquery.SchemaField("acquisition_type", "STRING"), + bigquery.SchemaField("store_client", "STRING"), + bigquery.SchemaField("gender", "STRING"), + bigquery.SchemaField("market", "STRING"), + bigquery.SchemaField("os_version", "STRING"), + bigquery.SchemaField("device_type", "STRING"), + bigquery.SchemaField("acquisition_quantity", "INT64"), + bigquery.SchemaField("purchase_price_usd_amount", "NUMERIC"), + bigquery.SchemaField("purchase_price_local_amount", "NUMERIC"), + bigquery.SchemaField("purchase_tax_usd_amount", "NUMERIC"), + bigquery.SchemaField("purchase_tax_local_amount", "NUMERIC"), + ], + ) + destination = f"{project}.{dataset}.{table_name}${partition}" + + job = client.load_table_from_file(f_csv, destination, job_config=job_config) + print( + f"Writing microsoft_store data for all apps to {destination}. BigQuery job ID: {job.job_id}" + ) + job.result() + + +def main(): + """Input data, call functions, get stuff done.""" + parser = ArgumentParser(description=__doc__) + parser.add_argument("--date", required=True) + parser.add_argument("--microsoft_store_client_id", required=True) + parser.add_argument("--microsoft_store_client_secret", required=True) + parser.add_argument("--microsoft_store_app_list", required=True) + parser.add_argument("--microsoft_store_tenant_id", required=True) + parser.add_argument("--project", default="moz-fx-data-shared-prod") + parser.add_argument("--dataset", default="microsoft_derived") + + args = parser.parse_args() + + app_list = json.loads(args.microsoft_store_app_list) + + project = args.project + dataset = args.dataset + table_name = "microsoft_app_acquisitions" + + date = args.date + client_id = args.microsoft_store_client_id + client_secret = args.microsoft_store_client_secret + app_list = args.microsoft_store_app_list + tenant_id = args.microsoft_store_tenant_id + resource_url = API_URI + + data = [] + + bearer_token = microsoft_authorization( + tenant_id, client_id, client_secret, resource_url + ) + + # Cycle through the apps to get the relevant data + for app in app_list: + print(f'This is data for {app["app_name"]} - {app["app_id"]} ') + # Ping the microsoft_store URL and get a response + json_file = download_microsoft_store_data(date, app["app_id"], bearer_token) + query_export = check_json(json_file.text) + if query_export is not None: + # This section writes the tmp json data into a temp CSV file which will then be put into a BigQuery table + microsoft_store_data = clean_json(query_export, date) + data.extend(microsoft_store_data) + else: + print("no data for today") + + upload_to_bigquery(data, project, dataset, table_name, date) + + +if __name__ == "__main__": + main() diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/schema.yaml 2024-06-17 21:50:21.000000000 +0000 @@ -0,0 +1,70 @@ +fields: + +- mode: NULLABLE + name: date + type: DATE + description: The first date in the date range for the acquisition data + +- mode: NULLABLE + name: application_id + type: STRING + description: The Store ID of the app for which you are retrieving acquisition data + +- mode: NULLABLE + name: application_name + type: STRING + description: The display name of the app + +- mode: NULLABLE + name: acquisition_type + type: STRING + description: The type of acquisition + +- mode: NULLABLE + name: store_client + type: STRING + description: The version of the Store where the acquisition occurred + +- mode: NULLABLE + name: gender + type: STRING + description: The gender of the user who made the acquisition + +- mode: NULLABLE + name: market + type: STRING + description: The ISO 3166 country code of the market where the acquisition occurred + +- mode: NULLABLE + name: os_version + type: STRING + description: the OS version on which the acquisition occurred + +- mode: NULLABLE + name: device_type + type: STRING + description: he type of device on which the acquisition occurred + +- mode: NULLABLE + name: acquisition_quantity + type: INT64 + description: The number of acquisitions that occurred during the specified aggregation level +- mode: NULLABLE + name: purchase_price_usd_amount + type: NUMERIC + description: Purchase price USD amount + +- mode: NULLABLE + name: purchase_price_local_amount + type: NUMERIC + description: Purchase price local currency amount + +- mode: NULLABLE + name: purchase_tax_usd_amount + type: NUMERIC + description: Purchase tax USD amount + +- mode: NULLABLE + name: purchase_tax_local_amount + type: NUMERIC + description: Purchase tax local currency amount diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/metadata.yaml 2024-06-17 21:52:11.000000000 +0000 @@ -0,0 +1,21 @@ +friendly_name: App Acquisitions +description: |- + This table represents acquisition data downloaded from the Microsoft Store for Firefox, FX Beta and FX Nightly + The link for this data is here: https://manage.devcenter.microsoft.com/v1.0/my/analytics/appchannelconversions? +owners: +- mhirose@mozilla.com +labels: + owner1: mhirose +bigquery: + time_partitioning: + type: day + field: DATE + require_partition_filter: false + expiration_days: null + range_partitioning: null + clustering: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/query.py /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/query.py --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/query.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/query.py 2024-06-17 21:50:21.000000000 +0000 @@ -0,0 +1,236 @@ +"""microsoft_store data - download deliverables, clean and upload to BigQuery.""" + +import csv +import json +import tempfile +from argparse import ArgumentParser + +import requests +from google.cloud import bigquery + +API_URI = "https://manage.devcenter.microsoft.com" + +"""MICROSOFT_STORE_APP_LIST is a list of dictionaries. +Keys are the app_name, and app_tenant_id in the form: +{"app_name":" dict: + """Read JSON file.""" + with open(filename, "r") as f: + data = json.loads(f.read()) + return data + + +def write_dict_to_csv(json_data, filename): + """Write a dictionary to a csv.""" + with open(filename, "w") as out_file: + dict_writer = csv.DictWriter(out_file, CSV_FIELDS) + dict_writer.writeheader() + dict_writer.writerows(json_data) + + +def microsoft_authorization(tenant_id, client_id, client_secret, resource_url): + """Microsoft Store Authoriazation. Returns the bearer token required for data download.""" + url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token" + query_params = { + "grant_type": "client_credentials", + "client_id": client_id, + "client_secret": client_secret, + "resource": resource_url, + } + headers = {"Content-Type": "application/x-www-form-urlencoded"} + json_file = post_response(url, headers, query_params) + response_data = json.loads(json_file.text) + bearer_token = response_data["access_token"] + return bearer_token + + +def download_microsoft_store_data(date, application_id, bearer_token): + """Download data from Microsoft - application_id, bearer_token are called here.""" + start_date = date # + end_date = date + token = bearer_token + app_id = application_id + groupBy = [ + "date", + "applicationId", + "applicationName", + "customCampaignId", + "channelType", + "storeClient", + "deviceType", + "market", + ] + # getting overview metrics for different kpis / Deliverables + url = f"https://manage.devcenter.microsoft.com/v1.0/my/analytics/appchannelconversions?applicationId={app_id}" + url_params = f"aggregationLevel=day&startDate={start_date}&endDate={end_date}&skip=0&groupBy={','.join(groupBy)}&orderby=date" + headers = { + "Content-Type": "application/x-www-form-urlencoded", + "Authorization": f"Bearer {token}", + } + print(url) + response = get_response(url, headers, url_params) + return response + + +def check_json(microsoft_store_response_text): + """Script will return an empty dictionary for apps on days when there is no data. Check for that here.""" + with tempfile.NamedTemporaryFile() as tmp_json: + with open(tmp_json.name, "w") as f_json: + f_json.write(microsoft_store_response_text) + try: + query_export = read_json(f_json.name) + except ( + ValueError + ): # ex. json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0) + return None + return query_export + + +def clean_json(query_export, date): + """Turn the json file into a list to be input into a CSV for bq upload.""" + fields_list = [] + for val in query_export["Value"]: + field_dict = { + "date": val["date"], + "application_id": val["applicationId"], + "application_name": val["applicationName"], + "custom_campaign_id": val["customCampaignId"], + "referrer_uri_domain": val["referrerUriDomain"], + "channel_type": val["channelType"], + "store_client": val["storeClient"], + "device_type": val["deviceType"], + "market": val["market"], + "click_count": val["clickCount"], + "conversion_count": val["conversionCount"], + } + fields_list.append(field_dict) + return fields_list + + +def upload_to_bigquery(csv_data, project, dataset, table_name, date): + """Upload the data to bigquery.""" + date = date + print("writing json to csv") + partition = f"{date}".replace("-", "") + print(partition) + with tempfile.NamedTemporaryFile() as tmp_csv: + with open(tmp_csv.name, "w+b") as f_csv: + write_dict_to_csv(csv_data, f_csv.name) + client = bigquery.Client(project) + job_config = bigquery.LoadJobConfig( + create_disposition="CREATE_IF_NEEDED", + write_disposition="WRITE_TRUNCATE", + time_partitioning=bigquery.TimePartitioning( + type_=bigquery.TimePartitioningType.DAY, + field="date", + ), + skip_leading_rows=1, + schema=[ + bigquery.SchemaField("date", "DATE"), + bigquery.SchemaField("application_id", "STRING"), + bigquery.SchemaField("application_name", "STRING"), + bigquery.SchemaField("custom_campaign_id", "STRING"), + bigquery.SchemaField("referrer_uri_domain", "STRING"), + bigquery.SchemaField("channel_type", "STRING"), + bigquery.SchemaField("store_client", "STRING"), + bigquery.SchemaField("device_type", "STRING"), + bigquery.SchemaField("market", "STRING"), + bigquery.SchemaField("click_count", "STRING"), + bigquery.SchemaField("conversion_count", "STRING"), + ], + ) + destination = f"{project}.{dataset}.{table_name}${partition}" + destination = f"{project}.analysis.mhirose_{table_name}${partition}" + + job = client.load_table_from_file(f_csv, destination, job_config=job_config) + print( + f"Writing microsoft_store data for all apps to {destination}. BigQuery job ID: {job.job_id}" + ) + job.result() + + +def main(): + """Input data, call functions, get stuff done.""" + parser = ArgumentParser(description=__doc__) + parser.add_argument("--date", required=True) + parser.add_argument("--microsoft_store_client_id", required=True) + parser.add_argument("--microsoft_store_client_id", required=True) + parser.add_argument("--microsoft_store_app_list", required=True) + parser.add_argument("--microsoft_store_tenant_id", required=True) + parser.add_argument("--project", default="moz-fx-data-shared-prod") + parser.add_argument("--dataset", default="microsoft_derived") + + args = parser.parse_args() + + app_list = json.loads(args.microsoft_store_app_list) + + project = args.project + dataset = args.dataset + table_name = "microsoft_app_conversions" + + date = args.date + client_id = args.microsoft_store_client_id + client_secret = args.microsoft_store_client_secret + app_list = args.microsoft_store_app_list + tenant_id = args.microsoft_store_tenant_id + resource_url = API_URI + + data = [] + + bearer_token = microsoft_authorization( + tenant_id, client_id, client_secret, resource_url + ) + + # Cycle through the apps to get the relevant data + for app in app_list: + print(f'This is data for {app["app_name"]} - {app["app_id"]} ') + # Ping the microsoft_store URL and get a response + json_file = download_microsoft_store_data(date, app["app_id"], bearer_token) + query_export = check_json(json_file.text) + if query_export is not None: + # This section writes the tmp json data into a temp CSV file which will then be put into a BigQuery table + microsoft_store_data = clean_json(query_export, date) + data.extend(microsoft_store_data) + else: + print("no data for today") + + upload_to_bigquery(data, project, dataset, table_name, date) + + +if __name__ == "__main__": + main() diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/schema.yaml 2024-06-17 21:50:21.000000000 +0000 @@ -0,0 +1,46 @@ +fields: + +- mode: NULLABLE + name: date + type: DATE + description: The first date in the date range for the acquisition data +- mode: NULLABLE + name: application_id + type: STRING + description: The Store ID of the app for which you are retrieving acquisition data +- mode: NULLABLE + name: application_name + type: STRING + description: The display name of the app +- mode: NULLABLE + name: custom_campaign_id + type: STRING + description: The ID string for a custom app promotion campaign that is associated with the app +- mode: NULLABLE + name: referrer_uri_domain + type: STRING + description: Specifies the domain where the app listing with the custom app promotion campaign ID was activated +- mode: NULLABLE + name: channel_type + type: STRING + description: The channel for the conversion +- mode: NULLABLE + name: store_client + type: STRING + description: The version of the Store where the conversion occurred. Currently, the only supported value is SFC +- mode: NULLABLE + name: device_type + type: STRING + description: Device Type +- mode: NULLABLE + name: market + type: STRING + description: The ISO 3166 country code of the market where the conversion occurred +- mode: NULLABLE + name: click_count + type: INT64 + description: The number of customer clicks on your app listing lin +- mode: NULLABLE + name: conversion_count + type: INT64 + description: The number of customer conversions diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/metadata.yaml 2024-06-17 21:52:11.000000000 +0000 @@ -0,0 +1,20 @@ +friendly_name: App Acquisitionss +description: | + This table represents acquisition data downloaded from the Microsoft Store for Firefox, FX Beta and FX Nightly The link for this data is here: https://manage.devcenter.microsoft.com/v1.0/my/analytics/installs? +owners: +- mhirose@mozilla.com +labels: + owner1: mhirose +bigquery: + time_partitioning: + type: day + field: DATE + require_partition_filter: false + expiration_days: null + range_partitioning: null + clustering: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/query.py /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/query.py --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/query.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/query.py 2024-06-17 21:50:21.000000000 +0000 @@ -0,0 +1,221 @@ +"""microsoft_store data - download deliverables, clean and upload to BigQuery.""" + +import csv +import json +import tempfile +from argparse import ArgumentParser + +import requests +from google.cloud import bigquery + +API_URI = "https://manage.devcenter.microsoft.com" + +"""MICROSOFT_STORE_APP_LIST is a list of dictionaries. +Keys are the app_name, and app_tenant_id in the form: +{"app_name":" dict: + """Read JSON file.""" + with open(filename, "r") as f: + data = json.loads(f.read()) + return data + + +def write_dict_to_csv(json_data, filename): + """Write a dictionary to a csv.""" + with open(filename, "w") as out_file: + dict_writer = csv.DictWriter(out_file, CSV_FIELDS) + dict_writer.writeheader() + dict_writer.writerows(json_data) + + +def microsoft_authorization(tenant_id, client_id, client_secret, resource_url): + """Microsoft Store Authoriazation. Returns the bearer token required for data download.""" + url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token" + query_params = { + "grant_type": "client_credentials", + "client_id": client_id, + "client_secret": client_secret, + "resource": resource_url, + } + headers = {"Content-Type": "application/x-www-form-urlencoded"} + json_file = post_response(url, headers, query_params) + response_data = json.loads(json_file.text) + bearer_token = response_data["access_token"] + return bearer_token + + +def download_microsoft_store_data(date, application_id, bearer_token): + """Download data from Microsoft - application_id, bearer_token are called here.""" + start_date = date # + end_date = date + token = bearer_token + app_id = application_id + groupBy = [ + "applicationName", + "date", + "deviceType", + "market", + "osVersion", + ] + # getting overview metrics for different kpis / Deliverables + url = f"https://manage.devcenter.microsoft.com/v1.0/my/analytics/installs?applicationId={app_id}" + url_params = f"aggregationLevel=day&startDate={start_date}&endDate={end_date}&skip=0&groupBy={','.join(groupBy)}&filter=date eq '{date}'&orderby=date" + headers = { + "Content-Type": "application/x-www-form-urlencoded", + "Authorization": f"Bearer {token}", + } + print(url) + response = get_response(url, headers, url_params) + return response + + +def check_json(microsoft_store_response_text): + """Script will return an empty dictionary for apps on days when there is no data. Check for that here.""" + with tempfile.NamedTemporaryFile() as tmp_json: + with open(tmp_json.name, "w") as f_json: + f_json.write(microsoft_store_response_text) + try: + query_export = read_json(f_json.name) + except ( + ValueError + ): # ex. json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0) + return None + return query_export + + +def clean_json(query_export, date): + """Turn the json file into a list to be input into a CSV for bq upload.""" + fields_list = [] + for val in query_export["Value"]: + field_dict = { + "date": val["date"], + "application_id": val["applicationId"], + "application_name": val["applicationName"], + "package_version": val["packageVersion"], + "device_type": val["deviceType"], + "market": val["market"], + "os_version": val["osVersion"], + "successful_install_count": val["successfulInstallCount"], + } + fields_list.append(field_dict) + return fields_list + + +def upload_to_bigquery(csv_data, project, dataset, table_name, date): + """Upload the data to bigquery.""" + print("writing json to csv") + partition = f"{date}".replace("-", "") + print(partition) + with tempfile.NamedTemporaryFile() as tmp_csv: + with open(tmp_csv.name, "w+b") as f_csv: + write_dict_to_csv(csv_data, f_csv.name) + client = bigquery.Client(project) + job_config = bigquery.LoadJobConfig( + create_disposition="CREATE_IF_NEEDED", + write_disposition="WRITE_TRUNCATE", + time_partitioning=bigquery.TimePartitioning( + type_=bigquery.TimePartitioningType.DAY, + field="date", + ), + skip_leading_rows=1, + schema=[ + bigquery.SchemaField("date", "DATE"), + bigquery.SchemaField("application_id", "STRING"), + bigquery.SchemaField("application_name", "STRING"), + bigquery.SchemaField("package_version", "STRING"), + bigquery.SchemaField("device_type", "STRING"), + bigquery.SchemaField("market", "STRING"), + bigquery.SchemaField("os_version", "STRING"), + bigquery.SchemaField("successful_install_count", "INT64"), + ], + ) + destination = f"{project}.{dataset}.{table_name}${partition}" + + job = client.load_table_from_file(f_csv, destination, job_config=job_config) + print( + f"Writing microsoft_store data for all apps to {destination}. BigQuery job ID: {job.job_id}" + ) + job.result() + + +def main(): + """Input data, call functions, get stuff done.""" + parser = ArgumentParser(description=__doc__) + parser.add_argument("--date", required=True) + parser.add_argument("--microsoft_store_client_id", required=True) + parser.add_argument("--microsoft_store_client_id", required=True) + parser.add_argument("--microsoft_store_app_list", required=True) + parser.add_argument("--microsoft_store_tenant_id", required=True) + parser.add_argument("--project", default="moz-fx-data-shared-prod") + parser.add_argument("--dataset", default="microsoft_derived") + + args = parser.parse_args() + + app_list = json.loads(args.microsoft_store_app_list) + + project = args.project + dataset = args.dataset + table_name = "microsoft_app_installs" + + date = args.date + client_id = args.microsoft_store_client_id + client_secret = args.microsoft_store_client_secret + app_list = args.microsoft_store_app_list + tenant_id = args.microsoft_store_tenant_id + resource_url = API_URI + + data = [] + + bearer_token = microsoft_authorization( + tenant_id, client_id, client_secret, resource_url + ) + + # Cycle through the apps to get the relevant data + for app in app_list: + print(f'This is data for {app["app_name"]} - {app["app_id"]} ') + # Ping the microsoft_store URL and get a response + json_file = download_microsoft_store_data(date, app["app_id"], bearer_token) + query_export = check_json(json_file.text) + if query_export is not None: + # This section writes the tmp json data into a temp CSV file which will then be put into a BigQuery table + microsoft_store_data = clean_json(query_export, date) + data.extend(microsoft_store_data) + else: + print("no data for today") + upload_to_bigquery(data, project, dataset, table_name, date) + + +if __name__ == "__main__": + main() diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_installs_v1/schema.yaml 2024-06-17 21:50:21.000000000 +0000 @@ -0,0 +1,34 @@ +fields: + +- mode: NULLABLE + name: date + type: DATE + description: The first date in the date range for the install data +- mode: NULLABLE + name: application_id + type: STRING + description: The Store ID of the app for which you are retrieving install data +- mode: NULLABLE + name: application_name + type: STRING + description: The display name of the app +- mode: NULLABLE + name: package_version + type: STRING + description: The version of the package that was installed +- mode: NULLABLE + name: device_type + type: STRING + description: Device Type +- mode: NULLABLE + name: market + type: STRING + description: The ISO 3166 country code of the market where the install occurred +- mode: NULLABLE + name: os_version + type: STRING + description: The OS version on which the install occurred +- mode: NULLABLE + name: successful_install_count + type: INT64 + description: The number of successful installs that occurred during the specified aggregation level diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/dataset_metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/dataset_metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/dataset_metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/dataset_metadata.yaml 2024-06-17 21:50:21.000000000 +0000 @@ -0,0 +1,10 @@ +friendly_name: Microsoft Derived +description: |- + Derived tables for Microsoft Store data +dataset_base_acl: derived +user_facing: false +labels: {} +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential ```

Link to full diff

dataops-ci-bot commented 2 weeks ago

Integration report for "Merge branch 'main' into deng_2631_import_microsoft_store_data"

sql.diff

Click to expand! ```diff Only in /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod: microsoft_derived Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/monitoring/shredder_progress: schema.yaml Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/pocket/pocket_reach_mau: schema.yaml Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_terms/aggregated_search_terms_daily: schema.yaml Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_terms/sanitization_job_data_validation_metrics: schema.yaml Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_terms/sanitization_job_languages: schema.yaml Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/search_terms/search_terms_daily: schema.yaml Only in /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/telemetry/buildhub2: schema.yaml diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/app_store/firefox_app_store_territory_source_type_report/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/app_store/firefox_app_store_territory_source_type_report/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/app_store/firefox_app_store_territory_source_type_report/schema.yaml 2024-06-17 22:18:43.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/app_store/firefox_app_store_territory_source_type_report/schema.yaml 2024-06-17 22:10:03.000000000 +0000 @@ -1 +1,34 @@ -{} +fields: +- name: app_id + type: INTEGER + mode: NULLABLE +- name: date + type: TIMESTAMP + mode: NULLABLE +- name: source_type + type: STRING + mode: NULLABLE +- name: territory + type: STRING + mode: NULLABLE +- name: _fivetran_synced + type: TIMESTAMP + mode: NULLABLE +- name: impressions + type: INTEGER + mode: NULLABLE +- name: impressions_unique_device + type: INTEGER + mode: NULLABLE +- name: meets_threshold + type: BOOLEAN + mode: NULLABLE +- name: page_views + type: INTEGER + mode: NULLABLE +- name: page_views_unique_device + type: INTEGER + mode: NULLABLE +- name: date_pst + type: TIMESTAMP + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/app_store/firefox_downloads_territory_source_type_report/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/app_store/firefox_downloads_territory_source_type_report/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/app_store/firefox_downloads_territory_source_type_report/schema.yaml 2024-06-17 22:18:43.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/app_store/firefox_downloads_territory_source_type_report/schema.yaml 2024-06-17 22:10:03.000000000 +0000 @@ -1 +1,31 @@ -{} +fields: +- name: app_id + type: INTEGER + mode: NULLABLE +- name: date + type: TIMESTAMP + mode: NULLABLE +- name: source_type + type: STRING + mode: NULLABLE +- name: territory + type: STRING + mode: NULLABLE +- name: _fivetran_synced + type: TIMESTAMP + mode: NULLABLE +- name: first_time_downloads + type: INTEGER + mode: NULLABLE +- name: meets_threshold + type: BOOLEAN + mode: NULLABLE +- name: redownloads + type: INTEGER + mode: NULLABLE +- name: total_downloads + type: INTEGER + mode: NULLABLE +- name: date_pst + type: TIMESTAMP + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/app_store/firefox_usage_territory_source_type_report/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/app_store/firefox_usage_territory_source_type_report/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/app_store/firefox_usage_territory_source_type_report/schema.yaml 2024-06-17 22:18:43.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/app_store/firefox_usage_territory_source_type_report/schema.yaml 2024-06-17 22:10:03.000000000 +0000 @@ -1 +1,37 @@ -{} +fields: +- name: app_id + type: INTEGER + mode: NULLABLE +- name: date + type: TIMESTAMP + mode: NULLABLE +- name: source_type + type: STRING + mode: NULLABLE +- name: territory + type: STRING + mode: NULLABLE +- name: _fivetran_synced + type: TIMESTAMP + mode: NULLABLE +- name: active_devices + type: INTEGER + mode: NULLABLE +- name: active_devices_last_30_days + type: INTEGER + mode: NULLABLE +- name: deletions + type: INTEGER + mode: NULLABLE +- name: installations + type: INTEGER + mode: NULLABLE +- name: meets_threshold + type: BOOLEAN + mode: NULLABLE +- name: sessions + type: INTEGER + mode: NULLABLE +- name: date_pst + type: TIMESTAMP + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql 2024-06-17 22:18:43.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/bedrock_derived/event_monitoring_live_v1/materialized_view.sql 2024-06-17 22:04:24.000000000 +0000 @@ -60,7 +60,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.bedrock_live.interaction_v1` + `moz-fx-data-shared-prod.bedrock_live.events_v1` UNION ALL SELECT submission_timestamp, @@ -70,7 +70,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.bedrock_live.events_v1` + `moz-fx-data-shared-prod.bedrock_live.interaction_v1` ) CROSS JOIN UNNEST(events) AS event, diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/adm_forecasting/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/adm_forecasting/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/adm_forecasting/schema.yaml 2024-06-17 22:18:42.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/adm_forecasting/schema.yaml 2024-06-17 22:10:07.000000000 +0000 @@ -1 +1,43 @@ -{} +fields: +- name: product + type: STRING + mode: NULLABLE +- name: submission_date + type: DATE + mode: NULLABLE +- name: country + type: STRING + mode: NULLABLE +- name: device + type: STRING + mode: NULLABLE +- name: eligible_share_country + type: FLOAT + mode: NULLABLE +- name: clients + type: INTEGER + mode: NULLABLE +- name: p_amazon + type: FLOAT + mode: NULLABLE +- name: p_other + type: FLOAT + mode: NULLABLE +- name: amazon_clients + type: FLOAT + mode: NULLABLE +- name: other_clients + type: FLOAT + mode: NULLABLE +- name: amazon_clicks + type: INTEGER + mode: NULLABLE +- name: other_clicks + type: INTEGER + mode: NULLABLE +- name: amazon_clicks_per_client + type: FLOAT + mode: NULLABLE +- name: other_clicks_per_client + type: FLOAT + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-06-17 22:18:42.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates/schema.yaml 2024-06-17 22:10:07.000000000 +0000 @@ -1 +1,49 @@ -{} +fields: +- name: submission_date + type: DATE + mode: NULLABLE +- name: source + type: STRING + mode: NULLABLE +- name: event_type + type: STRING + mode: NULLABLE +- name: form_factor + type: STRING + mode: NULLABLE +- name: country + type: STRING + mode: NULLABLE +- name: subdivision1 + type: STRING + mode: NULLABLE +- name: advertiser + type: STRING + mode: NULLABLE +- name: release_channel + type: STRING + mode: NULLABLE +- name: position + type: INTEGER + mode: NULLABLE +- name: provider + type: STRING + mode: NULLABLE +- name: match_type + type: STRING + mode: NULLABLE +- name: normalized_os + type: STRING + mode: NULLABLE +- name: suggest_data_sharing_enabled + type: BOOLEAN + mode: NULLABLE +- name: event_count + type: INTEGER + mode: NULLABLE +- name: user_count + type: INTEGER + mode: NULLABLE +- name: query_type + type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_spons_tiles/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_spons_tiles/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_spons_tiles/schema.yaml 2024-06-17 22:18:42.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_spons_tiles/schema.yaml 2024-06-17 22:10:06.000000000 +0000 @@ -1 +1,31 @@ -{} +fields: +- name: submission_date + type: DATE + mode: NULLABLE +- name: form_factor + type: STRING + mode: NULLABLE +- name: country + type: STRING + mode: NULLABLE +- name: advertiser + type: STRING + mode: NULLABLE +- name: normalized_os + type: STRING + mode: NULLABLE +- name: release_channel + type: STRING + mode: NULLABLE +- name: position + type: INTEGER + mode: NULLABLE +- name: provider + type: STRING + mode: NULLABLE +- name: impression_count + type: INTEGER + mode: NULLABLE +- name: click_count + type: INTEGER + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-06-17 22:18:42.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/event_aggregates_suggest/schema.yaml 2024-06-17 22:10:06.000000000 +0000 @@ -1 +1,40 @@ -{} +fields: +- name: submission_date + type: DATE + mode: NULLABLE +- name: form_factor + type: STRING + mode: NULLABLE +- name: country + type: STRING + mode: NULLABLE +- name: advertiser + type: STRING + mode: NULLABLE +- name: normalized_os + type: STRING + mode: NULLABLE +- name: release_channel + type: STRING + mode: NULLABLE +- name: position + type: INTEGER + mode: NULLABLE +- name: provider + type: STRING + mode: NULLABLE +- name: match_type + type: STRING + mode: NULLABLE +- name: suggest_data_sharing_enabled + type: BOOLEAN + mode: NULLABLE +- name: impression_count + type: INTEGER + mode: NULLABLE +- name: click_count + type: INTEGER + mode: NULLABLE +- name: query_type + type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/quicksuggest_click_live/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/quicksuggest_click_live/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/quicksuggest_click_live/schema.yaml 2024-06-17 22:18:42.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/quicksuggest_click_live/schema.yaml 2024-06-17 22:10:06.000000000 +0000 @@ -1 +1,174 @@ -{} +fields: +- name: additional_properties + type: STRING + mode: NULLABLE +- name: advertiser + type: STRING + mode: NULLABLE +- name: block_id + type: INTEGER + mode: NULLABLE +- name: context_id + type: STRING + mode: NULLABLE +- name: document_id + type: STRING + mode: NULLABLE +- name: experiments + type: RECORD + mode: REPEATED + fields: + - name: key + type: STRING + mode: NULLABLE + - name: value + type: RECORD + mode: NULLABLE + fields: + - name: branch + type: STRING + mode: NULLABLE +- name: locale + type: STRING + mode: NULLABLE +- name: metadata + type: RECORD + mode: NULLABLE + fields: + - name: geo + type: RECORD + mode: NULLABLE + fields: + - name: city + type: STRING + mode: NULLABLE + - name: country + type: STRING + mode: NULLABLE + - name: db_version + type: STRING + mode: NULLABLE + - name: subdivision1 + type: STRING + mode: NULLABLE + - name: subdivision2 + type: STRING + mode: NULLABLE + - name: header + type: RECORD + mode: NULLABLE + fields: + - name: date + type: STRING + mode: NULLABLE + - name: dnt + type: STRING + mode: NULLABLE + - name: x_debug_id + type: STRING + mode: NULLABLE + - name: x_pingsender_version + type: STRING + mode: NULLABLE + - name: x_source_tags + type: STRING + mode: NULLABLE + - name: x_telemetry_agent + type: STRING + mode: NULLABLE + - name: x_foxsec_ip_reputation + type: STRING + mode: NULLABLE + - name: x_lb_tags + type: STRING + mode: NULLABLE + - name: parsed_date + type: TIMESTAMP + mode: NULLABLE + - name: parsed_x_source_tags + type: STRING + mode: REPEATED + - name: parsed_x_lb_tags + type: RECORD + mode: NULLABLE + fields: + - name: tls_version + type: STRING + mode: NULLABLE + - name: tls_cipher_hex + type: STRING + mode: NULLABLE + - name: isp + type: RECORD + mode: NULLABLE + fields: + - name: db_version + type: STRING + mode: NULLABLE + - name: name + type: STRING + mode: NULLABLE + - name: organization + type: STRING + mode: NULLABLE + - name: user_agent + type: RECORD + mode: NULLABLE + fields: + - name: browser + type: STRING + mode: NULLABLE + - name: os + type: STRING + mode: NULLABLE + - name: version + type: STRING + mode: NULLABLE +- name: normalized_app_name + type: STRING + mode: NULLABLE +- name: normalized_channel + type: STRING + mode: NULLABLE +- name: normalized_country_code + type: STRING + mode: NULLABLE +- name: normalized_os + type: STRING + mode: NULLABLE +- name: normalized_os_version + type: STRING + mode: NULLABLE +- name: position + type: INTEGER + mode: NULLABLE +- name: release_channel + type: STRING + mode: NULLABLE +- name: reporting_url + type: STRING + mode: NULLABLE +- name: sample_id + type: INTEGER + mode: NULLABLE +- name: submission_timestamp + type: TIMESTAMP + mode: NULLABLE +- name: version + type: STRING + mode: NULLABLE +- name: scenario + type: STRING + mode: NULLABLE +- name: request_id + type: STRING + mode: NULLABLE +- name: match_type + type: STRING + mode: NULLABLE +- name: improve_suggest_experience_checked + type: BOOLEAN + mode: NULLABLE +- name: source + type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/quicksuggest_impression_live/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/quicksuggest_impression_live/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/quicksuggest_impression_live/schema.yaml 2024-06-17 22:18:42.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/quicksuggest_impression_live/schema.yaml 2024-06-17 22:10:06.000000000 +0000 @@ -1 +1,177 @@ -{} +fields: +- name: additional_properties + type: STRING + mode: NULLABLE +- name: advertiser + type: STRING + mode: NULLABLE +- name: block_id + type: INTEGER + mode: NULLABLE +- name: context_id + type: STRING + mode: NULLABLE +- name: document_id + type: STRING + mode: NULLABLE +- name: experiments + type: RECORD + mode: REPEATED + fields: + - name: key + type: STRING + mode: NULLABLE + - name: value + type: RECORD + mode: NULLABLE + fields: + - name: branch + type: STRING + mode: NULLABLE +- name: is_clicked + type: BOOLEAN + mode: NULLABLE +- name: locale + type: STRING + mode: NULLABLE +- name: metadata + type: RECORD + mode: NULLABLE + fields: + - name: geo + type: RECORD + mode: NULLABLE + fields: + - name: city + type: STRING + mode: NULLABLE + - name: country + type: STRING + mode: NULLABLE + - name: db_version + type: STRING + mode: NULLABLE + - name: subdivision1 + type: STRING + mode: NULLABLE + - name: subdivision2 + type: STRING + mode: NULLABLE + - name: header + type: RECORD + mode: NULLABLE + fields: + - name: date + type: STRING + mode: NULLABLE + - name: dnt + type: STRING + mode: NULLABLE + - name: x_debug_id + type: STRING + mode: NULLABLE + - name: x_pingsender_version + type: STRING + mode: NULLABLE + - name: x_source_tags + type: STRING + mode: NULLABLE + - name: x_telemetry_agent + type: STRING + mode: NULLABLE + - name: x_foxsec_ip_reputation + type: STRING + mode: NULLABLE + - name: x_lb_tags + type: STRING + mode: NULLABLE + - name: parsed_date + type: TIMESTAMP + mode: NULLABLE + - name: parsed_x_source_tags + type: STRING + mode: REPEATED + - name: parsed_x_lb_tags + type: RECORD + mode: NULLABLE + fields: + - name: tls_version + type: STRING + mode: NULLABLE + - name: tls_cipher_hex + type: STRING + mode: NULLABLE + - name: isp + type: RECORD + mode: NULLABLE + fields: + - name: db_version + type: STRING + mode: NULLABLE + - name: name + type: STRING + mode: NULLABLE + - name: organization + type: STRING + mode: NULLABLE + - name: user_agent + type: RECORD + mode: NULLABLE + fields: + - name: browser + type: STRING + mode: NULLABLE + - name: os + type: STRING + mode: NULLABLE + - name: version + type: STRING + mode: NULLABLE +- name: normalized_app_name + type: STRING + mode: NULLABLE +- name: normalized_channel + type: STRING + mode: NULLABLE +- name: normalized_country_code + type: STRING + mode: NULLABLE +- name: normalized_os + type: STRING + mode: NULLABLE +- name: normalized_os_version + type: STRING + mode: NULLABLE +- name: position + type: INTEGER + mode: NULLABLE +- name: release_channel + type: STRING + mode: NULLABLE +- name: reporting_url + type: STRING + mode: NULLABLE +- name: sample_id + type: INTEGER + mode: NULLABLE +- name: submission_timestamp + type: TIMESTAMP + mode: NULLABLE +- name: version + type: STRING + mode: NULLABLE +- name: scenario + type: STRING + mode: NULLABLE +- name: request_id + type: STRING + mode: NULLABLE +- name: match_type + type: STRING + mode: NULLABLE +- name: improve_suggest_experience_checked + type: BOOLEAN + mode: NULLABLE +- name: source + type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/request_payload_suggest/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/request_payload_suggest/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/request_payload_suggest/schema.yaml 2024-06-17 22:18:42.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/request_payload_suggest/schema.yaml 2024-06-17 22:10:06.000000000 +0000 @@ -1 +1,28 @@ -{} +fields: +- name: form_factor + type: STRING + mode: NULLABLE +- name: flagged_fraud + type: BOOLEAN + mode: NULLABLE +- name: submission_date + type: DATE + mode: NULLABLE +- name: country_code + type: STRING + mode: NULLABLE +- name: region_code + type: STRING + mode: NULLABLE +- name: os_family + type: STRING + mode: NULLABLE +- name: product_version + type: INTEGER + mode: NULLABLE +- name: impression_count + type: INTEGER + mode: NULLABLE +- name: click_count + type: INTEGER + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/request_payload_tiles/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/request_payload_tiles/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/request_payload_tiles/schema.yaml 2024-06-17 22:18:42.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/request_payload_tiles/schema.yaml 2024-06-17 22:10:06.000000000 +0000 @@ -1 +1,34 @@ -{} +fields: +- name: form_factor + type: STRING + mode: NULLABLE +- name: flagged_fraud + type: BOOLEAN + mode: NULLABLE +- name: submission_date + type: DATE + mode: NULLABLE +- name: begin_timestamp + type: TIMESTAMP + mode: NULLABLE +- name: end_timestamp + type: TIMESTAMP + mode: NULLABLE +- name: country_code + type: STRING + mode: NULLABLE +- name: region_code + type: STRING + mode: NULLABLE +- name: os_family + type: STRING + mode: NULLABLE +- name: product_version + type: INTEGER + mode: NULLABLE +- name: impression_count + type: INTEGER + mode: NULLABLE +- name: click_count + type: INTEGER + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/suggest_revenue_levers_daily/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/suggest_revenue_levers_daily/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/suggest_revenue_levers_daily/schema.yaml 2024-06-17 22:18:42.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/suggest_revenue_levers_daily/schema.yaml 2024-06-17 22:10:06.000000000 +0000 @@ -1 +1,34 @@ -{} +fields: +- name: country + type: STRING + mode: NULLABLE +- name: submission_date + type: DATE + mode: NULLABLE +- name: device + type: STRING + mode: NULLABLE +- name: eligible_share_country + type: FLOAT + mode: NULLABLE +- name: live_market_dau + type: INTEGER + mode: NULLABLE +- name: urlbar_search_dau + type: INTEGER + mode: NULLABLE +- name: suggest_exposed_clients + type: INTEGER + mode: NULLABLE +- name: urlbar_search + type: INTEGER + mode: NULLABLE +- name: total_impressions + type: INTEGER + mode: NULLABLE +- name: spons_impressions + type: INTEGER + mode: NULLABLE +- name: spons_clicks + type: INTEGER + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/topsites_click_live/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/topsites_click_live/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/topsites_click_live/schema.yaml 2024-06-17 22:18:42.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/topsites_click_live/schema.yaml 2024-06-17 22:10:06.000000000 +0000 @@ -1 +1,162 @@ -{} +fields: +- name: additional_properties + type: STRING + mode: NULLABLE +- name: advertiser + type: STRING + mode: NULLABLE +- name: context_id + type: STRING + mode: NULLABLE +- name: document_id + type: STRING + mode: NULLABLE +- name: experiments + type: RECORD + mode: REPEATED + fields: + - name: key + type: STRING + mode: NULLABLE + - name: value + type: RECORD + mode: NULLABLE + fields: + - name: branch + type: STRING + mode: NULLABLE +- name: locale + type: STRING + mode: NULLABLE +- name: metadata + type: RECORD + mode: NULLABLE + fields: + - name: geo + type: RECORD + mode: NULLABLE + fields: + - name: city + type: STRING + mode: NULLABLE + - name: country + type: STRING + mode: NULLABLE + - name: db_version + type: STRING + mode: NULLABLE + - name: subdivision1 + type: STRING + mode: NULLABLE + - name: subdivision2 + type: STRING + mode: NULLABLE + - name: header + type: RECORD + mode: NULLABLE + fields: + - name: date + type: STRING + mode: NULLABLE + - name: dnt + type: STRING + mode: NULLABLE + - name: x_debug_id + type: STRING + mode: NULLABLE + - name: x_pingsender_version + type: STRING + mode: NULLABLE + - name: x_source_tags + type: STRING + mode: NULLABLE + - name: x_telemetry_agent + type: STRING + mode: NULLABLE + - name: x_foxsec_ip_reputation + type: STRING + mode: NULLABLE + - name: x_lb_tags + type: STRING + mode: NULLABLE + - name: parsed_date + type: TIMESTAMP + mode: NULLABLE + - name: parsed_x_source_tags + type: STRING + mode: REPEATED + - name: parsed_x_lb_tags + type: RECORD + mode: NULLABLE + fields: + - name: tls_version + type: STRING + mode: NULLABLE + - name: tls_cipher_hex + type: STRING + mode: NULLABLE + - name: isp + type: RECORD + mode: NULLABLE + fields: + - name: db_version + type: STRING + mode: NULLABLE + - name: name + type: STRING + mode: NULLABLE + - name: organization + type: STRING + mode: NULLABLE + - name: user_agent + type: RECORD + mode: NULLABLE + fields: + - name: browser + type: STRING + mode: NULLABLE + - name: os + type: STRING + mode: NULLABLE + - name: version + type: STRING + mode: NULLABLE +- name: normalized_app_name + type: STRING + mode: NULLABLE +- name: normalized_channel + type: STRING + mode: NULLABLE +- name: normalized_country_code + type: STRING + mode: NULLABLE +- name: normalized_os + type: STRING + mode: NULLABLE +- name: normalized_os_version + type: STRING + mode: NULLABLE +- name: position + type: INTEGER + mode: NULLABLE +- name: release_channel + type: STRING + mode: NULLABLE +- name: reporting_url + type: STRING + mode: NULLABLE +- name: sample_id + type: INTEGER + mode: NULLABLE +- name: submission_timestamp + type: TIMESTAMP + mode: NULLABLE +- name: tile_id + type: INTEGER + mode: NULLABLE +- name: version + type: STRING + mode: NULLABLE +- name: source + type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/topsites_impression_live/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/topsites_impression_live/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/contextual_services/topsites_impression_live/schema.yaml 2024-06-17 22:18:42.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/contextual_services/topsites_impression_live/schema.yaml 2024-06-17 22:10:06.000000000 +0000 @@ -1 +1,162 @@ -{} +fields: +- name: additional_properties + type: STRING + mode: NULLABLE +- name: advertiser + type: STRING + mode: NULLABLE +- name: context_id + type: STRING + mode: NULLABLE +- name: document_id + type: STRING + mode: NULLABLE +- name: experiments + type: RECORD + mode: REPEATED + fields: + - name: key + type: STRING + mode: NULLABLE + - name: value + type: RECORD + mode: NULLABLE + fields: + - name: branch + type: STRING + mode: NULLABLE +- name: locale + type: STRING + mode: NULLABLE +- name: metadata + type: RECORD + mode: NULLABLE + fields: + - name: geo + type: RECORD + mode: NULLABLE + fields: + - name: city + type: STRING + mode: NULLABLE + - name: country + type: STRING + mode: NULLABLE + - name: db_version + type: STRING + mode: NULLABLE + - name: subdivision1 + type: STRING + mode: NULLABLE + - name: subdivision2 + type: STRING + mode: NULLABLE + - name: header + type: RECORD + mode: NULLABLE + fields: + - name: date + type: STRING + mode: NULLABLE + - name: dnt + type: STRING + mode: NULLABLE + - name: x_debug_id + type: STRING + mode: NULLABLE + - name: x_pingsender_version + type: STRING + mode: NULLABLE + - name: x_source_tags + type: STRING + mode: NULLABLE + - name: x_telemetry_agent + type: STRING + mode: NULLABLE + - name: x_foxsec_ip_reputation + type: STRING + mode: NULLABLE + - name: x_lb_tags + type: STRING + mode: NULLABLE + - name: parsed_date + type: TIMESTAMP + mode: NULLABLE + - name: parsed_x_source_tags + type: STRING + mode: REPEATED + - name: parsed_x_lb_tags + type: RECORD + mode: NULLABLE + fields: + - name: tls_version + type: STRING + mode: NULLABLE + - name: tls_cipher_hex + type: STRING + mode: NULLABLE + - name: isp + type: RECORD + mode: NULLABLE + fields: + - name: db_version + type: STRING + mode: NULLABLE + - name: name + type: STRING + mode: NULLABLE + - name: organization + type: STRING + mode: NULLABLE + - name: user_agent + type: RECORD + mode: NULLABLE + fields: + - name: browser + type: STRING + mode: NULLABLE + - name: os + type: STRING + mode: NULLABLE + - name: version + type: STRING + mode: NULLABLE +- name: normalized_app_name + type: STRING + mode: NULLABLE +- name: normalized_channel + type: STRING + mode: NULLABLE +- name: normalized_country_code + type: STRING + mode: NULLABLE +- name: normalized_os + type: STRING + mode: NULLABLE +- name: normalized_os_version + type: STRING + mode: NULLABLE +- name: position + type: INTEGER + mode: NULLABLE +- name: release_channel + type: STRING + mode: NULLABLE +- name: reporting_url + type: STRING + mode: NULLABLE +- name: sample_id + type: INTEGER + mode: NULLABLE +- name: submission_timestamp + type: TIMESTAMP + mode: NULLABLE +- name: tile_id + type: INTEGER + mode: NULLABLE +- name: version + type: STRING + mode: NULLABLE +- name: source + type: STRING + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/client_deduplication/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/client_deduplication/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/client_deduplication/schema.yaml 2024-06-17 22:18:43.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/client_deduplication/schema.yaml 2024-06-17 22:11:30.000000000 +0000 @@ -1 +1,391 @@ -{} +fields: +- name: client_id + type: STRING + mode: NULLABLE +- name: hashed_ad_id + type: STRING + mode: NULLABLE +- name: valid_advertising_id + type: BOOLEAN + mode: NULLABLE +- name: submission_date + type: DATE + mode: NULLABLE +- name: normalized_app_id + type: STRING + mode: NULLABLE +- name: additional_properties + type: STRING + mode: NULLABLE +- name: client_info + type: RECORD + mode: NULLABLE + fields: + - name: android_sdk_version + type: STRING + mode: NULLABLE + - name: app_build + type: STRING + mode: NULLABLE + - name: app_channel + type: STRING + mode: NULLABLE + - name: app_display_version + type: STRING + mode: NULLABLE + - name: architecture + type: STRING + mode: NULLABLE + - name: build_date + type: STRING + mode: NULLABLE + - name: client_id + type: STRING + mode: NULLABLE + - name: device_manufacturer + type: STRING + mode: NULLABLE + - name: device_model + type: STRING + mode: NULLABLE + - name: first_run_date + type: STRING + mode: NULLABLE + - name: locale + type: STRING + mode: NULLABLE + - name: os + type: STRING + mode: NULLABLE + - name: os_version + type: STRING + mode: NULLABLE + - name: telemetry_sdk_build + type: STRING + mode: NULLABLE + - name: windows_build_number + type: INTEGER + mode: NULLABLE + - name: session_count + type: INTEGER + mode: NULLABLE + - name: session_id + type: STRING + mode: NULLABLE +- name: document_id + type: STRING + mode: NULLABLE +- name: events + type: RECORD + mode: REPEATED + fields: + - name: category + type: STRING + mode: NULLABLE + - name: extra + type: RECORD + mode: REPEATED + fields: + - name: key + type: STRING + mode: NULLABLE + - name: value + type: STRING + mode: NULLABLE + - name: name + type: STRING + mode: NULLABLE + - name: timestamp + type: INTEGER + mode: NULLABLE +- name: metadata + type: RECORD + mode: NULLABLE + fields: + - name: geo + type: RECORD + mode: NULLABLE + fields: + - name: city + type: STRING + mode: NULLABLE + - name: country + type: STRING + mode: NULLABLE + - name: db_version + type: STRING + mode: NULLABLE + - name: subdivision1 + type: STRING + mode: NULLABLE + - name: subdivision2 + type: STRING + mode: NULLABLE + - name: header + type: RECORD + mode: NULLABLE + fields: + - name: date + type: STRING + mode: NULLABLE + - name: dnt + type: STRING + mode: NULLABLE + - name: x_debug_id + type: STRING + mode: NULLABLE + - name: x_foxsec_ip_reputation + type: STRING + mode: NULLABLE + - name: x_lb_tags + type: STRING + mode: NULLABLE + - name: x_pingsender_version + type: STRING + mode: NULLABLE + - name: x_source_tags + type: STRING + mode: NULLABLE + - name: x_telemetry_agent + type: STRING + mode: NULLABLE + - name: parsed_date + type: TIMESTAMP + mode: NULLABLE + - name: parsed_x_source_tags + type: STRING + mode: REPEATED + - name: parsed_x_lb_tags + type: RECORD + mode: NULLABLE + fields: + - name: tls_version + type: STRING + mode: NULLABLE + - name: tls_cipher_hex + type: STRING + mode: NULLABLE + - name: isp + type: RECORD + mode: NULLABLE + fields: + - name: db_version + type: STRING + mode: NULLABLE + - name: name + type: STRING + mode: NULLABLE + - name: organization + type: STRING + mode: NULLABLE + - name: user_agent + type: RECORD + mode: NULLABLE + fields: + - name: browser + type: STRING + mode: NULLABLE + - name: os + type: STRING + mode: NULLABLE + - name: version + type: STRING + mode: NULLABLE +- name: metrics + type: RECORD + mode: NULLABLE + fields: + - name: boolean + type: RECORD + mode: NULLABLE + fields: + - name: client_deduplication_valid_advertising_id + type: BOOLEAN + mode: NULLABLE + - name: counter + type: RECORD + mode: NULLABLE + fields: + - name: events_normal_and_private_uri_count + type: INTEGER + mode: NULLABLE + - name: metrics_tabs_open_count + type: INTEGER + mode: NULLABLE + - name: labeled_counter + type: RECORD + mode: NULLABLE + fields: + - name: browser_search_ad_clicks + type: RECORD + mode: REPEATED + fields: + - name: key + type: STRING + mode: NULLABLE + - name: value + type: INTEGER + mode: NULLABLE + - name: browser_search_in_content + type: RECORD + mode: REPEATED + fields: + - name: key + type: STRING + mode: NULLABLE + - name: value + type: INTEGER + mode: NULLABLE + - name: browser_search_with_ads + type: RECORD + mode: REPEATED + fields: + - name: key + type: STRING + mode: NULLABLE + - name: value + type: INTEGER + mode: NULLABLE + - name: glean_error_invalid_label + type: RECORD + mode: REPEATED + fields: + - name: key + type: STRING + mode: NULLABLE + - name: value + type: INTEGER + mode: NULLABLE + - name: glean_error_invalid_overflow + type: RECORD + mode: REPEATED + fields: + - name: key + type: STRING + mode: NULLABLE + - name: value + type: INTEGER + mode: NULLABLE + - name: glean_error_invalid_state + type: RECORD + mode: REPEATED + fields: + - name: key + type: STRING + mode: NULLABLE + - name: value + type: INTEGER + mode: NULLABLE + - name: glean_error_invalid_value + type: RECORD + mode: REPEATED + fields: + - name: key + type: STRING + mode: NULLABLE + - name: value + type: INTEGER + mode: NULLABLE + - name: metrics_search_count + type: RECORD + mode: REPEATED + fields: + - name: key + type: STRING + mode: NULLABLE + - name: value + type: INTEGER + mode: NULLABLE + - name: string + type: RECORD + mode: NULLABLE + fields: + - name: activation_identifier + type: STRING + mode: NULLABLE + - name: client_deduplication_experiment_timeframe + type: STRING + mode: NULLABLE + - name: search_default_engine_code + type: STRING + mode: NULLABLE + - name: search_default_engine_name + type: STRING + mode: NULLABLE + - name: client_deduplication_hashed_gaid + type: STRING + mode: NULLABLE + - name: glean_client_annotation_experimentation_id + type: STRING + mode: NULLABLE +- name: normalized_app_name + type: STRING + mode: NULLABLE +- name: normalized_channel + type: STRING + mode: NULLABLE + description: Normalized channel name +- name: normalized_country_code + type: STRING + mode: NULLABLE +- name: normalized_os + type: STRING + mode: NULLABLE +- name: normalized_os_version + type: STRING + mode: NULLABLE +- name: ping_info + type: RECORD + mode: NULLABLE + fields: + - name: end_time + type: STRING + mode: NULLABLE + - name: experiments + type: RECORD + mode: REPEATED + fields: + - name: key + type: STRING + mode: NULLABLE + - name: value + type: RECORD + mode: NULLABLE + fields: + - name: branch + type: STRING + mode: NULLABLE + - name: extra + type: RECORD + mode: NULLABLE + fields: + - name: type + type: STRING + mode: NULLABLE + - name: enrollment_id + type: STRING + mode: NULLABLE + - name: ping_type + type: STRING + mode: NULLABLE + - name: reason + type: STRING + mode: NULLABLE + - name: seq + type: INTEGER + mode: NULLABLE + - name: start_time + type: STRING + mode: NULLABLE + - name: parsed_start_time + type: TIMESTAMP + mode: NULLABLE + - name: parsed_end_time + type: TIMESTAMP + mode: NULLABLE +- name: sample_id + type: INTEGER + mode: NULLABLE +- name: submission_timestamp + type: TIMESTAMP + mode: NULLABLE diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml 2024-06-17 22:18:43.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/fenix/use_counters/schema.yaml 2024-06-17 22:13:32.000000000 +0000 @@ -1,11 +1,11 @@ fields: - name: normalized_app_id - type: STRING mode: NULLABLE + type: STRING description: App ID of the channel data was received from - name: normalized_channel - type: STRING mode: NULLABLE + type: STRING description: Normalized channel name - name: additional_properties type: STRING diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/event_monitoring_live_v1/materialized_view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/event_monitoring_live_v1/materialized_view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/event_monitoring_live_v1/materialized_view.sql 2024-06-17 22:18:42.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_background_tasks_derived/event_monitoring_live_v1/materialized_view.sql 2024-06-17 22:04:24.000000000 +0000 @@ -50,7 +50,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_background_tasks_live.background_tasks_v1` + `moz-fx-data-shared-prod.firefox_desktop_background_tasks_live.events_v1` UNION ALL SELECT submission_timestamp, @@ -60,7 +60,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_background_tasks_live.events_v1` + `moz-fx-data-shared-prod.firefox_desktop_background_tasks_live.background_tasks_v1` ) CROSS JOIN UNNEST(events) AS event, diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql 2024-06-17 22:18:43.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/firefox_desktop_derived/event_monitoring_live_v1/materialized_view.sql 2024-06-17 22:04:24.000000000 +0000 @@ -50,7 +50,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.prototype_no_code_events_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.newtab_v1` UNION ALL SELECT submission_timestamp, @@ -60,7 +60,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.urlbar_potential_exposure_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.events_v1` UNION ALL SELECT submission_timestamp, @@ -70,7 +70,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.newtab_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.urlbar_potential_exposure_v1` UNION ALL SELECT submission_timestamp, @@ -80,7 +80,7 @@ client_info.app_display_version AS version, ping_info FROM - `moz-fx-data-shared-prod.firefox_desktop_live.events_v1` + `moz-fx-data-shared-prod.firefox_desktop_live.prototype_no_code_events_v1` ) CROSS JOIN UNNEST(events) AS event, diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/metadata.yaml 2024-06-17 22:20:07.000000000 +0000 @@ -0,0 +1,21 @@ +friendly_name: App Acquisitions +description: |- + This table represents acquisition data downloaded from the Microsoft Store for Firefox, FX Beta and FX Nightly + The link for this data is here: https://manage.devcenter.microsoft.com/v1.0/my/analytics/appacquisitions? +owners: +- mhirose@mozilla.com +labels: + owner1: mhirose +bigquery: + time_partitioning: + type: day + field: DATE + require_partition_filter: false + expiration_days: null + range_partitioning: null + clustering: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/query.py /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/query.py --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/query.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/query.py 2024-06-17 22:02:36.000000000 +0000 @@ -0,0 +1,245 @@ +"""microsoft_store data - download deliverables, clean and upload to BigQuery.""" + +import csv +import json +import tempfile +from argparse import ArgumentParser + +import requests +from google.cloud import bigquery + +API_URI = "https://manage.devcenter.microsoft.com" + +"""MICROSOFT_STORE_APP_LIST is a list of dictionaries. +Keys are the app_name, and app_tenant_id in the form: +{"app_name":" dict: + """Read JSON file.""" + with open(filename, "r") as f: + data = json.loads(f.read()) + return data + + +def write_dict_to_csv(json_data, filename): + """Write a dictionary to a csv.""" + with open(filename, "w") as out_file: + dict_writer = csv.DictWriter(out_file, CSV_FIELDS) + dict_writer.writeheader() + dict_writer.writerows(json_data) + + +def microsoft_authorization(tenant_id, client_id, client_secret, resource_url): + """Microsoft Store Authoriazation. Returns the bearer token required for data download.""" + url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token" + query_params = { + "grant_type": "client_credentials", + "client_id": client_id, + "client_secret": client_secret, + "resource": resource_url, + } + headers = {"Content-Type": "application/x-www-form-urlencoded"} + json_file = post_response(url, headers, query_params) + response_data = json.loads(json_file.text) + bearer_token = response_data["access_token"] + return bearer_token + + +def download_microsoft_store_data(date, application_id, bearer_token): + """Download data from Microsoft - application_id, bearer_token are called here.""" + start_date = date # + end_date = date + token = bearer_token + app_id = application_id + groupBy = [ + "date", + "applicationName", + "acquisitionType", + "ageGroup", + "storeClient", + "gender", + "market", + "osVersion", + "deviceType", + ] + # getting overview metrics for different kpis / Deliverables + url = f"https://manage.devcenter.microsoft.com/v1.0/my/analytics/appacquisitions?applicationId={app_id}" + url_params = f"aggregationLevel=day&startDate={start_date}&endDate={end_date}&skip=0&groupBy={','.join(groupBy)}" + headers = { + "Content-Type": "application/x-www-form-urlencoded", + "Authorization": f"Bearer {token}", + } + print(url) + response = get_response(url, headers, url_params) + return response + + +def check_json(microsoft_store_response_text): + """Script will return an empty dictionary for apps on days when there is no data. Check for that here.""" + with tempfile.NamedTemporaryFile() as tmp_json: + with open(tmp_json.name, "w") as f_json: + f_json.write(microsoft_store_response_text) + try: + query_export = read_json(f_json.name) + except ( + ValueError + ): # ex. json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0) + return None + return query_export + + +def clean_json(query_export, date): + """Turn the json file into a list to be input into a CSV for bq upload.""" + fields_list = [] + for val in query_export["Value"]: + field_dict = { + "date": val["date"], + "application_id": val["applicationId"], + "application_name": val["applicationName"], + "acquisition_type": val["acquisitionType"], + "store_client": val["storeClient"], + "gender": val["gender"], + "market": val["market"], + "os_version": val["osVersion"], + "device_type": val["deviceType"], + "acquisition_quantity": val["acquisitionQuantity"], + "purchase_price_usd_amount": val["purchasePriceUSDAmount"], + "purchase_price_local_amount": val["purchasePriceLocalAmount"], + "purchase_tax_usd_amount": val["purchaseTaxUSDAmount"], + "purchase_tax_local_amount": val["purchaseTaxLocalAmount"], + } + fields_list.append(field_dict) + return fields_list + + +def upload_to_bigquery(csv_data, project, dataset, table_name, date): + """Upload the data to bigquery.""" + date = date + print("writing json to csv") + partition = f"{date}".replace("-", "") + print(partition) + with tempfile.NamedTemporaryFile() as tmp_csv: + with open(tmp_csv.name, "w+b") as f_csv: + write_dict_to_csv(csv_data, f_csv.name) + client = bigquery.Client(project) + job_config = bigquery.LoadJobConfig( + create_disposition="CREATE_IF_NEEDED", + write_disposition="WRITE_TRUNCATE", + time_partitioning=bigquery.TimePartitioning( + type_=bigquery.TimePartitioningType.DAY, + field="date", + ), + skip_leading_rows=1, + schema=[ + bigquery.SchemaField("date", "DATE"), + bigquery.SchemaField("application_id", "STRING"), + bigquery.SchemaField("application_name", "STRING"), + bigquery.SchemaField("acquisition_type", "STRING"), + bigquery.SchemaField("store_client", "STRING"), + bigquery.SchemaField("gender", "STRING"), + bigquery.SchemaField("market", "STRING"), + bigquery.SchemaField("os_version", "STRING"), + bigquery.SchemaField("device_type", "STRING"), + bigquery.SchemaField("acquisition_quantity", "INT64"), + bigquery.SchemaField("purchase_price_usd_amount", "NUMERIC"), + bigquery.SchemaField("purchase_price_local_amount", "NUMERIC"), + bigquery.SchemaField("purchase_tax_usd_amount", "NUMERIC"), + bigquery.SchemaField("purchase_tax_local_amount", "NUMERIC"), + ], + ) + destination = f"{project}.{dataset}.{table_name}${partition}" + + job = client.load_table_from_file(f_csv, destination, job_config=job_config) + print( + f"Writing microsoft_store data for all apps to {destination}. BigQuery job ID: {job.job_id}" + ) + job.result() + + +def main(): + """Input data, call functions, get stuff done.""" + parser = ArgumentParser(description=__doc__) + parser.add_argument("--date", required=True) + parser.add_argument("--microsoft_store_client_id", required=True) + parser.add_argument("--microsoft_store_client_secret", required=True) + parser.add_argument("--microsoft_store_app_list", required=True) + parser.add_argument("--microsoft_store_tenant_id", required=True) + parser.add_argument("--project", default="moz-fx-data-shared-prod") + parser.add_argument("--dataset", default="microsoft_derived") + + args = parser.parse_args() + + app_list = json.loads(args.microsoft_store_app_list) + + project = args.project + dataset = args.dataset + table_name = "microsoft_app_acquisitions" + + date = args.date + client_id = args.microsoft_store_client_id + client_secret = args.microsoft_store_client_secret + app_list = args.microsoft_store_app_list + tenant_id = args.microsoft_store_tenant_id + resource_url = API_URI + + data = [] + + bearer_token = microsoft_authorization( + tenant_id, client_id, client_secret, resource_url + ) + + # Cycle through the apps to get the relevant data + for app in app_list: + print(f'This is data for {app["app_name"]} - {app["app_id"]} ') + # Ping the microsoft_store URL and get a response + json_file = download_microsoft_store_data(date, app["app_id"], bearer_token) + query_export = check_json(json_file.text) + if query_export is not None: + # This section writes the tmp json data into a temp CSV file which will then be put into a BigQuery table + microsoft_store_data = clean_json(query_export, date) + data.extend(microsoft_store_data) + else: + print("no data for today") + + upload_to_bigquery(data, project, dataset, table_name, date) + + +if __name__ == "__main__": + main() diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/schema.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/schema.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/schema.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_acquisitions_v1/schema.yaml 2024-06-17 22:02:36.000000000 +0000 @@ -0,0 +1,70 @@ +fields: + +- mode: NULLABLE + name: date + type: DATE + description: The first date in the date range for the acquisition data + +- mode: NULLABLE + name: application_id + type: STRING + description: The Store ID of the app for which you are retrieving acquisition data + +- mode: NULLABLE + name: application_name + type: STRING + description: The display name of the app + +- mode: NULLABLE + name: acquisition_type + type: STRING + description: The type of acquisition + +- mode: NULLABLE + name: store_client + type: STRING + description: The version of the Store where the acquisition occurred + +- mode: NULLABLE + name: gender + type: STRING + description: The gender of the user who made the acquisition + +- mode: NULLABLE + name: market + type: STRING + description: The ISO 3166 country code of the market where the acquisition occurred + +- mode: NULLABLE + name: os_version + type: STRING + description: the OS version on which the acquisition occurred + +- mode: NULLABLE + name: device_type + type: STRING + description: he type of device on which the acquisition occurred + +- mode: NULLABLE + name: acquisition_quantity + type: INT64 + description: The number of acquisitions that occurred during the specified aggregation level +- mode: NULLABLE + name: purchase_price_usd_amount + type: NUMERIC + description: Purchase price USD amount + +- mode: NULLABLE + name: purchase_price_local_amount + type: NUMERIC + description: Purchase price local currency amount + +- mode: NULLABLE + name: purchase_tax_usd_amount + type: NUMERIC + description: Purchase tax USD amount + +- mode: NULLABLE + name: purchase_tax_local_amount + type: NUMERIC + description: Purchase tax local currency amount diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/metadata.yaml /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/metadata.yaml --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/metadata.yaml 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/metadata.yaml 2024-06-17 22:20:07.000000000 +0000 @@ -0,0 +1,21 @@ +friendly_name: App Acquisitions +description: |- + This table represents acquisition data downloaded from the Microsoft Store for Firefox, FX Beta and FX Nightly + The link for this data is here: https://manage.devcenter.microsoft.com/v1.0/my/analytics/appchannelconversions? +owners: +- mhirose@mozilla.com +labels: + owner1: mhirose +bigquery: + time_partitioning: + type: day + field: DATE + require_partition_filter: false + expiration_days: null + range_partitioning: null + clustering: null +workgroup_access: +- role: roles/bigquery.dataViewer + members: + - workgroup:mozilla-confidential +references: {} diff -bur --no-dereference --new-file /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/query.py /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/query.py --- /tmp/workspace/main-generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/query.py 1970-01-01 00:00:00.000000000 +0000 +++ /tmp/workspace/generated-sql/sql/moz-fx-data-shared-prod/microsoft_derived/app_conversions_v1/query.py 2024-06-17 22:02:36.000000000 +0000 @@ -0,0 +1,236 @@ +"""microsoft_store data - download deliverables, clean and upload to BigQuery.""" + +import csv +import json +import tempfile +from argparse import ArgumentParser + +import requests +from google.cloud import bigquery + +API_URI = "https://manage.devcenter.microsoft.com" + +"""MICROSOFT_STORE_APP_LIST is a list of dictionaries. +Keys are the app_name, and app_tenant_id in the form: +{"app_name":" dict: + """Read JSON file.""" + with open(filename, "r") as f: + data = json.loads(f.read()) + return data + + +def write_dict_to_csv(json_data, filename): + """Write a dictionary to a csv.""" + with open(filename, "w") as out_file: + dict_writer = csv.DictWriter(out_file, CSV_FIELDS) + dict_writer.writeheader() + dict_writer.writerows(json_data) + + +def microsoft_authorization(tenant_id, client_id, client_secret, resource_url): + """Microsoft Store Authoriazation. Returns the bearer token required for data download.""" + url = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token" + query_params = { + "grant_type": "client_credentials", + "client_id": client_id, + "client_secret": client_secret, + "resource": resource_url, + } + headers = {"Content-Type": "application/x-www-form-urlencoded"} + json_file = post_response(url, headers, query_params) + response_data = json.loads(json_file.text) + bearer_token = response_data["access_token"] + return bearer_token + + +def download_microsoft_store_data(date, application_id, bearer_token): + """Download data from Microsoft - application_id, bearer_token are called here.""" + start_date = date # + end_date = date + token = bearer_token + app_id = application_id + groupBy = [ + "date", + "applicationId", + "applicationName", + "customCampaignId", + "channelType", + "storeClient", + "deviceType", + "market", + ] + # getting overview metrics for different kpis / Deliverables + url = f"https://manage.devcenter.microsoft.com/v1.0/my/analytics/appchannelconversions?applicationId={app_id}" + url_params = f"aggregationLevel=day&startDate={start_date}&endDate={end_date}&skip=0&groupBy={','.join(groupBy)}&orderby=date" + headers = { + "Content-Type": "application/x-www-form-urlencoded", + "Authorization": f"Bearer {token}", + } + print(url) + response = get_response(url, headers, url_params) + return response + + +def check_json(microsoft_store_response_text): + """Script will return an empty dictionary for apps on days when there is no data. Check for that here.""" + with tempfile.NamedTemporaryFile() as tmp_json: + with open(tmp_json.name, "w") as f_json: + f_json.write(microsoft_store_response_text) + try: + query_export = read_json(f_json.name) + except ( + ValueError + ): # ex. json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0) + return None + return query_export + + +def clean_json(query_export, date): + """Turn the json file into a list to be input into a CSV for bq upload.""" + fields_list = [] + for val in query_export["Value"]: + field_dict = { + "date": val["date"], + "application_id": val["applicationId"], + "application_name": val["applicationName"], + "custom_campaign_id": val["customCampaignId"], + "referrer_uri_domain": val["referrerUriDomain"], + "channel_type": val["channelType"], + "store_client": val["storeClient"], + "device_type": val["deviceType"], + "market": val["market"], + "click_count": val["clickCount"], + "conversion_count": val["conversionCount"], + } + fields_list.append(field_dict) + return fields_list + + +def upload_to_bigquery(csv_data, project, dataset, table_name, date): + """Upload the data to bigquery.""" + date = date + print("writing json to csv") + partition = f"{date}".replace("-", "") + print(partition) + with tempfile.NamedTemporaryFile() as tmp_csv: + with open(tmp_csv.name, "w+b") as f_csv: + write_dict_to_csv(csv_data, f_csv.name) + client = bigquery.Client(project) + job_config = bigquery.LoadJobConfig( + create_disposition="CREATE_IF_NEEDED", + write_disp ```

⚠️ Only part of the diff is displayed.

Link to full diff