Closed danielkhan closed 1 year ago
The first explored possibility is to naively run the query on the database and check which tags are being used.
A simple script to generate that query could be:
# Tags treated as supported. build_tags_condition() turns each entry into a
# `SQ.query LIKE '%<tag>:%'` clause, and build_base_query() negates the
# combined condition, so stored queries mentioning any of these tags are
# left out of the generated report.
SUPPORTED_TAGS = [
"release",
"transaction",
"transaction.status",
"transaction.op",
"http.method"
]
def build_tags_condition():
    """Return a parenthesised SQL predicate that matches any supported tag.

    Every entry of ``SUPPORTED_TAGS`` contributes one
    ``SQ.query LIKE '%<tag>:%'`` clause; the clauses are OR-ed together.
    """
    clauses = [f"SQ.query LIKE '%{tag}:%'" for tag in SUPPORTED_TAGS]
    return "(" + " OR ".join(clauses) + ")"
def build_base_query(project_id):
    """Build the SQL text listing alert queries that use no supported tag.

    The statement is returned as text (it is meant to be pasted into Redash),
    so it cannot be parameterized. To keep arbitrary input out of the SQL,
    ``project_id`` is validated as an integer before being interpolated.

    Args:
        project_id: Project identifier, as an int or a numeric string.

    Returns:
        The SQL statement as a string, surrounded by newlines.

    Raises:
        ValueError: If ``project_id`` is not a valid integer.
    """
    # Validate first: the value usually comes straight from input().
    project_id = int(project_id)
    tags_condition = build_tags_condition()
    return (
        "\n"
        "SELECT *\n"
        "FROM sentry_snubaquery AS SQ\n"
        "INNER JOIN sentry_querysubscription AS QS\n"
        "ON QS.snuba_query_id = SQ.id\n"
        f"WHERE SQ.dataset = 'transactions' AND QS.project_id = {project_id} "
        f"AND (NOT {tags_condition}) AND SQ.query <> ''\n"
        "ORDER BY QS.date_added DESC\n"
    )
if __name__ == '__main__':
    # Prompt for the project, then print the query so it can be pasted into Redash.
    chosen_project = input("Choose a project id: ")
    print("The query to run in Redash is: ")
    redash_query = build_base_query(chosen_project)
    print(redash_query)
This query doesn't find entries that have a mix of supported and unsupported tags, which is a problem that should be solved at a different level.
I iterated on a second possible solution, which can be run at runtime inside the Sentry shell
by executing sentry exec [file].py
in the shell in which production Sentry is executed. This script uses internal APIs to perform the metrics compatibility check.
from collections import defaultdict
import pytz
from sentry.models import Project, Organization
from sentry.snuba.metrics_enhanced_performance import query as performance_query
from datetime import datetime, timedelta
from sentry.snuba.models import QuerySubscription
# CONFIGURE YOUR PARAMETERS HERE
# Id of the organization to check. It is read as a module-level global by
# run() (to load the organization) and by is_metrics_data() (query params).
org_id = 1
def write_results(filename, results):
    """Write the report of unsupported alerts to ``<filename>.txt``.

    The file is overwritten. It starts with a header line, followed by one
    line per project listing the ids of its unsupported alerts.

    Args:
        filename: Output path without the ``.txt`` extension.
        results: Mapping of project id to a list of alert ids.
    """
    lines = ["The following project_id: [alert_ids...] entries are not supported on AM2:\n"]
    lines.extend(
        f"Project {project_id} alerts that are not supported: {alert_ids!s}\n"
        for project_id, alert_ids in results.items()
    )
    # The context manager closes the file even if a write fails.
    with open(f"{filename}.txt", "w", encoding="utf-8") as report:
        report.writelines(lines)
def is_metrics_data(query, project_id=1):
    """Run ``count()`` for ``query`` and report whether metrics served it.

    The query is executed through ``performance_query`` over the last 24
    hours for the organization configured in the module-level ``org_id``.

    Args:
        query: The search query string to check.
        project_id: Value sent as ``project_id`` in the query params. It
            defaults to 1, the value that used to be hard-coded, so existing
            callers keep working; pass the alert's own project to check the
            query against that project instead.

    Returns:
        The ``isMetricsData`` entry of the result's ``meta`` mapping, or
        ``None`` when it is absent.
    """
    # Take a single timestamp so the window is exactly one day wide.
    now = datetime.now(tz=pytz.UTC)
    params = {
        "organization_id": org_id,
        "project_id": project_id,
        "start": now - timedelta(days=1),
        "end": now,
    }
    results = performance_query(
        selected_columns=["count()"],
        query=query,
        params=params,
        referrer="api.organization-events",
    )
    return results.get("meta", {}).get("isMetricsData", None)
def get_all_alerts_of_project(project_id):
    """Return ``(id, query)`` pairs for the project's query subscriptions.

    Only subscriptions whose snuba query targets the ``transactions`` or
    ``discover`` dataset are included.
    """
    subscriptions = QuerySubscription.objects.filter(
        project_id=project_id,
        snuba_query__dataset__in=["transactions", "discover"],
    )
    subscriptions = subscriptions.select_related("snuba_query")
    return subscriptions.values_list("id", "snuba_query__query")
def run():
    """Check every alert of the configured organization and write a report.

    For each project of organization ``org_id``, every alert query is passed
    to ``is_metrics_data``. Alerts for which it returns a falsy value are
    collected per project and written to ``results.txt``.
    """
    organization = Organization.objects.get(id=org_id)
    print(f"Fetching all projects of organization {org_id}...")
    project_ids = (
        Project.objects.using_replica()
        .filter(organization=organization)
        .values_list("id", flat=True)
    )
    print(f"Found {len(project_ids)} projects")
    print("----")

    unsupported = defaultdict(list)
    for project_id in project_ids:
        for alert_id, query in get_all_alerts_of_project(project_id):
            print(f"Found alert {alert_id} with query {query} for project {project_id}")
            if not is_metrics_data(query):
                print(f"Alert {alert_id} does not support metrics :(")
                # Remember the alert as unsupported for this project.
                unsupported[project_id].append(alert_id)
            else:
                print(f"Alert {alert_id} supports metrics :)")
            # NOTE(review): the original nesting was lost; the separator is
            # assumed to be printed once per alert.
            print("----")

    print("Writing results to file results.txt")
    write_results("results", unsupported)
# Running the script here: the check starts as soon as this file is executed
# (there is no `__main__` guard).
run()
Hi @iambriccardo, thanks for the help! I'm exploring running the script provided in the second solution. I'm part of the Sales/Solutions engineering team and, AFAIK, our team does not have access to the production database. Any advice on how we could make this script accessible to our team?
On a separate note, regressions could also be found in Dashboards specifically in widgets. Could we also flag widgets that contain unsupported tags?
@dachakra I finalized a version that supports widgets this morning. This script can be executed in the sentry shell, which, to my knowledge, requires admin access.
This is the script containing the support for dashboards:
from collections import defaultdict
import pytz
from sentry.models import Project, Organization, DashboardWidgetQuery
from sentry.snuba.metrics_enhanced_performance import query as performance_query
from sentry.snuba.discover import query as discover_query
from datetime import datetime, timedelta
from sentry.snuba.models import QuerySubscription
# Minimum SDK versions that support Performance at Scale, keyed by SDK name.
# get_outdated_sdks() looks each `sdk.name` value up in this mapping; a name
# that is missing here is reported as outdated, and a found version lower than
# the mapped minimum is reported together with the required version.
# The list is defined here: https://docs.sentry.io/product/performance/performance-at-scale/getting-started/?original_referrer=https://docs.sentry.io/
SUPPORTED_SDK_VERSIONS = {
"sentry-python": "1.7.2",
"sentry.python.tornado": "1.7.2",
"sentry.python.starlette": "1.7.2",
"sentry.python.flask": "1.7.2",
"sentry.python.fastapi": "1.7.2",
"sentry.python.falcon": "1.7.2",
"sentry.python.django": "1.7.2",
"sentry.python.bottle": "1.7.2",
"sentry.python.aws_lambda": "1.7.2",
"sentry.python.aiohttp": "1.7.2",
"sentry.python": "1.7.2",
"sentry-browser": "7.6.0",
"sentry.javascript.angular": "7.6.0",
"sentry.javascript.browser": "7.6.0",
"sentry.javascript.ember": "7.6.0",
"sentry.javascript.gatsby": "7.6.0",
"sentry.javascript.nextjs": "7.6.0",
"sentry.javascript.react": "7.6.0",
"sentry.javascript.remix": "7.6.0",
"sentry.javascript.serverless": "7.6.0",
"sentry.javascript.svelte": "7.6.0",
"sentry.javascript.vue": "7.6.0",
"sentry-cocoa": "7.23.0",
"sentry-objc": "7.23.0",
"sentry-swift": "7.23.0",
"sentry.cocoa": "7.23.0",
"sentry.swift": "7.23.0",
"SentrySwift": "7.23.0",
"sentry-android": "6.5.0",
"sentry.java.android.timber": "6.5.0",
"sentry.java.android": "6.5.0",
"sentry.native.android": "6.5.0",
"sentry-react-native": "4.3.0",
"sentry.cocoa.react-native": "4.3.0",
"sentry.java.android.react-native": "4.3.0",
"sentry.javascript.react-native": "4.3.0",
"sentry.native.android.react-native": "4.3.0",
"dart": "6.11.0",
"dart-sentry-client": "6.11.0",
"sentry.dart": "6.11.0",
"sentry.dart.logging": "6.11.0",
"sentry.cocoa.flutter": "6.11.0",
"sentry.dart.flutter": "6.11.0",
"sentry.java.android.flutter": "6.11.0",
"sentry.native.android.flutter": "6.11.0",
"sentry.dart.browser": "6.11.0",
"sentry-php": "3.9.0",
"sentry.php": "3.9.0",
"sentry-laravel": "3.0.0",
"sentry.php.laravel": "3.0.0",
"sentry-symfony": "4.4.0",
"sentry.php.symfony": "4.4.0",
"Symphony.SentryClient": "4.4.0",
"sentry-ruby": "5.5.0",
"sentry.ruby": "5.5.0",
"sentry.ruby.delayed_job": "5.5.0",
"sentry.ruby.rails": "5.5.0",
"sentry.ruby.resque": "5.5.0",
"sentry.ruby.sidekiq": "5.5.0",
"sentry-java": "6.5.0",
"sentry.java": "6.5.0",
"sentry.java.jul": "6.5.0",
"sentry.java.log4j2": "6.5.0",
"sentry.java.logback": "6.5.0",
"sentry.java.spring": "6.5.0",
"sentry.java.spring-boot": "6.5.0",
"sentry.java.spring-boot.jakarta": "6.5.0",
"sentry.aspnetcore": "3.22.0",
"Sentry.AspNetCore": "3.22.0",
"sentry.dotnet": "3.22.0",
"sentry.dotnet.android": "3.22.0",
"sentry.dotnet.aspnet": "3.22.0",
"sentry.dotnet.aspnetcore": "3.22.0",
"sentry.dotnet.aspnetcore.grpc": "3.22.0",
"sentry.dotnet.atlasproper": "3.22.0",
"sentry.dotnet.cocoa": "3.22.0",
"sentry.dotnet.ef": "3.22.0",
"sentry.dotnet.extensions.logging": "3.22.0",
"sentry.dotnet.google-cloud-function": "3.22.0",
"sentry.dotnet.log4net": "3.22.0",
"sentry.dotnet.maui": "3.22.0",
"sentry.dotnet.nlog": "3.22.0",
"sentry.dotnet.serilog": "3.22.0",
"sentry.dotnet.xamarin": "3.22.0",
"sentry.dotnet.xamarin-forms": "3.22.0",
"Sentry.Extensions.Logging": "3.22.0",
"Sentry.NET": "3.22.0",
"Sentry.UWP": "3.22.0",
"SentryDotNet": "3.22.0",
"SentryDotNet.AspNetCore": "3.22.0",
"sentry.go": "0.16.0",
}
# CONFIGURE YOUR PARAMETERS HERE
# run() loads the organization with this id and checks its widgets, alerts and SDKs.
org_id = 1 # Organization that you want to run the script on.
def get_widget_url(org_slug, dashboard_id, widget_id):
    """Return the URL of a dashboard widget in the organization's Sentry UI."""
    organization_root = f"https://{org_slug}.sentry.io/organizations/{org_slug}"
    return f"{organization_root}/dashboard/{dashboard_id}/widget/{widget_id}/"
def get_alert_url(org_slug, alert_id):
    """Return the URL of an alert rule's details page in the organization's Sentry UI."""
    organization_root = f"https://{org_slug}.sentry.io/organizations/{org_slug}"
    return f"{organization_root}/alerts/rules/details/{alert_id}/"
def get_found_sdks_url(org_slug):
    """Return the Discover URL listing SDK name/version counts per project."""
    # "count%28%29" is the URL-encoded form of "count()".
    fields = ("sdk.version", "sdk.name", "project", "count%28%29")
    query_string = "&".join(f"field={field}" for field in fields)
    return (
        f"https://{org_slug}.sentry.io/organizations/{org_slug}"
        f"/discover/homepage/?{query_string}"
    )
def compare_versions(version1, version2):
    """Compare two dotted version strings numerically.

    Components are compared left to right as integers. The shorter version
    is padded with trailing zeros, so ``"2.0"`` equals ``"2.0.0"`` and is
    greater than ``"1.7.2"``. Padding at the front would shift the major
    version into a lower position and give wrong results.

    Pre-release and build suffixes (everything from the first ``-`` or
    ``+``) are ignored, and only the leading digits of each component are
    used. ``"7.6.0-beta.1"`` and ``"1.7.2rc1"`` therefore compare like
    ``"7.6.0"`` and ``"1.7.2"`` instead of raising ``ValueError``.

    Args:
        version1: First version string.
        version2: Second version string.

    Returns:
        ``1`` if ``version1`` is greater, ``-1`` if it is lower, ``0`` if
        both are equal.
    """

    def _numeric_components(version):
        # Keep only the "X.Y.Z" core of the version.
        core = version.strip().split("-", 1)[0].split("+", 1)[0]
        components = []
        for piece in core.split("."):
            piece = piece.strip()
            end = 0
            while end < len(piece) and piece[end] in "0123456789":
                end += 1
            # A component without leading digits counts as zero.
            components.append(int(piece[:end]) if end else 0)
        return components

    nums1 = _numeric_components(version1)
    nums2 = _numeric_components(version2)

    # Pad the shorter version with trailing zeros to ensure equal length.
    length = max(len(nums1), len(nums2))
    nums1 += [0] * (length - len(nums1))
    nums2 += [0] * (length - len(nums2))

    # The first differing component decides the result.
    for num1, num2 in zip(nums1, nums2):
        if num1 != num2:
            return 1 if num1 > num2 else -1

    # All numbers are equal
    return 0
def format_results(
    organization, unsupported_widgets, unsupported_alerts, outdated_sdks_per_project
):
    """Render the compatibility report as a single text block.

    The report has three sections: unsupported widgets grouped by dashboard,
    unsupported alerts grouped by project, and outdated SDKs grouped by
    project and SDK name. An empty section gets an "all good" line instead.

    NOTE(review): the original nesting was lost; each trailing newline is
    assumed to belong to the statement group directly above it.
    """
    slug = organization.slug
    chunks = ["Unsupported widgets on AM2", "\n\n"]

    for dashboard_id, widget_ids in unsupported_widgets.items():
        chunks.append(f"- Dashboard {dashboard_id}\n")
        chunks.extend(
            f"\t{get_widget_url(slug, dashboard_id, widget_id)}\n" for widget_id in widget_ids
        )
        chunks.append("\n")
    if not unsupported_widgets:
        chunks.append("- All widgets are supported\n\n")

    chunks.append("Unsupported alerts on AM2\n\n")
    for project_id, alert_ids in unsupported_alerts.items():
        chunks.append(f"- Project {project_id}\n")
        chunks.extend(f"\t{get_alert_url(slug, alert_id)}\n" for alert_id in alert_ids)
        chunks.append("\n")
    if not unsupported_alerts:
        chunks.append("- All alerts are supported\n")

    chunks.append("SDKs not supporting Performance at Scale\n")
    chunks.append(f"Source: {get_found_sdks_url(slug)}\n\n")
    for project, found_sdks in outdated_sdks_per_project.items():
        chunks.append(f"Project {project}\n")
        for sdk_name, sdk_versions in found_sdks.items():
            chunks.append(f"\t{sdk_name}\n")
            chunks.extend(f"\t\t{sdk_version}\n" for sdk_version in sdk_versions)
    if not outdated_sdks_per_project:
        chunks.append("- No outdated SDKs found\n")

    return "".join(chunks)
def extract_sdks_from_data(data):
    """Group the SDK versions found in query rows by project and SDK name.

    Args:
        data: Iterable of row mappings with ``project``, ``sdk.name`` and
            ``sdk.version`` keys, or ``None``. ``None`` is treated as "no
            rows", because the caller passes ``results.get("data")``, which
            is ``None`` when the key is missing.

    Returns:
        A nested ``defaultdict`` of the form
        ``{project: {sdk_name: {sdk_version, ...}}}``. Rows lacking an SDK
        name or version are skipped.
    """
    found_sdks_per_project = defaultdict(lambda: defaultdict(set))

    for element in data or ():
        sdk_name = element.get("sdk.name")
        sdk_version = element.get("sdk.version")
        if sdk_name and sdk_version:
            found_sdks_per_project[element.get("project")][sdk_name].add(sdk_version)

    return found_sdks_per_project
def get_outdated_sdks(found_sdks_per_project):
    """Filter the found SDKs down to those below the supported minimum.

    An SDK name that is absent from ``SUPPORTED_SDK_VERSIONS`` is reported
    with each found version as-is. A known SDK is reported only for versions
    that ``compare_versions`` ranks below the minimum, as
    ``"<found> found <minimum> required"``.

    Returns:
        A nested ``defaultdict`` ``{project: {sdk_name: {entry, ...}}}``.
    """
    outdated = defaultdict(lambda: defaultdict(set))

    for project, found_sdks in found_sdks_per_project.items():
        for sdk_name, sdk_versions in found_sdks.items():
            required = SUPPORTED_SDK_VERSIONS.get(sdk_name)
            for found in sdk_versions:
                if required is None:
                    # An unknown SDK is assumed not to support dynamic sampling.
                    outdated[project][sdk_name].add(found)
                elif compare_versions(found, required) == -1:
                    # Lower than the minimum, thus not supported.
                    outdated[project][sdk_name].add(f"{found} found {required} required")

    return outdated
def get_sdks_version_used(organization_id, project_id, project_objects):
    """Query the SDKs seen in the last 24 hours and return the outdated ones.

    Runs a ``discover_query`` with an empty search query, selecting
    ``count()`` together with ``project``, ``sdk.name`` and ``sdk.version``.
    The rows are then reduced through ``extract_sdks_from_data`` and
    ``get_outdated_sdks``.
    """
    window_start = datetime.now(tz=pytz.UTC) - timedelta(days=1)
    window_end = datetime.now(tz=pytz.UTC)

    results = discover_query(
        # count() is there so rows are grouped by project, sdk.name and sdk.version.
        selected_columns=["count()", "project", "sdk.name", "sdk.version"],
        query="",
        params={
            "organization_id": organization_id,
            "project_id": project_id,
            "project_objects": project_objects,
            "start": window_start,
            "end": window_end,
        },
        referrer="api.organization-events",
    )

    return get_outdated_sdks(extract_sdks_from_data(results.get("data")))
def is_metrics_data(organization_id, project_id, query):
    """Run ``count()`` for ``query`` and report whether metrics served it.

    The query goes through ``performance_query`` over the last 24 hours.

    Returns:
        The ``isMetricsData`` entry of the result's ``meta`` mapping, or
        ``None`` when it is absent.
    """
    window_start = datetime.now(tz=pytz.UTC) - timedelta(days=1)
    window_end = datetime.now(tz=pytz.UTC)

    results = performance_query(
        # count() is used since it is the most generic operation.
        selected_columns=["count()"],
        query=query,
        params={
            "organization_id": organization_id,
            "project_id": project_id,
            "start": window_start,
            "end": window_end,
        },
        referrer="api.organization-events",
    )

    meta = results.get("meta", {})
    return meta.get("isMetricsData", None)
def get_all_widgets_of_organization(organization_id):
    """Return one row per widget query of the organization's dashboards.

    Each row is ``(widget_id, dashboard_id, dashboard_title, conditions)``.

    The first column is the id of the widget that owns the query
    (``widget__id``), not the id of the ``DashboardWidgetQuery`` row. The
    caller unpacks it as ``widget_id`` and uses it to build the widget URL,
    so the query row id would point to the wrong widget.
    """
    widget_queries = DashboardWidgetQuery.objects.filter(
        widget__dashboard__organization_id=organization_id,
    )
    return widget_queries.values_list(
        "widget__id", "widget__dashboard__id", "widget__dashboard__title", "conditions"
    )
def get_all_alerts_of_project(project_id):
    """Return ``(id, query)`` pairs for the project's query subscriptions.

    Only subscriptions whose snuba query targets the ``transactions`` or
    ``discover`` dataset are included.

    NOTE(review): the returned id is the ``QuerySubscription`` id, while the
    caller uses it as an alert id in the alert URL. Confirm these match.
    """
    subscriptions = QuerySubscription.objects.filter(
        project_id=project_id,
        snuba_query__dataset__in=["transactions", "discover"],
    )
    subscriptions = subscriptions.select_related("snuba_query")
    return subscriptions.values_list("id", "snuba_query__query")
def run():
    """Run the full compatibility check for organization ``org_id``.

    Collects the dashboard widgets and alerts whose queries
    ``is_metrics_data`` reports as falsy, gathers the outdated SDKs seen in
    the last 24 hours, and prints the formatted summary.
    """
    organization = Organization.objects.get(id=org_id)
    organization_id = organization.id

    print(f"Fetching all projects of organization {organization_id}...")
    projects_queryset = Project.objects.using_replica().filter(organization=organization)
    all_projects = list(projects_queryset)
    print(f"Found {len(all_projects)} projects")

    unsupported_widgets = defaultdict(list)
    print(f"Fetching all dashboard widgets of organization {organization_id}")
    widget_rows = get_all_widgets_of_organization(organization_id)
    for widget_id, dashboard_id, _dashboard_title, query in widget_rows:
        if is_metrics_data(organization_id, None, query):
            continue
        # The widget's query is not supported on metrics.
        unsupported_widgets[dashboard_id].append(widget_id)

    unsupported_alerts = defaultdict(list)
    print(f"Fetching all alerts of organization {organization_id}")
    for project in all_projects:
        project_id = project.id
        for alert_id, query in get_all_alerts_of_project(project_id):
            if is_metrics_data(organization_id, project_id, query):
                continue
            # The alert's query is not supported on metrics.
            unsupported_alerts[project_id].append(alert_id)

    print(
        f"Fetching outdated SDKs versions used in the last 24 hours for organization {organization_id}..."
    )
    outdated_sdks_per_project = get_sdks_version_used(organization_id, None, all_projects)

    for banner_line in ("\n", "EXECUTION SUMMARY", "\n"):
        print(banner_line)

    summary = format_results(
        organization, unsupported_widgets, unsupported_alerts, outdated_sdks_per_project
    )
    print(summary)
# Running the script here: the check starts as soon as this file is executed
# (there is no `__main__` guard).
run()
In order to run this script, you will need to have it executed by ops in production using the command sentry exec [script].py.
List of orgs that can be used for testing (might not be 100% complete). We can also maybe pick a Sentry org and I can help to manually check it.
From the revenue/SE side: we want to run this against all non-self-serve AM1 customers that are paying for transactions, as we will have to work with those customers to migrate them accordingly.
The compatibility check is now available in _admin. Notion.
Problem Statement
Alerts and Dashboards that rely on custom tags might not work as intended if a user switches pricing plans from AM1 to AM2, as AM2 comes with server-side sampling. To find out if a regression is to be expected involves looking at all Dashboards and Alerts and their underlying queries in an account.
Solution Brainstorm
While we are working on a solution to other underlying problems, we want an easy, deterministic way to find out if a customer would be affected by this problem if they upgraded.
We want to know
cc @jan-auer
Product Area
Performance
Acceptance Criteria: