Open bdtoole opened 1 month ago
Here are the code search results. I'm now analyzing these search results to write the PR.
src/manifest/dbtProject.ts
Read the ignore-directories configuration and filter out models in those directories.
async performDatapilotHealthcheck(args: AltimateConfigProps) { const manifestPath = this.getManifestPath(); if (!manifestPath) { throw new Error( `Unable to find manifest path for project ${this.getProjectName()}`, ); } const healthcheckArgs: HealthcheckArgs = { manifestPath }; if (args.configType === "Manual") { healthcheckArgs.configPath = args.configPath; } else { if (args.configType === "Saas") { healthcheckArgs.config = args.config; } if ( args.configType === "All" || args.config_schema.some((i) => i.files_required.includes("Catalog")) ) { const docsGenerateCommand = this.dbtCommandFactory.createDocsGenerateCommand(); docsGenerateCommand.focus = false; docsGenerateCommand.logToTerminal = false; docsGenerateCommand.showProgress = false; await this.dbtProjectIntegration.executeCommandImmediately( docsGenerateCommand, ); healthcheckArgs.catalogPath = this.getCatalogPath(); if (!healthcheckArgs.catalogPath) { throw new Error( `Unable to find catalog path for project ${this.getProjectName()}`, ); } } } this.terminal.debug( "performDatapilotHealthcheck", "Performing healthcheck", healthcheckArgs, ); const projectHealthcheck = await this.dbtProjectIntegration.performDatapilotHealthcheck( healthcheckArgs, ); // temp fix: ideally datapilot should return absolute path for (const key in projectHealthcheck.model_insights) { for (const item of projectHealthcheck.model_insights[key]) { item.path = path.join(this.projectRoot.fsPath, item.original_file_path); } } return projectHealthcheck; }
/**
 * Runs the Datapilot healthcheck for this project and returns the result with
 * insights from user-ignored directories removed.
 *
 * The list of ignored directories is read from the `dbt` configuration section
 * under `healthcheck.ignoreDirectories` (defaults to an empty list) and is also
 * forwarded to the integration as part of the healthcheck arguments.
 *
 * When the config type is "All", or any entry of `args.config_schema` lists
 * "Catalog" in `files_required`, a docs-generate command is executed first and
 * the resulting catalog path is passed along.
 *
 * @param args - Healthcheck configuration selected by the caller.
 * @returns The healthcheck result; every remaining insight has `path` set to
 *   the project root joined with its `original_file_path`.
 * @throws Error if the manifest path cannot be found, or if a catalog is
 *   required and its path cannot be found after docs generation.
 */
async performDatapilotHealthcheck(args: AltimateConfigProps) {
  const manifestPath = this.getManifestPath();
  if (!manifestPath) {
    throw new Error(
      `Unable to find manifest path for project ${this.getProjectName()}`,
    );
  }
  const ignoreDirectories = workspace
    .getConfiguration("dbt")
    .get<string[]>("healthcheck.ignoreDirectories", []);
  const healthcheckArgs: HealthcheckArgs = { manifestPath, ignoreDirectories };
  if (args.configType === "Manual") {
    healthcheckArgs.configPath = args.configPath;
  } else {
    if (args.configType === "Saas") {
      healthcheckArgs.config = args.config;
    }
    if (
      args.configType === "All" ||
      args.config_schema.some((i) => i.files_required.includes("Catalog"))
    ) {
      const docsGenerateCommand =
        this.dbtCommandFactory.createDocsGenerateCommand();
      docsGenerateCommand.focus = false;
      docsGenerateCommand.logToTerminal = false;
      docsGenerateCommand.showProgress = false;
      await this.dbtProjectIntegration.executeCommandImmediately(
        docsGenerateCommand,
      );
      healthcheckArgs.catalogPath = this.getCatalogPath();
      if (!healthcheckArgs.catalogPath) {
        throw new Error(
          `Unable to find catalog path for project ${this.getProjectName()}`,
        );
      }
    }
  }
  this.terminal.debug(
    "performDatapilotHealthcheck",
    "Performing healthcheck",
    healthcheckArgs,
  );
  const projectHealthcheck =
    await this.dbtProjectIntegration.performDatapilotHealthcheck(
      healthcheckArgs,
    );

  // Compare on forward-slash paths so entries written with either separator,
  // a leading "./" or a trailing "/" all behave the same. Empty entries are
  // dropped: an empty prefix would match (and hide) every insight.
  const toForwardSlashes = (value: string) => value.replace(/\\/g, "/");
  const ignoredPrefixes = ignoreDirectories
    .map((dir) =>
      toForwardSlashes(dir).replace(/^\.\//, "").replace(/\/+$/, ""),
    )
    .filter((dir) => dir.length > 0);

  // Match whole path segments only, so ignoring "models/old" does not also
  // hide "models/old_but_good/...".
  const isIgnored = (filePath: string) => {
    const normalized = toForwardSlashes(filePath);
    return ignoredPrefixes.some(
      (dir) => normalized === dir || normalized.startsWith(`${dir}/`),
    );
  };

  for (const key of Object.keys(projectHealthcheck.model_insights)) {
    const kept = projectHealthcheck.model_insights[key].filter(
      (item) => !isIgnored(item.original_file_path),
    );
    // temp fix: ideally datapilot should return absolute path
    for (const item of kept) {
      item.path = path.join(this.projectRoot.fsPath, item.original_file_path);
    }
    projectHealthcheck.model_insights[key] = kept;
  }
  return projectHealthcheck;
}
dbt_healthcheck.py
Add an ignore_directories parameter to DBTInsightGenerator and skip models in those directories.
def project_healthcheck( manifest_path, catalog_path=None, config_path=None, config=None ): try: import logging import json from datapilot.config.config import load_config from datapilot.core.platforms.dbt.utils import load_catalog from datapilot.core.platforms.dbt.utils import load_manifest from datapilot.core.platforms.dbt.constants import MODEL from datapilot.core.platforms.dbt.executor import DBTInsightGenerator logging.basicConfig(level=logging.INFO) manifest = load_manifest(manifest_path) catalog = load_catalog(catalog_path) if catalog_path else None if not config and config_path: config = load_config(config_path) insight_generator = DBTInsightGenerator( manifest=manifest, catalog=catalog, config=config, ) reports = insight_generator.run() # package_insights = reports[PROJECT] model_insights = { k: [json.loads(item.json()) for item in v] for k, v in reports[MODEL].items() } return {"model_insights": model_insights} except Exception as e: raise Exception(str(e))
def project_healthcheck(
    manifest_path, catalog_path=None, config_path=None, config=None, ignore_directories=None
):
    """Run the datapilot dbt healthcheck and return its model insights as plain dicts.

    Args:
        manifest_path: Path handed to ``load_manifest``.
        catalog_path: Optional path handed to ``load_catalog``; when falsy no
            catalog is loaded.
        config_path: Optional path handed to ``load_config``; only used when
            ``config`` is falsy.
        config: Optional already-loaded configuration.
        ignore_directories: Optional iterable of string prefixes. It is
            forwarded unchanged to ``DBTInsightGenerator``, and any report key
            starting with one of the prefixes is dropped from the result.
            ``None`` (the default) or an empty iterable filters nothing.

    Returns:
        ``{"model_insights": {key: [dict, ...]}}`` where each dict is the
        JSON-decoded form of an insight item.

    Raises:
        Exception: Any failure is re-raised as a plain ``Exception`` carrying
            the original message; the original error is chained as the cause.
    """
    try:
        import logging
        import json
        from datapilot.config.config import load_config
        from datapilot.core.platforms.dbt.utils import load_catalog
        from datapilot.core.platforms.dbt.utils import load_manifest
        from datapilot.core.platforms.dbt.constants import MODEL
        from datapilot.core.platforms.dbt.executor import DBTInsightGenerator

        logging.basicConfig(level=logging.INFO)

        # The parameter defaults to None; iterating None would raise TypeError
        # for every caller that does not pass it, so fall back to "no prefixes".
        # str.startswith accepts a tuple and returns False for an empty one.
        ignored_prefixes = tuple(ignore_directories or ())

        manifest = load_manifest(manifest_path)
        catalog = load_catalog(catalog_path) if catalog_path else None
        if not config and config_path:
            config = load_config(config_path)
        insight_generator = DBTInsightGenerator(
            manifest=manifest,
            catalog=catalog,
            config=config,
            ignore_directories=ignore_directories,
        )
        reports = insight_generator.run()
        # package_insights = reports[PROJECT]
        # NOTE(review): the filter is applied to the report keys themselves;
        # confirm these keys are file paths rather than model identifiers.
        model_insights = {
            k: [json.loads(item.json()) for item in v]
            for k, v in reports[MODEL].items()
            if not k.startswith(ignored_prefixes)
        }
        return {"model_insights": model_insights}
    except Exception as e:
        # Keep the historical exception type/message, but chain the cause so
        # the original traceback is not lost.
        raise Exception(str(e)) from e
Your changes have been successfully made to the branch sweep/specify_ignore_directories_for_the_healt. I have validated these changes using a syntax checker and a linter.
[!TIP] To recreate the pull request, edit the issue title or description.
This is an automated message generated by Sweep AI.
Describe the feature
Right now, the health check looks at the entire directory structure for the project as a whole, but it would be nice to be able to specify one or more directories to be ignored by the health check.
Describe alternatives you've considered
No response
Who will benefit?
We have a couple of directories in our project that contain old models slated for deprecation. We know these models have problems and won't pass the health check, so we'd like to be able to mark those directories as skippable so that we aren't alerted about their issues every time.
Are you willing to submit PR?