AltimateAI / vscode-dbt-power-user

This extension makes vscode seamlessly work with dbt™: Auto-complete, preview, column lineage, AI docs generation, health checks, cost estimation etc
https://www.altimate.ai
MIT License
427 stars 83 forks source link

Specify ignore directories for the health check #1112

Open bdtoole opened 1 month ago

bdtoole commented 1 month ago

Describe the feature

Right now, the health check looks at the entire directory structure for the project as a whole, but it would be nice to be able to specify one or more directories to be ignored by the health check.

Describe alternatives you've considered

No response

Who will benefit?

We have a scenario where a couple of directories in our project contain old models that will be deprecated. We know these models have problems and won't pass the health check, so we'd like to be able to mark those directories as skippable so that we don't get alerted about their issues every time.

Are you willing to submit PR?

sweep-ai[bot] commented 1 month ago

🚀 Here's the PR! #1162

💎 Sweep Pro: You have unlimited Sweep issues

Actions

Relevant files (click to expand). Mentioned files will always appear here. https://github.com/AltimateAI/vscode-dbt-power-user/blob/76cd20da0218230f0d18d18ac79986087558215e/dbt_healthcheck.py#L1-L32 https://github.com/AltimateAI/vscode-dbt-power-user/blob/76cd20da0218230f0d18d18ac79986087558215e/webview_panels/src/modules/healthCheck/ProjectHealthChecker.tsx#L1-L400 https://github.com/AltimateAI/vscode-dbt-power-user/blob/76cd20da0218230f0d18d18ac79986087558215e/src/webview_provider/insightsPanel.ts#L1-L687 https://github.com/AltimateAI/vscode-dbt-power-user/blob/76cd20da0218230f0d18d18ac79986087558215e/src/manifest/dbtProject.ts#L1-L1126 https://github.com/AltimateAI/vscode-dbt-power-user/blob/76cd20da0218230f0d18d18ac79986087558215e/src/manifest/dbtProjectContainer.ts#L1-L452 https://github.com/AltimateAI/vscode-dbt-power-user/blob/76cd20da0218230f0d18d18ac79986087558215e/src/manifest/dbtWorkspaceFolder.ts#L1-L266

Step 2: ⌨️ Coding

src/manifest/dbtProject.ts

Read ignore directories configuration and filter out models in those directories. async performDatapilotHealthcheck(args: AltimateConfigProps) { const manifestPath = this.getManifestPath(); if (!manifestPath) { throw new Error( `Unable to find manifest path for project ${this.getProjectName()}`, ); } const healthcheckArgs: HealthcheckArgs = { manifestPath }; if (args.configType === "Manual") { healthcheckArgs.configPath = args.configPath; } else { if (args.configType === "Saas") { healthcheckArgs.config = args.config; } if ( args.configType === "All" || args.config_schema.some((i) => i.files_required.includes("Catalog")) ) { const docsGenerateCommand = this.dbtCommandFactory.createDocsGenerateCommand(); docsGenerateCommand.focus = false; docsGenerateCommand.logToTerminal = false; docsGenerateCommand.showProgress = false; await this.dbtProjectIntegration.executeCommandImmediately( docsGenerateCommand, ); healthcheckArgs.catalogPath = this.getCatalogPath(); if (!healthcheckArgs.catalogPath) { throw new Error( `Unable to find catalog path for project ${this.getProjectName()}`, ); } } } this.terminal.debug( "performDatapilotHealthcheck", "Performing healthcheck", healthcheckArgs, ); const projectHealthcheck = await this.dbtProjectIntegration.performDatapilotHealthcheck( healthcheckArgs, ); // temp fix: ideally datapilot should return absolute path for (const key in projectHealthcheck.model_insights) { for (const item of projectHealthcheck.model_insights[key]) { item.path = path.join(this.projectRoot.fsPath, item.original_file_path); } } return projectHealthcheck; }
  /**
   * Runs the datapilot health check for this project and returns its
   * insights, with insights for models located in ignored directories removed.
   *
   * Ignored directories are read from the `dbt.healthcheck.ignoreDirectories`
   * setting. Matching is done on whole path segments, so `models/legacy`
   * ignores `models/legacy/a.sql` but not `models/legacy_v2/a.sql`.
   *
   * @param args Health check configuration. `Manual` uses `args.configPath`,
   *   `Saas` uses `args.config`; `All` (or any config that requires the
   *   catalog) triggers a `docs generate` run first.
   * @returns The health check result, with `path` set on every remaining
   *   model insight.
   * @throws Error when the manifest path, or a required catalog path, cannot
   *   be resolved.
   */
  async performDatapilotHealthcheck(args: AltimateConfigProps) {
    const manifestPath = this.getManifestPath();
    if (!manifestPath) {
      throw new Error(
        `Unable to find manifest path for project ${this.getProjectName()}`,
      );
    }

    const ignoreDirectories = workspace
      .getConfiguration("dbt")
      .get<string[]>("healthcheck.ignoreDirectories", []);

    // Compare paths with forward slashes only, so a setting written with
    // backslashes still matches.
    // NOTE(review): assumes original_file_path is relative to the project
    // root — confirm against datapilot's output.
    const toPosix = (p: string) => p.replace(/\\/g, "/");

    // Strip a leading "./" and trailing slashes, and drop empty entries: an
    // empty prefix would otherwise match (and hide) every model.
    const ignoredPrefixes = ignoreDirectories
      .map((dir) =>
        toPosix(dir)
          .replace(/^\.\//, "")
          .replace(/\/+$/, ""),
      )
      .filter((dir) => dir.length > 0);

    // A file is ignored when it is the entry itself or lives below it. The
    // explicit "/" keeps sibling directories sharing a name prefix visible.
    const isIgnored = (filePath: string) => {
      const normalized = toPosix(filePath);
      return ignoredPrefixes.some(
        (dir) => normalized === dir || normalized.startsWith(`${dir}/`),
      );
    };

    const healthcheckArgs: HealthcheckArgs = { manifestPath, ignoreDirectories };

    if (args.configType === "Manual") {
      healthcheckArgs.configPath = args.configPath;
    } else {
      if (args.configType === "Saas") {
        healthcheckArgs.config = args.config;
      }
      if (
        args.configType === "All" ||
        args.config_schema.some((i) => i.files_required.includes("Catalog"))
      ) {
        // The catalog is produced by `docs generate`; run it quietly so the
        // user is not interrupted by terminal focus or progress popups.
        const docsGenerateCommand =
          this.dbtCommandFactory.createDocsGenerateCommand();
        docsGenerateCommand.focus = false;
        docsGenerateCommand.logToTerminal = false;
        docsGenerateCommand.showProgress = false;
        await this.dbtProjectIntegration.executeCommandImmediately(
          docsGenerateCommand,
        );
        healthcheckArgs.catalogPath = this.getCatalogPath();
        if (!healthcheckArgs.catalogPath) {
          throw new Error(
            `Unable to find catalog path for project ${this.getProjectName()}`,
          );
        }
      }
    }
    this.terminal.debug(
      "performDatapilotHealthcheck",
      "Performing healthcheck",
      healthcheckArgs,
    );
    const projectHealthcheck =
      await this.dbtProjectIntegration.performDatapilotHealthcheck(
        healthcheckArgs,
      );

    // Filter out issues in ignored directories
    for (const key in projectHealthcheck.model_insights) {
      projectHealthcheck.model_insights[key] = projectHealthcheck.model_insights[
        key
      ].filter((item) => !isIgnored(item.original_file_path));
    }

    // temp fix: ideally datapilot should return absolute path
    for (const key in projectHealthcheck.model_insights) {
      for (const item of projectHealthcheck.model_insights[key]) {
        item.path = path.join(this.projectRoot.fsPath, item.original_file_path);
      }
    }
    return projectHealthcheck;
  }

dbt_healthcheck.py

Add ignore_directories parameter to DBTInsightGenerator and skip models in those directories. def project_healthcheck( manifest_path, catalog_path=None, config_path=None, config=None ): try: import logging import json from datapilot.config.config import load_config from datapilot.core.platforms.dbt.utils import load_catalog from datapilot.core.platforms.dbt.utils import load_manifest from datapilot.core.platforms.dbt.constants import MODEL from datapilot.core.platforms.dbt.executor import DBTInsightGenerator logging.basicConfig(level=logging.INFO) manifest = load_manifest(manifest_path) catalog = load_catalog(catalog_path) if catalog_path else None if not config and config_path: config = load_config(config_path) insight_generator = DBTInsightGenerator( manifest=manifest, catalog=catalog, config=config, ) reports = insight_generator.run() # package_insights = reports[PROJECT] model_insights = { k: [json.loads(item.json()) for item in v] for k, v in reports[MODEL].items() } return {"model_insights": model_insights} except Exception as e: raise Exception(str(e))
def project_healthcheck(
    manifest_path, catalog_path=None, config_path=None, config=None, ignore_directories=None
):
    """Run the datapilot health check and return the model insights.

    Args:
        manifest_path: Path of the dbt manifest, passed to ``load_manifest``.
        catalog_path: Optional path of the dbt catalog; loaded only when given.
        config_path: Optional path of a datapilot config file; only used when
            ``config`` is not supplied.
        config: Optional already-loaded config; takes precedence over
            ``config_path``.
        ignore_directories: Optional iterable of prefixes. Report entries
            whose key starts with one of them are left out of the result.
            ``None`` (the default) ignores nothing.

    Returns:
        ``{"model_insights": {key: [insight_dict, ...]}}`` built from the
        ``MODEL`` section of the generated reports.

    Raises:
        Exception: Any failure is re-raised as a plain ``Exception`` that
            carries only the original error message.
    """
    try:
        import logging
        import json

        from datapilot.config.config import load_config
        from datapilot.core.platforms.dbt.utils import load_catalog
        from datapilot.core.platforms.dbt.utils import load_manifest
        from datapilot.core.platforms.dbt.constants import MODEL
        from datapilot.core.platforms.dbt.executor import DBTInsightGenerator

        logging.basicConfig(level=logging.INFO)
        manifest = load_manifest(manifest_path)
        catalog = load_catalog(catalog_path) if catalog_path else None
        if not config and config_path:
            config = load_config(config_path)

        # The parameter defaults to None, so normalise it before iterating;
        # a tuple also lets str.startswith test every prefix in one call.
        ignored_prefixes = tuple(ignore_directories or ())

        # NOTE(review): confirm that the installed datapilot version accepts
        # an `ignore_directories` keyword on DBTInsightGenerator.
        insight_generator = DBTInsightGenerator(
            manifest=manifest,
            catalog=catalog,
            config=config,
            ignore_directories=ignore_directories
        )
        reports = insight_generator.run()

        # package_insights = reports[PROJECT]
        # NOTE(review): the filter matches on the report key; verify the keys
        # are file paths rather than model ids, otherwise nothing is skipped.
        model_insights = {
            k: [json.loads(item.json()) for item in v]
            for k, v in reports[MODEL].items()
            if not k.startswith(ignored_prefixes)
        }
        return {"model_insights": model_insights}
    except Exception as e:
        # Deliberately flattened to a plain Exception with only the message.
        raise Exception(str(e))

Step 3: 🔄️ Validating

Your changes have been successfully made to the branch sweep/specify_ignore_directories_for_the_healt. I have validated these changes using a syntax checker and a linter.


[!TIP] To recreate the pull request, edit the issue title or description.

This is an automated message generated by Sweep AI.