Open jacobeatsspam opened 1 week ago
Hey @jacobeatsspam! I'm here to assist you with any bugs, questions, or contributions. Let's tackle this issue together.
I found some similar closed issues that might be relevant to your problem:
To resolve the issues with exports in Superset version 4.0.2, you can make the following changes:
Dependent Charts, Datasets, and Databases Inclusion:
Ensure that the export logic correctly handles the inclusion of dependent charts, datasets, and databases. Adjust the export_to_dict
method to include or exclude specific dependencies as needed.
`catalog` Key Breaking Imports:
Ensure that the catalog
key is correctly handled during the export and import processes. Add logic to properly serialize and deserialize the catalog
key.
Differences in metadata.yaml:
Standardize the metadata.yaml
file format to ensure consistency across different exports. Ensure that the export_to_dict
method produces a consistent output.
Lack of an "Export Everything" Option: Implement an "export everything" option by creating a command that exports all datasets, charts, and databases in one go.
Here is an example of how you can modify the ExportDatasetsCommand
class to address some of these issues:
class ExportDatasetsCommand(ExportModelsCommand):
    """Export datasets (and, optionally, their parent database) as YAML files."""

    dao = DatasetDAO
    not_found = DatasetNotFoundError

    @staticmethod
    def _file_name(model: SqlaTable) -> str:
        """Return the archive path for a dataset: ``datasets/<db>/<table>.yaml``."""
        db_file_name = get_filename(
            model.database.database_name, model.database.id, skip_id=True
        )
        ds_file_name = get_filename(model.table_name, model.id, skip_id=True)
        return f"datasets/{db_file_name}/{ds_file_name}.yaml"

    @staticmethod
    def _file_content(model: SqlaTable) -> str:
        """Serialize a dataset (including metrics/columns) to export YAML."""
        payload = model.export_to_dict(
            recursive=True,
            include_parent_ref=False,
            include_defaults=True,
            export_uuids=True,
        )
        # Expand JSON-encoded string fields into nested structures so the
        # resulting YAML is human-readable and diff-friendly.
        for key in JSON_KEYS:
            if payload.get(key):
                try:
                    payload[key] = json.loads(payload[key])
                except json.JSONDecodeError:
                    logger.info("Unable to decode `%s` field: %s", key, payload[key])
        for key in ("metrics", "columns"):
            for attributes in payload.get(key, []):
                if attributes.get("extra"):
                    try:
                        attributes["extra"] = json.loads(attributes["extra"])
                    except json.JSONDecodeError:
                        logger.info(
                            "Unable to decode `extra` field: %s", attributes["extra"]
                        )

        payload["version"] = EXPORT_VERSION
        payload["database_uuid"] = str(model.database.uuid)

        # Handle the `catalog` key. Use a truthiness check (like the other
        # JSON fields above) rather than `'catalog' in payload`: a present but
        # None value would make json.loads raise an uncaught TypeError. Catch
        # TypeError as well in case the value is some other non-string type.
        if payload.get("catalog"):
            try:
                payload["catalog"] = json.loads(payload["catalog"])
            except (json.JSONDecodeError, TypeError):
                logger.info("Unable to decode `catalog` field: %s", payload["catalog"])

        return yaml.safe_dump(payload, sort_keys=False)

    @staticmethod
    def _export(
        model: SqlaTable, export_related: bool = True
    ) -> Iterator[tuple[str, Callable[[], str]]]:
        """Yield ``(file_path, content_callable)`` pairs for the dataset and,
        when ``export_related`` is True, its parent database."""
        yield (
            ExportDatasetsCommand._file_name(model),
            lambda: ExportDatasetsCommand._file_content(model),
        )
        if export_related:
            db_file_name = get_filename(
                model.database.database_name, model.database.id, skip_id=True
            )
            file_path = f"databases/{db_file_name}.yaml"
            payload = model.database.export_to_dict(
                recursive=False,
                include_parent_ref=False,
                include_defaults=True,
                export_uuids=True,
            )
            if payload.get("extra"):
                try:
                    payload["extra"] = json.loads(payload["extra"])
                except json.JSONDecodeError:
                    logger.info("Unable to decode `extra` field: %s", payload["extra"])
            # Mask SSH tunnel credentials so secrets never land in the export.
            if ssh_tunnel := DatabaseDAO.get_ssh_tunnel(model.database.id):
                ssh_tunnel_payload = ssh_tunnel.export_to_dict(
                    recursive=False,
                    include_parent_ref=False,
                    include_defaults=True,
                    export_uuids=False,
                )
                payload["ssh_tunnel"] = mask_password_info(ssh_tunnel_payload)
            payload["version"] = EXPORT_VERSION
            yield file_path, lambda: yaml.safe_dump(payload, sort_keys=False)
Additionally, to implement the "export everything" option, you can use the ExportAssetsCommand
class as shown below:
from collections.abc import Iterator
from datetime import datetime, timezone
from typing import Callable
import yaml
from superset.commands.base import BaseCommand
from superset.commands.chart.export import ExportChartsCommand
from superset.commands.dashboard.export import ExportDashboardsCommand
from superset.commands.database.export import ExportDatabasesCommand
from superset.commands.dataset.export import ExportDatasetsCommand
from superset.commands.query.export import ExportSavedQueriesCommand
from superset.utils.dict_import_export import EXPORT_VERSION
METADATA_FILE_NAME = "metadata.yaml"
class ExportAssetsCommand(BaseCommand):
    """
    Command that exports all databases, datasets, charts, dashboards and saved queries.
    """

    def run(self) -> Iterator[tuple[str, Callable[[], str]]]:
        """Yield ``(file_name, content_callable)`` pairs for every asset."""
        # Emit a single top-level metadata file describing the whole bundle.
        metadata = {
            "version": EXPORT_VERSION,
            "type": "assets",
            "timestamp": datetime.now(tz=timezone.utc).isoformat(),
        }
        yield METADATA_FILE_NAME, lambda: yaml.safe_dump(metadata, sort_keys=False)

        emitted = {METADATA_FILE_NAME}
        for command_cls in (
            ExportDatabasesCommand,
            ExportDatasetsCommand,
            ExportChartsCommand,
            ExportDashboardsCommand,
            ExportSavedQueriesCommand,
        ):
            # Export every model of this type, skipping any file already
            # produced (notably each sub-command's own metadata.yaml).
            all_ids = [model.id for model in command_cls.dao.find_all()]
            for file_name, file_content in command_cls(
                all_ids, export_related=False
            ).run():
                if file_name in emitted:
                    continue
                emitted.add(file_name)
                yield file_name, file_content

    def validate(self) -> None:
        # Nothing to validate: the command exports whatever currently exists.
        pass
To verify that the "export everything" option works correctly, you can use the following test case:
from freezegun import freeze_time
from pytest_mock import MockerFixture
def test_export_assets_command(mocker: MockerFixture) -> None:
    """
    Test that all assets are exported correctly.
    """
    from superset.commands.export.assets import ExportAssetsCommand

    # Each per-asset command is mocked to return its own metadata.yaml (which
    # ExportAssetsCommand must drop in favour of the top-level one) plus one
    # representative asset file.
    fixtures = [
        ("ExportDatabasesCommand", "Database", "databases/example.yaml", "<DATABASE CONTENTS>"),
        ("ExportDatasetsCommand", "Dataset", "datasets/example/dataset.yaml", "<DATASET CONTENTS>"),
        ("ExportChartsCommand", "Slice", "charts/pie.yaml", "<CHART CONTENTS>"),
        ("ExportDashboardsCommand", "Dashboard", "dashboards/sales.yaml", "<DASHBOARD CONTENTS>"),
        ("ExportSavedQueriesCommand", "SavedQuery", "queries/example/metric.yaml", "<SAVED QUERY CONTENTS>"),
    ]
    for command_name, asset_type, path, contents in fixtures:
        mocked = mocker.patch(f"superset.commands.export.assets.{command_name}")
        metadata = (
            f"version: 1.0.0\ntype: {asset_type}\n"
            "timestamp: '2022-01-01T00:00:00+00:00'\n"
        )
        # Bind loop values as lambda defaults to avoid late-binding closures.
        mocked.return_value.run.return_value = [
            ("metadata.yaml", lambda metadata=metadata: metadata),
            (path, lambda contents=contents: contents),
        ]

    with freeze_time("2022-01-01T00:00:00Z"):
        command = ExportAssetsCommand()
        output = [(name, renderer()) for name, renderer in command.run()]
        assert output == [
            (
                "metadata.yaml",
                "version: 1.0.0\ntype: assets\ntimestamp: '2022-01-01T00:00:00+00:00'\n",
            ),
            ("databases/example.yaml", "<DATABASE CONTENTS>"),
            ("datasets/example/dataset.yaml", "<DATASET CONTENTS>"),
            ("charts/pie.yaml", "<CHART CONTENTS>"),
            ("dashboards/sales.yaml", "<DASHBOARD CONTENTS>"),
            ("queries/example/metric.yaml", "<SAVED QUERY CONTENTS>"),
        ]
This code addresses the issues by: handling the `catalog` key properly during export, and implementing the "export everything" option via the ExportAssetsCommand class, verified with a test case [4][5][6].

To continue talking to Dosu, mention @dosu.
Is this a bug, or is this something that arguably ought to work differently? If there's not a bug, we should move it to an Ideas thread, or you'd be more than welcome to open a SIP if you want to propose an implementation plan.
Bug description
1) Dashboard exports include dependent charts, even though those charts aren't imported at the same time. 2) Chart exports include dependent datasets, even though those datasets aren't imported at the same time. 3) Dataset exports include dependent databases, even though those databases aren't imported at the same time. 4) Datasets still include the `catalog` key, which breaks imports and requires manual editing. 5) `metadata.yaml` is different for each type. 6) There is no "export everything" option, even though Dashboard exports include everything needed.

How to reproduce the bug
Run an export Run an import
Screenshots/recordings
No response
Superset version
4.0.2
Python version
Not applicable
Node version
Not applicable
Browser
Chrome
Additional context
No response
Checklist