microsoft / semantic-link-labs

Early access to new features for Microsoft Fabric's Semantic Link.
MIT License
178 stars 37 forks source link

vertipaq_analyzer fails for Certain Datasets #276

Closed Jai-Prakash-HU closed 1 week ago

Jai-Prakash-HU commented 1 week ago

It does not work for some datasets, and I could not work out the reason.

error log:

AttributeError Traceback (most recent call last) Cell In[82], line 50 47 for Workspace_Id, Workspace_Name, Dataset_Id, Dataset_Name in zip(Workspace_Id_list, Workspace_Name_list, Dataset_Id_list, Dataset_Name_list): 48 print(f"Running Vertipaq for Workspace: {Workspace_Name} Dataset: {Dataset_Name} DatasetId: {Dataset_Id} ") ---> 50 labs.vertipaq_analyzer(dataset = Dataset_Name , workspace = Workspace_Name , export = 'table') 53 timestamp = datetime.now() 54 result_status = 'Vertipaq scan complete.'

File ~/cluster-env/clonedenv/lib/python3.11/site-packages/sempy/_utils/_log.py:310, in mds_log.<locals>.get_wrapper.<locals>.log_decorator_wrapper(*args, **kwargs) 307 raise 309 try: --> 310 result = func(*args, **kwargs) 312 # The invocation for get_message_dict moves after the function 313 # so it can access the state after the method call 314 message.update(extractor.get_completion_message_dict(result, arg_dict))

File ~/cluster-env/clonedenv/lib/python3.11/site-packages/sempy_labs/_vertipaq.py:136, in vertipaq_analyzer(dataset, workspace, export, read_stats_from_data, **kwargs) 70 workspace = fabric.resolve_workspace_name(workspace) 72 vertipaq_map = { 73 "Model": { 74 "Dataset Name": [icons.data_type_string, icons.no_format], (...) 133 }, 134 } --> 136 dfT = list_tables(dataset=dataset, extended=True, workspace=workspace) 137 dfT.rename(columns={"Name": "Table Name"}, inplace=True) 138 columns_to_keep = list(vertipaq_map["Tables"].keys())

File ~/cluster-env/clonedenv/lib/python3.11/site-packages/sempy_labs/_list_functions.py:127, in list_tables(dataset, workspace, extended) 123 with connect_semantic_model( 124 dataset=dataset, workspace=workspace, readonly=True 125 ) as tom: 126 if extended: --> 127 dict_df = fabric.evaluate_dax( 128 dataset=dataset, 129 workspace=workspace, 130 dax_string=""" 131 EVALUATE SELECTCOLUMNS(FILTER(INFO.STORAGETABLECOLUMNS(), [COLUMN_TYPE] = "BASIC_DATA"),[DIMENSION_NAME],[DICTIONARY_SIZE]) 132 """, 133 ) 134 dict_sum = dict_df.groupby("[DIMENSION_NAME]")["[DICTIONARY_SIZE]"].sum() 135 data = fabric.evaluate_dax( 136 dataset=dataset, 137 workspace=workspace, 138 dax_string="""EVALUATE SELECTCOLUMNS(INFO.STORAGETABLECOLUMNSEGMENTS(),[TABLE_ID],[DIMENSION_NAME],[USED_SIZE])""", 139 )

File ~/cluster-env/clonedenv/lib/python3.11/site-packages/sempy/_utils/_log.py:310, in mds_log.<locals>.get_wrapper.<locals>.log_decorator_wrapper(*args, **kwargs) 307 raise 309 try: --> 310 result = func(*args, **kwargs) 312 # The invocation for get_message_dict moves after the function 313 # so it can access the state after the method call 314 message.update(extractor.get_completion_message_dict(result, arg_dict))

File ~/cluster-env/clonedenv/lib/python3.11/site-packages/sempy/fabric/_flat.py:781, in evaluate_dax(dataset, dax_string, workspace, verbose, num_rows) 755 """ 756 Compute `DAX <https://learn.microsoft.com/en-us/dax/>`_ query for a given dataset. 757 (...) 776 :class:`~sempy.fabric.FabricDataFrame` holding the result of the DAX query. 777 """ 779 # creating client directly to avoid any workspace access if not needed 780 # a user can have access via XMLA to a dataset, but may not have access to the workspace --> 781 return DatasetXmlaClient(workspace, dataset).evaluate_dax(dax_string, verbose, num_rows)

File ~/cluster-env/clonedenv/lib/python3.11/site-packages/sempy/fabric/_client/_base_dataset_client.py:202, in BaseDatasetClient.evaluate_dax(self, query, verbose, num_rows) 184 """ 185 Retrieve results of DAX query as a FabricDataFrame. 186 (...) 199 FabricDataFrame converted from the results of a DAX query. 200 """ 201 df = self._evaluate_dax(query, verbose, num_rows) --> 202 return FabricDataFrame(df, dataset=self.resolver.dataset_name, workspace=self.resolver.workspace_name)

File ~/cluster-env/clonedenv/lib/python3.11/site-packages/sempy/fabric/_dataframe/_fabric_dataframe.py:184, in FabricDataFrame.__init__(self, data, column_metadata, dataset, workspace, verbose, *args, **kwargs) 179 from Microsoft.AnalysisServices import OperationException 181 try: 182 self.column_metadata = _get_or_create_workspace_client(workspace) \ 183 .get_dataset_client(dataset) \ --> 184 .resolve_metadata(self.columns, verbose) 185 except (WorkspaceNotFoundException, OperationException, DatasetNotFoundException) as e: 186 if verbose > 0:

File ~/cluster-env/clonedenv/lib/python3.11/site-packages/sempy/fabric/_client/_base_dataset_client.py:406, in BaseDatasetClient.resolve_metadata(self, columns, verbose) 403 database = self.resolver.workspace_client.get_dataset(self.resolver.dataset_name) 405 column_map = defaultdict(lambda: []) --> 406 for table in database.Model.Tables: 407 for column in table.Columns: 408 column_data = self._get_column_data(table, column)

AttributeError: 'NoneType' object has no attribute 'Tables'

Jai-Prakash-HU commented 1 week ago

I will post more details on this issue. I am retrying it in multiple ways and will list every scenario in which I hit this error.

Jai-Prakash-HU commented 1 week ago

Another scenario in which this function does not work is shown below:

UnboundLocalError Traceback (most recent call last) Cell In[13], line 51 48 for Workspace_Id, Workspace_Name, Dataset_Id, Dataset_Name in zip(Workspace_Id_list, Workspace_Name_list, Dataset_Id_list, Dataset_Name_list): 49 print(f"Running Vertipaq for Workspace: {Workspace_Name} Dataset: {Dataset_Name} DatasetId: {Dataset_Id} ") ---> 51 labs.vertipaq_analyzer(dataset = Dataset_Name , workspace = Workspace_Name , export = 'table') 54 timestamp = datetime.now() 55 result_status = 'Vertipaq scan complete.'

File ~/cluster-env/clonedenv/lib/python3.11/site-packages/sempy/_utils/_log.py:310, in mds_log.<locals>.get_wrapper.<locals>.log_decorator_wrapper(*args, **kwargs) 307 raise 309 try: --> 310 result = func(*args, **kwargs) 312 # The invocation for get_message_dict moves after the function 313 # so it can access the state after the method call 314 message.update(extractor.get_completion_message_dict(result, arg_dict))

File ~/cluster-env/clonedenv/lib/python3.11/site-packages/sempy_labs/_vertipaq.py:397, in vertipaq_analyzer(dataset, workspace, export, read_stats_from_data, **kwargs) 395 elif db_total_size >= 1000: 396 y = db_total_size / (1024) * 1000 --> 397 y = round(y) 399 dfModel = pd.DataFrame( 400 { 401 "Dataset Name": dataset, (...) 408 index=[0], 409 ) 410 dfModel.reset_index(drop=True, inplace=True)

UnboundLocalError: cannot access local variable 'y' where it is not associated with a value

m-kovalsky commented 1 week ago

The first issue occurs because the semantic model has no tables. Both the first and second issues will be resolved in the aforementioned PR (#272).

m-kovalsky commented 1 week ago

Fixed in 0.8.5.