microsoft / responsible-ai-toolbox

Responsible AI Toolbox is a suite of tools providing model and data exploration and assessment user interfaces and libraries that enable a better understanding of AI systems. These interfaces and libraries empower developers and stakeholders of AI systems to develop and monitor AI more responsibly, and take better data-driven actions.
https://responsibleaitoolbox.ai/
MIT License
1.39k stars 363 forks source link

Flask Issue #2115

Closed dogfood6 closed 1 year ago

dogfood6 commented 1 year ago

Describe the bug: I have been trying to use Error Analysis on my XGBoost Regression model for quite some time now, but it seems like I am unlucky when it comes to dependency issues.

To Reproduce: I cannot install and import raiwidgets in my Azure ML Notebook. This time it says there's an issue with Flask.

Desktop (please complete the following information):

To get the package versions please run in your command line:

pip show raiwidgets
pip show responsibleai
imatiach-msft commented 1 year ago

@dogfood6 Could you please post the errors you are seeing when importing RAIInsights or RAIDashboard? What version of flask are you running? If you create a new compute instance in AzureML Notebook, it should have all compatible & verified dependencies that work together with the latest azure-sdk packages.

dogfood6 commented 1 year ago

Here is exactly what happens when I import raiwidgets; it doesn't work.

SDKv2 Python 3.10


SystemError Traceback (most recent call last) Cell In[6], line 20 18 import datetime 19 import pickle # to save objects ---> 20 import raiwidgets

File /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/raiwidgets/__init__.py:7 4 """Package for the fairness, explanation, and error analysis widgets.""" 6 from .__version__ import version ----> 7 from .error_analysis_dashboard import ErrorAnalysisDashboard 8 from .explanation_dashboard import ExplanationDashboard 9 from .fairness_dashboard import FairnessDashboard

File /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/raiwidgets/error_analysis_dashboard.py:10 8 from .constants import ModelTask 9 from .dashboard import Dashboard ---> 10 from .error_analysis_dashboard_input import ErrorAnalysisDashboardInput 12 DEFAULT_MAX_DEPTH = 3 13 DEFAULT_NUM_LEAVES = 31

File /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/raiwidgets/error_analysis_dashboard_input.py:11 7 import pandas as pd 9 from erroranalysis._internal.constants import (Metrics, display_name_to_metric, 10 metric_to_display_name) ---> 11 from erroranalysis._internal.error_analyzer import (ModelAnalyzer, 12 PredictionsAnalyzer) 13 from erroranalysis._internal.utils import is_spark 14 from raiutils.data_processing import convert_to_list, serialize_json_safe

File /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/erroranalysis/_internal/error_analyzer/__init__.py:5 1 # Copyright (c) Microsoft Corporation 2 # Licensed under the MIT License. 4 """Init file, used for backwards compatibility.""" ----> 5 from erroranalysis.analyzer import ModelAnalyzer, PredictionsAnalyzer 7 __all__ = ['PredictionsAnalyzer', 'ModelAnalyzer']

File /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/erroranalysis/analyzer/__init__.py:5 1 # Copyright (c) Microsoft Corporation 2 # Licensed under the MIT License. 4 """Module for defining the analyzers.""" ----> 5 from .error_analyzer import ModelAnalyzer, PredictionsAnalyzer 7 __all__ = ["PredictionsAnalyzer", "ModelAnalyzer"]

File /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/erroranalysis/analyzer/error_analyzer.py:29 27 from erroranalysis._internal.utils import generate_random_unique_indexes 28 from erroranalysis._internal.version_checker import check_pandas_version ---> 29 from erroranalysis.error_correlation_methods import ( 30 compute_ebm_global_importance, compute_gbm_global_importance) 31 from erroranalysis.report import ErrorReport 33 BIN_THRESHOLD = MatrixParams.BIN_THRESHOLD

File /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/erroranalysis/error_correlation_methods/__init__.py:7 4 """Module for defining error correlation methods.""" 6 from .ebm import compute_ebm_global_importance ----> 7 from .gbm import compute_gbm_global_importance 9 __all__ = ["compute_ebm_global_importance", "compute_gbm_global_importance"]

File /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/erroranalysis/error_correlation_methods/gbm.py:7 4 """Defines the error correlation computation using LightGBM with TreeShap.""" 6 import numpy as np ----> 7 import shap 8 from lightgbm import LGBMClassifier, LGBMRegressor 10 from erroranalysis._internal.constants import ModelTask

File /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/shap/__init__.py:12 9 if (sys.version_info < (3, 0)): 10 warnings.warn("As of version 0.29.0 shap only supports Python 3 (not 2)!") ---> 12 from ._explanation import Explanation, Cohorts 14 # explainers 15 from .explainers._explainer import Explainer

File /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/shap/_explanation.py:12 10 from slicer import Slicer, Alias, Obj 11 # from ._order import Order ---> 12 from .utils._general import OpChain 13 from .utils._exceptions import DimensionError 15 # slicer confuses pylint... 16 # pylint: disable=no-member

File /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/shap/utils/__init__.py:1 ----> 1 from ._clustering import hclust_ordering, partition_tree, partition_tree_shuffle, delta_minimization_order, hclust 2 from ._general import approximate_interactions, potential_interactions, sample, safe_isinstance, assert_import, record_import_error 3 from ._general import shapley_coefficients, convert_name, format_value, ordinal_str, OpChain, suppress_stderr

File /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/shap/utils/_clustering.py:4 2 import scipy as sp 3 from scipy.spatial.distance import pdist ----> 4 from numba import jit 5 import sklearn 6 import warnings

File /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/numba/__init__.py:42 38 from numba.core.decorators import (cfunc, generated_jit, jit, njit, stencil, 39 jit_module) 41 # Re-export vectorize decorators and the thread layer querying function ---> 42 from numba.np.ufunc import (vectorize, guvectorize, threading_layer, 43 get_num_threads, set_num_threads) 45 # Re-export Numpy helpers 46 from numba.np.numpy_support import carray, farray, from_dtype

File /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/numba/np/ufunc/__init__.py:3 1 # -*- coding: utf-8 -*- ----> 3 from numba.np.ufunc.decorators import Vectorize, GUVectorize, vectorize, guvectorize 4 from numba.np.ufunc._internal import PyUFunc_None, PyUFunc_Zero, PyUFunc_One 5 from numba.np.ufunc import _internal, array_exprs

File /anaconda/envs/azureml_py310_sdkv2/lib/python3.10/numba/np/ufunc/decorators.py:3 1 import inspect ----> 3 from numba.np.ufunc import _internal 4 from numba.np.ufunc.parallel import ParallelUFuncBuilder, ParallelGUFuncBuilder 6 from numba.core.registry import DelayedRegistry

SystemError: initialization of _internal failed without raising an exception

dogfood6 commented 1 year ago

Python 3.8 AzureML

this is the code i am running

###############

Libraries

############### import pandas as pd # manipulate data import numpy as np # calculate the mean and standard deviation import sklearn from scipy.stats import stats, randint import random from sklearn import preprocessing from sklearn.model_selection import GridSearchCV, cross_val_score, learning_curve # cross validation and tuning from sklearn.model_selection import train_test_split # split data into training and testing sets from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error import xgboost as xgb # XGBoost from xgboost.sklearn import XGBRegressor import itertools import matplotlib.pyplot as plt import seaborn as sns; sns.set() import datetime import pickle # to save objects import raiwidgets

WARNING:root:'PYARROW_IGNORE_TIMEZONE' environment variable was not set. It is required to set this environment variable to '1' in both driver and executor sides if you use pyarrow>=2.0.0. pandas-on-Spark will set it for you but it does not work if there is a Spark context already launched.

AttributeError Traceback (most recent call last) Cell In[2], line 20 18 import datetime 19 import pickle # to save objects ---> 20 import raiwidgets

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/raiwidgets/__init__.py:7 4 """Package for the fairness, explanation, and error analysis widgets.""" 6 from .__version__ import version ----> 7 from .error_analysis_dashboard import ErrorAnalysisDashboard 8 from .explanation_dashboard import ExplanationDashboard 9 from .fairness_dashboard import FairnessDashboard

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/raiwidgets/error_analysis_dashboard.py:10 8 from .constants import ModelTask 9 from .dashboard import Dashboard ---> 10 from .error_analysis_dashboard_input import ErrorAnalysisDashboardInput 12 DEFAULT_MAX_DEPTH = 3 13 DEFAULT_NUM_LEAVES = 31

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/raiwidgets/error_analysis_dashboard_input.py:11 7 import pandas as pd 9 from erroranalysis._internal.constants import (Metrics, display_name_to_metric, 10 metric_to_display_name) ---> 11 from erroranalysis._internal.error_analyzer import (ModelAnalyzer, 12 PredictionsAnalyzer) 13 from erroranalysis._internal.utils import is_spark 14 from raiutils.data_processing import convert_to_list, serialize_json_safe

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/erroranalysis/_internal/error_analyzer/__init__.py:5 1 # Copyright (c) Microsoft Corporation 2 # Licensed under the MIT License. 4 """Init file, used for backwards compatibility.""" ----> 5 from erroranalysis.analyzer import ModelAnalyzer, PredictionsAnalyzer 7 __all__ = ['PredictionsAnalyzer', 'ModelAnalyzer']

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/erroranalysis/analyzer/__init__.py:5 1 # Copyright (c) Microsoft Corporation 2 # Licensed under the MIT License. 4 """Module for defining the analyzers.""" ----> 5 from .error_analyzer import ModelAnalyzer, PredictionsAnalyzer 7 __all__ = ["PredictionsAnalyzer", "ModelAnalyzer"]

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/erroranalysis/analyzer/error_analyzer.py:22 20 from erroranalysis._internal.metrics import metric_to_func 21 from erroranalysis._internal.process_categoricals import process_categoricals ---> 22 from erroranalysis._internal.surrogate_error_tree import \ 23 compute_error_tree as _compute_error_tree 24 from erroranalysis._internal.surrogate_error_tree import \ 25 compute_error_tree_on_dataset as _compute_error_tree_on_dataset 26 from erroranalysis._internal.utils import generate_random_unique_indexes

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/erroranalysis/_internal/surrogate_error_tree.py:9 7 import numpy as np 8 import pandas as pd ----> 9 from lightgbm import Booster, LGBMClassifier, LGBMRegressor 10 from sklearn.metrics import (mean_absolute_error, mean_squared_error, 11 median_absolute_error, r2_score) 13 from erroranalysis._internal.cohort_filter import filter_from_cohort

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/lightgbm/__init__.py:8 2 """LightGBM, Light Gradient Boosting Machine. 3 4 Contributors: https://github.com/microsoft/LightGBM/graphs/contributors. 5 """ 6 import os ----> 8 from .basic import Booster, Dataset, register_logger 9 from .callback import early_stopping, print_evaluation, record_evaluation, reset_parameter 10 from .engine import CVBooster, cv, train

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/lightgbm/basic.py:17 14 import numpy as np 15 import scipy.sparse ---> 17 from .compat import PANDAS_INSTALLED, concat, dt_DataTable, is_dtype_sparse, pd_DataFrame, pd_Series 18 from .libpath import find_lib_path 21 class _DummyLogger:

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/lightgbm/compat.py:115 113 from dask import delayed 114 from dask.array import Array as dask_Array --> 115 from dask.dataframe import DataFrame as dask_DataFrame 116 from dask.dataframe import Series as dask_Series 117 from dask.distributed import Client, default_client, wait

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/dask/dataframe/__init__.py:3 1 try: 2 from ..base import compute ----> 3 from . import backends, dispatch, rolling 4 from .core import ( 5 DataFrame, 6 Index, (...) 12 to_timedelta, 13 ) 14 from .groupby import Aggregation

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/dask/dataframe/backends.py:17 15 from ..utils import is_arraylike, typename 16 from ._compat import PANDAS_GT_100 ---> 17 from .core import DataFrame, Index, Scalar, Series, _Frame 18 from .dispatch import ( 19 categorical_dtype_dispatch, 20 concat, (...) 30 union_categoricals_dispatch, 31 ) 32 from .extensions import make_array_nonempty, make_scalar

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/dask/dataframe/core.py:56 36 from ..utils import ( 37 IndexCallable, 38 M, (...) 53 typename, 54 ) 55 from . import methods ---> 56 from .accessor import DatetimeAccessor, StringAccessor 57 from .categorical import CategoricalAccessor, categorize 58 from .dispatch import ( 59 get_parallel_type, 60 group_split_dispatch, 61 hash_object_dispatch, 62 meta_nonempty, 63 )

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/dask/dataframe/accessor.py:109 98 """Accessor object for datetimelike properties of the Series values. 99 100 Examples (...) 103 >>> s.dt.microsecond # doctest: +SKIP 104 """ 106 _accessor_name = "dt" --> 109 class StringAccessor(Accessor): 110 """Accessor object for string properties of the Series values. 111 112 Examples (...) 115 >>> s.str.lower() # doctest: +SKIP 116 """ 118 _accessor_name = "str"

File /anaconda/envs/azureml_py38/lib/python3.8/site-packages/dask/dataframe/accessor.py:121, in StringAccessor() 118 _accessor_name = "str" 119 _not_implemented = {"get_dummies"} --> 121 @derived_from(pd.core.strings.StringMethods) 122 def split(self, pat=None, n=-1, expand=False): 123 if expand: 124 if n == -1:

AttributeError: module 'pandas.core.strings' has no attribute 'StringMethods'

dogfood6 commented 1 year ago

FINALLY GOT IT FIXED SORRY FOR BOTHERING

imatiach-msft commented 1 year ago

@dogfood6 great to hear that it is resolved - if you are still having issues, please let us know. I would be happy to get on a Teams call (or your preferred meeting software). I can send you the Teams meeting invite next time if you run into any issues and we can schedule a debugging session.