giotto-ai / giotto-tda

A high-performance topological machine learning toolbox in Python
https://giotto-ai.github.io/gtda-docs

Error running a tutorial notebook #702

Open beew opened 6 days ago

beew commented 6 days ago

I am getting errors when running the mapper_quickstart.ipynb notebook from the tutorials.

To reproduce, just run the notebook.
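For reference, the setup the notebook performs before the failing cell is roughly the following (a condensed sketch; the exact dataset and parameter values in the notebook may differ slightly):

from sklearn.cluster import DBSCAN
from sklearn.datasets import make_circles
from gtda.mapper import CubicalCover, Projection, make_mapper_pipeline, plot_static_mapper_graph

# Sample two concentric circles, as in the quickstart notebook
data, _ = make_circles(n_samples=5000, noise=0.05, factor=0.3, random_state=42)

# Build a Mapper pipeline: project onto the two coordinates, cover the
# projection with a cubical cover, and cluster each pre-image with DBSCAN
pipe = make_mapper_pipeline(
    filter_func=Projection(columns=[0, 1]),
    cover=CubicalCover(n_intervals=10, overlap_frac=0.3),
    clusterer=DBSCAN(),
)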

The cell

fig = plot_static_mapper_graph(pipe, data)
fig.show(config={'scrollZoom': True})

produces the following error:

---------------------------------------------------------------------------
Empty                                     Traceback (most recent call last)
File ~/lib/python3.10/site-packages/joblib/parallel.py:862, in Parallel.dispatch_one_batch(self, iterator)
    861 try:
--> 862     tasks = self._ready_batches.get(block=False)
    863 except queue.Empty:
    864     # slice the iterator n_jobs * batchsize items at a time. If the
    865     # slice returns less than that, then the current batchsize puts
   (...)
    868     # accordingly to distribute evenly the last items between all
    869     # workers.

File ~/lib/python3.10/queue.py:168, in Queue.get(self, block, timeout)
    167     if not self._qsize():
--> 168         raise Empty
    169 elif timeout is None:

Empty: 

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
Cell In[4], line 1
----> 1 fig = plot_static_mapper_graph(pipe, data)
      2 fig.show(config={'scrollZoom': True})

File ~/lib/python3.10/site-packages/gtda/mapper/visualization.py:172, in plot_static_mapper_graph(pipeline, data, color_data, color_features, node_color_statistic, layout, layout_dim, clone_pipeline, n_sig_figs, node_scale, plotly_params)
    169 # Compute the graph and fetch the indices of points in each node
    170 _pipeline = clone(pipeline) if clone_pipeline else pipeline
--> 172 graph = _pipeline.fit_transform(data)
    173 (color_data_transformed, column_names_dropdown,
    174  node_color_statistic) = \
    175     _validate_color_kwargs(graph, data, color_data, color_features,
    176                            node_color_statistic, interactive=False)
    177 edge_trace, node_trace, node_colors_color_features = \
    178     _calculate_graph_data(
    179         graph, color_data_transformed, node_color_statistic, layout,
    180         layout_dim, n_sig_figs, node_scale
    181         )

File ~/lib/python3.10/site-packages/sklearn/base.py:1473, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs)
   1466     estimator._validate_params()
   1468 with config_context(
   1469     skip_parameter_validation=(
   1470         prefer_skip_nested_validation or global_skip_validation
   1471     )
   1472 ):
-> 1473     return fit_method(estimator, *args, **kwargs)

File ~/lib/python3.10/site-packages/sklearn/pipeline.py:533, in Pipeline.fit_transform(self, X, y, **params)
    490 """Fit the model and transform with the final estimator.
    491 
    492 Fit all the transformers one after the other and sequentially transform
   (...)
    530     Transformed samples.
    531 """
    532 routed_params = self._check_method_params(method="fit_transform", props=params)
--> 533 Xt = self._fit(X, y, routed_params)
    535 last_step = self._final_estimator
    536 with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)):

File ~/lib/python3.10/site-packages/sklearn/pipeline.py:406, in Pipeline._fit(self, X, y, routed_params)
    404     cloned_transformer = clone(transformer)
    405 # Fit or load from cache the current transformer
--> 406 X, fitted_transformer = fit_transform_one_cached(
    407     cloned_transformer,
    408     X,
    409     y,
    410     None,
    411     message_clsname="Pipeline",
    412     message=self._log_message(step_idx),
    413     params=routed_params[name],
    414 )
    415 # Replace the transformer of the step with the fitted
    416 # transformer. This is necessary when loading the transformer
    417 # from the cache.
    418 self.steps[step_idx] = (name, fitted_transformer)

File ~/lib/python3.10/site-packages/joblib/memory.py:349, in NotMemorizedFunc.__call__(self, *args, **kwargs)
    348 def __call__(self, *args, **kwargs):
--> 349     return self.func(*args, **kwargs)

File ~/lib/python3.10/site-packages/sklearn/pipeline.py:1310, in _fit_transform_one(transformer, X, y, weight, message_clsname, message, params)
   1308 with _print_elapsed_time(message_clsname, message):
   1309     if hasattr(transformer, "fit_transform"):
-> 1310         res = transformer.fit_transform(X, y, **params.get("fit_transform", {}))
   1311     else:
   1312         res = transformer.fit(X, y, **params.get("fit", {})).transform(
   1313             X, **params.get("transform", {})
   1314         )

File ~/lib/python3.10/site-packages/sklearn/utils/_set_output.py:316, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
    314 @wraps(f)
    315 def wrapped(self, X, *args, **kwargs):
--> 316     data_to_wrap = f(self, X, *args, **kwargs)
    317     if isinstance(data_to_wrap, tuple):
    318         # only wrap the first output for cross decomposition
    319         return_tuple = (
    320             _wrap_data_with_container(method, data_to_wrap[0], X, self),
    321             *data_to_wrap[1:],
    322         )

File ~/lib/python3.10/site-packages/gtda/mapper/utils/_list_feature_union.py:21, in ListFeatureUnion.fit_transform(self, X, y, **fit_params)
      7 def fit_transform(self, X, y=None, **fit_params):
      8     """Fit all transformers, transform the data and concatenate results.
      9     Parameters
     10     ----------
   (...)
     19 
     20     """
---> 21     results = self._parallel_func(X, y, fit_params, _fit_transform_one)
     22     if not results:
     23         # All transformers are None
     24         return np.zeros((X.shape[0], 0))

File ~/lib/python3.10/site-packages/sklearn/pipeline.py:1757, in FeatureUnion._parallel_func(self, X, y, func, routed_params)
   1754 self._validate_transformer_weights()
   1755 transformers = list(self._iter())
-> 1757 return Parallel(n_jobs=self.n_jobs)(
   1758     delayed(func)(
   1759         transformer,
   1760         X,
   1761         y,
   1762         weight,
   1763         message_clsname="FeatureUnion",
   1764         message=self._log_message(name, idx, len(transformers)),
   1765         params=routed_params[name],
   1766     )
   1767     for idx, (name, transformer, weight) in enumerate(transformers, 1)
   1768 )

File ~/lib/python3.10/site-packages/sklearn/utils/parallel.py:74, in Parallel.__call__(self, iterable)
     69 config = get_config()
     70 iterable_with_config = (
     71     (_with_config(delayed_func, config), args, kwargs)
     72     for delayed_func, args, kwargs in iterable
     73 )
---> 74 return super().__call__(iterable_with_config)

File ~/lib/python3.10/site-packages/joblib/parallel.py:1085, in Parallel.__call__(self, iterable)
   1076 try:
   1077     # Only set self._iterating to True if at least a batch
   1078     # was dispatched. In particular this covers the edge
   (...)
   1082     # was very quick and its callback already dispatched all the
   1083     # remaining jobs.
   1084     self._iterating = False
-> 1085     if self.dispatch_one_batch(iterator):
   1086         self._iterating = self._original_iterator is not None
   1088     while self.dispatch_one_batch(iterator):

File ~/lib/python3.10/site-packages/joblib/parallel.py:873, in Parallel.dispatch_one_batch(self, iterator)
    870 n_jobs = self._cached_effective_n_jobs
    871 big_batch_size = batch_size * n_jobs
--> 873 islice = list(itertools.islice(iterator, big_batch_size))
    874 if len(islice) == 0:
    875     return False

File ~/lib/python3.10/site-packages/sklearn/utils/parallel.py:70, in <genexpr>(.0)
     65 # Capture the thread-local scikit-learn configuration at the time
     66 # Parallel.__call__ is issued since the tasks can be dispatched
     67 # in a different thread depending on the backend and on the value of
     68 # pre_dispatch and n_jobs.
     69 config = get_config()
---> 70 iterable_with_config = (
     71     (_with_config(delayed_func, config), args, kwargs)
     72     for delayed_func, args, kwargs in iterable
     73 )
     74 return super().__call__(iterable_with_config)

File ~/lib/python3.10/site-packages/sklearn/pipeline.py:1765, in <genexpr>(.0)
   1754 self._validate_transformer_weights()
   1755 transformers = list(self._iter())
   1757 return Parallel(n_jobs=self.n_jobs)(
   1758     delayed(func)(
   1759         transformer,
   1760         X,
   1761         y,
   1762         weight,
   1763         message_clsname="FeatureUnion",
   1764         message=self._log_message(name, idx, len(transformers)),
-> 1765         params=routed_params[name],
   1766     )
   1767     for idx, (name, transformer, weight) in enumerate(transformers, 1)
   1768 )

TypeError: 'function' object is not subscriptable

Versions

import platform; print(platform.platform())

Linux-6.8.0-45-generic-x86_64-with-glibc2.35

import sys; print("Python", sys.version)

Python 3.10.9 (main, Mar 31 2023, 07:34:00) [GCC 11.3.0]

import numpy; print("NumPy", numpy.__version__)

NumPy 1.23.5

import scipy; print("SciPy", scipy.__version__)

SciPy 1.10.1

import joblib; print("Joblib", joblib.__version__)

import sklearn; print("Scikit-learn", sklearn.__version__)

Scikit-learn 1.5.2

import gtda; print("Giotto-tda", gtda.__version__)

Giotto-tda 0.6.2

Additional context: All other notebooks in the tutorials work and all the tests pass; this is the only error.

I think it is a conflict between make_mapper_pipeline and sklearn.pipeline caused by an API change in the latter.
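Concretely, gtda's ListFeatureUnion.fit_transform (see the traceback above) still calls

results = self._parallel_func(X, y, fit_params, _fit_transform_one)

whereas FeatureUnion._parallel_func in scikit-learn 1.5 is defined as

def _parallel_func(self, X, y, func, routed_params):

so the _fit_transform_one function ends up in the routed_params slot, and routed_params[name] then fails with TypeError: 'function' object is not subscriptable.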

I cannot downgrade scikit-learn as I need it for other packages.

I would appreciate it if you could point me to a workaround. I am thinking it might be possible to get this working by changing just a few lines in gtda.mapper.
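For example, a patch along these lines might work as a stopgap (an untested sketch, assuming the only problem is the changed argument order of FeatureUnion._parallel_func in recent scikit-learn versions):

from sklearn.pipeline import FeatureUnion
from gtda.mapper.utils._list_feature_union import ListFeatureUnion

def _compat_parallel_func(self, X, y, a, b):
    # gtda's ListFeatureUnion still calls _parallel_func(X, y, fit_params, func),
    # while scikit-learn >= 1.4 expects _parallel_func(X, y, func, routed_params).
    # Detect which argument order we were given and translate old into new.
    if callable(a):
        func, routed_params = a, b
    else:
        fit_params, func = a, b
        # Key the params by step name, as the new _fit_transform_one expects
        routed_params = {
            name: {"fit": fit_params, "fit_transform": fit_params}
            for name, _ in self.transformer_list
        }
    return FeatureUnion._parallel_func(self, X, y, func, routed_params)

ListFeatureUnion._parallel_func = _compat_parallel_func

The adapter only translates the call into scikit-learn's current signature and forwards it, so it should become a no-op once gtda itself is updated.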

Thanks.