error in running a tutorial notebook #702

beew opened 6 days ago

beew commented 6 days ago

I am getting errors when running the mapper_quickstart.ipynb notebook from the tutorials.

To reproduce, just run the notebook.

The cell

fig = plot_static_mapper_graph(pipe, data)
fig.show(config={'scrollZoom': True})

produces the following errors:

Empty                                     Traceback (most recent call last)
File ~/lib/python3.10/site-packages/joblib/, in Parallel.dispatch_one_batch(self, iterator)
    861 try:
--> 862     tasks = self._ready_batches.get(block=False)
    863 except queue.Empty:
    864     # slice the iterator n_jobs * batchsize items at a time. If the
    865     # slice returns less than that, then the current batchsize puts
    868     # accordingly to distribute evenly the last items between all
    869     # workers.

File ~/lib/python3.10/, in Queue.get(self, block, timeout)
    167     if not self._qsize():
--> 168         raise Empty
    169 elif timeout is None:


During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
Cell In[4], line 1
----> 1 fig = plot_static_mapper_graph(pipe, data)
      2 fig.show(config={'scrollZoom': True})

File ~/lib/python3.10/site-packages/gtda/mapper/, in plot_static_mapper_graph(pipeline, data, color_data, color_features, node_color_statistic, layout, layout_dim, clone_pipeline, n_sig_figs, node_scale, plotly_params)
    169 # Compute the graph and fetch the indices of points in each node
    170 _pipeline = clone(pipeline) if clone_pipeline else pipeline
--> 172 graph = _pipeline.fit_transform(data)
    173 (color_data_transformed, column_names_dropdown,
    174  node_color_statistic) = \
    175     _validate_color_kwargs(graph, data, color_data, color_features,
    176                            node_color_statistic, interactive=False)
    177 edge_trace, node_trace, node_colors_color_features = \
    178     _calculate_graph_data(
    179         graph, color_data_transformed, node_color_statistic, layout,
    180         layout_dim, n_sig_figs, node_scale
    181         )

File ~/lib/python3.10/site-packages/sklearn/, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs)
   1466     estimator._validate_params()
   1468 with config_context(
   1469     skip_parameter_validation=(
   1470         prefer_skip_nested_validation or global_skip_validation
   1471     )
   1472 ):
-> 1473     return fit_method(estimator, *args, **kwargs)

File ~/lib/python3.10/site-packages/sklearn/, in Pipeline.fit_transform(self, X, y, **params)
    490 """Fit the model and transform with the final estimator.
    492 Fit all the transformers one after the other and sequentially transform
    530     Transformed samples.
    531 """
    532 routed_params = self._check_method_params(method="fit_transform", props=params)
--> 533 Xt = self._fit(X, y, routed_params)
    535 last_step = self._final_estimator
    536 with _print_elapsed_time("Pipeline", self._log_message(len(self.steps) - 1)):

File ~/lib/python3.10/site-packages/sklearn/, in Pipeline._fit(self, X, y, routed_params)
    404     cloned_transformer = clone(transformer)
    405 # Fit or load from cache the current transformer
--> 406 X, fitted_transformer = fit_transform_one_cached(
    407     cloned_transformer,
    408     X,
    409     y,
    410     None,
    411     message_clsname="Pipeline",
    412     message=self._log_message(step_idx),
    413     params=routed_params[name],
    414 )
    415 # Replace the transformer of the step with the fitted
    416 # transformer. This is necessary when loading the transformer
    417 # from the cache.
    418 self.steps[step_idx] = (name, fitted_transformer)

File ~/lib/python3.10/site-packages/joblib/, in NotMemorizedFunc.__call__(self, *args, **kwargs)
    348 def __call__(self, *args, **kwargs):
--> 349     return self.func(*args, **kwargs)

File ~/lib/python3.10/site-packages/sklearn/, in _fit_transform_one(transformer, X, y, weight, message_clsname, message, params)
   1308 with _print_elapsed_time(message_clsname, message):
   1309     if hasattr(transformer, "fit_transform"):
-> 1310         res = transformer.fit_transform(X, y, **params.get("fit_transform", {}))
   1311     else:
   1312         res = transformer.fit(X, y, **params.get("fit", {})).transform(
   1313             X, **params.get("transform", {})
   1314         )

File ~/lib/python3.10/site-packages/sklearn/utils/, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
    314 @wraps(f)
    315 def wrapped(self, X, *args, **kwargs):
--> 316     data_to_wrap = f(self, X, *args, **kwargs)
    317     if isinstance(data_to_wrap, tuple):
    318         # only wrap the first output for cross decomposition
    319         return_tuple = (
    320             _wrap_data_with_container(method, data_to_wrap[0], X, self),
    321             *data_to_wrap[1:],
    322         )

File ~/lib/python3.10/site-packages/gtda/mapper/utils/, in ListFeatureUnion.fit_transform(self, X, y, **fit_params)
      7 def fit_transform(self, X, y=None, **fit_params):
      8     """Fit all transformers, transform the data and concatenate results.
      9     Parameters
     10     ----------
     20     """
---> 21     results = self._parallel_func(X, y, fit_params, _fit_transform_one)
     22     if not results:
     23         # All transformers are None
     24         return np.zeros((X.shape[0], 0))

File ~/lib/python3.10/site-packages/sklearn/, in FeatureUnion._parallel_func(self, X, y, func, routed_params)
   1754 self._validate_transformer_weights()
   1755 transformers = list(self._iter())
-> 1757 return Parallel(n_jobs=self.n_jobs)(
   1758     delayed(func)(
   1759         transformer,
   1760         X,
   1761         y,
   1762         weight,
   1763         message_clsname="FeatureUnion",
   1764         message=self._log_message(name, idx, len(transformers)),
   1765         params=routed_params[name],
   1766     )
   1767     for idx, (name, transformer, weight) in enumerate(transformers, 1)
   1768 )

File ~/lib/python3.10/site-packages/sklearn/utils/, in Parallel.__call__(self, iterable)
     69 config = get_config()
     70 iterable_with_config = (
     71     (_with_config(delayed_func, config), args, kwargs)
     72     for delayed_func, args, kwargs in iterable
     73 )
---> 74 return super().__call__(iterable_with_config)

File ~/lib/python3.10/site-packages/joblib/, in Parallel.__call__(self, iterable)
   1076 try:
   1077     # Only set self._iterating to True if at least a batch
   1078     # was dispatched. In particular this covers the edge
   1082     # was very quick and its callback already dispatched all the
   1083     # remaining jobs.
   1084     self._iterating = False
-> 1085     if self.dispatch_one_batch(iterator):
   1086         self._iterating = self._original_iterator is not None
   1088     while self.dispatch_one_batch(iterator):

File ~/lib/python3.10/site-packages/joblib/, in Parallel.dispatch_one_batch(self, iterator)
    870 n_jobs = self._cached_effective_n_jobs
    871 big_batch_size = batch_size * n_jobs
--> 873 islice = list(itertools.islice(iterator, big_batch_size))
    874 if len(islice) == 0:
    875     return False

File ~/lib/python3.10/site-packages/sklearn/utils/, in <genexpr>(.0)
     65 # Capture the thread-local scikit-learn configuration at the time
     66 # Parallel.__call__ is issued since the tasks can be dispatched
     67 # in a different thread depending on the backend and on the value of
     68 # pre_dispatch and n_jobs.
     69 config = get_config()
---> 70 iterable_with_config = (
     71     (_with_config(delayed_func, config), args, kwargs)
     72     for delayed_func, args, kwargs in iterable
     73 )
     74 return super().__call__(iterable_with_config)

File ~/lib/python3.10/site-packages/sklearn/, in <genexpr>(.0)
   1754 self._validate_transformer_weights()
   1755 transformers = list(self._iter())
   1757 return Parallel(n_jobs=self.n_jobs)(
   1758     delayed(func)(
   1759         transformer,
   1760         X,
   1761         y,
   1762         weight,
   1763         message_clsname="FeatureUnion",
   1764         message=self._log_message(name, idx, len(transformers)),
-> 1765         params=routed_params[name],
   1766     )
   1767     for idx, (name, transformer, weight) in enumerate(transformers, 1)
   1768 )

TypeError: 'function' object is not subscriptable


import platform; print(platform.platform())


import sys; print("Python", sys.version)

Python 3.10.9 (main, Mar 31 2023, 07:34:00) [GCC 11.3.0]

import numpy; print("NumPy", numpy.__version__)

NumPy 1.23.5

import scipy; print("SciPy", scipy.__version__)

SciPy 1.10.1

import joblib; print("Joblib", joblib.__version__)

SciPy 1.10.1

import sklearn; print("Scikit-learn", sklearn.__version__)

Scikit-learn 1.5.2

import gtda; print("Giotto-tda", gtda.__version__)

Giotto-tda 0.6.2

Additional context: All other notebooks in the tutorial work and all the tests passed. This is the only error.

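I think it is a conflict between make_mapper_pipeline and sklearn.pipeline caused by an API change in the latter. In the traceback, ListFeatureUnion.fit_transform calls self._parallel_func(X, y, fit_params, _fit_transform_one), whereas the installed scikit-learn defines FeatureUnion._parallel_func(self, X, y, func, routed_params); the function is therefore received as routed_params and subscripted, which raises the TypeError.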

I cannot downgrade scikit-learn as I need it for other packages.

I would appreciate it if you could point me to a workaround. Perhaps it is possible to change just a few lines in make_mapper_module to make this work.
