Open ebolyen opened 6 years ago
A user just reported this in Qiita. I tried to reproduce it with a smaller subset (the meta-analysis files are large) but couldn't, which is pretty intriguing.
A "simple" test should be something like:
(qiime2-2020.2) 14:06:25 test$ curl -sL "https://data.qiime2.org/2020.2/tutorials/moving-pictures/sample_metadata.tsv" > "sample-metadata.tsv"
(qiime2-2020.2) 14:06:26 test$ curl -sL "https://docs.qiime2.org/2020.2/data/tutorials/moving-pictures/table.qza" > table.qza
(qiime2-2020.2) 14:06:26 test$ cat sample-metadata.tsv | grep -v types | sed 's/year/depth/g' > sample-metadata.txt
(qiime2-2020.2) 14:06:26 test$ ipython
Python 3.6.7 | packaged by conda-forge | (default, Feb 28 2019, 02:16:08)
Type 'copyright', 'credits' or 'license' for more information
IPython 7.12.0 -- An enhanced Interactive Python. Type '?' for help.
In [1]:
...: import qiime2
...:
...: pm = qiime2.sdk.PluginManager()
...: method = pm.plugins['diversity'].actions['alpha_rarefaction']
...:
...: parameters = {
...: 'iterations': 10,
...: 'steps': 10,
...: 'min_depth': 1,
...: 'metrics': {'observed_otus'},
...: 'max_depth': 100,
...: 'metadata': qiime2.Metadata.load('sample-metadata.tsv'),
...: 'table': qiime2.Artifact.load('table.qza')
...: }
...: results = method(**parameters)
Duplicate key in file '/Users/antoniog/.matplotlib/matplotlibrc' line #3.
In [2]:
...: parameters = {
...: 'iterations': 10,
...: 'steps': 10,
...: 'min_depth': 1,
...: 'metrics': {'observed_otus'},
...: 'max_depth': 100,
...: 'metadata': qiime2.Metadata.load('sample-metadata.txt'),
...: 'table': qiime2.Artifact.load('table.qza')
...: }
...: results = method(**parameters)
In [3]:
But this works fine: both calls above complete without raising an error.
The error I'm getting in the Qiita environment (qiime2.2019.10) is:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-23-4895d7372df2> in <module>
----> 1 results = method(**q2params)
</home/qiita/miniconda3/envs/qiime2.2019.10/lib/python3.6/site-packages/decorator.py:decorator-gen-425> in alpha_rarefaction(table, max_depth, phylogeny, metrics, metadata, min_depth, steps, iterations)
~/miniconda3/envs/qiime2.2019.10/lib/python3.6/site-packages/qiime2/sdk/action.py in bound_callable(*args, **kwargs)
238 # Execute
239 outputs = self._callable_executor_(scope, callable_args,
--> 240 output_types, provenance)
241
242 if len(outputs) != len(self.signature.outputs):
~/miniconda3/envs/qiime2.2019.10/lib/python3.6/site-packages/qiime2/sdk/action.py in _callable_executor_(self, scope, view_args, output_types, provenance)
443 # will also need to be updated to support OutPath instead of str.
444 with tempfile.TemporaryDirectory(prefix='qiime2-temp-') as temp_dir:
--> 445 ret_val = self._callable(output_dir=temp_dir, **view_args)
446 if ret_val is not None:
447 raise TypeError(
~/miniconda3/envs/qiime2.2019.10/lib/python3.6/site-packages/q2_diversity/_alpha/_visualizer.py in alpha_rarefaction(output_dir, table, max_depth, phylogeny, metrics, metadata, min_depth, steps, iterations)
376 columns,
377 merged)
--> 378 c_df = _compute_summary(reindexed_df, column, counts=counts)
379 jsonp_filename = "%s-%s.jsonp" % (metric_name, column_name)
380 _alpha_rarefaction_jsonp(output_dir, jsonp_filename,
~/miniconda3/envs/qiime2.2019.10/lib/python3.6/site-packages/q2_diversity/_alpha/_visualizer.py in _compute_summary(data, id_label, counts)
264 # passed counts)
265 summary_df['count'] = 1
--> 266 summary_df = summary_df.reset_index()
267 summary_df.rename(columns={'level_0': id_label}, inplace=True)
268 return summary_df
~/miniconda3/envs/qiime2.2019.10/lib/python3.6/site-packages/pandas/core/frame.py in reset_index(self, level, drop, inplace, col_level, col_fill)
4707 # to ndarray and maybe infer different dtype
4708 level_values = _maybe_casted_values(lev, lab)
-> 4709 new_obj.insert(0, name, level_values)
4710
4711 new_obj.index = new_index
~/miniconda3/envs/qiime2.2019.10/lib/python3.6/site-packages/pandas/core/frame.py in insert(self, loc, column, value, allow_duplicates)
3589 self._ensure_valid_index(value)
3590 value = self._sanitize_column(column, value, broadcast=False)
-> 3591 self._data.insert(loc, column, value, allow_duplicates=allow_duplicates)
3592
3593 def assign(self, **kwargs):
~/miniconda3/envs/qiime2.2019.10/lib/python3.6/site-packages/pandas/core/internals/managers.py in insert(self, loc, item, value, allow_duplicates)
1171 if not allow_duplicates and item in self.items:
1172 # Should this be a different kind of error??
-> 1173 raise ValueError("cannot insert {}, already exists".format(item))
1174
1175 if not isinstance(loc, int):
ValueError: cannot insert depth, already exists
Any thoughts?
I was able to create a smaller subset to reproduce the error; files here.
Code:
In [1]:
...: import qiime2
...:
...: pm = qiime2.sdk.PluginManager()
...: method = pm.plugins['diversity'].actions['alpha_rarefaction']
...:
...: parameters = {
...: 'iterations': 10,
...: 'steps': 10,
...: 'min_depth': 1,
...: 'metrics': {'observed_otus'},
...: 'max_depth': 100,
...: 'metadata': qiime2.Metadata.load('map_no_depth.txt'),
...: 'table': qiime2.Artifact.load('3_samples.qza')
...: }
...: results = method(**parameters)
...:
...: parameters = {
...: 'iterations': 10,
...: 'steps': 10,
...: 'min_depth': 1,
...: 'metrics': {'observed_otus'},
...: 'max_depth': 100,
...: 'metadata': qiime2.Metadata.load('map.txt'),
...: 'table': qiime2.Artifact.load('3_samples.qza')
...: }
...: results = method(**parameters)
Duplicate key in file '/Users/antoniog/.matplotlib/matplotlibrc' line #3.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-1-40fda68011fa> in <module>
24 'table': qiime2.Artifact.load('3_samples.qza')
25 }
---> 26 results = method(**parameters)
</Users/antoniog/miniconda3/envs/qiime2-2020.2/lib/python3.6/site-packages/decorator.py:decorator-gen-423> in alpha_rarefaction(table, max_depth, phylogeny, metrics, metadata, min_depth, steps, iterations)
~/miniconda3/envs/qiime2-2020.2/lib/python3.6/site-packages/qiime2/sdk/action.py in bound_callable(*args, **kwargs)
243 # Execute
244 outputs = self._callable_executor_(scope, callable_args,
--> 245 output_types, provenance)
246
247 if len(outputs) != len(self.signature.outputs):
~/miniconda3/envs/qiime2-2020.2/lib/python3.6/site-packages/qiime2/sdk/action.py in _callable_executor_(self, scope, view_args, output_types, provenance)
450 # will also need to be updated to support OutPath instead of str.
451 with tempfile.TemporaryDirectory(prefix='qiime2-temp-') as temp_dir:
--> 452 ret_val = self._callable(output_dir=temp_dir, **view_args)
453 if ret_val is not None:
454 raise TypeError(
~/miniconda3/envs/qiime2-2020.2/lib/python3.6/site-packages/q2_diversity/_alpha/_visualizer.py in alpha_rarefaction(output_dir, table, max_depth, phylogeny, metrics, metadata, min_depth, steps, iterations)
379 columns,
380 merged)
--> 381 c_df = _compute_summary(reindexed_df, column, counts=counts)
382 jsonp_filename = "%s-%s.jsonp" % (metric_name, column_name)
383 _alpha_rarefaction_jsonp(output_dir, jsonp_filename,
~/miniconda3/envs/qiime2-2020.2/lib/python3.6/site-packages/q2_diversity/_alpha/_visualizer.py in _compute_summary(data, id_label, counts)
267 # passed counts)
268 summary_df['count'] = 1
--> 269 summary_df = summary_df.reset_index()
270 summary_df.rename(columns={'level_0': id_label}, inplace=True)
271 return summary_df
~/miniconda3/envs/qiime2-2020.2/lib/python3.6/site-packages/pandas/core/frame.py in reset_index(self, level, drop, inplace, col_level, col_fill)
4707 # to ndarray and maybe infer different dtype
4708 level_values = _maybe_casted_values(lev, lab)
-> 4709 new_obj.insert(0, name, level_values)
4710
4711 new_obj.index = new_index
~/miniconda3/envs/qiime2-2020.2/lib/python3.6/site-packages/pandas/core/frame.py in insert(self, loc, column, value, allow_duplicates)
3589 self._ensure_valid_index(value)
3590 value = self._sanitize_column(column, value, broadcast=False)
-> 3591 self._data.insert(loc, column, value, allow_duplicates=allow_duplicates)
3592
3593 def assign(self, **kwargs):
~/miniconda3/envs/qiime2-2020.2/lib/python3.6/site-packages/pandas/core/internals/managers.py in insert(self, loc, item, value, allow_duplicates)
1171 if not allow_duplicates and item in self.items:
1172 # Should this be a different kind of error??
-> 1173 raise ValueError("cannot insert {}, already exists".format(item))
1174
1175 if not isinstance(loc, int):
ValueError: cannot insert depth, already exists
Bug Description: Something is up with the dataframe index shuffling. I suspect there was a metadata column named "depth", but a cursory look at the code didn't really explain why that would matter.
Screenshots: Stack trace:
References: forum x-ref