This error is similar to the one reported in issue #140. That error was resolved when I ran a limited set of samples on branch spike-in-tests, but now that I'm trying to run the pipeline with all selection samples (branch finalized-selections), I'm getting a similar error again.
I initially thought the error was reintroduced when I updated the pipeline to 2.4.0 (which incorporates antibody count thresholds), but the error persisted after I reverted the pipeline, so it is probably related to the full set of samples I'm running instead.
@jbloom, the commit for recreating this error is here. Let me know if any additional information would be helpful!
Full error message:
---------------------------------------------------------------------------
Exception encountered at "In [24]":
---------------------------------------------------------------------------
ZeroDivisionError Traceback (most recent call last)
Cell In[24], line 15
1 variant_counts = (
2 variants.variant_count_df[
3 ["library", "sample", "target", "barcode", "count", "aa_substitutions"]
4 ]
5 .merge(
6 barcode_runs.drop(
7 columns=[
8 "fastq_R1",
9 "notes",
10 "antibody_concentration",
11 "exclude_after_counts",
12 ]
13 )
14 )
---> 15 .assign(
16 percent=lambda x: 100
17 * x["count"]
18 / x.groupby(["library_sample", "target"])["count"].transform("sum")
19 )
20 .sort_values("percent", ascending=False)
21 )
File /fh/fast/bloom_j/computational_notebooks/fwelsh/2022/flu_h3_hk19_dms/.snakemake/conda/d40c1eb554a2599b4d0a1fa4a36f5a46_/lib/python3.11/site-packages/pandas/core/frame.py:4889, in DataFrame.assign(self, **kwargs)
4886 data = self.copy()
4888 for k, v in kwargs.items():
-> 4889 data[k] = com.apply_if_callable(v, data)
4890 return data
File /fh/fast/bloom_j/computational_notebooks/fwelsh/2022/flu_h3_hk19_dms/.snakemake/conda/d40c1eb554a2599b4d0a1fa4a36f5a46_/lib/python3.11/site-packages/pandas/core/common.py:374, in apply_if_callable(maybe_callable, obj, **kwargs)
363 """
364 Evaluate possibly callable input using obj and kwargs if it is callable,
365 otherwise return as it is.
(...)
371 **kwargs
372 """
373 if callable(maybe_callable):
--> 374 return maybe_callable(obj, **kwargs)
376 return maybe_callable
Cell In[24], line 16, in <lambda>(x)
1 variant_counts = (
2 variants.variant_count_df[
3 ["library", "sample", "target", "barcode", "count", "aa_substitutions"]
4 ]
5 .merge(
6 barcode_runs.drop(
7 columns=[
8 "fastq_R1",
9 "notes",
10 "antibody_concentration",
11 "exclude_after_counts",
12 ]
13 )
14 )
15 .assign(
---> 16 percent=lambda x: 100
17 * x["count"]
18 / x.groupby(["library_sample", "target"])["count"].transform("sum")
19 )
20 .sort_values("percent", ascending=False)
21 )
File /fh/fast/bloom_j/computational_notebooks/fwelsh/2022/flu_h3_hk19_dms/.snakemake/conda/d40c1eb554a2599b4d0a1fa4a36f5a46_/lib/python3.11/site-packages/pandas/core/ops/common.py:72, in _unpack_zerodim_and_defer.<locals>.new_method(self, other)
68 return NotImplemented
70 other = item_from_zerodim(other)
---> 72 return method(self, other)
File /fh/fast/bloom_j/computational_notebooks/fwelsh/2022/flu_h3_hk19_dms/.snakemake/conda/d40c1eb554a2599b4d0a1fa4a36f5a46_/lib/python3.11/site-packages/pandas/core/arraylike.py:126, in OpsMixin.__truediv__(self, other)
124 @unpack_zerodim_and_defer("__truediv__")
125 def __truediv__(self, other):
--> 126 return self._arith_method(other, operator.truediv)
File /fh/fast/bloom_j/computational_notebooks/fwelsh/2022/flu_h3_hk19_dms/.snakemake/conda/d40c1eb554a2599b4d0a1fa4a36f5a46_/lib/python3.11/site-packages/pandas/core/series.py:6259, in Series._arith_method(self, other, op)
6257 def _arith_method(self, other, op):
6258 self, other = ops.align_method_SERIES(self, other)
-> 6259 return base.IndexOpsMixin._arith_method(self, other, op)
File /fh/fast/bloom_j/computational_notebooks/fwelsh/2022/flu_h3_hk19_dms/.snakemake/conda/d40c1eb554a2599b4d0a1fa4a36f5a46_/lib/python3.11/site-packages/pandas/core/base.py:1325, in IndexOpsMixin._arith_method(self, other, op)
1322 rvalues = ensure_wrapped_if_datetimelike(rvalues)
1324 with np.errstate(all="ignore"):
-> 1325 result = ops.arithmetic_op(lvalues, rvalues, op)
1327 return self._construct_result(result, name=res_name)
File /fh/fast/bloom_j/computational_notebooks/fwelsh/2022/flu_h3_hk19_dms/.snakemake/conda/d40c1eb554a2599b4d0a1fa4a36f5a46_/lib/python3.11/site-packages/pandas/core/ops/array_ops.py:226, in arithmetic_op(left, right, op)
222 _bool_arith_check(op, left, right)
224 # error: Argument 1 to "_na_arithmetic_op" has incompatible type
225 # "Union[ExtensionArray, ndarray[Any, Any]]"; expected "ndarray[Any, Any]"
--> 226 res_values = _na_arithmetic_op(left, right, op) # type: ignore[arg-type]
228 return res_values
File /fh/fast/bloom_j/computational_notebooks/fwelsh/2022/flu_h3_hk19_dms/.snakemake/conda/d40c1eb554a2599b4d0a1fa4a36f5a46_/lib/python3.11/site-packages/pandas/core/ops/array_ops.py:165, in _na_arithmetic_op(left, right, op, is_cmp)
162 func = partial(expressions.evaluate, op)
164 try:
--> 165 result = func(left, right)
166 except TypeError:
167 if not is_cmp and (is_object_dtype(left.dtype) or is_object_dtype(right)):
168 # For object dtype, fallback to a masked operation (only operating
169 # on the non-missing values)
170 # Don't do this for comparisons, as that will handle complex numbers
171 # incorrectly, see GH#32047
File /fh/fast/bloom_j/computational_notebooks/fwelsh/2022/flu_h3_hk19_dms/.snakemake/conda/d40c1eb554a2599b4d0a1fa4a36f5a46_/lib/python3.11/site-packages/pandas/core/computation/expressions.py:241, in evaluate(op, a, b, use_numexpr)
238 if op_str is not None:
239 if use_numexpr:
240 # error: "None" not callable
--> 241 return _evaluate(op, op_str, a, b) # type: ignore[misc]
242 return _evaluate_standard(op, op_str, a, b)
File /fh/fast/bloom_j/computational_notebooks/fwelsh/2022/flu_h3_hk19_dms/.snakemake/conda/d40c1eb554a2599b4d0a1fa4a36f5a46_/lib/python3.11/site-packages/pandas/core/computation/expressions.py:70, in _evaluate_standard(op, op_str, a, b)
68 if _TEST_MODE:
69 _store_test_result(False)
---> 70 return op(a, b)
ZeroDivisionError: division by zero
This error is similar to the one reported in issue #140. That error was resolved when I ran a limited set of samples on branch spike-in-tests, but now that I'm trying to run the pipeline with all selection samples (branch finalized-selections), I'm getting a similar error again.
I initially thought the error was reintroduced when I updated the pipeline to 2.4.0 (which incorporates antibody count thresholds), but the error persisted after I reverted the pipeline, so it is probably related to the full set of samples I'm running instead.
@jbloom, the commit for recreating this error is here. Let me know if any additional information would be helpful!
Full error message: