Closed joaquincr closed 6 years ago
git pull; sudo -H pip3 install .
gpseqc_estimate
gpseqc_compare
gpseqc_estimate '/home/bicro/Desktop/user_folders_HD2/Quim/1min_exclusion/1Mb/BICRO55/input/TK94_5min_GG__cutsiteLoc-umiCount.bed' '/home/bicro/Desktop/user_folders_HD2/Quim/1min_exclusion/1Mb/BICRO55/input/TK95_10min_GG__cutsiteLoc-umiCount.bed' '/home/bicro/Desktop/user_folders_HD2/Quim/1min_exclusion/1Mb/BICRO55/input/TK96_15min_GG__cutsiteLoc-umiCount.bed' '/home/bicro/Desktop/user_folders_HD2/Quim/1min_exclusion/1Mb/BICRO55/input/TK97_30min_GG__cutsiteLoc-umiCount.bed' '/home/bicro/Desktop/user_folders_HD2/Quim/1min_exclusion/1Mb/BICRO55/input/TK98_on_GG__cutsiteLoc-umiCount.bed' -o '/home/bicro/Desktop/user_folders_HD2/Quim/1min_exclusion/1Mb/BICRO55/output' -s 1000000 -r BICRO55_excluding_1min_1MB_allMetrics -t 7
Estimating centrality... [Parallel(n_jobs=7)]: Batch computation too fast (0.0495s.) Setting batch_size=8. [Parallel(n_jobs=7)]: Done 4 tasks | elapsed: 0.1s [Parallel(n_jobs=7)]: Done 11 tasks | elapsed: 0.1s [Parallel(n_jobs=7)]: Done 46 tasks | elapsed: 0.5s [Parallel(n_jobs=7)]: Done 118 tasks | elapsed: 0.9s [Parallel(n_jobs=7)]: Done 190 tasks | elapsed: 1.3s [Parallel(n_jobs=7)]: Done 278 tasks | elapsed: 1.7s [Parallel(n_jobs=7)]: Done 366 tasks | elapsed: 2.2s [Parallel(n_jobs=7)]: Done 470 tasks | elapsed: 2.9s [Parallel(n_jobs=7)]: Done 574 tasks | elapsed: 3.6s [Parallel(n_jobs=7)]: Done 694 tasks | elapsed: 4.2s [Parallel(n_jobs=7)]: Done 814 tasks | elapsed: 5.0s [Parallel(n_jobs=7)]: Done 950 tasks | elapsed: 5.7s [Parallel(n_jobs=7)]: Done 1086 tasks | elapsed: 6.5s [Parallel(n_jobs=7)]: Done 1238 tasks | elapsed: 7.4s [Parallel(n_jobs=7)]: Done 1390 tasks | elapsed: 8.2s [Parallel(n_jobs=7)]: Done 1558 tasks | elapsed: 9.1s [Parallel(n_jobs=7)]: Done 1726 tasks | elapsed: 10.2s [Parallel(n_jobs=7)]: Done 1910 tasks | elapsed: 11.1s [Parallel(n_jobs=7)]: Done 2094 tasks | elapsed: 12.2s [Parallel(n_jobs=7)]: Done 2294 tasks | elapsed: 13.3s [Parallel(n_jobs=7)]: Done 2494 tasks | elapsed: 14.5s [Parallel(n_jobs=7)]: Done 2710 tasks | elapsed: 15.6s multiprocessing.pool.RemoteTraceback: """ Traceback (most recent call last): File "/usr/local/lib/python3.5/dist-packages/joblib/_parallel_backends.py", line 350, in __call__ return self.func(*args, **kwargs) File "/usr/local/lib/python3.5/dist-packages/joblib/parallel.py", line 131, in __call__ return [func(*args, **kwargs) for func, args, kwargs in self.items] File "/usr/local/lib/python3.5/dist-packages/joblib/parallel.py", line 131, in <listcomp> return [func(*args, **kwargs) for func, args, kwargs in self.items] File "/usr/local/lib/python3.5/dist-packages/gpseqc/centrality.py", line 243, in bin_estimate_single orow[m] = est_2p(st, calc_p, lambda x, y: x / y) File 
"/usr/local/lib/python3.5/dist-packages/gpseqc/centrality.py", line 123, in est_2p a = f1(st, 0) File "/usr/local/lib/python3.5/dist-packages/gpseqc/centrality.py", line 31, in calc_p row = st.iloc[ci, :] File "/usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py", line 1367, in __getitem__ return self._getitem_tuple(key) File "/usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py", line 1737, in _getitem_tuple self._has_valid_tuple(tup) File "/usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py", line 203, in _has_valid_tuple raise IndexingError('Too many indexers') pandas.core.indexing.IndexingError: Too many indexers During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/usr/lib/python3.5/multiprocessing/pool.py", line 119, in worker result = (True, func(*args, **kwds)) File "/usr/local/lib/python3.5/dist-packages/joblib/_parallel_backends.py", line 359, in __call__ raise TransportableException(text, e_type) joblib.my_exceptions.TransportableException: TransportableException ___________________________________________________________________________ IndexingError Tue Apr 24 13:48:52 2018 PID: 29536 Python 3.5.2: /usr/bin/python3 ........................................................................... 
/usr/local/lib/python3.5/dist-packages/joblib/parallel.py in __call__(self=<joblib.parallel.BatchedCalls object>) 126 def __init__(self, iterator_slice): 127 self.items = list(iterator_slice) 128 self._size = len(self.items) 129 130 def __call__(self): --> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items] self.items = [(<function bin_estimate_single>, (2870, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2871, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2872, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2873, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2874, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2875, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2876, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 
'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2877, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {})] 132 133 def __len__(self): 134 return self._size 135 ........................................................................... /usr/local/lib/python3.5/dist-packages/joblib/parallel.py in <listcomp>(.0=<list_iterator object>) 126 def __init__(self, iterator_slice): 127 self.items = list(iterator_slice) 128 self._size = len(self.items) 129 130 def __call__(self): --> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items] func = <function bin_estimate_single> args = (2877, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']) kwargs = {} 132 133 def __len__(self): 134 return self._size 135 ........................................................................... /usr/local/lib/python3.5/dist-packages/gpseqc/centrality.py in bin_estimate_single(i=2877, df= chrom start end sum ...2 735658 4 [11504 rows x 9 columns], mlist=['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']) 238 239 # Calculate requested metrics 240 for m in mlist: 241 # Probability 242 if m == "prob_2p": # two-points --> 243 orow[m] = est_2p(st, calc_p, lambda x, y: x / y) orow = 'chr6' m = 'prob_2p' st = 0 chr6 1 87000000 2 88000000 3 ... 596776 8 3 Name: 2877, dtype: object 244 elif m == "prob_f": # fixed 245 orow[m] = est_f(st, calc_p, lambda x, y: x / y) 246 elif m == "prob_g": # global 247 orow[m] = est_g(st, calc_p, lambda x, y: x / y) ........................................................................... 
/usr/local/lib/python3.5/dist-packages/gpseqc/centrality.py in est_2p(st=0 chr6 1 87000000 2 88000000 3 ... 596776 8 3 Name: 2877, dtype: object, f1=<function calc_p>, f2=<function bin_estimate_single.<locals>.<lambda>>) 118 f2 (fun): function for putting conditions together. 119 120 Returns: 121 Estimated centrality. 122 ''' --> 123 a = f1(st, 0) a = undefined f1 = <function calc_p> st = 0 chr6 1 87000000 2 88000000 3 ... 596776 8 3 Name: 2877, dtype: object 124 b = f1(st, st.shape[0] - 1) 125 return(f2(b, a)) 126 127 def est_f(st, f1, f2): ........................................................................... /usr/local/lib/python3.5/dist-packages/gpseqc/centrality.py in calc_p(st=0 chr6 1 87000000 2 88000000 3 ... 596776 8 3 Name: 2877, dtype: object, ci=0) 26 27 Returns: 28 float 29 ''' 30 assert ci < st.shape[0], "requested condition (index) not found." ---> 31 row = st.iloc[ci, :] row = undefined st.iloc = <pandas.core.indexing._iLocIndexer object> ci = 0 32 p = (row['cond_nreads'] * row['count']) 33 p = row['sum'] / p if 0 != p else np.nan 34 return(p) 35 ........................................................................... /usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py in __getitem__(self=<pandas.core.indexing._iLocIndexer object>, key=(0, slice(None, None, None))) 1362 try: 1363 if self._is_scalar_access(key): 1364 return self._getitem_scalar(key) 1365 except (KeyError, IndexError): 1366 pass -> 1367 return self._getitem_tuple(key) self._getitem_tuple = <bound method _iLocIndexer._getitem_tuple of <pandas.core.indexing._iLocIndexer object>> key = (0, slice(None, None, None)) 1368 else: 1369 # we by definition only have the 0th axis 1370 axis = self.axis or 0 1371 ........................................................................... 
/usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py in _getitem_tuple(self=<pandas.core.indexing._iLocIndexer object>, tup=(0, slice(None, None, None))) 1732 1733 return True 1734 1735 def _getitem_tuple(self, tup): 1736 -> 1737 self._has_valid_tuple(tup) self._has_valid_tuple = <bound method _NDFrameIndexer._has_valid_tuple of <pandas.core.indexing._iLocIndexer object>> tup = (0, slice(None, None, None)) 1738 try: 1739 return self._getitem_lowerdim(tup) 1740 except: 1741 pass ........................................................................... /usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py in _has_valid_tuple(self=<pandas.core.indexing._iLocIndexer object>, key=(0, slice(None, None, None))) 198 199 def _has_valid_tuple(self, key): 200 """ check the key for valid keys across my indexer """ 201 for i, k in enumerate(key): 202 if i >= self.obj.ndim: --> 203 raise IndexingError('Too many indexers') 204 if not self._has_valid_type(k, i): 205 raise ValueError("Location based indexing can only have " 206 "[{types}] types" 207 .format(types=self._valid_types)) IndexingError: Too many indexers ___________________________________________________________________________ """ The above exception was the direct cause of the following exception: Traceback (most recent call last): File "/usr/local/lib/python3.5/dist-packages/joblib/parallel.py", line 699, in retrieve self._output.extend(job.get(timeout=self.timeout)) File "/usr/lib/python3.5/multiprocessing/pool.py", line 608, in get raise self._value joblib.my_exceptions.TransportableException: TransportableException ___________________________________________________________________________ IndexingError Tue Apr 24 13:48:52 2018 PID: 29536 Python 3.5.2: /usr/bin/python3 ........................................................................... 
/usr/local/lib/python3.5/dist-packages/joblib/parallel.py in __call__(self=<joblib.parallel.BatchedCalls object>) 126 def __init__(self, iterator_slice): 127 self.items = list(iterator_slice) 128 self._size = len(self.items) 129 130 def __call__(self): --> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items] self.items = [(<function bin_estimate_single>, (2870, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2871, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2872, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2873, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2874, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2875, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2876, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 
'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2877, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {})] 132 133 def __len__(self): 134 return self._size 135 ........................................................................... /usr/local/lib/python3.5/dist-packages/joblib/parallel.py in <listcomp>(.0=<list_iterator object>) 126 def __init__(self, iterator_slice): 127 self.items = list(iterator_slice) 128 self._size = len(self.items) 129 130 def __call__(self): --> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items] func = <function bin_estimate_single> args = (2877, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']) kwargs = {} 132 133 def __len__(self): 134 return self._size 135 ........................................................................... /usr/local/lib/python3.5/dist-packages/gpseqc/centrality.py in bin_estimate_single(i=2877, df= chrom start end sum ...2 735658 4 [11504 rows x 9 columns], mlist=['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']) 238 239 # Calculate requested metrics 240 for m in mlist: 241 # Probability 242 if m == "prob_2p": # two-points --> 243 orow[m] = est_2p(st, calc_p, lambda x, y: x / y) orow = 'chr6' m = 'prob_2p' st = 0 chr6 1 87000000 2 88000000 3 ... 596776 8 3 Name: 2877, dtype: object 244 elif m == "prob_f": # fixed 245 orow[m] = est_f(st, calc_p, lambda x, y: x / y) 246 elif m == "prob_g": # global 247 orow[m] = est_g(st, calc_p, lambda x, y: x / y) ........................................................................... 
/usr/local/lib/python3.5/dist-packages/gpseqc/centrality.py in est_2p(st=0 chr6 1 87000000 2 88000000 3 ... 596776 8 3 Name: 2877, dtype: object, f1=<function calc_p>, f2=<function bin_estimate_single.<locals>.<lambda>>) 118 f2 (fun): function for putting conditions together. 119 120 Returns: 121 Estimated centrality. 122 ''' --> 123 a = f1(st, 0) a = undefined f1 = <function calc_p> st = 0 chr6 1 87000000 2 88000000 3 ... 596776 8 3 Name: 2877, dtype: object 124 b = f1(st, st.shape[0] - 1) 125 return(f2(b, a)) 126 127 def est_f(st, f1, f2): ........................................................................... /usr/local/lib/python3.5/dist-packages/gpseqc/centrality.py in calc_p(st=0 chr6 1 87000000 2 88000000 3 ... 596776 8 3 Name: 2877, dtype: object, ci=0) 26 27 Returns: 28 float 29 ''' 30 assert ci < st.shape[0], "requested condition (index) not found." ---> 31 row = st.iloc[ci, :] row = undefined st.iloc = <pandas.core.indexing._iLocIndexer object> ci = 0 32 p = (row['cond_nreads'] * row['count']) 33 p = row['sum'] / p if 0 != p else np.nan 34 return(p) 35 ........................................................................... /usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py in __getitem__(self=<pandas.core.indexing._iLocIndexer object>, key=(0, slice(None, None, None))) 1362 try: 1363 if self._is_scalar_access(key): 1364 return self._getitem_scalar(key) 1365 except (KeyError, IndexError): 1366 pass -> 1367 return self._getitem_tuple(key) self._getitem_tuple = <bound method _iLocIndexer._getitem_tuple of <pandas.core.indexing._iLocIndexer object>> key = (0, slice(None, None, None)) 1368 else: 1369 # we by definition only have the 0th axis 1370 axis = self.axis or 0 1371 ........................................................................... 
/usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py in _getitem_tuple(self=<pandas.core.indexing._iLocIndexer object>, tup=(0, slice(None, None, None))) 1732 1733 return True 1734 1735 def _getitem_tuple(self, tup): 1736 -> 1737 self._has_valid_tuple(tup) self._has_valid_tuple = <bound method _NDFrameIndexer._has_valid_tuple of <pandas.core.indexing._iLocIndexer object>> tup = (0, slice(None, None, None)) 1738 try: 1739 return self._getitem_lowerdim(tup) 1740 except: 1741 pass ........................................................................... /usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py in _has_valid_tuple(self=<pandas.core.indexing._iLocIndexer object>, key=(0, slice(None, None, None))) 198 199 def _has_valid_tuple(self, key): 200 """ check the key for valid keys across my indexer """ 201 for i, k in enumerate(key): 202 if i >= self.obj.ndim: --> 203 raise IndexingError('Too many indexers') 204 if not self._has_valid_type(k, i): 205 raise ValueError("Location based indexing can only have " 206 "[{types}] types" 207 .format(types=self._valid_types)) IndexingError: Too many indexers ___________________________________________________________________________ During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/usr/local/bin/gpseqc_estimate", line 504, in <module> est = centrality.bin_estimate_parallel(comb, toCalc, args.threads) File "/usr/local/lib/python3.5/dist-packages/gpseqc/centrality.py", line 213, in bin_estimate_parallel for i in list(set(df.index))) File "/usr/local/lib/python3.5/dist-packages/joblib/parallel.py", line 789, in __call__ self.retrieve() File "/usr/local/lib/python3.5/dist-packages/joblib/parallel.py", line 740, in retrieve raise exception joblib.my_exceptions.JoblibIndexingError: JoblibIndexingError ___________________________________________________________________________ Multiprocessing exception: 
........................................................................... /usr/local/bin/gpseqc_estimate in <module>() 499 500 # Estimate centrality of each bin 501 if 1 == args.threads: 502 est = centrality.bin_estimate(comb, toCalc) 503 else: --> 504 est = centrality.bin_estimate_parallel(comb, toCalc, args.threads) 505 df_saveas(est, "estimated.%s.tsv" % descr, args) 506 507 # (10) Rank bins --------------------------------------------------------------- 508 print("Ranking bins...") ........................................................................... /usr/local/lib/python3.5/dist-packages/gpseqc/centrality.py in bin_estimate_parallel(df= chrom start end sum ...2 735658 4 [11504 rows x 9 columns], mlist=['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f'], threads=7, progress=True) 208 verbose = 10 if progress else 0 209 210 # Iterate over bins 211 odf = Parallel(n_jobs = threads, verbose = verbose)( 212 delayed(bin_estimate_single)(i, df, mlist) --> 213 for i in list(set(df.index))) df.index = Int64Index([ 370, 2325, 2323, 2328, 2428, 2425, ...9, 142], dtype='int64', length=11504) 214 215 # Assemble output 216 odf = pd.concat(odf, axis = 1).transpose() 217 columns = ['chrom', 'start', 'end'] ........................................................................... /usr/local/lib/python3.5/dist-packages/joblib/parallel.py in __call__(self=Parallel(n_jobs=7), iterable=<generator object bin_estimate_parallel.<locals>.<genexpr>>) 784 if pre_dispatch == "all" or n_jobs == 1: 785 # The iterable was consumed all at once by the above for loop. 786 # No need to wait for async callbacks to trigger to 787 # consumption. 
788 self._iterating = False --> 789 self.retrieve() self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=7)> 790 # Make sure that we get a last message telling us we are done 791 elapsed_time = time.time() - self._start_time 792 self._print('Done %3i out of %3i | elapsed: %s finished', 793 (len(self._output), len(self._output), --------------------------------------------------------------------------- Sub-process traceback: --------------------------------------------------------------------------- IndexingError Tue Apr 24 13:48:52 2018 PID: 29536 Python 3.5.2: /usr/bin/python3 ........................................................................... /usr/local/lib/python3.5/dist-packages/joblib/parallel.py in __call__(self=<joblib.parallel.BatchedCalls object>) 126 def __init__(self, iterator_slice): 127 self.items = list(iterator_slice) 128 self._size = len(self.items) 129 130 def __call__(self): --> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items] self.items = [(<function bin_estimate_single>, (2870, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2871, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2872, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2873, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 
'cv_f']), {}), (<function bin_estimate_single>, (2874, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2875, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2876, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {}), (<function bin_estimate_single>, (2877, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']), {})] 132 133 def __len__(self): 134 return self._size 135 ........................................................................... /usr/local/lib/python3.5/dist-packages/joblib/parallel.py in <listcomp>(.0=<list_iterator object>) 126 def __init__(self, iterator_slice): 127 self.items = list(iterator_slice) 128 self._size = len(self.items) 129 130 def __call__(self): --> 131 return [func(*args, **kwargs) for func, args, kwargs in self.items] func = <function bin_estimate_single> args = (2877, chrom start end sum ...2 735658 4 [11504 rows x 9 columns], ['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']) kwargs = {} 132 133 def __len__(self): 134 return self._size 135 ........................................................................... 
/usr/local/lib/python3.5/dist-packages/gpseqc/centrality.py in bin_estimate_single(i=2877, df= chrom start end sum ...2 735658 4 [11504 rows x 9 columns], mlist=['prob_2p', 'prob_f', 'prob_g', 'cor_2p', 'cor_f', 'cor_g', 'roc_2p', 'roc_f', 'roc_g', 'var_2p', 'var_f', 'ff_2p', 'ff_f', 'cv_2p', 'cv_f']) 238 239 # Calculate requested metrics 240 for m in mlist: 241 # Probability 242 if m == "prob_2p": # two-points --> 243 orow[m] = est_2p(st, calc_p, lambda x, y: x / y) orow = 'chr6' m = 'prob_2p' st = 0 chr6 1 87000000 2 88000000 3 ... 596776 8 3 Name: 2877, dtype: object 244 elif m == "prob_f": # fixed 245 orow[m] = est_f(st, calc_p, lambda x, y: x / y) 246 elif m == "prob_g": # global 247 orow[m] = est_g(st, calc_p, lambda x, y: x / y) ........................................................................... /usr/local/lib/python3.5/dist-packages/gpseqc/centrality.py in est_2p(st=0 chr6 1 87000000 2 88000000 3 ... 596776 8 3 Name: 2877, dtype: object, f1=<function calc_p>, f2=<function bin_estimate_single.<locals>.<lambda>>) 118 f2 (fun): function for putting conditions together. 119 120 Returns: 121 Estimated centrality. 122 ''' --> 123 a = f1(st, 0) a = undefined f1 = <function calc_p> st = 0 chr6 1 87000000 2 88000000 3 ... 596776 8 3 Name: 2877, dtype: object 124 b = f1(st, st.shape[0] - 1) 125 return(f2(b, a)) 126 127 def est_f(st, f1, f2): ........................................................................... /usr/local/lib/python3.5/dist-packages/gpseqc/centrality.py in calc_p(st=0 chr6 1 87000000 2 88000000 3 ... 596776 8 3 Name: 2877, dtype: object, ci=0) 26 27 Returns: 28 float 29 ''' 30 assert ci < st.shape[0], "requested condition (index) not found." ---> 31 row = st.iloc[ci, :] row = undefined st.iloc = <pandas.core.indexing._iLocIndexer object> ci = 0 32 p = (row['cond_nreads'] * row['count']) 33 p = row['sum'] / p if 0 != p else np.nan 34 return(p) 35 ........................................................................... 
/usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py in __getitem__(self=<pandas.core.indexing._iLocIndexer object>, key=(0, slice(None, None, None))) 1362 try: 1363 if self._is_scalar_access(key): 1364 return self._getitem_scalar(key) 1365 except (KeyError, IndexError): 1366 pass -> 1367 return self._getitem_tuple(key) self._getitem_tuple = <bound method _iLocIndexer._getitem_tuple of <pandas.core.indexing._iLocIndexer object>> key = (0, slice(None, None, None)) 1368 else: 1369 # we by definition only have the 0th axis 1370 axis = self.axis or 0 1371 ........................................................................... /usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py in _getitem_tuple(self=<pandas.core.indexing._iLocIndexer object>, tup=(0, slice(None, None, None))) 1732 1733 return True 1734 1735 def _getitem_tuple(self, tup): 1736 -> 1737 self._has_valid_tuple(tup) self._has_valid_tuple = <bound method _NDFrameIndexer._has_valid_tuple of <pandas.core.indexing._iLocIndexer object>> tup = (0, slice(None, None, None)) 1738 try: 1739 return self._getitem_lowerdim(tup) 1740 except: 1741 pass ........................................................................... /usr/local/lib/python3.5/dist-packages/pandas/core/indexing.py in _has_valid_tuple(self=<pandas.core.indexing._iLocIndexer object>, key=(0, slice(None, None, None))) 198 199 def _has_valid_tuple(self, key): 200 """ check the key for valid keys across my indexer """ 201 for i, k in enumerate(key): 202 if i >= self.obj.ndim: --> 203 raise IndexingError('Too many indexers') 204 if not self._has_valid_type(k, i): 205 raise ValueError("Location based indexing can only have " 206 "[{types}] types" 207 .format(types=self._valid_types)) IndexingError: Too many indexers
Hi, thanks for the report! It was due to empty bins being erroneously skipped during step 6 of gpseqc_estimate. Fixed in v2.0.2 together with some other minor fixes.
Before submitting an issue, please be sure to:
git pull; sudo -H pip3 install .
This issue affects
gpseqc_estimate
gpseqc_compare
What did you do (e.g., steps to reproduce)?
What did you expect to happen?
What happened instead?
Additional information