Open panjames opened 9 years ago
Error output
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-8-7d153f5dee79> in <module>()
----> 1 combat_matrix, samples = combat(get_unique_annotations(labels))
/Users/James/Documents/Brain Analysis/star_api/analysis.pyc in combat(df)
688 df = df.set_index('code')
689
--> 690 combined_matrix = combine_matrix(names)
691 # combined_matrix.to_csv("combined_matrix.csv")
692 m = drop_missing_samples(combined_matrix).dropna()
/Users/James/Documents/Brain Analysis/star_api/analysis.pyc in combine_matrix(names)
662 i += 1
663 print "%s/%s"%(i, len(names)), gse_name, gpl_name
--> 664 median_gene_data = query_median_gene_data(gse_name, gpl_name)
665 if median_gene_data.empty:
666 continue
/Users/James/Documents/Brain Analysis/star_api/analysis.pyc in query_median_gene_data(gse_name, gpl_name)
637 .set_index(['mygene_sym', 'mygene_entrez'])
638 else:
--> 639 gene_data = query_gene_data(gse_name, gpl_name)
640 median_gene_data = gene_data \
641 .reset_index() \
/Users/James/Documents/Brain Analysis/star_api/main.py in query_gene_data(gse_name, gpl_name)
136 series_id = query_record(gse_name, "series", "gse_name")['id']
137 platform_id = query_record(gpl_name, "platform", "gpl_name")['id']
--> 138 gene_data = get_gene_data(series_id, platform_id)
139 gene_data.columns = gene_data.columns + "_" + gpl_name + "_" + gse_name
140 return gene_data
/Users/James/Documents/Brain Analysis/star_api/main.py in get_gene_data(series_id, platform_id)
118
119 def get_gene_data(series_id, platform_id):
--> 120 data = get_data(series_id, platform_id)
121 platform_probes = get_platform_probes(platform_id)
122 gene_data = platform_probes[['mygene_sym', 'mygene_entrez']] \
/Users/James/Documents/Brain Analysis/star_api/main.py in get_data(series_id, platform_id, impute)
83 elif impute:
84 data = impute_data(data)
---> 85 data = log_data(data) #logc
86 data.to_csv("log.data.csv")
87
/Users/James/Documents/Brain Analysis/star_api/main.py in log_data(df)
241
242 def log_data(df):
--> 243 if is_logged(df):
244 return df
245
/Users/James/Documents/Brain Analysis/star_api/main.py in is_logged(df)
250
251 def is_logged(df):
--> 252 return np.max(df.values) < 10
253
254 # def is_logged(data):
/Users/James/anaconda/lib/python2.7/site-packages/numpy/core/fromnumeric.pyc in amax(a, axis, out, keepdims)
2138 else:
2139 return _methods._amax(a, axis=axis,
-> 2140 out=out, keepdims=keepdims)
2141
2142 def amin(a, axis=None, out=None, keepdims=False):
/Users/James/anaconda/lib/python2.7/site-packages/numpy/core/_methods.pyc in _amax(a, axis, out, keepdims)
24 # small reductions
25 def _amax(a, axis=None, out=None, keepdims=False):
---> 26 return umr_maximum(a, axis, None, out, keepdims)
27
28 def _amin(a, axis=None, out=None, keepdims=False):
ValueError: zero-size array to reduction operation maximum which has no identity
FYI GSE68850 seems to be causing this specific problem.
The Series Matrix File for GSE68850 does not include any content between series_matrix_table_begin and series_matrix_table_end, which means there are no expression values in the Series Matrix File. As a result, the data frame passed to is_logged is empty, and np.max raises the ValueError above on the zero-size array. If so, the solution would be to check whether any expression profiles were actually retrieved for a given GSE before continuing.
Encountered when using: