Starlitnightly / omicverse

A Python library for multi-omics analysis, including bulk, single-cell, and spatial RNA-seq.
https://starlitnightly.github.io/omicverse/
GNU General Public License v3.0
474 stars 57 forks source link

UnicodeEncodeError using SCSA #214

Open pandaqiuqiu opened 1 week ago

pandaqiuqiu commented 1 week ago

When I use:

anno = scsa.cell_anno(clustertype='res1', cluster='all', rank_rep=True),

the following error appears.

Adding `import sys` and `os.environ["PYTHONIOENCODING"] = "utf-8"` does not solve it.

Could you provide a solution? Thank you.

Versions: pandas 1.5.3, omicverse 1.6.8

{ "name": "UnicodeEncodeError", "message": "'charmap' codec can't encode character '\u2010' in position 3: character maps to <undefined>", "stack": "--------------------------------------------------------------------------- UnicodeEncodeError Traceback (most recent call last) Cell In[8], line 1 ----> 1 anno=scsa.cell_anno(clustertype='res1', 2 cluster='all',rank_rep=True)

File E:\software\miniforge3\envs\SCSA\lib\site-packages\omicverse\single\_anno.py:464, in pySCSA.cell_anno(self, clustertype, cluster, rank_rep) 461 print('...Auto annotate cell') 463 p = Process() --> 464 p.run_cmd_p(foldchange=self.foldchange, 465 weight=self.weight, 466 pvalue=self.pvalue, 467 tissue=self.tissue, 468 species=self.species, 469 target=self.target, 470 norefdb=self.norefdb, 471 MarkerDB=None, 472 db=self.model_path, 473 noprint=self.noprint, 474 input=\"temp/rna.csv\", 475 output=self.output, 476 source=\"scanpy\", 477 cluster=cluster, 478 fc=self.foldchange, 479 outfmt=self.outfmt, 480 celltype=self.celltype, 481 Gensymbol=self.Gensymbol, 482 list_tissue=self.list_tissue, 483 cellrange=self.cellrange) 486 result=pd.read_csv('temp/rna_anno.txt',sep='\t') 487 self.result=result

File E:\software\miniforge3\envs\SCSA\lib\site-packages\omicverse\single\_SCSA.py:1387, in Process.run_cmd_p(self, foldchange, weight, pvalue, tissue, species, target, norefdb, MarkerDB, db, noprint, input, output, source, cluster, fc, outfmt, celltype, Gensymbol, list_tissue, cellrange) 1381 anno = Annotator(foldchange,weight, 1382 pvalue,tissue,species, 1383 target,norefdb,MarkerDB,db, 1384 noprint,input,output,source,cluster,fc, 1385 outfmt,celltype,Gensymbol,list_tissue,cellrange) 1386 anno.load_pickle_module(rdbname) -> 1387 outs=anno.run_detail_cmd() 1388 print(\"#Cluster\",\"Type\",\"Celltype\",\"Score\",\"Times\") 1389 for o in outs:

File E:\software\miniforge3\envs\SCSA\lib\site-packages\omicverse\single\_SCSA.py:1283, in Annotator.run_detail_cmd(self) 1281 else: 1282 self.read_user_markers('ensemblID') -> 1283 outs = self.calcu_scanpy_group(self.input,self.Gensymbol) 1284 return outs 1285 elif self.source.lower() == \"scran\":

File E:\software\miniforge3\envs\SCSA\lib\site-packages\omicverse\single\_SCSA.py:633, in Annotator.calcu_scanpy_group(self, expfile, hgvc) 631 if self.output: 632 h_values['Cluster'] = cname --> 633 Annotator.to_output(h_values,self.wb,self.outfmt,cname,\"Cell Type\") 635 #print(h_values) 636 #exit() 637 t,o_str,c,v,times = self.print_class(h_values,cname)

File E:\software\miniforge3\envs\SCSA\lib\site-packages\omicverse\single\_SCSA.py:114, in Annotator.to_output(h_values, wb, outtag, cname, title) 112 h_values.to_excel(wb,sheet_name = \"Cluster \" + cname + \" \" + title,index=False) 113 else: --> 114 h_values.to_csv(wb,sep=\"\t\",quotechar = \"\t\",index=False,header=False) 115 pass

File E:\software\miniforge3\envs\SCSA\lib\site-packages\pandas\util\_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs) 209 else: 210 kwargs[new_arg_name] = new_arg_value --> 211 return func(*args, **kwargs)

File E:\software\miniforge3\envs\SCSA\lib\site-packages\pandas\core\generic.py:3720, in NDFrame.to_csv(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, lineterminator, chunksize, date_format, doublequote, escapechar, decimal, errors, storage_options) 3709 df = self if isinstance(self, ABCDataFrame) else self.to_frame() 3711 formatter = DataFrameFormatter( 3712 frame=df, 3713 header=header, (...) 3717 decimal=decimal, 3718 ) -> 3720 return DataFrameRenderer(formatter).to_csv( 3721 path_or_buf, 3722 lineterminator=lineterminator, 3723 sep=sep, 3724 encoding=encoding, 3725 errors=errors, 3726 compression=compression, 3727 quoting=quoting, 3728 columns=columns, 3729 index_label=index_label, 3730 mode=mode, 3731 chunksize=chunksize, 3732 quotechar=quotechar, 3733 date_format=date_format, 3734 doublequote=doublequote, 3735 escapechar=escapechar, 3736 storage_options=storage_options, 3737 )

File E:\software\miniforge3\envs\SCSA\lib\site-packages\pandas\util\_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs) 209 else: 210 kwargs[new_arg_name] = new_arg_value --> 211 return func(*args, **kwargs)

File E:\software\miniforge3\envs\SCSA\lib\site-packages\pandas\io\formats\format.py:1189, in DataFrameRenderer.to_csv(self, path_or_buf, encoding, sep, columns, index_label, mode, compression, quoting, quotechar, lineterminator, chunksize, date_format, doublequote, escapechar, errors, storage_options) 1168 created_buffer = False 1170 csv_formatter = CSVFormatter( 1171 path_or_buf=path_or_buf, 1172 lineterminator=lineterminator, (...) 1187 formatter=self.fmt, 1188 ) -> 1189 csv_formatter.save() 1191 if created_buffer: 1192 assert isinstance(path_or_buf, StringIO)

File E:\software\miniforge3\envs\SCSA\lib\site-packages\pandas\io\formats\csvs.py:261, in CSVFormatter.save(self) 241 with get_handle( 242 self.filepath_or_buffer, 243 self.mode, (...) 249 250 # Note: self.encoding is irrelevant here 251 self.writer = csvlib.writer( 252 handles.handle, 253 lineterminator=self.lineterminator, (...) 258 quotechar=self.quotechar, 259 ) --> 261 self._save()

File E:\software\miniforge3\envs\SCSA\lib\site-packages\pandas\io\formats\csvs.py:266, in CSVFormatter._save(self) 264 if self._need_to_save_header: 265 self._save_header() --> 266 self._save_body()

File E:\software\miniforge3\envs\SCSA\lib\site-packages\pandas\io\formats\csvs.py:304, in CSVFormatter._save_body(self) 302 if start_i >= end_i: 303 break --> 304 self._save_chunk(start_i, end_i)

File E:\software\miniforge3\envs\SCSA\lib\site-packages\pandas\io\formats\csvs.py:315, in CSVFormatter._save_chunk(self, start_i, end_i) 312 data = [res.iget_values(i) for i in range(len(res.items))] 314 ix = self.data_index[slicer]._format_native_types(**self._number_format) --> 315 libwriters.write_csv_rows( 316 data, 317 ix, 318 self.nlevels, 319 self.cols, 320 self.writer, 321 )

File E:\software\miniforge3\envs\SCSA\lib\site-packages\pandas\_libs\writers.pyx:72, in pandas._libs.writers.write_csv_rows()

File E:\software\miniforge3\envs\SCSA\lib\encodings\cp1252.py:19, in IncrementalEncoder.encode(self, input, final) 18 def encode(self, input, final=False): ---> 19 return codecs.charmap_encode(input,self.errors,encoding_table)[0]

UnicodeEncodeError: 'charmap' codec can't encode character '\u2010' in position 3: character maps to <undefined>" }

Starlitnightly commented 2 days ago

If you want to use SCSA, you need to install pandas==1.5.3