Closed hyjforesight closed 2 years ago
Hi, I am also facing this issue. It seems that the loom file generated by the AUCell CLI is incompatible with the downstream analysis in the Jupyter notebook. When I ran the integration step (the last part of the first tutorial):
# collect SCENIC AUCell output
lf = lp.connect( f_pyscenic_output, mode='r+', validate=False )
auc_mtx = pd.DataFrame( lf.ca.RegulonsAUC, index=lf.ca.CellID)
lf.close()
It runs successfully, but if I remove validate=False, the following error is raised:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_9749/3408559978.py in <module>
4
5 # collect SCENIC AUCell output
----> 6 lf = lp.connect( f_pyscenic_output, mode='r+')
7 auc_mtx = pd.DataFrame( lf.ca.RegulonsAUC, index=lf.ca.CellID)
8 lf.close()
~/mambaforge/envs/pyscenic/lib/python3.7/site-packages/loompy/loompy.py in connect(filename, mode, validate, spec_version)
1387 Note: if validation is requested, an exception is raised if validation fails.
1388 """
-> 1389 return LoomConnection(filename, mode, validate=validate)
~/mambaforge/envs/pyscenic/lib/python3.7/site-packages/loompy/loompy.py in __init__(self, filename, mode, validate)
80 lv = loompy.LoomValidator()
81 if not lv.validate(filename):
---> 82 raise ValueError("\n".join(lv.errors) + f"\n{filename} does not appead to be a valid Loom file according to Loom spec version '{lv.version}'")
83
84 self._file = h5py.File(filename, mode)
ValueError: Row attribute 'Regulons' dtype [('AHR(+)', '<i8'), ('AR(+)', '<i8'), ('ARNT(+)', '<i8'), ('ARNTL(+)', '<i8'), ('ARNTL2(+)', '<i8'), ('ATF1(+)', '<i8'), ('ATF2(+)', '<i8'), ('ATF3(+)', '<i8'), ('ATF4(+)', '<i8'), ('ATF5(+)', '<i8'), ('ATF6(+)', '<i8'), ('ATF6B(+)', '<i8'), ('ATF7(+)', '<i8'), ('BACH1(+)', '<i8'), ('BACH2(+)', '<i8'), ('BATF(+)', '<i8'), ('BATF3(+)', '<i8'), ('BCL11A(+)', '<i8'), ('BCL3(+)', '<i8'), ('BCLAF1(+)', '<i8'), ('BHLHE40(+)', '<i8'), ('BHLHE41(+)', '<i8'), ('BRCA1(+)', '<i8'), ('BRF1(+)', '<i8'), ('BRF2(+)', '<i8'), ('CBX3(+)', '<i8'), ('CEBPA(+)', '<i8'), ('CEBPB(+)', '<i8'), ('CEBPD(+)', '<i8'), ('CEBPE(+)', '<i8'), ('CEBPG(+)', '<i8'), ('CEBPZ(+)', '<i8'), ('CERS5(+)', '<i8'), ('CHD2(+)', '<i8'), ('CHURC1(+)', '<i8'), ('CIC(+)', '<i8'), ('CLOCK(+)', '<i8'), ('CNOT4(+)', '<i8'), ('CPSF4(+)', '<i8'), ('CREB1(+)', '<i8'), ('CREB3(+)', '<i8'), ('CREB3L2(+)', '<i8'), ('CREB3L4(+)', '<i8'), ('CREB5(+)', '<i8'), ('CREM(+)', '<i8'), ('CTCF(+)', '<i8'), ('CUX1(+)', '<i8'), ('DBP(+)', '<i8'), ('E2F1(+)', '<i8'), ('E2F2(+)', '<i8'), ('E2F3(+)', '<i8'), ('E2F4(+)', '<i8'), ('E2F6(+)', '<i8'), ('E4F1(+)', '<i8'), ('EBF1(+)', '<i8'), ('EGR1(+)', '<i8'), ('EGR2(+)', '<i8'), ('EGR3(+)', '<i8'), ('ELF1(+)', '<i8'), ('ELF2(+)', '<i8'), ('ELF4(+)', '<i8'), ('ELK1(+)', '<i8'), ('ELK3(+)', '<i8'), ('ELK4(+)', '<i8'), ('EOMES(+)', '<i8'), ('EP300(+)', '<i8'), ('ERF(+)', '<i8'), ('ERG(+)', '<i8'), ('ESR1(+)', '<i8'), ('ESR2(+)', '<i8'), ('ESRRA(+)', '<i8'), ('ETS1(+)', '<i8'), ('ETS2(+)', '<i8'), ('ETV2(+)', '<i8'), ('ETV3(+)', '<i8'), ('ETV5(+)', '<i8'), ('ETV6(+)', '<i8'), ('ETV7(+)', '<i8'), ('EZH2(+)', '<i8'), ('FIZ1(+)', '<i8'), ('FLI1(+)', '<i8'), ('FOS(+)', '<i8'), ('FOSB(+)', '<i8'), ('FOSL1(+)', '<i8'), ('FOSL2(+)', '<i8'), ('FOXD2(+)', '<i8'), ('FOXJ2(+)', '<i8'), ('FOXK2(+)', '<i8'), ('FOXN2(+)', '<i8'), ('FOXN3(+)', '<i8'), ('FOXO1(+)', '<i8'), ('FOXO3(+)', '<i8'), ('FOXP1(+)', '<i8'), ('FOXP3(+)', '<i8'), ('FOXP4(+)', '<i8'), 
('GABPA(+)', '<i8'), ('GABPB1(+)', '<i8'), ('GATA1(+)', '<i8'), ('GATA2(+)', '<i8'), ('GATA3(+)', '<i8'), ('GRHL1(+)', '<i8'), ('GTF2B(+)', '<i8'), ('GTF2F1(+)', '<i8'), ('GTF3C2(+)', '<i8'), ('HCFC1(+)', '<i8'), ('HDAC2(+)', '<i8'), ('HES6(+)', '<i8'), ('HESX1(+)', '<i8'), ('HINFP(+)', '<i8'), ('HMBOX1(+)', '<i8'), ('HMGA1(+)', '<i8'), ('HOXA10(+)', '<i8'), ('HOXA5(+)', '<i8'), ('HOXA9(+)', '<i8'), ('HOXB2(+)', '<i8'), ('HOXB3(+)', '<i8'), ('IRF1(+)', '<i8'), ('IRF2(+)', '<i8'), ('IRF3(+)', '<i8'), ('IRF4(+)', '<i8'), ('IRF5(+)', '<i8'), ('IRF7(+)', '<i8'), ('IRF8(+)', '<i8'), ('IRF9(+)', '<i8'), ('JDP2(+)', '<i8'), ('JUN(+)', '<i8'), ('JUNB(+)', '<i8'), ('JUND(+)', '<i8'), ('KDM5A(+)', '<i8'), ('KDM5B(+)', '<i8'), ('KLF10(+)', '<i8'), ('KLF11(+)', '<i8'), ('KLF12(+)', '<i8'), ('KLF16(+)', '<i8'), ('KLF2(+)', '<i8'), ('KLF3(+)', '<i8'), ('KLF4(+)', '<i8'), ('KLF5(+)', '<i8'), ('KLF7(+)', '<i8'), ('KLF8(+)', '<i8'), ('KLF9(+)', '<i8'), ('LEF1(+)', '<i8'), ('LMO2(+)', '<i8'), ('LYL1(+)', '<i8'), ('MAF(+)', '<i8'), ('MAFB(+)', '<i8'), ('MAFF(+)', '<i8'), ('MAFG(+)', '<i8'), ('MAFK(+)', '<i8'), ('MAX(+)', '<i8'), ('MAZ(+)', '<i8'), ('MBD4(+)', '<i8'), ('MEF2C(+)', '<i8'), ('MGA(+)', '<i8'), ('MITF(+)', '<i8'), ('MLX(+)', '<i8'), ('MTA3(+)', '<i8'), ('MXD1(+)', '<i8'), ('MXD3(+)', '<i8'), ('MXD4(+)', '<i8'), ('MXI1(+)', '<i8'), ('MYBL2(+)', '<i8'), ('MYC(+)', '<i8'), ('MYCN(+)', '<i8'), ('NELFE(+)', '<i8'), ('NFATC1(+)', '<i8'), ('NFATC2(+)', '<i8'), ('NFE2(+)', '<i8'), ('NFE2L1(+)', '<i8'), ('NFE2L2(+)', '<i8'), ('NFE2L3(+)', '<i8'), ('NFIB(+)', '<i8'), ('NFIC(+)', '<i8'), ('NFIL3(+)', '<i8'), ('NFKB1(+)', '<i8'), ('NFKB2(+)', '<i8'), ('NFYA(+)', '<i8'), ('NFYB(+)', '<i8'), ('NPDC1(+)', '<i8'), ('NR1D1(+)', '<i8'), ('NR1H2(+)', '<i8'), ('NR1H3(+)', '<i8'), ('NR1I3(+)', '<i8'), ('NR2C1(+)', '<i8'), ('NR2C2(+)', '<i8'), ('NR2F6(+)', '<i8'), ('NR3C1(+)', '<i8'), ('NR5A1(+)', '<i8'), ('NR6A1(+)', '<i8'), ('NRF1(+)', '<i8'), ('OSR2(+)', '<i8'), ('PATZ1(+)', '<i8'), 
('PAX2(+)', '<i8'), ('PAX5(+)', '<i8'), ('PAXIP1(+)', '<i8'), ('PBX1(+)', '<i8'), ('PBX3(+)', '<i8'), ('PHF8(+)', '<i8'), ('PKNOX1(+)', '<i8'), ('PLAGL1(+)', '<i8'), ('PML(+)', '<i8'), ('POLE3(+)', '<i8'), ('POLR2A(+)', '<i8'), ('POLR3G(+)', '<i8'), ('POU2F1(+)', '<i8'), ('POU2F2(+)', '<i8'), ('POU6F1(+)', '<i8'), ('PPARA(+)', '<i8'), ('RAD21(+)', '<i8'), ('RARA(+)', '<i8'), ('RARG(+)', '<i8'), ('RB1(+)', '<i8'), ('RBBP5(+)', '<i8'), ('RCOR1(+)', '<i8'), ('REL(+)', '<i8'), ('RELA(+)', '<i8'), ('RELB(+)', '<i8'), ('REST(+)', '<i8'), ('RFX2(+)', '<i8'), ('RFX3(+)', '<i8'), ('RFX5(+)', '<i8'), ('RORB(+)', '<i8'), ('RORC(+)', '<i8'), ('RUNX1(+)', '<i8'), ('RUNX2(+)', '<i8'), ('RXRA(+)', '<i8'), ('RXRB(+)', '<i8'), ('SAP30(+)', '<i8'), ('SETDB1(+)', '<i8'), ('SF1(+)', '<i8'), ('SIN3A(+)', '<i8'), ('SMARCA4(+)', '<i8'), ('SMARCA5(+)', '<i8'), ('SMARCC1(+)', '<i8'), ('SMARCC2(+)', '<i8'), ('SNAI1(+)', '<i8'), ('SNAPC4(+)', '<i8'), ('SOX12(+)', '<i8'), ('SOX6(+)', '<i8'), ('SOX8(+)', '<i8'), ('SP1(+)', '<i8'), ('SP2(+)', '<i8'), ('SP3(+)', '<i8'), ('SP4(+)', '<i8'), ('SPI1(+)', '<i8'), ('SPIB(+)', '<i8'), ('SPIC(+)', '<i8'), ('SREBF1(+)', '<i8'), ('SREBF2(+)', '<i8'), ('SRF(+)', '<i8'), ('STAT1(+)', '<i8'), ('STAT2(+)', '<i8'), ('STAT3(+)', '<i8'), ('STAT5A(+)', '<i8'), ('STAT5B(+)', '<i8'), ('STAT6(+)', '<i8'), ('SUPT20H(+)', '<i8'), ('SUZ12(+)', '<i8'), ('TAF1(+)', '<i8'), ('TAF7(+)', '<i8'), ('TAL1(+)', '<i8'), ('TBL1XR1(+)', '<i8'), ('TBP(+)', '<i8'), ('TBX19(+)', '<i8'), ('TBX21(+)', '<i8'), ('TBX6(+)', '<i8'), ('TCF12(+)', '<i8'), ('TCF4(+)', '<i8'), ('TCF7(+)', '<i8'), ('TCF7L1(+)', '<i8'), ('TEF(+)', '<i8'), ('TFAP4(+)', '<i8'), ('TFCP2L1(+)', '<i8'), ('TFDP1(+)', '<i8'), ('TFE3(+)', '<i8'), ('TFEB(+)', '<i8'), ('TFEC(+)', '<i8'), ('TGIF1(+)', '<i8'), ('TGIF2(+)', '<i8'), ('THAP1(+)', '<i8'), ('THAP11(+)', '<i8'), ('THRA(+)', '<i8'), ('TP53(+)', '<i8'), ('TP63(+)', '<i8'), ('TRIM28(+)', '<i8'), ('TWIST2(+)', '<i8'), ('USF1(+)', '<i8'), ('VDR(+)', '<i8'), 
('VPS4B(+)', '<i8'), ('XBP1(+)', '<i8'), ('XRCC4(+)', '<i8'), ('YY1(+)', '<i8'), ('YY2(+)', '<i8'), ('ZBTB17(+)', '<i8'), ('ZBTB2(+)', '<i8'), ('ZBTB26(+)', '<i8'), ('ZBTB33(+)', '<i8'), ('ZBTB41(+)', '<i8'), ('ZBTB7A(+)', '<i8'), ('ZBTB7B(+)', '<i8'), ('ZEB1(+)', '<i8'), ('ZFHX3(+)', '<i8'), ('ZFP30(+)', '<i8'), ('ZKSCAN3(+)', '<i8'), ('ZMIZ1(+)', '<i8'), ('ZNF10(+)', '<i8'), ('ZNF101(+)', '<i8'), ('ZNF124(+)', '<i8'), ('ZNF143(+)', '<i8'), ('ZNF16(+)', '<i8'), ('ZNF182(+)', '<i8'), ('ZNF227(+)', '<i8'), ('ZNF263(+)', '<i8'), ('ZNF274(+)', '<i8'), ('ZNF283(+)', '<i8'), ('ZNF30(+)', '<i8'), ('ZNF354C(+)', '<i8'), ('ZNF358(+)', '<i8'), ('ZNF362(+)', '<i8'), ('ZNF384(+)', '<i8'), ('ZNF420(+)', '<i8'), ('ZNF467(+)', '<i8'), ('ZNF470(+)', '<i8'), ('ZNF484(+)', '<i8'), ('ZNF501(+)', '<i8'), ('ZNF513(+)', '<i8'), ('ZNF568(+)', '<i8'), ('ZNF579(+)', '<i8'), ('ZNF580(+)', '<i8'), ('ZNF607(+)', '<i8'), ('ZNF674(+)', '<i8'), ('ZNF721(+)', '<i8'), ('ZNF76(+)', '<i8'), ('ZNF81(+)', '<i8'), ('ZNF831(+)', '<i8'), ('ZNF91(+)', '<i8'), ('ZNF92(+)', '<i8'), ('ZSCAN2(+)', '<i8'), ('ZSCAN31(+)', '<i8')] is not allowed
Column attribute 'RegulonsAUC' dtype [('AHR(+)', '<f8'), ('AR(+)', '<f8'), ('ARNT(+)', '<f8'), ('ARNTL(+)', '<f8'), ('ARNTL2(+)', '<f8'), ('ATF1(+)', '<f8'), ('ATF2(+)', '<f8'), ('ATF3(+)', '<f8'), ('ATF4(+)', '<f8'), ('ATF5(+)', '<f8'), ('ATF6(+)', '<f8'), ('ATF6B(+)', '<f8'), ('ATF7(+)', '<f8'), ('BACH1(+)', '<f8'), ('BACH2(+)', '<f8'), ('BATF(+)', '<f8'), ('BATF3(+)', '<f8'), ('BCL11A(+)', '<f8'), ('BCL3(+)', '<f8'), ('BCLAF1(+)', '<f8'), ('BHLHE40(+)', '<f8'), ('BHLHE41(+)', '<f8'), ('BRCA1(+)', '<f8'), ('BRF1(+)', '<f8'), ('BRF2(+)', '<f8'), ('CBX3(+)', '<f8'), ('CEBPA(+)', '<f8'), ('CEBPB(+)', '<f8'), ('CEBPD(+)', '<f8'), ('CEBPE(+)', '<f8'), ('CEBPG(+)', '<f8'), ('CEBPZ(+)', '<f8'), ('CERS5(+)', '<f8'), ('CHD2(+)', '<f8'), ('CHURC1(+)', '<f8'), ('CIC(+)', '<f8'), ('CLOCK(+)', '<f8'), ('CNOT4(+)', '<f8'), ('CPSF4(+)', '<f8'), ('CREB1(+)', '<f8'), ('CREB3(+)', '<f8'), ('CREB3L2(+)', '<f8'), ('CREB3L4(+)', '<f8'), ('CREB5(+)', '<f8'), ('CREM(+)', '<f8'), ('CTCF(+)', '<f8'), ('CUX1(+)', '<f8'), ('DBP(+)', '<f8'), ('E2F1(+)', '<f8'), ('E2F2(+)', '<f8'), ('E2F3(+)', '<f8'), ('E2F4(+)', '<f8'), ('E2F6(+)', '<f8'), ('E4F1(+)', '<f8'), ('EBF1(+)', '<f8'), ('EGR1(+)', '<f8'), ('EGR2(+)', '<f8'), ('EGR3(+)', '<f8'), ('ELF1(+)', '<f8'), ('ELF2(+)', '<f8'), ('ELF4(+)', '<f8'), ('ELK1(+)', '<f8'), ('ELK3(+)', '<f8'), ('ELK4(+)', '<f8'), ('EOMES(+)', '<f8'), ('EP300(+)', '<f8'), ('ERF(+)', '<f8'), ('ERG(+)', '<f8'), ('ESR1(+)', '<f8'), ('ESR2(+)', '<f8'), ('ESRRA(+)', '<f8'), ('ETS1(+)', '<f8'), ('ETS2(+)', '<f8'), ('ETV2(+)', '<f8'), ('ETV3(+)', '<f8'), ('ETV5(+)', '<f8'), ('ETV6(+)', '<f8'), ('ETV7(+)', '<f8'), ('EZH2(+)', '<f8'), ('FIZ1(+)', '<f8'), ('FLI1(+)', '<f8'), ('FOS(+)', '<f8'), ('FOSB(+)', '<f8'), ('FOSL1(+)', '<f8'), ('FOSL2(+)', '<f8'), ('FOXD2(+)', '<f8'), ('FOXJ2(+)', '<f8'), ('FOXK2(+)', '<f8'), ('FOXN2(+)', '<f8'), ('FOXN3(+)', '<f8'), ('FOXO1(+)', '<f8'), ('FOXO3(+)', '<f8'), ('FOXP1(+)', '<f8'), ('FOXP3(+)', '<f8'), ('FOXP4(+)', '<f8'), ('GABPA(+)', 
'<f8'), ('GABPB1(+)', '<f8'), ('GATA1(+)', '<f8'), ('GATA2(+)', '<f8'), ('GATA3(+)', '<f8'), ('GRHL1(+)', '<f8'), ('GTF2B(+)', '<f8'), ('GTF2F1(+)', '<f8'), ('GTF3C2(+)', '<f8'), ('HCFC1(+)', '<f8'), ('HDAC2(+)', '<f8'), ('HES6(+)', '<f8'), ('HESX1(+)', '<f8'), ('HINFP(+)', '<f8'), ('HMBOX1(+)', '<f8'), ('HMGA1(+)', '<f8'), ('HOXA10(+)', '<f8'), ('HOXA5(+)', '<f8'), ('HOXA9(+)', '<f8'), ('HOXB2(+)', '<f8'), ('HOXB3(+)', '<f8'), ('IRF1(+)', '<f8'), ('IRF2(+)', '<f8'), ('IRF3(+)', '<f8'), ('IRF4(+)', '<f8'), ('IRF5(+)', '<f8'), ('IRF7(+)', '<f8'), ('IRF8(+)', '<f8'), ('IRF9(+)', '<f8'), ('JDP2(+)', '<f8'), ('JUN(+)', '<f8'), ('JUNB(+)', '<f8'), ('JUND(+)', '<f8'), ('KDM5A(+)', '<f8'), ('KDM5B(+)', '<f8'), ('KLF10(+)', '<f8'), ('KLF11(+)', '<f8'), ('KLF12(+)', '<f8'), ('KLF16(+)', '<f8'), ('KLF2(+)', '<f8'), ('KLF3(+)', '<f8'), ('KLF4(+)', '<f8'), ('KLF5(+)', '<f8'), ('KLF7(+)', '<f8'), ('KLF8(+)', '<f8'), ('KLF9(+)', '<f8'), ('LEF1(+)', '<f8'), ('LMO2(+)', '<f8'), ('LYL1(+)', '<f8'), ('MAF(+)', '<f8'), ('MAFB(+)', '<f8'), ('MAFF(+)', '<f8'), ('MAFG(+)', '<f8'), ('MAFK(+)', '<f8'), ('MAX(+)', '<f8'), ('MAZ(+)', '<f8'), ('MBD4(+)', '<f8'), ('MEF2C(+)', '<f8'), ('MGA(+)', '<f8'), ('MITF(+)', '<f8'), ('MLX(+)', '<f8'), ('MTA3(+)', '<f8'), ('MXD1(+)', '<f8'), ('MXD3(+)', '<f8'), ('MXD4(+)', '<f8'), ('MXI1(+)', '<f8'), ('MYBL2(+)', '<f8'), ('MYC(+)', '<f8'), ('MYCN(+)', '<f8'), ('NELFE(+)', '<f8'), ('NFATC1(+)', '<f8'), ('NFATC2(+)', '<f8'), ('NFE2(+)', '<f8'), ('NFE2L1(+)', '<f8'), ('NFE2L2(+)', '<f8'), ('NFE2L3(+)', '<f8'), ('NFIB(+)', '<f8'), ('NFIC(+)', '<f8'), ('NFIL3(+)', '<f8'), ('NFKB1(+)', '<f8'), ('NFKB2(+)', '<f8'), ('NFYA(+)', '<f8'), ('NFYB(+)', '<f8'), ('NPDC1(+)', '<f8'), ('NR1D1(+)', '<f8'), ('NR1H2(+)', '<f8'), ('NR1H3(+)', '<f8'), ('NR1I3(+)', '<f8'), ('NR2C1(+)', '<f8'), ('NR2C2(+)', '<f8'), ('NR2F6(+)', '<f8'), ('NR3C1(+)', '<f8'), ('NR5A1(+)', '<f8'), ('NR6A1(+)', '<f8'), ('NRF1(+)', '<f8'), ('OSR2(+)', '<f8'), ('PATZ1(+)', '<f8'), ('PAX2(+)', '<f8'), 
('PAX5(+)', '<f8'), ('PAXIP1(+)', '<f8'), ('PBX1(+)', '<f8'), ('PBX3(+)', '<f8'), ('PHF8(+)', '<f8'), ('PKNOX1(+)', '<f8'), ('PLAGL1(+)', '<f8'), ('PML(+)', '<f8'), ('POLE3(+)', '<f8'), ('POLR2A(+)', '<f8'), ('POLR3G(+)', '<f8'), ('POU2F1(+)', '<f8'), ('POU2F2(+)', '<f8'), ('POU6F1(+)', '<f8'), ('PPARA(+)', '<f8'), ('RAD21(+)', '<f8'), ('RARA(+)', '<f8'), ('RARG(+)', '<f8'), ('RB1(+)', '<f8'), ('RBBP5(+)', '<f8'), ('RCOR1(+)', '<f8'), ('REL(+)', '<f8'), ('RELA(+)', '<f8'), ('RELB(+)', '<f8'), ('REST(+)', '<f8'), ('RFX2(+)', '<f8'), ('RFX3(+)', '<f8'), ('RFX5(+)', '<f8'), ('RORB(+)', '<f8'), ('RORC(+)', '<f8'), ('RUNX1(+)', '<f8'), ('RUNX2(+)', '<f8'), ('RXRA(+)', '<f8'), ('RXRB(+)', '<f8'), ('SAP30(+)', '<f8'), ('SETDB1(+)', '<f8'), ('SF1(+)', '<f8'), ('SIN3A(+)', '<f8'), ('SMARCA4(+)', '<f8'), ('SMARCA5(+)', '<f8'), ('SMARCC1(+)', '<f8'), ('SMARCC2(+)', '<f8'), ('SNAI1(+)', '<f8'), ('SNAPC4(+)', '<f8'), ('SOX12(+)', '<f8'), ('SOX6(+)', '<f8'), ('SOX8(+)', '<f8'), ('SP1(+)', '<f8'), ('SP2(+)', '<f8'), ('SP3(+)', '<f8'), ('SP4(+)', '<f8'), ('SPI1(+)', '<f8'), ('SPIB(+)', '<f8'), ('SPIC(+)', '<f8'), ('SREBF1(+)', '<f8'), ('SREBF2(+)', '<f8'), ('SRF(+)', '<f8'), ('STAT1(+)', '<f8'), ('STAT2(+)', '<f8'), ('STAT3(+)', '<f8'), ('STAT5A(+)', '<f8'), ('STAT5B(+)', '<f8'), ('STAT6(+)', '<f8'), ('SUPT20H(+)', '<f8'), ('SUZ12(+)', '<f8'), ('TAF1(+)', '<f8'), ('TAF7(+)', '<f8'), ('TAL1(+)', '<f8'), ('TBL1XR1(+)', '<f8'), ('TBP(+)', '<f8'), ('TBX19(+)', '<f8'), ('TBX21(+)', '<f8'), ('TBX6(+)', '<f8'), ('TCF12(+)', '<f8'), ('TCF4(+)', '<f8'), ('TCF7(+)', '<f8'), ('TCF7L1(+)', '<f8'), ('TEF(+)', '<f8'), ('TFAP4(+)', '<f8'), ('TFCP2L1(+)', '<f8'), ('TFDP1(+)', '<f8'), ('TFE3(+)', '<f8'), ('TFEB(+)', '<f8'), ('TFEC(+)', '<f8'), ('TGIF1(+)', '<f8'), ('TGIF2(+)', '<f8'), ('THAP1(+)', '<f8'), ('THAP11(+)', '<f8'), ('THRA(+)', '<f8'), ('TP53(+)', '<f8'), ('TP63(+)', '<f8'), ('TRIM28(+)', '<f8'), ('TWIST2(+)', '<f8'), ('USF1(+)', '<f8'), ('VDR(+)', '<f8'), ('VPS4B(+)', '<f8'), 
('XBP1(+)', '<f8'), ('XRCC4(+)', '<f8'), ('YY1(+)', '<f8'), ('YY2(+)', '<f8'), ('ZBTB17(+)', '<f8'), ('ZBTB2(+)', '<f8'), ('ZBTB26(+)', '<f8'), ('ZBTB33(+)', '<f8'), ('ZBTB41(+)', '<f8'), ('ZBTB7A(+)', '<f8'), ('ZBTB7B(+)', '<f8'), ('ZEB1(+)', '<f8'), ('ZFHX3(+)', '<f8'), ('ZFP30(+)', '<f8'), ('ZKSCAN3(+)', '<f8'), ('ZMIZ1(+)', '<f8'), ('ZNF10(+)', '<f8'), ('ZNF101(+)', '<f8'), ('ZNF124(+)', '<f8'), ('ZNF143(+)', '<f8'), ('ZNF16(+)', '<f8'), ('ZNF182(+)', '<f8'), ('ZNF227(+)', '<f8'), ('ZNF263(+)', '<f8'), ('ZNF274(+)', '<f8'), ('ZNF283(+)', '<f8'), ('ZNF30(+)', '<f8'), ('ZNF354C(+)', '<f8'), ('ZNF358(+)', '<f8'), ('ZNF362(+)', '<f8'), ('ZNF384(+)', '<f8'), ('ZNF420(+)', '<f8'), ('ZNF467(+)', '<f8'), ('ZNF470(+)', '<f8'), ('ZNF484(+)', '<f8'), ('ZNF501(+)', '<f8'), ('ZNF513(+)', '<f8'), ('ZNF568(+)', '<f8'), ('ZNF579(+)', '<f8'), ('ZNF580(+)', '<f8'), ('ZNF607(+)', '<f8'), ('ZNF674(+)', '<f8'), ('ZNF721(+)', '<f8'), ('ZNF76(+)', '<f8'), ('ZNF81(+)', '<f8'), ('ZNF831(+)', '<f8'), ('ZNF91(+)', '<f8'), ('ZNF92(+)', '<f8'), ('ZSCAN2(+)', '<f8'), ('ZSCAN31(+)', '<f8')] is not allowed
For help, see http://linnarssonlab.org/loompy/format/
pbmc10k_SCENIC_aucell.loom does not appead to be a valid Loom file according to Loom spec version '3.0.0'
@Jay-Leung
Indeed, the loom file generated by pySCENIC is not compatible with the loom specification.
For this reason, the parameter validate has to be set to False when reading the file.
If the file is read successfully with validate set to False, you can carry on with the downstream analysis.
@SeppeDeWinter I see, thanks for clarifying! I removed the validation because I faced the same issue as @hyjforesight when reading the loom file with Scanpy, so I thought it could be due to a file format incompatibility. Do you have any idea why there is an error when reading the file into Scanpy?
Hi @Jay-Leung
I'm not entirely sure what is causing the error; maybe Scanpy has been updated since we made the tutorial.
You can always generate the Scanpy AnnData object yourself by reading all elements from the final loom file and passing them to an AnnData object, i.e. something like this:
# scenic output
lf = lp.connect( f_final_loom, mode='r', validate=False )
meta = json.loads(zlib.decompress(base64.b64decode( lf.attrs.MetaData )))
exprMat = pd.DataFrame( lf[:,:], index=lf.ra.Gene, columns=lf.ca.CellID).T
auc_mtx = pd.DataFrame( lf.ca.RegulonsAUC, index=lf.ca.CellID)
# cell annotations from the loom column attributes (modify this code so it is relevant for your data):
cellAnnot = pd.concat(
[
pd.DataFrame( lf.ca.Celltype_Garnett, index=lf.ca.CellID ),
pd.DataFrame( lf.ca.ClusterID, index=lf.ca.CellID ),
pd.DataFrame( lf.ca.Louvain_clusters_Scanpy, index=lf.ca.CellID ),
pd.DataFrame( lf.ca.Percent_mito, index=lf.ca.CellID ),
pd.DataFrame( lf.ca.nGene, index=lf.ca.CellID ),
pd.DataFrame( lf.ca.nUMI, index=lf.ca.CellID ),
],
axis=1
)
cellAnnot.columns = [
'Celltype_Garnett',
'ClusterID',
'Louvain_clusters_Scanpy',
'Percent_mito',
'nGene',
'nUMI']
# capture embeddings:
dr = [
pd.DataFrame( lf.ca.Embedding, index=lf.ca.CellID )
]
dr_names = [
meta['embeddings'][0]['name'].replace(" ","_")
]
# add other embeddings
drx = pd.DataFrame( lf.ca.Embeddings_X, index=lf.ca.CellID )
dry = pd.DataFrame( lf.ca.Embeddings_Y, index=lf.ca.CellID )
for i in range( len(drx.columns) ):
dr.append( pd.concat( [ drx.iloc[:,i], dry.iloc[:,i] ], sort=False, axis=1, join='outer' ))
dr_names.append( meta['embeddings'][i+1]['name'].replace(" ","_").replace('/','-') )
# rename columns:
for i,x in enumerate( dr ):
x.columns = ['X','Y']
lf.close()
And then pass to AnnData
from scanpy import AnnData
adata = AnnData(
x = exprMat, #add expression matrix, this could also be the auc_mtx
obs = cellAnnot, #add cell metadata
obsm = {dr_name: dr_.to_numpy() for dr_names, dr_ in zip(dr_names, dr)}) #add dimensionality reductions
Note: I did not test this code, so it might give an error, but I hope it gets the point across. Let me know if you have any issues.
Hi Seppe,
Thanks for the help! Actually, what I did was load the adata from part 1 of the PBMC tutorial and add the embeddings from the output loom file.
Thanks for the help!
Regards, Jay
Hi Jay
That will also work.
Happy to help.
Best,
Seppe
Hello @SeppeDeWinter, thanks for the code! This issue is more about the errors in the tutorial 'Extended analysis post-SCENIC'. I can actually run the whole pySCENIC procedure smoothly with my own data by following the cancer dataset tutorial. Here are the updated errors for the tutorial 'Extended analysis post-SCENIC'. I think they may be caused by an incompatibility between pySCENIC and the current version of Scanpy.
# scenic output
lf = lp.connect(f_final_loom, mode='r', validate=False)
meta = json.loads(zlib.decompress(base64.b64decode(lf.attrs.MetaData)))
exprMat = pd.DataFrame(lf[:,:], index=lf.ra.Gene, columns=lf.ca.CellID).T
auc_mtx = pd.DataFrame(lf.ca.RegulonsAUC, index=lf.ca.CellID)
# create a dictionary of regulons:
regulons = {}
for i,r in pd.DataFrame(lf.ra.Regulons, index=lf.ra.Gene).iteritems():
regulons[i] = list(r[r==1].index.values)
lf.ca.keys()
['CellID',
'ClusterID',
'Clusterings',
'Embedding',
'Percent_mito',
'RegulonsAUC',
'leiden_clusters_Scanpy',
'nGene',
'nUMI']
# cell annotations from the loom column attributes:
cellAnnot = pd.concat(
[pd.DataFrame( lf.ca.ClusterID, index=lf.ca.CellID ),
pd.DataFrame( lf.ca.leiden_clusters_Scanpy, index=lf.ca.CellID ),
pd.DataFrame( lf.ca.Percent_mito, index=lf.ca.CellID ),
pd.DataFrame( lf.ca.nGene, index=lf.ca.CellID ),
pd.DataFrame( lf.ca.nUMI, index=lf.ca.CellID ),
],
axis=1
)
cellAnnot.columns = ['ClusterID',
'leiden_clusters_Scanpy',
'Percent_mito',
'nGene',
'nUMI']
# capture embeddings:
dr = [pd.DataFrame(lf.ca.Embedding, index=lf.ca.CellID )]
dr_names = [meta['embeddings'][0]['name'].replace(" ","_")]
# rename columns:
for i,x in enumerate(dr):
x.columns = ['X', 'Y']
lf.close()
from scanpy import AnnData
adata = AnnData(
X = exprMat, #add expression matrix, this could also be the auc_mtx
obs = cellAnnot, #add cell metadata
obsm = {dr_names: dr_.to_numpy() for dr_names, dr_ in zip(dr_names, dr)}) #add dimensionality reductions
# drop the embeddings and extra attributes from the obs object
adata.obs.drop(['Embedding', 'RegulonsAUC'], axis=1, inplace=True)
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_22300/424352549.py in <module>
1 # drop the embeddings and extra attributes from the obs object
----> 2 adata.obs.drop(['Embedding', 'RegulonsAUC'], axis=1, inplace=True)
~\anaconda3\envs\HYJ_py38\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
~\anaconda3\envs\HYJ_py38\lib\site-packages\pandas\core\frame.py in drop(self, labels, axis, index, columns, level, inplace, errors)
4904 weight 1.0 0.8
4905 """
-> 4906 return super().drop(
4907 labels=labels,
4908 axis=axis,
~\anaconda3\envs\HYJ_py38\lib\site-packages\pandas\core\generic.py in drop(self, labels, axis, index, columns, level, inplace, errors)
4148 for axis, labels in axes.items():
4149 if labels is not None:
-> 4150 obj = obj._drop_axis(labels, axis, level=level, errors=errors)
4151
4152 if inplace:
~\anaconda3\envs\HYJ_py38\lib\site-packages\pandas\core\generic.py in _drop_axis(self, labels, axis, level, errors)
4183 new_axis = axis.drop(labels, level=level, errors=errors)
4184 else:
-> 4185 new_axis = axis.drop(labels, errors=errors)
4186 result = self.reindex(**{axis_name: new_axis})
4187
~\anaconda3\envs\HYJ_py38\lib\site-packages\pandas\core\indexes\base.py in drop(self, labels, errors)
6015 if mask.any():
6016 if errors != "ignore":
-> 6017 raise KeyError(f"{labels[mask]} not found in axis")
6018 indexer = indexer[~mask]
6019 return self.delete(indexer)
KeyError: "['Embedding' 'RegulonsAUC'] not found in axis"
# add the embeddings into the adata.obsm object
for i,x in enumerate(dr):
adata.obsm['X_'+dr_names[i]] = x.as_matrix()
AttributeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_22300/3879245023.py in <module>
1 # add the embeddings into the adata.obsm object
2 for i,x in enumerate(dr):
----> 3 adata.obsm['X_'+dr_names[i]] = x.as_matrix()
~\anaconda3\envs\HYJ_py38\lib\site-packages\pandas\core\generic.py in __getattr__(self, name)
5485 ):
5486 return self[name]
-> 5487 return object.__getattribute__(self, name)
5488
5489 def __setattr__(self, name: str, value) -> None:
AttributeError: 'DataFrame' object has no attribute 'as_matrix'
sc.utils.sanitize_anndata(adata)
AttributeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_22300/1330352037.py in <module>
----> 1 sc.utils.sanitize_anndata(adata)
AttributeError: module 'scanpy' has no attribute 'utils'
Hi @hyjforesight,
please try to replace sc.utils.sanitize_anndata(adata)
with sc._utils.sanitize_anndata(adata)
Describe the bug Hello pySCENIC and @cflerin , I generated the pbmc10k_scenic_integrated-output.loom from the 1st official tutorial (http://htmlpreview.github.io/?https://github.com/aertslab/SCENICprotocol/blob/master/notebooks/PBMC10k_SCENIC-protocol-CLI.html). And I tried to load this loom file into the 2nd official tutorial (http://htmlpreview.github.io/?https://github.com/aertslab/SCENICprotocol/blob/master/notebooks/PBMC10k_downstream-analysis.html) by
adata = sc.read(f_final_loom, validate=False, cache=True)
, but it raises the error 'TypeError: invalid type promotion with structured datatype(s).' Could you please help me solve this issue? Thanks! Best, YJ
Steps to reproduce the behavior
path to loom output, generated from the previous pySCENIC tutorial
f_final_loom = 'C:/Users/Park_Lab/Documents/pbmc10k_scenic_integrated-output.loom'
adata = sc.read_loom(filename=f_final_loom, validate=False) adata
TypeError Traceback (most recent call last) ~\AppData\Local\Temp/ipykernel_17852/1652251271.py in
----> 1 adata = sc.read_loom(filename=f_final_loom, validate=False)
2 adata
~\anaconda3\envs\Python3812\lib\site-packages\anndata\compat__init__.py in inner_f(*args, *kwargs) 251 extra_args = len(args) - len(all_args) 252 if extra_args <= 0: --> 253 return f(args, **kwargs) 254 255 # extra_args > 0
~\anaconda3\envs\Python3812\lib\site-packages\anndata_io\read.py in read_loom(filename, sparse, cleanup, X_name, obs_names, obsm_names, var_names, varm_names, dtype, obsm_mapping, varm_mapping, **kwargs) 298 uns["loom-var"] = uns_var 299 --> 300 adata = AnnData( 301 X, 302 obs=obs,
~\anaconda3\envs\Python3812\lib\site-packages\anndata_core\anndata.py in init(self, X, obs, var, uns, obsm, varm, layers, raw, dtype, shape, filename, filemode, asview, obsp, varp, oidx, vidx) 306 self._init_as_view(X, oidx, vidx) 307 else: --> 308 self._init_as_actual( 309 X=X, 310 obs=obs,
~\anaconda3\envs\Python3812\lib\site-packages\anndata_core\anndata.py in _init_as_actual(self, X, obs, var, uns, obsm, varm, varp, obsp, raw, layers, dtype, shape, filename, filemode) 500 501 # annotations --> 502 self._obs = _gen_dataframe(obs, self._n_obs, ["obs_names", "row_names"]) 503 self._var = _gen_dataframe(var, self._n_vars, ["var_names", "col_names"]) 504
~\anaconda3\envs\Python3812\lib\functools.py in wrapper(*args, *kw) 873 '1 positional argument') 874 --> 875 return dispatch(args[0].class)(args, **kw) 876 877 funcname = getattr(func, 'name', 'singledispatch function')
~\anaconda3\envs\Python3812\lib\site-packages\anndata_core\anndata.py in _(anno, length, index_names) 116 @_gendataframe.register(pd.DataFrame) 117 def (anno, length, index_names): --> 118 anno = anno.copy() 119 if not is_string_dtype(anno.index): 120 warnings.warn("Transforming to str index.", ImplicitModificationWarning)
~\anaconda3\envs\Python3812\lib\site-packages\pandas\core\generic.py in copy(self, deep) 5931 dtype: object 5932 """ -> 5933 data = self._mgr.copy(deep=deep) 5934 self._clear_item_cache() 5935 return self._constructor(data).finalize(self, method="copy")
~\anaconda3\envs\Python3812\lib\site-packages\pandas\core\internals\managers.py in copy(self, deep) 601 602 if deep: --> 603 res._consolidate_inplace() 604 return res 605
~\anaconda3\envs\Python3812\lib\site-packages\pandas\core\internals\managers.py in _consolidate_inplace(self) 622 def _consolidate_inplace(self) -> None: 623 if not self.is_consolidated(): --> 624 self.blocks = tuple(_consolidate(self.blocks)) 625 self._is_consolidated = True 626 self._known_consolidated = True
~\anaconda3\envs\Python3812\lib\site-packages\pandas\core\internals\managers.py in _consolidate(blocks) 1972 new_blocks: list[Block] = [] 1973 for (_can_consolidate, dtype), group_blocks in grouper: -> 1974 merged_blocks = _merge_blocks( 1975 list(group_blocks), dtype=dtype, can_consolidate=_can_consolidate 1976 )
~\anaconda3\envs\Python3812\lib\site-packages\pandas\core\internals\managers.py in _merge_blocks(blocks, dtype, can_consolidate) 1999 # Sequence[Union[int, float, complex, str, bytes, generic]], 2000 # Sequence[Sequence[Any]], SupportsArray]] -> 2001 new_values = np.vstack([b.values for b in blocks]) # type: ignore[misc] 2002 else: 2003 bvals = [blk.values for blk in blocks]
<__array_function__ internals> in vstack(*args, **kwargs) ~\anaconda3\envs\Python3812\lib\site-packages\numpy\core\shape_base.py in vstack(tup) 281 if not isinstance(arrs, list): 282 arrs = [arrs] --> 283 return _nx.concatenate(arrs, 0) 284 285 <__array_function__ internals> in concatenate(*args, **kwargs) TypeError: invalid type promotion with structured datatype(s). ``` 2. Error encountered: ```pytb ... ``` **Expected behavior** It should be loaded without errors. **Please complete the following information:** - pySCENIC version: 0.11.2 - Installation method: pip - Run environment: Jupyter notebook - OS: Windows 10 64bit - Package versions: [obtain using `pip freeze`, `conda list`, or skip this if using Docker/Singularity]: ``` scanpy==1.8.2 anndata==0.7.8 umap==0.5.2 numpy==1.20.3 scipy==1.7.3 pandas==1.3.5 scikit-learn==1.0.1 statsmodels==0.13.1 python-igraph==0.9.8 pynndescent==0.5.5 ```