Closed snashraf closed 5 years ago
I would check the schema of the variants table and see which columns are of type FLOAT (or whatever sqlite calls it). Then iterate over this dictionary and try to call float()
on each value whose key the schema indicates is a float.
Hi Brent, I was facing this issue for the same reason as the one mentioned here: https://github.com/arq5x/gemini/issues/751 Now I am able to load the data properly, so I am closing this issue. Thanks, Najeeb
Hi, I was trying to load a VCF file into a gemini db using the command below, and I am getting the error below. What could be the reason for this error? I am also attaching the VCF file. When I load just the SnpEff-annotated VCF, it works fine, but when I try with the extra annotations, I get an error. temp.vcf.gz
python /gpfs/projects/bioinfo/najeeb/tools/vcf2db/vcf2db.py temp.vcf.gz fam1.ped fam1.VQSR_recalibrated_variants_reheaderShufflevcf2db1.db skipping 'AC' because it has Number=A skipping 'AF' because it has Number=A skipping 'MLEAC' because it has Number=A skipping 'MLEAF' because it has Number=A skipping 'ac_exac_afr' because it has Number=A skipping 'ac_exac_all' because it has Number=A skipping 'ac_exac_amr' because it has Number=A skipping 'ac_exac_eas' because it has Number=A skipping 'ac_exac_fin' because it has Number=A skipping 'ac_exac_nfe' because it has Number=A skipping 'ac_exac_oth' because it has Number=A skipping 'ac_exac_sas' because it has Number=A setting gnomadWGS_MAX to Type String because it has Number=. /gpfs/home/nsyed/.local/lib/python2.7/site-packages/sqlalchemy/sql/sqltypes.py:226: SAWarning: Unicode type received non-unicode bind param value 'fam1'. (this warning may be suppressed after 10 occurrences) (util.ellipses_string(value),)) /gpfs/home/nsyed/.local/lib/python2.7/site-packages/sqlalchemy/sql/sqltypes.py:226: SAWarning: Unicode type received non-unicode bind param value '-9'. (this warning may be suppressed after 10 occurrences) (util.ellipses_string(value),)) /gpfs/home/nsyed/.local/lib/python2.7/site-packages/sqlalchemy/sql/sqltypes.py:226: SAWarning: Unicode type received non-unicode bind param value '2'. (this warning may be suppressed after 10 occurrences) (util.ellipses_string(value),)) Traceback (most recent call last): File "/gpfs/projects/bioinfo/najeeb/tools/vcf2db/vcf2db.py", line 923, in
impacts_extras=a.impacts_field, aok=a.a_ok)
File "/gpfs/projects/bioinfo/najeeb/tools/vcf2db/vcf2db.py", line 233, in init
self.load()
File "/gpfs/projects/bioinfo/najeeb/tools/vcf2db/vcf2db.py", line 318, in load
i = self._load(self.cache, create=True, start=1)
File "/gpfs/projects/bioinfo/najeeb/tools/vcf2db/vcf2db.py", line 311, in _load
self.insert(variants, expanded, keys, i, create=create)
File "/gpfs/projects/bioinfo/najeeb/tools/vcf2db/vcf2db.py", line 373, in insert
vilengths, variant_impacts)
File "/gpfs/projects/bioinfo/najeeb/tools/vcf2db/vcf2db.py", line 401, in _insert
self.__insert(v_objs, self.metadata.tables['variants'].insert())
File "/gpfs/projects/bioinfo/najeeb/tools/vcf2db/vcf2db.py", line 443, in __insert
trans.execute(stmt, o)
File "/gpfs/home/nsyed/.local/lib/python2.7/site-packages/sqlalchemy/engine/base.py", line 948, in execute
return meth(self, multiparams, params)
File "/gpfs/home/nsyed/.local/lib/python2.7/site-packages/sqlalchemy/sql/elements.py", line 269, in _execute_on_connection
return connection._execute_clauseelement(self, multiparams, params)
File "/gpfs/home/nsyed/.local/lib/python2.7/site-packages/sqlalchemy/engine/base.py", line 1060, in _execute_clauseelement
compiled_sql, distilled_params
File "/gpfs/home/nsyed/.local/lib/python2.7/site-packages/sqlalchemy/engine/base.py", line 1132, in _execute_context
None, None)
File "/gpfs/home/nsyed/.local/lib/python2.7/site-packages/sqlalchemy/engine/base.py", line 1413, in _handle_dbapi_exception
exc_info
File "/gpfs/home/nsyed/.local/lib/python2.7/site-packages/sqlalchemy/util/compat.py", line 265, in raise_from_cause
reraise(type(exception), exception, tb=exc_tb, cause=cause)
File "/gpfs/home/nsyed/.local/lib/python2.7/site-packages/sqlalchemy/engine/base.py", line 1127, in _execute_context
context = constructor(dialect, self, conn, *args)
File "/gpfs/home/nsyed/.local/lib/python2.7/site-packages/sqlalchemy/engine/default.py", line 669, in _init_compiled
param.append(processorskey)
sqlalchemy.exc.StatementError: (exceptions.TypeError) float() argument must be a string or a number [SQL: u'INSERT INTO variants (variant_id, chrom, start, "end", vcf_id, ref, alt, qual, filter, type, sub_type, call_rate, num_hom_ref, num_het, num_hom_alt, num_unknown, aaf, gene, ensembl_gene_id, transcript, is_exonic, is_coding, is_lof, is_splicing, is_canonical, exon, codon_change, aa_change, aa_length, biotype, impact, impact_so, impact_severity, polyphen_pred, polyphen_score, sift_pred, sift_score, an, baseqranksum, cadd, clinvar_diseases, clippingranksum, db, dp, ds, fs, gerp, ghs_af, hgmd_class, hgmd_mut, hgmd_phen, hgmd_rankscore, lcr, mq, mqranksum, negative_train_site, positive_train_site, primatedl, qat_af, qd, readposranksum, sor, vqslod, aaf_1kg_afr_float, aaf_1kg_all_float, aaf_1kg_amr_float, aaf_1kg_eas_float, aaf_1kg_eur_float, aaf_1kg_sas_float, adj_exp_lof, adj_exp_mis, adj_exp_syn, af_exac_afr, af_exac_all, af_exac_amr, af_exac_eas, af_exac_nfe, af_exac_oth, af_exac_sas, an_exac_afr, an_exac_all, an_exac_amr, an_exac_eas, an_exac_fin, an_exac_nfe, an_exac_oth, an_exac_sas, ccr_pct_v1, common_pathogenic, cosmic_ids, cpg_island, cse_hiseq, culprit, encode_consensus_gm12878, encode_consensus_h1hesc, encode_consensus_helas3, encode_consensus_hepg2, encode_consensus_huvec, encode_consensus_k562, epilogos_bivflnk, epilogos_enh, epilogos_enhbiv, epilogos_enhg, epilogos_het, epilogos_quies, epilogos_reprpc, epilogos_reprpcwk, epilogos_tss, epilogos_tssaflnk, epilogos_tssbiv, epilogos_tx, epilogos_txflnk, epilogos_txwk, epilogos_znf, geno2mp, gnomad_af, gnomadwgs_af, gnomadwgs_af_afr, gnomadwgs_af_amr, gnomadwgs_af_asj, gnomadwgs_af_eas, gnomadwgs_af_fin, gnomadwgs_af_nfe, gnomadwgs_af_oth, gnomadwgs_max, in_1kg, in_exac, lof_z, max_aaf_all, mis_z, n_lof, n_mis, n_syn, pli, pnull, precessive, pfam_domain, phylop_100way, rmsk, rs_ids, syn_z, gts, gt_types, gt_phases, gt_depths, gt_ref_depths, gt_alt_depths, gt_quals, gt_alt_freqs) VALUES (?, ?, ?, ?, ?, ?, 
?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'] [parameters: [{'ensembl_gene_id': None, u'gnomadWGS_AF_AMR': 0.15160000324249268, u'aaf_1kg_amr_float': 0.14990000426769257, u'an_exac_fin': 6612, 'alt': u'T', 'impact': u'missense_variant', u'af_exac_sas': 0.36980000138282776, u'an_exac_oth': 906, u'gnomAD_AF': 0.21060000360012054, u'HGMD_CLASS': u'DP,DFP', u'epilogos_TssBiv': 0.0, u'epilogos_znf': 0.0, u'epilogos_Quies': -0.35339999198913574, 'is_exonic': True, u'epilogos_tssaflnk': 0.0, u'epilogos_tx': 0.7699999809265137, u'gnomadwgs_af_nfe': 0.1673000007867813, 'chrom': u'4', u'ac_exac_fin': 1306, u'dp': 150, u'readposranksum': 1.2400000095367432, u'gnomadWGS_AF_AFR': 0.05820000171661377, 'gt_phases': <read-only buffer for 0x7f415dde0c00, size -1, offset 0 at 0x7f415ddf4c70>, u'encode_consensus_k562': u'T', u'gnomadwgs_af': 0.1559000015258789, 'gt_depths': <read-only buffer for 0x7f415ddc9f80, size -1, offset 0 at 0x7f415ddf4c30>, u'aaf_1kg_all_float': 0.2287999987602234, u'qd': 16.219999313354492, 'effect_severity': 'MED', u'af_exac_nfe': 0.17000000178813934, u'encode_consensus_helas3': u'T', u'gnomadwgs_af_eas': 0.4839000105857849, u'vqslod': 17.84000015258789, u'epilogos_EnhBiv': 0.0, u'ac_exac_oth': 207, u'mqranksum': 0.35199999809265137, 'gt_ref_depths': <read-only buffer for 0x7f415ddc9fb8, size -1, offset 0 at 0x7f415ddf4cb0>, u'GHS_AF': 0.1647000014781952, 'call_rate': 1.0, u'PrimateDL': 0.5042999982833862, u'n_syn': u'47', 'gt_alt_freqs': <read-only buffer for 0x7f415ddf4db0, size -1, offset 0 at 0x7f415ddf4d70>, u'an_exac_all': 121106, u'VQSLOD': 17.84000015258789, 
u'negative_train_site': False, u'af_exac_amr': 0.1468999981880188, u'epilogos_enhg': 0.03319999948143959, u'NEGATIVE_TRAIN_SITE': None, u'adj_exp_lof': u'12.0860673274', u'ANN': None, 'aa_length': 316, u'MQ': 60.0, u'qat_af': 0.13699999451637268, u'max_aaf_all': 0.4684999883174896, 'gene': u'NUDT6', u'af_exac_eas': 0.4684999883174896, u'FS': 0.8309999704360962, 'gt_types': <read-only buffer for 0x7f415ddc9f48, size -1, offset 0 at 0x7f415df65470>, 'top_consequence': u'missense_variant', 'geno2mp': False, 'num_hom_ref': 2, 'is_splicing': False, u'rs_ids': u'rs1048201', u'ac_exac_all': 25294, u'syn_z': u'0.388453800877012', u'MLEAC': 2, u'MLEAF': 0.25, u'clinvar_diseases': u'Long_QT_syndrome,', 'polyphen_pred': None, u'gerp': 5.409999847412109, u'ac_exac_eas': 4047, u'phylop_100way': 7.013000011444092, 'type': 'snp', u'mleac': 2, u'mleaf': 0.25, u'epilogos_Enh': 0.0, 'impact_severity': 'MED', u'epilogos_bivflnk': 0.0, u'gnomadWGS_AF_ASJ': 0.1688999980688095, u'HGMD_PHEN': u'"Leukaemia_risk_association_with","Bone_mineral_density_association_with"', u'epilogos_reprpc': 0.0, u'epilogos_Tss': 0.0, u'encode_consensus_huvec': u'T', u'epilogos_tss': 0.0, u'mq': 60.0, 'num_hom_alt': 0, u'phyloP_100way': 7.013000011444092, u'gnomadWGS_MAX': 0.4839000105857849, u'ghs_af': 0.1647000014781952, u'gnomadWGS_AF': 0.1559000015258789, u'pRecessive': u'0.882105896226799', 'end': 123814308, u'gnomadWGS_AF_FIN': 0.18889999389648438, u'primatedl': 0.5042999982833862, 'cpg_island': False, u'epilogos_ZNF': 0.0, u'af_exac_oth': 0.22849999368190765, u'SOR': 0.5929999947547913, u'clippingranksum': 0.6819999814033508, u'gnomadwgs_max': '0.483900010586', u'fs': 0.8309999704360962, u'epilogos_TxWk': 0.8101999759674072, u'epilogos_BivFlnk': 0.0, 'biotype': u'protein_coding', u'ac_exac_sas': 6098, u'pnull': u'0.117385554079724', u'gnomadwgs_af_asj': 0.1688999980688095, u'aaf_1kg_afr_float': 0.041600000113248825, u'an_exac_nfe': 66584, u'n_mis': u'110', u'epilogos_tssbiv': 0.0, 
u'POSITIVE_TRAIN_SITE': True, 'db': False, u'epilogos_txflnk': 0.0, u'epilogos_EnhG': 0.03319999948143959, 'variant_id': 2, u'an_exac_amr': 11566, u'pLI': u'0.000508549693477292', 'num_unknown': 0, 'codon_change': u'c.626G>A', u'hgmd_rankscore': (0.9300000071525574, 0.9300000071525574), 'is_lof': False, 'ds': False, u'GERP': 5.409999847412109, 'gts': 'S\x0f8C/T\x00C/C\x00C/C\x00C/T', u'an_exac_sas': 16492, 'lcr': False, u'ccr_pct_v1': 0.0, 'exon': u'5/5', u'HGMD_MUT': u'ALT,ALT', u'gnomadWGS_AF_NFE': 0.1673000007867813, u'QAT_AF': 0.13699999451637268, u'hgmd_phen': u'"Leukaemia_risk_association_with","Bone_mineral_density_association_with"', u'epilogos_het': 0.0, u'rmsk': False, u'gnomadwgs_af_fin': 0.18889999389648438, u'epilogos_quies': -0.35339999198913574, 'num_het': 2, u'epilogos_ReprPC': 0.0, 'common_pathogenic': False, 'vcf_id': u'rs1048201', u'af_exac_all': 0.20890000462532043, u'ac_exac_amr': 1699, u'epilogos_ReprPCWk': 0.0, 'gt_quals': <read-only buffer for 0x7f415dddf068, size -1, offset 0 at 0x7f415ddf4d30>, 'gt_alt_depths': <read-only buffer for 0x7f415dddf030, size -1, offset 0 at 0x7f415ddf4cf0>, u'aaf_1kg_eas_float': 0.4553999900817871, u'n_lof': u'7', u'culprit': u'MQ', u'gnomadwgs_af_oth': 0.193900004029274, u'encode_consensus_gm12878': u'R', 'polyphen_score': None, 'is_canonical': False, 'ref': u'C', 'sift_pred': None, u'ClippingRankSum': 0.6819999814033508, u'lof_z': u'1.44318101523755', u'af_exac_afr': 0.05990000069141388, 'sub_type': 'ts', u'pli': u'0.000508549693477292', u'gnomadwgs_af_afr': 0.05820000171661377, u'mis_z': u'-0.118145226686434', u'in_1kg': True, 'filter': None, u'adj_exp_syn': u'51.3983717314', 'sift_score': None, u'ac_exac_nfe': 11320, u'gnomadwgs_af_amr': 0.15160000324249268, u'sor': 0.5929999947547913, u'cadd': u'35', u'cosmic_ids': u'COSM149734,COSM1131279', u'an_exac_eas': 8638, u'encode_consensus_hepg2': u'R', 'start': 123814307, u'epilogos_TssAFlnk': 0.0, u'an_exac_afr': 10308, u'MQRankSum': 0.35199999809265137, 
u'epilogos_Het': 0.0, u'epilogos_txwk': 0.8101999759674072, u'pNull': u'0.117385554079724', u'HGMD_RANKSCORE': (0.9300000071525574, 0.9300000071525574), u'epilogos_enhbiv': 0.0, 'qual': 1297.9000244140625, u'hgmd_class': u'DP,DFP', u'aaf_1kg_sas_float': 0.35690000653266907, 'aaf': 0.25, u'epilogos_enh': 0.0, u'pfam_domain': u'NUDIX', u'gnomad_af': 0.21060000360012054, u'hgmd_mut': u'ALT,ALT', u'ac': 2, u'BaseQRankSum': 4.699999809265137, u'af': 0.25, u'ann': None, u'an': 8, u'encode_consensus_h1hesc': u'T', u'CADD': u'35', 'cse_hiseq': False, u'DP': 150, u'ac_exac_afr': 617, u'positive_train_site': True, u'precessive': u'0.882105896226799', u'epilogos_Tx': 0.7699999809265137, 'so': u'missense_variant', u'gnomadWGS_AF_EAS': 0.4839000105857849, u'AC': 2, u'baseqranksum': 4.699999809265137, 'is_coding': True, u'AF': 0.25, u'ClinVar_Diseases': u'Long_QT_syndrome,', u'epilogos_reprpcwk': 0.0, u'AN': 8, u'epilogos_TxFlnk': 0.0, 'transcript': u'ENST00000304430', u'aaf_1kg_eur_float': 0.17790000140666962, u'ReadPosRankSum': 1.2400000095367432, u'in_exac': True, 'aa_change': u'p.Arg209Gln', u'gnomadWGS_AF_OTH': 0.193900004029274, u'adj_exp_mis': u'107.540804761', u'QD': 16.219999313354492, 'impact_so': u'missense_variant'}]]