Closed ArtPoon closed 3 years ago
@nodrogluap suggests using this Perl script: https://github.com/nodrogluap/pokay/blob/master/text2ui to generate a JSON file from the database, which we can then transfer to client for pre-processing of lineage information.
I ran the text2ui Perl script, and here's what the JSON output looks like:
[
{
"description": "gene_expression_increase",
"url": "http://people.ucalgary.ca/~gordonp/S:V483K.html#gene_expression_increase",
"substitutions":
[
{ "refAA": "V", "queryAA": "K", "codon": 483, "gene": "S" }
],
"deletions":
[
]
}
,
{
"description": "monoclonal_antibody_serial_passage_escape",
"url": "http://people.ucalgary.ca/~gordonp/S:V483K.html#monoclonal_antibody_serial_passage_escape",
"substitutions":
[
{ "refAA": "V", "queryAA": "K", "codon": 483, "gene": "S" }
],
"deletions":
[
]
}
,
{
"description": "gene_expression_increase",
"url": "http://people.ucalgary.ca/~gordonp/S:V503E.html#gene_expression_increase",
"substitutions":
[
{ "refAA": "V", "queryAA": "E", "codon": 503, "gene": "S" }
],
"deletions":
[
]
}
,
{
"description": "gene_expression_increase",
"url": "http://people.ucalgary.ca/~gordonp/S:P384K.html#gene_expression_increase",
"substitutions":
[
{ "refAA": "P", "queryAA": "K", "codon": 384, "gene": "S" }
],
"deletions":
[
]
}
,
{
"description": "monoclonal_antibody_serial_passage_escape",
"url": "http://people.ucalgary.ca/~gordonp/S:P384K.html#monoclonal_antibody_serial_passage_escape",
"substitutions":
[
{ "refAA": "P", "queryAA": "K", "codon": 384, "gene": "S" }
],
"deletions":
[
]
}
Integrate Perl script into pipeline, add acknowledgements
aa:S:D614G
Ran the text2ui Perl script as follows:
perl text2ui VOCs.tab data/ html/ mutation_annotations.json
"aa:S:P9S": "monoclonal_antibody_serial_passage_escape"
https://github.com/nodrogluap/pokay
perl text2ui VOCs.tab data/ html/ mutation_annotations.json
You have to create this directory
)
3. Run the Python script below to convert the mutation annotations to our preferred format:
#import python libraries
import json
import re
#define path to working directory
path = "./" # path to cloned pokay directory
def reformatMutations(subt, dels, func):
    """Convert one pokay annotation record into the preferred mutation format.

    Every mutation is rendered as ``aa:<gene>:<refAA><codon><queryAA>``
    (for example ``aa:S:D614G``). Deletions use ``-`` in place of the
    query amino acid.

    Args:
        subt: List of substitution dicts with the keys ``refAA``,
            ``queryAA``, ``codon`` and ``gene``.
        dels: List of deletion dicts. ``None``, the string ``"None"`` and
            an empty list all mean "no deletions".
            NOTE(review): deletion records are assumed to carry ``gene``,
            ``refAA`` and ``codon`` keys -- confirm against the text2ui
            output.
        func: Description label of the annotation (e.g.
            ``"gene_expression_increase"``); used as the key of the result.

    Returns:
        A one-entry dict mapping ``func`` to the list of formatted mutation
        strings, substitutions first, then deletions.
    """
    # Fields are looked up by key rather than by position in dict.values():
    # a values view cannot be indexed on Python 3, and positional access
    # silently depends on the order of keys in the input JSON.
    mut_list = [
        "aa:{}:{}{}{}".format(
            sub["gene"], sub["refAA"], sub["codon"], sub["queryAA"]
        )
        for sub in (subt or [])
    ]

    # Decide once, before iterating, whether there are deletions at all.
    if dels and dels != "None":
        mut_list.extend(
            "aa:{}:{}{}-".format(dele["gene"], dele["refAA"], dele["codon"])
            for dele in dels
        )

    return {func: mut_list}
# Load the pokay-generated annotations (a JSON array of records); the
# context manager closes the file even if parsing fails.
with open(path + 'mutation_annotations.json') as jfile:
    data = json.load(jfile)

# Reformat every record into {description: [mutation strings]}.
results = [
    reformatMutations(
        record["substitutions"],
        record["deletions"],
        record["description"],
    )
    for record in data
]

# Write all records as ONE JSON array. Dumping each record separately and
# then patching in brackets and commas with string replacement is fragile:
# it would corrupt any value that happens to contain "}{".
with open(path + 'mut_annotations.json', 'w') as outfile:
    json.dump(results, outfile, indent=4, ensure_ascii=False, separators=(',', ':'))
@nodrogluap maintains a site, https://people.ucalgary.ca/~gordonp/S-monoclonal_antibody_serial_passage_escape.html, that tracks the literature on phenotypic characterization of SARS-CoV-2 mutations. I'm wondering whether we could call that site's API to annotate lineage mutations, so that those with known significance are highlighted in the tooltip.