here's what we ended up doing to export geo for projectid 20 (it had 3500 seq files and was taking 4ever to export otherwise ...)
# drop into docker container running cLIMS-web (not DB or ngix)
docker exec -it [container-id] bash
# run interactive python shell that is django aware ...
python manage.py shell
# inside of that thing do:
exec(open("johan.py").read())
where johan.py is
from openpyxl.reader.excel import load_workbook
from organization.excelRow import insert_rows
from organization.models import *
from cLIMS.base import *
from dryLab.models import *
from wetLab.models import *
projectId = 20
prj = Project.objects.get(pk=projectId)
runUnits = SequencingRun.objects.filter(project=projectId)
files = SeqencingFile.objects.filter(sequencingFile_exp__project=projectId).order_by('pk')
experiments = Experiment.objects.filter(project=projectId)
bioSample = Biosample.objects.filter(expBio__project=projectId)
title = prj.project_name
summary = prj.project_notes
contributor1 = str(prj.project_owner)
contributor2 = prj.project_contributor.all()
membersList = [values for values in contributor2]
file_path_new = ROOTFOLDER+'/organization/static/siteWide/geo_template_new.xlsx'
wb = load_workbook(file_path_new)
ws = wb.worksheets[0]
ws.insert_rows = insert_rows
ws.cell(row=9, column=2).value = title
ws.cell(row=10, column=2).value = summary
ws.cell(row=12, column=2).value = contributor1
memberRowNo = 13
for members in membersList:
insert_rows(ws, row_idx= memberRowNo, cnt = 1, above=True, copy_style=True)
ws.cell(row=memberRowNo, column=1).value = "contributor"
ws.cell(row=memberRowNo, column=2).value = str(members)
memberRowNo +=1
rowNo = ws.max_row
for i in range (rowNo):
if((ws.cell(row=i+1, column=1).value)=="Sample name"):
sampleRowNo = i+2
break
count = 1
for exp in experiments:
insert_rows(ws, row_idx= sampleRowNo, cnt = 1, above=False, copy_style=False)
ws.cell(row=sampleRowNo, column=1).value = "Sample " + str(count)
ws.cell(row=sampleRowNo, column=2).value = str(exp.experiment_name)
ws.cell(row=sampleRowNo, column=3).value = str(exp.experiment_biosample.biosample_biosource.biosource_tissue)
ws.cell(row=sampleRowNo, column=4).value = str(exp.experiment_biosample.biosample_individual.individual_type)
if(type(exp.experiment_enzyme)!=type(None)):
ws.cell(row=sampleRowNo, column=6).value = str(exp.experiment_enzyme.enzyme_name)
ws.cell(row=sampleRowNo, column=7).value = str(exp.experiment_biosample.biosample_biosource.biosource_cell_line)
ws.cell(row=sampleRowNo, column=8).value = str("DNA")
ws.cell(row=sampleRowNo, column=9).value = str(exp.experiment_biosample.biosample_biosource.biosource_description)
expFiles=SeqencingFile.objects.filter(sequencingFile_exp=exp)
colC=11
for f in expFiles:
ws.cell(row=sampleRowNo, column=colC).value = str(f.sequencingFile_name)
colC+=1
sampleRowNo += 1
count += 1
rowNo = ws.max_row
for i in range (rowNo):
if((ws.cell(row=i+1, column=1).value)=="RAW FILES"):
rawFilesRowNo = i+3
break
# this is taking FOREVER !!!
for iii, file in enumerate(files):
print("working on file number " + str(iii))
insert_rows(ws, row_idx= rawFilesRowNo, cnt = 1, above=True, copy_style=False)
ws.cell(row=rawFilesRowNo, column=1).value = str(file.sequencingFile_name)
ws.cell(row=rawFilesRowNo, column=2).value = str(file.file_format)
ws.cell(row=rawFilesRowNo, column=3).value = str(file.sequencingFile_md5sum)
if(type(file.sequencingFile_run.run_sequencing_instrument)!=type(None)):
ws.cell(row=rawFilesRowNo, column=4).value = str(file.sequencingFile_run.run_sequencing_instrument.choice_name)
ws.cell(row=rawFilesRowNo, column=5).value = str(file.read_length)
ws.cell(row=rawFilesRowNo, column=6).value = "paired-end"
ws.cell(row=rawFilesRowNo, column=7).value = str(file.sequencingFile_mainPath)
sampleRowNo += 1
rawFilesRowNo += 1
#Find last row with data
for row in reversed(list(ws.rows)):
valueData = [cell.value for cell in row]
if any(valueData):
pairedEndRowNo=int(row[0].row)+1
break
for file in files:
if(file.related_files):
ws.cell(row=pairedEndRowNo, column=1).value = str(file.related_files.sequencingFile_name)
ws.cell(row=pairedEndRowNo, column=2).value = str(file.sequencingFile_name)
pairedEndRowNo+=1
wb.save("ebany-pizdec.xls")
here's what we ended up doing to export geo for projectid 20 (it had 3500 seq files and was taking 4ever to export otherwise ...)
where johan.py is