dekkerlab / cLIMS-docker

cLIMS repository running on Docker Contaienrs
GNU Lesser General Public License v3.0
3 stars 4 forks source link

manual geo export situation #98

Open sergpolly opened 7 months ago

sergpolly commented 7 months ago

here's what we ended up doing to export geo for projectid 20 (it had 3500 seq files and was taking 4ever to export otherwise ...)

# drop into docker container running cLIMS-web (not DB or ngix)
docker exec -it [container-id] bash
# run interactive python shell that is django aware ...
python manage.py shell

# inside of that thing do:
exec(open("johan.py").read())

where johan.py is

from openpyxl.reader.excel import load_workbook
from organization.excelRow import insert_rows
from organization.models import *
from cLIMS.base import *
from dryLab.models import *
from wetLab.models import *
projectId = 20
prj = Project.objects.get(pk=projectId)
runUnits = SequencingRun.objects.filter(project=projectId)
files = SeqencingFile.objects.filter(sequencingFile_exp__project=projectId).order_by('pk')
experiments = Experiment.objects.filter(project=projectId)
bioSample = Biosample.objects.filter(expBio__project=projectId)
title = prj.project_name
summary = prj.project_notes
contributor1 = str(prj.project_owner)
contributor2 = prj.project_contributor.all()
membersList = [values for values in contributor2]
file_path_new = ROOTFOLDER+'/organization/static/siteWide/geo_template_new.xlsx'
wb = load_workbook(file_path_new)
ws = wb.worksheets[0]
ws.insert_rows = insert_rows
ws.cell(row=9, column=2).value = title
ws.cell(row=10, column=2).value = summary
ws.cell(row=12, column=2).value = contributor1

memberRowNo = 13
for members in membersList:
    insert_rows(ws, row_idx= memberRowNo, cnt = 1, above=True, copy_style=True)
    ws.cell(row=memberRowNo, column=1).value = "contributor"
    ws.cell(row=memberRowNo, column=2).value = str(members)
    memberRowNo +=1

rowNo = ws.max_row
for i in range (rowNo):
    if((ws.cell(row=i+1, column=1).value)=="Sample name"):
        sampleRowNo = i+2
        break
count = 1

for exp in experiments:
    insert_rows(ws, row_idx= sampleRowNo, cnt = 1, above=False, copy_style=False)
    ws.cell(row=sampleRowNo, column=1).value = "Sample " + str(count)
    ws.cell(row=sampleRowNo, column=2).value = str(exp.experiment_name)
    ws.cell(row=sampleRowNo, column=3).value = str(exp.experiment_biosample.biosample_biosource.biosource_tissue)
    ws.cell(row=sampleRowNo, column=4).value = str(exp.experiment_biosample.biosample_individual.individual_type)
    if(type(exp.experiment_enzyme)!=type(None)):
        ws.cell(row=sampleRowNo, column=6).value = str(exp.experiment_enzyme.enzyme_name) 
    ws.cell(row=sampleRowNo, column=7).value = str(exp.experiment_biosample.biosample_biosource.biosource_cell_line)  
    ws.cell(row=sampleRowNo, column=8).value = str("DNA")
    ws.cell(row=sampleRowNo, column=9).value = str(exp.experiment_biosample.biosample_biosource.biosource_description)
    expFiles=SeqencingFile.objects.filter(sequencingFile_exp=exp)
    colC=11
    for f in expFiles:
        ws.cell(row=sampleRowNo, column=colC).value = str(f.sequencingFile_name)
        colC+=1
    sampleRowNo += 1
    count += 1

rowNo = ws.max_row
for i in range (rowNo):
    if((ws.cell(row=i+1, column=1).value)=="RAW FILES"):
        rawFilesRowNo = i+3
        break

# this is taking FOREVER !!!
for iii, file in enumerate(files):
    print("working on file number " + str(iii))
    insert_rows(ws, row_idx= rawFilesRowNo, cnt = 1, above=True, copy_style=False)
    ws.cell(row=rawFilesRowNo, column=1).value = str(file.sequencingFile_name)
    ws.cell(row=rawFilesRowNo, column=2).value = str(file.file_format)
    ws.cell(row=rawFilesRowNo, column=3).value = str(file.sequencingFile_md5sum)
    if(type(file.sequencingFile_run.run_sequencing_instrument)!=type(None)):
        ws.cell(row=rawFilesRowNo, column=4).value = str(file.sequencingFile_run.run_sequencing_instrument.choice_name)
    ws.cell(row=rawFilesRowNo, column=5).value = str(file.read_length)
    ws.cell(row=rawFilesRowNo, column=6).value = "paired-end"
    ws.cell(row=rawFilesRowNo, column=7).value = str(file.sequencingFile_mainPath)
    sampleRowNo += 1 
    rawFilesRowNo += 1

#Find last row with data 
for row in reversed(list(ws.rows)):
    valueData = [cell.value for cell in row]
    if any(valueData):
        pairedEndRowNo=int(row[0].row)+1
        break

for file in files:
    if(file.related_files):
        ws.cell(row=pairedEndRowNo, column=1).value = str(file.related_files.sequencingFile_name)
        ws.cell(row=pairedEndRowNo, column=2).value = str(file.sequencingFile_name)
        pairedEndRowNo+=1

wb.save("ebany-pizdec.xls")