queryverse / ExcelFiles.jl

FileIO.jl integration for Excel files
Other
42 stars 11 forks source link

write multiple sheets to the same file #38

Open kafisatz opened 5 years ago

kafisatz commented 5 years ago

I can see that the save command supports a sheetname keyword. However, I understand it is currently not possible to write several sheets into the same Excel file, right? This would be a nice addition in my view.

kafisatz commented 5 years ago

@davidanthoff Notably this was actually fairly easy to achieve with pycall (package 'xlsxwriter'). I do not know if my code is worth anything for ExcelFiles, but I paste it here anyways

module ExcelWriter

using DataFrames 
using PyCall

global const pyModnumpy = PyCall.PyNULL()
global const pyModPandas = PyCall.PyNULL()
global const pyModxlsxwriter = PyCall.PyNULL()

function __init__()
    #the following lines may trigger the installation of the respective python packages 
    copy!(pyModnumpy, PyCall.pyimport_conda("numpy","numpy"))
    copy!(pyModPandas, PyCall.pyimport_conda("pandas","pandas"))
    copy!(pyModxlsxwriter, PyCall.pyimport_conda("xlsxwriter","xlsxwriter"))    
end

export ExcelSheet 
mutable struct ExcelSheet
    name::String
    data::DataFrame
end

export ExcelData 
mutable struct ExcelData
    sheets::Array{ExcelSheet,1}
    #charts::Array{Chart,1}
    function ExcelData()
        #return new(Array{ExcelSheet}(undef,0),Array{Chart}(undef,0))
        return new(Array{ExcelSheet}(undef,0))
    end
    function ExcelData(a)
        return  new(a)
    end
    #function ExcelData(a,b)
    #    return  new(a,b)
    #end
end

function create_custom_dict(df::DataFrame)  
    header=names(df)
    d=Dict{AbstractString,Array{Any,1}}()
    for i=1:length(header)          
        d[string(header[i])]=df[!,i]
    end
    return d
end

export writeToExcel 
function writeToExcel(excelData::ExcelData,xlFile::T,write_header::Bool,write_index::Bool) where {T <: AbstractString}
    #writing an Excel file seems very slow if the file already exists!
    isfile(xlFile)&&rm(xlFile)

    writer=writeDFtoExcel(excelData,xlFile,0,0,write_header,write_index)
    workbook = writer.book
    #=
        #Plot charts    
        for c in excelData.charts
            sheetWhereChartIsLocated=c.sheet        
            worksheet = writer.sheets[sheetWhereChartIsLocated]
            addChartToWorkbook!(workbook,worksheet,c.chartDict,c.location);
        end
    =#
    #save (=write) Excel file and close it  
    writer.save()
    println(xlFile)
    return nothing
end

function writeDFtoExcel(excelData::ExcelData,existingFile::T,row::Int,col::Int,write_header::Bool,write_index::Bool) where {T <: AbstractString}
#http://search.cpan.org/~jmcnamara/Excel-Writer-XLSX/lib/Excel/Writer/XLSX.pm
    @assert min(row,col)>=0
    writer=pyModPandas.ExcelWriter(existingFile, engine = "xlsxwriter")

    for xlSheet in excelData.sheets
        df=xlSheet.data
        sheet=xlSheet.name
        #create python dataframe    
            dataDict = create_custom_dict(df)
        pyDF=PyCall.pycall(pyModPandas.DataFrame, PyCall.PyObject, dataDict,columns=names(df))      
        PyCall.pycall(pyDF."to_excel",PyCall.PyAny,writer, header=write_header,index=write_index, sheet_name = sheet,startrow=row, startcol=col, encoding="utf-8")  #index=false suppress the rowcount      
    end
    return writer
end

end # module

using DataFrames
d1=DataFrame(rand(4,4))
d2=DataFrame(rand(4,40))

using Main.ExcelWriter
sh1=ExcelSheet("a",d1)
sh2=ExcelSheet("ab",d2)
xlData=ExcelData()
push!(xlData.sheets,sh1)
push!(xlData.sheets,sh2)
writeToExcel(xlData,"C:\\temp\\tmp.xlsx",false,false)