diegozea / MIToS.jl

A Julia package to analyze protein sequences, structures, and evolutionary information
https://diegozea.github.io/MIToS.jl/stable/
Other
74 stars 18 forks source link

IO performance #46

Open diegozea opened 5 years ago

diegozea commented 5 years ago

The updated mitos-benchmarks/MIToS benchmark shows a small regression in MSA read/write performance for Julia 1.0.2 and MIToS 2.3.1. Maybe it's related to the use of length(::String) (see https://discourse.julialang.org/t/performance-of-length-string/12672). Also, @profile shows that most of the time is spent in cache-inefficient access to the big MSA matrix: setindex! in _convert_to_matrix_residues. Maybe TiledIteration.jl (https://github.com/JuliaArrays/TiledIteration.jl) or using 8 bits instead of 32/64 bits for Residue can help to avoid cache-related problems.

using MIToS.MSA, Profile, ProfileView
@profile read("../data/PF00089.sth", Stockholm, MultipleSequenceAlignment, deletefullgaps=false)

image

322 ./task.jl:259; (::getfield(Revise, Symbol("##58#60")){REPL.REPLBackend})()
 322 /home/elin/.julia/packages/Revise/gStbk/src/Revise.jl:771; run_backend(::REPL.REPLBackend)
  322 ...rker/package_linux64/build/usr/share/julia/stdlib/v1.0/REPL/src/REPL.jl:85; eval_user_input(::Any, ::REPL.REPLBackend)
   322 ./boot.jl:319; eval(::Module, ::Any)
    322 ...ckage_linux64/build/usr/share/julia/stdlib/v1.0/Profile/src/Profile.jl:25; top-level scope
     322 ./none:0; #read
      322 /home/elin/.julia/packages/MIToS/fEKlk/src/Utils/Read.jl:109; #read#10(::Base.Iterators.Pairs{Symbol,Bool,Tuple{Symbol},NamedTuple{(:deletefullgaps,),Tuple{Bool}}}, ::...
       322 ./none:0; (::getfield(MIToS.Utils, Symbol("#kw##_read")))(::NamedTuple{(:deletefullgaps,),Tuple{Bool}}, ::typeof(MIT...
        322 /home/elin/.julia/packages/MIToS/fEKlk/src/Utils/Read.jl:79; #_read#9(::Base.Iterators.Pairs{Symbol,Bool,Tuple{Symbol},NamedTuple{(:deletefullgaps,),Tuple{Bool}}}, ::...
         322 ./none:0; (::getfield(Base, Symbol("#kw##parse")))(::NamedTuple{(:deletefullgaps,),Tuple{Bool}}, ::typeof(parse), :...
          322 /home/elin/.julia/packages/MIToS/fEKlk/src/MSA/Stockholm.jl:113; #parse#27
           322 ./none:0; #parse
            86  /home/elin/.julia/packages/MIToS/fEKlk/src/MSA/Stockholm.jl:101; #parse#26(::Bool, ::Function, ::IOStream, ::Type{Stockholm}, ::Type{NamedArrays.NamedArray{Residue,2,A...
             54 /home/elin/.julia/packages/MIToS/fEKlk/src/MSA/Stockholm.jl:75; _pre_readstockholm_sequences(::IOStream)
              1  /home/elin/.julia/packages/MIToS/fEKlk/src/MSA/Stockholm.jl:4; _fill_with_sequence_line!
               1 ./strings/util.jl:25; startswith(::String, ::Char)
              48 /home/elin/.julia/packages/MIToS/fEKlk/src/MSA/Stockholm.jl:5; _fill_with_sequence_line!
               48 /home/elin/.julia/packages/MIToS/fEKlk/src/Utils/GeneralUtils.jl:36; get_n_words(::String, ::Int64)
                22 ./strings/string.jl:250; getindex(::String, ::UnitRange{Int64})
                 22 ./strings/string.jl:60; _string_n
                  1 ./essentials.jl:355; cconvert
                   1 ./number.jl:7; convert
                    1 ./boot.jl:722; Type
                     1 ./boot.jl:692; toUInt64
                      1 ./boot.jl:581; check_top_bit
                       1 ./boot.jl:571; is_top_bit_set
                26 ./strings/string.jl:253; getindex(::String, ::UnitRange{Int64})
                 1  ./pointer.jl:118; unsafe_store!
                 1  ./range.jl:575; iterate
                  1 ./promotion.jl:425; ==
                 15 ./strings/string.jl:87; codeunit
                  15 ./strings/basic.jl:193; checkbounds
                   9 ./strings/basic.jl:185; checkbounds
                    8 ./int.jl:428; <=
              3  /home/elin/.julia/packages/MIToS/fEKlk/src/MSA/Stockholm.jl:7; _fill_with_sequence_line!
               3 /home/elin/.julia/packages/OrderedCollections/Pr9Pa/src/ordered_set.jl:26; in
                3 .../elin/.julia/packages/OrderedCollections/Pr9Pa/src/ordered_dict.jl:367; haskey
                 2 ...elin/.julia/packages/OrderedCollections/Pr9Pa/src/ordered_dict.jl:232; ht_keyindex(::OrderedCollections.OrderedDict{String,Nothing}, ::String, ::Bool)
                  2 ./array.jl:731; getindex
                 1 ...elin/.julia/packages/OrderedCollections/Pr9Pa/src/ordered_dict.jl:234; ht_keyindex(::OrderedCollections.OrderedDict{String,Nothing}, ::String, ::Bool)
              1  /home/elin/.julia/packages/MIToS/fEKlk/src/MSA/Stockholm.jl:14; _fill_with_sequence_line!
               1 ./array.jl:856; push!
                1 ./array.jl:814; _growend!
              1  /home/elin/.julia/packages/OrderedCollections/Pr9Pa/src/ordered_set.jl:28; _fill_with_sequence_line!
               1 .../elin/.julia/packages/OrderedCollections/Pr9Pa/src/ordered_dict.jl:307; setindex!(::OrderedCollections.OrderedDict{String,Nothing}, ::Nothing, ::String)
                1 .../elin/.julia/packages/OrderedCollections/Pr9Pa/src/ordered_dict.jl:276; _setindex!(::OrderedCollections.OrderedDict{String,Nothing}, ::Nothing, ::String, ::Int64)
             1  /home/elin/.julia/packages/MIToS/fEKlk/src/MSA/Stockholm.jl:76; _pre_readstockholm_sequences(::IOStream)
              1 ./strings/util.jl:53; startswith
             31 /home/elin/.julia/packages/MIToS/fEKlk/src/MSA/Stockholm.jl:77; _pre_readstockholm_sequences(::IOStream)
              31 ./io.jl:881; iterate(::Base.EachLine{IOStream}, ::Nothing)
               31 ./none:0; #readline
                31 ./iostream.jl:433; #readline#296
            236 /home/elin/.julia/packages/MIToS/fEKlk/src/MSA/Stockholm.jl:102; #parse#26(::Bool, ::Function, ::IOStream, ::Type{Stockholm}, ::Type{NamedArrays.NamedArray{Residue,2,A...
             234 ...e/elin/.julia/packages/MIToS/fEKlk/src/MSA/GeneralParserMethods.jl:132; _generate_named_array(::Array{String,1}, ::OrderedCollections.OrderedSet{String})
              7   /home/elin/.julia/packages/MIToS/fEKlk/src/MSA/Residues.jl:177; _convert_to_matrix_residues(::Array{String,1}, ::Tuple{Int64,Int64})
              23  /home/elin/.julia/packages/MIToS/fEKlk/src/MSA/Residues.jl:249; _convert_to_matrix_residues(::Array{String,1}, ::Tuple{Int64,Int64})
               23 ./boot.jl:409; Type
                23 ./boot.jl:396; Type
              3   /home/elin/.julia/packages/MIToS/fEKlk/src/MSA/Residues.jl:251; _convert_to_matrix_residues(::Array{String,1}, ::Tuple{Int64,Int64})
               3 ./iterators.jl:138; iterate
                3 ./iterators.jl:139; iterate
                 3 ./strings/string.jl:176; iterate
                  3 ./strings/string.jl:176; iterate
                   3 ./operators.jl:286; >
                    3 ./int.jl:49; <
              201 /home/elin/.julia/packages/MIToS/fEKlk/src/MSA/Residues.jl:252; _convert_to_matrix_residues(::Array{String,1}, ::Tuple{Int64,Int64})
               173 ./array.jl:771; setindex!
                5 /home/elin/.julia/packages/MIToS/fEKlk/src/MSA/Residues.jl:176; convert
                 5 ./char.jl:50; Type
                  5 ./char.jl:53; codepoint
                   5 ./char.jl:114; Type
                9 /home/elin/.julia/packages/MIToS/fEKlk/src/MSA/Residues.jl:178; convert
                 9 ./array.jl:731; getindex
               23  ./iterators.jl:139; iterate
                1  ./strings/string.jl:176; iterate
                9  ./strings/string.jl:178; iterate
                 9 ./int.jl:450; <<
                  9 ./int.jl:443; <<
                13 ./strings/string.jl:179; iterate
                 13 ./strings/string.jl:17; between
                  13 ./int.jl:429; <=
             2   ...e/elin/.julia/packages/MIToS/fEKlk/src/MSA/GeneralParserMethods.jl:133; _generate_named_array(::Array{String,1}, ::OrderedCollections.OrderedSet{String})
              2 /home/elin/.julia/packages/MIToS/fEKlk/src/MSA/GeneralParserMethods.jl:116; _ids_ordered_dict(::OrderedCollections.OrderedSet{String}, ::Int64)
               1 .../elin/.julia/packages/OrderedCollections/Pr9Pa/src/ordered_dict.jl:301; setindex!(::OrderedCollections.OrderedDict{String,Int64}, ::Int64, ::String)
                1 .../elin/.julia/packages/OrderedCollections/Pr9Pa/src/ordered_dict.jl:253; ht_keyindex2(::OrderedCollections.OrderedDict{String,Int64}, ::String)
                 1 .../elin/.julia/packages/OrderedCollections/Pr9Pa/src/dict_support.jl:6; hashindex
                  1 ./hashing.jl:18; hash
                   1 ./hashing2.jl:179; hash
               1 .../elin/.julia/packages/OrderedCollections/Pr9Pa/src/ordered_dict.jl:307; setindex!(::OrderedCollections.OrderedDict{String,Int64}, ::Int64, ::String)
                1 .../elin/.julia/packages/OrderedCollections/Pr9Pa/src/ordered_dict.jl:290; _setindex!(::OrderedCollections.OrderedDict{String,Int64}, ::Int64, ::String, ::Int64)
                 1 ...elin/.julia/packages/OrderedCollections/Pr9Pa/src/ordered_dict.jl:183; rehash!(::OrderedCollections.OrderedDict{String,Int64}, ::Int64)
                  1 .../elin/.julia/packages/OrderedCollections/Pr9Pa/src/dict_support.jl:6; hashindex
                   1 ./hashing.jl:18; hash
                    1 ./hashing2.jl:179; hash