diegozea / MIToS.jl

A Julia package to analyze protein sequences, structures, and evolutionary information
https://diegozea.github.io/MIToS.jl/stable/
Other
75 stars 18 forks source link

Using a limit distance for speed contact calculation between residues #2

Closed diegozea closed 9 years ago

diegozea commented 9 years ago

Calculate CA distance between two residues A and B and contrast against the maximum distance between CA for a pair of residues (A, B) for a limit contact distance

maximum distances ~ len(A) + len(B) + limit + 2 * van der Waals radius + error

Using FAUJ880104 from AAindex for getting len(A) and len(B). len is not taking into account post-translational modifications.

Carbon van der Waals radius is 1.7

Error should be large enough for not missing real contacts, but low enough for speed the calculation avoiding the calculation of not useful distances.

diegozea commented 9 years ago

The improvement on speed (~14%) is not enough for justify the possible lost of accuracy for modified residues o low upper limit values:

"""Heavy, All, CA, CB (CA for GLY)
2*1.7 + 2*7.82 + limit + 10 (error)"""
function contact(a::PDBResidue, b::PDBResidue, limit::FloatingPoint, upperlimit::FloatingPoint; criteria::ASCIIString="All")
  if criteria == "All"
    Na = length(a)
    Nb = length(b)
    @inbounds for i in 1:Na
      for j in 1:Nb
        dist = distance(a.atoms[i], b.atoms[j])
        if dist <= limit
          return(true)
        elseif dist > upperlimit
          return(false)
        end
      end
    end
  elseif criteria == "Heavy"
    indices_a = findheavy(a)
    indices_b = findheavy(b)
    if length(indices_a) != 0 && length(indices_b) != 0
      for i in indices_a
        for j in indices_b
          dist = distance(a.atoms[i], b.atoms[j])
          if dist <= limit
            return(true)
          elseif dist > upperlimit
            return(false)
          end
        end
      end
    end
  elseif criteria == "CA"
    indices_a = findCA(a)
    indices_b = findCA(b)
    if length(indices_a) != 0 && length(indices_b) != 0
      for i in indices_a
        for j in indices_b
          dist = distance(a.atoms[i], b.atoms[j])
          if dist <= limit
            return(true)
          elseif dist > upperlimit
            return(false)
          end
        end
      end
    end
  elseif criteria == "CB"
    indices_a = findCB(a)
    indices_b = findCB(b)
    if length(indices_a) != 0 && length(indices_b) != 0
      for i in indices_a
        for j in indices_b
          dist = distance(a.atoms[i], b.atoms[j])
          if dist <= limit
            return(true)
          elseif dist > upperlimit
            return(false)
          end
        end
      end
    end
  end
  false
end
julia> @time mean( [ @elapsed contacts(reslist, 6.0, criteria="All") for i in 1:100 ] )
elapsed time: 2.799415613 seconds (156032896 bytes allocated, 2.07% gc time)
0.027994017810000004

julia> @time mean( [ @elapsed contacts(reslist, 6.0, 40.0, criteria="All") for i in 1:100 ] )
elapsed time: 2.470455596 seconds (187239296 bytes allocated, 3.31% gc time)
0.02470445565

julia> @time mean( [ @elapsed contacts(reslist, 6.0, 30.0, criteria="All") for i in 1:100 ] )
elapsed time: 2.029824843 seconds (187239296 bytes allocated, 3.79% gc time)
0.02029811525