segmented reduction - Githubissues

I would like to ask, if someone would be willing to write function(s) for segmented reductions. These functions are handy for multiple-instance learning problems, where sample is represented by an unordered set of vectors (each sample can have different number of vectors). It would be nice to have these functions available for use with Flux.

My implementation of the normal Julia code for the forward pass looks like

function segmented_max(x::Matrix{T},bags::Vector) where{T}
  assert(checkbounds(x,bags))
  o = zeros(eltype(x),size(x,1),length(bags))
  fill!(o,typemin(T));
  @inbounds for i in 1:length(bags) #iterate over bags
    for j in bags[i]  #iterate over items (vectors) in bags
      for k in 1:size(x,1)
        if x[k,j]>o[k,i]
          o[k,i]=x[k,j];
        end
      end
    end
  end
  o
end

while for the reverse part it is

    assert(checkbounds(x,bags))
    maxI=zeros(Int,size(x,1),length(bags))
    gx = zeros(x)
    for i in 1:length(bags) #iterate over bags
        bagsize=length(bags[i])
        for j in bags[i]  #iterate over subbags
            for k in 1:size(x,1)
                if x[k,j]>gx[k,i]
                    gx[k,i]=x[k,j];
                    maxI[k,i]=j;
                end
            end
        end
    end

    fill!(gx,0)
    @inbounds for I in CartesianRange(size(Δ))
        if maxI[I]>0 
            gx[I[1],maxI[I]]=Δ[I];    
        end
    end
    gx
end

I have to confess that I have never programmed for Cuda, therefore I am writing here. Thanks for any help.

JuliaGPU / CUDA.jl

segmented reduction #176