JuliaML / MLDataUtils.jl

Utility package for generating, loading, splitting, and processing Machine Learning datasets
http://mldatautilsjl.readthedocs.io/
Other
102 stars 20 forks source link

example from readme does not work #19

Closed gdkrmr closed 7 years ago

gdkrmr commented 7 years ago

running the example from the readme:

julia> # Using KFolds in an unsupervised setting
       for (train_X, test_X) in KFolds(X, 10)
           # The subsets are of a special type to delay evaluation
           # until it is really needed
           @assert typeof(train_X) <: DataSubset
           @assert typeof(test_X) <: DataSubset

           # One can use get to access the underlying data that a
           # DataSubset represents.
           @assert typeof(get(train_X)) <: Matrix
           @assert typeof(get(train_X)) <: Matrix
           @assert size(get(train_X)) == (4, 135)
           @assert size(get(test_X)) == (4, 15)
       end
WARNING: sub is deprecated, use view instead.
 in depwarn(::String, ::Symbol) at ./deprecated.jl:64
 in sub(::Array{Int64,1}, ::Vararg{Any,N}) at ./deprecated.jl:30
 in MLDataUtils.KFolds{Array{Float64,2}}(::Array{Float64,2}, ::Int64) at /home/gkraemer/.julia/v0.5/MLDataUtils/src/datasplits/kfolds.jl:148
 in MLDataUtils.KFolds{TFeatures}(::Array{Float64,2}, ::Int64) at /home/gkraemer/.julia/v0.5/MLDataUtils/src/datasplits/kfolds.jl:155
 in anonymous at ./<missing>:?
 in eval(::Module, ::Any) at ./boot.jl:234
 in eval_user_input(::Any, ::Base.REPL.REPLBackend) at ./REPL.jl:64
 in macro expansion at ./REPL.jl:95 [inlined]
 in (::Base.REPL.##3#4{Base.REPL.REPLBackend})() at ./event.jl:68
while loading no file, in expression starting on line 0
ERROR: MethodError: Cannot `convert` an object of type MLDataUtils.DataSubset{Array{Float64,2},SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},true}} to an object of type MLDataUtils.DataSubset{Array{Float64,2},SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},1}}
This may have arisen from a call to the constructor MLDataUtils.DataSubset{Array{Float64,2},SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},1}}(...),
since type constructors fall back to convert methods.
 in MLDataUtils.KFolds{Array{Float64,2}}(::Array{Float64,2}, ::Int64) at /home/gkraemer/.julia/v0.5/MLDataUtils/src/datasplits/kfolds.jl:148
 in MLDataUtils.KFolds{TFeatures}(::Array{Float64,2}, ::Int64) at /home/gkraemer/.julia/v0.5/MLDataUtils/src/datasplits/kfolds.jl:155
 in anonymous at ./<missing>:?

julia> # Using KFolds in a supervised setting
       for ((train_X, train_y), (test_X, test_y)) in KFolds(X, y, 10)
           # Same as above
           @assert typeof(train_X) <: DataSubset
           @assert typeof(train_y) <: DataSubset

           # The real power is in combination with DataIterators.
           # Not only is the actual data-splitting delayed, it is
           # also the case that only as much storage is allocated as
           # is needed to hold the mini batches.
           # The actual code that is executed here can be specially
           # tailored to your custom datatype, thus giving 3rd party
           # ML packages full control over the pattern.
           for (batch_X, batch_y) in MiniBatches(train_X, train_y, size=10)
               # ... train supervised model here
           end
       end
WARNING: sub is deprecated, use view instead.
 in depwarn(::String, ::Symbol) at ./deprecated.jl:64
 in sub(::Array{Int64,1}, ::Vararg{Any,N}) at ./deprecated.jl:30
 in MLDataUtils.LabeledKFolds{Array{Float64,2},Array{String,1}}(::Array{Float64,2}, ::Array{String,1}, ::Int64) at /home/gkraemer/.julia/v0.5/MLDataUtils/src/datasplits/kfolds.jl:187
 in MLDataUtils.KFolds{TFeatures}(::Array{Float64,2}, ::Array{String,1}, ::Int64) at /home/gkraemer/.julia/v0.5/MLDataUtils/src/datasplits/kfolds.jl:198
 in anonymous at ./<missing>:?
 in eval(::Module, ::Any) at ./boot.jl:234
 in eval_user_input(::Any, ::Base.REPL.REPLBackend) at ./REPL.jl:64
 in macro expansion at ./REPL.jl:95 [inlined]
 in (::Base.REPL.##3#4{Base.REPL.REPLBackend})() at ./event.jl:68
while loading no file, in expression starting on line 0
ERROR: MethodError: Cannot `convert` an object of type MLDataUtils.DataSubset{Array{Float64,2},SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},true}} to an object of type MLDataUtils.DataSubset{Array{Float64,2},SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},1}}
This may have arisen from a call to the constructor MLDataUtils.DataSubset{Array{Float64,2},SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},1}}(...),
since type constructors fall back to convert methods.
 in MLDataUtils.LabeledKFolds{Array{Float64,2},Array{String,1}}(::Array{Float64,2}, ::Array{String,1}, ::Int64) at /home/gkraemer/.julia/v0.5/MLDataUtils/src/datasplits/kfolds.jl:188
 in MLDataUtils.KFolds{TFeatures}(::Array{Float64,2}, ::Array{String,1}, ::Int64) at /home/gkraemer/.julia/v0.5/MLDataUtils/src/datasplits/kfolds.jl:198
 in anonymous at ./<missing>:?

julia> # LOOFolds is a shortcut for setting k = nobs(X)
       for (train_X, test_X) in LOOFolds(X)
           @assert size(get(test_X)) == (4, 1)
       end
ERROR: MethodError: Cannot `convert` an object of type MLDataUtils.DataSubset{Array{Float64,2},SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},true}} to an object of type MLDataUtils.DataSubset{Array{Float64,2},SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},1}}
This may have arisen from a call to the constructor MLDataUtils.DataSubset{Array{Float64,2},SubArray{Int64,1,Array{Int64,1},Tuple{UnitRange{Int64}},1}}(...),
since type constructors fall back to convert methods.
 in MLDataUtils.KFolds{Array{Float64,2}}(::Array{Float64,2}, ::Int64) at /home/gkraemer/.julia/v0.5/MLDataUtils/src/datasplits/kfolds.jl:148
 in LOOFolds(::Array{Float64,2}) at /home/gkraemer/.julia/v0.5/MLDataUtils/src/datasplits/kfolds.jl:157
 in anonymous at ./<missing>:?

julia> 
Evizero commented 7 years ago

Hi. Sorry for the inconvenience. The back-end has been completely rewritten on the dev branch, which we will hopefully merge soon.

Evizero commented 7 years ago

README updated. A new version is being tagged right now (it may take a few days to get into METADATA).