stefanjwojcik / mm2020

5 stars 0 forks source link

Train all MLJ models #2

Open azev77 opened 4 years ago

azev77 commented 4 years ago

Hi @stefanjwojcik & thanks for posting. I have code to automatically train every MLJ model: https://discourse.julialang.org/t/custom-xgboost-loss-function-w-zygote-julia-computing-blog-post/35811/13?u=tlienart

Do you think you can take it for a spin on your dataset?

stefanjwojcik commented 4 years ago

Hey Albert! Cool, sounds super interesting. I'll definitely give it a go and let you know what happens!

stefanjwojcik commented 4 years ago

It looks like that code is for regression problems. What changes would be required to make it work for classification?

azev77 commented 4 years ago

`models(matching(X, y))` recognizes whether the task is a classification problem and selects models accordingly. However, if you're on a Mac, a few of the classification models might crash.

###  CLASSIFICATION  ###

################################################################################
#Crabs 27 models of 42 work.
################################################################################
# Sweep every MLJ model that matches the crabs data and tabulate the scores.
X, y = @load_crabs;
X = AZ(X)  # `AZ` is defined outside this snippet (see the linked Discourse post)
train, test = partition(eachindex(y), .7, rng=333);  # fixed seed => same split each run

# To restrict the sweep to a single prediction type, use one of these instead:
#   m_match = models(matching(X, y), x -> x.prediction_type == :deterministic)
#   m_match = models(matching(X, y), x -> x.prediction_type == :probabilistic)
@time m_match = models(matching(X, y))

# `dropm` holds *package* names here: it is compared against `package_name`.
dropm = ["NaiveBayes"];
filter!(m -> m.package_name ∉ dropm, m_match)
@time m_names = load_m(m_match);

# One row per model, two score columns filled from `train_m` (defined outside this
# snippet). A model that throws keeps a score of 0 so the sweep can continue, but the
# failure is logged so it can be told apart from a model that genuinely scored 0.
sc = zeros(size(m_names, 1), 2)
for (i, m) in enumerate(m_names)
    sc[i, :] .= try
        train_m(m, X, y, train, test, predict_mode, accuracy)
    catch err
        # Ctrl-C must abort the sweep rather than be recorded as a failed model.
        err isa InterruptException && rethrow()
        @warn(
            "Model failed; recording a score of 0",
            model = m,
            exception = (err, catch_backtrace()),
        )
        (0, 0)
    end
end
sc  # shows the raw score matrix when run interactively

# Rank models by the first score column, best first. The ordering is computed once
# and kept local so no extra global is introduced.
let order = sortperm(sc[:, 1], rev=true)
    showtable(hcat(m_names[order], sc[order, :]))
end
#
#

################################################################################
#Iris AZ: 26/42 models work.
################################################################################
# Sweep every MLJ model that matches the iris data and tabulate the scores.
iris = dataset("datasets", "iris");
# Target is the `Species` column; every remaining column becomes a feature.
y, X = unpack(iris, ==(:Species), colname -> true)
train, test = partition(eachindex(y), .7, rng=333);  # fixed seed => same split each run

m_match = models(matching(X, y));
# `dropm` holds *package* names here: it is compared against `package_name`.
dropm = ["NaiveBayes"];
filter!(m -> m.package_name ∉ dropm, m_match)
@time m_names = load_m(m_match);

# One row per model, two score columns filled from `train_m` (defined outside this
# snippet). A model that throws keeps a score of 0 so the sweep can continue, but the
# failure is logged so it can be told apart from a model that genuinely scored 0.
sc = zeros(size(m_names, 1), 2)
for (i, m) in enumerate(m_names)
    sc[i, :] .= try
        train_m(m, X, y, train, test, predict_mode, accuracy)
    catch err
        # Ctrl-C must abort the sweep rather than be recorded as a failed model.
        err isa InterruptException && rethrow()
        @warn(
            "Model failed; recording a score of 0",
            model = m,
            exception = (err, catch_backtrace()),
        )
        (0, 0)
    end
end
sc  # shows the raw score matrix when run interactively

# Rank models by the first score column, best first. The ordering is computed once
# (inside the timed expression, as before) and kept local to avoid a new global.
@time let order = sortperm(sc[:, 1], rev=true)
    showtable(hcat(m_names[order], sc[order, :]))
end
#

################################################################################
#Pima   AZ NOT Full Data!: 27/42 models work.
#GaussianProcessClassifier  caused CRASH
#Intel MKL ERROR: Parameter 6 was incorrect on entry to DLASWP.
#First time MKL
#2nd time: ** On entry to DLASWP, parameter number  6 had an illegal value
################################################################################
# Sweep every MLJ model that matches the Pima data and tabulate the scores.
Pima = dataset("MASS", "Pima.te"); #332 by 8
# Alternative Pima subsets from the same package:
#Pima = dataset("MASS", "Pima.tr"); #200 by 8
#Pima = dataset("MASS", "Pima.tr2"); #300 by 8
# Target is the `Type` column; every remaining column becomes a feature.
y, X = unpack(Pima, ==(:Type), colname -> true);
train, test = partition(eachindex(y), .7, rng=333);  # fixed seed => same split each run
X = coerce(X, autotype(X, :discrete_to_continuous)); #ScientificTypes

m_match = models(matching(X, y));
# First use of `dropm`: *package* names, compared against `package_name`.
dropm = ["NaiveBayes"];
filter!(m -> m.package_name ∉ dropm, m_match)

m_names = load_m(m_match);
# Second use of `dropm`: *model* names, compared against the entries of `m_names`
# (assumes `load_m` returns model-name strings — TODO confirm). This model is removed
# up front because, per the notes above, it caused a crash with MKL/DLASWP errors;
# presumably that crash is not something the `try`/`catch` below can intercept.
dropm = ["GaussianProcessClassifier"];
filter!(m -> m ∉ dropm, m_names)

# One row per model, two score columns filled from `train_m` (defined outside this
# snippet). A model that throws keeps a score of 0 so the sweep can continue, but the
# failure is logged so it can be told apart from a model that genuinely scored 0.
sc = zeros(size(m_names, 1), 2)
for (i, m) in enumerate(m_names)
    sc[i, :] .= try
        train_m(m, X, y, train, test, predict_mode, accuracy)
    catch err
        # Ctrl-C must abort the sweep rather than be recorded as a failed model.
        err isa InterruptException && rethrow()
        @warn(
            "Model failed; recording a score of 0",
            model = m,
            exception = (err, catch_backtrace()),
        )
        (0, 0)
    end
end
sc  # shows the raw score matrix when run interactively

# Rank models by the first score column, best first. The ordering is computed once
# and kept local so no extra global is introduced.
let order = sortperm(sc[:, 1], rev=true)
    showtable(hcat(m_names[order], sc[order, :]))
end
#
stefanjwojcik commented 4 years ago

Awesome, I'll give it a go. Thanks!

azev77 commented 4 years ago

Btw, you may also find this discussion useful: https://discourse.julialang.org/t/custom-xgboost-loss-function-w-zygote-julia-computing-blog-post/35811?u=albert_zevelev

azev77 commented 4 years ago

@stefanjwojcik any luck?