topepo / caret

caret (Classification And Regression Training) is an R package that contains miscellaneous functions for training and plotting classification and regression models
http://topepo.github.io/caret/index.html

Changing activation function in dnn #1202

Open · yPennylane opened this issue 3 years ago

yPennylane commented 3 years ago

I would like to be able to change the activation function (activationfun) used by deepnet when method = "dnn". Could you implement this, please?
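
For context, deepnet itself already exposes this option directly; caret's built-in "dnn" wrapper just does not pass it through. A minimal sketch of a direct deepnet call (the data here is a made-up toy example):

library(deepnet)
set.seed(1)
x <- matrix(rnorm(200), ncol = 2)              # 100 toy observations, 2 predictors
y <- x[, 1] + x[, 2] + rnorm(100, sd = 0.1)    # toy regression target
fit <- sae.dnn.train(x, y, hidden = c(5),
                     activationfun = "tanh",   # deepnet accepts "sigm", "linear" or "tanh"
                     output = "linear", numepochs = 10)
pred <- nn.predict(fit, x)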

yPennylane commented 3 years ago

I tried to customize the model as follows:

dnn_custom <- list(label = "Customized Stacked AutoEncoder Deep Neural Network",
                  library = "deepnet",
                  loop = NULL,
                  type = c("Classification", "Regression"),
                  parameters = data.frame(parameter = c("layer1", "layer2", "layer3", "hidden_dropout", "visible_dropout", "learningrate"),
                                          class = rep("numeric", 6),
                                          label = c("Hidden Layer 1", "Hidden Layer 2", "Hidden Layer 3", 
                                                    "Hidden Dropouts", "Visible Dropout", "Learning rate")),
                  grid = function(x, y, len = NULL, search = "grid") {
                    if(search == "grid") {
                      out <- expand.grid(layer1 = 1:len, layer2 = 0:(len - 1), layer3 = 0:(len - 1),
                                         hidden_dropout = seq(0, .7, length = len),
                                         visible_dropout = seq(0, .7, length = len),
                                         learningrate = seq(0.0001, 1, length = len))
                    } else {
                      out <- data.frame(layer1 = sample(2:20, replace = TRUE, size = len),
                                        layer2 = sample(2:20, replace = TRUE, size = len),
                                        layer3 = sample(2:20, replace = TRUE, size = len),
                                        hidden_dropout = runif(len, min = 0, max = .7),
                                        visible_dropout = runif(len, min = 0, max = .7),
                                        learningrate = runif(len, min = 0.0001, max = 1))
                    }
                    out
                  },
                  fit = function(x, y, wts, param, lev, last, classProbs, ...) {
                    if(!is.matrix(x)) x <- as.matrix(x)
                    is_class <- is.factor(y)
                    if (is_class) y <- caret:::class2ind(y)
                    layers <- c(param$layer1, param$layer2, param$layer3)
                    layers <- layers[layers > 0]
                    # return the fitted model directly so caret can use it;
                    # deepnet accepts "sigm", "linear" or "tanh" for activationfun
                    deepnet::sae.dnn.train(x, y, hidden = layers, activationfun = "linear",    # change activation function here
                                           output = if(is_class) "sigm" else "linear",
                                           hidden_dropout = param$hidden_dropout,
                                           visible_dropout = param$visible_dropout,
                                           learningrate = param$learningrate,
                                           ...)
                  },
                  predict = function(modelFit, newdata, submodels = NULL) {
                    pred <- deepnet::nn.predict(modelFit, as.matrix(newdata))
                    if(ncol(pred) > 1)
                      pred <- modelFit$obsLevels[apply(pred, 1, which.max)]
                    pred
                  },
                  prob = function(modelFit, newdata, submodels = NULL) {
                    out <- exp(deepnet::nn.predict(modelFit, as.matrix(newdata)))
                    out <- apply(out, 1, function(x) x/sum(x))
                    t(out)
                  },
                  predictors = function(x, ...) {
                    NULL
                  },
                  varImp = NULL,
                  levels = function(x) x$classes,
                  tags = c("Neural Network"),
                  sort = function(x) x[order(x[,1]),])
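
A custom model list like this is passed straight to the method argument of train(). A minimal usage sketch, assuming placeholder objects predictors_df (a data frame of predictors) and outcome (a numeric vector); the grid column names must match the parameters defined above:

library(caret)
ctrl <- trainControl(method = "cv", number = 5)
grid <- expand.grid(layer1 = c(4, 8), layer2 = 0, layer3 = 0,
                    hidden_dropout = 0, visible_dropout = 0,
                    learningrate = c(0.01, 0.1))
set.seed(1)
fit <- train(x = predictors_df, y = outcome, method = dnn_custom,
             trControl = ctrl, tuneGrid = grid)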

I also added learningrate as a tuning parameter. Unfortunately, training stopped with this message: "Something is wrong; all the RMSE metric values are missing: Error: Stopping"

The model did seem to train with the linear activation function, though. Did I include the activation function in the right way?
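
That caret message usually means that every resampled fit failed or produced NA/NaN predictions, and caret hides the underlying errors. One way to surface them is to call the custom fit and predict functions directly on a single parameter combination (predictors_df and outcome are again placeholders for your data):

param <- data.frame(layer1 = 4, layer2 = 0, layer3 = 0,
                    hidden_dropout = 0, visible_dropout = 0,
                    learningrate = 0.1)
mod  <- dnn_custom$fit(x = predictors_df, y = outcome, wts = NULL,
                       param = param, lev = NULL, last = TRUE,
                       classProbs = FALSE)
pred <- dnn_custom$predict(mod, newdata = predictors_df)
summary(as.vector(pred))   # NaN/NA values here would explain the missing RMSE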