sahilseth / RvsPython

A repo comparing syntax in R and Python for various tasks. Not comprehensive, but a subset of lines to get one started
12 stars 5 forks source link

Can you help me convert R to Python #1

Open zeromtmu opened 8 years ago

zeromtmu commented 8 years ago

I'm new to Python programming and have never used R before.

I'm trying to understand this R code and convert it to Python:

# Shared core of the rm2 family.
#
# Computes r2 * (1 - sqrt(r2 - r02)), where r2 is R2() of the regression of
# `obs` on `pred` with an intercept and r02 is R2() of the same regression
# forced through the origin (`-1 +`). When r2 does not exceed r02 the plain
# r2 is returned, which also keeps sqrt() away from a negative rounding
# difference.
#
# NOTE: relies on an `R2(y, model)` helper being in scope; it is not defined
# in this snippet.
.rm2_core <- function(obs, pred) {
  r2 <- R2(obs, lm(obs ~ pred))
  r02 <- R2(obs, lm(obs ~ -1 + pred))
  if (r2 > r02) {
    r2 * (1 - sqrt(r2 - r02))
  } else {
    r2
  }
}

# rm2 of `y` regressed on `x`.
#
# y, x: numeric vectors of equal length (observed and predicted values in
#       the calls shown later in this thread).
# ...:  accepted for call compatibility; not used.
# Returns a single numeric value.
rm2 <- function(y, x, ...) {
  .rm2_core(y, x)
}

# rm2 with the roles of the two vectors swapped (`x` regressed on `y`).
rm2.reverse <- function(y, x, ...) {
  .rm2_core(x, y)
}

# Mean of rm2 and rm2.reverse.
average.rm2 <- function(y, x, ...) {
  (rm2(y, x) + rm2.reverse(y, x)) / 2
}

# Absolute difference between rm2 and rm2.reverse.
delta.rm2 <- function(y, x, ...) {
  abs(rm2(y, x) - rm2.reverse(y, x))
}

Please help

sahilseth commented 8 years ago

Hi, sure. What is the code trying to do? Could you give some background?

zeromtmu commented 8 years ago

Thanks for answering. Here is the code:

via CloudMagic for Mac [https://cloudmagic.com/k/d/mailapp?ct=dx&cv=7.12.46&pv=10.11.5&source=email_footer_2] On Thu, Jun 30, 2559 at 07:32, sseth notifications@github.com wrote: Hi, sure what is the code trying to do. If you could give some background.

# — You are receiving this because you authored the thread. Reply to this email directly, view it on GitHub [https://github.com/sahilseth/RvsPython/issues/1#issuecomment-229528825] , or mute the thread [https://github.com/notifications/unsubscribe/ALb29DnweyJ5IFExZh2dl87KGKzMa7xlks5qQw6rgaJpZM4I9qtg] .

# Training-set performance of a ranger random forest over 100 resamples.
#
# For each of 100 parallel iterations: draw 2570 rows of `x` with
# replacement, keep a random 80% of them as the training split, fit
# `pIC50 ~ .` with ranger, predict the same training rows, and score the
# predictions.
#
# x: data frame with a numeric `pIC50` column (the response) and predictor
#    columns. Rows containing NA are dropped before resampling.
# Returns a list of 100 named numeric vectors with elements
#   RMSE, Rsquared, rm2, reverse.rm2, average.rm2, delta.rm.
randomForest_training <- function(x) {
  # Kept for compatibility with the original call sequence; nothing below
  # uses randomForest directly (the model is fitted with ranger).
  library(randomForest)
  library(parallel)
  library(doSNOW)

  cl <- makeCluster(8)
  # Registered before any work starts so the workers are released on both
  # normal return and error.
  on.exit(stopCluster(cl), add = TRUE)
  registerDoSNOW(cl)

  # 1 - SSE / SST of a fitted model against the observations `y`.
  R2 <- function(y, equation, ...) {
    1 - (sum((y - predict(equation))^2) / sum((y - mean(y))^2))
  }
  # r2 * (1 - sqrt(r2 - r02)); r2 from the fit with intercept, r02 from the
  # fit through the origin. Falls back to r2 when r2 does not exceed r02.
  rm2_core <- function(obs, pred) {
    r2 <- R2(obs, lm(obs ~ pred))
    r02 <- R2(obs, lm(obs ~ -1 + pred))
    if (r2 > r02) r2 * (1 - sqrt(r2 - r02)) else r2
  }
  rm2 <- function(y, x, ...) rm2_core(y, x)
  rm2.reverse <- function(y, x, ...) rm2_core(x, y)
  average.rm2 <- function(y, x, ...) (rm2(y, x) + rm2.reverse(y, x)) / 2
  delta.rm2 <- function(y, x, ...) abs(rm2(y, x) - rm2.reverse(y, x))

  # Loop-invariant: clean the data once instead of once per iteration.
  x <- na.omit(x)

  foreach(i = 1:100) %dopar% {
    para <- dplyr::sample_n(x, size = 2570, replace = TRUE)
    in_train_para <- sample(nrow(para),
                            size = as.integer(nrow(para) * 0.8),
                            replace = FALSE)
    Train <- para[in_train_para, ]

    model_train <- ranger::ranger(pIC50 ~ ., data = Train,
                                  write.forest = TRUE, save.memory = TRUE)
    prediction <- predict(model_train, Train)$predictions
    value <- data.frame(obs = Train$pIC50, pred = prediction)

    # Newer caret releases also report MAE; keep the two metrics that make
    # up the six-element result layout.
    result <- caret::defaultSummary(value)[c("RMSE", "Rsquared")]

    # The value of the last expression is what foreach collects.
    c(result,
      rm2 = rm2(value$obs, value$pred),
      reverse.rm2 = rm2.reverse(value$obs, value$pred),
      average.rm2 = average.rm2(value$obs, value$pred),
      delta.rm = delta.rm2(value$obs, value$pred))
  }
}

# Row-wise mean and standard deviation of `x`, each rounded to 2 digits.
#
# x: data frame or matrix of numeric values; NAs are ignored.
# Returns one numeric vector: all row means first, then all row SDs.
mean_and_sd <- function(x) {
  row_means <- round(rowMeans(x, na.rm = TRUE), digits = 2)
  row_sds <- round(genefilter::rowSds(x, na.rm = TRUE), digits = 2)
  c(row_means, row_sds)
}

# Mean and SD of the training-set metrics produced by
# randomForest_training().
#
# x: passed through unchanged to randomForest_training().
# Returns a one-row matrix whose 12 columns are the *_Mean values followed
# by the *_SD values.
randomForest_train <- function(x) {
  runs <- randomForest_training(x)
  per_run <- data.frame(runs)
  summary_df <- data.frame(result = mean_and_sd(per_run))
  R2_and_RMSE <- t(summary_df)
  colnames(R2_and_RMSE) <- c(
    "RMSE_Mean", "Rsquared_Mean", "RM2_Mean",
    "Reverse_RM2_Mean", "Average_RM2_Mean", "Delta_RM2_Mean",
    "RMSE_SD", "Rsquared_SD", "RM2_SD",
    "Reverse_RM2_SD", "Average_RM2_SD", "Delta_RM2_SD"
  )
  R2_and_RMSE
}

# 10-fold cross-validated performance of a ranger random forest over 100
# resamples.
#
# For each of 100 parallel iterations: draw 2570 rows of `x` with
# replacement, keep a random 80% of them, assign those rows at random to 10
# folds, fit `pIC50 ~ .` on nine folds and predict the held-out fold, then
# score the pooled held-out predictions of all folds.
#
# x: data frame with a numeric `pIC50` column (the response) and predictor
#    columns. Rows containing NA are dropped before resampling.
# Returns a list of 100 named numeric vectors with elements
#   RMSE, Rsquared, rm2, reverse.rm2, average.rm2, delta.rm.
rf_cross_validation <- function(x) {
  # Kept for compatibility with the original call sequence; nothing below
  # uses randomForest directly (the model is fitted with ranger).
  library(randomForest)
  library(parallel)
  library(doSNOW)

  cl <- makeCluster(8)
  # Release the workers on both normal return and error.
  on.exit(stopCluster(cl), add = TRUE)
  registerDoSNOW(cl)

  # 1 - SSE / SST of a fitted model against the observations `y`.
  R2 <- function(y, equation, ...) {
    1 - (sum((y - predict(equation))^2) / sum((y - mean(y))^2))
  }
  # r2 * (1 - sqrt(r2 - r02)); r2 from the fit with intercept, r02 from the
  # fit through the origin. Falls back to r2 when r2 does not exceed r02.
  rm2_core <- function(obs, pred) {
    r2 <- R2(obs, lm(obs ~ pred))
    r02 <- R2(obs, lm(obs ~ -1 + pred))
    if (r2 > r02) r2 * (1 - sqrt(r2 - r02)) else r2
  }
  rm2 <- function(y, x, ...) rm2_core(y, x)
  rm2.reverse <- function(y, x, ...) rm2_core(x, y)
  average.rm2 <- function(y, x, ...) (rm2(y, x) + rm2.reverse(y, x)) / 2
  delta.rm2 <- function(y, x, ...) abs(rm2(y, x) - rm2.reverse(y, x))

  # Loop-invariant: clean the data once instead of once per iteration.
  cool <- na.omit(x)

  foreach(i = 1:100) %dopar% {
    para <- dplyr::sample_n(cool, size = 2570, replace = TRUE)
    in_train_para <- sample(nrow(para),
                            size = as.integer(nrow(para) * 0.8),
                            replace = FALSE)
    myData <- para[in_train_para, ]

    k <- 10
    index <- sample(1:k, nrow(myData), replace = TRUE)

    # One slot per fold; pooled after the loop so every fold contributes
    # to the score, not only the last one.
    fold_preds <- vector("list", k)
    for (j in seq_len(k)) {
      training <- myData[index != j, , drop = FALSE]
      testing <- myData[index == j, , drop = FALSE]
      model_train <- ranger::ranger(pIC50 ~ ., data = training,
                                    write.forest = TRUE, save.memory = TRUE)
      prediction <- predict(model_train, testing)$predictions
      fold_preds[[j]] <- data.frame(obs = testing$pIC50, pred = prediction)
    }
    value <- do.call(rbind, fold_preds)

    # Newer caret releases also report MAE; keep the two metrics that make
    # up the six-element result layout.
    result <- caret::defaultSummary(value)[c("RMSE", "Rsquared")]

    # The value of the last expression is what foreach collects.
    c(result,
      rm2 = rm2(value$obs, value$pred),
      reverse.rm2 = rm2.reverse(value$obs, value$pred),
      average.rm2 = average.rm2(value$obs, value$pred),
      delta.rm = delta.rm2(value$obs, value$pred))
  }
}

# Mean and SD of the cross-validation metrics produced by
# rf_cross_validation().
#
# x: passed through unchanged to rf_cross_validation().
# Returns a one-row matrix whose 12 columns are the *_Mean values followed
# by the *_SD values.
rf_10_CV <- function(x) {
  runs <- rf_cross_validation(x)
  per_run <- data.frame(runs)
  summary_df <- data.frame(result = mean_and_sd(per_run))
  R2_and_RMSE <- t(summary_df)
  colnames(R2_and_RMSE) <- c(
    "RMSE_Mean", "Rsquared_Mean", "RM2_Mean",
    "Reverse_RM2_Mean", "Average_RM2_Mean", "Delta_RM2_Mean",
    "RMSE_SD", "Rsquared_SD", "RM2_SD",
    "Reverse_RM2_SD", "Average_RM2_SD", "Delta_RM2_SD"
  )
  R2_and_RMSE
}

# Held-out performance of a ranger random forest over 100 resamples.
#
# For each of 100 parallel iterations: draw 2570 rows of `x` with
# replacement, split them at random into 80% training and 20% test rows,
# fit `pIC50 ~ .` with ranger on the training rows, predict the test rows,
# and score those predictions.
#
# x: data frame with a numeric `pIC50` column (the response) and predictor
#    columns. Rows containing NA are dropped before resampling.
# Returns a list of 100 named numeric vectors with elements
#   RMSE, Rsquared, rm2, reverse.rm2, average.rm2, delta.rm.
randomForest_testing <- function(x) {
  library(parallel)
  library(doSNOW)

  cl <- makeCluster(8)
  # Registered before any work starts so the workers are released on both
  # normal return and error.
  on.exit(stopCluster(cl), add = TRUE)
  registerDoSNOW(cl)

  # 1 - SSE / SST of a fitted model against the observations `y`.
  R2 <- function(y, equation, ...) {
    1 - (sum((y - predict(equation))^2) / sum((y - mean(y))^2))
  }
  # r2 * (1 - sqrt(r2 - r02)); r2 from the fit with intercept, r02 from the
  # fit through the origin. Falls back to r2 when r2 does not exceed r02.
  rm2_core <- function(obs, pred) {
    r2 <- R2(obs, lm(obs ~ pred))
    r02 <- R2(obs, lm(obs ~ -1 + pred))
    if (r2 > r02) r2 * (1 - sqrt(r2 - r02)) else r2
  }
  rm2 <- function(y, x, ...) rm2_core(y, x)
  rm2.reverse <- function(y, x, ...) rm2_core(x, y)
  average.rm2 <- function(y, x, ...) (rm2(y, x) + rm2.reverse(y, x)) / 2
  delta.rm2 <- function(y, x, ...) abs(rm2(y, x) - rm2.reverse(y, x))

  # Loop-invariant: clean the data once instead of once per iteration.
  x <- na.omit(x)

  foreach(i = 1:100) %dopar% {
    para <- dplyr::sample_n(x, size = 2570, replace = TRUE)
    in_train_para <- sample(nrow(para),
                            size = as.integer(nrow(para) * 0.8),
                            replace = FALSE)
    Train <- para[in_train_para, ]
    Test <- para[-in_train_para, ]

    model_train <- ranger::ranger(pIC50 ~ ., data = Train,
                                  write.forest = TRUE, save.memory = TRUE)
    prediction <- predict(model_train, Test)$predictions
    value <- data.frame(obs = Test$pIC50, pred = prediction)

    # Newer caret releases also report MAE; keep the two metrics that make
    # up the six-element result layout.
    result <- caret::defaultSummary(value)[c("RMSE", "Rsquared")]

    # The value of the last expression is what foreach collects.
    c(result,
      rm2 = rm2(value$obs, value$pred),
      reverse.rm2 = rm2.reverse(value$obs, value$pred),
      average.rm2 = average.rm2(value$obs, value$pred),
      delta.rm = delta.rm2(value$obs, value$pred))
  }
}

# Mean and SD of the held-out metrics produced by randomForest_testing().
#
# x: passed through unchanged to randomForest_testing().
# Returns a one-row matrix whose 12 columns are the *_Mean values followed
# by the *_SD values.
randomForest_test <- function(x) {
  runs <- randomForest_testing(x)
  per_run <- data.frame(runs)
  summary_df <- data.frame(result = mean_and_sd(per_run))
  R2_and_RMSE <- t(summary_df)
  colnames(R2_and_RMSE) <- c(
    "RMSE_Mean", "Rsquared_Mean", "RM2_Mean",
    "Reverse_RM2_Mean", "Average_RM2_Mean", "Delta_RM2_Mean",
    "RMSE_SD", "Rsquared_SD", "RM2_SD",
    "Reverse_RM2_SD", "Average_RM2_SD", "Delta_RM2_SD"
  )
  R2_and_RMSE
}