Closed lucaszago closed 3 years ago
Hey, I actually thought this was your data. If so, I need a reproducible data set with full code to reproduce your issue.
Hey Matt,
Follow my code below, and my csv file attached.
library(tidymodels) library(modeltime) library(tidyverse) library(lubridate) library(timetk) library(StanHeaders) library(rules)
iniciativa <- read_csv("Jira3.csv") %>%
select(Custom field (Completion date)
) %>%
separate(Custom field (Completion date)
,
into = c("data_planejada"),
sep = " ",
remove = TRUE) %>%
mutate(data_planejada = dmy(data_planejada)) %>%
summarise_by_time(data_planejada,
.by ="day",
qtd = n()) %>%
pad_by_time(.pad_value = 0) %>%
mutate(qtd_NA = ifelse(qtd == 0, NA, qtd)) %>%
mutate(qtd_imputed = ts_impute_vec(qtd_NA, period = 7)) %>%
select(-qtd_NA, -qtd) %>%
arrange(data_planejada) %>%
drop_na()
iniciativa <- iniciativa %>% mutate(qtd_trans = log_interval_vec(qtd_imputed, limit_lower = 0, offset = 2)) %>% mutate(qtd_trans = standardize_vec(qtd_trans)) %>% select(-qtd_imputed)
limit_lower <- 0 limit_upper <- 17.5064130926397 offset <- 2 std_mean <- -0.983655732329455 std_sd <- 0.732484118654259
iniciativa %>% plot_acf_diagnostics( data_planejada, .value = qtd_trans, .show_white_noise_bars = T, .lags = 1:305 ) ?plot_acf_diagnostics
horizon <- 20*7 lag_period <- c(1, 2, 30, 61, 89, 91, 117, 119, 121, 147) rolling_periods <- c(30, 60, 90)
iniciativa_full <- iniciativa %>% bind_rows( future_frame(.data = ., .date_var = data_planejada, .length_out = horizon) ) %>% tk_augment_lags(qtd_trans, .lags = lag_period) %>% tk_augment_slidify( .value = qtd_trans_lag89, .f = mean, .period = rolling_periods, .align = "center", .partial = TRUE ) %>% rename_with(.cols = contains("lag"), .fn = ~ strc("lag", .))
?tk_augment_slidify iniciativa_full %>% pivot_longer(-data_planejada) %>% plot_time_series(data_planejada, value, name, .smooth = FALSE)
iniciativa_full1 <- iniciativa_full %>% filter(!is.na(qtd_trans))
forecast_tbl <- iniciativa_full %>% filter(is.na(qtd_trans))
splits <- time_series_split(iniciativa_full1 , assess = horizon, cumulative = TRUE)
splits %>% tk_time_series_cv_plan() %>% plot_time_series_cv_plan(data_planejada, qtd_trans)
calibrate_and_plot <- function(..., type = "testing") {
if (type == "testing") {
new_data <- testing(splits)
} else {
new_data <- training(splits) %>% drop_na()
}
calibration_tbl <- modeltime_table(...) %>%
modeltime_calibrate(new_data)
print(calibration_tbl %>% modeltime_accuracy())
calibration_tbl %>%
modeltime_forecast(
new_data = new_data,
actual_data = iniciativa_full1
) %>%
plot_modeltime_forecast(.conf_interval_show = FALSE)
}
recipe_spec_base <- recipe(qtd_trans ~ ., data = training(splits)) %>% step_timeseries_signature(data_planejada) %>% step_rm(matches("(iso)|(xts)|(hour)|(minute)|(second)|(am.pm)|(concluido_year)")) %>% step_normalize(matches("(index.num)|(year)|(yday)")) %>% step_dummy(all_nominal(), one_hot = TRUE) %>% step_fourier(data_planejada, period = c(7, 14, 30, 90, 365), K = 2)
recipe_spec_base %>% prep() %>% juice() %>% glimpse()
recipe_spec_base_no_lag <- recipe_spec_base %>% step_rm(starts_with("lag"))
recipe_spec_base_no_lag %>% prep() %>% juice() %>% glimpse()
recipe_spec_base %>% prep() %>% juice() %>% glimpse()
recipe_spec_1_spline <- recipe_spec_base %>% step_rm(data_planejada) %>% step_ns(ends_with("index.num"), deg_free = 2) %>% step_rm(startswith("lag"))
recipe_spec_1_spline %>% prep() %>% juice() %>% glimpse()
recipe_spec_base %>% prep() %>% juice() %>% glimpse()
recipe_spec_2_lag <- recipe_spec_base %>% step_rm(data_planejada) %>% step_naomit(startswith("lag"))
recipe_spec_2_lag %>% prep() %>% juice() %>% glimpse()
model_fit_auto_arima <- arima_reg() %>% set_engine("auto_arima") %>% fit( qtd_trans ~ data_planejada
calibration_tbl <- modeltime_table( model_fit_auto_arima ) %>% modeltime_calibrate(testing(splits))
calibration_tbl %>% modeltime_forecast( new_data = testing(splits), actual_data = iniciativa_full1 ) %>% plot_modeltime_forecast()
calibration_tbl %>% modeltime_accuracy()
refit_tbl <- calibration_tbl %>% modeltime_refit(iniciativa_full1)
refit_tbl %>% modeltime_forecast( new_data = forecast_tbl, actual_data = iniciativa_full1 ) %>%
mutate(across(.value:.conf_hi, .fns = ~ standardize_inv_vec( x = ., mean = std_mean, sd = std_sd ))) %>% mutate(across(.value:.conf_hi, .fns = ~ log_interval_inv_vec( x = ., limit_lower = limit_lower, limit_upper = limit_upper, offset = offset ))) %>% plot_modeltime_forecast()
model_spec_arima_boost <- arima_boost( seasonal_period = 1,
mtry = 0.15, #15 trees = 600, #800 min_n = 2, #2 tree_depth = 5, #5 learn_rate = 0.15, loss_reduction = 0.15,
) %>% set_engine("auto_arima_xgboost")
set.seed(123) wflw_fit_arima_boost <- workflow() %>% add_model(model_spec_arima_boost) %>% add_recipe(recipe_spec_base_no_lag) %>% fit(training(splits))
calibration_tbl <- modeltime_table( model_fit_auto_arima, wflw_fit_arima_boost ) %>% modeltime_calibrate(testing(splits))
calibration_tbl %>% modeltime_forecast( new_data = testing(splits), actual_data = iniciativa_full1 ) %>% plot_modeltime_forecast(.title = "Forecast de Iniciativas", .x_lab = "Mês", .y_lab = "Quantidade")
calibration_tbl %>% modeltime_accuracy()
refit_tbl <- calibration_tbl %>% modeltime_refit(data = iniciativa_full1)
refit_tbl %>% modeltime_forecast(new_data = forecast_tbl, actual_data = iniciativa_full1) %>%
mutate(across(.value:.conf_hi, .fns = ~ standardize_inv_vec( x = ., mean = std_mean, sd = std_sd ))) %>% mutate(across(.value:.conf_hi, .fns = ~ log_interval_inv_vec( x = ., limit_lower = limit_lower, limit_upper = limit_upper, offset = offset ))) %>% plot_modeltime_forecast(.title = "Forecast de Iniciativas do Portfolio", .x_lab = "Meses", .y_lab = "Número de Iniciativas")
model_fit_prophet <- prophet_reg( changepoint_num = 10, changepoint_range = 0.8,
) %>% set_engine("prophet") %>% fit(qtd_trans ~ data_planejada, data = training(splits))
calibration_tbl <- modeltime_table( model_fit_prophet ) %>% modeltime_calibrate(testing(splits))
calibration_tbl %>% modeltime_forecast( new_data = testing(splits), actual_data = iniciativa_full1 ) %>% plot_modeltime_forecast()
calibration_tbl %>% modeltime_accuracy()
refit_tbl <- calibration_tbl %>% modeltime_refit(iniciativa_full)
refit_tbl %>% modeltime_forecast( new_data = forecast_tbl, actual_data = iniciativa_full1) %>% plot_modeltime_forecast()
model_spec_prophet_boost <-prophet_boost(
changepoint_num = 25, changepoint_range = 0.8,
min_n = 20, tree_depth = 3, learn_rate = 0.35,
trees = 500 ) %>% set_engine("prophet_xgboost")
set.seed(123) wflw_fit_prophet_boost <- workflow() %>% add_model(model_spec_prophet_boost) %>% add_recipe(recipe_spec_base_no_lag) %>% fit(training(splits))
calibrate_and_plot( model_fit_prophet, wflw_fit_prophet_boost )
modeltime_table( wflw_fit_prophet_boost ) %>% modeltime_residuals( training(splits) ) %>% plot_modeltime_residuals()
model_tbl <- modeltime_table( model_fit_auto_arima, wflw_fit_arima_boost, model_fit_prophet, wflw_fit_prophet_boost )
calibration_tbl <- model_tbl %>% modeltime_calibrate( testing(splits) )
calibration_tbl %>% modeltime_accuracy()
calibration_tbl %>% modeltime_forecast( new_data = testing(splits), actual_data = iniciativa_full1 ) %>% plot_modeltime_forecast()
refit_tbl <- calibration_tbl %>% modeltime_refit( data = iniciativa_full1 )
refit_tbl %>% modeltime_forecast( new_data = forecast_tbl, actual_data = iniciativa_full1 ) %>% plot_modeltime_forecast( .conf_interval_show = FALSE )
model_spec_glmnet <- linear_reg( mode = "regression", penalty = 0.1, mixture = 0.5 ) %>% set_engine("glmnet")
wflw_fit_glmnet_spline <- workflow() %>% add_model(model_spec_glmnet) %>% add_recipe(recipe_spec_1_spline) %>% fit(training(splits))
wflw_fit_glmnet_lag <- workflow() %>% add_model(model_spec_glmnet) %>% add_recipe(recipe_spec_2_lag) %>% fit(training(splits))
calibration_tbl <- modeltime_table( wflw_fit_glmnet_spline, wflw_fit_glmnet_lag ) %>% update_model_description(1, "GLMNET - Spline") %>% update_model_description(2, "GLMNET - Lag") %>% modeltime_calibrate(testing(splits))
calibration_tbl %>% modeltime_accuracy()
calibration_tbl %>% modeltime_forecast( new_data = testing(splits), actual_data = iniciativa_full1 ) %>% plot_modeltime_forecast(.conf_interval_show = FALSE)
model_spec_knn <- nearest_neighbor( mode = "regression", neighbors = 10, weight_func = "triangular" ) %>% set_engine("kknn")
set.seed(123) wflw_fit_knn_spline <- workflow() %>% add_model(model_spec_knn) %>% add_recipe(recipe_spec_1_spline) %>% fit(training(splits))
calibrate_and_plot( wflw_fit_knn_spline )
set.seed(123) wflw_fit_knn_lag <- wflw_fit_knn_spline%>% update_recipe(recipe_spec_2_lag) %>% fit(training(splits))
calibrate_and_plot( wflw_fit_knn_spline, wflw_fit_knn_lag )
model_spec_rf <- rand_forest( mode = "regression", mtry = 50, trees = 500,
) %>% set_engine("randomForest")
set.seed(123) wflw_fit_rf_spline <- workflow() %>% add_model(model_spec_rf) %>% add_recipe(recipe_spec_1_spline) %>% fit(training(splits))
calibrate_and_plot( wflw_fit_rf_spline )
set.seed(123) wflw_fit_rf_lag <- wflw_fit_rf_spline %>% update_recipe(recipe_spec_2_lag) %>% fit(training(splits))
calibrate_and_plot( wflw_fit_rf_spline, wflw_fit_rf_lag )
model_spec_boost <- boost_tree( mode = "regression", mtry = 25, trees = 500, min_n = 2, learn_rate = 0.3, loss_reduction = 0 ) %>% set_engine("xgboost")
set.seed(123) wflw_fit_xgboost_spline <- workflow() %>% add_model(model_spec_boost) %>% add_recipe(recipe_spec_1_spline) %>% fit(training(splits))
set.seed(123) wflw_fit_xgboost_lag <- wflw_fit_xgboost_spline %>% update_recipe(recipe_spec_2_lag) %>% fit(training(splits))
calibrate_and_plot( wflw_fit_xgboost_spline, wflw_fit_xgboost_lag )
model_spec_cubist <- cubist_rules( committees = 5, neighbors = 5, max_rules = 100 ) %>% set_engine("Cubist")
set.seed(123) wflw_fit_cubist_spline <- workflow() %>% add_model(model_spec_cubist) %>% add_recipe(recipe_spec_1_spline) %>% fit(training(splits))
set.seed(123) wflw_fit_cubist_lag <- wflw_fit_cubist_spline %>% update_recipe(recipe_spec_2_lag) %>% fit(training(splits))
calibrate_and_plot( wflw_fit_cubist_spline, wflw_fit_cubist_lag )
model_spec_nnet <- mlp(mode = "regression", hidden_units = 10, penalty = 1, epochs = 50) %>% set_engine("nnet")
set.seed(123) wflw_fit_nnet_spline <- workflow() %>% add_model(model_spec_nnet) %>% add_recipe(recipe_spec_1_spline) %>% fit(training(splits))
set.seed(123) wflw_fit_nnet_lag <- wflw_fit_nnet_spline %>% update_recipe(recipe_spec_2_lag)%>% fit(training(splits))
calibrate_and_plot( wflw_fit_nnet_spline, wflw_fit_nnet_lag )
model_spec_nnetar <- nnetar_reg( non_seasonal_ar = 2, seasonal_ar = 1, hidden_units = 10, penalty = 1, num_networks = 5, epochs = 50 ) %>% set_engine("nnetar")
set.seed(123) wflw_fit_nnetar_base <- workflow() %>% add_model(model_spec_nnetar) %>% add_recipe(recipe_spec_base) %>% fit(training(splits) %>% drop_na())
calibrate_and_plot( wflw_fit_nnetar_base )
model_tbl <- modeltime_table( wflw_fit_glmnet_spline, wflw_fit_glmnet_lag, wflw_fit_knn_spline, wflw_fit_knn_lag, wflw_fit_rf_spline, wflw_fit_rf_lag, wflw_fit_xgboost_spline, wflw_fit_xgboost_lag, wflw_fit_cubist_lag, wflw_fit_nnetar_base ) %>% mutate( .model_desc_2 = str_c(.model_desc, rep_along(.model_desc, c(" - Spline", " - Lag"))) ) %>% mutate( .model_desc = ifelse(.model_id == 11, .model_desc, .model_desc_2) ) %>% select(-.model_desc_2)
calibration_tbl <- model_tbl %>% modeltime_calibrate( testing(splits) )
calibration_tbl
calibration_tbl %>% modeltime_accuracy() %>%
table_modeltime_accuracy(resizable = TRUE, bordered = TRUE)
forecast_test_tbl <- calibration_tbl %>% modeltime_forecast( new_data = testing(splits), actual_data = iniciativa_full1 )
forecast_test_tbl %>% plot_modeltime_forecast( .conf_interval_show = FALSE )
set.seed(123) refit_tbl <- calibration_tbl %>% modeltime_refit(data = iniciativa_full1)
forecast_future_tbl <- refit_tbl %>% modeltime_forecast( new_data = forecast_tbl, actual_data = iniciativa_full1 )
Lucas Zago (11) 98279-2247 Engenheiro Mecânico lukaszago@hotmail.com br.linkedin.com/in/lzagohttp://br.linkedin.com/in/lzago/
De: Matt Dancho notifications@github.com Enviado: terça-feira, 27 de outubro de 2020 11:38 Para: business-science/modeltime modeltime@noreply.github.com Cc: Lucas Zago lukaszago@hotmail.com; Author author@noreply.github.com Assunto: Re: [business-science/modeltime] Lesson 11.11 - Model Inspection - Visualizing the Future Forecast (#47)
Hey, I actually thought this was your data. If so, I need a reproducible data set with full code to reproduce your issue.
— You are receiving this because you authored the thread. Reply to this email directly, view it on GitHubhttps://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2Fbusiness-science%2Fmodeltime%2Fissues%2F47%23issuecomment-717329928&data=04%7C01%7C%7Cbe5a6fb0a8f5449b1bdf08d87a8e5a8d%7C84df9e7fe9f640afb435aaaaaaaaaaaa%7C1%7C0%7C637394099099360766%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=gccHiEKLYOlCXm3omDVY6dvauYVJRoJF2MPmqMOzuk4%3D&reserved=0, or unsubscribehttps://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2Fnotifications%2Funsubscribe-auth%2FALQ6T5JWGAE4UPMPOKEHRZDSM3SPJANCNFSM4TA3M7MA&data=04%7C01%7C%7Cbe5a6fb0a8f5449b1bdf08d87a8e5a8d%7C84df9e7fe9f640afb435aaaaaaaaaaaa%7C1%7C0%7C637394099099360766%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=%2BRUi7fFrcjKpQCgc4%2BrS2jSFAphb%2Bg7T0iSSfnkIpcM%3D&reserved=0.
I'm not going to be able to trouble shoot this. My bandwidth is too contrained at the moment.
in the session 11.11 I was trying to forecast my future data and I got the following error: Error: Problem occurred combining processed data with timestamps. Most likely cause is rows being added or removed during preprocessing. Try imputing missing values to retain the full number of rows. All my lag models was removed for my data, follow my code: forecast_future_tbl <- refit_tbl %>% modeltime_forecast( new_data = forecast_tbl, actual_data = iniciativa_full1 )
Where you read : actual_data = iniciativa_full1 is actual_data = data_prepared_tbl Follow my recipe for lag models: recipe_spec_2_lag <- recipe_spec_base %>% step_rm(data_planejada) %>% step_naomit(startswith("lag")) I did not have any issues befofe this step, I could do lag models normally