Closed LilMichelangelo2 closed 2 months ago
@autopilot change CODE 1 by implementing the approach to rolling regressions used in CODE 2. I suspect that the way CODE 1 builds its estimates from 60-month windows is flawed. However, keep in mind that the estimation must remain a panel regression.
CODE 1:
# CODE 1: rolling-window panel estimation of the fund-size persistence
# coefficient (pOLS), the size-innovation proxy, and per-fund scalability.
# Uses objects/functions defined outside this snippet: `adjusted_net_alpha`,
# the dplyr verbs, lubridate's %m+% / %m-% / ceiling_date(), plm(), tidy()
# and tibble().

# Window length (in months) and the minimum number of observations a fund
# needs inside a window to be kept. These were previously hard-coded as 59/60
# and 36 in several places.
window_months <- 60
min_fund_obs <- 36

# Take the natural logarithm of fund size and lagged fund size; the level
# columns are dropped afterwards.
log_size <- adjusted_net_alpha %>%
  arrange(Date, FundId) %>%
  mutate(
    Fund_Size_log = log(Fund_Size),
    L1_Fund_Size_log = log(L1_Fund_Size)
  ) %>%
  dplyr::select(-Fund_Size, -L1_Fund_Size) %>%
  arrange(Date)

# Build the rolling estimation windows: one window per date that has at least
# `window_months` months of history behind it.
min_valid_date <- min(log_size$Date) %m+% months(window_months - 1)
valid_dates <- unique(log_size$Date[log_size$Date >= min_valid_date])

log_size_list <- lapply(valid_dates, function(current_date) {
  start_date <- current_date %m-% months(window_months - 1)
  log_size %>%
    filter(Date >= start_date & Date <= current_date) %>%
    # Drop funds with fewer than `min_fund_obs` observations in this window.
    # (The original comment said 60, but the code has always used 36.)
    group_by(FundId) %>%
    filter(n() >= min_fund_obs) %>%
    ungroup()
})

# Label every window with the last day of the month FOLLOWING the window end.
names(log_size_list) <- as.character(
  ceiling_date(valid_dates %m+% months(1), "month") - days(1)
)
# NOTE(review): hard-coded index kept from the original. Presumably this drops
# the final window (whose label lies beyond the sample) -- confirm, and if so
# replace 385 with length(log_size_list).
log_size_list[[385]] <- NULL

# Fit the within (fixed-effects) panel regression of log size on lagged log
# size for every window. A failed fit is reported and stored as NULL.
plm_results_list <- lapply(log_size_list, function(window_df) {
  tryCatch(
    plm(
      Fund_Size_log ~ L1_Fund_Size_log,
      data = window_df,
      index = c("FundId", "Date"),
      model = "within"
    ),
    error = function(e) {
      message(
        "Error in plm for FundId ", unique(window_df$FundId), ": ", e$message
      )
      NULL
    }
  )
})

# Collect the estimated coefficient per window (NA where the fit failed).
pOLS_list <- lapply(plm_results_list, function(fit) {
  if (!inherits(fit, "plm")) {
    return(NA)
  }
  tryCatch(coef(fit), error = function(e) NA)
})

# One row per window (row names = window labels), single column `pOLS`.
pOLS <- as.data.frame(do.call(cbind, pOLS_list))
colnames(pOLS) <- names(pOLS_list)
pOLS <- as.data.frame(t(pOLS))
# Exact column lookup: the original `pOLS$L1_Fund_Size` only worked through
# `$` partial matching on data frames.
pOLS$pOLS <- pOLS[["L1_Fund_Size_log"]]
pOLS$L1_Fund_Size_log <- NULL

# Alternative kept from the original (disabled): use a fixed persistence
# parameter instead of the rolling estimate.
# #####
# pPMUE = 0.9949
# #####
#
# pSize_list <- log_size_list
#
# pSize_list <- lapply(pSize_list, function(df) {
#   df$pPMUE <- pPMUE
#   return(df)
# })
#
# # Step 2: compute the size innovation proxy
# names(pSize_list) <- names(log_size_list)
#
# innovator_list <- lapply(pSize_list, function(df) {
#   df <- df %>%
#     mutate(initial_innovation = Fund_Size_log - (pPMUE * L1_Fund_Size_log))
#   return(df)
# })

# Attach each window's own pOLS estimate to its data.
pSize_list <- lapply(names(log_size_list), function(window_name) {
  window_df <- log_size_list[[window_name]]
  window_df$pOLS <- pOLS$pOLS[rownames(pOLS) == window_name]
  window_df
})
names(pSize_list) <- names(pOLS_list)

# Step 2: compute the size innovation proxy. The raw innovation is
# log size minus pOLS times lagged log size; it is then demeaned per fund
# within the window.
innovator_list <- lapply(pSize_list, function(df) {
  df %>%
    mutate(initial_innovation = Fund_Size_log - (pOLS * L1_Fund_Size_log)) %>%
    group_by(FundId) %>%
    mutate(mean_innovation = mean(initial_innovation, na.rm = TRUE)) %>%
    ungroup() %>%
    mutate(size_innovation = initial_innovation - mean_innovation)
})

# Estimate the expected scalability parameter following Van Binsbergen et al.
# (2023), p. 30f: per window and per fund, regress net alpha on negative
# lagged log size and the size innovation. Failed fits yield one all-NA row.
scalability_list <- lapply(innovator_list, function(df) {
  df %>%
    mutate(Neg_L1_Fund_Size_log = -L1_Fund_Size_log) %>%
    group_by(FundId) %>%
    do({
      tryCatch(
        tidy(lm(net_alpha ~ Neg_L1_Fund_Size_log + size_innovation, data = .)),
        error = function(e) {
          tibble(
            term = NA_character_,
            estimate = NA_real_,
            std.error = NA_real_,
            statistic = NA_real_,
            p.value = NA_real_
          )
        }
      )
    }) %>%
    ungroup()
})

# Stack all windows; the window label becomes the `Date` column.
scalability_full <- bind_rows(scalability_list, .id = "Date")
scalability_full$Date <- as.Date(scalability_full$Date)

# Keep only the coefficient on negative lagged log size, selected by column
# name instead of by position, and expose it as `Scale`.
scalability <- scalability_full %>%
  filter(term == "Neg_L1_Fund_Size_log") %>%
  dplyr::select(Date, FundId, Scale = estimate)
scalability <- na.omit(scalability)
CODE 2:
#' Rolling time-series regression of Gross_Excess_Return on mktrf
#'
#' For every row i, the model is fitted on the rows immediately BEFORE i
#' (row i itself is excluded), using at most `window` of them.
#'
#' @param data Data frame with columns `Gross_Excess_Return` and `mktrf`.
#'   Rows are used in the order given, so they should already be sorted
#'   chronologically.
#' @param window Maximum number of preceding rows used per regression.
#' @param min_obs Minimum number of preceding rows required to fit a model;
#'   rows with less history keep `NA`.
#' @return A list with two numeric vectors of length `nrow(data)`:
#'   `mktrf_coefficients` and `intercepts`.
rolling_regression <- function(data, window = 60, min_obs = 24) {
  stopifnot(window >= 1, min_obs >= 1)

  n_obs <- nrow(data)
  # NA_real_ keeps both outputs numeric even when no regression can be fitted.
  mktrf_coefficients <- rep(NA_real_, n_obs)
  intercepts <- rep(NA_real_, n_obs)

  # seq_len() is safe for short inputs; the original `min_obs:nrow(data)`
  # counted downwards whenever nrow(data) < min_obs.
  for (i in seq_len(n_obs)) {
    # The window covers rows (i - window) .. (i - 1). The original started at
    # i - window + 1, which capped the window at `window - 1` observations.
    start_index <- max(1, i - window)
    end_index <- i - 1
    if (end_index - start_index + 1 < min_obs) {
      next
    }

    rolling_data <- data[start_index:end_index, ]
    model <- dynlm(Gross_Excess_Return ~ mktrf, data = rolling_data)
    mktrf_coefficients[i] <- unname(coef(model)["mktrf"])
    intercepts[i] <- unname(coef(model)["(Intercept)"])
  }

  list(mktrf_coefficients = mktrf_coefficients, intercepts = intercepts)
}

# Run the rolling regression separately for every fund. Sorting by Date first
# guarantees that each window consists of chronologically preceding rows.
regression_results <- panel_test %>%
  arrange(FundId, Date) %>%
  group_by(FundId) %>%
  do({
    regression_coefs <- rolling_regression(., window = 60, min_obs = 24)
    data.frame(
      Date = .$Date,
      mktrf_coefficient = regression_coefs$mktrf_coefficients,
      intercept = regression_coefs$intercepts
    )
  })
@autopilot change CODE 1 by implementing the approach to rolling regressions used in CODE 2. I suspect that the way CODE 1 builds its estimates from 60-month windows is flawed. However, keep in mind that the estimation must remain a panel regression.
CODE 1:
CODE 2: