Expected VA - Githubissues

@autopilot change CODE 1 by implementing the approach to rolling regressions from CODE 2. I have a feeling that my approach in CODE 1 to estimations based on 60-month windows is flawed. However, keep in mind that it must remain a panel regression.

CODE 1:

# Take the natural logarithm of fund size and of lagged fund size.
# Rows are ordered by Date, then FundId, so they are already in Date order;
# the raw size columns are dropped once their logs have been computed.
log_size <- adjusted_net_alpha %>%
  arrange(Date, FundId) %>%
  mutate(
    Fund_Size_log = log(Fund_Size),
    L1_Fund_Size_log = log(L1_Fund_Size)
  ) %>%
  dplyr::select(-Fund_Size, -L1_Fund_Size)

# Build the 60-month estimation windows: one subset of log_size per end date.
# The first usable end date lies 59 months after the earliest date in the data.
min_valid_date <- min(log_size$Date) %m+% months(59)

# Every distinct date on or after that point serves as a window end date
valid_dates <- unique(log_size$Date[log_size$Date >= min_valid_date])

log_size_list <- lapply(valid_dates, function(window_end) {
  # Each window reaches back 59 months from its end date (both ends inclusive)
  window_start <- window_end %m-% months(59)
  filter(log_size, Date >= window_start, Date <= window_end)
})

# Within each window, keep only funds that have at least 36 observations.
# NOTE(review): this step was previously described as dropping funds with
# fewer than 60 observations, but the threshold applied is 36 - confirm which
# of the two is intended.
log_size_list <- lapply(log_size_list, function(window_data) {
  window_data %>%
    group_by(FundId) %>%
    filter(n() >= 36) %>%
    ungroup()
})

# Label every window with a date derived from its end date: shift the end date
# forward by one month, round up to the month boundary and step back one day
# (presumably the last day of the following month - TODO confirm for the
# date convention used in the data).
log_size_list <- setNames(
  log_size_list,
  as.character(ceiling_date(valid_dates %m+% months(1), "month") - days(1))
)
# NOTE(review): hard-coded position; presumably this is the final window, whose
# label falls beyond the sample - verify against the actual number of windows.
log_size_list[[385]] <- NULL

# Estimate, for every window, the within (fixed-effects) panel regression of
# log fund size on lagged log fund size. A window whose estimation fails is
# stored as NULL so that the remaining windows are still processed.
plm_results_list <- lapply(seq_along(log_size_list), function(i) {
  tryCatch(
    plm(
      Fund_Size_log ~ L1_Fund_Size_log,
      data = log_size_list[[i]],
      index = c("FundId", "Date"),
      model = "within"
    ),
    error = function(e) {
      # The model is fitted per window (not per fund), so report the failing
      # window instead of printing every FundId it contains.
      message(
        "Error in plm for window ", names(log_size_list)[i], ": ",
        conditionMessage(e)
      )
      NULL
    }
  )
})
# Iterating over positions drops the list names, so restore the window labels
names(plm_results_list) <- names(log_size_list)

# Build the time series of the estimated coefficient on L1_Fund_Size_log.
# Windows without a usable model (NULL or not a plm object), or whose
# coefficients cannot be extracted, contribute NA.
pOLS_list <- lapply(plm_results_list, function(model) {
  if (!inherits(model, "plm")) {
    return(NA)
  }
  tryCatch(coef(model), error = function(e) NA)
})

# One row per window (row names = window labels), single column `pOLS`.
# The coefficient is looked up by its exact name rather than through `$`
# partial matching on a prefix of the column name.
pOLS <- data.frame(
  pOLS = vapply(
    pOLS_list,
    function(estimates) as.numeric(estimates["L1_Fund_Size_log"]),
    numeric(1)
  ),
  row.names = names(pOLS_list)
)

# #####
# pPMUE = 0.9949
# #####
# 
# pSize_list <- log_size_list
# 
# pSize_list <- lapply(pSize_list, function(df) {
#   df$pPMUE <- pPMUE 
#   return(df)
# })
# 
# # Schritt 2: Size Innovation Proxy berechnen 
# names(pSize_list) <- names(log_size_list)
# 
# innovator_list <- lapply(pSize_list, function(df) {
#   df <- df %>% 
#     mutate(initial_innovation = Fund_Size_log - (pPMUE * L1_Fund_Size_log))
#   return(df)
# })

# Attach each window's estimated coefficient to that window's data as a
# constant column `pOLS`, matching windows to estimates by their label.
pSize_list <- lapply(names(log_size_list), function(window_name) {
  window_data <- log_size_list[[window_name]]
  window_data$pOLS <- pOLS$pOLS[rownames(pOLS) == window_name]
  window_data
})

# lapply over a character vector returns an unnamed list; re-apply the labels
names(pSize_list) <- names(pOLS_list)

# Step 2: compute the size innovation proxy for every window.
#   initial_innovation: log size minus pOLS times lagged log size
#   mean_innovation:    per-fund mean of initial_innovation (NAs ignored)
#   size_innovation:    initial_innovation demeaned by fund
innovator_list <- lapply(pSize_list, function(window_data) {
  window_data %>%
    mutate(initial_innovation = Fund_Size_log - (pOLS * L1_Fund_Size_log)) %>%
    group_by(FundId) %>%
    mutate(
      mean_innovation = mean(initial_innovation, na.rm = TRUE),
      size_innovation = initial_innovation - mean_innovation
    ) %>%
    ungroup()
})

# Estimate the expected scalability parameter following
# Van Binsbergen et al. (2023), p. 30f: within every window, regress net_alpha
# on the negative lagged log fund size and the size innovation, fund by fund.
scalability_list <- lapply(innovator_list, function(window_data) {
  by_fund <- group_by(
    mutate(window_data, Neg_L1_Fund_Size_log = -L1_Fund_Size_log),
    FundId
  )

  fund_fits <- do(by_fund, {
    tryCatch(
      tidy(lm(net_alpha ~ Neg_L1_Fund_Size_log + size_innovation, data = .)),
      # A fund whose regression fails yields a single all-NA row
      error = function(e) {
        tibble(
          term = NA_character_,
          estimate = NA_real_,
          std.error = NA_real_,
          statistic = NA_real_,
          p.value = NA_real_
        )
      }
    )
  })

  ungroup(fund_fits)
})

# Stack the per-window regression output; the list names (window labels)
# become the Date column.
scalability_full <- bind_rows(scalability_list, .id = "Date")
scalability_full$Date <- as.Date(scalability_full$Date)

# Keep the coefficient on negative lagged log size as `Scale`, one row per
# window and fund. Columns are selected by name rather than by position so the
# result does not depend on the column order of the regression output.
scalability <- scalability_full %>%
  filter(term == "Neg_L1_Fund_Size_log") %>%
  dplyr::select(Date, FundId, Scale = estimate) %>%
  na.omit()

CODE 2:

#' Rolling regression of Gross_Excess_Return on mktrf
#'
#' For every row `i` the model is fitted on the rows strictly before `i`,
#' starting at row `max(1, i - window + 1)`. The estimation sample therefore
#' never contains row `i` itself and holds at most `window - 1` rows.
#' NOTE(review): if a full `window`-row sample is intended, the start index
#' would need to be `i - window` - confirm before changing.
#'
#' @param data Data frame with columns `Gross_Excess_Return` and `mktrf`,
#'   already in the order the window should roll over.
#' @param window Window length used to place the start of the sample.
#' @param min_obs Minimum number of rows required in the sample; rows with
#'   index below `min_obs` are skipped as well.
#' @return A list with two numeric vectors of length `nrow(data)`,
#'   `mktrf_coefficients` and `intercepts`; entries without an estimate are
#'   `NA`.
rolling_regression <- function(data, window = 60, min_obs = 24) {
  n_obs <- nrow(data)
  # NA_real_ keeps the result numeric even when no model is ever fitted
  mktrf_coefficients <- rep(NA_real_, n_obs)
  intercepts <- rep(NA_real_, n_obs)

  # seq_len() avoids the descending sequence that min_obs:nrow(data) produces
  # when the data has fewer than min_obs rows
  for (i in seq_len(n_obs)) {
    start_index <- max(1, i - window + 1)
    end_index <- i - 1
    n_window <- end_index - start_index + 1

    # Skip rows without enough history. Requiring at least one row also
    # prevents `start_index:end_index` from running backwards (1:0), which
    # would silently select the current row.
    if (i < min_obs || n_window < max(min_obs, 1)) {
      next
    }

    rolling_data <- data[start_index:end_index, ]
    model <- dynlm(Gross_Excess_Return ~ mktrf, data = rolling_data)
    mktrf_coefficients[i] <- coef(model)["mktrf"]
    intercepts[i] <- coef(model)["(Intercept)"]
  }

  list(mktrf_coefficients = mktrf_coefficients, intercepts = intercepts)
}

# Run the rolling regression separately for every fund and collect, per fund
# and date, the estimated mktrf coefficient and intercept.
regression_results <- do(
  group_by(panel_test, FundId),
  {
    fund_fit <- rolling_regression(., window = 60, min_obs = 24)
    data.frame(
      Date = .$Date,
      mktrf_coefficient = fund_fit$mktrf_coefficients,
      intercept = fund_fit$intercepts
    )
  }
)

LilMichelangelo2 / Amanta

Expected VA #8