shreyam202236 / Probability-Statistics-Assignment

0 stars 1 forks source link

Problem 2 Shreyam Banerjee #1

Open shreyam202236 opened 1 year ago

shreyam202236 commented 1 year ago

title: "Problem 2" author: "Shreyam Banerjee" date: "2022-11-17" output: html_document

knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(fig.align = 'center')
knitr::opts_chunk$set(fig = TRUE)
knitr::opts_chunk$set(warning = FALSE)
knitr::opts_chunk$set(tidy.opts = list(width.cutoff = 60), tidy = TRUE)
library(tidyverse)

Problem 2

mle <- function(log_alpha, data, sigma) {
  l = sum(log(dgamma(data, shape = exp(log_alpha), scale = sigma)))
  return(-l)
}
MyMLE <- function(data, sigma) {
  log_alpha_initial <- log(mean(data)^2/var(data))
  estimator <- optim(log_alpha_initial,
                     mle,
                     data = data,
                     sigma = sigma)
  log_alpha_hat <- estimator$par
  return(log_alpha_hat)
}
get_estimates <- function(n, alpha, sigma) {
  estimates <- c()
  for (i in 1:1000) {
    samples <- rgamma(n, shape = alpha, scale = sigma)
    estimates <- append(estimates, MyMLE(data = samples, sigma = sigma))
  }
  return(estimates)
}
n = 20
alpha = 1.5
sigma = 2.2
estimated_mle <- tibble(get_estimates(n = n, alpha = alpha, sigma = sigma))
colnames(estimated_mle) <- c("estimate")
perc_2.5 <- quantile(estimated_mle$estimate, probs = 0.025, names = FALSE)
perc_97.5 <- quantile(estimated_mle$estimate, probs = 0.975, names = FALSE)
estimated_mle %>% 
  ggplot(aes(estimate)) +
  geom_histogram(color = "black", fill = "yellow") +
  geom_vline(xintercept = log(alpha), 
             size = 2,
             linetype = "dashed") +
  annotate("text", label = "Actual log(alpha)", x = 0.5, y = 95, color = "black") +
  geom_vline(xintercept = perc_2.5, 
             color = "#00B9FF", size = 1.5, linetype = "dashed") +
  annotate("text", label = "2.5 percentile", x = perc_2.5 + 0.1, y = 95, color = "#00B9FF") +
  geom_vline(xintercept = perc_97.5, 
             color = "#E08304", size = 1.5, linetype = "dashed") +
  annotate("text", label = "97.5 percentile", x = perc_97.5 + 0.1, y = 95, color = "#E08304") +
  labs(title = paste("n = ", n, ", alpha = ", alpha, ", sigma = ", sigma),
       x = "Estimated MLE",
       y = "Count")
diff_20 <- perc_97.5 - perc_2.5
n = 40
alpha = 1.5
sigma = 2.2
estimated_mle <- tibble(get_estimates(n = n, alpha = alpha, sigma = sigma))
colnames(estimated_mle) <- c("estimate")
perc_2.5 <- quantile(estimated_mle$estimate, probs = 0.025, names = FALSE)
perc_97.5 <- quantile(estimated_mle$estimate, probs = 0.975, names = FALSE)
estimated_mle %>% 
  ggplot(aes(estimate)) +
  geom_histogram(color = "black", fill = "yellow") +
  geom_vline(xintercept = log(alpha), 
             size = 2,
             linetype = "dashed") +
  annotate("text", label = "Actual log(alpha)", x = log(alpha) + 0.1, y = 95, color = "black") +
  geom_vline(xintercept = perc_2.5, 
             color = "#00B9FF", size = 1.5, linetype = "dashed") +
  annotate("text", label = "2.5 percentile", x = perc_2.5 + 0.1, y = 95, color = "#00B9FF") +
  geom_vline(xintercept = perc_97.5, 
             color = "#E08304", size = 1.5, linetype = "dashed") +
  annotate("text", label = "97.5 percentile", x = perc_97.5 + 0.1, y = 95, color = "#E08304") +
  labs(title = paste("n = ", n, ", alpha = ", alpha, ", sigma = ", sigma),
       x = "Estimated MLE",
       y = "Count")
diff_40 <- perc_97.5 - perc_2.5
n = 100
alpha = 1.5
sigma = 2.2
estimated_mle <- tibble(get_estimates(n = n, alpha = alpha, sigma = sigma))
colnames(estimated_mle) <- c("estimate")
perc_2.5 <- quantile(estimated_mle$estimate, probs = 0.025, names = FALSE)
perc_97.5 <- quantile(estimated_mle$estimate, probs = 0.975, names = FALSE)
estimated_mle %>% 
  ggplot(aes(estimate)) +
  geom_histogram(color = "black", fill = "yellow") +
  geom_vline(xintercept = log(alpha), 
             size = 2,
             linetype = "dashed") +
  annotate("text", label = "Actual log(alpha)", x = log(alpha) + 0.05, y = 95, color = "black") +
  geom_vline(xintercept = perc_2.5, 
             color = "#00B9FF", size = 1.5, linetype = "dashed") +
  annotate("text", label = "2.5 percentile", x = perc_2.5 + 0.05, y = 95, color = "#00B9FF") +
  geom_vline(xintercept = perc_97.5, 
             color = "#E08304", size = 1.5, linetype = "dashed") +
  annotate("text", label = "97.5 percentile", x = perc_97.5 + 0.05, y = 95, color = "#E08304") +
  labs(title = paste("n = ", n, ", alpha = ", alpha, ", sigma = ", sigma),
       x = "Estimated MLE",
       y = "Count")
diff_100 <- perc_97.5 - perc_2.5
diff_20
diff_40
diff_100

Thus we can see from the the plot that as the sample size increases, the gap between the percentile points keep on decreasing.

sourish-cmi commented 1 year ago

Not reviewed by other team member