Closed spsanderson closed 6 months ago
This might work well too, although still finding a good ncp start point might prove somewha elusive:
# Load required package
library(bbmle)
# Sample data (replace with your actual data)
data <- rchisq(100, df = 5, ncp = 20)
# Define negative log-likelihood function
negLogLik <- function(df, ncp) {
-sum(dchisq(data, df = df, ncp = ncp, log = TRUE))
}
# Initial values for optimization (crucial for good convergence)
start_vals <- list(df = 3, ncp = 20)
# Maximum likelihood estimation
mle_fit <- mle2(negLogLik, start = start_vals)
# Extract estimated parameters
df_est <- coef(mle_fit)[1]
ncp_est <- coef(mle_fit)[2]
# Print the results
cat("Estimated df:", df_est, "\n")
cat("Estimated ncp:", ncp_est)
Output:
> # Load required package
> library(bbmle)
>
> # Sample data (replace with your actual data)
> data <- rchisq(100, df = 5, ncp = 20)
>
> # Define negative log-likelihood function
> negLogLik <- function(df, ncp) {
+ -sum(dchisq(data, df = df, ncp = ncp, log = TRUE))
+ }
>
> # Initial values for optimization (crucial for good convergence)
> start_vals <- list(df = 3, ncp = 20)
>
> # Maximum likelihood estimation
> mle_fit <- mle2(negLogLik, start = start_vals)
>
> # Extract estimated parameters
> df_est <- coef(mle_fit)[1]
> ncp_est <- coef(mle_fit)[2]
>
> # Print the results
> cat("Estimated df:", df_est, "\n")
Estimated df: 10.84161
> cat("Estimated ncp:", ncp_est)
Estimated ncp: 14.05248
nd <- rchisq(100, df = df_est, ncp = ncp_est)
hist(data, col = "lightblue", main = "Histogram of data")
hist(nd, col = "lightgreen", add = TRUE)
Another:
> estimate_chisq_params <- function(data) {
+ # Negative log-likelihood function
+ negLogLik <- function(df, ncp) {
+ -sum(dchisq(data, df = df, ncp = ncp, log = TRUE))
+ }
+
+ # Initial values (adjust based on your data if necessary)
+ start_vals <- list(df = trunc(var(data)/2), ncp = trunc(mean(data)))
+
+ # MLE using bbmle
+ mle_fit <- bbmle::mle2(negLogLik, start = start_vals)
+
+ # Return estimated parameters as a named vector
+ c(df = coef(mle_fit)[1], ncp = coef(mle_fit)[2])
+ }
>
> library(purrr)
>
> # List of data vectors (replace with your actual data)
> data_list <- list(rchisq(100, df = 5, ncp = 2),
+ rchisq(80, df = 3, ncp = 1),
+ rchisq(120, df = 7, ncp = 4))
>
> # Apply the estimation function to each data vector
> param_estimates <- map(data_list, estimate_chisq_params)
Warning messages:
1: In dchisq(data, df = df, ncp = ncp, log = TRUE) : NaNs produced
2: In dchisq(data, df = df, ncp = ncp, log = TRUE) : NaNs produced
3: In dchisq(data, df = df, ncp = ncp, log = TRUE) : NaNs produced
4: In dchisq(data, df = df, ncp = ncp, log = TRUE) : NaNs produced
5: In dchisq(data, df = df, ncp = ncp, log = TRUE) : NaNs produced
6: In dchisq(data, df = df, ncp = ncp, log = TRUE) : NaNs produced
>
> # Print results
> print(param_estimates)
[[1]]
df.df ncp.ncp
5.3677154 0.9875087
[[2]]
df.df ncp.ncp
3.2668169 0.3253299
[[3]]
df.df ncp.ncp
6.559403 4.540669
# Lib Load ----
library(tidyverse)
library(bbmle)
# Data ----
# Make parameters and grid
df <- 1:10
ncp <- 1:10
n <- runif(10, 250, 500) |> trunc()
param_grid <- expand_grid(n = n, df = df, ncp = ncp)
# Functions ----
# functions to estimate the parameters of a chisq distribution
# dof
mean_x <- function(x) mean(x)
mean_minus_1 <- function(x) mean(x) - 1
var_div_2 <- function(x) var(x) / 2
length_minus_1 <- function(x) length(x) - 1
# ncp
mean_minus_mean_minus_1 <- function(x) mean(x) - (mean(x) - 1)
ie_mean_minus_var_div_2 <- function(x) ifelse((mean(x) - (var(x) / 2)) < 0, 0, mean(x) - var(x)/2)
ie_optim <- function(x) optim(par = 0,
fn = function(ncp) {
-sum(dchisq(x, df = var(x)/2, ncp = ncp, log = TRUE))
},
method = "Brent",
lower = 0,
upper = 10 * var(x)/2)$par
# both
estimate_chisq_params <- function(data) {
# Negative log-likelihood function
negLogLik <- function(df, ncp) {
-sum(dchisq(data, df = df, ncp = ncp, log = TRUE))
}
# Initial values (adjust based on your data if necessary)
start_vals <- list(df = trunc(var(data)/2), ncp = trunc(mean(data)))
# MLE using bbmle
mle_fit <- bbmle::mle2(negLogLik, start = start_vals)
# Return estimated parameters as a named vector
df <- dplyr::tibble(
est_df = coef(mle_fit)[1],
est_ncp = coef(mle_fit)[2]
)
return(df)
}
safe_estimates <- {
purrr::possibly(
estimate_chisq_params,
otherwise = NA_real_,
quiet = TRUE
)
}
# Simulate data ----
set.seed(123)
dff <- param_grid |>
mutate(x = pmap(pick(everything()), match.fun("rchisq"))) |>
mutate(
safe_est_parms = map(x, safe_estimates),
dfa = map_dbl(x, mean_minus_1),
dfb = map_dbl(x, var_div_2),
dfc = map_dbl(x, length_minus_1),
ncpa = map_dbl(x, mean_minus_mean_minus_1),
ncpb = map_dbl(x, ie_mean_minus_var_div_2),
ncpc = map_dbl(x, ie_optim)
) |>
select(-x) |>
filter(map_lgl(safe_est_parms, ~ any(is.na(.x))) == FALSE) |>
unnest(cols = safe_est_parms) |>
mutate(
dfa_resid = dfa - df,
dfb_resid = dfb - df,
dfc_resid = dfc - df,
dfd_resid = est_df - df,
ncpa_resid = ncpa - ncp,
ncpb_resid = ncpb - ncp,
ncpc_resid = ncpc - ncp,
ncpd_resid = est_ncp - ncp
)
# Visuals ----
boxplot(dff$dfa ~ dff$df, main = "mean(x) -1 ~ df")
boxplot(dff$dfb ~ dff$df, main = "var(x) / 2 ~ df")
boxplot(dff$dfc ~ dff$df, main = "length(x) - 1 ~ df")
boxplot(dff$est_df ~ dff$df, main = "negloglik ~ df - Looks Good")
boxplot(dff$ncpa ~ dff$ncp, main = "mean(x) - (mean(x) - 1) ~ ncp")
boxplot(dff$ncpb ~ dff$ncp, main = "mean(x) - var(x)/2 ~ nc")
boxplot(dff$ncpc ~ dff$ncp, main = "optim ~ ncp")
boxplot(dff$est_ncp ~ dff$ncp, main = "negloglik ~ ncp - Looks Good")
boxplot(dff$dfa_resid ~ dff$df, main = "mean(x) -1 ~ df Residuals")
boxplot(dff$dfb_resid ~ dff$df, main = "var(x) / 2 ~ df Residuals")
boxplot(dff$dfc_resid ~ dff$df, main = "length(x) - 1 ~ df Residuals")
boxplot(dff$dfd_resid ~ dff$df, main = "negloglik ~ df Residuals")
boxplot(dff$ncpa_resid ~ dff$ncp, main = "mean(x) - (mean(x) - 1) ~ ncp Residuals")
boxplot(dff$ncpb_resid ~ dff$ncp, main = "mean(x) - var(x)/2 ~ ncp Residuals")
boxplot(dff$ncpc_resid ~ dff$ncp, main = "optim ~ ncp Residuals")
boxplot(dff$ncpd_resid ~ dff$ncp, main = "negloglik ~ ncp Residuals")
Add a function for
util_chisquare_param_estimate()
Function:
Example: