Open spsanderson opened 4 days ago
The autotest_package()
function stops at util_chisquare_param_estimate()
which will currently produce a warning of NaN's produced. Possible fix to exponentiate the params in the neg log liklihood func and log the initial params as the input to the func to optimize, this seems to work to force a constrained problem. It however produces sub-optimal results as seen here:
util_chisquare_param_estimate <- function(.x, .auto_gen_empirical = TRUE) {
# Tidyeval ----
x_term <- as.numeric(.x)
n <- length(x_term)
minx <- min(as.numeric(x_term))
maxx <- max(as.numeric(x_term))
# Checks ----
if (!is.vector(x_term, mode = "numeric")) {
rlang::abort(
message = "The '.x' term must be a numeric vector.",
use_cli_format = TRUE
)
}
if (n < 2) {
rlang::abort(
message = "You must supply at least two data points for this function.",
use_cli_format = TRUE
)
}
# Parameters ----
# estimate_chisq_params <- function(data) {
# # Negative log-likelihood function
# negLogLik <- function(df, ncp) {
# -sum(stats::dchisq(data, df = df, ncp = ncp, log = TRUE))
# }
#
# # Initial values (adjust based on your data if necessary)
# start_vals <- list(df = trunc(var(data)/2), ncp = trunc(mean(data)))
#
# # MLE using bbmle
# mle_fit <- bbmle::mle2(negLogLik, start = start_vals)
# # Return estimated parameters as a named vector
# df <- dplyr::tibble(
# est_df = bbmle::coef(mle_fit)[1],
# est_ncp = bbmle::coef(mle_fit)[2]
# )
# return(df)
# }
#
# safe_estimates <- {
# purrr::possibly(
# estimate_chisq_params,
# otherwise = NA_real_,
# quiet = TRUE
# )
# }
#
# estimates <- safe_estimates(x_term)
# Define negative log-likelihood function
neg_log_likelihood <- function(params) {
params <- exp(params) * 10^-10
df <- params[1]
ncp <- params[2]
sum_densities <- sum(stats::dchisq(x_term, df = df, ncp = ncp, log = TRUE))
return(-sum_densities)
}
# Initial guess for parameters
initial_params <- c(trunc(var(x_term)/2), trunc(mean(x_term)))
# Optimize parameters using optim() function
opt_result <- stats::optim(par = log(initial_params), fn = neg_log_likelihood)
# Extract estimated parameters
doff <- opt_result$par[1]
ncp <- opt_result$par[2]
# Return Tibble ----
if (.auto_gen_empirical) {
te <- tidy_empirical(.x = x_term)
tc <- tidy_chisquare(.n = n, .df = round(doff, 3), .ncp = round(ncp, 3))
combined_tbl <- tidy_combine_distributions(te, tc)
}
ret <- dplyr::tibble(
dist_type = "Chisquare",
samp_size = n,
min = minx,
max = maxx,
mean = mean(x_term),
dof = doff,
ncp = ncp
)
# Return ----
attr(ret, "tibble_type") <- "parameter_estimation"
attr(ret, "family") <- "chisquare"
attr(ret, "x_term") <- .x
attr(ret, "n") <- n
if (.auto_gen_empirical) {
output <- list(
combined_data_tbl = combined_tbl,
parameter_tbl = ret
)
} else {
output <- list(
parameter_tbl = ret
)
}
return(output)
}
# Test ----
library(TidyDensity)
tc <- tidy_chisquare(.n = 500, .df = 6, .ncp = 1)$y
util_chisquare_param_estimate(.x = rchisq(n = 100, df = 5))
util_chisquare_param_estimate(.x = tc)
util_chisquare_param_estimate(.x = tc)$combined_data_tbl |>
tidy_combined_autoplot()
TidyDensity::util_chisquare_param_estimate(.x = tc)$combined_data_tbl |>
TidyDensity::tidy_combined_autoplot()
Warning messages:
1: In stats::dchisq(x_term, df = df, ncp = ncp, log = TRUE) :
NaNs produced
2: In stats::dchisq(x_term, df = df, ncp = ncp, log = TRUE) :
NaNs produced
3: In stats::dchisq(x_term, df = df, ncp = ncp, log = TRUE) :
NaNs produced
4: In stats::dchisq(x_term, df = df, ncp = ncp, log = TRUE) :
NaNs produced
5: In stats::dchisq(x_term, df = df, ncp = ncp, log = TRUE) :
NaNs produced
6: In stats::dchisq(x_term, df = df, ncp = ncp, log = TRUE) :
NaNs produced
Modified function:
Original function:
> x_term <- tc
> c(trunc(var(x_term)/2), trunc(mean(x_term)))
[1] 7 6
> log(c(trunc(var(x_term)/2), trunc(mean(x_term))))
[1] 1.945910 1.791759
> exp(log(c(trunc(var(x_term)/2), trunc(mean(x_term))))) * 10^-10
[1] 7e-10 6e-10
Need to prepare package for rOpensci
autotest
and run it on package