NCA with Bootstrap for sparse sampling data

j9cc commented 1 month ago

Hi, I wonder if performing a bootstrap first on sparse sampling data and then conducting NCA would be a good approach for data statistical analysis?

Here are some references I found in the literature.

Takemoto-2006-Histogram Analysis of Pharmacoki.pdf Shen-2017-Bioequivalence evaluation of sparse.pdf

j9cc commented 1 month ago

Here is an example of a R script to perform that:

# Load necessary libraries
library(dplyr)
library(tidyr)
library(ggplot2)
library(readr)
library(readxl)
library(PKNCA)
library(cowplot)
library(knitr)
library(writexl)
library(ubiquity)
library(boot)

# Set seed for reproducibility
set.seed(123)

# Define parameters
drug_variants <- (1:5)
sample_matrices <- c("Plasma", "Serum", "Urine")
time_points <- seq(1, 24, by = 1)  # Time points from 0 to 24 hours
n_samples <- 3  # Number of concentration samples at each time point

# Create a function to generate decreasing concentration data with noise
generate_concentration <- function(time, dose, decay_rate) {
  base_concentration <- dose * exp(-decay_rate * time)
  concentrations <- base_concentration + rnorm(n_samples, mean = 0, sd = base_concentration * 0.2)
  return(concentrations)
}

# Generate the dataset
data_list <- lapply(drug_variants, function(drug) {
  lapply(sample_matrices, function(matrix) {
    dose <- runif(1, 100, 200)  # Random dose between 100 and 200
    decay_rate <- runif(1, 0.05, 0.15)  # Random decay rate between 0.05 and 0.15
    data.frame(
      drug_variants = rep(drug, each = length(time_points) * n_samples),
      Sample = rep(matrix, each = length(time_points) * n_samples),
      time = rep(time_points, each = n_samples),
      dose = ifelse(rep(time_points, each = n_samples) == 0, dose, NA),
      conc = unlist(lapply(time_points, generate_concentration, dose = dose, decay_rate = decay_rate))
    )
  }) %>% bind_rows()
}) %>% bind_rows()

# Add sample replicate numbers if needed
#data_list <- data_list %>%
#  group_by(drug_variants, Sample, time) %>%
#  mutate(sample_replicate = rep(1:n_samples, length.out = n()))

# Change to dataframe and set up factor (improtant)
data_fake <- as.data.frame(data_list) %>%
  mutate(drug_variants = as.factor(drug_variants), Sample = as.factor(Sample))

# Display the first few rows of the dataset
print(head(data_fake, 30))

##bootstrap

# Calculate original mean concentration
original_means <- data_fake %>%
  group_by(drug_variants, Sample, time) %>%
  mutate(original_mean = mean(conc), .groups = 'drop') %>%
  select(drug_variants, Sample, time, original_mean, dose)

# Function to calculate the mean of log-concentration
log_mean_concentration <- function(data, indices) {
  d <- data[indices, ]
  return(mean(log(d$conc)))
}

# Function to perform bootstrapping
bootstrap_analysis <- function(data, R = 1000) {
  results <- data %>%
    group_by(drug_variants, Sample, time) %>%
    do(boot_out = list(boot(data = ., statistic = log_mean_concentration, R = R))) %>%
    ungroup()

  return(results)
}

# Perform the bootstrap analysis
bootstrap_results <- bootstrap_analysis(data_fake)

# Extract and save the results to a CSV file
bootstrap_simulations <- bootstrap_results %>%
  rowwise() %>%
  mutate(
    simulations = list(boot_out[[1]]$t),
    run = list(seq(1, length.out = length(boot_out[[1]]$t))),
  ) %>%
  unnest(c(simulations, run)) %>%
  select(drug_variants, Sample, time, run, simulations) %>%
  mutate(conc = exp(simulations))

bootstrap_simulations_with_dose <- bootstrap_simulations %>%
  left_join(original_means %>% distinct(time, .keep_all = TRUE) %>% 
              select(drug_variants, Sample, time, dose), by = c("drug_variants", "Sample", "time"))

#write.csv(bootstrap_simulations_with_dose, "bootstrap_simulations.csv", row.names = FALSE)

# Extract and print the results
bootstrap_summary <- bootstrap_results %>%
  rowwise() %>%
  mutate(
    boot_log_mean = mean(boot_out[[1]]$t0),
    boot_log_ci = list(boot.ci(boot_out[[1]], type = "perc")),
    boot_log_ci_low = boot_log_ci$percent[4],
    boot_log_ci_high = boot_log_ci$percent[5]
  ) %>%
  select(drug_variants, Sample, time, boot_log_mean, boot_log_ci_low, boot_log_ci_high) %>%
  mutate(
    boot_log_ci_low = exp(boot_log_ci_low),
    boot_log_ci_high = exp(boot_log_ci_high)
  )

# Convert log results back to original scale
bootstrap_summary <- bootstrap_summary %>%
  mutate(
    boot_mean = exp(boot_log_mean),
    boot_ci_low = boot_log_ci_low,
    boot_ci_high = boot_log_ci_high
  )

print(head(bootstrap_summary))

# Merge the original mean concentration with the bootstrap summary
final_summary <- bootstrap_summary %>%
  left_join(original_means, by = c("drug_variants", "Sample", "time"))

print(final_summary)

ggplot(final_summary #filter()
       , aes(x = time, y = original_mean, color = drug_variants)) +
  geom_line() +
  geom_point(aes(shape = drug_variants)) +
  geom_ribbon(aes(ymin = boot_log_ci_low, ymax = boot_log_ci_high), alpha = 0.2) +
  facet_grid(~Sample) +
  labs(
    title = "Bootstrapped Mean Concentrations with Confidence Intervals",
    x = "Time",
    y = "Concentration"
  ) +
  theme_classic() +
  scale_y_log10(limits = c(0.01, 10000), breaks = c(0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000)) +
  annotation_logticks(sides = "l", size = 1.5) +
  theme(legend.position = c(.85, .8), legend.title = element_text(size = 20, face = "bold"),
        legend.text = element_text(size = 20, face = "bold"),
        legend.key.width = unit(1, "cm"),
        plot.title = element_text(size = 20, face = "bold"),
        axis.title.x = element_text(size = 20, face = "bold"),
        axis.title.y = element_text(size = 20, face = "bold"),
        axis.text.x = element_text(size = 20, face = "bold", color = "black"),  
        axis.text.y = element_text(size = 20, face = "bold", color = "black"),
        axis.line = element_line(linewidth = 1.5), 
        axis.ticks = element_line(linewidth = 1.5),
        strip.text.x = element_text(size = 15, face ="bold"), 
        axis.ticks.length = unit(.25, "cm")) +  
  labs(title = bquote(bold("Bootstrapped Mean Concentrations with Confidence Intervals")~""), 
       x = "Time (hr)", y = "Concentration (nM)", substitle = "Legend at Bottom", color = "Drug", shape = "Drug") +
  scale_shape_manual(labels = c("1" = "Drug 1", "2" = "Drug 2", "3" = "Drug 3", 
                                "4" = "Drug 4", "5" = "Drug 5"),
                     values = c("1" = "square", "2" = "circle", "3" = "triangle",
                                "4" = "triangle down open", "5" = "circle open")) +
  scale_linetype_manual(values = c(3,2,1)) +
  scale_color_manual(labels = c("1" = "Drug 1", "2" = "Drug 2", "3" = "Drug 3",
                                "4" = "Drug 4", "5" = "Drug 5"),
                     values = c("1" = "black", "2" = "red", "3" = "darkgreen", "4" = "blue", "5" = "purple"))

# Convert the data to a suitable format for PKNCA
bootstrap_simulations_with_dose_nca <- bootstrap_simulations_with_dose %>%
  mutate(drug_variants = as.factor(drug_variants),
         Sample = as.factor(Sample)) %>%
  as.data.frame()

bootstrap_simulations_with_dose_nca_dose <- bootstrap_simulations_with_dose_nca %>%
  distinct(drug_variants, Sample, run, .keep_all = TRUE) %>%
  mutate(time = 0)

print(bootstrap_simulations_with_dose_nca_dose)

# Define the function to calculate AUC using PKNCA
calculate_pk <- function(data, data2) {
  print(names(data))  # Print column names to ensure the required columns are present

  # Determine the minimum start time
  min_time <- min(data$time)
  max_time <- max(data$time)

  conc_obj <- PKNCA::PKNCAconc(data, conc ~ time |  drug_variants + Sample + run) #drug_variants + Sample +
  dose_obj <- PKNCA::PKNCAdose(data2, dose ~ time |  drug_variants + Sample + run) #drug_variants + Sample +
  intervals_manual <-
    data.frame(
      start=min_time, end=max_time,
      cmax=TRUE,
      tmax=TRUE,
      auclast=TRUE,
      aucinf.obs=TRUE,
      cl.obs=TRUE,
      cl.pred=TRUE,
      vss.obs=TRUE,
      vss.pred=TRUE
    )
  data_obj <- PKNCA::PKNCAdata(conc_obj,
                               dose_obj, intervals = intervals_manual)

  pknca_results <- PKNCA::pk.nca(data_obj)
  summary_pknca <- as.data.frame(pknca_results)

  return(summary_pknca)
}

#Set up Data for NCA (Plasma and tissue are done separately)

nca_data <- bootstrap_simulations_with_dose_nca
nca_dose <- bootstrap_simulations_with_dose_nca_dose

#Plasma
pl_nca_data <- nca_data %>% filter(Sample == "Plasma")
pl_nca_dose <- nca_dose %>% filter(Sample == "Plasma")

#Due to the automatic interval selection is not working well, set up the plasma data 
#!!!!! Not back extrapolated !!!!!! (Assume C0 = the concentration of the first collected time point)

pl_nca_c0 <- pl_nca_data %>% filter(time == min(time)) %>%
  mutate(time = 0)

pl_nca_data_comb <- pl_nca_data %>% rbind(pl_nca_c0) %>%
  arrange(time)

print(head(pl_nca_data_comb))

pl_pknca <- pl_nca_data_comb %>% do(calculate_pk(., pl_nca_dose))
print(head(pl_pknca))

##Change data structure to wide format if needed
pl_pknca_w <- pl_pknca %>%
  pivot_wider(names_from = PPTESTCD, values_from = PPORRES)
print(head(pl_pknca_w))

#Tissue
tis_nca_data <- nca_data %>% filter(Sample != "Plasma")
tis_nca_dose <- nca_dose %>% filter(Sample != "Plasma")

#Due to the automatic interval selection is not working well
#(Assume C0 = 0 for Tissue if not local administered)

tis_nca_c0 <- tis_nca_data %>% filter(time == min(time)) %>%
  mutate(time = 0, conc = 0)

tis_nca_data_comb <- tis_nca_data %>% rbind(tis_nca_c0) %>%
  arrange(time)

View(tis_nca_data_comb)

tis_pknca <- tis_nca_data_comb %>% do(calculate_pk(., tis_nca_dose))
View(tis_pknca)

##Change data structure to wide format if needed
tis_pknca_w <- tis_pknca %>%
  pivot_wider(names_from = PPTESTCD, values_from = PPORRES)
print(tis_pknca_w)

billdenney commented 1 month ago

Thanks for sharing this. It looks interesting and like a useful alternative option to the Bailer methods for sparse NCA.

In the code, it calculates the mean of the log of concentrations to perform the NCA. How does it handle concentrations that are below the limit of quantification (typically coded as zeros)?

Overall, it looks like it could be integrated into PKNCA by:

Take the original dataset and generate the resampled dataset.
Perform dense NCA on the resampled dataset.
Summarize the dense NCA.

If that is accurate, then it should be easy to integrate.

j9cc commented 1 month ago

The code do not consider handling the concentrations that are below LOQ right now.

Yes, the original theory is:

resampled the dataset to create a bootstrapped dataset.
Then perform regular dense NCA on the resampled bootstrapped dataset.

But the code I have here assumes concentration is a log normal distribution before bootstrapping. For the paper I attached (Takemoto 2006), they perform bootstrap first, then use some statistical inferences to determine whether the metrics is a normal distribution or log normal distribution.

billdenney / pknca

NCA with Bootstrap for sparse sampling data #295