spsanderson / TidyDensity

Create tidy probability/density tibbles and plots of randomly generated and empirical data.
https://www.spsanderson.com/TidyDensity
Other
34 stars 1 forks source link

Add function `tidy_random_walk()` #58

Closed spsanderson closed 2 years ago

spsanderson commented 2 years ago

Add a function that will take output from a tidy_ distribution function and return a random walk tibble

library(tidyverse)
library(healthyverse)

tn <- tidy_normal(.sd = 0.1,.num_sims = 50)
initial_value <- 1

# cum_prod
tn %>%
    # Group each random walk so cumprod will be applied to each of them separately
    dplyr::group_by(sim_number) %>%
    # Calculate cumulative product of each random walk
    dplyr::mutate(cum_y = initial_value * cumprod(1 + y)) %>%
    # Remove grouping to improve future performance
    dplyr::ungroup() %>%
    ggplot(aes(x = x, y = cum_y, group = sim_number, color = sim_number)) +
    geom_line() +
    theme_minimal() +
    theme(legend.position = "none")

# cum_sum
tn %>%
    # Group each random walk so cumprod will be applied to each of them separately
    dplyr::group_by(sim_number) %>%
    # Calculate cumulative product of each random walk
    dplyr::mutate(cum_y = initial_value + cumsum(y)) %>%
    # Remove grouping to improve future performance
    dplyr::ungroup() %>%
    ggplot(aes(x = x, y = cum_y, group = sim_number, color = sim_number)) +
    geom_line() +
    theme_minimal() +
    theme(legend.position = "none")

# cum_sum_sample
tn %>%
    # Group each random walk so cumprod will be applied to each of them separately
    dplyr::group_by(sim_number) %>%
    # Calculate cumulative product of each random walk
    dplyr::mutate(cum_y = initial_value + cumsum(sample(y, replace = TRUE))) %>%
    # Remove grouping to improve future performance
    dplyr::ungroup() %>%
    ggplot(aes(x = x, y = cum_y, group = sim_number, color = sim_number)) +
    geom_line() +
    theme_minimal() +
    theme(legend.position = "none")

tn %>%
    # Group each random walk so cumprod will be applied to each of them separately
    dplyr::group_by(sim_number) %>%
    # Calculate cumulative product of each random walk
    dplyr::mutate(cum_y = initial_value + cumsum(sample(y, replace = FALSE))) %>%
    # Remove grouping to improve future performance
    dplyr::ungroup() %>%
    ggplot(aes(x = x, y = cum_y, group = sim_number, color = sim_number)) +
    geom_line() +
    theme_minimal() +
    theme(legend.position = "none")
spsanderson commented 2 years ago
tidy_random_walk <- function(.data, .initial_value = 1, .sample = FALSE, 
                             .replace = FALSE, .value_type = "cum_sum"){

    # Tidyeval ----
    inital_value <- as.numeric(.initial_value)
    samp <- as.logical(.sample)
    rlace <- as.logical(.replace)
    value_type <- as.character(.value_type)

    # Get data attributes ----
    atb <- attributes(.data)

    # Checks ----
    if (!"tibble_type" %in% names(atb)){
        rlang::abort("Function expects to take in data from a 'tidy_' distribution
                     function.")
    }

    if(inital_value < 0){
        rlang::abort("The .intial_value must be greater than or equal to zero.")
    }

    if(!value_type %in% c("cum_prod","cum_sum","cum_sum_sample")){
        rlang::abort("You chose an unsupported .value_type. Please chose from:
                     'cum_prod', 'cum_sum', 'cum_sum_sample'.")
    }

    # Data ----
    df <- dplyr::as_tibble(.data)

    # Manipulation
    if(value_type == "cum_prod" & samp == FALSE){

        ifelse(initial_value == 0, 1, initial_value)

        dfw <- df %>%
            dplyr::group_by(sim_number) %>%
            dplyr::mutate(random_walk_value = initial_value * cumprod(1 + y))

        if(initial_value == 0){
            dfw <- dfw %>%
                dplyr::mutate(random_walk_value = random_walk_value - 1)
        }

        dfw <- dfw %>% dplyr::ungroup()

    }

    if(value_type == "cum_prod" & samp == TRUE){

        ifelse(initial_value == 0, 1, initial_value)

        dfw <- df %>%
            dplyr::group_by(sim_number) %>%
            dplyr::mutate(random_walk_value = initial_value * cumprod(1 + sample(y, replace = rlace)))

        if(initial_value == 0){
            dfw <- dfw %>%
                dplyr::mutate(random_walk_value = random_walk_value - 1)
        }

        dfw <- dfw %>% dplyr::ungroup()

    }

    if(value_type == "cum_sum" & samp == FALSE){

        dfw <- df %>%
            dplyr::group_by(sim_number) %>%
            dplyr::mutate(random_walk_value = initial_value + cumsum(y)) %>%
            dplyr::ungroup()

    }

    if(value_type == "cum_sum" & samp == TRUE){
        dfw <- df %>%
            dplyr::group_by(sim_number) %>%
            dplyr::mutate(random_walk_value = inital_value + cumsum(sample(y, replace = rlace))) %>%
            dplyr::ungroup()
    }

    # Attributes ----
    attr(dfw, ".initial_value") <- .initial_value
    attr(dfw, ".sample") <- .sample
    attr(dfw, ".replace") <- .replace
    attr(dfw, ".value_type") <- .value_type
    attr(dfw, "tibble_type") <- "tidy_random_walk"
    attr(dfw, "dist_type") <- atb$tibble_type
    attr(dfw, "all") <- atb

    # Return ----
    return(dfw)

}