Check on ascending order of cdf values incorrect when `output_type_id` data type is character

The helper function `check_values_ascending` arranges by the `output_type_id` here, which can result in an incorrect ordering and an incorrect validation failure when the `output_type_id` has a character data type but represents numeric values, because character values are sorted lexicographically rather than numerically (e.g. "10" sorts before "2"). This hasn't come up in testing with quantile outputs because the `output_type_id`s for those all have a leading 0, so their character order matches their numeric order -- but that may not be the case for cdf output types.
Here is an example:
library(dplyr)
library(hubValidations)

# Example model-output table (127 rows) for one location and one date:
#   rows   1-23 : quantile forecasts of "wk inc flu hosp"
#   rows  24-27 : pmf forecasts of "wk flu hosp rate category"
#   rows 28-127 : cdf forecasts of "wk flu hosp rate"
# Built inside local() so that only `ex` is created in the calling environment.
ex <- local({
  # Number of rows contributed by each output type (quantile, pmf, cdf).
  block_sizes <- c(23L, 4L, 100L)
  n_rows <- sum(block_sizes)

  # All rows share one date, stored as days since 1970-01-01.
  shared_date <- structure(rep(19371, n_rows), class = "Date")

  quantile_levels <- c(
    "0.01", "0.025", "0.05", "0.1", "0.15", "0.2", "0.25", "0.3", "0.35",
    "0.4", "0.45", "0.5", "0.55", "0.6", "0.65", "0.7", "0.75", "0.8",
    "0.85", "0.9", "0.95", "0.975", "0.99"
  )
  pmf_categories <- c("low", "moderate", "high", "very high")
  # cdf thresholds "0.25", "0.5", ..., "25". Multiples of 0.25 are exactly
  # representable, so as.character() gives the short forms ("1", "1.25", ...).
  cdf_thresholds <- as.character(seq(0.25, 25, by = 0.25))

  quantile_values <- c(
    17, 44, 72, 105, 122, 125, 127, 128, 131, 132, 133, 136, 139, 140, 141,
    144, 145, 147, 150, 167, 200, 228, 255
  )
  pmf_values <- c(
    0.220842781557067, 0.768398474282558, 0.0107282559276931,
    3.04882326815914e-05
  )
  # The cdf reaches 1 (to double precision) at threshold 13.5 and stays there.
  cdf_values <- c(
    0.00853380042747561, 0.0135533534527697, 0.0208413454592117,
    0.0299015976961877, 0.0406341808051564, 0.0548617841719505,
    0.0720793809659529, 0.0929593384551091, 0.111686153095421,
    0.236379680785012, 0.560349384758665, 0.864271648664744,
    0.89630163333021, 0.918320726070205, 0.937138052331475,
    0.952967021875378, 0.96520141962482, 0.974905656518415,
    0.983169904293088, 0.989315411865382, 0.993311235511738,
    0.995919595802813, 0.997577247537977, 0.998600149380724,
    0.999213049267616, 0.999569631735671, 0.999771071520066,
    0.999881567193594, 0.999940419028641, 0.999970855156356,
    0.999986139027956, 0.999993591354829, 0.999997119649744,
    0.999998741655025, 0.999999465680723, 0.999999779493023,
    0.999999911561791, 0.999999965530796, 0.999999986945038,
    0.999999995195438, 0.999999998281901, 0.999999999403044,
    0.999999999798479, 0.999999999933905, 0.999999999978939,
    0.99999999999348, 0.999999999998039, 0.999999999999427,
    0.999999999999838, 0.999999999999955, 0.999999999999988,
    0.999999999999997, 0.999999999999999,
    rep(1, 47L)
  )

  structure(
    list(
      location = rep("01", n_rows),
      reference_date = shared_date,
      horizon = rep(0, n_rows),
      target_end_date = shared_date,
      target = rep(
        c("wk inc flu hosp", "wk flu hosp rate category", "wk flu hosp rate"),
        times = block_sizes
      ),
      output_type = rep(c("quantile", "pmf", "cdf"), times = block_sizes),
      output_type_id = c(quantile_levels, pmf_categories, cdf_thresholds),
      value = c(quantile_values, pmf_values, cdf_values)
    ),
    row.names = c(NA, -127L),
    class = "data.frame"
  )
})

# output_type_id is stored as character: the rows with output_type == "pmf"
# use category labels (strings), which forces the whole column to character
typeof(ex[["output_type_id"]])

# In the order given, the cdf output_type_id values are correctly ordered when
# read as numbers: print the smallest step between consecutive ids
cdf_ids_as_given <- pull(filter(ex, output_type == "cdf"), output_type_id)
min(diff(as.numeric(cdf_ids_as_given)))

# In that same given order the cdf values are also correctly ordered:
# the smallest consecutive difference is checked against zero
cdf_values_as_given <- pull(filter(ex, output_type == "cdf"), value)
min_value_diff <- min(diff(cdf_values_as_given))
min_value_diff >= 0

# Sorting by the character output_type_id (the operation performed in
# check_values_ascending) breaks the numeric ordering of the ids:
# print the smallest step between consecutive ids after that sort
cdf_ids_char_sorted <- pull(
  arrange(filter(ex, output_type == "cdf"), output_type_id),
  output_type_id
)
min(diff(as.numeric(cdf_ids_char_sorted)))

# Once the rows are arranged by the character output_type_id, the cdf values
# are no longer properly ordered
cdf_values_char_sorted <- pull(
  arrange(filter(ex, output_type == "cdf"), output_type_id),
  value
)
min_value_diff <- min(diff(cdf_values_char_sorted))
min_value_diff >= 0

# Run the package's internal ascending-value check on the example table.
# The validation check fails (incorrectly): as shown above, the cdf values are
# properly ordered when output_type_id is read as numeric, and only appear out
# of order after the rows are sorted by output_type_id as character
hubValidations:::check_tbl_value_col_ascending(ex, file_path = "")
If it's more helpful to have an example in data file format, the example above is a subset of this file.
hubverse-org / hubValidations

Check on ascending order of cdf values incorrect when `output_type_id` data type is character #78