Closed helendduncan closed 7 months ago
Proposed layout of R object/Class
primary_filter: AND
subfilter:
data_column_name: “attendance_type”
type: IN
values: [7]
subfilter:
type: OR
subfilter:
data_column_name: “diagnosis_1”
type: IN
values: [1,2]
subfilter:
data_column_name: “diagnosis_2”
type: IN
values: [1,2]
subfilter:
data_column_name: “diagnosis_3”
type: IN
values: [1,2]
subfilter:
type: GREATER_THAN_OR_EQUAL
data_column_name: “attendance_date”
value: 2020-02-01
subfilter:
type: LESS_THAN
data_column_name: “attendance_date”
value: 2024-02-01
This corresponds to a search for attendance_type of 7,
AND (any of diagnosis_1, diagnosis_2, or diagnosis_3 having a value of 1 or 2), AND the attendance date being on or after the first of Feb 2020, AND the attendance date being before the first of Feb 2024.
# First draft of the feature object: a flat list in which the `subfilter_*`
# entries sit next to `primary_filter`, and each leaf filter names its column,
# an R comparison operator, and the value(s) to compare against.
# NOTE(review): the lower date bound here is 2022-01-01, whereas the written
# spec earlier in this thread gives 2020-02-01 -- confirm which is intended.
my_object <- local({
  # Describe a single-column filter.
  column_filter <- function(column, op, vals) {
    list(data_column_name = column, operator = op, values = vals)
  }
  diagnosis_columns <- paste0("diagnosis_", 1:3)

  list(
    source_file = "ae2.csv",
    transformation_type = "your_transformation_type",
    primary_filter = "AND",
    subfilter_1 = column_filter("attendance_type", "%in%", 7),
    # Unnamed group of diagnosis filters; nothing in this draft marks the
    # group as an OR.
    subfilter_2 = lapply(
      diagnosis_columns,
      function(column) column_filter(column, "%in%", c(1, 2))
    ),
    subfilter_3 = column_filter("attendance_date", ">=", as.Date("2022-01-01")),
    subfilter_4 = column_filter("attendance_date", "<", as.Date("2024-02-01"))
  )
})
Updated proposed object @yongrenjie which would correspond to the case above
How do we specify that subfilter 2 is an OR?
Perhaps:
# Sketch of how subfilter 2 could be marked as an OR group: wrap the three
# diagnosis filters in a list that carries its own `type` plus a `filters`
# list of leaf filters.
# This is an excerpt of the arguments to an enclosing list() call, not a
# complete expression, so it cannot be evaluated on its own.
subfilter_1 = list(data_column_name = "attendance_type", type = "IN", values = 7),
subfilter_2 = list(
type = "OR",
filters = list(
list(data_column_name = "diagnosis_1", type = "IN", values = c(1, 2)),
list(data_column_name = "diagnosis_2", type = "IN", values = c(1, 2)),
list(data_column_name = "diagnosis_3", type = "IN", values = c(1, 2))
# NOTE(review): the comma after the next closing paren leaves an empty
# trailing argument in the `subfilter_2` list(); list() does not accept empty
# arguments, so it should be dropped before this is used as real code.
),
),
# Second draft: the subfilters move under a top-level `filter` list. The OR
# group is expressed as a "OR" marker (`subfilter_2`) followed by a sibling
# nested `filter` list holding the three diagnosis filters.
my_object <- local({
  # Membership filter on one column.
  in_set <- function(column, vals) {
    list(data_column_name = column, operator = "%in%", values = vals)
  }
  # Comparison filter on the attendance date.
  date_bound <- function(op, iso_date) {
    list(
      data_column_name = "attendance_date",
      operator = op,
      values = as.Date(iso_date)
    )
  }
  diagnosis_group <- list(
    subfilter_21 = in_set("diagnosis_1", c(1, 2)),
    subfilter_22 = in_set("diagnosis_2", c(1, 2)),
    subfilter_23 = in_set("diagnosis_3", c(1, 2))
  )

  list(
    source_file = "ae2.csv",
    transformation_type = "your_transformation_type",
    primary_filter = "AND",
    filter = list(
      subfilter_1 = in_set("attendance_type", 7),
      subfilter_2 = "OR",
      filter = diagnosis_group,
      subfilter_3 = date_bound(">=", "2022-01-01"),
      subfilter_4 = date_bound("<", "2024-02-01")
    )
  )
})
# Third draft: `primary_filter` becomes a list with its own `type` and a
# `filter` list of subfilters, and leaf filters use symbolic `type` codes
# ("IN", "GT_EQ", "LT") instead of R operators. The OR group is still written
# as a "OR" marker (`subfilter_2`) followed by a sibling nested `filter` list.
#
# The original snippet had a trailing comma after the `primary_filter`
# argument. That leaves an empty fourth argument in the outer list(), which R
# rejects ("argument 4 is empty"), so the comma is removed here.
my_object <- list(
  source_file = "ae2.csv",
  transformation_type = "your_transformation_type",
  primary_filter = list(
    type = "AND",
    filter = list(
      subfilter_1 = list(data_column_name = "attendance_type", type = "IN", values = 7),
      subfilter_2 = "OR",
      filter = list(
        subfilter_21 = list(data_column_name = "diagnosis_1", type = "IN", values = c(1, 2)),
        subfilter_22 = list(data_column_name = "diagnosis_2", type = "IN", values = c(1, 2)),
        subfilter_23 = list(data_column_name = "diagnosis_3", type = "IN", values = c(1, 2))
      ),
      subfilter_3 = list(data_column_name = "attendance_date", type = "GT_EQ", values = as.Date("2022-01-01")),
      subfilter_4 = list(data_column_name = "attendance_date", type = "LT", values = as.Date("2024-02-01"))
    )
  )
)
# Minimal case: the primary filter is itself a single leaf filter (an "IN"
# test on one column) rather than an AND/OR group of subfilters.
my_object <- local({
  attendance_filter <- list(
    type = "IN",
    data_column_name = "attendance_type",
    values = c(7, 8)
  )

  list(
    source_file = "ae2.csv",
    transformation_type = "your_transformation_type",
    primary_filter = attendance_filter
  )
})
# Nested layout: every filter is a node. A compound node carries a `type`
# ("AND" / "OR") and a named `filter` list of child nodes; a leaf node carries
# `data_column_name`, `type` and `values`.
# NOTE(review): the lower date bound here is 2022-01-01, whereas the written
# spec earlier in this thread gives 2020-02-01 -- confirm which is intended.
my_object <- local({
  # Build a leaf node.
  leaf <- function(column, kind, vals) {
    list(data_column_name = column, type = kind, values = vals)
  }
  # OR group over the three diagnosis columns.
  any_diagnosis <- list(
    type = "OR",
    filter = list(
      subfilter_21 = leaf("diagnosis_1", "IN", c(1, 2)),
      subfilter_22 = leaf("diagnosis_2", "IN", c(1, 2)),
      subfilter_23 = leaf("diagnosis_3", "IN", c(1, 2))
    )
  )

  list(
    source_file = "ae2.csv",
    transformation_type = "your_transformation_type",
    primary_filter = list(
      type = "AND",
      filter = list(
        subfilter_1 = leaf("attendance_type", "IN", 7),
        subfilter_2 = any_diagnosis,
        subfilter_3 = leaf("attendance_date", "GT_EQ", as.Date("2022-01-01")),
        subfilter_4 = leaf("attendance_date", "LT", as.Date("2024-02-01"))
      )
    )
  )
})
# Walk every subfilter held by the primary filter and print a description of
# it. Only one level of nesting is inspected.
top_level_filters <- my_object$primary_filter$filter
for (outer_idx in seq_along(top_level_filters)) {
  current <- top_level_filters[[outer_idx]]
  print(paste("-------------", outer_idx, "-------------"))

  # Leaf subfilter: it has no nested `filter`, so describe it and move on.
  if (is.null(current$filter)) {
    print(paste("Subfilter", outer_idx, "does not have an additional filter."))
    leaf_values <- paste(current$values, collapse = ",")
    print(paste(
      "The data column name is", current$data_column_name,
      "and the type of filter is", current$type,
      "with the value(s)", leaf_values, "."
    ))
    next
  }

  # Compound subfilter: report its type, then describe each of its children.
  print(paste(
    "Subfilter", outer_idx, "has an additional filter it is a(n)",
    current$type, "filter."
  ))
  print("Within this subfilter, the following (sub)subfilters are present:")
  for (inner_idx in seq_along(current$filter)) {
    child <- current$filter[[inner_idx]]
    print(paste("Subfilter", outer_idx, inner_idx))
    child_values <- paste(child$values, collapse = ", ")
    print(paste(
      "The data column name is", child$data_column_name,
      "and the type of filter is", child$type,
      "with the value(s)", child_values
    ))
  }
}
Potential features object and a potential method of accessing keys? Does that look somewhat sensible @yongrenjie (feel free to say if not!)
Yeah that looks good!!
Hi @yongrenjie
So I have the function to parse the json to the feature object.
The gist is that the user-defined JSON input file looks something like this, and running feature <- json_to_feature("../example_ip.json")
should produce the feature described in the issue on Monday.
I haven’t been able to update the notebook, but I hope this makes sense?
@helendduncan I think this is great!! 🤩 It was super satisfying to run feature <- json_to_feature("../example_ip.json")
in the R console hehe
Having taken a real quick look at the detailed structure, I think there are just a couple of teensy differences between what you parse the JSON to and what my functions want. I'm assuming that you're going to be busy tomorrow morning with the Baskerville course too. So I'll go ahead and merge your PR first and see if I can get both sides to play well together before our meeting 😄
Thanks @yongrenjie, yes, sorry, it's a busy day today. If you've got time then I'd appreciate it, but no worries if not.
Spec has been largely defined by a range of closed PRs, and additional features can be added easily now - closing issues
Description Based on issue #8 to produce an R data structure containing the information specified by a user in a human-readable json file, to be passed to a transformer to perform
Tasks
Success Criteria
Relevant Pull Requests