A summarised result
a04_summarised_result.Rmd
Introduction
A summarised result is a table that contains aggregated summary statistics (that is, a result set that contains no patient-level data).
Let’s look at an example result in this format. Here we have just one estimate:
library(omopgenerics)
library(dplyr)
# Build a one-row table holding a single estimate (a count of 5), then
# convert it into a summarised result and inspect its columns.
x <- dplyr::tibble(
  result_id = 1L,
  cdm_name = "my_cdm",
  group_name = "sex",
  group_level = "male",
  strata_name = "sex",
  strata_level = "male",
  variable_name = "Age group",
  variable_level = "10 to 50",
  estimate_name = "count",
  estimate_type = "numeric",
  # The estimate value is stored as a character string.
  estimate_value = "5",
  additional_name = "overall",
  additional_level = "overall"
)
result <- newSummarisedResult(x)
dplyr::glimpse(result)
#> Rows: 1
#> Columns: 13
#> $ result_id <int> 1
#> $ cdm_name <chr> "my_cdm"
#> $ group_name <chr> "sex"
#> $ group_level <chr> "male"
#> $ strata_name <chr> "sex"
#> $ strata_level <chr> "male"
#> $ variable_name <chr> "Age group"
#> $ variable_level <chr> "10 to 50"
#> $ estimate_name <chr> "count"
#> $ estimate_type <chr> "numeric"
#> $ estimate_value <chr> "5"
#> $ additional_name <chr> "overall"
#> $ additional_level <chr> "overall"
We can also associate settings with our results. These will typically be used to explain how the result was created.
# Rebuild the summarised result from `x`, this time supplying one row of
# settings for result_id 1.
result <- newSummarisedResult(
  x,
  settings = dplyr::tibble(
    result_id = 1,
    package = "PatientProfiles",
    study = "my_characterisation_study"
  )
)
dplyr::glimpse(result)
#> Rows: 1
#> Columns: 13
#> $ result_id <int> 1
#> $ cdm_name <chr> "my_cdm"
#> $ group_name <chr> "sex"
#> $ group_level <chr> "male"
#> $ strata_name <chr> "sex"
#> $ strata_level <chr> "male"
#> $ variable_name <chr> "Age group"
#> $ variable_level <chr> "10 to 50"
#> $ estimate_name <chr> "count"
#> $ estimate_type <chr> "numeric"
#> $ estimate_value <chr> "5"
#> $ additional_name <chr> "overall"
#> $ additional_level <chr> "overall"
# Print the settings table attached to the summarised result.
result |>
  settings()
#> # A tibble: 1 × 3
#> result_id package study
#> <int> <chr> <chr>
#> 1 1 PatientProfiles my_characterisation_study
Combining summarised results
# First result: a count of 5 for males aged 10 to 50.
result_1 <- dplyr::tibble(
  result_id = 1L,
  cdm_name = "my_cdm",
  group_name = "sex",
  group_level = "male",
  strata_name = "sex",
  strata_level = "male",
  variable_name = "Age group",
  variable_level = "10 to 50",
  estimate_name = "count",
  estimate_type = "numeric",
  estimate_value = "5",
  additional_name = "overall",
  additional_level = "overall"
)
# Settings for the first result; `analysis` describes how it was produced.
result_1_settings <- dplyr::tibble(
  result_id = 1L,
  package = "PatientProfiles",
  study = "my_characterisation_study",
  analysis = "stratified by age_group"
)
result_1 <- newSummarisedResult(result_1, settings = result_1_settings)

# Second result: an overall count of 55. It deliberately reuses result_id 1
# so that combining the two results shows the identifier being updated.
result_2 <- dplyr::tibble(
  result_id = 1L,
  cdm_name = "my_cdm",
  group_name = "overall",
  group_level = "overall",
  strata_name = "overall",
  strata_level = "overall",
  variable_name = "overall",
  variable_level = "overall",
  estimate_name = "count",
  estimate_type = "numeric",
  estimate_value = "55",
  additional_name = "overall",
  additional_level = "overall"
)
# Same settings columns as result_1_settings, with a different `analysis`.
result_2_settings <- dplyr::tibble(
  result_id = 1L,
  package = "PatientProfiles",
  study = "my_characterisation_study",
  analysis = "overall analysis"
)
result_2 <- newSummarisedResult(result_2, settings = result_2_settings)
Now we have our results we can combine them using bind. Because the two sets of results contain the same result ID, when the results are combined this will be automatically updated.
result <- bind(list(result_1, result_2))
result |>
  dplyr::glimpse()
#> Rows: 2
#> Columns: 13
#> $ result_id <int> 1, 2
#> $ cdm_name <chr> "my_cdm", "my_cdm"
#> $ group_name <chr> "sex", "overall"
#> $ group_level <chr> "male", "overall"
#> $ strata_name <chr> "sex", "overall"
#> $ strata_level <chr> "male", "overall"
#> $ variable_name <chr> "Age group", "overall"
#> $ variable_level <chr> "10 to 50", "overall"
#> $ estimate_name <chr> "count", "count"
#> $ estimate_type <chr> "numeric", "numeric"
#> $ estimate_value <chr> "5", "55"
#> $ additional_name <chr> "overall", "overall"
#> $ additional_level <chr> "overall", "overall"
settings(result)
#> # A tibble: 2 × 4
#> result_id package study analysis
#> <int> <chr> <chr> <chr>
#> 1 1 PatientProfiles my_characterisation_study stratified by age_group
#> 2 2 PatientProfiles my_characterisation_study overall analysis
Minimum cell count suppression
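Once we have a summarised result, we can suppress the results based on some minimum cell count. In the example below, `minCellCount = 7` causes the count of 5 to be replaced by `NA`, while the count of 55 is left unchanged.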
# Apply minimum cell count suppression with a threshold of 7 and inspect
# the resulting table.
result |>
  suppress(minCellCount = 7) |>
  dplyr::glimpse()
#> Rows: 2
#> Columns: 13
#> $ result_id <int> 1, 2
#> $ cdm_name <chr> "my_cdm", "my_cdm"
#> $ group_name <chr> "sex", "overall"
#> $ group_level <chr> "male", "overall"
#> $ strata_name <chr> "sex", "overall"
#> $ strata_level <chr> "male", "overall"
#> $ variable_name <chr> "Age group", "overall"
#> $ variable_level <chr> "10 to 50", "overall"
#> $ estimate_name <chr> "count", "count"
#> $ estimate_type <chr> "numeric", "numeric"
#> $ estimate_value <chr> NA, "55"
#> $ additional_name <chr> "overall", "overall"
#> $ additional_level <chr> "overall", "overall"