Skip to content

Commit e85f7e0

Browse files
authored
Make calculation of p-values optional in get_pairwise_comparisons() (#994)
* make NULL an option when computing p-values * add test * add news item * switch back to old pipe
1 parent 728a2c1 commit e85f7e0

5 files changed

Lines changed: 68 additions & 25 deletions

File tree

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# scoringutils (development version)
22

3+
- Made computation of p-values optional in pairwise comparisons by allowing `test_type = NULL` in `compare_forecasts()`. When `test_type = NULL`, p-values will be `NA` (#978).
4+
5+
36
# scoringutils 2.1.0
47

58
Minor spelling / mathematical updates to Scoring rule vignette. (#969)

R/pairwise-comparisons.R

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -410,9 +410,9 @@ pairwise_comparison_one_group <- function(scores,
410410
#' @param one_sided Boolean, default is `FALSE`, whether to conduct a one-sided
411411
#' instead of a two-sided test to determine significance in a pairwise
412412
#' comparison.
413-
#' @param test_type Character, either "non_parametric" (the default) or
414-
#' "permutation". This determines which kind of test shall be conducted to
415-
#' determine p-values.
413+
#' @param test_type Character, either "non_parametric" (the default), "permutation",
414+
#' or NULL. This determines which kind of test shall be conducted to determine
415+
#' p-values. If NULL, no test will be conducted and p-values will be NA.
416416
#' @param n_permutations Numeric, the number of permutations for a
417417
#' permutation test. Default is 999.
418418
#' @returns A list with mean score ratios and p-values for the comparison
@@ -428,7 +428,7 @@ compare_forecasts <- function(scores,
428428
name_comparator2,
429429
metric,
430430
one_sided = FALSE,
431-
test_type = c("non_parametric", "permutation"),
431+
test_type = c("non_parametric", "permutation", NULL),
432432
n_permutations = 999) {
433433
scores <- data.table::as.data.table(scores)
434434

@@ -463,22 +463,28 @@ compare_forecasts <- function(scores,
463463
# note we could also take mean(values_x) / mean(values_y), as it cancels out
464464
ratio <- sum(values_x) / sum(values_y)
465465

466-
# test whether the ratio is significantly different from one
467-
# equivalently, one can test whether the difference between the two values
468-
# is significantly different from zero.
469-
test_type <- match.arg(test_type)
470-
if (test_type == "permutation") {
471-
# adapted from the surveillance package
472-
pval <- permutation_test(values_x, values_y,
473-
n_permutation = n_permutations,
474-
one_sided = one_sided,
475-
comparison_mode = "difference"
476-
)
466+
# If test_type is NULL, return NA for p-value
467+
if (is.null(test_type)) {
468+
pval <- NA_real_
477469
} else {
478-
# this probably needs some more thought
479-
# alternative: do a paired t-test on ranks?
480-
pval <- wilcox.test(values_x, values_y, paired = TRUE)$p.value
470+
# test whether the ratio is significantly different from one
471+
# equivalently, one can test whether the difference between the two values
472+
# is significantly different from zero.
473+
test_type <- match.arg(test_type)
474+
if (test_type == "permutation") {
475+
# adapted from the surveillance package
476+
pval <- permutation_test(values_x, values_y,
477+
n_permutation = n_permutations,
478+
one_sided = one_sided,
479+
comparison_mode = "difference"
480+
)
481+
} else {
482+
# this probably needs some more thought
483+
# alternative: do a paired t-test on ranks?
484+
pval <- wilcox.test(values_x, values_y, paired = TRUE)$p.value
485+
}
481486
}
487+
482488
return(list(
483489
mean_scores_ratio = ratio,
484490
pval = pval
@@ -582,7 +588,8 @@ add_relative_skill <- function(
582588
compare = "model",
583589
by = NULL,
584590
metric = intersect(c("wis", "crps", "brier_score"), names(scores)),
585-
baseline = NULL
591+
baseline = NULL,
592+
...
586593
) {
587594

588595
# input checks are done in `get_pairwise_comparisons()`
@@ -592,7 +599,8 @@ add_relative_skill <- function(
592599
metric = metric,
593600
baseline = baseline,
594601
compare = compare,
595-
by = by
602+
by = by,
603+
...
596604
)
597605

598606
# store original metrics

man/add_relative_skill.Rd

Lines changed: 5 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/compare_forecasts.Rd

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-pairwise_comparison.R

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,3 +545,31 @@ test_that("plot_pairwise_comparisons() works when showing p values", {
545545
skip_on_cran()
546546
vdiffr::expect_doppelganger("plot_pairwise_comparison_pval", p)
547547
})
548+
549+
test_that("add_relative_skill() works without warnings when not computing p-values", {
550+
forecast_quantile <- example_quantile %>%
551+
as_forecast_quantile(
552+
forecast_unit = c(
553+
"location", "forecast_date", "target_end_date",
554+
"target_type", "model", "horizon"
555+
)
556+
)
557+
558+
scores <- forecast_quantile %>%
559+
score(metrics = get_metrics(forecast_quantile, "ae_median"))
560+
561+
expect_no_warning(
562+
scores_w_rel_skill <- scores %>%
563+
add_relative_skill(
564+
compare = "model",
565+
by = "location",
566+
metric = "ae_median",
567+
test_type = NULL
568+
)
569+
)
570+
571+
# Additional checks to ensure the function worked correctly
572+
expect_true("ae_median_relative_skill" %in% names(scores_w_rel_skill))
573+
expect_true(is.numeric(scores_w_rel_skill$ae_median_relative_skill))
574+
expect_false(any(is.na(scores_w_rel_skill$ae_median_relative_skill)))
575+
})

0 commit comments

Comments
 (0)