Skip to content

Commit e85f7e0

Browse files
authored
Make calculation of p-values optional in get_pairwise_comparisons() (#994)
* make NULL an option when computing p-values * add test * add news item * switch back to old pipe
1 parent 728a2c1 commit e85f7e0

5 files changed

Lines changed: 68 additions & 25 deletions

File tree

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# scoringutils (development version)
22

3+
- Made computation of p-values optional in pairwise comparisons by allowing `test_type = NULL` in `compare_forecasts()`. When `test_type = NULL`, p-values will be `NA` (#978).
4+
5+
36
# scoringutils 2.1.0
47

58
Minor spelling / mathematical updates to Scoring rule vignette. (#969)

R/pairwise-comparisons.R

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -410,9 +410,9 @@ pairwise_comparison_one_group <- function(scores,
410410
#' @param one_sided Boolean, default is `FALSE`, whether to conduct a one-sided
411411
#' instead of a two-sided test to determine significance in a pairwise
412412
#' comparison.
413-
#' @param test_type Character, either "non_parametric" (the default) or
414-
#' "permutation". This determines which kind of test shall be conducted to
415-
#' determine p-values.
413+
#' @param test_type Character, either "non_parametric" (the default), "permutation",
414+
#' or NULL. This determines which kind of test shall be conducted to determine
415+
#' p-values. If NULL, no test will be conducted and p-values will be NA.
416416
#' @param n_permutations Numeric, the number of permutations for a
417417
#' permutation test. Default is 999.
418418
#' @returns A list with mean score ratios and p-values for the comparison
@@ -428,7 +428,7 @@ compare_forecasts <- function(scores,
428428
name_comparator2,
429429
metric,
430430
one_sided = FALSE,
431-
test_type = c("non_parametric", "permutation"),
431+
test_type = c("non_parametric", "permutation", NULL),
432432
n_permutations = 999) {
433433
scores <- data.table::as.data.table(scores)
434434

@@ -463,22 +463,28 @@ compare_forecasts <- function(scores,
463463
# note we could also take mean(values_x) / mean(values_y), as it cancels out
464464
ratio <- sum(values_x) / sum(values_y)
465465

466-
# test whether the ratio is significantly different from one
467-
# equivalently, one can test whether the difference between the two values
468-
# is significantly different from zero.
469-
test_type <- match.arg(test_type)
470-
if (test_type == "permutation") {
471-
# adapted from the surveillance package
472-
pval <- permutation_test(values_x, values_y,
473-
n_permutation = n_permutations,
474-
one_sided = one_sided,
475-
comparison_mode = "difference"
476-
)
466+
# If test_type is NULL, return NA for p-value
467+
if (is.null(test_type)) {
468+
pval <- NA_real_
477469
} else {
478-
# this probably needs some more thought
479-
# alternative: do a paired t-test on ranks?
480-
pval <- wilcox.test(values_x, values_y, paired = TRUE)$p.value
470+
# test whether the ratio is significantly different from one
471+
# equivalently, one can test whether the difference between the two values
472+
# is significantly different from zero.
473+
test_type <- match.arg(test_type)
474+
if (test_type == "permutation") {
475+
# adapted from the surveillance package
476+
pval <- permutation_test(values_x, values_y,
477+
n_permutation = n_permutations,
478+
one_sided = one_sided,
479+
comparison_mode = "difference"
480+
)
481+
} else {
482+
# this probably needs some more thought
483+
# alternative: do a paired t-test on ranks?
484+
pval <- wilcox.test(values_x, values_y, paired = TRUE)$p.value
485+
}
481486
}
487+
482488
return(list(
483489
mean_scores_ratio = ratio,
484490
pval = pval
@@ -582,7 +588,8 @@ add_relative_skill <- function(
582588
compare = "model",
583589
by = NULL,
584590
metric = intersect(c("wis", "crps", "brier_score"), names(scores)),
585-
baseline = NULL
591+
baseline = NULL,
592+
...
586593
) {
587594

588595
# input checks are done in `get_pairwise_comparisons()`
@@ -592,7 +599,8 @@ add_relative_skill <- function(
592599
metric = metric,
593600
baseline = baseline,
594601
compare = compare,
595-
by = by
602+
by = by,
603+
...
596604
)
597605

598606
# store original metrics

man/add_relative_skill.Rd

Lines changed: 5 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/compare_forecasts.Rd

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-pairwise_comparison.R

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,3 +545,31 @@ test_that("plot_pairwise_comparisons() works when showing p values", {
545545
skip_on_cran()
546546
vdiffr::expect_doppelganger("plot_pairwise_comparison_pval", p)
547547
})
548+
549+
test_that("add_relative_skill() works without warnings when not computing p-values", {
550+
forecast_quantile <- example_quantile %>%
551+
as_forecast_quantile(
552+
forecast_unit = c(
553+
"location", "forecast_date", "target_end_date",
554+
"target_type", "model", "horizon"
555+
)
556+
)
557+
558+
scores <- forecast_quantile %>%
559+
score(metrics = get_metrics(forecast_quantile, "ae_median"))
560+
561+
expect_no_warning(
562+
scores_w_rel_skill <- scores %>%
563+
add_relative_skill(
564+
compare = "model",
565+
by = "location",
566+
metric = "ae_median",
567+
test_type = NULL
568+
)
569+
)
570+
571+
# Additional checks to ensure the function worked correctly
572+
expect_true("ae_median_relative_skill" %in% names(scores_w_rel_skill))
573+
expect_true(is.numeric(scores_w_rel_skill$ae_median_relative_skill))
574+
expect_false(any(is.na(scores_w_rel_skill$ae_median_relative_skill)))
575+
})

0 commit comments

Comments
 (0)