@@ -410,9 +410,9 @@ pairwise_comparison_one_group <- function(scores,
410410# ' @param one_sided Boolean, default is `FALSE`, whether to conduct a one-sided
411411# ' instead of a two-sided test to determine significance in a pairwise
412412# ' comparison.
413- # ' @param test_type Character, either "non_parametric" (the default) or
414- # ' "permutation" . This determines which kind of test shall be conducted to
415- # ' determine p-values.
413+ # ' @param test_type Character, either "non_parametric" (the default), "permutation",
414+ # ' or NULL. This determines which kind of test shall be conducted to determine
415+ # ' p-values. If NULL, no test will be conducted and p-values will be NA.
416416# ' @param n_permutations Numeric, the number of permutations for a
417417# ' permutation test. Default is 999.
418418# ' @returns A list with mean score ratios and p-values for the comparison
@@ -428,7 +428,7 @@ compare_forecasts <- function(scores,
428428 name_comparator2 ,
429429 metric ,
430430 one_sided = FALSE ,
431- test_type = c(" non_parametric" , " permutation" ),
431+ test_type = c(" non_parametric" , " permutation" , NULL ),
432432 n_permutations = 999 ) {
433433 scores <- data.table :: as.data.table(scores )
434434
@@ -463,22 +463,28 @@ compare_forecasts <- function(scores,
463463 # note we could also take mean(values_x) / mean(values_y), as it cancels out
464464 ratio <- sum(values_x ) / sum(values_y )
465465
466- # test whether the ratio is significantly different from one
467- # equivalently, one can test whether the difference between the two values
468- # is significantly different from zero.
469- test_type <- match.arg(test_type )
470- if (test_type == " permutation" ) {
471- # adapted from the surveillance package
472- pval <- permutation_test(values_x , values_y ,
473- n_permutation = n_permutations ,
474- one_sided = one_sided ,
475- comparison_mode = " difference"
476- )
466+ # If test_type is NULL, return NA for p-value
467+ if (is.null(test_type )) {
468+ pval <- NA_real_
477469 } else {
478- # this probably needs some more thought
479- # alternative: do a paired t-test on ranks?
480- pval <- wilcox.test(values_x , values_y , paired = TRUE )$ p.value
470+ # test whether the ratio is significantly different from one
471+ # equivalently, one can test whether the difference between the two values
472+ # is significantly different from zero.
473+ test_type <- match.arg(test_type )
474+ if (test_type == " permutation" ) {
475+ # adapted from the surveillance package
476+ pval <- permutation_test(values_x , values_y ,
477+ n_permutation = n_permutations ,
478+ one_sided = one_sided ,
479+ comparison_mode = " difference"
480+ )
481+ } else {
482+ # this probably needs some more thought
483+ # alternative: do a paired t-test on ranks?
484+ pval <- wilcox.test(values_x , values_y , paired = TRUE )$ p.value
485+ }
481486 }
487+
482488 return (list (
483489 mean_scores_ratio = ratio ,
484490 pval = pval
@@ -582,7 +588,8 @@ add_relative_skill <- function(
582588 compare = " model" ,
583589 by = NULL ,
584590 metric = intersect(c(" wis" , " crps" , " brier_score" ), names(scores )),
585- baseline = NULL
591+ baseline = NULL ,
592+ ...
586593) {
587594
588595 # input checks are done in `get_pairwise_comparisons()`
@@ -592,7 +599,8 @@ add_relative_skill <- function(
592599 metric = metric ,
593600 baseline = baseline ,
594601 compare = compare ,
595- by = by
602+ by = by ,
603+ ...
596604 )
597605
598606 # store original metrics
0 commit comments