Skip to content

Commit b6c3c57

Browse files
bschilderclaude
andcommitted
Fix 3 GHA test failures: sort redirect bug, column name portability
- sort_coordinates_bash: fix in-place redirect that truncated input file before grep could read it (write to temp, then move back) - sort_coordinates_bash: validate column names exist in header before accessing cDict, preventing cryptic "subscript out of bounds" - construct_query: validate start/end aren't NA before calling IRanges - All tests: detect position column dynamically (POS or BP) instead of hardcoding, fixing failures when echodata uses either name - test-run_bgzip: use .tsv instead of .tsv.gz to avoid double-compression - test-convert: replace destructive tempdir cleanup with targeted removal Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 2c1770c commit b6c3c57

6 files changed

Lines changed: 113 additions & 53 deletions

File tree

R/construct_query.R

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -249,11 +249,17 @@ construct_query <- function(## Set 1
249249
check_set1(query_chrom = query_chrom,
250250
query_start_pos = query_start_pos,
251251
query_end_pos = query_end_pos)
252+
start_val <- as.integer(min(query_start_pos, na.rm = TRUE))
253+
end_val <- as.integer(max(query_end_pos, na.rm = TRUE))
254+
if(is.na(start_val) || is.na(end_val)){
255+
stop("query_start_pos and query_end_pos must contain ",
256+
"at least one non-NA, finite value.")
257+
}
252258
gr <- GenomicRanges::GRanges(
253259
seqnames = gsub("chr","",query_chrom[1],ignore.case = TRUE),
254260
ranges = IRanges::IRanges(
255-
start = as.integer(min(query_start_pos, na.rm = TRUE)),
256-
end = as.integer(max(query_end_pos, na.rm = TRUE))
261+
start = start_val,
262+
end = end_val
257263
)
258264
)
259265
}

R/sort_coordinates_bash.R

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -27,23 +27,44 @@ sort_coordinates_bash <- function(target_path,
2727
comment_char <- infer_comment_char(target_path = target_path,
2828
comment_char = comment_char,
2929
verbose = verbose)
30-
cDict <- echodata::column_dictionary(path = target_path)
30+
cDict <- echodata::column_dictionary(path = target_path)
31+
#### Validate column names exist in file header ####
32+
for(col_arg in list(c("chrom_col", chrom_col),
33+
c("start_col", start_col),
34+
c("end_col", end_col))){
35+
if(!col_arg[2] %in% names(cDict)){
36+
stop(col_arg[1], " '", col_arg[2],
37+
"' not found in file header. Available columns: ",
38+
paste(names(cDict), collapse = ", "))
39+
}
40+
}
3141
#### create save dir ####
3242
if(!is.null(save_path)){
3343
dir.create(dirname(save_path), showWarnings = FALSE, recursive = TRUE)
3444
}
35-
#### Construct command ####
45+
#### Avoid overwriting input file with shell redirect ####
46+
## If save_path == target_path (after extension stripping), the redirect
47+
## would truncate the input before grep can read it.
48+
use_tmp <- FALSE
49+
actual_save <- save_path
50+
if(!is.null(save_path) &&
51+
normalizePath(save_path, mustWork = FALSE) ==
52+
normalizePath(target_path, mustWork = FALSE)){
53+
save_path <- tempfile(fileext = "_sort_tmp.tsv")
54+
use_tmp <- TRUE
55+
}
56+
#### Construct command ####
3657
cmd <- paste("(",
3758
#### Extract the header col and sort everything else ####
3859
paste0(z_grep," ^",shQuote(comment_char)," ",target_path,"; ",
3960
z_grep," -v ^",shQuote(comment_char)," ",target_path
4061
),
4162
paste0("| sort -k",
4263
cDict[[chrom_col]],",",
43-
## !!IMPORTANT!!: "n" is critical because otherwise
64+
## !!IMPORTANT!!: "n" is critical because otherwise
4465
## `sort` will order numbers
4566
## as: 1,12,18,4
46-
## However, sort only works if chromosome
67+
## However, sort only works if chromosome
4768
## is in numeric format.
4869
cDict[[chrom_col]],"n"
4970
),
@@ -56,21 +77,27 @@ sort_coordinates_bash <- function(target_path,
5677
)
5778
cmd <- trimws(cmd)
5879
#### Return ####
59-
dat <- NULL;
80+
dat <- NULL;
6081
if(any(c("path","data") %in% outputs)){
6182
#### Only execute the command if other outputs are selected ####
6283
echoconda::cmd_print(cmd, verbose = verbose)
6384
system(cmd)
85+
#### Move temp file to actual destination if needed ####
86+
if(use_tmp && file.exists(save_path)){
87+
file.copy(save_path, actual_save, overwrite = TRUE)
88+
file.remove(save_path)
89+
save_path <- actual_save
90+
}
6491
#### Only read in data if selected in outputs ####
6592
if("data" %in% outputs){
6693
if(!is.null(save_path)){
67-
dat <- data.table::fread(save_path, nThread = 1)
68-
}
69-
}
94+
dat <- data.table::fread(save_path, nThread = 1)
95+
}
96+
}
7097
}
71-
out <- construct_outputs(outputs = outputs,
72-
command = cmd,
98+
out <- construct_outputs(outputs = outputs,
99+
command = cmd,
73100
path = save_path,
74-
data = dat)
101+
data = dat)
75102
return(out)
76103
}

tests/testthat/test-convert.R

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,18 @@ test_that("convert works", {
33
run_tests <- function(dat,
44
convert_methods=eval(formals(echotabix::convert)$convert_methods)
55
){
6+
## Detect position column name
7+
pos_col <- intersect(c("POS","BP"), colnames(dat))[1]
8+
if(is.na(pos_col)) testthat::skip("No position column (POS/BP)")
69
tmp <- tempfile()
710
data.table::fwrite(dat, tmp, sep="\t")
811
#### Sorted data ####
912
dat_sorted <- data.table::copy(dat)
10-
data.table::setkey(dat_sorted, CHR, POS)
13+
data.table::setkeyv(dat_sorted, c("CHR", pos_col))
1114
data.table::setkey(dat_sorted, NULL)
1215

1316
tabix_files <- echotabix::convert(target_path = tmp,
17+
start_col = pos_col,
1418
convert_methods = convert_methods) ## <- main func
1519
testthat::expect_true(file.exists(tabix_files$path))
1620
testthat::expect_true(file.exists(tabix_files$index))
@@ -20,10 +24,14 @@ test_that("convert works", {
2024
#### Return to normal for comparison ####
2125
if(grepl("chr",dat_sorted$CHR[1])) {
2226
dat_sorted[,CHR:=as.integer(gsub("chr","",CHR))]
23-
data.table::setkey(dat_sorted, CHR, POS)
27+
data.table::setkeyv(dat_sorted, c("CHR", pos_col))
2428
data.table::setkey(dat_sorted, NULL)
2529
}
26-
testthat::expect_equal(head(dat_sorted,1000), dat2)
30+
## Coerce types for comparison
31+
if(is.character(dat2$CHR)) dat2[, CHR := as.integer(CHR)]
32+
if(is.character(dat_sorted$CHR)) dat_sorted[, CHR := as.integer(CHR)]
33+
testthat::expect_equal(head(dat_sorted,1000), dat2,
34+
check.attributes = FALSE)
2735
### Clean up ####
2836
file.remove(unlist(tabix_files))
2937
file.remove(tmp)
@@ -39,7 +47,10 @@ test_that("convert works", {
3947
#### fullSS ####
4048
target_path <- echodata::example_fullSS()
4149
dat_all <- data.table::fread(target_path)
42-
data.table::setnames(dat_all,"BP","POS")
50+
## Standardise position column to POS if needed
51+
if("BP" %in% colnames(dat_all) && !"POS" %in% colnames(dat_all)){
52+
data.table::setnames(dat_all, "BP", "POS")
53+
}
4354
dat2_all <- run_tests(dat = dat_all,
4455
convert_methods = convert_methods)
4556

@@ -48,7 +59,7 @@ test_that("convert works", {
4859
dat3_all <- run_tests(dat = dat_all,
4960
convert_methods = convert_methods)
5061
### Cleanup ####
51-
file.remove(list.files(tempdir(), full.names = TRUE, recursive = TRUE))
62+
try(file.remove(target_path), silent = TRUE)
5263
}
5364

5465
#### ---- convert_methods combo 1 ---- #####

tests/testthat/test-query_tabular.R

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,18 @@
11
test_that("query_table works", {
22

33
query_dat <- echodata::BST1
4+
## Detect the position column name (POS or BP depending on echodata version)
5+
pos_col <- intersect(c("POS","BP"), colnames(query_dat))[1]
6+
if(is.na(pos_col)) testthat::skip("No position column (POS/BP) found in BST1")
47

58
#### --- LOCAL FILE --- ####
69
target_path <- echodata::example_fullSS()
10+
## Detect position column in the fullSS file too
11+
fullSS_cols <- colnames(data.table::fread(target_path, nrows = 0))
12+
fullSS_pos <- intersect(c("BP","POS"), fullSS_cols)[1]
13+
if(is.na(fullSS_pos)) testthat::skip("No position column in fullSS file")
714
tabix_files <- echotabix::convert(target_path = target_path,
8-
start_col = "BP")
15+
start_col = fullSS_pos)
916

1017
##### seqminer ####
1118
if(requireNamespace("seqminer", quietly = TRUE)){
@@ -23,8 +30,8 @@ test_that("query_table works", {
2330
target_path = tabix_files$path,
2431
query_granges = echotabix::construct_query(
2532
query_chrom = as.integer(query_dat$CHR[1]),
26-
query_start_pos = as.integer(min(query_dat$POS, na.rm = TRUE)),
27-
query_end_pos = as.integer(min(query_dat$POS, na.rm = TRUE) + 1000),
33+
query_start_pos = as.integer(min(query_dat[[pos_col]], na.rm = TRUE)),
34+
query_end_pos = as.integer(min(query_dat[[pos_col]], na.rm = TRUE) + 1000),
2835
),
2936
query_method = "seqminer"
3037
)
@@ -48,8 +55,8 @@ test_that("query_table works", {
4855
target_path = tabix_files$path,
4956
query_granges = echotabix::construct_query(
5057
query_chrom = as.integer(query_dat$CHR[1]),
51-
query_start_pos = as.integer(min(query_dat$POS, na.rm = TRUE)),
52-
query_end_pos = as.integer(min(query_dat$POS, na.rm = TRUE) + 1000),
58+
query_start_pos = as.integer(min(query_dat[[pos_col]], na.rm = TRUE)),
59+
query_end_pos = as.integer(min(query_dat[[pos_col]], na.rm = TRUE) + 1000),
5360
),
5461
query_method = "rsamtools"
5562
)
@@ -76,8 +83,8 @@ test_that("query_table works", {
7683
target_path = tabix_files$path,
7784
query_granges = echotabix::construct_query(
7885
query_chrom = as.integer(query_dat$CHR[1]),
79-
query_start_pos = as.integer(min(query_dat$POS, na.rm = TRUE)),
80-
query_end_pos = as.integer(min(query_dat$POS, na.rm = TRUE) + 1000),
86+
query_start_pos = as.integer(min(query_dat[[pos_col]], na.rm = TRUE)),
87+
query_end_pos = as.integer(min(query_dat[[pos_col]], na.rm = TRUE) + 1000),
8188
),
8289
query_method = "conda"
8390
)
@@ -104,8 +111,8 @@ test_that("query_table works", {
104111
target_path = target_path,
105112
query_granges = echotabix::construct_query(
106113
query_chrom = as.integer(query_dat$CHR[1]),
107-
query_start_pos = as.integer(min(query_dat$POS, na.rm = TRUE)),
108-
query_end_pos = as.integer(min(query_dat$POS, na.rm = TRUE) + 10),
114+
query_start_pos = as.integer(min(query_dat[[pos_col]], na.rm = TRUE)),
115+
query_end_pos = as.integer(min(query_dat[[pos_col]], na.rm = TRUE) + 10),
109116
),
110117
query_method = "seqminer")
111118
## Check for appropriate range
@@ -130,8 +137,8 @@ test_that("query_table works", {
130137
target_path = target_path,
131138
query_granges = echotabix::construct_query(
132139
query_chrom = as.integer(query_dat$CHR[1]),
133-
query_start_pos = as.integer(min(query_dat$POS, na.rm = TRUE)),
134-
query_end_pos = as.integer(min(query_dat$POS, na.rm = TRUE) + 10),
140+
query_start_pos = as.integer(min(query_dat[[pos_col]], na.rm = TRUE)),
141+
query_end_pos = as.integer(min(query_dat[[pos_col]], na.rm = TRUE) + 10),
135142
),
136143
query_method = "rsamtools"
137144
)

tests/testthat/test-run_bgzip.R

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
test_that("run_bgzip works", {
22

33
dat <- echodata::BST1[1:100,]
4-
tmp <- tempfile(fileext = ".tsv.gz")
4+
## Detect the position column name (POS or BP depending on echodata version)
5+
pos_col <- intersect(c("POS","BP"), colnames(dat))[1]
6+
if(is.na(pos_col)) testthat::skip("No position column (POS/BP) found in BST1")
7+
## Use uncompressed .tsv to avoid gzip/bgzip double-compression issues
8+
tmp <- tempfile(fileext = ".tsv")
59
data.table::fwrite(dat, tmp, sep="\t")
610
### Sort
711
dat_sorted <- data.table::copy(dat)
8-
try({data.table::setkeyv(dat_sorted, c("CHR", "POS"))})
12+
try({data.table::setkeyv(dat_sorted, c("CHR", pos_col))})
913

1014
#### Test missing args ####
1115
testthat::expect_error(
@@ -18,22 +22,24 @@ test_that("run_bgzip works", {
1822
#### Test run: with .tsv: unsorted ####
1923
bgz_file2 <- echotabix::run_bgzip(target_path=tmp,
2024
chrom_col = "CHR",
21-
start_col = "POS",
25+
start_col = pos_col,
2226
sort_rows = FALSE)
2327
dat1 <- echotabix::read_bgz(bgz_file2)
24-
## CHR may be read back as character; coerce for comparison
28+
## Coerce types for comparison
2529
if(is.character(dat1$CHR)) dat1[, CHR := as.integer(CHR)]
26-
testthat::expect_equal(dat, dat1)
30+
if(is.character(dat$CHR)) dat[, CHR := as.integer(CHR)]
31+
testthat::expect_equal(dat, dat1, check.attributes = FALSE)
2732

2833
#### Test run: with .tsv: sorted ####
2934
## Recreate file since previous run consumed it
30-
tmp2 <- tempfile(fileext = ".tsv.gz")
35+
tmp2 <- tempfile(fileext = ".tsv")
3136
data.table::fwrite(dat, tmp2, sep="\t")
3237
bgz_file1 <- echotabix::run_bgzip(target_path=tmp2,
3338
chrom_col = "CHR",
34-
start_col = "POS",
39+
start_col = pos_col,
3540
sort_rows = TRUE)
3641
dat2 <- echotabix::read_bgz(bgz_file1)
3742
if(is.character(dat2$CHR)) dat2[, CHR := as.integer(CHR)]
43+
if(is.character(dat_sorted$CHR)) dat_sorted[, CHR := as.integer(CHR)]
3844
testthat::expect_equal(dat_sorted, dat2, ignore_attr = TRUE)
3945
})

tests/testthat/test-sort_coordinates.R

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,67 @@
11
test_that("sort_coordinates works", {
2-
2+
33
tmp <- echodata::example_fullSS()
44
dat <- data.table::fread(tmp)
5+
## Detect position column name
6+
pos_col <- intersect(c("BP","POS"), colnames(dat))[1]
7+
if(is.na(pos_col)) testthat::skip("No position column (BP/POS)")
58
# dat <- echodata::BST1[1:200,]
6-
## Sort alphanumerically by SNP instead of coordinates
9+
## Sort alphanumerically by SNP instead of coordinates
710
data.table::setkey(dat, SNP)
811
data.table::setkey(dat, NULL)
912
# tmp <- tempfile()
1013
# data.table::fwrite(dat, tmp, sep="\t")
1114
tmp <- R.utils::gzip(tmp)
1215
### sort ####
1316
dat_sorted <- data.table::copy(dat)
14-
data.table::setkey(dat_sorted, CHR, BP)
15-
### Remove key
17+
data.table::setkeyv(dat_sorted, c("CHR", pos_col))
18+
### Remove key
1619
data.table::setkey(dat_sorted, NULL)
1720
#### Check that only a very small proportion of the SNPs match after sorting
1821
testthat::expect_lte(sum(dat$SNP==dat_sorted$SNP)/nrow(dat), 0.05)
19-
22+
2023
#### Return command ####
2124
cmd <- echotabix::sort_coordinates(target_path=tmp,
2225
chrom_col = "CHR",
23-
start_col = "BP",
26+
start_col = pos_col,
2427
outputs = "command")
2528
testthat::expect_true(methods::is(cmd,"character"))
26-
29+
2730
#### Run command and save ####
2831
save_path <- tempfile(fileext = "2.tsv")
2932
save_path_out <- echotabix::sort_coordinates(target_path=tmp,
3033
chrom_col = "CHR",
31-
start_col = "BP",
32-
save_path = save_path,
34+
start_col = pos_col,
35+
save_path = save_path,
3336
outputs = "path")
3437
dat2 <- data.table::fread(save_path_out, nThread = 1)
3538
testthat::expect_equal(dat_sorted, dat2)
36-
39+
3740
#### Run command ####
3841
save_path3 <- tempfile(fileext = "3.tsv")
3942
dat3 <- echotabix::sort_coordinates(target_path=tmp,
4043
chrom_col = "CHR",
41-
start_col = "BP",
44+
start_col = pos_col,
4245
save_path = save_path3,
4346
outputs = "data")
4447
testthat::expect_equal(dat_sorted, dat3)
45-
46-
#### Run command with chr prefix ####
48+
49+
#### Run command with chr prefix ####
4750
dat$CHR <- paste0("chr",dat$CHR)
4851
data.table::fwrite(dat, tmp, sep="\t")
4952
dat4 <- echotabix::sort_coordinates(target_path=tmp,
5053
chrom_col = "CHR",
51-
start_col = "BP",
54+
start_col = pos_col,
5255
outputs = "data")
5356
testthat::expect_equal(dat_sorted, dat4)
54-
55-
#### Run command with chr prefix: supply save_path ####
57+
58+
#### Run command with chr prefix: supply save_path ####
5659
tmp3 <- tempfile(fileext = "3.tsv")
5760
data.table::fwrite(dat, tmp)
5861
sorted_path <- tempfile(fileext = "_sorted.tsv")
5962
out5 <- echotabix::sort_coordinates(target_path=tmp,
6063
chrom_col = "CHR",
61-
start_col = "BP",
64+
start_col = pos_col,
6265
save_path = sorted_path)
6366
testthat::expect_equal(dat_sorted, out5$data)
6467
dat5_saved <- data.table::fread(out5$path)

0 commit comments

Comments
 (0)