-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmaf2vcf_v2.sh
More file actions
76 lines (56 loc) · 1.7 KB
/
maf2vcf_v2.sh
File metadata and controls
76 lines (56 loc) · 1.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env bash
#
# Call variants for every sample found in a MAF alignment file and combine the
# per-sample results into merged.vcf in the current directory.
#
# Usage:
#   maf2vcf_v2.sh <file.maf>
#
# Environment:
#   THREADS  Number of parallel jobs / bcftools threads (default: 8).
#
# Requires on PATH: GNU parallel, wgatools, bcftools.
# Per-sample intermediates are written under ./vcfs.
set -euo pipefail

MAF="${1:?Usage: $0 <file.maf>}"

# Let the caller size the job pool without editing the script; 8 stays the
# default so existing invocations behave as before.
THREADS="${THREADS:-8}"
if [[ ! "$THREADS" =~ ^[1-9][0-9]*$ ]]; then
  printf 'Error: THREADS must be a positive integer, got: %s\n' "$THREADS" >&2
  exit 1
fi

# Fail early with a clear message instead of deep inside a parallel job.
if [[ ! -r "$MAF" ]]; then
  printf 'Error: cannot read MAF file: %s\n' "$MAF" >&2
  exit 1
fi
for tool in parallel wgatools bcftools; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    printf 'Error: required tool not found on PATH: %s\n' "$tool" >&2
    exit 1
  fi
done

echo "Calling variants on MAF file: $MAF"

# Exported because the per-sample command run by parallel reads $MAF from its
# environment.
export MAF
mkdir -p vcfs
# --- Helper: inject ##contig lines into VCF header so bcftools can parse/sort/index ---
#
# Arguments:
#   $1 - input VCF (plain text); read twice, never modified
#   $2 - output VCF path; overwritten
# Behaviour:
#   Every distinct CHROM value found in the body gets a "##contig=<ID=...>"
#   line inserted immediately before the #CHROM header line, unless the header
#   already declares that contig. All other lines are copied through as-is.
# Returns:
#   0 on success, otherwise the status of the step that failed. The scratch
#   file is removed on both paths.
add_contigs_to_header () {
  local in_vcf="$1"
  local out_vcf="$2"
  local contigs_file
  local status=0

  # Unique scratch file next to the input, so concurrent calls cannot collide.
  contigs_file="$(mktemp "${in_vcf}.contigs.XXXXXX")" || return 1

  # The two steps are chained with && inside a "|| status=$?" list so that a
  # failure is captured rather than aborting (set -e) before the cleanup below.
  {
    # Collect contig names from CHROM column (skip headers and blank lines;
    # a blank line would otherwise yield an empty contig ID).
    awk '!/^#/ && NF { print $1 }' "$in_vcf" | sort -u > "$contigs_file" &&
    # Write header, insert missing contig lines before #CHROM, then write body.
    awk -v contigs_file="$contigs_file" '
      BEGIN {
        while ((getline c < contigs_file) > 0) contigs[++n] = c
        close(contigs_file)
      }
      # Remember contigs the header already declares so they are not repeated.
      /^##contig=<ID=/ {
        id = $0
        sub(/^##contig=<ID=/, "", id)
        sub(/[,>].*$/, "", id)
        declared[id] = 1
      }
      /^#CHROM/ {
        for (i = 1; i <= n; i++)
          if (!(contigs[i] in declared)) print "##contig=<ID=" contigs[i] ">"
        print
        next
      }
      { print }
    ' "$in_vcf" > "$out_vcf"
  } || status=$?

  rm -f -- "$contigs_file"
  return "$status"
}
# Make the helper callable from the bash shells that parallel spawns.
export -f add_contigs_to_header

# Get sample names from MAF.
# Sequence ("s") lines carry "<sample>#<contig>" in their second field. awk
# splits on any run of blanks, so tab- and space-delimited files both work
# (cut -f2 only understands tabs).
mapfile -t SAMPLES < <(
  awk '$1 == "s" && NF >= 2 { sub(/#.*/, "", $2); print $2 }' "$MAF" | sort -u
)

if (( ${#SAMPLES[@]} == 0 )); then
  printf 'Error: no sample ("s") lines found in MAF file: %s\n' "$MAF" >&2
  exit 1
fi

# Per-sample outputs left behind by an earlier, aborted run would otherwise be
# picked up by the merge glob below.
rm -f -- vcfs/*.vcf.gz vcfs/*.vcf.gz.csi

# One job per sample: call variants, add ##contig header lines, then sort,
# compress and index. The first failing job stops all the others.
parallel --jobs "$THREADS" --halt now,fail=1 '
set -euo pipefail
SAMPLE={};
raw="vcfs/${SAMPLE}.raw.vcf"
fixed="vcfs/${SAMPLE}.vcf"
gz="vcfs/${SAMPLE}.vcf.gz"
wgatools call \
--query-regex "^${SAMPLE}#.*" \
--sample "$SAMPLE" \
--snp \
--svlen 0 \
-r "$MAF" \
-o "$raw"
add_contigs_to_header "$raw" "$fixed"
rm -f "$raw"
bcftools sort -Oz -o "$gz" "$fixed"
bcftools index -f "$gz"
rm -f "$fixed"
' ::: "${SAMPLES[@]}"

vcf_inputs=(vcfs/*.vcf.gz)
if (( ${#vcf_inputs[@]} > 1 )); then
  bcftools merge "${vcf_inputs[@]}" -o merged.vcf.gz -O z --threads "$THREADS"
else
  # bcftools merge does not run on a single input unless --force-single is
  # given; with one sample its compressed VCF already is the merged result.
  cp -- "${vcf_inputs[0]}" merged.vcf.gz
fi
bcftools index -f merged.vcf.gz

# Drop the QI FORMAT field and write the result as plain-text VCF.
bcftools annotate --remove FORMAT/QI merged.vcf.gz -o merged.vcf

# Rewrite genotype strings in place: "./." becomes "0", and a pair of identical
# single digits joined by "|" (e.g. "1|1") becomes "1". The expressions are
# applied to every line of the file, header lines included.
sed -E -i.bak 's#\./\.#0#g; s#([0-9])\|\1#1#g' merged.vcf

# Remove intermediates; only merged.vcf is kept.
rm -rf vcfs
rm -f merged.vcf.gz merged.vcf.gz.csi merged.vcf.bak