diff --git a/dockerfiles/sv-base/Dockerfile b/dockerfiles/sv-base/Dockerfile
index ac0608caf..bf1cdc19e 100644
--- a/dockerfiles/sv-base/Dockerfile
+++ b/dockerfiles/sv-base/Dockerfile
@@ -1,7 +1,7 @@
# This is the base dockerfile for the GATK SV pipeline that adds R, a few R packages, and GATK
ARG SAMTOOLS_CLOUD_IMAGE=samtools-cloud:latest
ARG VIRTUAL_ENV_IMAGE=sv-base-virtual-env:latest
-ARG GATK_COMMIT="a33bf19dd3188af0af1bd17bce015eb20ba73227"
+ARG GATK_COMMIT="64348bc9750ebf6cc473ecb8c1ced3fc66f05488"
ARG GATK_JAR="/opt/gatk.jar"
ARG R_INSTALL_PATH=/opt/R
@@ -14,8 +14,8 @@ FROM $SAMTOOLS_CLOUD_IMAGE as samtools_cloud
FROM $VIRTUAL_ENV_IMAGE as virtual_env_image
RUN rm_unneeded_r_library_files.sh
-ARG GATK_BUILD_DEP="git git-lfs openjdk-8-jdk"
-ARG GATK_RUN_DEP="openjdk-8-jre-headless libgomp1"
+ARG GATK_BUILD_DEP="git git-lfs openjdk-17-jdk"
+ARG GATK_RUN_DEP="openjdk-17-jre-headless libgomp1"
ARG GATK_COMMIT
ARG GATK_JAR
ARG DEBIAN_FRONTEND=noninteractive
diff --git a/inputs/templates/terra_workspaces/cohort_mode/cohort_mode_workspace_dashboard.md.tmpl b/inputs/templates/terra_workspaces/cohort_mode/cohort_mode_workspace_dashboard.md.tmpl
index 86e1d8bd0..b210e9b53 100644
--- a/inputs/templates/terra_workspaces/cohort_mode/cohort_mode_workspace_dashboard.md.tmpl
+++ b/inputs/templates/terra_workspaces/cohort_mode/cohort_mode_workspace_dashboard.md.tmpl
@@ -190,7 +190,10 @@ These two workflows make up FilterBatch; they are subdivided in this workspace t
#### 09-MergeBatchSites
Read the full MergeBatchSites documentation [here](https://github.com/broadinstitute/gatk-sv#merge-batch-sites).
-* `09-MergeBatchSites` is a cohort-level workflow, so it is run on a `sample_set_set` containing all of the batches in the cohort. You can create this `sample_set_set` while you are launching the `09-MergeBatchSites` workflow: click "Select Data", choose "Create new sample_set_set [...]", check all the batches to include (all of the ones used in `03-TrainGCNV` through `08-FilterBatchSamples`), and give it a name that follows the **Sample ID requirements**.
+* `09-MergeBatchSites` is a cohort-level workflow, so it is run on a `sample_set_set` containing all of the batches in the cohort. Navigate to the Data tab of your workspace. If there is no `sample_set_set` data table, you will need to create it. To do this, select the `sample_set` data table, then use the checkboxes to select all of the batches (`sample_set` rows) in your cohort. These should be the same `sample_set` entries that you used to run steps `03-TrainGCNV` through `08-FilterBatchSamples`. Then click the "Edit" icon above the table and choose "Save selection as set". Enter a name that follows the **Sample ID requirements**. This will create a new `sample_set_set` containing all of the `sample_set` entries in your cohort. When you launch `09-MergeBatchSites`, you can now select this `sample_set_set`.
+
+
+* Alternatively, if there is already a `sample_set_set` data table in your workspace, you can create the cohort's `sample_set_set` while you are launching the `09-MergeBatchSites` workflow: click "Select Data", choose "Create new sample_set_set [...]", check all the batches to include (all of the ones used in `03-TrainGCNV` through `08-FilterBatchSamples`), and give it a name that follows the **Sample ID requirements**.
diff --git a/inputs/values/dockers.json b/inputs/values/dockers.json
index c170783b2..d1f5def70 100644
--- a/inputs/values/dockers.json
+++ b/inputs/values/dockers.json
@@ -1,6 +1,6 @@
{
"name": "dockers",
- "cnmops_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/cnmops:2024-06-04-v0.28.5-beta-a8dfecba",
+ "cnmops_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/cnmops:2024-08-27-v0.29-beta-6b27c39f",
"condense_counts_docker": "us.gcr.io/broad-dsde-methods/tsharpe/gatk:4.2.6.1-57-g9e03432",
"gatk_docker": "us.gcr.io/broad-dsde-methods/eph/gatk:2024-07-02-4.6.0.0-1-g4af2b49e9-NIGHTLY-SNAPSHOT",
"gatk_docker_pesr_override": "us.gcr.io/broad-dsde-methods/tsharpe/gatk:4.2.6.1-57-g9e03432",
@@ -10,10 +10,10 @@
"melt_docker": "us.gcr.io/talkowski-sv-gnomad/melt:a85c92f",
"scramble_docker": "us.gcr.io/broad-dsde-methods/markw/scramble:mw-scramble-99af4c50",
"samtools_cloud_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/samtools-cloud:2024-01-24-v0.28.4-beta-9debd6d7",
- "sv_base_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-base:2024-01-24-v0.28.4-beta-9debd6d7",
+ "sv_base_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-base:2024-08-27-v0.29-beta-6b27c39f",
"sv_base_mini_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-base-mini:2024-01-24-v0.28.4-beta-9debd6d7",
- "sv_pipeline_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2024-08-19-v0.28.5-beta-84a0627d",
- "sv_pipeline_qc_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2024-08-19-v0.28.5-beta-84a0627d",
+ "sv_pipeline_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2024-08-27-v0.29-beta-6b27c39f",
+ "sv_pipeline_qc_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-pipeline:2024-08-27-v0.29-beta-6b27c39f",
"wham_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/wham:2024-01-24-v0.28.4-beta-9debd6d7",
"igv_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/igv:mw-xz-fixes-2-b1be6a9",
"duphold_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/duphold:mw-xz-fixes-2-b1be6a9",
@@ -28,5 +28,5 @@
"sv_utils_docker": "us.gcr.io/broad-dsde-methods/markw/sv-utils:mw-train-genotype-filtering-a9479501",
"gq_recalibrator_docker": "us.gcr.io/broad-dsde-methods/markw/gatk:mw-tb-form-sv-filter-training-data-899360a",
"str": "us.gcr.io/broad-dsde-methods/gatk-sv/str:2023-05-23-v0.27.3-beta-e537bdd6",
- "denovo": "us.gcr.io/broad-dsde-methods/gatk-sv/denovo:2024-08-19-v0.28.5-beta-84a0627d"
+ "denovo": "us.gcr.io/broad-dsde-methods/gatk-sv/denovo:2024-08-27-v0.29-beta-6b27c39f"
}
\ No newline at end of file
diff --git a/inputs/values/dockers_azure.json b/inputs/values/dockers_azure.json
index 12498e0e3..8ede2fae2 100644
--- a/inputs/values/dockers_azure.json
+++ b/inputs/values/dockers_azure.json
@@ -1,6 +1,6 @@
{
"name": "dockers",
- "cnmops_docker": "vahid.azurecr.io/gatk-sv/cnmops:2024-06-04-v0.28.5-beta-a8dfecba",
+ "cnmops_docker": "vahid.azurecr.io/gatk-sv/cnmops:2024-08-27-v0.29-beta-6b27c39f",
"condense_counts_docker": "vahid.azurecr.io/tsharpe/gatk:4.2.6.1-57-g9e03432",
"gatk_docker": "vahid.azurecr.io/gatk-sv/gatk:2024-07-02-4.6.0.0-1-g4af2b49e9-NIGHTLY-SNAPSHOT",
"gatk_docker_pesr_override": "vahid.azurecr.io/tsharpe/gatk:4.2.6.1-57-g9e03432",
@@ -10,10 +10,10 @@
"melt_docker": "vahid.azurecr.io/melt:a85c92f",
"scramble_docker": "vahid.azurecr.io/scramble:mw-scramble-99af4c50",
"samtools_cloud_docker": "vahid.azurecr.io/gatk-sv/samtools-cloud:2024-01-24-v0.28.4-beta-9debd6d7",
- "sv_base_docker": "vahid.azurecr.io/gatk-sv/sv-base:2024-01-24-v0.28.4-beta-9debd6d7",
+ "sv_base_docker": "vahid.azurecr.io/gatk-sv/sv-base:2024-08-27-v0.29-beta-6b27c39f",
"sv_base_mini_docker": "vahid.azurecr.io/gatk-sv/sv-base-mini:2024-01-24-v0.28.4-beta-9debd6d7",
- "sv_pipeline_docker": "vahid.azurecr.io/gatk-sv/sv-pipeline:2024-08-19-v0.28.5-beta-84a0627d",
- "sv_pipeline_qc_docker": "vahid.azurecr.io/gatk-sv/sv-pipeline:2024-08-19-v0.28.5-beta-84a0627d",
+ "sv_pipeline_docker": "vahid.azurecr.io/gatk-sv/sv-pipeline:2024-08-27-v0.29-beta-6b27c39f",
+ "sv_pipeline_qc_docker": "vahid.azurecr.io/gatk-sv/sv-pipeline:2024-08-27-v0.29-beta-6b27c39f",
"wham_docker": "vahid.azurecr.io/gatk-sv/wham:2024-01-24-v0.28.4-beta-9debd6d7",
"igv_docker": "vahid.azurecr.io/gatk-sv/igv:mw-xz-fixes-2-b1be6a9",
"duphold_docker": "vahid.azurecr.io/gatk-sv/duphold:mw-xz-fixes-2-b1be6a9",
@@ -28,5 +28,5 @@
"sv_utils_docker": "vahid.azurecr.io/gatk-sv/sv-utils:2024-01-24-v0.28.4-beta-9debd6d7",
"gq_recalibrator_docker": "vahid.azurecr.io/markw/gatk:mw-tb-form-sv-filter-training-data-899360a",
"str": "vahid.azurecr.io/gatk-sv/str:2023-05-23-v0.27.3-beta-e537bdd6",
- "denovo": "vahid.azurecr.io/gatk-sv/denovo:2024-08-19-v0.28.5-beta-84a0627d"
+ "denovo": "vahid.azurecr.io/gatk-sv/denovo:2024-08-27-v0.29-beta-6b27c39f"
}
\ No newline at end of file
diff --git a/src/svtk/svtk/pesr/pe_test.py b/src/svtk/svtk/pesr/pe_test.py
index a737e2c0a..bde88cb59 100644
--- a/src/svtk/svtk/pesr/pe_test.py
+++ b/src/svtk/svtk/pesr/pe_test.py
@@ -89,7 +89,8 @@ def _get_coords(pos, strand):
startA, endA = _get_coords(record.pos, strandA)
startB, endB = _get_coords(record.stop, strandB)
- region = '{0}:{1}-{2}'.format(record.chrom, startA, endA)
+ # Add 1 because evidence is stored/indexed with 0-based coordinates
+ region = '{0}:{1}-{2}'.format(record.chrom, startA + 1, endA + 1)
try:
pairs = self.discfile.fetch(region=region, parser=pysam.asTuple())
diff --git a/src/svtk/svtk/pesr/sr_test.py b/src/svtk/svtk/pesr/sr_test.py
index 4fefd6dca..612127de3 100644
--- a/src/svtk/svtk/pesr/sr_test.py
+++ b/src/svtk/svtk/pesr/sr_test.py
@@ -82,7 +82,7 @@ def test_record(self, record, called, background):
# Clean up columns
results['name'] = record.id
results['bg_frac'] = results.called / \
- (results.background + results.called)
+ (results.background + results.called)
results['bg_frac'] = results.bg_frac.fillna(0)
cols = 'name coord pos log_pval called background bg_frac'.split()
@@ -120,7 +120,8 @@ def load_counts(self, chrom, pos, strand):
"""Load pandas DataFrame from tabixfile"""
if pos > 0:
- region = '{0}:{1}-{1}'.format(chrom, pos)
+ # Add 1 because evidence is stored/indexed with 0-based coordinates
+ region = '{0}:{1}-{1}'.format(chrom, pos + 1)
try:
lines = self.countfile.fetch(region)
except ValueError:
diff --git a/wdl/BAFTestChromosome.wdl b/wdl/BAFTestChromosome.wdl
index 1e53f1229..c4f42da77 100644
--- a/wdl/BAFTestChromosome.wdl
+++ b/wdl/BAFTestChromosome.wdl
@@ -113,7 +113,6 @@ task BAFTest {
set -o pipefail
java -Xmx~{java_mem_mb}M -jar ${GATK_JAR} PrintSVEvidence \
- --skip-header \
--sequence-dictionary ~{ref_dict} \
--evidence-file ~{baf_metrics} \
-L "${chrom}:${start}-${end}" \
@@ -121,9 +120,9 @@ task BAFTest {
else
touch local.BAF.txt
bgzip local.BAF.txt
+ tabix -0 -s1 -b2 -e2 local.BAF.txt.gz
fi
- tabix -s1 -b2 -e2 local.BAF.txt.gz
svtk baf-test ~{bed} local.BAF.txt.gz --batch batch.key > ~{prefix}.metrics
>>>
diff --git a/wdl/BatchEvidenceMerging.wdl b/wdl/BatchEvidenceMerging.wdl
index 4225784af..7f6f04ad4 100644
--- a/wdl/BatchEvidenceMerging.wdl
+++ b/wdl/BatchEvidenceMerging.wdl
@@ -158,7 +158,7 @@ task MergeEvidence {
fi
awk '/txt\.gz$/' evidence.list | while read fil; do
- tabix -f -s1 -b2 -e2 $fil
+ tabix -f -0 -s1 -b2 -e2 $fil
done
/gatk/gatk --java-options "-Xmx~{java_heap_size_mb}m" PrintSVEvidence -F evidence.list --sample-names samples.list --sequence-dictionary ~{reference_dict} -O "~{batch}.~{evidence}.txt.gz"
diff --git a/wdl/GenotypeCpxCnvsPerBatch.wdl b/wdl/GenotypeCpxCnvsPerBatch.wdl
index aca267819..0db32f435 100644
--- a/wdl/GenotypeCpxCnvsPerBatch.wdl
+++ b/wdl/GenotypeCpxCnvsPerBatch.wdl
@@ -250,9 +250,9 @@ task RdTestGenotype {
else
touch local.RD.txt
bgzip local.RD.txt
+ tabix -p bed local.RD.txt.gz
fi
- tabix -p bed local.RD.txt.gz
tabix -p bed ~{bin_exclude}
Rscript /opt/RdTest/RdTest.R \
diff --git a/wdl/MatrixQC.wdl b/wdl/MatrixQC.wdl
index 902161ac0..e4681f961 100644
--- a/wdl/MatrixQC.wdl
+++ b/wdl/MatrixQC.wdl
@@ -158,10 +158,9 @@ task PESRBAF_QC {
else
touch ~{print_ev_output}
bgzip ~{print_ev_output}
+ tabix -f -0 -s 1 -b 2 -e 2 ~{print_ev_output}
fi
- tabix -f -s 1 -b 2 -e 2 ~{print_ev_output}
-
/opt/sv-pipeline/00_preprocessing/misc_scripts/nonRD_matrix_QC.sh \
-d ~{distance} \
~{print_ev_output} \
@@ -238,10 +237,9 @@ task RD_QC {
else
touch local.RD.txt
bgzip local.RD.txt
+ tabix -f -p bed ~{print_ev_output}
fi
- tabix -f -p bed ~{print_ev_output}
-
/opt/sv-pipeline/00_preprocessing/misc_scripts/RD_matrix_QC.sh \
-d ~{distance} \
~{print_ev_output} \
diff --git a/wdl/PETestChromosome.wdl b/wdl/PETestChromosome.wdl
index a573cce13..360db4bf0 100644
--- a/wdl/PETestChromosome.wdl
+++ b/wdl/PETestChromosome.wdl
@@ -217,7 +217,6 @@ task PETest {
if [ -s region.merged.bed ]; then
java -Xmx~{java_mem_mb}M -jar ${GATK_JAR} PrintSVEvidence \
- --skip-header \
--sequence-dictionary ~{ref_dict} \
--evidence-file ~{discfile} \
-L region.merged.bed \
@@ -225,9 +224,9 @@ task PETest {
else
touch local.PE.txt
bgzip local.PE.txt
+ tabix -0 -s1 -b2 -e2 local.PE.txt.gz
fi
- tabix -s1 -b2 -e2 local.PE.txt.gz
svtk pe-test -o ~{window} ~{common_arg} --medianfile ~{medianfile} --samples ~{include_list} ~{vcf} local.PE.txt.gz ~{prefix}.stats
>>>
runtime {
diff --git a/wdl/RDTestChromosome.wdl b/wdl/RDTestChromosome.wdl
index df11fabc4..0668fe5d5 100644
--- a/wdl/RDTestChromosome.wdl
+++ b/wdl/RDTestChromosome.wdl
@@ -176,10 +176,9 @@ task RDTest {
else
touch local.RD.txt
bgzip local.RD.txt
+ tabix -p bed local.RD.txt.gz
fi
- tabix -p bed local.RD.txt.gz
-
Rscript /opt/RdTest/RdTest.R \
-b ~{bed} \
-n ~{prefix} \
diff --git a/wdl/ResolveCpxSv.wdl b/wdl/ResolveCpxSv.wdl
index f63e1979e..cba568831 100644
--- a/wdl/ResolveCpxSv.wdl
+++ b/wdl/ResolveCpxSv.wdl
@@ -345,7 +345,6 @@ task ResolvePrep {
if [ -s regions.bed ]; then
java -Xmx~{java_mem_mb}M -jar ${GATK_JAR} PrintSVEvidence \
- --skip-header \
--sequence-dictionary ~{ref_dict} \
--evidence-file $GS_PATH_TO_DISC_FILE \
-L regions.bed \
@@ -385,7 +384,7 @@ task ResolvePrep {
> discfile.PE.txt.gz
fi
- tabix -s 1 -b 2 -e 2 -f discfile.PE.txt.gz
+ tabix -0 -s 1 -b 2 -e 2 -f discfile.PE.txt.gz
>>>
output {
diff --git a/wdl/SRTestChromosome.wdl b/wdl/SRTestChromosome.wdl
index 0f945a972..83987975d 100644
--- a/wdl/SRTestChromosome.wdl
+++ b/wdl/SRTestChromosome.wdl
@@ -218,7 +218,6 @@ task SRTest {
if [ -s region.merged.bed ]; then
java -Xmx~{java_mem_mb}M -jar ${GATK_JAR} PrintSVEvidence \
- --skip-header \
--sequence-dictionary ~{ref_dict} \
--evidence-file ~{splitfile} \
-L region.merged.bed \
@@ -226,9 +225,9 @@ task SRTest {
else
touch local.SR.txt
bgzip local.SR.txt
+ tabix -0 -s1 -b2 -e2 local.SR.txt.gz
fi
- tabix -s1 -b2 -e2 local.SR.txt.gz
svtk sr-test -w 50 --log ~{common_arg} --medianfile ~{medianfile} --samples ~{include_list} ~{vcf} local.SR.txt.gz ~{prefix}.stats
>>>
runtime {
diff --git a/wdl/SetSampleIdLegacy.wdl b/wdl/SetSampleIdLegacy.wdl
index bcc114582..17957d3a3 100644
--- a/wdl/SetSampleIdLegacy.wdl
+++ b/wdl/SetSampleIdLegacy.wdl
@@ -122,13 +122,13 @@ task SetSampleId {
output_name="~{sample_name}.~{file_type}.txt.gz"
if [ ! -f "~{evidence_file}.tbi" ]; then
- tabix -s1 -b2 -e2 ~{evidence_file}
+ tabix -0 -s1 -b2 -e2 ~{evidence_file}
fi
mkfifo $fifo_name
/gatk/gatk --java-options "-Xmx2000m" PrintSVEvidence -F ~{evidence_file} --sequence-dictionary ~{reference_dict} -O $fifo_name &
awk '{$~{sample_column}="~{sample_name}"}' < $fifo_name | bgzip -c > $output_name
- tabix -s1 -b2 -e2 $output_name
+ tabix -0 -s1 -b2 -e2 $output_name
>>>
runtime {
diff --git a/wdl/TasksGenotypeBatch.wdl b/wdl/TasksGenotypeBatch.wdl
index 7a945ff48..aa1221b3e 100644
--- a/wdl/TasksGenotypeBatch.wdl
+++ b/wdl/TasksGenotypeBatch.wdl
@@ -344,10 +344,9 @@ task RDTestGenotype {
else
touch local.RD.txt
bgzip local.RD.txt
+ tabix -p bed local.RD.txt.gz
fi
- tabix -p bed local.RD.txt.gz
-
Rscript /opt/RdTest/RdTest.R \
-b ~{bed} \
-c local.RD.txt.gz \
@@ -435,9 +434,9 @@ task CountPE {
else
touch local.PE.txt
bgzip local.PE.txt
+ tabix -0 -s1 -b2 -e2 local.PE.txt.gz
fi
- tabix -s1 -b2 -e2 local.PE.txt.gz
svtk count-pe -s ~{write_lines(samples)} --medianfile ~{medianfile} ~{vcf} local.PE.txt.gz ~{prefix}.pe_counts.txt
gzip ~{prefix}.pe_counts.txt
@@ -511,9 +510,9 @@ task CountSR {
else
touch local.SR.txt
bgzip local.SR.txt
+ tabix -0 -s1 -b2 -e2 local.SR.txt.gz
fi
- tabix -s1 -b2 -e2 local.SR.txt.gz
svtk count-sr -s ~{write_lines(samples)} --medianfile ~{medianfile} ~{vcf} local.SR.txt.gz ~{prefix}.sr_counts.txt
/opt/sv-pipeline/04_variant_resolution/scripts/sum_SR.sh ~{prefix}.sr_counts.txt ~{prefix}.sr_sum.txt.gz
gzip ~{prefix}.sr_counts.txt