Skip to content

Commit

Permalink
Add PlotSVCountsPerSample subworkflow to the end of ClusterBatch and …
Browse files Browse the repository at this point in the history
…FilterBatchSites (#567)

* Update README to link to SV callers used.

* Imported PlotSVCountsPerSample.wdl into ClusterBatch.wdl and FilterBatchSites.wdl. Added the N_IQR_cutoff input, with a default value of 6, to both WDLs. PlotSVCountsPerSample is called as a subworkflow at the end of each workflow, with the final VCFs as the input and the batch as the prefix. Added the outputs of PlotSVCountsPerSample to the workflows' outputs under unique names. Updated the JSON templates for ClusterBatch and FilterBatchSites in test and terra to include the N_IQR_cutoff input with a value of 6. Then validated the ClusterBatch and FilterBatchSites workflows with womtool and the Terra validation script, and ran the updated workflows on the ref_panel_1kg test data. The runs completed successfully and produced reasonable outputs.

* integrate PlotSVCountsPerSample into ClusterBatch and FilterBatchSites directly

* integrate PlotSVCountsPerSample into ClusterBatch and FilterBatchSites directly

* fixed issues from miniwdl

* fixed issues from miniwdl

* fixed issues from miniwdl

* fixed issues from miniwdl

* fixed issues from miniwdl

* fixed issues from miniwdl

* fixed issues from miniwdl

* fixed issues from miniwdl

* fixed issues from miniwdl

* fixed issues from miniwdl

* make edits to location of parameters called

* make edits to location of parameters called

---------

Co-authored-by: Kirtana Veeraraghavan <[email protected]>
  • Loading branch information
kirtanav98 and Kirtana Veeraraghavan authored Aug 16, 2023
1 parent fc9e992 commit e2e76a8
Show file tree
Hide file tree
Showing 10 changed files with 96 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,6 @@
"ClusterBatch.manta_vcf_tar": "${this.std_manta_vcf_tar}",
"ClusterBatch.melt_vcf_tar": "${this.std_melt_vcf_tar}",
"ClusterBatch.scramble_vcf_tar": "${this.std_scramble_vcf_tar}",
"ClusterBatch.ped_file": "${workspace.cohort_ped_file}"
"ClusterBatch.ped_file": "${workspace.cohort_ped_file}",
"ClusterBatch.N_IQR_cutoff_plotting": "6"
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@
"FilterBatchSites.melt_vcf" : "${this.clustered_melt_vcf}",
"FilterBatchSites.scramble_vcf" : "${this.clustered_scramble_vcf}",
"FilterBatchSites.evidence_metrics": "${this.metrics}",
"FilterBatchSites.evidence_metrics_common": "${this.metrics_common}"
"FilterBatchSites.evidence_metrics_common": "${this.metrics_common}",
"FilterBatchSites.N_IQR_cutoff_plotting": "6"
}
3 changes: 2 additions & 1 deletion inputs/templates/test/ClusterBatch/ClusterBatch.json.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,6 @@
"ClusterBatch.wham_vcf_tar": {{ test_batch.std_wham_vcf_tar | tojson }},
"ClusterBatch.manta_vcf_tar": {{ test_batch.std_manta_vcf_tar | tojson }},
"ClusterBatch.melt_vcf_tar": {{ test_batch.std_melt_vcf_tar | tojson }},
"ClusterBatch.ped_file": {{ test_batch.ped_file | tojson }}
"ClusterBatch.ped_file": {{ test_batch.ped_file | tojson }},
"ClusterBatch.N_IQR_cutoff_plotting": "6"
}
3 changes: 2 additions & 1 deletion inputs/templates/test/FilterBatch/FilterBatchSites.json.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@
"FilterBatchSites.wham_vcf" : {{ test_batch.merged_wham_vcf | tojson }},
"FilterBatchSites.melt_vcf" : {{ test_batch.merged_melt_vcf | tojson }},
"FilterBatchSites.evidence_metrics": {{ test_batch.evidence_metrics | tojson }},
"FilterBatchSites.evidence_metrics_common": {{ test_batch.evidence_metrics_common | tojson }}
"FilterBatchSites.evidence_metrics_common": {{ test_batch.evidence_metrics_common | tojson }},
"FilterBatchSites.N_IQR_cutoff_plotting": "6"
}
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
"GATKSVPipelineBatch.GATKSVPipelinePhase1.depth_exclude_overlap_fraction": "0.5",
"GATKSVPipelineBatch.GATKSVPipelinePhase1.depth_interval_overlap": "0.8",
"GATKSVPipelineBatch.GATKSVPipelinePhase1.depth_clustering_algorithm": "SINGLE_LINKAGE",

"GATKSVPipelineBatch.N_IQR_cutoff_plotting": "6",
"GATKSVPipelineBatch.GATKSVPipelinePhase1.BAF_split_size": "10000",
"GATKSVPipelineBatch.GATKSVPipelinePhase1.RD_split_size": "10000",
"GATKSVPipelineBatch.GATKSVPipelinePhase1.PE_split_size": "10000",
Expand All @@ -105,7 +105,6 @@

"GATKSVPipelineBatch.outlier_cutoff_table" : {{ test_batch.outlier_cutoff_table | tojson }},
"GATKSVPipelineBatch.GATKSVPipelinePhase1.outlier_cutoff_nIQR": "999999",

"GATKSVPipelineBatch.GenotypeBatch.n_RD_genotype_bins": "100000",
"GATKSVPipelineBatch.GenotypeBatch.n_per_split": "5000",
"GATKSVPipelineBatch.GenotypeBatch.pesr_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@

"GATKSVPipelinePhase1.outlier_cutoff_table" : {{ test_batch.outlier_cutoff_table | tojson }},
"GATKSVPipelinePhase1.outlier_cutoff_nIQR": "6",
"GATKSVPipelinePhase1.N_IQR_cutoff_plotting": "6",

"GATKSVPipelinePhase1.ploidy_sample_psi_scale": "0.001",
"GATKSVPipelinePhase1.contig_ploidy_model_tar" : {{ test_batch.contig_ploidy_model_tar | tojson }},
Expand Down
30 changes: 28 additions & 2 deletions wdl/ClusterBatch.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import "DepthClustering.wdl" as depth
import "ClusterBatchMetrics.wdl" as metrics
import "TasksClusterBatch.wdl" as tasks
import "Utils.wdl" as util
import "PlotSVCountsPerSample.wdl" as sv_counts

workflow ClusterBatch {
input {
Expand Down Expand Up @@ -48,6 +49,9 @@ workflow ClusterBatch {
Int pesr_breakend_window
String? pesr_clustering_algorithm

# PlotSVCountsPerSample
Int? N_IQR_cutoff_plotting

# Module metrics parameters
# Run module metrics workflow at the end - on by default
Boolean? run_module_metrics
Expand Down Expand Up @@ -81,6 +85,9 @@ workflow ClusterBatch {
RuntimeAttr? runtime_attr_gatk_to_svtk_vcf_depth
RuntimeAttr? runtime_override_concat_vcfs_depth
RuntimeAttr? runtime_attr_exclude_intervals_pesr
RuntimeAttr? runtime_attr_count_svs
RuntimeAttr? runtime_attr_plot_svcounts
RuntimeAttr? runtime_attr_cat_outliers_preview
}
call util.GetSampleIdsFromVcfTar {
Expand Down Expand Up @@ -282,6 +289,19 @@ workflow ClusterBatch {
}
}

if (defined(N_IQR_cutoff_plotting)){
call sv_counts.PlotSVCountsPerSample {
input:
prefix = batch,
vcfs = [ClusterDepth.clustered_vcf, ClusterPESR_manta.clustered_vcf, ClusterPESR_wham.clustered_vcf, ClusterPESR_melt.clustered_vcf, ClusterPESR_scramble.clustered_vcf],
N_IQR_cutoff = select_first([N_IQR_cutoff_plotting]),
sv_pipeline_docker = sv_pipeline_docker,
runtime_attr_count_svs = runtime_attr_count_svs,
runtime_attr_plot_svcounts = runtime_attr_plot_svcounts,
runtime_attr_cat_outliers_preview = runtime_attr_cat_outliers_preview
}
}
output {
File clustered_depth_vcf = ClusterDepth.clustered_vcf
File clustered_depth_vcf_index = ClusterDepth.clustered_vcf_index
Expand All @@ -293,7 +313,13 @@ workflow ClusterBatch {
File? clustered_melt_vcf_index = ClusterPESR_melt.clustered_vcf_index
File? clustered_scramble_vcf = ClusterPESR_scramble.clustered_vcf
File? clustered_scramble_vcf_index = ClusterPESR_scramble.clustered_vcf_index

Array[File]? clustered_sv_counts = PlotSVCountsPerSample.sv_counts
Array[File]? clustered_sv_count_plots = PlotSVCountsPerSample.sv_count_plots
File? clustered_outlier_samples_preview = PlotSVCountsPerSample.outlier_samples_preview
File? clustered_outlier_samples_with_reason = PlotSVCountsPerSample.outlier_samples_with_reason
Int? clustered_num_outlier_samples = PlotSVCountsPerSample.num_outlier_samples
File? metrics_file_clusterbatch = ClusterBatchMetrics.metrics_file
}
}


}
43 changes: 33 additions & 10 deletions wdl/FilterBatchSites.wdl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
version 1.0

import "Structs.wdl"
import "PlotSVCountsPerSample.wdl" as sv_counts

workflow FilterBatchSites {
input {
Expand All @@ -12,13 +13,19 @@ workflow FilterBatchSites {
File? depth_vcf
File evidence_metrics
File evidence_metrics_common

String sv_pipeline_docker

# PlotSVCountsPerSample metrics
Int N_IQR_cutoff_plotting = 6

RuntimeAttr? runtime_attr_adjudicate
RuntimeAttr? runtime_attr_rewrite_scores
RuntimeAttr? runtime_attr_filter_annotate_vcf
RuntimeAttr? runtime_attr_merge_pesr_vcfs
RuntimeAttr? runtime_attr_count_svs
RuntimeAttr? runtime_attr_plot_svcounts
RuntimeAttr? runtime_attr_cat_outliers_preview
}
Array[String] algorithms = ["manta", "wham", "melt", "scramble", "depth"]
Expand Down Expand Up @@ -58,6 +65,17 @@ workflow FilterBatchSites {
}
}
call sv_counts.PlotSVCountsPerSample {
input:
prefix = batch,
vcfs=[FilterAnnotateVcf.annotated_vcf[0], FilterAnnotateVcf.annotated_vcf[1], FilterAnnotateVcf.annotated_vcf[2], FilterAnnotateVcf.annotated_vcf[3], FilterAnnotateVcf.annotated_vcf[4]],
N_IQR_cutoff = N_IQR_cutoff_plotting,
sv_pipeline_docker = sv_pipeline_docker,
runtime_attr_count_svs = runtime_attr_count_svs,
runtime_attr_plot_svcounts = runtime_attr_plot_svcounts,
runtime_attr_cat_outliers_preview = runtime_attr_cat_outliers_preview
}
output {
File? sites_filtered_manta_vcf = FilterAnnotateVcf.annotated_vcf[0]
File? sites_filtered_wham_vcf = FilterAnnotateVcf.annotated_vcf[1]
Expand All @@ -67,7 +85,13 @@ workflow FilterBatchSites {
File cutoffs = AdjudicateSV.cutoffs
File scores = RewriteScores.updated_scores
File RF_intermediate_files = AdjudicateSV.RF_intermediate_files
Array[File] sites_filtered_sv_counts = PlotSVCountsPerSample.sv_counts
Array[File] sites_filtered_sv_count_plots = PlotSVCountsPerSample.sv_count_plots
File sites_filtered_outlier_samples_preview = PlotSVCountsPerSample.outlier_samples_preview
File sites_filtered_outlier_samples_with_reason = PlotSVCountsPerSample.outlier_samples_with_reason
Int sites_filtered_num_outlier_samples = PlotSVCountsPerSample.num_outlier_samples
}

}

task AdjudicateSV {
Expand All @@ -79,7 +103,7 @@ task AdjudicateSV {
}
RuntimeAttr default_attr = object {
cpu_cores: 1,
cpu_cores: 1,
mem_gb: 3.75,
disk_gb: 10,
boot_disk_gb: 10,
Expand All @@ -101,7 +125,7 @@ task AdjudicateSV {
mv *_trainable.txt ~{batch}.RF_intermediate_files/
mv *_testable.txt ~{batch}.RF_intermediate_files/
tar -czvf ~{batch}.RF_intermediate_files.tar.gz ~{batch}.RF_intermediate_files

>>>
runtime {
cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
Expand All @@ -125,7 +149,7 @@ task RewriteScores {
}
RuntimeAttr default_attr = object {
cpu_cores: 1,
cpu_cores: 1,
mem_gb: 3.75,
disk_gb: 10,
boot_disk_gb: 10,
Expand All @@ -145,7 +169,7 @@ task RewriteScores {
-m ~{metrics} \
-s ~{scores} \
-o ~{batch}.updated_scores

>>>
runtime {
cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
Expand All @@ -170,7 +194,7 @@ task FilterAnnotateVcf {
}
RuntimeAttr default_attr = object {
cpu_cores: 1,
cpu_cores: 1,
mem_gb: 3.75,
disk_gb: 10,
boot_disk_gb: 10,
Expand Down Expand Up @@ -200,7 +224,7 @@ task FilterAnnotateVcf {

/opt/sv-pipeline/03_variant_filtering/scripts/annotate_RF_evidence.py filtered.corrected_coords.vcf.gz ~{scores} ~{prefix}.with_evidence.vcf
bgzip ~{prefix}.with_evidence.vcf

>>>
runtime {
cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
Expand All @@ -212,5 +236,4 @@ task FilterAnnotateVcf {
maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
}
}
}
11 changes: 10 additions & 1 deletion wdl/GATKSVPipelineBatch.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ workflow GATKSVPipelineBatch {
File contig_ploidy_model_tar
Array[File] gcnv_model_tars

# PlotSVCountsPerSample metrics from ClusterBatch in GATKSVPipelinePhase1
Int? N_IQR_cutoff_plotting

File? outlier_cutoff_table
File qc_definitions

Expand Down Expand Up @@ -204,13 +207,15 @@ workflow GATKSVPipelineBatch {
counts=counts_files_,
bincov_matrix=EvidenceQC.bincov_matrix,
bincov_matrix_index=EvidenceQC.bincov_matrix_index,
N_IQR_cutoff_plotting = N_IQR_cutoff_plotting,
PE_files=pe_files_,
SR_files=sr_files_,
SD_files=sd_files_,
manta_vcfs=manta_vcfs_,
melt_vcfs=melt_vcfs_,
scramble_vcfs=scramble_vcfs_,
wham_vcfs=wham_vcfs_,

cnmops_chrom_file=autosome_file,
cnmops_allo_file=allosome_file,
allosome_contigs=allosome_file,
Expand Down Expand Up @@ -420,6 +425,11 @@ workflow GATKSVPipelineBatch {
File? merged_melt_vcf_index = GATKSVPipelinePhase1.melt_vcf_index
File? merged_wham_vcf = GATKSVPipelinePhase1.wham_vcf
File? merged_wham_vcf_index = GATKSVPipelinePhase1.wham_vcf_index
Array[File] ?clustered_sv_counts = GATKSVPipelinePhase1.clustered_sv_counts
Array[File]? clustered_sv_count_plots = GATKSVPipelinePhase1.clustered_sv_count_plots
File? clustered_outlier_samples_preview = GATKSVPipelinePhase1.clustered_outlier_samples_preview
File? clustered_outlier_samples_with_reason = GATKSVPipelinePhase1.clustered_outlier_samples_with_reason
Int? clustered_num_outlier_samples = GATKSVPipelinePhase1.clustered_num_outlier_samples

File evidence_metrics = GATKSVPipelinePhase1.evidence_metrics
File evidence_metrics_common = GATKSVPipelinePhase1.evidence_metrics_common
Expand All @@ -432,7 +442,6 @@ workflow GATKSVPipelineBatch {
File? sites_filtered_wham_vcf = GATKSVPipelinePhase1.sites_filtered_wham_vcf
File? sites_filtered_melt_vcf = GATKSVPipelinePhase1.sites_filtered_melt_vcf
File? sites_filtered_depth_vcf = GATKSVPipelinePhase1.sites_filtered_depth_vcf

File cutoffs = GATKSVPipelinePhase1.cutoffs
File genotyped_pesr_vcf = GenotypeBatch.genotyped_pesr_vcf
File genotyped_depth_vcf = GenotypeBatch.genotyped_depth_vcf
Expand Down
16 changes: 15 additions & 1 deletion wdl/GATKSVPipelinePhase1.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ workflow GATKSVPipelinePhase1 {
Int pesr_breakend_window
String? pesr_clustering_algorithm
Int? N_IQR_cutoff_plotting
File? baseline_depth_vcf_cluster_batch
File? baseline_manta_vcf_cluster_batch
File? baseline_wham_vcf_cluster_batch
Expand All @@ -183,6 +185,9 @@ workflow GATKSVPipelinePhase1 {
RuntimeAttr? runtime_attr_gatk_to_svtk_vcf_depth_cluster_batch
RuntimeAttr? runtime_override_concat_vcfs_depth_cluster_batch
RuntimeAttr? runtime_attr_exclude_intervals_pesr_cluster_batch
RuntimeAttr? runtime_attr_count_svs
RuntimeAttr? runtime_attr_plot_svcounts
RuntimeAttr? runtime_attr_cat_outliers_preview
############################################################
## GenerateBatchMetrics
Expand Down Expand Up @@ -358,6 +363,7 @@ workflow GATKSVPipelinePhase1 {
pesr_interval_overlap=pesr_interval_overlap,
pesr_breakend_window=pesr_breakend_window,
pesr_clustering_algorithm=pesr_clustering_algorithm,
N_IQR_cutoff_plotting = N_IQR_cutoff_plotting,
run_module_metrics=run_clusterbatch_metrics,
linux_docker=linux_docker,
sv_pipeline_base_docker=sv_pipeline_base_docker,
Expand All @@ -384,7 +390,10 @@ workflow GATKSVPipelinePhase1 {
runtime_attr_svcluster_depth=runtime_attr_svcluster_depth_cluster_batch,
runtime_attr_gatk_to_svtk_vcf_depth=runtime_attr_gatk_to_svtk_vcf_depth_cluster_batch,
runtime_override_concat_vcfs_depth=runtime_override_concat_vcfs_depth_cluster_batch,
runtime_attr_exclude_intervals_pesr=runtime_attr_exclude_intervals_pesr_cluster_batch
runtime_attr_exclude_intervals_pesr=runtime_attr_exclude_intervals_pesr_cluster_batch,
runtime_attr_count_svs = runtime_attr_count_svs,
runtime_attr_plot_svcounts = runtime_attr_plot_svcounts,
runtime_attr_cat_outliers_preview = runtime_attr_cat_outliers_preview
}
call batchmetrics.GenerateBatchMetrics as GenerateBatchMetrics {
Expand Down Expand Up @@ -500,6 +509,11 @@ workflow GATKSVPipelinePhase1 {
File? melt_vcf_index = ClusterBatch.clustered_melt_vcf_index
File? scramble_vcf = ClusterBatch.clustered_scramble_vcf
File? scramble_vcf_index = ClusterBatch.clustered_scramble_vcf_index
Array[File]? clustered_sv_counts = ClusterBatch.clustered_sv_counts
Array[File]? clustered_sv_count_plots = ClusterBatch.clustered_sv_count_plots
File? clustered_outlier_samples_preview = ClusterBatch.clustered_outlier_samples_preview
File? clustered_outlier_samples_with_reason = ClusterBatch.clustered_outlier_samples_with_reason
Int? clustered_num_outlier_samples = ClusterBatch.clustered_num_outlier_samples

File? metrics_file_clusterbatch = ClusterBatch.metrics_file_clusterbatch

Expand Down

0 comments on commit e2e76a8

Please sign in to comment.