Skip to content

Commit

Permalink
Beta is the new Unified
Browse files Browse the repository at this point in the history
  • Loading branch information
mcovarr committed Jul 13, 2023
1 parent ed8a73d commit 1288478
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 203 deletions.
7 changes: 0 additions & 7 deletions .dockstore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -171,13 +171,6 @@ workflows:
branches:
- master
- ah_var_store
- name: GvsUnified
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsUnified.wdl
filters:
branches:
- master
- ah_var_store
- name: GvsJointVariantCalling
subclass: WDL
primaryDescriptorPath: /scripts/variantstore/wdl/GvsJointVariantCalling.wdl
Expand Down
114 changes: 80 additions & 34 deletions scripts/variantstore/wdl/GvsJointVariantCalling.wdl
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
version 1.0

import "GvsUnified.wdl" as GvsUnified
import "GvsBulkIngestGenomes.wdl" as BulkIngestGenomes
import "GvsPopulateAltAllele.wdl" as PopulateAltAllele
import "GvsCreateFilterSet.wdl" as CreateFilterSet
import "GvsPrepareRangesCallset.wdl" as PrepareRangesCallset
import "GvsExtractCallset.wdl" as ExtractCallset

workflow GvsJointVariantCalling {
input {
Expand Down Expand Up @@ -29,6 +33,12 @@ workflow GvsJointVariantCalling {
# Names derived from call_set_identifier. All three use the identical sub() expression, so they
# always hold the same value: the identifier with each pattern match replaced by "-".
# NOTE(review): the pattern appears intended to match runs of whitespace or runs of underscores;
# confirm how the engine treats the `\_` escape.
String extract_output_file_base_name = sub(call_set_identifier, "\\s+|\_+", "-")
String extract_table_prefix = sub(call_set_identifier, "\\s+|\_+", "-")
String filter_set_name = sub(call_set_identifier, "\\s+|\_+", "-")

# Query and destination locations are plain aliases of the workflow's project_id and dataset_name.
String query_project = project_id
String destination_project = project_id
String destination_dataset = dataset_name
# Interpolated as "<destination_project>.<destination_dataset>".
String fq_temp_table_dataset = "~{destination_project}.~{destination_dataset}"

if (false) {
Int extract_maxretries_override = ""
Int extract_preemptible_override = ""
Expand All @@ -48,53 +58,89 @@ workflow GvsJointVariantCalling {

File interval_weights_bed = "gs://broad-public-datasets/gvs/weights/gvs_vet_weights_1kb.bed"

call GvsUnified.GvsUnified {

# Calls the GvsBulkIngestGenomes workflow from the BulkIngestGenomes import, aliased as
# BulkIngestGenomes. Every input is forwarded from the same-named value in this workflow.
# NOTE(review): the *_column_name inputs and sample_set_name presumably describe a sample data
# table read by the ingest workflow; confirm against GvsBulkIngestGenomes.wdl.
call BulkIngestGenomes.GvsBulkIngestGenomes as BulkIngestGenomes {
input:
dataset_name = dataset_name,
project_id = project_id,
gatk_override = gatk_override,
interval_list = interval_list,
drop_state = drop_state,
sample_id_column_name = sample_id_column_name,
vcf_files_column_name = vcf_files_column_name,
vcf_index_files_column_name = vcf_index_files_column_name,
sample_set_name = sample_set_name,
}

# Calls GvsPopulateAltAllele from the PopulateAltAllele import.
call PopulateAltAllele.GvsPopulateAltAllele {
input:
call_set_identifier = call_set_identifier,
# Consuming BulkIngestGenomes.done makes this call depend on the bulk-ingest call's output.
go = BulkIngestGenomes.done,
dataset_name = dataset_name,
project_id = project_id,
}

# Calls GvsCreateFilterSet from the CreateFilterSet import.
# NOTE(review): as shown here, `use_VQSR_lite` and `gatk_override` are each bound twice, and the
# two `use_VQSR_lite` bindings have opposite polarity (`!use_classic_VQSR` vs `use_classic_VQSR`).
# This text looks like a rendered diff with added and removed lines interleaved and no +/- markers,
# so some bindings below are probably leftovers from the removed GvsUnified call. Confirm against
# the actual file before relying on this input list.
call CreateFilterSet.GvsCreateFilterSet {
input:
# Consuming GvsPopulateAltAllele.done makes this call depend on that call's output.
go = GvsPopulateAltAllele.done,
dataset_name = dataset_name,
project_id = project_id,
call_set_identifier = call_set_identifier,
filter_set_name = filter_set_name,
use_VQSR_lite = !use_classic_VQSR,
extract_output_gcs_dir = extract_output_gcs_dir,
destination_dataset = dataset_name,
destination_project = project_id,
extract_do_not_filter_override = extract_do_not_filter_override,
extract_maxretries_override = extract_maxretries_override,
extract_output_file_base_name = extract_output_file_base_name,
extract_preemptible_override = extract_preemptible_override,
extract_scatter_count = extract_scatter_count,
extract_table_prefix = extract_table_prefix,
fq_temp_table_dataset = "~{project_id}.~{dataset_name}",
gatk_override = gatk_override,
use_VQSR_lite = use_classic_VQSR,
interval_list = interval_list,
interval_weights_bed = interval_weights_bed,
load_data_batch_size = load_data_batch_size,
load_data_maxretries_override = load_data_maxretries_override,
load_data_preemptible_override = load_data_preemptible_override,
query_labels = query_labels,
query_project = project_id,
sample_names_to_extract = sample_names_to_extract,
split_intervals_disk_size_override = split_intervals_disk_size_override,
split_intervals_mem_override = split_intervals_mem_override,
gatk_override = gatk_override,
INDEL_VQSR_CLASSIC_max_gaussians_override = INDEL_VQSR_CLASSIC_max_gaussians_override,
INDEL_VQSR_CLASSIC_mem_gb_override = INDEL_VQSR_CLASSIC_mem_gb_override,
SNP_VQSR_CLASSIC_max_gaussians_override = SNP_VQSR_CLASSIC_max_gaussians_override,
SNP_VQSR_CLASSIC_mem_gb_override = SNP_VQSR_CLASSIC_mem_gb_override,
}

# Calls GvsPrepareCallset from the PrepareRangesCallset import.
call PrepareRangesCallset.GvsPrepareCallset {
input:
call_set_identifier = call_set_identifier,
# Consuming GvsCreateFilterSet.done makes this call depend on that call's output.
go = GvsCreateFilterSet.done,
dataset_name = dataset_name,
project_id = project_id,
extract_table_prefix = extract_table_prefix,
# The next four are workflow-level values derived from project_id and dataset_name.
query_project = query_project,
destination_project = destination_project,
destination_dataset = destination_dataset,
fq_temp_table_dataset = fq_temp_table_dataset,
query_labels = query_labels,
sample_names_to_extract = sample_names_to_extract,
}

# Calls GvsExtractCallset from the ExtractCallset import.
# Several workflow values are renamed on the way in: extract_scatter_count -> scatter_count,
# extract_output_file_base_name -> output_file_base_name, extract_output_gcs_dir -> output_gcs_dir,
# extract_do_not_filter_override -> do_not_filter_override.
call ExtractCallset.GvsExtractCallset {
input:
# Consuming GvsPrepareCallset.done makes this call depend on that call's output.
go = GvsPrepareCallset.done,
dataset_name = dataset_name,
project_id = project_id,
call_set_identifier = call_set_identifier,
extract_table_prefix = extract_table_prefix,
filter_set_name = filter_set_name,
query_project = query_project,
scatter_count = extract_scatter_count,
interval_list = interval_list,
interval_weights_bed = interval_weights_bed,
gatk_override = gatk_override,
output_file_base_name = extract_output_file_base_name,
extract_maxretries_override = extract_maxretries_override,
extract_preemptible_override = extract_preemptible_override,
output_gcs_dir = extract_output_gcs_dir,
split_intervals_disk_size_override = split_intervals_disk_size_override,
split_intervals_mem_override = split_intervals_mem_override,
do_not_filter_override = extract_do_not_filter_override,
# NOTE(review): the bindings from here down are ingest-style inputs (most are also passed to the
# bulk-ingest call) and may be removed-diff lines from the old GvsUnified call rather than real
# GvsExtractCallset inputs. Confirm against the actual file.
drop_state = drop_state,
is_beta_user = tighter_gcp_quotas,
sample_id_column_name = sample_id_column_name,
vcf_files_column_name = vcf_files_column_name,
vcf_index_files_column_name = vcf_index_files_column_name,
sample_set_name = sample_set_name,
}

# Workflow outputs.
# NOTE(review): six outputs are declared twice below, once sourced from GvsUnified and once from
# GvsExtractCallset. This looks like the removed and added sides of a diff shown together; since
# this commit deletes GvsUnified.wdl, the GvsExtractCallset declarations are presumably the live
# ones. Confirm against the actual file.
output {
Array[File] output_vcfs = GvsUnified.output_vcfs
Array[File] output_vcf_indexes = GvsUnified.output_vcf_indexes
Array[File] output_vcf_interval_files = GvsUnified.output_vcf_interval_files
Float total_vcfs_size_mb = GvsUnified.total_vcfs_size_mb
File? sample_name_list = GvsUnified.sample_name_list
File manifest = GvsUnified.manifest
Array[File] output_vcfs = GvsExtractCallset.output_vcfs
Array[File] output_vcf_indexes = GvsExtractCallset.output_vcf_indexes
Array[File] output_vcf_interval_files = GvsExtractCallset.output_vcf_interval_files
Float total_vcfs_size_mb = GvsExtractCallset.total_vcfs_size_mb
File? sample_name_list = GvsExtractCallset.sample_name_list
File manifest = GvsExtractCallset.manifest
# Constant completion flag; presumably consumed by callers as a `go`-style sequencing input,
# mirroring how the calls in this workflow consume `.done`.
Boolean done = true
}
}
1 change: 0 additions & 1 deletion scripts/variantstore/wdl/GvsQuickstartVcfIntegration.wdl
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
version 1.0

import "GvsUnified.wdl" as Unified
import "GvsUtils.wdl" as Utils
import "GvsJointVariantCalling.wdl" as JointVariantCalling

Expand Down
161 changes: 0 additions & 161 deletions scripts/variantstore/wdl/GvsUnified.wdl

This file was deleted.

0 comments on commit 1288478

Please sign in to comment.