Skip to content

Commit

Permalink
expose bulk ingest optional parameters
Browse files (browse the repository at this point in the history)
  • Loading branch information
mcovarr committed Jul 8, 2023
1 parent d6fd0dc commit dcbe619
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 0 deletions.
9 changes: 9 additions & 0 deletions scripts/variantstore/wdl/GvsJointVariantCalling.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ workflow GvsJointVariantCalling {
# Beta users have accounts with tighter quotas, and we must work around that
Boolean tighter_gcp_quotas = true
String branch_name = "ah_var_store"
# TODO: decide whether all of these optional bulk ingest parameters should be exposed in this WDL.
String? sample_id_column_name ## Note that a column WILL exist that is the <entity>_id from the table name. However, some users will want to specify an alternate column for the sample_name during ingest
String? vcf_files_column_name
String? vcf_index_files_column_name
String? sample_set_name ## NOTE: currently we only allow the loading of one sample set at a time
}

# the call_set_identifier string is used to name many different things throughout this workflow (BQ tables, vcfs etc),
Expand Down Expand Up @@ -77,6 +82,10 @@ workflow GvsJointVariantCalling {
drop_state = drop_state,
is_beta_user = tighter_gcp_quotas,
branch_name = branch_name,
sample_id_column_name = sample_id_column_name,
vcf_files_column_name = vcf_files_column_name,
vcf_index_files_column_name = vcf_index_files_column_name,
sample_set_name = sample_set_name,
}

output {
Expand Down
8 changes: 8 additions & 0 deletions scripts/variantstore/wdl/GvsQuickstartHailIntegration.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ workflow GvsQuickstartHailIntegration {
String dataset_suffix = "hail"
String? gatk_override
String expected_output_prefix
String? sample_id_column_name ## Note that a column WILL exist that is the <entity>_id from the table name. However, some users will want to specify an alternate column for the sample_name during ingest
String? vcf_files_column_name
String? vcf_index_files_column_name
String? sample_set_name ## NOTE: currently we only allow the loading of one sample set at a time
}

String project_id = "gvs-internal"
Expand All @@ -28,6 +32,10 @@ workflow GvsQuickstartHailIntegration {
gatk_override = gatk_override,
interval_list = interval_list,
expected_output_prefix = expected_output_prefix,
sample_id_column_name = sample_id_column_name,
vcf_files_column_name = vcf_files_column_name,
vcf_index_files_column_name = vcf_index_files_column_name,
sample_set_name = sample_set_name,
}

call ExtractAvroFilesForHail.GvsExtractAvroFilesForHail {
Expand Down
20 changes: 20 additions & 0 deletions scripts/variantstore/wdl/GvsQuickstartIntegration.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ workflow GvsQuickstartIntegration {
String branch_name
Boolean run_vcf_integration = true
Boolean run_hail_integration = true
String? sample_id_column_name ## Note that a column WILL exist that is the <entity>_id from the table name. However, some users will want to specify an alternate column for the sample_name during ingest
String? vcf_files_column_name
String? vcf_index_files_column_name
String? sample_set_name ## NOTE: currently we only allow the loading of one sample set at a time
}

File full_interval_list = "gs://gcp-public-data--broad-references/hg38/v0/wgs_calling_regions.hg38.noCentromeres.noTelomeres.interval_list"
Expand All @@ -57,6 +61,10 @@ workflow GvsQuickstartIntegration {
gatk_override = BuildGATKJar.jar,
interval_list = FilterIntervalListChromosomes.out,
expected_output_prefix = expected_output_prefix,
sample_id_column_name = sample_id_column_name,
vcf_files_column_name = vcf_files_column_name,
vcf_index_files_column_name = vcf_index_files_column_name,
sample_set_name = sample_set_name,
}
call QuickstartHailIntegration.GvsQuickstartHailIntegration as GvsQuickstartHailVQSRClassicIntegration {
input:
Expand All @@ -68,6 +76,10 @@ workflow GvsQuickstartIntegration {
gatk_override = BuildGATKJar.jar,
interval_list = FilterIntervalListChromosomes.out,
expected_output_prefix = expected_output_prefix,
sample_id_column_name = sample_id_column_name,
vcf_files_column_name = vcf_files_column_name,
vcf_index_files_column_name = vcf_index_files_column_name,
sample_set_name = sample_set_name,
}
}

Expand All @@ -81,6 +93,10 @@ workflow GvsQuickstartIntegration {
gatk_override = BuildGATKJar.jar,
interval_list = FilterIntervalListChromosomes.out,
expected_output_prefix = expected_output_prefix,
sample_id_column_name = sample_id_column_name,
vcf_files_column_name = vcf_files_column_name,
vcf_index_files_column_name = vcf_index_files_column_name,
sample_set_name = sample_set_name,
}
call QuickstartVcfIntegration.GvsQuickstartVcfIntegration as QuickstartVcfVQSRClassicIntegration {
input:
Expand All @@ -91,6 +107,10 @@ workflow GvsQuickstartIntegration {
gatk_override = BuildGATKJar.jar,
interval_list = FilterIntervalListChromosomes.out,
expected_output_prefix = expected_output_prefix,
sample_id_column_name = sample_id_column_name,
vcf_files_column_name = vcf_files_column_name,
vcf_index_files_column_name = vcf_index_files_column_name,
sample_set_name = sample_set_name,
}
}
}
8 changes: 8 additions & 0 deletions scripts/variantstore/wdl/GvsQuickstartVcfIntegration.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ workflow GvsQuickstartVcfIntegration {
String drop_state = "FORTY"
String dataset_suffix
File? gatk_override
String? sample_id_column_name ## Note that a column WILL exist that is the <entity>_id from the table name. However, some users will want to specify an alternate column for the sample_name during ingest
String? vcf_files_column_name
String? vcf_index_files_column_name
String? sample_set_name ## NOTE: currently we only allow the loading of one sample set at a time
}
String project_id = "gvs-internal"

Expand Down Expand Up @@ -49,6 +53,10 @@ workflow GvsQuickstartVcfIntegration {
drop_state = drop_state,
interval_list = interval_list,
branch_name = branch_name,
sample_id_column_name = sample_id_column_name,
vcf_files_column_name = vcf_files_column_name,
vcf_index_files_column_name = vcf_index_files_column_name,
sample_set_name = sample_set_name,
}

# Only assert identical outputs if we did not filter (filtering is not deterministic) OR if we are using VQSR Lite (which is deterministic)
Expand Down
8 changes: 8 additions & 0 deletions scripts/variantstore/wdl/GvsUnified.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ workflow GvsUnified {
Boolean extract_do_not_filter_override = false
# End GvsExtractCallset
String branch_name
String? sample_id_column_name ## Note that a column WILL exist that is the <entity>_id from the table name. However, some users will want to specify an alternate column for the sample_name during ingest
String? vcf_files_column_name
String? vcf_index_files_column_name
String? sample_set_name ## NOTE: currently we only allow the loading of one sample set at a time
}

call BulkIngestGenomes.GvsBulkIngestGenomes as BulkIngestGenomes {
Expand All @@ -82,6 +86,10 @@ workflow GvsUnified {
branch_name = branch_name,
interval_list = interval_list,
drop_state = drop_state,
sample_id_column_name = sample_id_column_name,
vcf_files_column_name = vcf_files_column_name,
vcf_index_files_column_name = vcf_index_files_column_name,
sample_set_name = sample_set_name,
}

call PopulateAltAllele.GvsPopulateAltAllele {
Expand Down

0 comments on commit dcbe619

Please sign in to comment.