Skip to content

Commit

Permalink
VS-1113 Update VQSR and VETS naming (#8948)
Browse files (browse the repository at this point in the history)
* Refactor:
  * VQSR Classic -> VQSR
  * VQSR Lite -> VETS
* Point to new truth
  • Loading branch information
gbggrant authored Aug 16, 2024
1 parent 3a3b90a commit b51fb02
Show file tree
Hide file tree
Showing 18 changed files with 161 additions and 166 deletions.
6 changes: 3 additions & 3 deletions scripts/variantstore/repo/generate_git_filter_repo_command.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ ah_var_store_branch_point() {
files_added_on_ah_var_store() {
# Look for files added to ah_var_store since the branch point. Note that these files were not necessarily *uniquely*
# added to ah_var_store and might represent cherry picks from master (e.g. scripts and build files for the migration
# from Travis to GitHub Actions, VQSR Lite work, etc.)
# from Travis to GitHub Actions, VETS work, etc.)
git diff "$(ah_var_store_branch_point)" $variants_branch --name-status | grep -E '^A' | cut -f 2-
}

Expand All @@ -46,8 +46,8 @@ files_added_on_ah_var_store_not_on_master() {

files_deleted_from_master() {
# This intentionally does not use `git diff` as is used in `files_added_on_ah_var_store` since that would only show
# files deleted from the branch point to the head of master. There are numerous files here (mostly related to VQSR
# Lite) where files added to master after the branch point were cherry picked onto ah_var_store and subsequently
# files deleted from the branch point to the head of master. There are numerous files here (mostly related to VETS)
# where files added to master after the branch point were cherry picked onto ah_var_store and subsequently
# deleted from master. This `git log` finds these while the `git diff` does not.
#
# https://waylonwalker.com/git-find-deleted-files/#git-log-diff-filter
Expand Down
8 changes: 4 additions & 4 deletions scripts/variantstore/tieout/legacy_wdl/JointGenotyping.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ workflow JointGenotyping {
}

if (num_gvcfs <= snps_variant_recalibration_threshold) {
call Tasks.SNPsVariantRecalibrator as SNPsVariantRecalibratorClassic {
call Tasks.SNPsVariantRecalibrator {
input:
sites_only_variant_filtered_vcf = SitesOnlyGatherVcf.output_vcf,
sites_only_variant_filtered_vcf_index = SitesOnlyGatherVcf.output_vcf_index,
Expand Down Expand Up @@ -300,9 +300,9 @@ workflow JointGenotyping {
indels_recalibration = IndelsVariantRecalibrator.recalibration,
indels_recalibration_index = IndelsVariantRecalibrator.recalibration_index,
indels_tranches = IndelsVariantRecalibrator.tranches,
snps_recalibration = if defined(SNPsVariantRecalibratorScattered.recalibration) then select_first([SNPsVariantRecalibratorScattered.recalibration])[idx] else select_first([SNPsVariantRecalibratorClassic.recalibration]),
snps_recalibration_index = if defined(SNPsVariantRecalibratorScattered.recalibration_index) then select_first([SNPsVariantRecalibratorScattered.recalibration_index])[idx] else select_first([SNPsVariantRecalibratorClassic.recalibration_index]),
snps_tranches = select_first([SNPGatherTranches.tranches, SNPsVariantRecalibratorClassic.tranches]),
snps_recalibration = if defined(SNPsVariantRecalibratorScattered.recalibration) then select_first([SNPsVariantRecalibratorScattered.recalibration])[idx] else select_first([SNPsVariantRecalibrator.recalibration]),
snps_recalibration_index = if defined(SNPsVariantRecalibratorScattered.recalibration_index) then select_first([SNPsVariantRecalibratorScattered.recalibration_index])[idx] else select_first([SNPsVariantRecalibrator.recalibration_index]),
snps_tranches = select_first([SNPGatherTranches.tranches, SNPsVariantRecalibrator.tranches]),
indel_filter_level = indel_filter_level,
snp_filter_level = snp_filter_level,
use_allele_specific_annotations = allele_specific_annotations,
Expand Down
22 changes: 11 additions & 11 deletions scripts/variantstore/wdl/GvsCalculatePrecisionAndSensitivity.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ workflow GvsCalculatePrecisionAndSensitivity {
variants_docker = effective_variants_docker,
}

call IsVQSRLite {
call IsVETS {
input:
input_vcf = Add_AS_MAX_VQS_SCORE_ToVcf.output_vcf,
basic_docker = effective_basic_docker,
Expand Down Expand Up @@ -153,7 +153,7 @@ workflow GvsCalculatePrecisionAndSensitivity {
vcf_eval_bed_file = vcf_eval_bed_file,
chromosomes = chromosomes,
output_basename = sample_name + "-bq_roc_filtered",
is_vqsr_lite = IsVQSRLite.is_vqsr_lite,
is_vets = IsVETS.is_vets,
ref_fasta = ref_fasta,
real_time_genomics_docker = effective_real_time_genomics_docker,
}
Expand All @@ -169,7 +169,7 @@ workflow GvsCalculatePrecisionAndSensitivity {
chromosomes = chromosomes,
all_records = true,
output_basename = sample_name + "-bq_all",
is_vqsr_lite = IsVQSRLite.is_vqsr_lite,
is_vets = IsVETS.is_vets,
ref_fasta = ref_fasta,
real_time_genomics_docker = effective_real_time_genomics_docker,
}
Expand Down Expand Up @@ -320,13 +320,13 @@ task Add_AS_MAX_VQS_SCORE_ToVcf {
}
}

task IsVQSRLite {
task IsVETS {
input {
File input_vcf
String basic_docker
}

String is_vqsr_lite_file = "is_vqsr_lite_file.txt"
String is_vets_file = "is_vets_file.txt"

command <<<
# Prepend date, time and pwd to xtrace log entries.
Expand All @@ -337,9 +337,9 @@ task IsVQSRLite {
set +o errexit
grep -v '^#' ~{input_vcf} | grep CALIBRATION_SENSITIVITY > /dev/null
if [[ $? -eq 0 ]]; then
echo "true" > ~{is_vqsr_lite_file}
echo "true" > ~{is_vets_file}
else
echo "false" > ~{is_vqsr_lite_file}
echo "false" > ~{is_vets_file}
fi
set -o errexit
>>>
Expand All @@ -351,7 +351,7 @@ task IsVQSRLite {
preemptible: 3
}
output {
Boolean is_vqsr_lite = read_boolean(is_vqsr_lite_file)
Boolean is_vets = read_boolean(is_vets_file)
}
}

Expand Down Expand Up @@ -406,15 +406,15 @@ task EvaluateVcf {

String output_basename

Boolean is_vqsr_lite
Boolean is_vets

String real_time_genomics_docker
Int cpu = 1
Int memory_mb = 3500
Int disk_size_gb = ceil(2 * size(ref_fasta, "GiB")) + 500
}

String max_score_field_tag = if (is_vqsr_lite == true) then 'MAX_CALIBRATION_SENSITIVITY' else 'MAX_AS_VQSLOD'
String max_score_field_tag = if (is_vets == true) then 'MAX_CALIBRATION_SENSITIVITY' else 'MAX_AS_VQSLOD'

command <<<
chromosomes=( ~{sep=' ' chromosomes} )
Expand All @@ -436,7 +436,7 @@ task EvaluateVcf {
~{if all_records then "--all-records" else ""} \
--roc-subset snp,indel \
--vcf-score-field=INFO.~{max_score_field_tag} \
~{if is_vqsr_lite then "--sort-order ascending" else "--sort-order descending"} \
~{if is_vets then "--sort-order ascending" else "--sort-order descending"} \
-t human_REF_SDF \
-b ~{truth_vcf} \
-e ~{truth_bed}\
Expand Down
50 changes: 25 additions & 25 deletions scripts/variantstore/wdl/GvsCreateFilterSet.wdl
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
version 1.0

import "GvsUtils.wdl" as Utils
import "GvsVQSRClassic.wdl" as VQSRClassic
import "../../vcf_site_level_filtering_wdl/JointVcfFiltering.wdl" as VQSRLite
import "GvsVQSR.wdl" as VQSR
import "../../vcf_site_level_filtering_wdl/JointVcfFiltering.wdl" as VETS

workflow GvsCreateFilterSet {
input {
Expand All @@ -21,16 +21,16 @@ workflow GvsCreateFilterSet {
String? git_hash
File? gatk_override

Boolean use_VQSR_lite = true
Boolean use_VETS = true

Int? INDEL_VQSR_CLASSIC_max_gaussians_override = 4
Int? INDEL_VQSR_CLASSIC_mem_gb_override
Int? SNP_VQSR_CLASSIC_max_gaussians_override = 6
Int? SNP_VQSR_CLASSIC_mem_gb_override
Int? INDEL_VQSR_max_gaussians_override = 4
Int? INDEL_VQSR_mem_gb_override
Int? SNP_VQSR_max_gaussians_override = 6
Int? SNP_VQSR_mem_gb_override

RuntimeAttributes? vqsr_lite_extract_runtime_attributes = {"command_mem_gb": 27}
RuntimeAttributes? vqsr_lite_train_runtime_attributes = {"command_mem_gb": 27}
RuntimeAttributes? vqsr_lite_score_runtime_attributes = {"command_mem_gb": 15}
RuntimeAttributes? vets_extract_runtime_attributes = {"command_mem_gb": 27}
RuntimeAttributes? vets_train_runtime_attributes = {"command_mem_gb": 27}
RuntimeAttributes? vets_score_runtime_attributes = {"command_mem_gb": 15}

File? training_python_script
File? scoring_python_script
Expand Down Expand Up @@ -127,10 +127,10 @@ workflow GvsCreateFilterSet {
gatk_docker = effective_gatk_docker,
}

# From this point, the paths diverge depending on whether they're using classic VQSR or VQSR-Lite
# The first branch here is VQSR-Lite, and the second is classic VQSR
if (use_VQSR_lite) {
call VQSRLite.JointVcfFiltering as JointVcfFiltering {
# From this point, the paths diverge depending on whether they're using VQSR or VETS
# The first branch here is VETS, and the second is VQSR
if (use_VETS) {
call VETS.JointVcfFiltering as JointVcfFiltering {
input:
input_vcfs = ExtractFilterTask.output_vcf,
input_vcf_idxs = ExtractFilterTask.output_vcf_index,
Expand All @@ -141,9 +141,9 @@ workflow GvsCreateFilterSet {
resource_args = "--resource:hapmap,training=true,calibration=true gs://gcp-public-data--broad-references/hg38/v0/hapmap_3.3.hg38.vcf.gz --resource:omni,training=true,calibration=true gs://gcp-public-data--broad-references/hg38/v0/1000G_omni2.5.hg38.vcf.gz --resource:1000G,training=true,calibration=false gs://gcp-public-data--broad-references/hg38/v0/1000G_phase1.snps.high_confidence.hg38.vcf.gz --resource:mills,training=true,calibration=true gs://gcp-public-data--broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz --resource:axiom,training=true,calibration=false gs://gcp-public-data--broad-references/hg38/v0/Axiom_Exome_Plus.genotypes.all_populations.poly.hg38.vcf.gz",
extract_extra_args = "-L ${interval_list}",
score_extra_args = "-L ${interval_list}",
extract_runtime_attributes = vqsr_lite_extract_runtime_attributes,
train_runtime_attributes = vqsr_lite_train_runtime_attributes,
score_runtime_attributes = vqsr_lite_score_runtime_attributes,
extract_runtime_attributes = vets_extract_runtime_attributes,
train_runtime_attributes = vets_train_runtime_attributes,
score_runtime_attributes = vets_score_runtime_attributes,
gatk_docker = effective_gatk_docker,
gatk_override = gatk_override,
monitoring_script = "gs://gvs_quickstart_storage/cromwell_monitoring_script.sh",
Expand Down Expand Up @@ -198,12 +198,12 @@ workflow GvsCreateFilterSet {
indel_recal_file = CreateFilteredScoredINDELsVCF.output_vcf,
indel_recal_file_index = CreateFilteredScoredINDELsVCF.output_vcf_index,
project_id = project_id,
useClassic = false
useVQSR = false
}
}

if (!use_VQSR_lite) {
call VQSRClassic.JointVcfFiltering as VQSRClassic {
if (!use_VETS) {
call VQSR.JointVcfFiltering as VQSR {
input:
git_branch_or_tag = git_branch_or_tag,
git_hash = git_hash,
Expand All @@ -218,10 +218,10 @@ workflow GvsCreateFilterSet {
sites_only_variant_filtered_vcf_idx = MergeVCFs.output_vcf_index,
sites_only_variant_filtered_vcfs = ExtractFilterTask.output_vcf,
sites_only_variant_filtered_vcf_idxs = ExtractFilterTask.output_vcf_index,
INDEL_VQSR_max_gaussians_override = INDEL_VQSR_CLASSIC_max_gaussians_override,
INDEL_VQSR_mem_gb_override = INDEL_VQSR_CLASSIC_mem_gb_override,
SNP_VQSR_max_gaussians_override = SNP_VQSR_CLASSIC_max_gaussians_override,
SNP_VQSR_mem_gb_override = SNP_VQSR_CLASSIC_mem_gb_override,
INDEL_VQSR_max_gaussians_override = INDEL_VQSR_max_gaussians_override,
INDEL_VQSR_mem_gb_override = INDEL_VQSR_mem_gb_override,
SNP_VQSR_max_gaussians_override = SNP_VQSR_max_gaussians_override,
SNP_VQSR_mem_gb_override = SNP_VQSR_mem_gb_override,
gatk_docker = effective_gatk_docker,
gatk_override = gatk_override,
}
Expand Down Expand Up @@ -255,7 +255,7 @@ workflow GvsCreateFilterSet {
[CreateFilteredScoredSNPsVCF.monitoring_log],
[CreateFilteredScoredINDELsVCF.monitoring_log],
[PopulateFilterSetInfo.monitoring_log],
select_first([VQSRClassic.monitoring_logs, []]),
select_first([VQSR.monitoring_logs, []]),
[PopulateFilterSetSites.monitoring_log]
]
)
Expand Down
3 changes: 0 additions & 3 deletions scripts/variantstore/wdl/GvsCreateVATfromVDS.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ workflow GvsCreateVATfromVDS {
Int? split_intervals_disk_size_override
Int? split_intervals_mem_override
Int? split_intervals_scatter_count
Boolean use_classic_VQSR = false
Boolean use_reference_disk = true

String? cloud_sdk_docker
Expand Down Expand Up @@ -144,7 +143,6 @@ workflow GvsCreateVATfromVDS {
call GenerateSitesOnlyVcf {
input:
vds_path = select_first([vds_path]),
use_classic_VQSR = use_classic_VQSR,
workspace_project = effective_google_project,
hail_version = effective_hail_version,
hail_wheel = hail_wheel,
Expand Down Expand Up @@ -310,7 +308,6 @@ workflow GvsCreateVATfromVDS {
task GenerateSitesOnlyVcf {
input {
String vds_path
Boolean use_classic_VQSR
String workspace_project
String workspace_bucket
String region
Expand Down
8 changes: 4 additions & 4 deletions scripts/variantstore/wdl/GvsCreateVDS.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ workflow GvsCreateVDS {
Int? cluster_max_age_minutes
Boolean leave_cluster_running_at_end = false
Float? master_memory_fraction
Boolean use_classic_VQSR = false
Boolean use_VQSR = false

String? git_branch_or_tag
String? hail_version
Expand Down Expand Up @@ -111,7 +111,7 @@ workflow GvsCreateVDS {
prefix = cluster_prefix,
vds_path = vds_destination_path,
avro_path = avro_path,
use_classic_VQSR = use_classic_VQSR,
use_VQSR = use_VQSR,
hail_version = effective_hail_version,
hail_wheel = hail_wheel,
hail_temp_path = hail_temp_path,
Expand Down Expand Up @@ -158,7 +158,7 @@ task CreateVds {
String prefix
String vds_path
String avro_path
Boolean use_classic_VQSR
Boolean use_VQSR
Boolean leave_cluster_running_at_end
File hail_gvs_import_script
File gvs_import_script
Expand Down Expand Up @@ -233,7 +233,7 @@ task CreateVds {
"temp-path": "${hail_temp_path}",
"avro-path": "~{avro_path}"
~{', "intermediate-resume-point": ' + intermediate_resume_point}
~{true=', "use-classic-vqsr": ""' false='' use_classic_VQSR}
~{true=', "use-vqsr": ""' false='' use_VQSR}
}
FIN
Expand Down
14 changes: 7 additions & 7 deletions scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ workflow GvsExtractAvroFilesForHail {
String dataset_name
String filter_set_name
String call_set_identifier
Boolean use_VQSR_lite = true
Boolean use_VETS = true
Int scatter_width = 10
String? basic_docker
String? cloud_sdk_docker
Expand Down Expand Up @@ -43,7 +43,7 @@ workflow GvsExtractAvroFilesForHail {
cloud_sdk_docker = effective_cloud_sdk_docker,
}

call Utils.IsVQSRLite {
call Utils.IsVETS {
input:
project_id = project_id,
fq_filter_set_info_table = "~{project_id}.~{dataset_name}.filter_set_info",
Expand Down Expand Up @@ -74,7 +74,7 @@ workflow GvsExtractAvroFilesForHail {
filter_set_name = filter_set_name,
avro_sibling = OutputPath.out,
call_set_identifier = call_set_identifier,
is_vqsr_lite = IsVQSRLite.is_vqsr_lite,
is_vets = IsVETS.is_vets,
variants_docker = effective_variants_docker,
}

Expand Down Expand Up @@ -203,7 +203,7 @@ task ExtractFromSampleInfoTable {

task ExtractFromFilterTables {
meta {
description: "Extracts from the tables: filter_set_sites, filter_set_info/filter_set_info_vqsr_lite, and filter_set_tranches (if using VQSR Classic)"
description: "Extracts from the tables: filter_set_sites, filter_set_info, and filter_set_tranches (if using VQSR)"
# Not dealing with caching for now as that would introduce a lot of complexity.
volatile: true
}
Expand All @@ -214,11 +214,11 @@ task ExtractFromFilterTables {
String filter_set_name
String avro_sibling
String call_set_identifier
Boolean is_vqsr_lite = true
Boolean is_vets = true
String variants_docker
}

String vqs_score_field = if (is_vqsr_lite == true) then 'calibration_sensitivity' else 'vqslod'
String vqs_score_field = if (is_vets == true) then 'calibration_sensitivity' else 'vqslod'

parameter_meta {
avro_sibling: "Cloud path to a file that will be the sibling to the 'avro' 'directory' under which output Avro files will be written."
Expand Down Expand Up @@ -249,7 +249,7 @@ task ExtractFromFilterTables {
ORDER BY location
" --call_set_identifier ~{call_set_identifier} --dataset_name ~{dataset_name} --table_name filter_set_sites --project_id=~{project_id}

if [ ~{is_vqsr_lite} = false ]; then
if [ ~{is_vets} = false ]; then
python3 /app/run_avro_query.py --sql "
EXPORT DATA OPTIONS(
uri='${avro_prefix}/vqsr_tranche_data/vqsr_tranche_data_*.avro', format='AVRO', compression='SNAPPY') AS
Expand Down
Loading

0 comments on commit b51fb02

Please sign in to comment.