From daeb3e20c3604c33c915d78f0183d98088a6db13 Mon Sep 17 00:00:00 2001 From: Miguel Covarrubias Date: Tue, 27 Jun 2023 14:09:07 -0400 Subject: [PATCH] Non-GVS bits required for GVS [VS-971] (#8362) --- .../run_vcf_site_level_filtering_wdl.sh | 7 ++++ .../vcf_site_level_filtering.json | 5 +-- .../JointVcfFiltering.wdl | 36 +++++++++++++++++-- .../bigquery/BigQueryResultAndStatistics.java | 2 +- .../{ => gvs}/bigquery/BigQueryUtils.java | 15 +++++++- .../{ => gvs}/bigquery/GATKAvroReader.java | 2 +- .../bigquery/StorageAPIAvroReader.java | 2 +- ...ageAPIAvroReaderAndBigQueryStatistics.java | 2 +- .../{ => gvs}/bigquery/TableReference.java | 2 +- .../utils/variant/GATKVCFConstants.java | 4 ++- .../bigquery/BigQueryUtilsUnitTest.java | 2 +- 11 files changed, 66 insertions(+), 13 deletions(-) rename src/main/java/org/broadinstitute/hellbender/utils/{ => gvs}/bigquery/BigQueryResultAndStatistics.java (88%) rename src/main/java/org/broadinstitute/hellbender/utils/{ => gvs}/bigquery/BigQueryUtils.java (97%) rename src/main/java/org/broadinstitute/hellbender/utils/{ => gvs}/bigquery/GATKAvroReader.java (80%) rename src/main/java/org/broadinstitute/hellbender/utils/{ => gvs}/bigquery/StorageAPIAvroReader.java (99%) rename src/main/java/org/broadinstitute/hellbender/utils/{ => gvs}/bigquery/StorageAPIAvroReaderAndBigQueryStatistics.java (89%) rename src/main/java/org/broadinstitute/hellbender/utils/{ => gvs}/bigquery/TableReference.java (96%) rename src/test/java/org/broadinstitute/hellbender/utils/{ => gvs}/bigquery/BigQueryUtilsUnitTest.java (99%) diff --git a/scripts/vcf_site_level_filtering_cromwell_tests/run_vcf_site_level_filtering_wdl.sh b/scripts/vcf_site_level_filtering_cromwell_tests/run_vcf_site_level_filtering_wdl.sh index 7eadf76e5d1..f25ad6bb191 100644 --- a/scripts/vcf_site_level_filtering_cromwell_tests/run_vcf_site_level_filtering_wdl.sh +++ b/scripts/vcf_site_level_filtering_cromwell_tests/run_vcf_site_level_filtering_wdl.sh @@ -32,6 +32,13 @@ sed -r "s/__GATK_DOCKER__/broadinstitute\/gatk\:$HASH_TO_USE/g" $CROMWELL_TEST_D echo "Running Filtering WDL through cromwell" +cat > ${WORKING_DIR}/src/test/resources/cromwell_monitoring_script.sh < monitoring.log & + fi + gatk --java-options "-Xmx~{default=6 runtime_attributes.command_mem_gb}G" \ ExtractVariantAnnotations \ -V ~{input_vcf} \ @@ -167,6 +184,7 @@ task ExtractVariantAnnotations { File? unlabeled_annotations_hdf5 = "~{output_prefix}.extract.unlabeled.annot.hdf5" File extracted_vcf = "~{output_prefix}.extract.vcf.gz" # this line will break if extra_args includes the do-not-gzip-vcf-output argument File extracted_vcf_idx = "~{output_prefix}.extract.vcf.gz.tbi" # this line will break if extra_args includes the do-not-gzip-vcf-output argument + File? monitoring_log = "monitoring.log" } } @@ -179,6 +197,7 @@ task TrainVariantAnnotationsModel { File? hyperparameters_json String output_prefix String? extra_args + File? monitoring_script String gatk_docker File? gatk_override @@ -190,6 +209,10 @@ task TrainVariantAnnotationsModel { set -e export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk_override} + if [ -s ~{monitoring_script} ]; then + bash ~{monitoring_script} > monitoring.log & + fi + gatk --java-options "-Xmx~{default=6 runtime_attributes.command_mem_gb}G" \ TrainVariantAnnotationsModel \ --annotations-hdf5 ~{annotations_hdf5} \ @@ -213,6 +236,7 @@ task TrainVariantAnnotationsModel { output { Array[File] model_files = glob("~{output_prefix}.train.*") + File? monitoring_log = "monitoring.log" } } @@ -228,6 +252,7 @@ task ScoreVariantAnnotations { String model_prefix Array[File] model_files String? extra_args + File? monitoring_script String gatk_docker File? gatk_override @@ -245,6 +270,10 @@ task ScoreVariantAnnotations { command { set -e + if [ -s ~{monitoring_script} ]; then + bash ~{monitoring_script} > monitoring.log & + fi + export GATK_LOCAL_JAR=~{default="/root/gatk.jar" gatk_override} mkdir model-files @@ -276,5 +305,6 @@ task ScoreVariantAnnotations { File scored_vcf_idx = "~{output_prefix}.score.vcf.gz.tbi" # this line will break if extra_args includes the do-not-gzip-vcf-output argument File? annotations_hdf5 = "~{output_prefix}.score.annot.hdf5" # this file will only be produced if the number of sites scored is nonzero File? scores_hdf5 = "~{output_prefix}.score.scores.hdf5" # this file will only be produced if the number of sites scored is nonzero + File? monitoring_log = "monitoring.log" } } \ No newline at end of file diff --git a/src/main/java/org/broadinstitute/hellbender/utils/bigquery/BigQueryResultAndStatistics.java b/src/main/java/org/broadinstitute/hellbender/utils/gvs/bigquery/BigQueryResultAndStatistics.java similarity index 88% rename from src/main/java/org/broadinstitute/hellbender/utils/bigquery/BigQueryResultAndStatistics.java rename to src/main/java/org/broadinstitute/hellbender/utils/gvs/bigquery/BigQueryResultAndStatistics.java index bb49d82011c..cc878e54b5a 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/bigquery/BigQueryResultAndStatistics.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/gvs/bigquery/BigQueryResultAndStatistics.java @@ -1,4 +1,4 @@ -package org.broadinstitute.hellbender.utils.bigquery; +package org.broadinstitute.hellbender.utils.gvs.bigquery; import com.google.cloud.bigquery.JobStatistics; import com.google.cloud.bigquery.TableResult; diff --git a/src/main/java/org/broadinstitute/hellbender/utils/bigquery/BigQueryUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/gvs/bigquery/BigQueryUtils.java similarity index 97% rename from src/main/java/org/broadinstitute/hellbender/utils/bigquery/BigQueryUtils.java rename to src/main/java/org/broadinstitute/hellbender/utils/gvs/bigquery/BigQueryUtils.java index 4cfdb083bca..6937d00603f 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/bigquery/BigQueryUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/gvs/bigquery/BigQueryUtils.java @@ -1,4 +1,4 @@ -package org.broadinstitute.hellbender.utils.bigquery; +package org.broadinstitute.hellbender.utils.gvs.bigquery; import com.google.cloud.bigquery.*; import io.grpc.StatusRuntimeException; @@ -451,6 +451,19 @@ public static StorageAPIAvroReaderAndBigQueryStatistics executeQueryWithStorageA } public static boolean doRowsExistFor(String projectID, String datasetName, String tableName, String columnName, String value) { + String template = "SELECT COUNT(*) FROM `%s.%s.%s` WHERE %s = '%s'"; + String query = String.format(template, projectID, datasetName, tableName, columnName, value); + + BigQueryResultAndStatistics resultAndStatistics = BigQueryUtils.executeQuery(projectID, query, true, null); + for (final FieldValueList row : resultAndStatistics.result.iterateAll()) { + final long count = row.get(0).getLongValue(); + return count != 0; + } + throw new GATKException(String.format("No rows returned from count of `%s.%s.%s` for %s = '%s'", + projectID, datasetName, tableName, columnName, value)); + } + + public static boolean doRowsExistFor(String projectID, String datasetName, String tableName, String columnName, Long value) { String template = "SELECT COUNT(*) FROM `%s.%s.%s` WHERE %s = %s"; String query = String.format(template, projectID, datasetName, tableName, columnName, value); diff --git a/src/main/java/org/broadinstitute/hellbender/utils/bigquery/GATKAvroReader.java b/src/main/java/org/broadinstitute/hellbender/utils/gvs/bigquery/GATKAvroReader.java similarity index 80% rename from src/main/java/org/broadinstitute/hellbender/utils/bigquery/GATKAvroReader.java rename to src/main/java/org/broadinstitute/hellbender/utils/gvs/bigquery/GATKAvroReader.java index 9080e12c48b..34b586a31fc 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/bigquery/GATKAvroReader.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/gvs/bigquery/GATKAvroReader.java @@ -1,4 +1,4 @@ -package org.broadinstitute.hellbender.utils.bigquery; +package org.broadinstitute.hellbender.utils.gvs.bigquery; import htsjdk.samtools.util.CloseableIterator; import org.apache.avro.generic.GenericRecord; diff --git a/src/main/java/org/broadinstitute/hellbender/utils/bigquery/StorageAPIAvroReader.java b/src/main/java/org/broadinstitute/hellbender/utils/gvs/bigquery/StorageAPIAvroReader.java similarity index 99% rename from src/main/java/org/broadinstitute/hellbender/utils/bigquery/StorageAPIAvroReader.java rename to src/main/java/org/broadinstitute/hellbender/utils/gvs/bigquery/StorageAPIAvroReader.java index 5a2daf63519..c276c26a478 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/bigquery/StorageAPIAvroReader.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/gvs/bigquery/StorageAPIAvroReader.java @@ -1,4 +1,4 @@ -package org.broadinstitute.hellbender.utils.bigquery; +package org.broadinstitute.hellbender.utils.gvs.bigquery; import com.google.cloud.bigquery.storage.v1.AvroRows; import com.google.cloud.bigquery.storage.v1.BigQueryReadClient; diff --git a/src/main/java/org/broadinstitute/hellbender/utils/bigquery/StorageAPIAvroReaderAndBigQueryStatistics.java b/src/main/java/org/broadinstitute/hellbender/utils/gvs/bigquery/StorageAPIAvroReaderAndBigQueryStatistics.java similarity index 89% rename from src/main/java/org/broadinstitute/hellbender/utils/bigquery/StorageAPIAvroReaderAndBigQueryStatistics.java rename to src/main/java/org/broadinstitute/hellbender/utils/gvs/bigquery/StorageAPIAvroReaderAndBigQueryStatistics.java index 68871a504b4..8499593f39a 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/bigquery/StorageAPIAvroReaderAndBigQueryStatistics.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/gvs/bigquery/StorageAPIAvroReaderAndBigQueryStatistics.java @@ -1,4 +1,4 @@ -package org.broadinstitute.hellbender.utils.bigquery; +package org.broadinstitute.hellbender.utils.gvs.bigquery; import com.google.cloud.bigquery.JobStatistics; diff --git a/src/main/java/org/broadinstitute/hellbender/utils/bigquery/TableReference.java b/src/main/java/org/broadinstitute/hellbender/utils/gvs/bigquery/TableReference.java similarity index 96% rename from src/main/java/org/broadinstitute/hellbender/utils/bigquery/TableReference.java rename to src/main/java/org/broadinstitute/hellbender/utils/gvs/bigquery/TableReference.java index 49db6c22e2d..66898d7cb32 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/bigquery/TableReference.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/gvs/bigquery/TableReference.java @@ -1,4 +1,4 @@ -package org.broadinstitute.hellbender.utils.bigquery; +package org.broadinstitute.hellbender.utils.gvs.bigquery; import com.google.common.collect.ImmutableList; diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java index 5870d1ad745..a81f3c853da 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java @@ -208,7 +208,9 @@ their names (or descriptions) depend on some threshold. Those filters are not i public static final String VQSR_FAILURE_PREFIX = "low_VQSLOD_"; public static final String VQSR_FAILURE_SNP = VQSR_FAILURE_PREFIX + SNP; public static final String VQSR_FAILURE_INDEL = VQSR_FAILURE_PREFIX + INDEL; - public static final String VQS_SENS_FAILURE_PREFIX = "low_VQS_SENS_"; + // Prefix for a site (SNP/INDEL) that failed calibration sensitivity cutoff. In this case, the site would be a + // failure if the sensitivity is greater than the threshold. + public static final String VQS_SENS_FAILURE_PREFIX = "high_VQS_SENS_"; public static final String VQS_SENS_FAILURE_SNP = VQS_SENS_FAILURE_PREFIX + SNP; public static final String VQS_SENS_FAILURE_INDEL = VQS_SENS_FAILURE_PREFIX + INDEL; diff --git a/src/test/java/org/broadinstitute/hellbender/utils/bigquery/BigQueryUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/utils/gvs/bigquery/BigQueryUtilsUnitTest.java similarity index 99% rename from src/test/java/org/broadinstitute/hellbender/utils/bigquery/BigQueryUtilsUnitTest.java rename to src/test/java/org/broadinstitute/hellbender/utils/gvs/bigquery/BigQueryUtilsUnitTest.java index 3a3aa808e9a..07b5d85e3f3 100644 --- a/src/test/java/org/broadinstitute/hellbender/utils/bigquery/BigQueryUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/utils/gvs/bigquery/BigQueryUtilsUnitTest.java @@ -1,4 +1,4 @@ -package org.broadinstitute.hellbender.utils.bigquery; +package org.broadinstitute.hellbender.utils.gvs.bigquery; import com.google.cloud.bigquery.FieldValueList; import org.apache.avro.generic.GenericRecord;