Skip to content

Commit

Permalink
adding test to match WARP tests edge case (#8928)
Browse files Browse the repository at this point in the history
  • Loading branch information
meganshand authored Jul 30, 2024
1 parent 3d99f22 commit 64dc4b3
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -711,9 +711,38 @@ public void testRemovingAnnotationFromEmptyAttributes() {
args.addReference(hg38Reference)
.addVCF(input)
.addOutput(output)
.add("format-annotations-to-remove","PRI");
.add("format-annotations-to-remove", "PRI");

//make sure it doesn't error
runCommandLine(args);
}

@Test
public void testNoCallWhenGQ0InNonRefAllele() {
final File input = getTestFile("reblockEdgeCase.vcf");
final File output = createTempFile("reblockedgvcf", ".vcf");
final ArgumentsBuilder args = new ArgumentsBuilder();

args.addReference(hg38Reference)
.add("V", input)
.add("do-qual-approx", true)
.add("floor-blocks", true)
.add("GQB", 20)
.add("GQB", 30)
.add("GQB", 40)
.addOutput(output);

runCommandLine(args);

final List<VariantContext> outVCs = VariantContextTestUtils.readEntireVCFIntoMemory(output.getAbsolutePath()).getRight();
boolean foundNoCallSite = false;
for (VariantContext vc : outVCs) {
if (vc.getStart() == 13675308) {
Assert.assertEquals(vc.getEnd(), 13675325);
Assert.assertEquals(vc.getGenotype(0).getGQ(), 0);
foundNoCallSite = true;
}
}
Assert.assertTrue(foundNoCallSite);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
##fileformat=VCFv4.2
##ALT=<ID=NON_REF,Description="Represents any possible alternative allele at this location">
##FILTER=<ID=LowQual,Description="Low quality">
##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block">
##FORMAT=<ID=PGT,Number=1,Type=String,Description="Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another">
##FORMAT=<ID=PID,Number=1,Type=String,Description="Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
##FORMAT=<ID=SB,Number=4,Type=Integer,Description="Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias.">
##GATKCommandLine.HaplotypeCaller=<ID=HaplotypeCaller,Version=3.5-0-g36282e4,Date="Wed Oct 20 15:43:16 GMT 2021",Epoch=1634744596639,CommandLineOptions="analysis_type=HaplotypeCaller input_file=[local.sharded.bam] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[OverclippedRead] disable_read_filter=[] intervals=[/cromwell_root/broad-gotc-staging-cromwell-execution/WholeGenomeGermlineSingleSample/08b145f8-14d9-4a70-973d-b082a55d9a47/call-BamToGvcf/VariantCalling/8e5f3c30-f5a0-4114-b095-f1da9207e892/call-ScatterIntervalList/cacheCopy/glob-cb4648beeaff920acb03de7603c06f98/31scattered.interval_list] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/cromwell_root/gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.fasta nonDeterministicRandomSeed=false disableDithering=false maxRuntime=-1 maxRuntimeUnits=MINUTES downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=500 baq=OFF baqGapOpenPenalty=40.0 refactor_NDN_cigar_string=false fix_misencoded_quality_scores=false allow_potentially_misencoded_quality_scores=false useOriginalQualities=false defaultBaseQualities=-1 performanceLog=null BQSR=null quantize_quals=0 static_quantized_quals=null round_down_quantized=false disable_indel_quals=false emit_original_quals=false preserve_qscores_less_than=6 globalQScorePrior=-1.0 validation_strictness=SILENT remove_program_records=false keep_program_records=false sample_rename_mapping_file=null unsafe=null disable_auto_index_creation_and_locking_when_reading_rods=false no_cmdline_in_header=false sites_only=false never_trim_vcf_format_field=false bcf=false bam_compression=null simplifyBAM=false disable_bam_indexing=false generate_md5=false num_threads=1 num_cpu_threads_per_data_thread=1 num_io_threads=0 monitorThreadEfficiency=false num_bam_file_handles=null read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false generateShadowBCF=false variant_index_type=LINEAR variant_index_parameter=128000 reference_window_stop=0 logging_level=INFO log_to_file=null help=false version=false likelihoodCalculationEngine=PairHMM heterogeneousKmerSizeResolution=COMBO_MIN dbsnp=(RodBinding name= source=UNBOUND) dontTrimActiveRegions=false maxDiscARExtension=25 maxGGAARExtension=300 paddingAroundIndels=150 paddingAroundSNPs=20 comp=[] annotation=[StrandBiasBySample] excludeAnnotation=[ChromosomeCounts, FisherStrand, StrandOddsRatio, QualByDepth] group=[Standard, StandardHCAnnotation] debug=false useFilteredReadsForAnnotations=false emitRefConfidence=GVCF bamOutput=null bamWriterType=CALLED_HAPLOTYPES disableOptimizations=false annotateNDA=false heterozygosity=0.001 indel_heterozygosity=1.25E-4 standard_min_confidence_threshold_for_calling=-0.0 standard_min_confidence_threshold_for_emitting=-0.0 max_alternate_alleles=3 input_prior=[] sample_ploidy=2 genotyping_mode=DISCOVERY alleles=(RodBinding name= source=UNBOUND) contamination_fraction_to_filter=0.0170512 contamination_fraction_per_sample_file=null p_nonref_model=null exactcallslog=null output_mode=EMIT_VARIANTS_ONLY allSitePLs=true gcpHMM=10 pair_hmm_implementation=VECTOR_LOGLESS_CACHING pair_hmm_sub_implementation=ENABLE_ALL always_load_vector_logless_PairHMM_lib=false phredScaledGlobalReadMismappingRate=45 noFpga=false sample_name=null kmerSize=[10, 25] dontIncreaseKmerSizesForCycles=false allowNonUniqueKmersInRef=false numPruningSamples=1 recoverDanglingHeads=false doNotRecoverDanglingBranches=false minDanglingBranchLength=4 consensus=false maxNumHaplotypesInPopulation=128 errorCorrectKmers=false minPruning=2 debugGraphTransformations=false allowCyclesInKmerGraphToGeneratePaths=false graphOutput=null kmerLengthForReadErrorCorrection=25 minObservationsForKmerToBeSolid=20 GVCFGQBands=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 70, 80, 90, 99] indelSizeToEliminateInRefModel=10 min_base_quality_score=10 includeUmappedReads=false useAllelesTrigger=false doNotRunPhysicalPhasing=false keepRG=null justDetermineActiveRegions=false dontGenotype=false dontUseSoftClippedBases=false captureAssemblyFailureBAM=false errorCorrectReads=false pcr_indel_model=CONSERVATIVE maxReadsInRegionPerSample=10000 minReadsPerAlignmentStart=10 mergeVariantsViaLD=false activityProfileOut=null activeRegionOut=null activeRegionIn=null activeRegionExtension=null forceActive=false activeRegionMaxSize=null bandPassSigma=null maxProbPropagationDistance=50 activeProbabilityThreshold=0.002 filter_is_too_short_value=30 do_not_require_softclips_both_ends=false min_mapping_quality_score=20 filter_reads_with_N_cigar=false filter_mismatching_base_and_quals=false filter_bases_not_stored=false">
##GATKCommandLine=<ID=SelectVariants,CommandLine="SelectVariants --output reblockEdgeCase.vcf --variant gs://broad-gotc-test-storage/reblock_gvcf/wgs/scientific/input/G94982.NA12878/NA12878.g.vcf.gz --intervals chr20:13675308-13675330 --apply-jexl-filters-first false --invertSelect false --exclude-non-variants false --exclude-filtered false --preserve-alleles false --remove-unused-alternates false --restrict-alleles-to ALL --keep-original-ac false --keep-original-dp false --mendelian-violation false --invert-mendelian-violation false --mendelian-violation-qual-threshold 0.0 --select-random-fraction 1.0 --remove-fraction-genotypes 0.0 --ignore-non-ref-in-types false --fully-decode false --max-indel-size 2147483647 --min-indel-size 0 --max-filtered-genotypes 2147483647 --min-filtered-genotypes 0 --max-fraction-filtered-genotypes 1.0 --min-fraction-filtered-genotypes 0.0 --max-nocall-number 2147483647 --max-nocall-fraction 1.0 --set-filtered-gt-to-nocall false --allow-nonoverlapping-command-line-samples false --suppress-reference-path false --fail-on-unsorted-genotype false --genomicsdb-max-alternate-alleles 50 --call-genotypes false --genomicsdb-use-bcf-codec false --genomicsdb-shared-posixfs-optimizations false --genomicsdb-use-gcs-hdfs-connector false --interval-set-rule UNION --interval-padding 0 --interval-exclusion-padding 0 --interval-merging-rule ALL --read-validation-stringency SILENT --seconds-between-progress-updates 10.0 --disable-sequence-dictionary-validation false --create-output-bam-index true --create-output-bam-md5 false --create-output-variant-index true --create-output-variant-md5 false --max-variants-per-shard 0 --lenient false --add-output-sam-program-record true --add-output-vcf-command-line true --cloud-prefetch-buffer 40 --cloud-index-prefetch-buffer -1 --disable-bam-index-caching false --sites-only-vcf-output false --help false --version false --showHidden false --verbosity INFO --QUIET false --use-jdk-deflater false --use-jdk-inflater false --gcs-max-retries 20 --gcs-project-for-requester-pays --disable-tool-default-read-filters false",Version="4.6.0.0-7-g3d99f22-SNAPSHOT",Date="July 26, 2024 at 1:07:32 PM EDT">
##GVCFBlock0-1=minGQ=0(inclusive),maxGQ=1(exclusive)
##GVCFBlock1-2=minGQ=1(inclusive),maxGQ=2(exclusive)
##GVCFBlock10-11=minGQ=10(inclusive),maxGQ=11(exclusive)
##GVCFBlock11-12=minGQ=11(inclusive),maxGQ=12(exclusive)
##GVCFBlock12-13=minGQ=12(inclusive),maxGQ=13(exclusive)
##GVCFBlock13-14=minGQ=13(inclusive),maxGQ=14(exclusive)
##GVCFBlock14-15=minGQ=14(inclusive),maxGQ=15(exclusive)
##GVCFBlock15-16=minGQ=15(inclusive),maxGQ=16(exclusive)
##GVCFBlock16-17=minGQ=16(inclusive),maxGQ=17(exclusive)
##GVCFBlock17-18=minGQ=17(inclusive),maxGQ=18(exclusive)
##GVCFBlock18-19=minGQ=18(inclusive),maxGQ=19(exclusive)
##GVCFBlock19-20=minGQ=19(inclusive),maxGQ=20(exclusive)
##GVCFBlock2-3=minGQ=2(inclusive),maxGQ=3(exclusive)
##GVCFBlock20-21=minGQ=20(inclusive),maxGQ=21(exclusive)
##GVCFBlock21-22=minGQ=21(inclusive),maxGQ=22(exclusive)
##GVCFBlock22-23=minGQ=22(inclusive),maxGQ=23(exclusive)
##GVCFBlock23-24=minGQ=23(inclusive),maxGQ=24(exclusive)
##GVCFBlock24-25=minGQ=24(inclusive),maxGQ=25(exclusive)
##GVCFBlock25-26=minGQ=25(inclusive),maxGQ=26(exclusive)
##GVCFBlock26-27=minGQ=26(inclusive),maxGQ=27(exclusive)
##GVCFBlock27-28=minGQ=27(inclusive),maxGQ=28(exclusive)
##GVCFBlock28-29=minGQ=28(inclusive),maxGQ=29(exclusive)
##GVCFBlock29-30=minGQ=29(inclusive),maxGQ=30(exclusive)
##GVCFBlock3-4=minGQ=3(inclusive),maxGQ=4(exclusive)
##GVCFBlock30-31=minGQ=30(inclusive),maxGQ=31(exclusive)
##GVCFBlock31-32=minGQ=31(inclusive),maxGQ=32(exclusive)
##GVCFBlock32-33=minGQ=32(inclusive),maxGQ=33(exclusive)
##GVCFBlock33-34=minGQ=33(inclusive),maxGQ=34(exclusive)
##GVCFBlock34-35=minGQ=34(inclusive),maxGQ=35(exclusive)
##GVCFBlock35-36=minGQ=35(inclusive),maxGQ=36(exclusive)
##GVCFBlock36-37=minGQ=36(inclusive),maxGQ=37(exclusive)
##GVCFBlock37-38=minGQ=37(inclusive),maxGQ=38(exclusive)
##GVCFBlock38-39=minGQ=38(inclusive),maxGQ=39(exclusive)
##GVCFBlock39-40=minGQ=39(inclusive),maxGQ=40(exclusive)
##GVCFBlock4-5=minGQ=4(inclusive),maxGQ=5(exclusive)
##GVCFBlock40-41=minGQ=40(inclusive),maxGQ=41(exclusive)
##GVCFBlock41-42=minGQ=41(inclusive),maxGQ=42(exclusive)
##GVCFBlock42-43=minGQ=42(inclusive),maxGQ=43(exclusive)
##GVCFBlock43-44=minGQ=43(inclusive),maxGQ=44(exclusive)
##GVCFBlock44-45=minGQ=44(inclusive),maxGQ=45(exclusive)
##GVCFBlock45-46=minGQ=45(inclusive),maxGQ=46(exclusive)
##GVCFBlock46-47=minGQ=46(inclusive),maxGQ=47(exclusive)
##GVCFBlock47-48=minGQ=47(inclusive),maxGQ=48(exclusive)
##GVCFBlock48-49=minGQ=48(inclusive),maxGQ=49(exclusive)
##GVCFBlock49-50=minGQ=49(inclusive),maxGQ=50(exclusive)
##GVCFBlock5-6=minGQ=5(inclusive),maxGQ=6(exclusive)
##GVCFBlock50-51=minGQ=50(inclusive),maxGQ=51(exclusive)
##GVCFBlock51-52=minGQ=51(inclusive),maxGQ=52(exclusive)
##GVCFBlock52-53=minGQ=52(inclusive),maxGQ=53(exclusive)
##GVCFBlock53-54=minGQ=53(inclusive),maxGQ=54(exclusive)
##GVCFBlock54-55=minGQ=54(inclusive),maxGQ=55(exclusive)
##GVCFBlock55-56=minGQ=55(inclusive),maxGQ=56(exclusive)
##GVCFBlock56-57=minGQ=56(inclusive),maxGQ=57(exclusive)
##GVCFBlock57-58=minGQ=57(inclusive),maxGQ=58(exclusive)
##GVCFBlock58-59=minGQ=58(inclusive),maxGQ=59(exclusive)
##GVCFBlock59-60=minGQ=59(inclusive),maxGQ=60(exclusive)
##GVCFBlock6-7=minGQ=6(inclusive),maxGQ=7(exclusive)
##GVCFBlock60-70=minGQ=60(inclusive),maxGQ=70(exclusive)
##GVCFBlock7-8=minGQ=7(inclusive),maxGQ=8(exclusive)
##GVCFBlock70-80=minGQ=70(inclusive),maxGQ=80(exclusive)
##GVCFBlock8-9=minGQ=8(inclusive),maxGQ=9(exclusive)
##GVCFBlock80-90=minGQ=80(inclusive),maxGQ=90(exclusive)
##GVCFBlock9-10=minGQ=9(inclusive),maxGQ=10(exclusive)
##GVCFBlock90-99=minGQ=90(inclusive),maxGQ=99(exclusive)
##GVCFBlock99-2147483647=minGQ=99(inclusive),maxGQ=2147483647(exclusive)
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
##INFO=<ID=ExcessHet,Number=1,Type=Float,Description="Phred-scaled p-value for exact test of excess heterozygosity">
##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
##INFO=<ID=RAW_MQ,Number=1,Type=Float,Description="Raw data for RMS Mapping Quality">
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
##contig=<ID=chr20,length=64444167>
##source=SelectVariants
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878
chr20 13675306 . AGAAGGAAGGAAGGAAGGAAGGAAG A,AGAAG,<NON_REF> 0 . DP=26;ExcessHet=3.0103;MLEAC=0,0,0;MLEAF=0.00,0.00,0.00;RAW_MQ=70609.00 GT:AD:DP:GQ:PL:SB 0/0:8,0,0,0:8:24:0,44,673,41,397,607,24,380,377,360:8,0,0,0
chr20 13675308 . AAGGAAGGAAGGAAG A,AGAAG,<NON_REF> 0 . DP=26;ExcessHet=3.0103;MLEAC=0,0,0;MLEAF=0.00,0.00,0.00;RAW_MQ=70609.00 GT:AD:DP:GQ:PL:SB 0/0:0,0,0,0:0:0:0,20,313,18,39,249,0,21,18,0:0,0,0,0
chr20 13675322 . G A,<NON_REF> 0 . DP=26;ExcessHet=3.0103;MLEAC=0,0;MLEAF=0.00,0.00;RAW_MQ=70609.00 GT:AD:DP:GQ:PL:SB 0/0:6,0,0:6:19:0,19,252,19,252,252:4,2,0,0
chr20 13675323 . G <NON_REF> . . END=13675325 GT:DP:GQ:MIN_DP:PL 0/0:26:0:25:0,0,25
chr20 13675326 . G A,<NON_REF> 0.03 . DP=26;ExcessHet=3.0103;MLEAC=0,0;MLEAF=0.00,0.00;RAW_MQ=70609.00 GT:AD:DP:GQ:PGT:PID:PL:SB 0/1:0,0,0:0:0:0|1:13675295_AAG_A:1,0,0,2,2,3:0,0,0,0
chr20 13675327 . G <NON_REF> . . END=13675329 GT:DP:GQ:MIN_DP:PL 0/0:26:0:24:0,0,49
chr20 13675330 . G A,<NON_REF> 0 . DP=26;ExcessHet=3.0103;MLEAC=0,0;MLEAF=0.00,0.00;RAW_MQ=70609.00 GT:AD:DP:GQ:PL:SB 0/0:7,0,0:7:22:0,22,316,22,316,316:6,1,0,0

0 comments on commit 64dc4b3

Please sign in to comment.