From 27599cdc4d9bae76c1adc8733bf74c2516603f55 Mon Sep 17 00:00:00 2001 From: Chris Norman Date: Tue, 6 Aug 2024 12:18:19 -0400 Subject: [PATCH] Support reference bundles. --- .../cmdline/StandardArgumentDefinitions.java | 8 +- .../hellbender/engine/FeatureInput.java | 5 +- .../hellbender/engine/MultiVariantWalker.java | 14 +- .../hellbender/tools/CreateBundle.java | 188 ++++++++++++------ .../CachingIndexedFastaSequenceFile.java | 25 ++- .../AbstractPrintReadsIntegrationTest.java | 56 +++++- .../tools/CreateBundleIntegrationTest.java | 15 +- 7 files changed, 221 insertions(+), 90 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/cmdline/StandardArgumentDefinitions.java b/src/main/java/org/broadinstitute/hellbender/cmdline/StandardArgumentDefinitions.java index 40d6ccde930..f31c8d55655 100644 --- a/src/main/java/org/broadinstitute/hellbender/cmdline/StandardArgumentDefinitions.java +++ b/src/main/java/org/broadinstitute/hellbender/cmdline/StandardArgumentDefinitions.java @@ -56,10 +56,10 @@ private StandardArgumentDefinitions(){} public static final String INTERVALS_SHORT_NAME = "L"; public static final String COMPARISON_SHORT_NAME = "comp"; public static final String READ_INDEX_SHORT_NAME = READ_INDEX_LONG_NAME; - public static final String PRIMARY_INPUT_LONG_NAME = "primary"; - public static final String PRIMARY_INPUT_SHORT_NAME = "PI"; - public static final String SECONDARY_INPUT_LONG_NAME = "secondaryI"; - public static final String SECONDARY_INPUT_SHORT_NAME = "SI"; + public static final String PRIMARY_RESOURCE_LONG_NAME = "primary-resource"; + public static final String PRIMARY_RESOURCE_SHORT_NAME = "PR"; + public static final String SECONDARY_RESOURCE_LONG_NAME = "secondary-resource"; + public static final String SECONDARY_RESOURCE_SHORT_NAME = "SR"; public static final String LENIENT_SHORT_NAME = "LE"; public static final String READ_VALIDATION_STRINGENCY_SHORT_NAME = "VS"; public static final String SAMPLE_ALIAS_SHORT_NAME = "ALIAS"; diff --git a/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java b/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java index 801c92f13c6..e0a108d58ed 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java @@ -54,7 +54,8 @@ public final class FeatureInput extends GATKPath implements S private transient Class> featureCodecClass; /** - * retain any containing bundle in case we need to extract other resources from it + * retain the parent (enclosing) bundle from which this feature input is derived, in case we need to extract + * other resources from it */ private Bundle parentBundle; @@ -148,7 +149,7 @@ public FeatureInput( final Bundle featureBundle, final String name) { super(primaryResourcePath); - // retain the containing bundle for later so we can interrogate it for other resources, like the index + // retain the enclosing bundle for later, so we can interrogate it for other resources such as the index this.parentBundle = featureBundle; if (name != null) { if (primaryResourcePath.getTag() != null) { diff --git a/src/main/java/org/broadinstitute/hellbender/engine/MultiVariantWalker.java b/src/main/java/org/broadinstitute/hellbender/engine/MultiVariantWalker.java index d1e63da1610..34ae1cb3fb8 100644 --- a/src/main/java/org/broadinstitute/hellbender/engine/MultiVariantWalker.java +++ b/src/main/java/org/broadinstitute/hellbender/engine/MultiVariantWalker.java @@ -86,15 +86,17 @@ protected void initializeDrivingVariants() { final List bundles = BundleJSON.toBundleList(IOUtils.getStringFromPath(gatkPath), GATKPath::new); for (final Bundle bundle : bundles) { if (bundle.getPrimaryContentType().equals(BundleResourceType.CT_VARIANT_CONTEXTS)) { - // use the bundle primary resource as the FeatureInput URI, and tear off and attach the - // individual bundle the bundle to the FI as the parent bundle so downstream code can - // extract other resources from it on demand - // note that if the original value from the user has a tag, we can't use it unless there - // is only one input, since FIs have to be unique + // use the bundle primary resource as the FeatureInput URI, and tear off and attach + // the enclosing bundle as the parent bundle for the FI so downstream code can extract + // other resources from it on demand. note that if the original value from the user has + // a tag, we can't propagate it unless there is only one input, since FIs have to be + // unique final FeatureInput bundleFI = new FeatureInput<>( new GATKPath(bundle.getPrimaryResource().getIOPath().get().getURIString()), bundle, - bundles.size() > 1 ? gatkPath.getTag() : "drivingVariants" + bundles.size() > 1 ? + gatkPath.getTag() : + "drivingVariants" ); if (drivingVariantsFeatureInputs.contains(bundleFI)) { throw new UserException.BadInput("Feature inputs must be unique: " + gatkPath); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/CreateBundle.java b/src/main/java/org/broadinstitute/hellbender/tools/CreateBundle.java index cf943b0b62a..a5b392cb0b2 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/CreateBundle.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/CreateBundle.java @@ -1,7 +1,9 @@ package org.broadinstitute.hellbender.tools; import htsjdk.beta.io.bundle.*; +import htsjdk.beta.plugin.registry.HaploidReferenceResolver; import htsjdk.beta.plugin.variants.VariantsBundle; +import htsjdk.io.HtsPath; import htsjdk.samtools.util.FileExtensions; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -21,7 +23,44 @@ /** * Create a bundle (JSON) file for use with a GATK tool. * - * other inputs are NEVER inferred, and must always be provided with a content type tag. + * Since most bundles will contain a primary resource plus at least one secondary resource (typically an index), + * the tool will attempt to infer standard secondary resources(s) for a given primary resource if no secondary resource + * is explicitly provided on the command line. Inferred secondary resources are automatically added to the resulting + * bundle. Secondary resource inference can be suppressed by using the --suppress-resource-resolution argument. + * + * Each resource in a bundle must have an associated content type tag. Content types for each resource are either + * specified on the command line via argument tags, or inferred by the tool. For the primary and secondary resources, + * when no content type argument tag is provided, the tool will attempt to infer the content type from the file + * extension. However, the content type for "other" resources (resources that are nether primary nor secondary resources) + * are NEVER inferred, and must always include a content type argument tag. + * + * Bundle output file names must end with the suffix ".json". + * + * Common examples: + * + * VCF Bundles: + * + * 1) Create a resource bundle for a VCF. Let the tool determine the content types, and resolve the secondary resources + * (which for vcfs is the companion index) automatically by finding a sibling index file. If the sibling file cannot + * be found, an exception wil lbe thrown: + * + * CreateBundle --primary path/to/my.vcf --output mybundle.json + * + * 2) Create a resource bundle for a VCF. Let the tool determine the content types, but suppress resolution of the secondary + * resources (which for vcfs is the companion index). The resulting bundle will contain only the vcf resource: + * + * CreateBundle --primary path/to/my.vcf --output mybundle.json + * + * 3) Create a resource bundle for a VCF. Let the tool determine the content type, but specify the secondary + * index resource explicitly (which suppresses secondary resolution). The resulting bundle will contain the vcf + * and index resources: + * + * CreateBundle --primary path/to/my.vcf --secondary some/other/path/to/vcd.idx --output mybundle.json + * + * Reference bundles: create a bundle using explicitly provided values and content types for the primary and + * secondary resources: + * + * CreateBundle --primary: path/to/my.fa */ @DocumentedFeature @CommandLineProgramProperties( @@ -32,38 +71,40 @@ public class CreateBundle extends CommandLineProgram { protected static final Logger logger = LogManager.getLogger(CreateBundle.class); - public static final String SUPPRESS_INDEX_RESOLUTION_FULL_NAME = "suppress-index-resolution"; - public static final String OTHER_INPUT_FULL_NAME = "other-input"; - - @Argument(fullName = StandardArgumentDefinitions.PRIMARY_INPUT_LONG_NAME, - shortName = StandardArgumentDefinitions.PRIMARY_INPUT_SHORT_NAME, - doc="Path to the primary bundle input (content type will be inferred if no content type tag is specified)") - GATKPath primaryInput; + @Argument(fullName = StandardArgumentDefinitions.PRIMARY_RESOURCE_LONG_NAME, + shortName = StandardArgumentDefinitions.PRIMARY_RESOURCE_SHORT_NAME, + doc="Path to the primary bundle resource (content type will be inferred if no content type tag is specified)") + GATKPath primaryResource; - @Argument(fullName = StandardArgumentDefinitions.SECONDARY_INPUT_LONG_NAME, - shortName = StandardArgumentDefinitions.SECONDARY_INPUT_SHORT_NAME, - doc = "Path to a secondary bundle input for" + StandardArgumentDefinitions.PRIMARY_INPUT_LONG_NAME - + "(usually an index). The type will be inferred if no content type tag is specified. If no "+ - " secondary input is specified, an index for the primary bundle file will be automatically inferred unless " + - SUPPRESS_INDEX_RESOLUTION_FULL_NAME + " is specified.", + @Argument(fullName = StandardArgumentDefinitions.SECONDARY_RESOURCE_LONG_NAME, + shortName = StandardArgumentDefinitions.SECONDARY_RESOURCE_SHORT_NAME, + doc = "Path to a secondary bundle resource for" + StandardArgumentDefinitions.PRIMARY_RESOURCE_LONG_NAME + + "(usually an index). The content type will be inferred if no content type tag is specified. If no "+ + " secondary resource is specified, standard secondary resources for the primary bundle resource " + + " will also automatically be inferred unless " + SUPPRESS_SECONDARY_RESOURCE_RESOLUTION_FULL_NAME + + " is specified.", optional = true) - GATKPath secondaryInput; + GATKPath secondaryResource; - @Argument(fullName = OTHER_INPUT_FULL_NAME, - shortName = OTHER_INPUT_FULL_NAME, - doc = "Path to other bundle inputs for " + StandardArgumentDefinitions.PRIMARY_INPUT_LONG_NAME - + " A content type tag MUST be provided for each other input.", + public static final String OTHER_RESOURCE_FULL_NAME = "other-resource"; + @Argument(fullName = OTHER_RESOURCE_FULL_NAME, + shortName = OTHER_RESOURCE_FULL_NAME, + doc = "Path to other bundle resources for " + StandardArgumentDefinitions.PRIMARY_RESOURCE_LONG_NAME + + ". The content is not inferred for \"other\" (non-primary and non-secondary) resources, so a" + + " content type tag MUST be provided for each \"other\" resource.", optional = true) - List otherInputs; + List otherResources; - @Argument(fullName = SUPPRESS_INDEX_RESOLUTION_FULL_NAME, - doc ="Don't attempt to resolve the primary input's index file (defaults to false) if no secondary input is provided", + public static final String SUPPRESS_SECONDARY_RESOURCE_RESOLUTION_FULL_NAME = "suppress-resource-resolution"; + @Argument(fullName = SUPPRESS_SECONDARY_RESOURCE_RESOLUTION_FULL_NAME, + doc ="Don't attempt to resolve the primary resource's secondary resources if no secondary resource is explicitly" + + " provided (defaults to false) ", optional = true) - boolean suppressIndexResolution = false; + boolean suppressResourceResolution = false; @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, - doc = "Path the output bundle file (must end with the suffix .json.") + doc = "Path the output bundle file (must end with the suffix " + BundleJSON.BUNDLE_EXTENSION + ")") GATKPath outputBundlePath; private enum BundleType { @@ -75,8 +116,8 @@ private enum BundleType { @Override protected String[] customCommandLineValidation() { - if (!outputBundlePath.toString().endsWith(".json")){ - return new String[]{"Output bundle path must end with the suffix .json"}; + if (!outputBundlePath.toString().endsWith(BundleJSON.BUNDLE_EXTENSION)){ + return new String[]{ String.format("Output bundle path must end with the suffix %s", BundleJSON.BUNDLE_EXTENSION) }; } outputBundleType = determinePrimaryContentType(); return super.customCommandLineValidation(); @@ -87,8 +128,7 @@ protected Object doWork() { try (final BufferedWriter writer = Files.newBufferedWriter(outputBundlePath.toPath(), StandardOpenOption.CREATE)) { final Bundle bundle = switch (outputBundleType) { case VCF -> createVCFBundle(); - case REFERENCE -> - throw new IllegalArgumentException ("Reference bundles are not yet supported"); + case REFERENCE -> createHaploidReferenceBundle(); case OTHER -> createOtherBundle(); }; writer.write(BundleJSON.toJSON(bundle)); @@ -102,63 +142,63 @@ private BundleType determinePrimaryContentType() { BundleType bundleType; // determine the type of bundle to create; consult the tag attributes if any, otherwise try to infer from the - // primary input file extension - final String primaryContentTag = primaryInput.getTag(); + // primary resource's file extension + final String primaryContentTag = primaryResource.getTag(); if (primaryContentTag != null && !primaryContentTag.isEmpty()) { if (primaryContentTag.equals(BundleResourceType.CT_VARIANT_CONTEXTS)) { bundleType = BundleType.VCF; } else if (primaryContentTag.equals(BundleResourceType.CT_HAPLOID_REFERENCE)) { bundleType = BundleType.REFERENCE; } else { - logger.info(String.format("Primary input content type %s for %s not recognized. A bundle will be created using content typse from the provided argument tags.", + logger.info(String.format("Primary input content type %s for %s not recognized. A bundle will be created using content types from the provided argument tags.", primaryContentTag, - primaryInput)); + primaryResource)); bundleType = BundleType.OTHER; } } else { - logger.info(String.format("A content type for the primary input was not provided. Attempting to infer the content type from the %s extension.", primaryInput)); - bundleType = inferPrimaryContentType(primaryInput); + logger.info(String.format("A content type for the primary input was not provided. Attempting to infer the content type from the %s extension.", primaryResource)); + bundleType = inferPrimaryContentType(primaryResource); } return bundleType; } - private BundleType inferPrimaryContentType(final GATKPath primaryInput) { + private BundleType inferPrimaryContentType(final GATKPath primaryResource) { logger.info("Attempting to infer bundle content type from file extension."); - if (FileExtensions.VCF_LIST.stream().anyMatch(ext -> primaryInput.hasExtension(ext))) { + if (FileExtensions.VCF_LIST.stream().anyMatch(ext -> primaryResource.hasExtension(ext))) { return BundleType.VCF; - } else if (FileExtensions.FASTA.stream().anyMatch(ext -> primaryInput.hasExtension(ext))) { + } else if (FileExtensions.FASTA.stream().anyMatch(ext -> primaryResource.hasExtension(ext))) { return BundleType.REFERENCE; } else { - throw new IllegalArgumentException(String.format("Unable to infer bundle content type from file extension %s. A content type must be provided as part of the argument.", primaryInput)); + throw new IllegalArgumentException(String.format("Unable to infer bundle content type from file extension %s. A content type must be provided as part of the argument.", primaryResource)); } } private Bundle createVCFBundle() { final Collection bundleResources = new ArrayList<>(); - bundleResources.add(new IOPathResource(primaryInput, BundleResourceType.CT_VARIANT_CONTEXTS)); - if (secondaryInput != null) { - final String secondaryContentType = secondaryInput.getTag(); + bundleResources.add(new IOPathResource(primaryResource, BundleResourceType.CT_VARIANT_CONTEXTS)); + if (secondaryResource != null) { + final String secondaryContentType = secondaryResource.getTag(); if (secondaryContentType == null) { - logger.info(String.format("A content type for the secondary input was not provided. Assuming %s is an index.", secondaryInput)); - bundleResources.add(new IOPathResource(secondaryInput, BundleResourceType.CT_VARIANTS_INDEX)); + logger.info(String.format("A content type for the secondary resource was not provided. Assuming %s is an index.", secondaryResource)); + bundleResources.add(new IOPathResource(secondaryResource, BundleResourceType.CT_VARIANTS_INDEX)); } else { - bundleResources.add(new IOPathResource(secondaryInput, secondaryContentType)); + bundleResources.add(new IOPathResource(secondaryResource, secondaryContentType)); } - } else if (!suppressIndexResolution) { - // secondary input is null, and index resolution suppression is off - final Optional indexPath = VariantsBundle.resolveIndex(primaryInput, GATKPath::new); + } else if (!suppressResourceResolution) { + // secondary resources is null, and resource resolution suppression is off + final Optional indexPath = VariantsBundle.resolveIndex(primaryResource, GATKPath::new); if (indexPath.isEmpty()) { throw new IllegalArgumentException( String.format( "Could not infer an index for %s, you must either specify the index path as a secondary input on the command line or specify the %s argument.", - primaryInput.getRawInputString(), - SUPPRESS_INDEX_RESOLUTION_FULL_NAME)); + primaryResource.getRawInputString(), + SUPPRESS_SECONDARY_RESOURCE_RESOLUTION_FULL_NAME)); } bundleResources.add(new IOPathResource(indexPath.get(), BundleResourceType.CT_VARIANTS_INDEX)); } - if (otherInputs != null) { - for (final GATKPath otherInput : otherInputs) { + if (otherResources != null) { + for (final GATKPath otherInput : otherResources) { final String otherContentType = otherInput.getTag(); if (otherContentType == null) { throw new IllegalArgumentException( @@ -173,19 +213,51 @@ private Bundle createVCFBundle() { return new VariantsBundle(bundleResources); } + private Bundle createHaploidReferenceBundle() { + final Collection bundleResources = new ArrayList<>(); + if (secondaryResource == null && otherResources == null) { + // infer dictionary and index + return HaploidReferenceResolver.referenceBundleFromFastaPath(primaryResource, GATKPath::new); + } + + bundleResources.add(new IOPathResource(primaryResource, primaryResource.getTag())); + if (secondaryResource != null) { + final String secondaryContentType = secondaryResource.getTag(); + if (secondaryContentType == null) { + throw new IllegalArgumentException(String.format("A content type for the secondary input must be provided.")); + } else { + bundleResources.add(new IOPathResource(secondaryResource, secondaryContentType)); + } + } + if (otherResources != null) { + for (final GATKPath otherInput : otherResources) { + final String otherContentType = otherInput.getTag(); + if (otherContentType == null) { + throw new IllegalArgumentException( + String.format( + "A content type must be provided for \"other\" input %s.", + otherInput.getRawInputString())); + } else { + bundleResources.add(new IOPathResource(otherInput, otherContentType)); + } + } + } + return new Bundle(primaryResource.getTag(), bundleResources); + } + private Bundle createOtherBundle() { final Collection bundleResources = new ArrayList<>(); - bundleResources.add(new IOPathResource(primaryInput, primaryInput.getTag())); - if (secondaryInput != null) { - final String secondaryContentType = secondaryInput.getTag(); + bundleResources.add(new IOPathResource(primaryResource, primaryResource.getTag())); + if (secondaryResource != null) { + final String secondaryContentType = secondaryResource.getTag(); if (secondaryContentType == null) { throw new IllegalArgumentException(String.format("A content type for the secondary input must be provided.")); } else { - bundleResources.add(new IOPathResource(secondaryInput, secondaryContentType)); + bundleResources.add(new IOPathResource(secondaryResource, secondaryContentType)); } } - if (otherInputs != null) { - for (final GATKPath otherInput : otherInputs) { + if (otherResources != null) { + for (final GATKPath otherInput : otherResources) { final String otherContentType = otherInput.getTag(); if (otherContentType == null) { throw new IllegalArgumentException( @@ -197,6 +269,6 @@ private Bundle createOtherBundle() { } } } - return new Bundle(primaryInput.getTag(), bundleResources); + return new Bundle(primaryResource.getTag(), bundleResources); } } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/fasta/CachingIndexedFastaSequenceFile.java b/src/main/java/org/broadinstitute/hellbender/utils/fasta/CachingIndexedFastaSequenceFile.java index 6cd2d678347..88b07140257 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/fasta/CachingIndexedFastaSequenceFile.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/fasta/CachingIndexedFastaSequenceFile.java @@ -1,5 +1,9 @@ package org.broadinstitute.hellbender.utils.fasta; +import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.io.bundle.BundleJSON; +import htsjdk.beta.plugin.IOUtils; +import htsjdk.io.IOPath; import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceRecord; @@ -143,14 +147,24 @@ public CachingIndexedFastaSequenceFile(final Path fasta, boolean preserveAmbigui * @param preserveIUPAC If true, we will keep the IUPAC bases in the FASTA, otherwise they are converted to Ns */ public CachingIndexedFastaSequenceFile(final Path fasta, final long cacheSize, final boolean preserveCase, final boolean preserveIUPAC) { - // Check the FASTA path: - checkFastaPath(fasta); Utils.validate(cacheSize > 0, () -> "Cache size must be > 0 but was " + cacheSize); // Read reference data by creating an IndexedFastaSequenceFile. try { - final ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(fasta, true, true); - sequenceFile = requireIndex(fasta, referenceSequenceFile); + final IOPath fastaPath = new GATKPath(fasta.toUri().toString()); + if (fastaPath.hasExtension(BundleJSON.BUNDLE_EXTENSION)) { + final Bundle referenceBundle = BundleJSON.toBundle(IOUtils.getStringFromPath(fastaPath), GATKPath::new); + final ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFileFromBundle( + referenceBundle, + true, + true); + sequenceFile = requireIndex(fasta, referenceSequenceFile); + } else { + // Check the FASTA path: + checkFastaPath(fasta); + final ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(fasta, GATKPath::new, true, true); + sequenceFile = requireIndex(fasta, referenceSequenceFile); + } this.cacheSize = cacheSize; this.cacheMissBackup = Math.max(cacheSize / 1000, 1); this.preserveCase = preserveCase; @@ -159,9 +173,6 @@ public CachingIndexedFastaSequenceFile(final Path fasta, final long cacheSize, f catch (final IllegalArgumentException e) { throw new UserException.CouldNotReadInputFile(fasta, "Could not read reference sequence. The FASTA must have either a .fasta or .fa extension", e); } - catch (final Exception e) { - throw new UserException.CouldNotReadInputFile(fasta, e); - } } /** diff --git a/src/test/java/org/broadinstitute/hellbender/tools/AbstractPrintReadsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/AbstractPrintReadsIntegrationTest.java index 160d2e43380..220704744a8 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/AbstractPrintReadsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/AbstractPrintReadsIntegrationTest.java @@ -1,5 +1,9 @@ package org.broadinstitute.hellbender.tools; +import htsjdk.beta.io.IOPathUtils; +import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.io.bundle.BundleJSON; +import htsjdk.beta.plugin.registry.HaploidReferenceResolver; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SamReader; @@ -9,6 +13,7 @@ import org.broadinstitute.hellbender.GATKBaseTest; import org.broadinstitute.hellbender.cmdline.ReadFilterArgumentDefinitions; import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.engine.GATKPath; import org.broadinstitute.hellbender.engine.ReadsDataSource; import org.broadinstitute.hellbender.engine.ReadsPathDataSource; import org.broadinstitute.hellbender.engine.filters.ReadLengthReadFilter; @@ -19,6 +24,7 @@ import org.broadinstitute.hellbender.testutils.SamAssertionUtils; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.gcs.BucketUtils; +import org.broadinstitute.hellbender.utils.io.IOUtils; import org.broadinstitute.hellbender.utils.read.GATKRead; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -37,14 +43,20 @@ public void doFileToFile(String fileIn, String extOut, String reference, boolean String samFile = fileIn; final File outFile = GATKBaseTest.createTempFile(samFile + ".", extOut); final File ORIG_BAM = new File(TEST_DATA_DIR, samFile); - final File refFile; + final GATKPath refFile; final ArrayList args = new ArrayList<>(); args.add("--input"); args.add(ORIG_BAM.getAbsolutePath()); args.add("--output"); args.add(outFile.getAbsolutePath()); if (reference != null) { - refFile = new File(TEST_DATA_DIR, reference); - args.add("-R"); args.add(refFile.getAbsolutePath()); + if (reference.endsWith(BundleJSON.BUNDLE_EXTENSION)) { + // the test json files are temporary files, not files in TEST_DATA_DIR + refFile = new GATKPath(reference); + args.add("-R"); args.add(reference); + } else { + refFile = new GATKPath(new File(TEST_DATA_DIR, reference).getAbsolutePath()); + args.add("-R"); args.add(refFile.toString()); + } } else { refFile = null; @@ -55,13 +67,33 @@ public void doFileToFile(String fileIn, String extOut, String reference, boolean } runCommandLine(args); - SamAssertionUtils.assertSamsEqual(outFile, ORIG_BAM, refFile); + SamAssertionUtils.assertSamsEqual(outFile, ORIG_BAM, refFile == null ? null : refFile.toPath().toFile()); if (testMD5) { checkMD5asExpected(outFile); } } + public void doFileToFileUsingReferenceBundle(String fileIn, String extOut, String reference, boolean testMD5) throws Exception { + final String referenceToUse; + if (reference != null) { + // create the bundle, using inference to find the sibling files, then write the bundle out to a temp file + final Bundle referenceBundle = HaploidReferenceResolver.referenceBundleFromFastaPath( + new GATKPath(new File(TEST_DATA_DIR, reference).toPath().toString()), + GATKPath::new); + final GATKPath tempBundlePath = new GATKPath( + IOUtils.createTempFile("printReadsRefBundle", ".json").getAbsolutePath() + ); + IOPathUtils.writeStringToPath(tempBundlePath, BundleJSON.toJSON(referenceBundle)); + referenceToUse = tempBundlePath.toString(); + } else { + referenceToUse = reference; + } + + // no run the regular test, but using the reference bundle + doFileToFile(fileIn, extOut, referenceToUse, testMD5); + } + private void checkMD5asExpected(final File outFile) throws IOException { final File md5File = new File(outFile.getAbsolutePath() + ".md5"); if (md5File.exists()) { @@ -74,8 +106,8 @@ private void checkMD5asExpected(final File outFile) throws IOException { } @Test(dataProvider="testingData") - public void testFileToFile(String fileIn, String extOut, String reference) throws Exception { - doFileToFile(fileIn, extOut, reference, false); + public void testFileToFileWithReferenceBundle(String fileIn, String extOut, String reference) throws Exception { + doFileToFileUsingReferenceBundle(fileIn, extOut, reference, false); } @DataProvider(name="testingData") @@ -120,6 +152,18 @@ public Object[][] testingData() { }; } + @Test(dataProvider="testingData") + public void testFileToFileUsingReferenceBundle(String fileIn, String extOut, String reference) throws Exception { + if (reference != null) { + doFileToFileUsingReferenceBundle(fileIn, extOut, reference, false); + } + } + + @Test(dataProvider="testingData") + public void testFileToFile(String fileIn, String extOut, String reference) throws Exception { + doFileToFile(fileIn, extOut, reference, false); + } + @Test public void testReadThatConsumesNoReferenceBases() throws IOException { final File zeroRefBasesReadBam = new File(TEST_DATA_DIR, "read_consumes_zero_ref_bases.bam"); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/CreateBundleIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/CreateBundleIntegrationTest.java index a39cfd450f4..e79350a18cd 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/CreateBundleIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/CreateBundleIntegrationTest.java @@ -17,7 +17,8 @@ public class CreateBundleIntegrationTest extends CommandLineProgramTest { // force our local paths to use absolute path names to make BundleResource and IOPath equality checks easier, - // since all serialized JSON bundles always contain absolute path names for local files + // since once a bundle is round-tripped/serialized to JSON, the resources will always contain absolute path names + // for local files private final static String LOCAL_VCF = new GATKPath(getTestDataDir() + "/count_variants_withSequenceDict.vcf").getURIString(); private final static String LOCAL_VCF_IDX = new GATKPath(getTestDataDir() + "/count_variants_withSequenceDict.vcf.idx").getURIString(); private final static String LOCAL_VCF_GZIP = new GATKPath("src/test/resources/large/NA24385.vcf.gz").getURIString(); @@ -97,7 +98,7 @@ public Object[][] negativeBundleCases() { // no index file can be inferred {LOCAL_VCF_WITH_NO_INDEX, null, null, null, null, null, false, new VariantsBundle(new GATKPath(LOCAL_VCF_WITH_NO_INDEX))}, - // primary content type not provided and cannot be inferred from the extension + // primary content type not provided, and cannot be inferred from the extension {"primaryFile.ext", null, null, null, null, null, false, null}, // secondary content type not provided {"primaryFile.ext", PRIMARY_CT, "secondaryFile.ext", null, null, null, false, null}, @@ -148,20 +149,20 @@ private void doCreateBundleTest( final List args = new ArrayList<>(); - args.add("--" + StandardArgumentDefinitions.PRIMARY_INPUT_LONG_NAME + (primaryInputTag != null ? ":" + primaryInputTag : "")); + args.add("--" + StandardArgumentDefinitions.PRIMARY_RESOURCE_LONG_NAME + (primaryInputTag != null ? ":" + primaryInputTag : "")); args.add(primaryInput); if (secondaryInput != null) { - args.add("--" + StandardArgumentDefinitions.SECONDARY_INPUT_LONG_NAME + (secondaryInputTag != null ? ":" + secondaryInputTag : "")); + args.add("--" + StandardArgumentDefinitions.SECONDARY_RESOURCE_LONG_NAME + (secondaryInputTag != null ? ":" + secondaryInputTag : "")); args.add(secondaryInput); } if (otherInputs != null) { for (int i = 0; i < otherInputs.size(); i++) { - args.add("--" + CreateBundle.OTHER_INPUT_FULL_NAME + ((otherInputTags != null && otherInputTags.get(i) != null) ? ":" + otherInputTags.get(i) : "")); + args.add("--" + CreateBundle.OTHER_RESOURCE_FULL_NAME + ((otherInputTags != null && otherInputTags.get(i) != null) ? ":" + otherInputTags.get(i) : "")); args.add(otherInputs.get(i) != null ? otherInputs.get(i) : ""); } } if (suppressIndexResolution == true) { - args.add("--" + CreateBundle.SUPPRESS_INDEX_RESOLUTION_FULL_NAME); + args.add("--" + CreateBundle.SUPPRESS_SECONDARY_RESOURCE_RESOLUTION_FULL_NAME); } args.add("--" + StandardArgumentDefinitions.OUTPUT_LONG_NAME); args.add(outputPath.toString()); @@ -170,7 +171,7 @@ private void doCreateBundleTest( final Bundle actualBundle = BundleJSON.toBundle(IOUtils.getStringFromPath(outputPath), GATKPath::new); - // bundle resource order is not preserved when roundtripping through JSON, so compare ignoring order + // bundle resource order is not preserved when round-tripping through JSON, so compare ignoring order Assert.assertTrue(Bundle.equalsIgnoreOrder(actualBundle, expectedBundle)); } }