Skip to content

Commit

Permalink
PrintFileDiagnostics for cram, crai and bai. (#8577)
Browse files Browse the repository at this point in the history
* New experimental tool to print out human-readable file diagnostics for cram/crai/bai files.
  • Loading branch information
cmnbroad authored Dec 9, 2023
1 parent 3b8b5bf commit 5839cbd
Show file tree
Hide file tree
Showing 10 changed files with 1,861 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package org.broadinstitute.hellbender.tools;

import org.broadinstitute.barclay.argparser.*;
import org.broadinstitute.hellbender.cmdline.CommandLineProgram;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.tools.filediagnostics.HTSAnalyzer;
import org.broadinstitute.hellbender.tools.filediagnostics.HTSAnalyzerFactory;
import picard.cmdline.programgroups.OtherProgramGroup;

import java.io.File;

/**
 * A diagnostic tool that prints meta information about a GATK input file.
 *
 * <p>Works on files ending in .cram, .crai, and .bai. Both an input path and a local output
 * file are required arguments.
 *
 * <p>Sample Usage:
 *
 * <pre>
 * gatk PrintFileDiagnostics \
 *   -I input.cram \
 *   -O diagnostics.txt \
 *   -count-limit 10
 * </pre>
 */
@ExperimentalFeature
@WorkflowProperties
// NOTE(review): the summaries below say "to stdout", but the tool requires an output file and
// passes it to the analyzer -- confirm the intended wording against the analyzers.
@CommandLineProgramProperties(
        summary = "Print diagnostic information about a genomics file to stdout",
        oneLineSummary = "Print diagnostic information about a genomics file to stdout",
        programGroup = OtherProgramGroup.class
)
public class PrintFileDiagnostics extends CommandLineProgram {

    /** The file to be analyzed; handed to the analyzer factory together with the output file. */
    @Argument(fullName = StandardArgumentDefinitions.INPUT_LONG_NAME,
            shortName = StandardArgumentDefinitions.INPUT_SHORT_NAME,
            doc = "Input path for diagnostics",
            optional = false,
            common = true)
    @WorkflowInput
    public GATKPath inputPath;

    /**
     * Destination for the diagnostics. This file is produced by the tool, so it is declared as a
     * workflow output rather than a workflow input.
     */
    @Argument(fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME,
            shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME,
            doc = "Output file for diagnostics (must be a local file)",
            optional = false,
            common = true)
    @WorkflowOutput
    public File outputFile;

    /** Upper bound forwarded to the analyzer factory; defaults to 1000. */
    @Argument(shortName="count-limit",
            fullName="count-limit",
            doc="Limit on how much output to emit (.cram only)")
    private long countLimit = 1000;

    // Created in onStartup() and released in onShutdown().
    private HTSAnalyzer htsAnalyzer;

    /**
     * Runs the superclass startup and then obtains the analyzer for the input from
     * {@link HTSAnalyzerFactory}.
     */
    @Override
    protected void onStartup() {
        super.onStartup();
        htsAnalyzer = HTSAnalyzerFactory.getFileAnalyzer(inputPath, outputFile, countLimit);
    }

    /**
     * Runs the analyzer created during startup.
     *
     * @return always {@code 0}
     */
    @Override
    protected Object doWork() {
        htsAnalyzer.analyze();
        return 0;
    }

    /**
     * Closes the analyzer (if startup got far enough to create one) and then runs the superclass
     * shutdown, mirroring the {@code super.onStartup()} call made in {@link #onStartup()}.
     *
     * @throws RuntimeException if closing the analyzer fails; the original exception is the cause
     */
    @Override
    protected void onShutdown() {
        try {
            if (htsAnalyzer != null) {
                htsAnalyzer.close();
            }
        } catch (final Exception e) {
            throw new RuntimeException("Failed to close the file analyzer for " + inputPath, e);
        } finally {
            // Always give the superclass its chance to clean up, even if close() failed.
            super.onShutdown();
        }
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package org.broadinstitute.hellbender.tools.filediagnostics;

import htsjdk.samtools.BAMIndexer;
import org.broadinstitute.hellbender.engine.GATKPath;

import java.io.File;
import java.io.IOException;

/**
 * {@link HTSAnalyzer} implementation that handles BAM index (BAI) input.
 */
public class BAIAnalyzer extends HTSAnalyzer {

    /**
     * @param inputPath  path of the BAI file to analyze
     * @param outputFile file that receives the analysis output
     */
    public BAIAnalyzer(final GATKPath inputPath, final File outputFile) {
        super(inputPath, outputFile);
    }

    /**
     * Announces the output location on stdout, then delegates to
     * {@link BAMIndexer#createAndWriteIndex} with the input converted to a {@link File}.
     */
    protected void doAnalysis() {
        // The notice is printed before the index is written, as in the original ordering.
        final String notice = String.format("\nOutput written to %s\n", outputFile);
        System.out.println(notice);

        final File baiFile = inputPath.toPath().toFile();
        final boolean textOutput = true;
        BAMIndexer.createAndWriteIndex(baiFile, outputFile, textOutput);
    }

    /**
     * No-op: this analyzer keeps no open resources of its own.
     */
    @Override
    public void close() throws IOException {
        // intentionally empty
    }

}

Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package org.broadinstitute.hellbender.tools.filediagnostics;

import htsjdk.samtools.CRAMCRAIIndexer;
import htsjdk.samtools.cram.CRAIIndex;
import htsjdk.samtools.util.RuntimeIOException;
import org.broadinstitute.hellbender.engine.GATKPath;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

/**
 * Analyzer for CRAM (.crai) index files.
 *
 * <p>Opens the output file on construction, writes one line per CRAI entry during analysis, and
 * releases the output stream in {@link #close()}.
 */
public class CRAIAnalyzer extends HTSAnalyzer {

    // Opened in the constructor; closed by close().
    final FileOutputStream fos;

    /**
     * @param inputPath  path of the .crai file to analyze
     * @param outputFile local file that receives the textual output
     * @throws RuntimeIOException if the output file cannot be opened for writing
     */
    public CRAIAnalyzer(final GATKPath inputPath, final File outputFile) {
        super(inputPath, outputFile);
        try {
            fos = new FileOutputStream(outputFile);
        } catch (final IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /**
     * Writes {@code s} followed by a newline to the output file.
     *
     * @param s the text to write
     * @throws RuntimeIOException if the write fails
     */
    protected void emitln(final String s) {
        try {
            // Encode explicitly so the output does not depend on the platform default charset.
            fos.write(s.getBytes(StandardCharsets.UTF_8));
            fos.write('\n');
        } catch (final IOException e) {
            // Same wrapper type as the constructor, so callers see one exception for I/O failures.
            throw new RuntimeIOException(e);
        }
    }

    /**
     * Run the analyzer for the file: read the CRAI index from the input and emit a header line
     * followed by the string form of every index entry.
     *
     * @throws RuntimeIOException if the input cannot be read or closed
     */
    protected void doAnalysis() {
        try (final InputStream is = inputPath.getInputStream()) {
            final CRAIIndex craiIndex = CRAMCRAIIndexer.readIndex(is);
            emitln("\nSeqId AlignmentStart AlignmentSpan ContainerOffset SliceOffset SliceSize\n");
            craiIndex.getCRAIEntries().forEach(entry -> emitln(entry.toString()));
        } catch (final IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    /**
     * Closes the output stream.
     *
     * @throws IOException if closing the stream fails
     */
    @Override
    public void close() throws IOException {
        if (fos != null) {
            fos.close();
        }
    }

}

Loading

0 comments on commit 5839cbd

Please sign in to comment.