Skip to content

Commit

Permalink
Fixed unnecessary excepting when running UntemplatedSequenceAnnotator…
Browse files Browse the repository at this point in the history
… with a reference other than the reference used to generate the VCF.
  • Loading branch information
Daniel Cameron committed Jun 20, 2019
1 parent 5808d3e commit 9197c9d
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 37 deletions.
Original file line number Diff line number Diff line change
@@ -1,39 +1,44 @@
package au.edu.wehi.idsv.vcf;

import au.edu.wehi.idsv.GenomicProcessingContext;
import au.edu.wehi.idsv.IdsvVariantContext;
import au.edu.wehi.idsv.IdsvVariantContextBuilder;
import au.edu.wehi.idsv.VariantContextDirectedEvidence;
import au.edu.wehi.idsv.*;
import au.edu.wehi.idsv.alignment.ExternalProcessStreamingAligner;
import au.edu.wehi.idsv.util.AutoClosingIterator;
import com.google.common.collect.Iterators;
import com.google.common.collect.PeekingIterator;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.fastq.FastqRecord;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.Log;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.vcf.VCFFileReader;
import joptsimple.internal.Strings;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class UntemplatedSequenceAnnotator implements CloseableIterator<IdsvVariantContext> {
public class UntemplatedSequenceAnnotator implements CloseableIterator<VariantContext> {
public static final byte DEFAULT_QUAL_SCORE = 20;
private static final Log log = Log.getInstance(UntemplatedSequenceAnnotator.class);
private final GenomicProcessingContext context;
private static final Pattern breakendRegex = Pattern.compile("^(.(?<leftins>.*))?[\\[\\]].*[\\[\\]]((?<rightins>.*).)?$");
private final File referenceFile;
private final File vcf;
private final boolean overwrite;
private final List<String> cmd;
private final int threads;
private CloseableIterator<IdsvVariantContext> vcfStream;
private CloseableIterator<VariantContext> vcfStream;
private PeekingIterator<SAMRecord> samStream;
private ExternalProcessStreamingAligner aligner;
private Thread feedingAligner;
private IdsvVariantContext nextRecord = null;
public UntemplatedSequenceAnnotator(GenomicProcessingContext context, File vcf, boolean overwrite, List<String> aligner_command_line, int threads) {
this.context = context;
private VariantContext nextRecord = null;
public UntemplatedSequenceAnnotator(File referenceFile, File vcf, boolean overwrite, List<String> aligner_command_line, int threads) {
this.referenceFile = referenceFile;
this.vcf = vcf;
this.overwrite = overwrite;
this.cmd = aligner_command_line;
Expand All @@ -42,15 +47,14 @@ public UntemplatedSequenceAnnotator(GenomicProcessingContext context, File vcf,
createRecordAlignmentStream();
this.samStream = Iterators.peekingIterator(this.aligner);
}
private CloseableIterator<IdsvVariantContext> getVcf() {
private CloseableIterator<VariantContext> getVcf() {
VCFFileReader vcfReader = new VCFFileReader(vcf, false);
CloseableIterator<VariantContext> it = vcfReader.iterator();
Iterator<IdsvVariantContext> idsvIt = Iterators.transform(it, variant -> IdsvVariantContext.create(context, null, variant));
return new AutoClosingIterator<IdsvVariantContext>(idsvIt, it, vcfReader);
return new AutoClosingIterator<VariantContext>(it, vcfReader);
}
private ExternalProcessStreamingAligner createRecordAlignmentStream() {
log.debug("Started async external alignment feeder thread.");
aligner = new ExternalProcessStreamingAligner(context.getSamReaderFactory(), cmd, context.getReferenceFile(), threads);
aligner = new ExternalProcessStreamingAligner(SamReaderFactory.make(), cmd, referenceFile, threads);
feedingAligner = new Thread(() -> feedExternalAligner());
feedingAligner.setName("async-feedExternalAligner");
feedingAligner.start();
Expand All @@ -59,20 +63,37 @@ private ExternalProcessStreamingAligner createRecordAlignmentStream() {
private boolean shouldAttemptAlignment(VariantContext v) {
return overwrite || !v.hasAttribute(VcfInfoAttributes.BREAKEND_ALIGNMENTS.attribute());
}
private static String getBreakendSequence(VariantContext seq) {
if (seq.getAlternateAlleles().size() != 1) return null;
Allele allele = seq.getAlternateAllele(0);
String alt = allele.getDisplayString();
if (alt.charAt(0) == '.' || alt.charAt(alt.length() - 1) == '.') {
return alt.substring(1, alt.length() - 1);
} else if (alt.charAt(0) == '[' || alt.charAt(0) == ']' || alt.charAt(alt.length() - 1) == '[' || alt.charAt(alt.length() - 1) == ']') {
Matcher matcher = breakendRegex.matcher(alt);
if (matcher.matches()) {
String leftIns = matcher.group("leftins");
if (!Strings.isNullOrEmpty(leftIns)) {
return leftIns;
}
String rightIns = matcher.group("rightins");
return rightIns;
}
}
return null;
}

private void feedExternalAligner() {
try (CloseableIterator<IdsvVariantContext> it = getVcf()) {
try (CloseableIterator<VariantContext> it = getVcf()) {
while (it.hasNext()) {
IdsvVariantContext vc = it.next();
if (vc instanceof VariantContextDirectedEvidence && shouldAttemptAlignment(vc)) {
VariantContextDirectedEvidence e = (VariantContextDirectedEvidence)vc;
byte[] seq = e.getBreakendSequence();
byte[] qual = e.getBreakendQuality();
if (seq != null && seq.length > 0) {
if (qual == null) {
qual = new byte[seq.length];
Arrays.fill(qual, DEFAULT_QUAL_SCORE);
}
FastqRecord fq = new FastqRecord(e.getID(), seq, null, qual);
VariantContext vc = it.next();
if (shouldAttemptAlignment(vc)) {
String seqstr = getBreakendSequence(vc);
if (!Strings.isNullOrEmpty(seqstr)) {
byte[] seq = seqstr.getBytes(StandardCharsets.UTF_8);
byte[] qual = new byte[seq.length];
Arrays.fill(qual, DEFAULT_QUAL_SCORE);
FastqRecord fq = new FastqRecord(vc.getID(), seq, null, qual);
aligner.asyncAlign(fq);
}
}
Expand All @@ -94,10 +115,10 @@ public boolean hasNext() {
return nextRecord != null;
}
@Override
public IdsvVariantContext next() {
public VariantContext next() {
ensureNext();
if (!hasNext()) throw new NoSuchElementException();
IdsvVariantContext result = nextRecord;
VariantContext result = nextRecord;
nextRecord = null;
return result;
}
Expand Down Expand Up @@ -128,7 +149,7 @@ private void annotateNextRecord() {
}
}
if (alignments.size() > 0) {
IdsvVariantContextBuilder builder = new IdsvVariantContextBuilder(context, nextRecord);
VariantContextBuilder builder = new VariantContextBuilder(nextRecord);
builder.attribute(VcfInfoAttributes.BREAKEND_ALIGNMENTS.attribute(), writeAlignmentAnnotation(alignments));
nextRecord = builder.make();
}
Expand Down
19 changes: 11 additions & 8 deletions src/main/java/gridss/AnnotateUntemplatedSequence.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFFileReader;
Expand All @@ -35,9 +36,11 @@ public class AnnotateUntemplatedSequence extends ReferenceCommandLineProgram {
@Argument(doc="Overwrite existing annotation. Setting to 'true' will replace any existing BEALN annotations. " +
"Setting to 'false' will generate annotations only for records without an existing BEALN annotation. ")
public boolean OVERWRITE = false;
@Argument(doc="Directly pipe the input and output of the aligner instead of writing to intermediate files."
@Argument(doc="Command line arguments to run external aligner. "
+ "Aligner output must be written to stdout and the records MUST match the input fastq order."
+ " The aligner must support using \"-\" as the input filename when reading from stdin."
+ " The sort order of the input file will not be retained.", optional=true)
+ "Java argument formatting is used with %1$s being the fastq file to align, "
+ "%2$s the reference genome, and %3$d the number of threads to use.", optional=true)
public List<String> ALIGNER_COMMAND_LINE = new SoftClipsToSplitReads().ALIGNER_COMMAND_LINE;
public static void main(String[] argv) {
System.exit(new AnnotateUntemplatedSequence().instanceMain(argv));
Expand All @@ -46,19 +49,19 @@ public static void main(String[] argv) {
public int doWork() {
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsWritable(OUTPUT);
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
log.info("Annotating variant untemplated sequence in " + INPUT);
GenomicProcessingContext context = new GenomicProcessingContext(getFileSystemContext(), REFERENCE_SEQUENCE, getReference());
try (UntemplatedSequenceAnnotator ann = new UntemplatedSequenceAnnotator(context, INPUT, OVERWRITE, ALIGNER_COMMAND_LINE, WORKER_THREADS)) {
saveVcf(context, INPUT, OUTPUT, ann);
try (UntemplatedSequenceAnnotator ann = new UntemplatedSequenceAnnotator(REFERENCE_SEQUENCE, INPUT, OVERWRITE, ALIGNER_COMMAND_LINE, WORKER_THREADS)) {
saveVcf(INPUT, OUTPUT, ann);
log.info("Annotated variants written to " + OUTPUT);
} catch (IOException e) {
log.error(e);
throw new RuntimeException(e);
}
return 0;
}
protected void saveVcf(GenomicProcessingContext context, File input, File output, Iterator<IdsvVariantContext> calls) throws IOException {
VCFHeader header = null;
protected void saveVcf(File input, File output, Iterator<VariantContext> calls) throws IOException {
VCFHeader header;
try (VCFFileReader vcfReader = new VCFFileReader(input, false)) {
header = vcfReader.getFileHeader();
}
Expand All @@ -72,7 +75,7 @@ protected void saveVcf(GenomicProcessingContext context, File input, File output
try (VariantContextWriter vcfWriter = builder.build()) {
vcfWriter.writeHeader(header);
while (calls.hasNext()) {
IdsvVariantContext record = calls.next();
VariantContext record = calls.next();
vcfWriter.add(record);
writeProgress.record(record.getContig(), record.getStart());
}
Expand Down

0 comments on commit 9197c9d

Please sign in to comment.