Skip to content

Commit

Permalink
Add a --numeric-gt option to VariantsToTable
Browse files Browse the repository at this point in the history
* add a new option to VariantsToTable to allow outputting VCF-style numeric GT fields
previously it always output the actual bases of the alleles in the GT field
* resolves #8160
* updates htsjdk to 3.0.5
  • Loading branch information
lbergelson committed Aug 16, 2024
1 parent 9f2fbb5 commit 20e34e8
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 10 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.broadinstitute.hellbender.tools.walkers.variantutils;

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.*;
import org.apache.logging.log4j.LogManager;
Expand Down Expand Up @@ -122,6 +123,7 @@
public final class VariantsToTable extends VariantWalker {
public final static String SPLIT_MULTI_ALLELIC_LONG_NAME = "split-multi-allelic";
public final static String SPLIT_MULTI_ALLELIC_SHORT_NAME = "SMA";
public static final String NUMERIC_GT_FULLNAME = "numeric-gt";

static final Logger logger = LogManager.getLogger(VariantsToTable.class);

Expand Down Expand Up @@ -204,6 +206,11 @@ public final class VariantsToTable extends VariantWalker {
doc="Fail on missing data", optional=true)
public boolean errorIfMissingData = false;

/**
 * Optional command-line switch, off by default, that selects how the GT genotype field is rendered.
 * When set, the GT value is produced by VCFEncoder.encodeGtField (VCF-style, e.g. 0/1);
 * otherwise the genotype string built from the allele bases is written (e.g. A/T).
 */
@Argument(fullName = NUMERIC_GT_FULLNAME,
doc = "write the GT field the way it appears in a VCF ( ex. 0/1 instead of A/T )",
optional = true)
public boolean useNumericGT = false;

private static final String MISSING_DATA = "NA";

private SortedSet<String> samples;
Expand Down Expand Up @@ -341,7 +348,7 @@ private void emitMoltenizedOutput(final List<String> record) {
* @param vc the VariantContext whose field values we want to capture
* @return List of lists of field values
*/
protected List<List<String>> extractFields(final VariantContext vc) {
private List<List<String>> extractFields(final VariantContext vc) {

final int numRecordsToProduce = splitMultiAllelic ? vc.getAlternateAlleles().size() : 1;
final List<List<String>> records = new ArrayList<>(numRecordsToProduce);
Expand Down Expand Up @@ -395,18 +402,23 @@ protected List<List<String>> extractFields(final VariantContext vc) {

private void addGenotypeFieldsToRecords(final VariantContext vc, final List<List<String>> records, final boolean errorIfMissingData) {
for ( final String sample : samples ) {
final Genotype genotype = vc.getGenotype(sample);
for ( final String gf : genotypeFieldsToTake ) {
if ( vc.hasGenotype(sample) && vc.getGenotype(sample).hasAnyAttribute(gf) ) {
if ( vc.hasGenotype(sample) && genotype.hasAnyAttribute(gf) ) {
if (VCFConstants.GENOTYPE_KEY.equals(gf)) {
addFieldValue(vc.getGenotype(sample).getGenotypeString(true), records);
if(useNumericGT) {
addFieldValue(VCFEncoder.encodeGtField(vc, genotype), records);
} else {
addFieldValue(genotype.getGenotypeString(true), records);
}
} else {
/**
* TODO - If gf == "FT" and the GT record is not filtered, Genotype.getAnyAttribute == null. Genotype.hasAnyAttribute should be changed so it
* returns false for this condition. Presently, it always returns true. Once this is fixed, then only the "addFieldValue" statement will
* remain in the following logic block.
*/
if (vc.getGenotype(sample).getAnyAttribute(gf) != null) {
addFieldValue(vc.getGenotype(sample).getAnyAttribute(gf), records);
if (genotype.getAnyAttribute(gf) != null) {
addFieldValue(genotype.getAnyAttribute(gf), records);
} else {
handleMissingData(errorIfMissingData, gf, records, vc);
} }
Expand All @@ -416,21 +428,21 @@ private void addGenotypeFieldsToRecords(final VariantContext vc, final List<List
}

for ( final String field : asGenotypeFieldsToTake) {
if ( vc.hasGenotype(sample) && vc.getGenotype(sample).hasAnyAttribute(field) ) {
if ( vc.hasGenotype(sample) && genotype.hasAnyAttribute(field) ) {
if (splitMultiAllelic) {
if (VCFConstants.GENOTYPE_ALLELE_DEPTHS.equals(field)) {
List<String> altDepths = new ArrayList<>();
int[] allDepths = vc.getGenotype(sample).getAD();
final List<String> altDepths = new ArrayList<>();
int[] allDepths = genotype.getAD();
for (int i = 1; i < allDepths.length; i++) {
altDepths.add(allDepths[0] + "," + allDepths[i]);
}
addFieldValue(altDepths, records);
} else {
addAlleleSpecificFieldValue(split(vc.getGenotype(sample).getExtendedAttribute(field).toString(), ','),
addAlleleSpecificFieldValue(split(genotype.getExtendedAttribute(field).toString(), ','),
records, inputHeader.getFormatHeaderLine(field).getCountType());
}
} else {
final String value = vc.getGenotype(sample).getAnyAttribute(field).toString();
final String value = genotype.getAnyAttribute(field).toString();
if (field.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
addFieldValue(value.replace("[","").replace("]","").replaceAll("\\s",""),records);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
import org.testng.annotations.Test;

Expand Down Expand Up @@ -276,5 +277,20 @@ public void testNoFieldsSpecifiedFormatFieldInHeaderNoSamples() throws IOExcepti

IntegrationTestSpec.assertEqualTextFiles(outputFile, expectedFile);
}

/**
 * Runs the tool with the numeric-gt flag enabled on the 1000G phase 3 snippet VCF and
 * verifies that the produced table is textually identical to the stored expected table.
 *
 * @throws IOException declared for the file-handling helpers used by the test
 */
@Test
public void testNumericGTFlag() throws IOException {
    // Input VCF and the reference table both live in the tool's test data directory.
    final File snippetVcf = new File(getToolTestDataDir(), "VCFWithGenotypes_1000G.phase3.snippet.vcf");
    final File actualTable = createTempFile("numericGT", ".table");
    final File expectedTable = new File(getToolTestDataDir(), "expected.numericGT.table");

    // Only the input, the output, and the numeric-gt flag are passed to the tool.
    final ArgumentsBuilder commandLine = new ArgumentsBuilder();
    commandLine.addVCF(snippetVcf)
            .addOutput(actualTable)
            .addFlag(VariantsToTable.NUMERIC_GT_FULLNAME);

    runCommandLine(commandLine);

    IntegrationTestSpec.assertEqualTextFiles(actualTable, expectedTable);
}

}

0 comments on commit 20e34e8

Please sign in to comment.