Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GVS walker to master [VS-964] #8355

Closed
wants to merge 11 commits into from
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package org.broadinstitute.hellbender.tools.gvs.common;

import java.util.HashMap;
import java.util.Map;

/**
 * The contigs chr1-chr22, chrX, chrY and chrM, each carrying a numeric index (1-25) and the
 * name the same contig has under the "37" naming scheme ("1".."22", "X", "Y", "MT").
 *
 * <p>Name lookups depend on a process-wide reference version chosen with
 * {@link #setRefVersion(String)}; until that has been called {@link #valueOfContig(String)}
 * refuses to answer. Index lookups via {@link #valueOfIndex(int)} do not depend on it.
 */
public enum ChromosomeEnum {
    chr1(1, "1"),
    chr2(2, "2"),
    chr3(3, "3"),
    chr4(4, "4"),
    chr5(5, "5"),
    chr6(6, "6"),
    chr7(7, "7"),
    chr8(8, "8"),
    chr9(9, "9"),
    chr10(10, "10"),
    chr11(11, "11"),
    chr12(12, "12"),
    chr13(13, "13"),
    chr14(14, "14"),
    chr15(15, "15"),
    chr16(16, "16"),
    chr17(17, "17"),
    chr18(18, "18"),
    chr19(19, "19"),
    chr20(20, "20"),
    chr21(21, "21"),
    chr22(22, "22"),
    chrX(23, "X"),
    chrY(24, "Y"),
    chrM(25, "MT");

    // Access is deliberately left package-private: other classes in this package may read
    // these fields directly. They are only ever assigned in the constructor, hence final.
    final int index;
    final String v37ContigName;

    /** Lookup by "37"-style contig name, e.g. "1", "X", "MT". */
    private static final Map<String, ChromosomeEnum> ref37 = new HashMap<>();
    /** Lookup by constant name, e.g. "chr1", "chrX", "chrM". */
    private static final Map<String, ChromosomeEnum> ref38 = new HashMap<>();
    /** Lookup by numeric index. */
    private static final Map<Integer, ChromosomeEnum> decodeValues = new HashMap<>();
    /** The lookup table selected by {@link #setRefVersion(String)}; null until it is called. */
    private static Map<String, ChromosomeEnum> currentVersion = null;

    static {
        for (ChromosomeEnum contig : ChromosomeEnum.values()) {
            ref37.put(contig.v37ContigName, contig);
            ref38.put(contig.name(), contig);
            decodeValues.put(contig.index, contig);
        }
    }

    /**
     * Selects which naming scheme {@link #valueOfContig(String)} and {@link #getContigName()} use.
     *
     * @param refVersion {@code "37"} selects the "37" names; any other value, including
     *                   {@code null}, selects the constant names ("chr1", ...)
     */
    public static void setRefVersion(String refVersion) {
        // Constant-first comparison so a null argument falls through to the default branch
        // instead of raising a NullPointerException.
        currentVersion = "37".equals(refVersion) ? ref37 : ref38;
    }

    ChromosomeEnum(int index, String v37identifier) {
        this.index = index;
        this.v37ContigName = v37identifier;
    }

    /**
     * @param index numeric contig index
     * @return the constant registered under {@code index}, or {@code null} if there is none
     */
    public static ChromosomeEnum valueOfIndex(int index) {
        return decodeValues.get(index);
    }

    /**
     * Resolves a contig name under the currently selected reference version.
     *
     * @param contig contig name in the selected naming scheme
     * @return the matching constant, or {@code null} if the name is unknown in that scheme
     * @throws IllegalStateException if {@link #setRefVersion(String)} has not been called yet
     */
    public static ChromosomeEnum valueOfContig(String contig) {
        if (currentVersion == null) {
            // IllegalStateException is a RuntimeException, so existing catch sites still match.
            throw new IllegalStateException("must set reference version");
        }
        return currentVersion.get(contig);
    }

    /**
     * @return the "37" name when the "37" scheme is selected, otherwise the constant name
     *         (this includes the case where no version has been selected yet)
     */
    public String getContigName() {
        // Identity comparison is intended: ref37 is a single shared map instance.
        if (currentVersion == ref37) {
            return v37ContigName;
        }
        return name();
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
package org.broadinstitute.hellbender.tools.gvs.common;

import com.google.api.client.util.ExponentialBackOff;
import com.google.api.core.ApiFuture;
import com.google.cloud.bigquery.storage.v1.*;
import io.grpc.StatusRuntimeException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.json.JSONArray;
import org.json.JSONObject;

import java.io.IOException;
import java.util.Date;

import static org.broadinstitute.hellbender.utils.gvs.bigquery.BigQueryUtils.extractCausalStatusRuntimeExceptionOrThrow;

public class CostObservability {
static final Logger logger = LogManager.getLogger(org.broadinstitute.hellbender.tools.gvs.common.CostObservability.class);

private final TableName costObservabilityTable;

/**
 * Records which BigQuery table cost-observability rows are written to.
 *
 * @param projectID                  project component of the table name
 * @param datasetName                dataset component of the table name
 * @param costObservabilityTableName table component of the table name
 */
public CostObservability(String projectID, String datasetName, String costObservabilityTableName) {
this.costObservabilityTable = TableName.of(projectID, datasetName, costObservabilityTableName);
}

Check warning on line 25 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L23-L25

Added lines #L23 - L25 were not covered by tests

public TableSchema getCostObservabilityTableSchema() {
TableSchema.Builder builder = TableSchema.newBuilder();
builder.addFields(
TableFieldSchema.newBuilder().setName("call_set_identifier").setType(TableFieldSchema.Type.STRING).setMode(TableFieldSchema.Mode.REQUIRED).build()

Check warning on line 30 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L28-L30

Added lines #L28 - L30 were not covered by tests
);
builder.addFields(
TableFieldSchema.newBuilder().setName("step").setType(TableFieldSchema.Type.STRING).setMode(TableFieldSchema.Mode.REQUIRED).build()

Check warning on line 33 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L32-L33

Added lines #L32 - L33 were not covered by tests
);
builder.addFields(
TableFieldSchema.newBuilder().setName("call").setType(TableFieldSchema.Type.STRING).setMode(TableFieldSchema.Mode.NULLABLE).build()

Check warning on line 36 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L35-L36

Added lines #L35 - L36 were not covered by tests
);
builder.addFields(
TableFieldSchema.newBuilder().setName("shard_identifier").setType(TableFieldSchema.Type.STRING).setMode(TableFieldSchema.Mode.NULLABLE).build()

Check warning on line 39 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L38-L39

Added lines #L38 - L39 were not covered by tests
);
builder.addFields(
TableFieldSchema.newBuilder().setName("call_start_timestamp").setType(TableFieldSchema.Type.TIMESTAMP).setMode(TableFieldSchema.Mode.REQUIRED).build()

Check warning on line 42 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L41-L42

Added lines #L41 - L42 were not covered by tests
);
builder.addFields(
TableFieldSchema.newBuilder().setName("event_timestamp").setType(TableFieldSchema.Type.TIMESTAMP).setMode(TableFieldSchema.Mode.REQUIRED).build()

Check warning on line 45 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L44-L45

Added lines #L44 - L45 were not covered by tests
);
builder.addFields(
TableFieldSchema.newBuilder().setName("event_key").setType(TableFieldSchema.Type.STRING).setMode(TableFieldSchema.Mode.REQUIRED).build()

Check warning on line 48 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L47-L48

Added lines #L47 - L48 were not covered by tests
);
builder.addFields(
TableFieldSchema.newBuilder().setName("event_bytes").setType(TableFieldSchema.Type.INT64).setMode(TableFieldSchema.Mode.REQUIRED).build()

Check warning on line 51 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L50-L51

Added lines #L50 - L51 were not covered by tests
);
return builder.build();

Check warning on line 53 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L53

Added line #L53 was not covered by tests
}

public void writeCostObservability(String callSetIdentifier, String step, String call, String shardIdentifier,
Date callStartTimestamp, Date eventTimestamp, String eventKey, long eventBytes) {
final ExponentialBackOff backoff = new ExponentialBackOff.Builder().
setInitialIntervalMillis(2000).
setMaxIntervalMillis(30000).
setMultiplier(2).
setRandomizationFactor(0.5).
build();
int retryCount = 0;
int maxRetries = 3;

Check warning on line 65 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L58-L65

Added lines #L58 - L65 were not covered by tests

while (true) {
// This uses the default stream since it (a) commits immediately and (b) doesn't count
// towards the CreateStreamWriter quota
try (JsonStreamWriter writer =
JsonStreamWriter.newBuilder(costObservabilityTable.toString(), getCostObservabilityTableSchema()).build()) {

Check warning on line 71 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L70-L71

Added lines #L70 - L71 were not covered by tests

// Create a JSON object that is compatible with the table schema.
JSONArray jsonArr = new JSONArray();
JSONObject jsonObject = new JSONObject();
jsonObject.put("call_set_identifier", callSetIdentifier);
jsonObject.put("step", step);
jsonObject.put("call", call);
jsonObject.put("shard_identifier", shardIdentifier);
jsonObject.put("call_start_timestamp", callStartTimestamp.getTime() * 1000); // google wants this in microseconds since epoch...
jsonObject.put("event_timestamp", eventTimestamp.getTime() * 1000); // google wants this in microseconds since epoch...
jsonObject.put("event_key", eventKey);
jsonObject.put("event_bytes", eventBytes);
jsonArr.put(jsonObject);

Check warning on line 84 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L74-L84

Added lines #L74 - L84 were not covered by tests

ApiFuture<AppendRowsResponse> future = writer.append(jsonArr);
future.get();

Check warning on line 87 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L86-L87

Added lines #L86 - L87 were not covered by tests

logger.info("Cost Observability for " + callSetIdentifier + "." + step + " appended successfully");

Check warning on line 89 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L89

Added line #L89 was not covered by tests
break;
} catch (Exception e) {

Check warning on line 91 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L91

Added line #L91 was not covered by tests
@SuppressWarnings("ThrowableNotThrown")
StatusRuntimeException se = extractCausalStatusRuntimeExceptionOrThrow(e);

Check warning on line 93 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L93

Added line #L93 was not covered by tests

if (retryCount >= maxRetries) {
throw new GATKException("Caught exception writing to BigQuery and " + maxRetries + " write retries are exhausted", e);

Check warning on line 96 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L96

Added line #L96 was not covered by tests
}

switch (se.getStatus().getCode()) {
case ALREADY_EXISTS:
// This is okay, no need to retry
break;

Check warning on line 102 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L102

Added line #L102 was not covered by tests
case INVALID_ARGUMENT:
case NOT_FOUND:
case OUT_OF_RANGE:
case PERMISSION_DENIED:
throw new GATKException("Caught non-retryable StatusRuntimeException based exception", e);

Check warning on line 107 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L107

Added line #L107 was not covered by tests
default:
try {
logger.warn("Caught exception writing to BigQuery, " + (maxRetries - retryCount - 1) + " retries remaining.", e);
long backOffMillis = backoff.nextBackOffMillis();

Check warning on line 111 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L110-L111

Added lines #L110 - L111 were not covered by tests
//noinspection BusyWait
Thread.sleep(backOffMillis);
retryCount++;
} catch (final IOException | InterruptedException ie) {
throw new GATKException("Error attempting to sleep between retry attempts", ie);
}

Check warning on line 117 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L113-L117

Added lines #L113 - L117 were not covered by tests
}
}

Check warning on line 119 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L119

Added line #L119 was not covered by tests
}
}

Check warning on line 121 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java

View check run for this annotation

Codecov / codecov/patch

src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L121

Added line #L121 was not covered by tests
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
package org.broadinstitute.hellbender.tools.gvs.common;

import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.hellbender.engine.GATKTool;
import org.broadinstitute.hellbender.engine.ReferenceDataSource;
import org.broadinstitute.hellbender.tools.walkers.annotator.*;
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.*;

import java.io.File;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public abstract class ExtractTool extends GATKTool {
public static final int DEFAULT_LOCAL_SORT_MAX_RECORDS_IN_RAM = 1000000;
protected VariantAnnotatorEngine annotationEngine;
protected ReferenceDataSource reference;

@Argument(
fullName = "project-id",
doc = "ID of the Google Cloud project to use when executing queries",
optional = true
)
protected String projectID = null;

@Argument(
fullName = "dataset-id",
doc = "ID of the Google Cloud dataset to use when executing queries",
optional = true // I guess, but won't it break otherwise or require that a dataset be created with the name temp_tables?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be optional or required?

)
protected String datasetID = null;

@Argument(
fullName = "sample-table",
doc = "Fully qualified name of a bigquery table containing a single column `sample` that describes the full list of samples to extract",
optional = true,
mutex={"sample-file"}
)
protected String sampleTableName = null;

@Argument(
fullName = "sample-file",
doc = "Alternative to `sample-table`. Pass in a (sample_id,sample_name) CSV that describes the full list of samples to extract. No header",
optional = true,
mutex={"sample-table"}

)
Comment on lines +41 to +47
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
@Argument(
fullName = "sample-file",
doc = "Alternative to `sample-table`. Pass in a (sample_id,sample_name) CSV that describes the full list of samples to extract. No header",
optional = true,
mutex={"sample-table"}
)
@Argument(
fullName = "sample-file",
doc = "Alternative to `sample-table`. Pass in a (sample_id,sample_name) CSV that describes the full list of samples to extract. No header",
optional = true,
mutex={"sample-table"}
)

protected File sampleFileName = null;

@Argument(
fullName = "print-debug-information",
doc = "If true, print extra debugging output",
optional = true)
Comment on lines +50 to +53
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
@Argument(
fullName = "print-debug-information",
doc = "If true, print extra debugging output",
optional = true)
@Argument(
fullName = "print-debug-information",
doc = "If true, print extra debugging output",
optional = true
)

protected boolean printDebugInformation = false;

@Argument(
fullName = "local-sort-max-records-in-ram",
doc = "When doing local sort, store at most this many records in memory at once",
optional = true
)
protected int localSortMaxRecordsInRam = DEFAULT_LOCAL_SORT_MAX_RECORDS_IN_RAM;

@Argument(
fullName = "ref-version",
doc = "Remove this option!!!! only for ease of testing. Valid options are 37 or 38",
optional = true
)
protected String refVersion = "37";

@Argument(
fullName = "min-location",
doc = "When extracting data, only include locations >= this value",
optional = true
)
protected Long minLocation = null;

@Argument(
fullName = "max-location",
doc = "When extracting data, only include locations <= this value",
optional = true
)
protected Long maxLocation = null;

/**
 * Declares that a reference is mandatory for tools derived from this class.
 *
 * @return always {@code true}
 */
@Override
public boolean requiresReference() {
return true;
}

/**
 * Opts tools derived from this class in to variant annotations.
 *
 * @return always {@code true}
 */
@Override
public boolean useVariantAnnotations() {
return true;
}

/**
 * Lists the annotations enabled by default: first the allele-specific ones, then their
 * standard counterparts.
 *
 * @return a fixed-size list holding a new instance of each default annotation
 */
@Override
public List<Annotation> getDefaultVariantAnnotations() {
    final List<Annotation> defaults = Arrays.asList(
            // Every `AS_StandardAnnotation` implementer except `AS_InbreedingCoeff`.
            new AS_FisherStrand(),
            new AS_StrandOddsRatio(),
            new AS_BaseQualityRankSumTest(),
            new AS_MappingQualityRankSumTest(),
            new AS_ReadPosRankSumTest(),
            new AS_RMSMappingQuality(),
            new AS_QualByDepth(),

            // Every `StandardAnnotation` implementer except `InbreedingCoeff` and `ExcessHet`.
            new BaseQualityRankSumTest(),
            new ChromosomeCounts(),
            new Coverage(),
            new DepthPerAlleleBySample(),
            new FisherStrand(),
            new MappingQualityRankSumTest(),
            new QualByDepth(),
            new RMSMappingQuality(),
            new ReadPosRankSumTest(),
            new StrandOddsRatio()
    );
    return defaults;
}

/**
 * Runs the superclass startup, then prepares the state shared by extract tools: builds
 * {@code annotationEngine} from {@code makeVariantAnnotations()}, selects the
 * {@link ChromosomeEnum} reference version from {@code refVersion}, and stores the result of
 * {@code directlyAccessEngineReferenceDataSource()} in {@code reference}.
 */
@Override
protected void onStartup() {
super.onStartup();

//TODO verify what we really need here
annotationEngine = new VariantAnnotatorEngine(makeVariantAnnotations(), null, Collections.emptyList(), false, false);
Comment on lines +124 to +125
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do we really need here?


// ChromosomeEnum.valueOfContig refuses to answer until a reference version has been set.
ChromosomeEnum.setRefVersion(refVersion);

reference = directlyAccessEngineReferenceDataSource();
}
}
Loading