-
Notifications
You must be signed in to change notification settings - Fork 589
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GVS walker to master [VS-964] #8355
Changes from all commits
739c91c
f9899a2
0cc3cb0
f770c36
22da04b
e7b7aba
af6c5b0
bd39e8e
013b11b
dab2446
fa74fc7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
package org.broadinstitute.hellbender.tools.gvs.common; | ||
|
||
import java.util.HashMap; | ||
import java.util.Map; | ||
|
||
/**
 * The contigs chr1..chr22, chrX, chrY and chrM, each with a numeric index and an
 * alternative "v37" contig name ("1".."22", "X", "Y", "MT").
 *
 * <p>Lookups by contig name depend on a process-wide reference version that must be
 * selected once via {@link #setRefVersion(String)} before {@link #valueOfContig(String)}
 * is called. That selection is plain static state with no synchronization.
 */
public enum ChromosomeEnum {
    chr1(1, "1"),
    chr2(2, "2"),
    chr3(3, "3"),
    chr4(4, "4"),
    chr5(5, "5"),
    chr6(6, "6"),
    chr7(7, "7"),
    chr8(8, "8"),
    chr9(9, "9"),
    chr10(10, "10"),
    chr11(11, "11"),
    chr12(12, "12"),
    chr13(13, "13"),
    chr14(14, "14"),
    chr15(15, "15"),
    chr16(16, "16"),
    chr17(17, "17"),
    chr18(18, "18"),
    chr19(19, "19"),
    chr20(20, "20"),
    chr21(21, "21"),
    chr22(22, "22"),
    chrX(23, "X"),
    chrY(24, "Y"),
    chrM(25, "MT");

    // Visibility is deliberately left package-private so existing same-package readers keep
    // working; the fields are now final because enum constants are shared singletons.
    final int index;
    final String v37ContigName;

    // Lookup tables, filled once in the static initializer below and never modified afterwards.
    private static final Map<String, ChromosomeEnum> ref37 = new HashMap<>();
    private static final Map<String, ChromosomeEnum> ref38 = new HashMap<>();
    private static final Map<Integer, ChromosomeEnum> decodeValues = new HashMap<>();

    // Points at either ref37 or ref38 once setRefVersion has been called; null until then.
    private static Map<String, ChromosomeEnum> currentVersion = null;

    static {
        for (ChromosomeEnum contig : ChromosomeEnum.values()) {
            ref37.put(contig.v37ContigName, contig);
            ref38.put(contig.name(), contig);
            decodeValues.put(contig.index, contig);
        }
    }

    /**
     * Selects which contig naming scheme {@link #valueOfContig(String)} and
     * {@link #getContigName()} use.
     *
     * @param refVersion {@code "37"} selects the v37 names; any other non-null value
     *                   selects the enum-constant names ("chr1", ... "chrM")
     * @throws NullPointerException if {@code refVersion} is null
     */
    public static void setRefVersion(String refVersion) {
        if (refVersion == null) {
            // Same exception type the original dereference produced, but with a usable message.
            throw new NullPointerException("refVersion must not be null");
        }
        if (refVersion.equals("37")) {
            currentVersion = ref37;
        } else {
            currentVersion = ref38;
        }
    }

    ChromosomeEnum(int index, String v37identifier) {
        this.index = index;
        this.v37ContigName = v37identifier;
    }

    /**
     * @param index numeric contig index (1..25 for the constants declared here)
     * @return the constant with that index, or {@code null} if there is none
     */
    public static ChromosomeEnum valueOfIndex(int index) {
        return decodeValues.get(index);
    }

    /**
     * Looks a contig up by name under the currently selected reference version.
     *
     * @param contig contig name in the selected naming scheme
     * @return the matching constant, or {@code null} if the name is unknown in that scheme
     * @throws IllegalStateException if {@link #setRefVersion(String)} has not been called yet
     */
    public static ChromosomeEnum valueOfContig(String contig) {
        if (currentVersion == null) {
            throw new IllegalStateException("must set reference version");
        }
        return currentVersion.get(contig);
    }

    /**
     * @return the v37 name when the v37 scheme is selected; otherwise (including when no
     *         version has been selected yet) the enum constant's own name
     */
    public String getContigName() {
        if (currentVersion == ref37) {
            return v37ContigName;
        } else {
            return name();
        }
    }

}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
package org.broadinstitute.hellbender.tools.gvs.common; | ||
|
||
import com.google.api.client.util.ExponentialBackOff; | ||
import com.google.api.core.ApiFuture; | ||
import com.google.cloud.bigquery.storage.v1.*; | ||
import io.grpc.StatusRuntimeException; | ||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.broadinstitute.hellbender.exceptions.GATKException; | ||
import org.json.JSONArray; | ||
import org.json.JSONObject; | ||
|
||
import java.io.IOException; | ||
import java.util.Date; | ||
|
||
import static org.broadinstitute.hellbender.utils.gvs.bigquery.BigQueryUtils.extractCausalStatusRuntimeExceptionOrThrow; | ||
|
||
public class CostObservability { | ||
static final Logger logger = LogManager.getLogger(org.broadinstitute.hellbender.tools.gvs.common.CostObservability.class); | ||
|
||
private final TableName costObservabilityTable; | ||
|
||
public CostObservability(String projectID, String datasetName, String costObservabilityTableName) { | ||
this.costObservabilityTable = TableName.of(projectID, datasetName, costObservabilityTableName); | ||
} | ||
Check warning on line 25 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L23-L25
|
||
|
||
public TableSchema getCostObservabilityTableSchema() { | ||
TableSchema.Builder builder = TableSchema.newBuilder(); | ||
builder.addFields( | ||
TableFieldSchema.newBuilder().setName("call_set_identifier").setType(TableFieldSchema.Type.STRING).setMode(TableFieldSchema.Mode.REQUIRED).build() | ||
Check warning on line 30 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L28-L30
|
||
); | ||
builder.addFields( | ||
TableFieldSchema.newBuilder().setName("step").setType(TableFieldSchema.Type.STRING).setMode(TableFieldSchema.Mode.REQUIRED).build() | ||
Check warning on line 33 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L32-L33
|
||
); | ||
builder.addFields( | ||
TableFieldSchema.newBuilder().setName("call").setType(TableFieldSchema.Type.STRING).setMode(TableFieldSchema.Mode.NULLABLE).build() | ||
Check warning on line 36 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L35-L36
|
||
); | ||
builder.addFields( | ||
TableFieldSchema.newBuilder().setName("shard_identifier").setType(TableFieldSchema.Type.STRING).setMode(TableFieldSchema.Mode.NULLABLE).build() | ||
Check warning on line 39 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L38-L39
|
||
); | ||
builder.addFields( | ||
TableFieldSchema.newBuilder().setName("call_start_timestamp").setType(TableFieldSchema.Type.TIMESTAMP).setMode(TableFieldSchema.Mode.REQUIRED).build() | ||
Check warning on line 42 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L41-L42
|
||
); | ||
builder.addFields( | ||
TableFieldSchema.newBuilder().setName("event_timestamp").setType(TableFieldSchema.Type.TIMESTAMP).setMode(TableFieldSchema.Mode.REQUIRED).build() | ||
Check warning on line 45 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L44-L45
|
||
); | ||
builder.addFields( | ||
TableFieldSchema.newBuilder().setName("event_key").setType(TableFieldSchema.Type.STRING).setMode(TableFieldSchema.Mode.REQUIRED).build() | ||
Check warning on line 48 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L47-L48
|
||
); | ||
builder.addFields( | ||
TableFieldSchema.newBuilder().setName("event_bytes").setType(TableFieldSchema.Type.INT64).setMode(TableFieldSchema.Mode.REQUIRED).build() | ||
Check warning on line 51 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L50-L51
|
||
); | ||
return builder.build(); | ||
} | ||
|
||
public void writeCostObservability(String callSetIdentifier, String step, String call, String shardIdentifier, | ||
Date callStartTimestamp, Date eventTimestamp, String eventKey, long eventBytes) { | ||
final ExponentialBackOff backoff = new ExponentialBackOff.Builder(). | ||
setInitialIntervalMillis(2000). | ||
setMaxIntervalMillis(30000). | ||
setMultiplier(2). | ||
setRandomizationFactor(0.5). | ||
build(); | ||
int retryCount = 0; | ||
int maxRetries = 3; | ||
Check warning on line 65 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L58-L65
|
||
|
||
while (true) { | ||
// This uses the default stream since it (a) commits immediately and (b) doesn't count | ||
// towards the CreateStreamWriter quota | ||
try (JsonStreamWriter writer = | ||
JsonStreamWriter.newBuilder(costObservabilityTable.toString(), getCostObservabilityTableSchema()).build()) { | ||
Check warning on line 71 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L70-L71
|
||
|
||
// Create a JSON object that is compatible with the table schema. | ||
JSONArray jsonArr = new JSONArray(); | ||
JSONObject jsonObject = new JSONObject(); | ||
jsonObject.put("call_set_identifier", callSetIdentifier); | ||
jsonObject.put("step", step); | ||
jsonObject.put("call", call); | ||
jsonObject.put("shard_identifier", shardIdentifier); | ||
jsonObject.put("call_start_timestamp", callStartTimestamp.getTime() * 1000); // google wants this in microseconds since epoch... | ||
jsonObject.put("event_timestamp", eventTimestamp.getTime() * 1000); // google wants this in microseconds since epoch... | ||
jsonObject.put("event_key", eventKey); | ||
jsonObject.put("event_bytes", eventBytes); | ||
jsonArr.put(jsonObject); | ||
Check warning on line 84 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L74-L84
|
||
|
||
ApiFuture<AppendRowsResponse> future = writer.append(jsonArr); | ||
future.get(); | ||
Check warning on line 87 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L86-L87
|
||
|
||
logger.info("Cost Observability for " + callSetIdentifier + "." + step + " appended successfully"); | ||
break; | ||
} catch (Exception e) { | ||
@SuppressWarnings("ThrowableNotThrown") | ||
StatusRuntimeException se = extractCausalStatusRuntimeExceptionOrThrow(e); | ||
|
||
if (retryCount >= maxRetries) { | ||
throw new GATKException("Caught exception writing to BigQuery and " + maxRetries + " write retries are exhausted", e); | ||
} | ||
|
||
switch (se.getStatus().getCode()) { | ||
case ALREADY_EXISTS: | ||
// This is okay, no need to retry | ||
break; | ||
Check warning on line 102 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L102
|
||
case INVALID_ARGUMENT: | ||
case NOT_FOUND: | ||
case OUT_OF_RANGE: | ||
case PERMISSION_DENIED: | ||
throw new GATKException("Caught non-retryable StatusRuntimeException based exception", e); | ||
Check warning on line 107 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L107
|
||
default: | ||
try { | ||
logger.warn("Caught exception writing to BigQuery, " + (maxRetries - retryCount - 1) + " retries remaining.", e); | ||
long backOffMillis = backoff.nextBackOffMillis(); | ||
Check warning on line 111 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L110-L111
|
||
//noinspection BusyWait | ||
Thread.sleep(backOffMillis); | ||
retryCount++; | ||
} catch (final IOException | InterruptedException ie) { | ||
throw new GATKException("Error attempting to sleep between retry attempts", ie); | ||
} | ||
Check warning on line 117 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L113-L117
|
||
} | ||
} | ||
Check warning on line 119 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L119
|
||
} | ||
} | ||
Check warning on line 121 in src/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java Codecov / codecov/patchsrc/main/java/org/broadinstitute/hellbender/tools/gvs/common/CostObservability.java#L121
|
||
} |
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,131 @@ | ||||||||||||||||||||||||||||
package org.broadinstitute.hellbender.tools.gvs.common; | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
import org.broadinstitute.barclay.argparser.Argument; | ||||||||||||||||||||||||||||
import org.broadinstitute.hellbender.engine.GATKTool; | ||||||||||||||||||||||||||||
import org.broadinstitute.hellbender.engine.ReferenceDataSource; | ||||||||||||||||||||||||||||
import org.broadinstitute.hellbender.tools.walkers.annotator.*; | ||||||||||||||||||||||||||||
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.*; | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
import java.io.File; | ||||||||||||||||||||||||||||
import java.util.Arrays; | ||||||||||||||||||||||||||||
import java.util.Collections; | ||||||||||||||||||||||||||||
import java.util.List; | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
public abstract class ExtractTool extends GATKTool { | ||||||||||||||||||||||||||||
public static final int DEFAULT_LOCAL_SORT_MAX_RECORDS_IN_RAM = 1000000; | ||||||||||||||||||||||||||||
protected VariantAnnotatorEngine annotationEngine; | ||||||||||||||||||||||||||||
protected ReferenceDataSource reference; | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
@Argument( | ||||||||||||||||||||||||||||
fullName = "project-id", | ||||||||||||||||||||||||||||
doc = "ID of the Google Cloud project to use when executing queries", | ||||||||||||||||||||||||||||
optional = true | ||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||
protected String projectID = null; | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
@Argument( | ||||||||||||||||||||||||||||
fullName = "dataset-id", | ||||||||||||||||||||||||||||
doc = "ID of the Google Cloud dataset to use when executing queries", | ||||||||||||||||||||||||||||
optional = true // I guess, but won't it break otherwise or require that a dataset be created with the name temp_tables? | ||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||
protected String datasetID = null; | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
@Argument( | ||||||||||||||||||||||||||||
fullName = "sample-table", | ||||||||||||||||||||||||||||
doc = "Fully qualified name of a bigquery table containing a single column `sample` that describes the full list of samples to extract", | ||||||||||||||||||||||||||||
optional = true, | ||||||||||||||||||||||||||||
mutex={"sample-file"} | ||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||
protected String sampleTableName = null; | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
@Argument( | ||||||||||||||||||||||||||||
fullName = "sample-file", | ||||||||||||||||||||||||||||
doc = "Alternative to `sample-table`. Pass in a (sample_id,sample_name) CSV that describes the full list of samples to extract. No header", | ||||||||||||||||||||||||||||
optional = true, | ||||||||||||||||||||||||||||
mutex={"sample-table"} | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||
Comment on lines
+41
to
+47
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||
protected File sampleFileName = null; | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
@Argument( | ||||||||||||||||||||||||||||
fullName = "print-debug-information", | ||||||||||||||||||||||||||||
doc = "If true, print extra debugging output", | ||||||||||||||||||||||||||||
optional = true) | ||||||||||||||||||||||||||||
Comment on lines
+50
to
+53
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||
protected boolean printDebugInformation = false; | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
@Argument( | ||||||||||||||||||||||||||||
fullName = "local-sort-max-records-in-ram", | ||||||||||||||||||||||||||||
doc = "When doing local sort, store at most this many records in memory at once", | ||||||||||||||||||||||||||||
optional = true | ||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||
protected int localSortMaxRecordsInRam = DEFAULT_LOCAL_SORT_MAX_RECORDS_IN_RAM; | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
@Argument( | ||||||||||||||||||||||||||||
fullName = "ref-version", | ||||||||||||||||||||||||||||
doc = "Remove this option!!!! only for ease of testing. Valid options are 37 or 38", | ||||||||||||||||||||||||||||
optional = true | ||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||
protected String refVersion = "37"; | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
@Argument( | ||||||||||||||||||||||||||||
fullName = "min-location", | ||||||||||||||||||||||||||||
doc = "When extracting data, only include locations >= this value", | ||||||||||||||||||||||||||||
optional = true | ||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||
protected Long minLocation = null; | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
@Argument( | ||||||||||||||||||||||||||||
fullName = "max-location", | ||||||||||||||||||||||||||||
doc = "When extracting data, only include locations <= this value", | ||||||||||||||||||||||||||||
optional = true | ||||||||||||||||||||||||||||
) | ||||||||||||||||||||||||||||
protected Long maxLocation = null; | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
@Override | ||||||||||||||||||||||||||||
public boolean requiresReference() { | ||||||||||||||||||||||||||||
return true; | ||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
@Override | ||||||||||||||||||||||||||||
public boolean useVariantAnnotations() { | ||||||||||||||||||||||||||||
return true; | ||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
@Override | ||||||||||||||||||||||||||||
public List<Annotation> getDefaultVariantAnnotations() { | ||||||||||||||||||||||||||||
return Arrays.asList( | ||||||||||||||||||||||||||||
// All the `AS_StandardAnnotation` implementers minus `AS_InbreedingCoeff`. | ||||||||||||||||||||||||||||
new AS_FisherStrand(), | ||||||||||||||||||||||||||||
new AS_StrandOddsRatio(), | ||||||||||||||||||||||||||||
new AS_BaseQualityRankSumTest(), | ||||||||||||||||||||||||||||
new AS_MappingQualityRankSumTest(), | ||||||||||||||||||||||||||||
new AS_ReadPosRankSumTest(), | ||||||||||||||||||||||||||||
new AS_RMSMappingQuality(), | ||||||||||||||||||||||||||||
new AS_QualByDepth(), | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
// All the `StandardAnnotation` implementers minus `InbreedingCoeff` and `ExcessHet`. | ||||||||||||||||||||||||||||
new BaseQualityRankSumTest(), | ||||||||||||||||||||||||||||
new ChromosomeCounts(), | ||||||||||||||||||||||||||||
new Coverage(), | ||||||||||||||||||||||||||||
new DepthPerAlleleBySample(), | ||||||||||||||||||||||||||||
new FisherStrand(), | ||||||||||||||||||||||||||||
new MappingQualityRankSumTest(), | ||||||||||||||||||||||||||||
new QualByDepth(), | ||||||||||||||||||||||||||||
new RMSMappingQuality(), | ||||||||||||||||||||||||||||
new ReadPosRankSumTest(), | ||||||||||||||||||||||||||||
new StrandOddsRatio() | ||||||||||||||||||||||||||||
); | ||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
@Override | ||||||||||||||||||||||||||||
protected void onStartup() { | ||||||||||||||||||||||||||||
super.onStartup(); | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
//TODO verify what we really need here | ||||||||||||||||||||||||||||
annotationEngine = new VariantAnnotatorEngine(makeVariantAnnotations(), null, Collections.emptyList(), false, false); | ||||||||||||||||||||||||||||
Comment on lines
+124
to
+125
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What do we really need here? |
||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
ChromosomeEnum.setRefVersion(refVersion); | ||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||
reference = directlyAccessEngineReferenceDataSource(); | ||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be optional or required?