diff --git a/build.gradle b/build.gradle
index ae89bd6566e..6aeac65eac7 100644
--- a/build.gradle
+++ b/build.gradle
@@ -60,14 +60,14 @@ repositories {
 final htsjdkVersion = System.getProperty('htsjdk.version','4.1.1')
 final picardVersion = System.getProperty('picard.version','3.2.0')
 final barclayVersion = System.getProperty('barclay.version','5.0.0')
-final sparkVersion = System.getProperty('spark.version', '3.5.0')
-final hadoopVersion = System.getProperty('hadoop.version', '3.3.6')
+final sparkVersion = System.getProperty('spark.version', '3.5.1')
+final hadoopVersion = System.getProperty('hadoop.version', '3.4.0')
 final disqVersion = System.getProperty('disq.version','0.3.8')
 final genomicsdbVersion = System.getProperty('genomicsdb.version','1.5.3')
 final bigQueryVersion = System.getProperty('bigQuery.version', '2.35.0')
 final bigQueryStorageVersion = System.getProperty('bigQueryStorage.version', '2.47.0')
 final guavaVersion = System.getProperty('guava.version', '32.1.3-jre')
-final log4j2Version = System.getProperty('log4j2Version', '2.17.1')
+final log4j2Version = System.getProperty('log4j2Version', '2.23.1')
 final testNGVersion = '7.0.0'
 
 final googleCloudNioDependency = 'com.google.cloud:google-cloud-nio:0.127.8'
@@ -170,12 +170,19 @@ configurations.all {
         // force the htsjdk version so we don't get a different one transitively
         force 'com.github.samtools:htsjdk:' + htsjdkVersion
         force 'com.google.protobuf:protobuf-java:3.23.4'
+        force 'dnsjava:dnsjava:3.6.1'
         // force testng dependency so we don't pick up a different version via GenomicsDB
         force 'org.testng:testng:' + testNGVersion
         force 'org.broadinstitute:barclay:' + barclayVersion
         force 'com.twitter:chill_2.12:0.10.0'
         force 'org.apache.commons:commons-math3:3.5'
+        force 'org.apache.avro:avro:1.11.3'
+        force 'io.airlift:aircompressor:0.27'
+        force 'org.apache.commons:commons-compress:1.26.0'
+        force 'ch.qos.logback:logback-classic:1.2.13'
+        force 'ch.qos.logback:logback-core:1.2.13'
+        force 'net.minidev:json-smart:2.5.0'
         // make sure we don't pick up an incorrect version of the GATK variant of the google-nio library
         // via Picard, etc.
         force googleCloudNioDependency
    }
@@ -185,6 +192,7 @@
     configurations*.exclude group: 'org.slf4j', module: 'slf4j-jdk14' // exclude this to prevent slf4j complaining about too many slf4j bindings
     configurations*.exclude group: 'com.google.guava', module: 'guava-jdk5'
     configurations*.exclude group: 'junit', module: 'junit'
+    configurations*.exclude group: 'log4j', module: 'log4j'
 }
 
 tasks.withType(JavaCompile) {
@@ -232,6 +240,12 @@ configurations {
 }
 
 dependencies {
+    // pin these directly, matching the resolutionStrategy forces above
+    implementation 'net.minidev:json-smart:2.5.0'
+    implementation 'dnsjava:dnsjava:3.6.1'
+    implementation 'biz.aQute.bnd:biz.aQute.bndlib:5.1.2'
+    // Scala 2.13 runtime to match the _2.13 Spark artifacts
+    implementation 'org.scala-lang:scala-library:2.13.14'
     implementation ('org.freemarker:freemarker:2.3.30')
     implementation 'org.broadinstitute:barclay:' + barclayVersion
@@ -299,7 +313,7 @@ dependencies {
     }
 
     // TODO: migrate to mllib_2.12.15?
-    implementation ('org.apache.spark:spark-mllib_2.12:' + sparkVersion) {
+    implementation ('org.apache.spark:spark-mllib_2.13:' + sparkVersion) {
         // JUL is used by Google Dataflow as the backend logger, so exclude jul-to-slf4j to avoid a loop
         exclude module: 'jul-to-slf4j'
         exclude module: 'javax.servlet'
@@ -518,6 +532,7 @@ tasks.withType(ShadowJar) {
     mergeServiceFiles()
     relocate 'com.google.common', 'org.broadinstitute.hellbender.relocated.com.google.common'
     zip64 true
+    exclude 'META-INF/maven/com.google.protobuf/protobuf-java/**'
    exclude 'log4j.properties' // from adam jar as it clashes with hellbender's log4j2.xml
    exclude '**/*.SF' // these are Manifest signature files and
    exclude '**/*.RSA' // keys which may accidentally be imported from other signed projects and then fail at runtime
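With this many pinned and forced versions, it is worth verifying what actually resolves onto the runtime classpath (`./gradlew dependencyInsight --dependency dnsjava --configuration runtimeClasspath` reports where each version comes from). As a runtime cross-check, a minimal sketch, assuming the Hadoop, Log4j 2, and Scala artifacts above are on the classpath; the class name is illustrative:

```java
import org.apache.hadoop.util.VersionInfo;
import org.apache.logging.log4j.LogManager;

public class ClasspathVersionCheck {
    public static void main(String[] args) {
        // Hadoop exposes its build version directly.
        System.out.println("hadoop : " + VersionInfo.getVersion());
        // Log4j 2 has no dedicated version API; the jar manifest usually carries it
        // (this may print null if the manifest omits Implementation-Version).
        System.out.println("log4j2 : " + LogManager.class.getPackage().getImplementationVersion());
        // Confirms the Scala 2.13 runtime required by the _2.13 Spark artifacts above.
        System.out.println("scala  : " + scala.util.Properties.versionNumberString());
    }
}
```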
diff --git a/src/main/java/org/broadinstitute/hellbender/engine/spark/RangePartitionCoalescer.java b/src/main/java/org/broadinstitute/hellbender/engine/spark/RangePartitionCoalescer.java
index fc1105c7d14..8c6274b9c69 100644
--- a/src/main/java/org/broadinstitute/hellbender/engine/spark/RangePartitionCoalescer.java
+++ b/src/main/java/org/broadinstitute/hellbender/engine/spark/RangePartitionCoalescer.java
@@ -4,7 +4,7 @@
 import org.apache.spark.rdd.PartitionCoalescer;
 import org.apache.spark.rdd.PartitionGroup;
 import org.apache.spark.rdd.RDD;
-import scala.collection.JavaConversions;
+import scala.jdk.javaapi.CollectionConverters;
 import scala.collection.Seq;
 
 import java.io.Serializable;
@@ -14,7 +14,7 @@
 /**
  * A {@link PartitionCoalescer} that allows a range of partitions to be coalesced into groups.
  */
-class RangePartitionCoalescer implements PartitionCoalescer, Serializable, scala.Serializable {
+class RangePartitionCoalescer implements PartitionCoalescer, Serializable {
 
     private static final long serialVersionUID = 1L;
 
@@ -45,7 +45,7 @@ public PartitionGroup[] coalesce(int maxPartitions, RDD parent) {
             PartitionGroup group = new PartitionGroup(preferredLocation);
             List partitionsInGroup = partitions.subList(i, maxEndPartitionIndexes.get(i) + 1);
-            group.partitions().append(JavaConversions.asScalaBuffer(partitionsInGroup));
+            group.partitions().addAll(CollectionConverters.asScala(partitionsInGroup).toList());
             groups[i] = group;
         }
         return groups;
     }
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSBuildReferenceTaxonomyUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSBuildReferenceTaxonomyUtils.java
index eb4a7687080..43e57b6fd78 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSBuildReferenceTaxonomyUtils.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/pathseq/PSBuildReferenceTaxonomyUtils.java
@@ -313,13 +313,13 @@ public static BufferedReader getBufferedReaderTarGz(final String tarPath, final
         try {
             InputStream result = null;
             final TarArchiveInputStream tarStream = new TarArchiveInputStream(new GZIPInputStream(new FileInputStream(tarPath)));
-            TarArchiveEntry entry = tarStream.getNextTarEntry();
+            TarArchiveEntry entry = tarStream.getNextEntry();
             while (entry != null) {
                 if (entry.getName().equals(fileName)) {
                     result = tarStream;
                     break;
                 }
-                entry = tarStream.getNextTarEntry();
+                entry = tarStream.getNextEntry();
             }
             if (result == null) {
                 throw new UserException.BadInput("Could not find file " + fileName + " in tarball " + tarPath);
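For context on the commons-compress change: 1.26 deprecates `getNextTarEntry()` in favor of `getNextEntry()`, which in recent releases returns `TarArchiveEntry` directly. A minimal, self-contained sketch of the same iteration pattern, assuming the commons-compress 1.26.0 forced above; the class name and argument handling are illustrative:

```java
import java.io.FileInputStream;
import java.io.IOException;
import java.util.zip.GZIPInputStream;

import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;

public class TarGzLister {
    public static void main(String[] args) throws IOException {
        final String tarPath = args[0]; // any .tar.gz file
        try (TarArchiveInputStream tarStream =
                     new TarArchiveInputStream(new GZIPInputStream(new FileInputStream(tarPath)))) {
            // getNextEntry() replaces the deprecated getNextTarEntry().
            TarArchiveEntry entry;
            while ((entry = tarStream.getNextEntry()) != null) {
                System.out.printf("%s (%d bytes)%n", entry.getName(), entry.getSize());
            }
        }
    }
}
```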
diff --git a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java
index 1ac964daeac..ca4bd73ade4 100644
--- a/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java
+++ b/src/main/java/org/broadinstitute/hellbender/tools/spark/sv/StructuralVariationDiscoveryPipelineSpark.java
@@ -41,7 +41,7 @@
 import org.broadinstitute.hellbender.utils.io.IOUtils;
 import org.broadinstitute.hellbender.utils.read.GATKRead;
 import org.broadinstitute.hellbender.utils.read.SAMRecordToGATKReadAdapter;
-import scala.Serializable;
+import java.io.Serializable;
 
 import java.io.IOException;
 import java.nio.file.Paths;
diff --git a/src/main/java/org/broadinstitute/hellbender/utils/io/IOUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/io/IOUtils.java
index 167b0a8611d..bf597fa9c8e 100644
--- a/src/main/java/org/broadinstitute/hellbender/utils/io/IOUtils.java
+++ b/src/main/java/org/broadinstitute/hellbender/utils/io/IOUtils.java
@@ -468,7 +468,7 @@ private static void extractFilesFromArchiveStream(final TarArchiveInputStream ar
 
         // Go through the archive and get the entries:
         TarArchiveEntry entry;
-        while ((entry = archiveStream.getNextTarEntry()) != null) {
+        while ((entry = archiveStream.getNextEntry()) != null) {
 
             logger.info("Extracting file: " + entry.getName());
 
@@ -549,7 +549,7 @@ private static void addToTar(TarArchiveOutputStream out, File file, String dir)
         if (file.isFile()){
             out.putArchiveEntry(new TarArchiveEntry(file, entry));
             try (FileInputStream in = new FileInputStream(file)){
-                org.apache.commons.compress.utils.IOUtils.copy(in, out);
+                org.apache.commons.io.IOUtils.copy(in, out);
             }
             out.closeArchiveEntry();
         } else if (file.isDirectory()) {
diff --git a/src/main/java/org/broadinstitute/hellbender/utils/logging/OneShotLogger.java b/src/main/java/org/broadinstitute/hellbender/utils/logging/OneShotLogger.java
index d94c3747b3a..ea688b9afd7 100644
--- a/src/main/java/org/broadinstitute/hellbender/utils/logging/OneShotLogger.java
+++ b/src/main/java/org/broadinstitute/hellbender/utils/logging/OneShotLogger.java
@@ -3,7 +3,7 @@
 import com.google.common.annotations.VisibleForTesting;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
-import org.apache.logging.log4j.util.Supplier;
+import java.util.function.Supplier;
 
 /**
  * A logger wrapper class which only outputs the first warning provided to it
diff --git a/src/main/java/org/broadinstitute/hellbender/utils/runtime/CapturedStreamOutput.java b/src/main/java/org/broadinstitute/hellbender/utils/runtime/CapturedStreamOutput.java
index c2c565b8601..695cd487e99 100644
--- a/src/main/java/org/broadinstitute/hellbender/utils/runtime/CapturedStreamOutput.java
+++ b/src/main/java/org/broadinstitute/hellbender/utils/runtime/CapturedStreamOutput.java
@@ -48,7 +48,7 @@ public CapturedStreamOutput(OutputStreamSettings settings, InputStream processSt
         } else {
             outputStream = new HardThresholdingOutputStream(bufferSize) {
                 @Override
-                protected OutputStream getStream() {
+                protected OutputStream getOutputStream() {
                     return bufferTruncated ? NullOutputStream.INSTANCE : bufferStream;
                 }
 
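The `CapturedStreamOutput` change tracks commons-io's rename of the `ThresholdingOutputStream.getStream()` hook to `getOutputStream()`, alongside the `NullOutputStream.INSTANCE` singleton that replaced the deprecated `NULL_OUTPUT_STREAM` field. A minimal sketch of the same capped-capture pattern, assuming commons-io 2.12 or later; class and variable names are illustrative:

```java
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.output.HardThresholdingOutputStream;
import org.apache.commons.io.output.NullOutputStream;

public class TruncatingCapture {
    public static void main(String[] args) throws IOException {
        final int bufferSize = 16; // hard cap on captured bytes
        final ByteArrayOutputStream buffer = new ByteArrayOutputStream(bufferSize);

        try (OutputStream capped = new HardThresholdingOutputStream(bufferSize) {
            @Override
            protected OutputStream getOutputStream() { // was getStream() before the rename
                // After the threshold fires, discard further output instead of growing the buffer.
                return isThresholdExceeded() ? NullOutputStream.INSTANCE : buffer;
            }

            @Override
            protected void thresholdReached() {
                // Nothing to do here; getOutputStream() switches to the null sink.
            }
        }) {
            capped.write("this line is longer than sixteen bytes".getBytes(StandardCharsets.UTF_8));
        }
        // Only the first 16 bytes survive.
        System.out.println(buffer.toString(StandardCharsets.UTF_8));
    }
}
```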
diff --git a/src/main/resources/large/cnn_score_variants/1d_cnn_mix_train_full_bn.hd5 b/src/main/resources/large/cnn_score_variants/1d_cnn_mix_train_full_bn.hd5
index 48581550b03..bb211b160c7 100644
Binary files a/src/main/resources/large/cnn_score_variants/1d_cnn_mix_train_full_bn.hd5 and b/src/main/resources/large/cnn_score_variants/1d_cnn_mix_train_full_bn.hd5 differ
diff --git a/src/main/resources/large/cnn_score_variants/1d_cnn_mix_train_full_bn.json b/src/main/resources/large/cnn_score_variants/1d_cnn_mix_train_full_bn.json
index 666a73dfb9a..f9a2e5a3de2 100644
--- a/src/main/resources/large/cnn_score_variants/1d_cnn_mix_train_full_bn.json
+++ b/src/main/resources/large/cnn_score_variants/1d_cnn_mix_train_full_bn.json
@@ -1,3 +1 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:eda2517817b23238c2b28f69a1fa39e9b85b45985854f0a5d5508280e76da39e
-size 519
+{"input_tensor_map": {"A": 0, "C": 1, "T": 3, "G": 2}, "data_dir": "/dsde/data/deep/vqsr/tensors/mix_1d/", "input_tensor_map_name": "reference", "input_annotations": ["MQ", "DP", "SOR", "FS", "QD", "MQRankSum", "ReadPosRankSum"], "input_annotation_set": "annotations", "output_labels": {"NOT_SNP": 0, "INDEL": 3, "SNP": 2, "NOT_INDEL": 1}, "window_size": 128, "architecture": "1d_cnn_mix_train_full_bn.hd5", "read_limit": 128, "input_symbols": {"A": 0, "C": 1, "*": 4, "T": 3, "G": 2}, "id": "1d_cnn_mix_train_full_bn"}
\ No newline at end of file
diff --git a/src/main/resources/large/cnn_score_variants/2d_cnn_mix_train.hd5 b/src/main/resources/large/cnn_score_variants/2d_cnn_mix_train.hd5
index c7d8916236d..9488f71212f 100644
Binary files a/src/main/resources/large/cnn_score_variants/2d_cnn_mix_train.hd5 and b/src/main/resources/large/cnn_score_variants/2d_cnn_mix_train.hd5 differ
diff --git a/src/main/resources/large/cnn_score_variants/2d_cnn_mix_train.json b/src/main/resources/large/cnn_score_variants/2d_cnn_mix_train.json
index 8efe4242f46..8f26483c9b3 100644
--- a/src/main/resources/large/cnn_score_variants/2d_cnn_mix_train.json
+++ b/src/main/resources/large/cnn_score_variants/2d_cnn_mix_train.json
@@ -1,3 +1 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4ed7feb0343e9ac03135b1456b2c8d2edab1b359c4950908c4d44152c0634a89
-size 732
+{"input_tensor_map": {"read_T": 3, "flag_bit_5": 11, "reference_T": 8, "flag_bit_7": 13, "flag_bit_6": 12, "mapping_quality": 14, "read_G": 2, "read_C": 1, "read_A": 0, "reference_*": 9, "reference_G": 7, "flag_bit_4": 10, "reference_A": 5, "read_*": 4, "reference_C": 6}, "data_dir": "/dsde/data/deep/vqsr/tensors/mix_big_ref_read_anno/", "input_tensor_map_name": "read_tensor", "input_annotations": ["MQ", "DP", "SOR", "FS", "QD", "MQRankSum", "ReadPosRankSum"], "input_annotation_set": "gatk", "output_labels": {"NOT_SNP": 0, "INDEL": 3, "SNP": 2, "NOT_INDEL": 1}, "window_size": 128, "architecture": "2d_cnn_mix_train.hd5", "read_limit": 128, "input_symbols": {"A": 0, "C": 1, "*": 4, "T": 3, "G": 2}, "id": "2d_cnn_mix_train"}
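The JSON files in this directory appear to be model configuration for the CNN variant-scoring tools; their `input_symbols` and `input_tensor_map` entries assign each base (and `*`) a tensor channel. Purely to illustrate how such a map is typically applied (the helper below is hypothetical, not GATK code), a reference window can be one-hot encoded like this:

```java
import java.util.Map;

public class ReferenceOneHot {
    // Mirrors the "input_symbols" map from the JSON configs above.
    private static final Map<Character, Integer> SYMBOLS =
            Map.of('A', 0, 'C', 1, 'G', 2, 'T', 3, '*', 4);

    /** One-hot encodes a reference window into a [position][channel] tensor. */
    static float[][] encode(final String window) {
        final float[][] tensor = new float[window.length()][SYMBOLS.size()];
        for (int i = 0; i < window.length(); i++) {
            final Integer channel = SYMBOLS.get(Character.toUpperCase(window.charAt(i)));
            if (channel != null) { // unknown bases (e.g. N) stay all-zero
                tensor[i][channel] = 1.0f;
            }
        }
        return tensor;
    }

    public static void main(String[] args) {
        final float[][] t = encode("ACGT*");
        System.out.println(t.length + " positions x " + t[0].length + " channels");
    }
}
```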
diff --git a/src/main/resources/large/cnn_score_variants/small_2d.hd5 b/src/main/resources/large/cnn_score_variants/small_2d.hd5
index deb36d22e04..cd296b98fa2 100644
Binary files a/src/main/resources/large/cnn_score_variants/small_2d.hd5 and b/src/main/resources/large/cnn_score_variants/small_2d.hd5 differ
diff --git a/src/main/resources/large/cnn_score_variants/small_2d.json b/src/main/resources/large/cnn_score_variants/small_2d.json
index c35cfbdfcae..2705637a092 100644
--- a/src/main/resources/large/cnn_score_variants/small_2d.json
+++ b/src/main/resources/large/cnn_score_variants/small_2d.json
@@ -1,3 +1 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e38e09cfe7b7ffbc80dce4972bc9c382148520147d46738a3f6f3235b2d876c6
-size 758
+{"id": "nova_hiseq_mix_small_tf", "output_labels": {"NOT_SNP": 0, "NOT_INDEL": 1, "SNP": 2, "INDEL": 3}, "architecture": "small_2d.hd5", "input_symbols": {"A": 0, "C": 1, "G": 2, "T": 3, "*": 4}, "input_tensor_map_name": "read_tensor", "input_tensor_map": {"read_A": 0, "read_C": 1, "read_G": 2, "read_T": 3, "read_*": 4, "reference_A": 5, "reference_C": 6, "reference_G": 7, "reference_T": 8, "reference_*": 9, "flag_bit_4": 10, "flag_bit_5": 11, "flag_bit_6": 12, "flag_bit_7": 13, "mapping_quality": 14}, "window_size": 128, "read_limit": 128, "input_annotations": ["MQ", "DP", "SOR", "FS", "QD", "MQRankSum", "ReadPosRankSum"], "input_annotation_set": "best_practices", "data_dir": "/tensors/g94794_wgs1_hg38_full_channels_last/", "channels_last": true}
diff --git a/src/main/resources/large/testResourceFile.txt b/src/main/resources/large/testResourceFile.txt
index 7b4e6dc4075..b70095dd5bf 100644
--- a/src/main/resources/large/testResourceFile.txt
+++ b/src/main/resources/large/testResourceFile.txt
@@ -1,3 +1 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:5e69a86f301ab9ab0d507ad7659abb4ad3732382ccbeb714db497e51eb3cf87b
-size 29
+this is a test resource file
diff --git a/src/test/java/org/broadinstitute/hellbender/engine/spark/RangePartitionCoalescerUnitTest.java b/src/test/java/org/broadinstitute/hellbender/engine/spark/RangePartitionCoalescerUnitTest.java
index 118c72736e7..bb664b6cb59 100644
--- a/src/test/java/org/broadinstitute/hellbender/engine/spark/RangePartitionCoalescerUnitTest.java
+++ b/src/test/java/org/broadinstitute/hellbender/engine/spark/RangePartitionCoalescerUnitTest.java
@@ -8,7 +8,7 @@
 import org.broadinstitute.hellbender.GATKBaseTest;
 import org.testng.annotations.BeforeTest;
 import org.testng.annotations.Test;
-import scala.collection.JavaConversions;
+import scala.jdk.javaapi.CollectionConverters;
 
 import java.util.List;
 
@@ -38,9 +38,9 @@ public void testIdentity() {
         RangePartitionCoalescer coalescer = new RangePartitionCoalescer(maxEndPartitionIndexes);
         PartitionGroup[] groups = coalescer.coalesce(rdd.getNumPartitions(), rdd.rdd());
         assertEquals(groups.length, 3);
-        assertEquals(groups[0].partitions(), JavaConversions.asScalaBuffer(ImmutableList.of(partitions[0])));
-        assertEquals(groups[1].partitions(), JavaConversions.asScalaBuffer(ImmutableList.of(partitions[1])));
-        assertEquals(groups[2].partitions(), JavaConversions.asScalaBuffer(ImmutableList.of(partitions[2])));
+        assertEquals(groups[0].partitions(), CollectionConverters.asScala(ImmutableList.of(partitions[0])));
+        assertEquals(groups[1].partitions(), CollectionConverters.asScala(ImmutableList.of(partitions[1])));
+        assertEquals(groups[2].partitions(), CollectionConverters.asScala(ImmutableList.of(partitions[2])));
     }
 
     @Test
@@ -49,8 +49,8 @@ public void testNonIdentity() {
         RangePartitionCoalescer coalescer = new RangePartitionCoalescer(maxEndPartitionIndexes);
         PartitionGroup[] groups = coalescer.coalesce(rdd.getNumPartitions(), rdd.rdd());
         assertEquals(groups.length, 3);
-        assertEquals(groups[0].partitions(), JavaConversions.asScalaBuffer(ImmutableList.of(partitions[0], partitions[1])));
-        assertEquals(groups[1].partitions(), JavaConversions.asScalaBuffer(ImmutableList.of(partitions[1], partitions[2])));
-        assertEquals(groups[2].partitions(), JavaConversions.asScalaBuffer(ImmutableList.of(partitions[2])));
+        assertEquals(groups[0].partitions(), CollectionConverters.asScala(ImmutableList.of(partitions[0], partitions[1])));
+        assertEquals(groups[1].partitions(), CollectionConverters.asScala(ImmutableList.of(partitions[1], partitions[2])));
+        assertEquals(groups[2].partitions(), CollectionConverters.asScala(ImmutableList.of(partitions[2])));
     }
 }
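For reference on the `JavaConversions` to `CollectionConverters` migration used throughout this change: Scala 2.13 removed `scala.collection.JavaConversions`, and `scala.jdk.javaapi.CollectionConverters` is its explicit, Java-callable replacement. A standalone sketch, assuming a Scala 2.13 `scala-library` on the classpath; the class name is illustrative:

```java
import java.util.Arrays;
import java.util.List;

import scala.collection.immutable.Seq;
import scala.collection.mutable.Buffer;
import scala.jdk.javaapi.CollectionConverters;

public class ConvertersDemo {
    public static void main(String[] args) {
        final List<String> javaList = Arrays.asList("a", "b", "c");

        // asScala on a java.util.List yields a mutable Buffer view of the same data...
        final Buffer<String> buffer = CollectionConverters.asScala(javaList);
        // ...which can be materialized as an immutable Seq when an API requires one.
        final Seq<String> seq = buffer.toSeq();

        // And back again: asJava wraps a Scala collection as a java.util.List view.
        final List<String> roundTrip = CollectionConverters.asJava(seq);
        System.out.println(roundTrip);
    }
}
```

Note that `RangePartitionCoalescer` above switches from `append` to `addAll` because Scala 2.13's `Buffer.append` takes a single element rather than varargs.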