various vulnerability fixes #8950

Open · wants to merge 5 commits into base: master

Changes from 2 commits
40 changes: 40 additions & 0 deletions Dockerfile.nfer
@@ -0,0 +1,40 @@
# Use the specified base image
Collaborator: Remove this third-party Dockerfile.

Author: Yes, will remove this and update you.

Author: This is deleted.

FROM hz-registry.nferx.com/ubuntu as builder
Collaborator: @vilay-nference Where is the Dockerfile for this image? It is not immediately accessible, so we cannot verify its contents.

In general, we use base images provided directly from first-party sources (e.g. https://hub.docker.com/_/ubuntu).

Please change this to use a standard base image and then we can re-evaluate.
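For reference, a minimal sketch of the kind of change being requested, assuming the official Docker Hub Ubuntu image; the specific tag is an illustrative assumption, not something specified in this PR:

```dockerfile
# Illustrative only: use a first-party base image from Docker Hub
# (the 22.04 tag is an assumed example, not part of this PR)
FROM ubuntu:22.04 AS builder
WORKDIR /usr/src/app
```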

Author: This is used for internal purposes. I'll delete this file; it is not required for GATK.


# Set the working directory in the container
WORKDIR /usr/src/app

# Install required packages and dependencies
RUN apt-get update && \
apt-get install -y \
openjdk-17-jdk \
python3-pip \
python3-venv \
git \
build-essential \
libtool \
zlib1g-dev \
liblzma-dev \
git-lfs && \
rm -rf /var/lib/apt/lists/*

# Set up Python environment and install packages
RUN python3 -m venv venv && \
. venv/bin/activate && \
pip install --upgrade pip setuptools wheel && \
pip install Flask pandas && \
pip cache purge && \
ln -s /usr/bin/python3 /usr/bin/python &&\
git lfs install

# Copy the application files
COPY . gatk/

# Build the GATK jar file
RUN cd gatk && \
./gradlew localJar && \
mv build/libs/gatk-package* /usr/src/app/gatk.jar && \
cd .. && \
rm -rf gatk/ && \
rm -rf /root/.gradle && \
apt-get remove -y git-lfs
22 changes: 18 additions & 4 deletions build.gradle
@@ -60,14 +60,14 @@ repositories {
final htsjdkVersion = System.getProperty('htsjdk.version','4.1.1')
final picardVersion = System.getProperty('picard.version','3.2.0')
final barclayVersion = System.getProperty('barclay.version','5.0.0')
final sparkVersion = System.getProperty('spark.version', '3.5.0')
final hadoopVersion = System.getProperty('hadoop.version', '3.3.6')
final sparkVersion = System.getProperty('spark.version', '3.5.1')
final hadoopVersion = System.getProperty('hadoop.version', '3.4.0')
final disqVersion = System.getProperty('disq.version','0.3.8')
final genomicsdbVersion = System.getProperty('genomicsdb.version','1.5.3')
final bigQueryVersion = System.getProperty('bigQuery.version', '2.35.0')
final bigQueryStorageVersion = System.getProperty('bigQueryStorage.version', '2.47.0')
final guavaVersion = System.getProperty('guava.version', '32.1.3-jre')
final log4j2Version = System.getProperty('log4j2Version', '2.17.1')
final log4j2Version = System.getProperty('log4j2Version', '2.23.1')
final testNGVersion = '7.0.0'

final googleCloudNioDependency = 'com.google.cloud:google-cloud-nio:0.127.8'
@@ -176,6 +176,12 @@ configurations.all {
force 'com.twitter:chill_2.12:0.10.0'
force 'org.apache.commons:commons-math3:3.5'

force 'org.apache.avro:avro:1.11.3'
force 'io.airlift:aircompressor:0.27'
force 'org.apache.commons:commons-compress:1.26.0'
force 'ch.qos.logback:logback-classic:1.2.13'
force 'ch.qos.logback:logback-core:1.2.13'
force 'net.minidev:json-smart:2.5.0'
// make sure we don't pick up an incorrect version of the GATK variant of the google-nio library
// via Picard, etc.
force googleCloudNioDependency
@@ -185,6 +191,7 @@
configurations*.exclude group: 'org.slf4j', module: 'slf4j-jdk14' //exclude this to prevent slf4j complaining about to many slf4j bindings
configurations*.exclude group: 'com.google.guava', module: 'guava-jdk5'
configurations*.exclude group: 'junit', module: 'junit'
configurations*.exclude group: 'log4j', module: 'log4j'
}

tasks.withType(JavaCompile) {
@@ -232,6 +239,12 @@ configurations {
}

dependencies {
implementation('net.minidev:json-smart:2.4.9') {
Member: Could you explain what these new dependencies are?

Author: These are transitive dependencies of one of the packages; we are overriding them with newer, compatible versions.

exclude group: 'net.minidev', module: 'json-smart'
}
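For context, a minimal sketch of the override pattern described above, using Gradle's resolution strategy. The coordinates shown are the ones this PR forces elsewhere in build.gradle; the snippet is illustrative, not the exact change:

```groovy
// Illustrative sketch: pin patched versions of vulnerable transitive dependencies
// so every configuration resolves to them.
configurations.all {
    resolutionStrategy {
        force 'net.minidev:json-smart:2.5.0'
        force 'org.apache.commons:commons-compress:1.26.0'
    }
}
```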
// Example dependencies
implementation 'biz.aQute.bnd:biz.aQute.bndlib:5.1.2'
Collaborator: Don't add new GATK dependencies here unless absolutely necessary.

Author: Again, dnsjava is used from the hadoop client.

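One way to verify where dnsjava enters the dependency graph is Gradle's built-in dependencyInsight report, shown here as an illustration rather than a step from this PR:

```sh
# Ask Gradle which dependency pulls in dnsjava and which version is selected
./gradlew dependencyInsight --dependency dnsjava --configuration runtimeClasspath
```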
Author: @droazen Let us know if any more modifications are required.

implementation 'org.scala-lang:scala-library:2.13.14'

implementation ('org.freemarker:freemarker:2.3.30')
implementation 'org.broadinstitute:barclay:' + barclayVersion
@@ -299,7 +312,7 @@ dependencies {
}

// TODO: migrate to mllib_2.12.15?
implementation ('org.apache.spark:spark-mllib_2.12:' + sparkVersion) {
implementation ('org.apache.spark:spark-mllib_2.13:' + sparkVersion) {
// JUL is used by Google Dataflow as the backend logger, so exclude jul-to-slf4j to avoid a loop
exclude module: 'jul-to-slf4j'
exclude module: 'javax.servlet'
@@ -518,6 +531,7 @@ tasks.withType(ShadowJar) {
mergeServiceFiles()
relocate 'com.google.common', 'org.broadinstitute.hellbender.relocated.com.google.common'
zip64 true
exclude 'META-INF/maven/com.google.protobuf/protobuf-java/**'
exclude 'log4j.properties' // from adam jar as it clashes with hellbender's log4j2.xml
exclude '**/*.SF' // these are Manifest signature files and
exclude '**/*.RSA' // keys which may accidentally be imported from other signed projects and then fail at runtime
@@ -4,7 +4,8 @@
import org.apache.spark.rdd.PartitionCoalescer;
import org.apache.spark.rdd.PartitionGroup;
import org.apache.spark.rdd.RDD;
import scala.collection.JavaConversions;
// import scala.collection.JavaConversions;
import scala.jdk.javaapi.CollectionConverters;
import scala.collection.Seq;

import java.io.Serializable;
@@ -14,7 +15,7 @@
/**
* A {@link PartitionCoalescer} that allows a range of partitions to be coalesced into groups.
*/
class RangePartitionCoalescer implements PartitionCoalescer, Serializable, scala.Serializable {
class RangePartitionCoalescer implements PartitionCoalescer, Serializable {

private static final long serialVersionUID = 1L;

@@ -45,7 +46,8 @@ public PartitionGroup[] coalesce(int maxPartitions, RDD<?> parent) {
PartitionGroup group = new PartitionGroup(preferredLocation);
List<Partition> partitionsInGroup =
partitions.subList(i, maxEndPartitionIndexes.get(i) + 1);
group.partitions().append(JavaConversions.asScalaBuffer(partitionsInGroup));
// group.partitions().append(CollectionConverters.asScala(partitionsInGroup).toSeq());
group.partitions().addAll(CollectionConverters.asScala(partitionsInGroup).toList());
groups[i] = group;
}
return groups;
@@ -313,13 +313,13 @@ public static BufferedReader getBufferedReaderTarGz(final String tarPath, final
try {
InputStream result = null;
final TarArchiveInputStream tarStream = new TarArchiveInputStream(new GZIPInputStream(new FileInputStream(tarPath)));
TarArchiveEntry entry = tarStream.getNextTarEntry();
TarArchiveEntry entry = tarStream.getNextEntry();
while (entry != null) {
if (entry.getName().equals(fileName)) {
result = tarStream;
break;
}
entry = tarStream.getNextTarEntry();
entry = tarStream.getNextEntry();
}
if (result == null) {
throw new UserException.BadInput("Could not find file " + fileName + " in tarball " + tarPath);
@@ -41,7 +41,8 @@
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.SAMRecordToGATKReadAdapter;
import scala.Serializable;
// import scala.Serializable;
import java.io.Serializable;

import java.io.IOException;
import java.nio.file.Paths;
@@ -468,7 +468,7 @@ private static void extractFilesFromArchiveStream(final TarArchiveInputStream ar

// Go through the archive and get the entries:
TarArchiveEntry entry;
while ((entry = archiveStream.getNextTarEntry()) != null) {
while ((entry = archiveStream.getNextEntry()) != null) {

logger.info("Extracting file: " + entry.getName());

@@ -549,7 +549,8 @@ private static void addToTar(TarArchiveOutputStream out, File file, String dir)
if (file.isFile()){
out.putArchiveEntry(new TarArchiveEntry(file, entry));
try (FileInputStream in = new FileInputStream(file)){
org.apache.commons.compress.utils.IOUtils.copy(in, out);
// org.apache.commons.compress.utils.IOUtils.copy(in, out);
org.apache.commons.io.IOUtils.copy(in, out);
}
out.closeArchiveEntry();
} else if (file.isDirectory()) {
@@ -3,7 +3,7 @@
import com.google.common.annotations.VisibleForTesting;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.util.Supplier;
import java.util.function.Supplier;

/**
* A logger wrapper class which only outputs the first warning provided to it
@@ -48,7 +48,7 @@ public CapturedStreamOutput(OutputStreamSettings settings, InputStream processSt
} else {
outputStream = new HardThresholdingOutputStream(bufferSize) {
@Override
protected OutputStream getStream() {
protected OutputStream getOutputStream() {
return bufferTruncated ? NullOutputStream.INSTANCE : bufferStream;
}

@@ -8,7 +8,7 @@
import org.broadinstitute.hellbender.GATKBaseTest;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import scala.collection.JavaConversions;
import scala.jdk.javaapi.CollectionConverters;

import java.util.List;

@@ -38,9 +38,9 @@ public void testIdentity() {
RangePartitionCoalescer coalescer = new RangePartitionCoalescer(maxEndPartitionIndexes);
PartitionGroup[] groups = coalescer.coalesce(rdd.getNumPartitions(), rdd.rdd());
assertEquals(groups.length, 3);
assertEquals(groups[0].partitions(), JavaConversions.asScalaBuffer(ImmutableList.of(partitions[0])));
assertEquals(groups[1].partitions(), JavaConversions.asScalaBuffer(ImmutableList.of(partitions[1])));
assertEquals(groups[2].partitions(), JavaConversions.asScalaBuffer(ImmutableList.of(partitions[2])));
assertEquals(groups[0].partitions(), CollectionConverters.asScala(ImmutableList.of(partitions[0])));
assertEquals(groups[1].partitions(), CollectionConverters.asScala(ImmutableList.of(partitions[1])));
assertEquals(groups[2].partitions(), CollectionConverters.asScala(ImmutableList.of(partitions[2])));
}

@Test
@@ -49,8 +49,8 @@ public void testNonIdentity() {
RangePartitionCoalescer coalescer = new RangePartitionCoalescer(maxEndPartitionIndexes);
PartitionGroup[] groups = coalescer.coalesce(rdd.getNumPartitions(), rdd.rdd());
assertEquals(groups.length, 3);
assertEquals(groups[0].partitions(), JavaConversions.asScalaBuffer(ImmutableList.of(partitions[0], partitions[1])));
assertEquals(groups[1].partitions(), JavaConversions.asScalaBuffer(ImmutableList.of(partitions[1], partitions[2])));
assertEquals(groups[2].partitions(), JavaConversions.asScalaBuffer(ImmutableList.of(partitions[2])));
assertEquals(groups[0].partitions(), CollectionConverters.asScala(ImmutableList.of(partitions[0], partitions[1])));
assertEquals(groups[1].partitions(), CollectionConverters.asScala(ImmutableList.of(partitions[1], partitions[2])));
assertEquals(groups[2].partitions(), CollectionConverters.asScala(ImmutableList.of(partitions[2])));
}
}