Skip to content

Commit

Permalink
feat: honor original algorithm, increase test coverage (#7)
Browse files Browse the repository at this point in the history
  • Loading branch information
clintval authored Jul 28, 2024
1 parent 62b9da7 commit db7a512
Show file tree
Hide file tree
Showing 8 changed files with 111 additions and 24 deletions.
18 changes: 12 additions & 6 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@ jobs:
strategy:
matrix:
suite: [ neodisambiguate ]
java-version: [ 1.8, 11, 17, 21 ]
java-version: [ 8, 11, 17, 21 ]
steps:
- uses: actions/checkout@v2
- uses: actions/setup-java@v1
- uses: actions/checkout@v4
- uses: actions/setup-java@v4
with:
distribution: 'temurin'
java-version: ${{ matrix.java-version }}
- name: Cache for Scala Dependencies
uses: actions/cache@v2
uses: actions/cache@v4
with:
path: |
~/.mill/download
Expand All @@ -33,5 +34,10 @@ jobs:
- name: Create Code Coverage Report
if: matrix.java-version == '11'
run: |
./mill --no-server --disable-ticker ${{ matrix.suite }}.scoverage.xmlReport
# TODO: build an HTML report and upload as an artifact
./mill --no-server --disable-ticker ${{ matrix.suite }}.scoverage.htmlReport
- name: Upload Code Coverage Report
uses: actions/upload-artifact@v4
if: matrix.java-version == '11'
with:
name: code-coverage
path: out/neodisambiguate/scoverage/htmlReport.dest/
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ object Disambiguate {
}

/** Command line configuration. */
private class NeodisambiguateConf(args: Seq[String]) extends ScallopConf(args) {
private[neodisambiguate] class NeodisambiguateConf(args: Seq[String]) extends ScallopConf(args) {
private val packageName: String = Option(this.getClass.getPackage.getImplementationTitle).getOrElse("neodisambiguate")
private val version: String = Option(this.getClass.getPackage.getImplementationVersion).getOrElse("UNKNOWN")
version(s"$packageName $version\n")
Expand Down Expand Up @@ -168,10 +168,12 @@ object Disambiguate {
IOUtil.setCompressionLevel(conf.compression())
Io.compressionLevel = conf.compression()

// $COVERAGE-OFF$
if (IntelCompressionLibrarySupported) {
BlockCompressedOutputStream.setDefaultDeflaterFactory(new IntelDeflaterFactory)
BlockGunzipper.setDefaultInflaterFactory(new IntelInflaterFactory)
}
// $COVERAGE-ON$

Io.tmpDir = conf.tmpDir()
System.setProperty("java.io.tmpdir", conf.tmpDir().toAbsolutePath.toString)
Expand Down
10 changes: 0 additions & 10 deletions neodisambiguate/src/io/cvbio/neodisambiguate/MathUtil.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,6 @@ object MathUtil {
def countMinBy[B](fn: A => B)(implicit cmp: Ordering[B]): Int = MathUtil.countMin(self.map(fn))
}

/** Implicits for optionally finding the maximum and minimum items in a collection. */
implicit class WithMaxMinOption[A](self: IterableOnce[A]) {

/** Optionally return the maximum item from a container. */
def maxOption(implicit cmp: Ordering[A]): Option[A] = if (self.iterator.isEmpty) None else Some(self.iterator.max)

/** Optionally return the minimum item from a container. */
def minOption(implicit cmp: Ordering[A]): Option[A] = if (self.iterator.isEmpty) None else Some(self.iterator.min)
}

/** Implicits picking the single maximum or single minimum item in a collection. */
implicit class WithPickMaxMinBy[A](self: Seq[A]) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,19 @@ object TemplateOrdering extends FgBioEnum[TemplateOrdering] {
* If neither template is clearly better, then the templates are equivalent.
*/
case object ClassicOrdering extends TemplateOrdering {
// $COVERAGE-OFF$
private def bestAlignmentScore(template: Template): MetricPair[Int] = MetricPair[Int](template, AS)(_ max _)
private def worstAlignmentScore(template: Template): MetricPair[Int] = MetricPair[Int](template, AS)(_ min _)
private def bestNumMismatches(template: Template): MetricPair[Int] = MetricPair[Int](template, NM)(_ min _)
private def worstNumMismatches(template: Template): MetricPair[Int] = MetricPair[Int](template, NM)(_ max _)
// $COVERAGE-ON$

/** Compare two templates using the original published algorithm. */
override def compare(x: Template, y: Template): Int = {
val alignmentScore = (template: Template) => MetricPair[Int](template, AS.toString)(_ max _)
val numMismatches = (template: Template) => MetricPair[Int](template, NM.toString)(_ min _)

var compare = alignmentScore(x).compare(alignmentScore(y))
if (compare == 0) compare = -numMismatches(x).compare(numMismatches(y)) // Negate because less is better.
var compare = bestAlignmentScore(x).compare(bestAlignmentScore(y))
if (compare == 0) worstAlignmentScore(x).compare(worstAlignmentScore(y))
if (compare == 0) compare = -bestNumMismatches(x).compare(bestNumMismatches(y)) // Negate because less is better.
if (compare == 0) compare = -worstNumMismatches(x).compare(worstNumMismatches(y)) // Negate because less is better.
compare
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ object Bams {

/** Advance to the next sequence of templates. */
override def next(): Seq[Template] = {
require(hasNext, "next() called on empty iterator")
val templates = iterators.map(_.next())
require(
templates.map(_.name).distinct.length <= 1,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package io.cvbio.neodisambiguate.metric

import com.fulcrumgenomics.bam.Template
import htsjdk.samtools.SAMTag
import io.cvbio.neodisambiguate.CommonsDef.SamTag
import io.cvbio.neodisambiguate.bam.Bams.ReadOrdinal.{Read1, Read2}
import io.cvbio.neodisambiguate.bam.Bams.TemplateUtil
Expand Down Expand Up @@ -33,6 +34,11 @@ object MetricPair {
)
}

/** Build a [[MetricPair]] from a [[Template]]. A function is required to reduce the tag values to one canonical value. */
def apply[T](template: Template, tag: SAMTag)(fn: (T, T) => T)(implicit cmp: Ordering[T]): MetricPair[T] = {
apply(template = template, tag = tag.toString)(fn = fn)(cmp = cmp)
}

/** Build an empty [[MetricPair]]. */
def empty[T](implicit cmp: Ordering[T]): MetricPair[T] = new MetricPair[T](read1 = None, read2 = None)
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import htsjdk.samtools.SAMSequenceRecord
import htsjdk.samtools.SAMTag.{AS, NM}
import io.cvbio.neodisambiguate.CommonsDef.BamExtension
import io.cvbio.neodisambiguate.DisambiguationStrategy.Classic
import io.cvbio.neodisambiguate.Disambiguate.NeodisambiguateConf
import io.cvbio.neodisambiguate.testing.{TemplateBuilder, UnitSpec}

class DisambiguateTest extends UnitSpec {
Expand Down Expand Up @@ -113,7 +114,7 @@ class DisambiguateTest extends UnitSpec {
val input = builder.toTempFile(deleteOnExit = true)
val prefix = PathUtil.pathTo(dir.toString, more = "insilico")

val disambiguate = new Disambiguate(input = Seq(input), prefix = prefix)
val disambiguate = new Disambiguate(input = Seq(input), prefix = prefix, referenceNames = Seq("hg38"))
disambiguate.execute()

val source = SamSource(PathUtil.pathTo(Seq(prefix, assembly).mkString(".") + BamExtension))
Expand Down Expand Up @@ -152,4 +153,76 @@ class DisambiguateTest extends UnitSpec {

Io.assertWritableDirectory(PathUtil.pathTo(dir.toString, more = "ambiguous-alignments")) // will raise exception if non-existent.
}

it should "write out ambiguous templates to ambiguous-specific BAM files" in {
val humanAssembly: String = "hg38"
val mouseAssembly: String = "mm10"
val dir: DirPath = Io.makeTempDir("disambiguate")
//dir.toFile.deleteOnExit()

val humanBuilder = new SamBuilder(sort = Some(SamOrder.Queryname))
val mouseBuilder = new SamBuilder(sort = Some(SamOrder.Queryname))

val humanPair = humanBuilder.addPair(name = "pair", attrs = Map(NM.toString -> 2, AS.toString -> 32), start1 = 2, start2 = 101)
val mousePair = mouseBuilder.addPair(name = "pair", attrs = Map(NM.toString -> 2, AS.toString -> 32), start1 = 2, start2 = 101)

humanBuilder.header.getSequenceDictionary.getSequences.forEach { seq: SAMSequenceRecord => val _ = seq.setAssembly(humanAssembly) }
mouseBuilder.header.getSequenceDictionary.getSequences.forEach { seq: SAMSequenceRecord => val _ = seq.setAssembly(mouseAssembly) }

val prefix = PathUtil.pathTo(dir.toString, more = "insilico")

val humanBam = humanBuilder.toTempFile(deleteOnExit = false)
val mouseBam = mouseBuilder.toTempFile(deleteOnExit = false)

val disambiguate = new Disambiguate(input = Seq(mouseBam, humanBam), prefix = prefix)
disambiguate.execute()

val humanSource = SamSource(PathUtil.pathTo(Seq(prefix, humanAssembly).mkString(".") + BamExtension))
val mouseSource = SamSource(PathUtil.pathTo(Seq(prefix, mouseAssembly).mkString(".") + BamExtension))

humanSource.iterator.toSeq shouldBe empty
mouseSource.iterator.toSeq shouldBe empty

val ambiguousAlignments = PathUtil.pathTo(dir.toString, more = "ambiguous-alignments")

Io.assertWritableDirectory(ambiguousAlignments) // will raise exception if non-existent.

val ambiguousHumanSource = SamSource(ambiguousAlignments.resolve(PathUtil.replaceExtension(humanBam.getFileName, ".ambiguous" + BamExtension)))
val ambiguousMouseSource = SamSource(ambiguousAlignments.resolve(PathUtil.replaceExtension(mouseBam.getFileName, ".ambiguous" + BamExtension)))

ambiguousHumanSource.map(_.name).toSeq should contain theSameElementsInOrderAs humanPair.map(_.name)
ambiguousMouseSource.map(_.name).toSeq should contain theSameElementsInOrderAs mousePair.map(_.name)
}

"NeoNeodisambiguate" should "run end-to-end via the CLI entrypoint" in {
val humanAssembly: String = "hg38"
val mouseAssembly: String = "mm10"
val dir: DirPath = Io.makeTempDir("disambiguate")
dir.toFile.deleteOnExit()

val humanBuilder = new SamBuilder(sort = Some(SamOrder.Queryname))
val mouseBuilder = new SamBuilder(sort = Some(SamOrder.Queryname))

val _ = humanBuilder.addPair(name = "pair", attrs = Map(NM.toString -> 6, AS.toString -> 32), start1 = 2, start2 = 101)
val mousePair = mouseBuilder.addPair(name = "pair", attrs = Map(NM.toString -> 2, AS.toString -> 32), start1 = 2, start2 = 101)

humanBuilder.header.getSequenceDictionary.getSequences.forEach { seq: SAMSequenceRecord => val _ = seq.setAssembly(humanAssembly) }
mouseBuilder.header.getSequenceDictionary.getSequences.forEach { seq: SAMSequenceRecord => val _ = seq.setAssembly(mouseAssembly) }

val prefix = PathUtil.pathTo(dir.toString, more = "insilico")

val mouseBam = mouseBuilder.toTempFile(deleteOnExit = true)
val humanBam = humanBuilder.toTempFile(deleteOnExit = true)

val conf = new NeodisambiguateConf(args = Seq("--input", mouseBam.toString, humanBam.toString, "--output", prefix.toString))
Disambiguate.main(Array.from(conf.args))

val humanSource = SamSource(PathUtil.pathTo(Seq(prefix, humanAssembly).mkString(".") + BamExtension))
val mouseSource = SamSource(PathUtil.pathTo(Seq(prefix, mouseAssembly).mkString(".") + BamExtension))

humanSource.iterator.toSeq shouldBe empty
mouseSource.map(_.name).toSeq should contain theSameElementsInOrderAs mousePair.map(_.name)

Io.assertWritableDirectory(PathUtil.pathTo(dir.toString, more = "ambiguous-alignments")) // will raise exception if non-existent.
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ class BamsTest extends UnitSpec {
iterator.hasNext shouldBe false
}

it should "raise an exception when empty and next() is called" in {
val builder = new SamBuilder(sort = Some(SamOrder.Queryname))
val iterator = Bams.templatesIterator(builder.toSource)
an[NoSuchElementException] shouldBe thrownBy { iterator.next() }
}

it should "accept a single valid SamSource with more than zero alignments" in {
val builder = new SamBuilder(sort = Some(SamOrder.Queryname))
builder.addPair(name = "pair1", start1 = 1, start2 = 2)
Expand Down

0 comments on commit db7a512

Please sign in to comment.