From a0e8e8a0cd11b00fd2c670a098d7d57bf3c886f2 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Wed, 17 Jul 2024 11:13:15 -0400 Subject: [PATCH] Sort minimizers by score in Giraffe single-end mode Apparently in 40bf613207369f2e35d4c5367a7199a2f3bff4ce I meant to keep the minimizers sorted by score in single-end mapping mode, but when https://github.com/vgteam/vg/pull/3810 actually merged, it changed the order in which find_minimizers returns results to read order, and added a score sort to the paired-end non-chaining codepath but not to the single-end non-chaining codepath. This should restore the vg 1.44.0 behavior of taking or not taking identical minimizers all across the read, and also of prioritizing which minimizers to take by score, in single-end mode. --- src/minimizer_mapper.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/minimizer_mapper.cpp b/src/minimizer_mapper.cpp index 9def875f42e..b8fa8ed8bd8 100644 --- a/src/minimizer_mapper.cpp +++ b/src/minimizer_mapper.cpp @@ -589,10 +589,13 @@ vector MinimizerMapper::map_from_extensions(Alignment& aln) { return aln.sequence(); }); - - // Minimizers sorted by score in descending order. - std::vector minimizers = this->find_minimizers(aln.sequence(), funnel); - + // Minimizers sorted by position + std::vector minimizers_in_read = this->find_minimizers(aln.sequence(), funnel); + // Indexes of minimizers, sorted into score order, best score first + std::vector minimizer_score_order = sort_minimizers_by_score(minimizers_in_read); + // Minimizers sorted by best score first + VectorView minimizers{minimizers_in_read, minimizer_score_order}; + // Find the seeds and mark the minimizers that were located. vector seeds = this->find_seeds(minimizers, aln, funnel);