Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into split-out-band-padding
Browse files Browse the repository at this point in the history
  • Loading branch information
adamnovak committed Jul 19, 2024
2 parents 25ad71b + 2d34df2 commit 011e1fe
Show file tree
Hide file tree
Showing 10 changed files with 280 additions and 159 deletions.
5 changes: 3 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,9 @@ ifeq ($(shell uname -s),Darwin)
START_STATIC =
END_STATIC =

# We need to use special flags to let us rename libraries
LD_RENAMEABLE_FLAGS = -Wl,-headerpad -Wl,-headerpad_max_install_names
# We need to use special flags to let us rename libraries.
# See <https://github.com/Homebrew/legacy-homebrew/issues/20233>
LD_RENAMEABLE_FLAGS = -Wl,-headerpad_max_install_names
else
# We are not running on OS X
$(info OS is Linux)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ As with Linux, you can add `-j16` or other numbers at the end to run multiple bu

**Note that static binaries cannot yet be built for Mac.**

Our team has successfully built vg on Mac with GCC versions 4.9, 5.3, 6, 7, and 7.3, as well as Clang 9.0.
The vg Mac build targets whatever the current version of Apple Clang is, and whatever version of Apple Clang is provided by our Github Actions Mac CI system. If your Clang is up to date and vg does not build for you, please open an issue.

#### Mac: Run

Expand Down
11 changes: 7 additions & 4 deletions src/minimizer_mapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -589,10 +589,13 @@ vector<Alignment> MinimizerMapper::map_from_extensions(Alignment& aln) {
return aln.sequence();
});


// Minimizers sorted by score in descending order.
std::vector<Minimizer> minimizers = this->find_minimizers(aln.sequence(), funnel);

// Minimizers sorted by position
std::vector<Minimizer> minimizers_in_read = this->find_minimizers(aln.sequence(), funnel);
// Indexes of minimizers, sorted into score order, best score first
std::vector<size_t> minimizer_score_order = sort_minimizers_by_score(minimizers_in_read);
// Minimizers sorted by best score first
VectorView<Minimizer> minimizers{minimizers_in_read, minimizer_score_order};

// Find the seeds and mark the minimizers that were located.
vector<Seed> seeds = this->find_seeds(minimizers, aln, funnel);

Expand Down
352 changes: 223 additions & 129 deletions src/multipath_alignment_graph.cpp

Large diffs are not rendered by default.

19 changes: 12 additions & 7 deletions src/multipath_alignment_graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,10 +194,11 @@ namespace vg {
/// order, even if this MultipathAlignmentGraph is. You MUST sort it
/// with topologically_order_subpaths() before trying to run DP on it.
void align(const Alignment& alignment, const HandleGraph& align_graph, const GSSWAligner* aligner, bool score_anchors_as_matches,
size_t max_alt_alns, bool dynamic_alt_alns, size_t max_gap, double pessimistic_tail_gap_multiplier, bool simplify_topologies,
size_t unmergeable_len, size_t band_padding, multipath_alignment_t& multipath_aln_out, SnarlManager* cutting_snarls = nullptr,
SnarlDistanceIndex* dist_index = nullptr, const function<pair<id_t, bool>(id_t)>* project = nullptr,
bool allow_negative_scores = false, unordered_map<handle_t, bool>* left_align_strand = nullptr);
size_t max_alt_alns, bool dynamic_alt_alns, size_t max_gap, double pessimistic_tail_gap_multiplier, size_t max_tail_length,
bool simplify_topologies, size_t unmergeable_len, size_t band_padding, multipath_alignment_t& multipath_aln_out,
SnarlManager* cutting_snarls = nullptr, SnarlDistanceIndex* dist_index = nullptr,
const function<pair<id_t, bool>(id_t)>* project = nullptr, bool allow_negative_scores = false,
unordered_map<handle_t, bool>* left_align_strand = nullptr);

/// Do intervening and tail alignments between the anchoring paths and
/// store the result in a multipath_alignment_t. Reachability edges must
Expand All @@ -212,8 +213,8 @@ namespace vg {
/// order, even if this MultipathAlignmentGraph is. You MUST sort it
/// with topologically_order_subpaths() before trying to run DP on it.
void align(const Alignment& alignment, const HandleGraph& align_graph, const GSSWAligner* aligner, bool score_anchors_as_matches,
size_t max_alt_alns, bool dynamic_alt_alns, size_t max_gap, double pessimistic_tail_gap_multiplier, bool simplify_topologies,
size_t unmergeable_len, const function<size_t(const Alignment&,const HandleGraph&)>& band_padding_function,
size_t max_alt_alns, bool dynamic_alt_alns, size_t max_gap, double pessimistic_tail_gap_multiplier, size_t max_tail_length,
bool simplify_topologies, size_t unmergeable_len, const function<size_t(const Alignment&,const HandleGraph&)>& band_padding_function,
multipath_alignment_t& multipath_aln_out, SnarlManager* cutting_snarls = nullptr, SnarlDistanceIndex* dist_index = nullptr,
const function<pair<id_t, bool>(id_t)>* project = nullptr, bool allow_negative_scores = false,
unordered_map<handle_t, bool>* left_align_strand = nullptr);
Expand Down Expand Up @@ -312,12 +313,16 @@ namespace vg {
/// Alignments of the tail off of that subpath. Also computes the
/// source subpaths and adds their numbers to the given set if not
/// null.
///
/// If a tail is longer than max_tail_length, produces an alignment
/// softclipping it.
///
/// If dynamic alignment count is also selected, can indicate a minimum number
/// of paths that must be in the extending graph in order to do an alignment
unordered_map<bool, unordered_map<size_t, vector<Alignment>>>
align_tails(const Alignment& alignment, const HandleGraph& align_graph, const GSSWAligner* aligner,
size_t max_alt_alns, bool dynamic_alt_alns, size_t max_gap, double pessimistic_tail_gap_multiplier,
size_t min_paths, unordered_set<size_t>* sources = nullptr);
size_t min_paths, size_t max_tail_length, unordered_set<size_t>* sources = nullptr);

/// Removes alignments that follow the same path through the graph, retaining only the
/// highest scoring ones. If deduplicating leftward, then also removes paths that take a
Expand Down
10 changes: 5 additions & 5 deletions src/multipath_mapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6243,9 +6243,9 @@ namespace vg {

// do the connecting alignments and fill out the multipath_alignment_t object
multi_aln_graph.align(alignment, *align_dag, aligner, true, num_alt_alns, dynamic_max_alt_alns, max_alignment_gap,
use_pessimistic_tail_alignment ? pessimistic_gap_multiplier : 0.0, simplify_topologies,
max_tail_merge_supress_length, choose_band_padding, multipath_aln_out, snarl_manager,
distance_index, &translator);
use_pessimistic_tail_alignment ? pessimistic_gap_multiplier : 0.0, std::numeric_limits<size_t>::max(),
simplify_topologies, max_tail_merge_supress_length, choose_band_padding, multipath_aln_out,
snarl_manager, distance_index, &translator);

// Note that we do NOT topologically order the multipath_alignment_t. The
// caller has to do that, after it is finished breaking it up into
Expand Down Expand Up @@ -6294,8 +6294,8 @@ namespace vg {

// do the connecting alignments and fill out the multipath_alignment_t object
multi_aln_graph.align(alignment, subgraph, aligner, false, num_alt_alns, dynamic_max_alt_alns, max_alignment_gap,
use_pessimistic_tail_alignment ? pessimistic_gap_multiplier : 0.0, simplify_topologies,
max_tail_merge_supress_length, choose_band_padding, multipath_aln_out);
use_pessimistic_tail_alignment ? pessimistic_gap_multiplier : 0.0, std::numeric_limits<size_t>::max(),
simplify_topologies, max_tail_merge_supress_length, choose_band_padding, multipath_aln_out);

for (size_t j = 0; j < multipath_aln_out.subpath_size(); j++) {
translate_oriented_node_ids(*multipath_aln_out.mutable_subpath(j)->mutable_path(), translator);
Expand Down
24 changes: 19 additions & 5 deletions src/subcommand/surject_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ void help_surject(char** argv) {
<< " -b, --bam-output write BAM to stdout" << endl
<< " -s, --sam-output write SAM to stdout" << endl
<< " -l, --subpath-local let the multipath mapping surjection produce local (rather than global) alignments" << endl
<< " -T, --max-tail-len N only align up to N bases of read tails (default: 10000)" << endl
<< " -P, --prune-low-cplx prune short and low complexity anchors during realignment" << endl
<< " -a, --max-anchors N use no more than N anchors per target path (default: 200)" << endl
<< " -S, --spliced interpret long deletions against paths as spliced alignments" << endl
Expand Down Expand Up @@ -96,6 +97,7 @@ int main_surject(int argc, char** argv) {
int min_splice_length = 20;
size_t watchdog_timeout = 10;
bool subpath_global = true; // force full length alignments in mpmap resolution
size_t max_tail_len = 10000;
bool qual_adj = false;
bool prune_anchors = false;
size_t max_anchors = 200;
Expand All @@ -114,7 +116,8 @@ int main_surject(int argc, char** argv) {
{"into-path", required_argument, 0, 'p'},
{"into-paths", required_argument, 0, 'F'},
{"ref-paths", required_argument, 0, 'F'}, // Now an alias for --into-paths
{"subpath-local", required_argument, 0, 'l'},
{"subpath-local", no_argument, 0, 'l'},
{"max-tail-len", required_argument, 0, 'T'},
{"interleaved", no_argument, 0, 'i'},
{"multimap", no_argument, 0, 'M'},
{"gaf-input", no_argument, 0, 'G'},
Expand All @@ -137,7 +140,7 @@ int main_surject(int argc, char** argv) {
};

int option_index = 0;
c = getopt_long (argc, argv, "hx:p:F:liGmcbsN:R:f:C:t:SPa:ALMVw:",
c = getopt_long (argc, argv, "hx:p:F:lT:iGmcbsN:R:f:C:t:SPa:ALMVw:",
long_options, &option_index);

// Detect the end of the options.
Expand All @@ -163,6 +166,10 @@ int main_surject(int argc, char** argv) {
subpath_global = false;
break;

case 'T':
max_tail_len = parse<size_t>(optarg);
break;

case 'i':
interleaved = true;
break;
Expand Down Expand Up @@ -251,6 +258,14 @@ int main_surject(int argc, char** argv) {
}
}

string file_name = get_input_file_name(optind, argc, argv);

if (have_input_file(optind, argc, argv)) {
// We only take one input file.
cerr << "error[vg surject] Extra argument provided: " << get_input_file_name(optind, argc, argv, false) << endl;
exit(1);
}

// Create a preprocessor to apply read group and sample name overrides in place
auto set_metadata = [&](Alignment& update) {
if (!sample_name.empty()) {
Expand All @@ -261,8 +276,6 @@ int main_surject(int argc, char** argv) {
}
};

string file_name = get_input_file_name(optind, argc, argv);

PathPositionHandleGraph* xgidx = nullptr;
unique_ptr<PathHandleGraph> path_handle_graph;
// If we add an overlay for path position queries, use one optimized for
Expand Down Expand Up @@ -303,8 +316,9 @@ int main_surject(int argc, char** argv) {
else {
surjector.min_splice_length = numeric_limits<int64_t>::max();
}
surjector.max_tail_length = max_tail_len;
surjector.annotate_with_all_path_scores = annotate_with_all_path_scores;

// Count our threads
int thread_count = vg::get_thread_count();

Expand Down
1 change: 1 addition & 0 deletions src/surjector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3095,6 +3095,7 @@ using namespace std;
false, // dynamic alt alns
numeric_limits<int64_t>::max(), // max gap
0.0, // pessimistic tail gap multiplier
max_tail_length, // max length of tail to align
false, // simplify topologies
0, // unmergeable len
1, // band padding
Expand Down
3 changes: 3 additions & 0 deletions src/surjector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ using namespace std;

/// the minimum length apparent intron that we will try to repair
int64_t min_splice_repair_length = 250;

/// the maximum length of a tail that we will try to align
size_t max_tail_length = 10000;

/// How big of a graph in bp should we ever try to align against for realigning surjection?
size_t max_subgraph_bases = 100 * 1024;
Expand Down
12 changes: 6 additions & 6 deletions src/unittest/multipath_alignment_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ TEST_CASE( "MultipathAlignmentGraph::align handles tails correctly", "[multipath
mpg.resect_snarls_from_paths(&snarl_manager, identity, 5);
// Make it align, with alignments per gap/tail
mpg.align(query, vg, &aligner, true, 2, false, 100, 0.0, 5, out);
mpg.align(query, vg, &aligner, true, 2, false, 100, 0.0, std::numeric_limits<size_t>::max(), false, 5, out);
// Make sure to topologically sort the resulting alignment. TODO: Should
// the MultipathAlignmentGraph guarantee this for us by construction?
Expand Down Expand Up @@ -148,7 +148,7 @@ TEST_CASE( "MultipathAlignmentGraph::align handles tails correctly", "[multipath
SECTION("Handles tails when anchors for them are not generated") {
// Make it align, with alignments per gap/tail
mpg.align(query, vg, &aligner, true, 2, false, 100, 0.0, 5, out);
mpg.align(query, vg, &aligner, true, 2, false, 100, 0.0, std::numeric_limits<size_t>::max(), false, 5, out);
// Make sure to topologically sort the resulting alignment. TODO: Should
// the MultipathAlignmentGraph guarantee this for us by construction?
Expand Down Expand Up @@ -208,7 +208,7 @@ TEST_CASE( "MultipathAlignmentGraph::align handles tails correctly", "[multipath
MultipathAlignmentGraph::create_projector(identity), 5);

// Make it align, with alignments per gap/tail
mpg.align(query, vg, &aligner, true, 2, false, 100, 0.0, false, 0, 5, out);
mpg.align(query, vg, &aligner, true, 2, false, 100, 0.0, std::numeric_limits<size_t>::max(), false, 0, 5, out);

// Make sure to topologically sort the resulting alignment. TODO: Should
// the MultipathAlignmentGraph guarantee this for us by construction?
Expand Down Expand Up @@ -244,7 +244,7 @@ TEST_CASE( "MultipathAlignmentGraph::align handles tails correctly", "[multipath

SECTION("Handles tails when anchors for them are not generated") {
// Make it align, with alignments per gap/tail
mpg.align(query, vg, &aligner, true, 2, false, 100, 0.0, false, 0, 5, out);
mpg.align(query, vg, &aligner, true, 2, false, 100, 0.0, std::numeric_limits<size_t>::max(), false, 0, 5, out);

// Make sure to topologically sort the resulting alignment. TODO: Should
// the MultipathAlignmentGraph guarantee this for us by construction?
Expand Down Expand Up @@ -301,7 +301,7 @@ TEST_CASE( "MultipathAlignmentGraph::align handles tails correctly", "[multipath
MultipathAlignmentGraph::create_projector(identity), 5);

// Make it align, with alignments per gap/tail
mpg.align(query, vg, &aligner, true, 2, false, 100, 0.0, false, 0, 5, out);
mpg.align(query, vg, &aligner, true, 2, false, 100, 0.0, std::numeric_limits<size_t>::max(), false, 0, 5, out);

// Make sure to topologically sort the resulting alignment. TODO: Should
// the MultipathAlignmentGraph guarantee this for us by construction?
Expand Down Expand Up @@ -338,7 +338,7 @@ TEST_CASE( "MultipathAlignmentGraph::align handles tails correctly", "[multipath

SECTION("Handles tails when anchors for them are not generated") {
// Make it align, with alignments per gap/tail
mpg.align(query, vg, &aligner, true, 2, false, 100, 0.0, false, 0, 5, out);
mpg.align(query, vg, &aligner, true, 2, false, 100, 0.0, std::numeric_limits<size_t>::max(), false, 0, 5, out);

// Make sure to topologically sort the resulting alignment. TODO: Should
// the MultipathAlignmentGraph guarantee this for us by construction?
Expand Down

0 comments on commit 011e1fe

Please sign in to comment.