Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into kojix2-elfutils
Browse files Browse the repository at this point in the history
  • Loading branch information
adamnovak committed Sep 23, 2024
2 parents b093eea + 8d35cdb commit d4493aa
Show file tree
Hide file tree
Showing 132 changed files with 8,117 additions and 3,141 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ include/
obj/
cpp/
.pre-build
src/vg_git_version.hpp
src/vg_environment_version.hpp
*.o
vg
*.a
Expand All @@ -24,6 +26,8 @@ test/**/*.index/
trash
src/*.gch
.vscode
# Temporary subcommand used for debugging something
debug_main.cpp
# Ignore a bunch of files people might dump in the root when testing
/*.vg
/*.gcsa
Expand All @@ -43,4 +47,4 @@ src/*.gch
/*.svg
/*.fa
/*.gfa
.vscode/*
.vscode/*
7 changes: 3 additions & 4 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ before_script:
- sudo apt-get -q -y update
# Make sure we have some curl stuff for pycurl which we need for some Python stuff
# And the CI report upload needs uuidgen from uuid-runtime
- sudo apt-get -q -y install --no-upgrade docker.io python3-pip python3-virtualenv libcurl4-gnutls-dev python-dev npm nodejs node-gyp uuid-runtime libgnutls28-dev doxygen libzstd-dev
- sudo apt-get -q -y install --no-upgrade docker.io python3-pip python3-virtualenv libcurl4-gnutls-dev python-dev npm nodejs node-gyp uuid-runtime libgnutls28-dev doxygen libzstd-dev bcftools
- which junit-merge || sudo npm install -g junit-merge
# Configure Docker to use a mirror for Docker Hub and restart the daemon
- |
Expand Down Expand Up @@ -122,8 +122,7 @@ build-job:
- PLATFORMS=linux/amd64
- THREADS=8
- DOCKER_TAG=ci-${CI_PIPELINE_IID}-${CI_COMMIT_SHA}
- make include/vg_git_version.hpp
- cat include/vg_git_version.hpp
- make version
# Connect so we can upload our images
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
# Note that A LOCAL CACHE CAN ONLY HOLD ONE TAG/TARGET AT A TIME!
Expand Down Expand Up @@ -165,7 +164,7 @@ production-build-job:
- THREADS=8 # Oversubscribe since the ARM build will take way longer anyway.
# Determine what we should be tagging vg Dockers as. If we're running on a Git tag we want to use that. Otherwise push over the tag we made already.
- if [[ ! -z "${CI_COMMIT_TAG}" ]]; then DOCKER_TAG="${CI_COMMIT_TAG}" ; else DOCKER_TAG="ci-${CI_PIPELINE_IID}-${CI_COMMIT_SHA}"; fi
- make include/vg_git_version.hpp
- make version
# Make sure ARM emulation is available.
- if [[ "${CI_BUILDKIT_DRIVER}" != "kubernetes" ]] ; then docker run --privileged --rm tonistiigi/binfmt --install all || true ; fi
# TODO: deduplicate this code with normal build above
Expand Down
16 changes: 6 additions & 10 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ FROM base AS build
ARG THREADS=8
ARG TARGETARCH

# If you didn't `make version` before building the Docker, you can provide a
# version value here to claim to be.
ARG VG_GIT_VERSION
ENV VG_GIT_VERSION=${VG_GIT_VERSION:-unknown}

RUN echo build > /stage.txt

RUN apt-get -qq -y update && \
Expand Down Expand Up @@ -56,22 +61,13 @@ RUN find . -name CMakeCache.txt | xargs rm -f
COPY Makefile /vg/Makefile
RUN . ./source_me.sh && CXXFLAGS="$(if [ -z "${TARGETARCH}" ] || [ "${TARGETARCH}" = "amd64" ] ; then echo " -march=nehalem "; fi)" CFLAGS="$(if [ -z "${TARGETARCH}" ] || [ "${TARGETARCH}" = "amd64" ] ; then echo " -march=nehalem "; fi)" make -j $((THREADS < $(nproc) ? THREADS : $(nproc))) deps

# Bring in the sources, which we need in order to build
# Bring in the sources, which we need in order to build.
COPY src /vg/src

# Build all the object files for vg, but don't link.
# Also pass the arch here
RUN . ./source_me.sh && CXXFLAGS="$(if [ -z "${TARGETARCH}" ] || [ "${TARGETARCH}" = "amd64" ] ; then echo " -march=nehalem "; fi)" make -j $((THREADS < $(nproc) ? THREADS : $(nproc))) objs

# Bring in any includes we pre-made, like the git version, if present
COPY include /vg/include

# Make sure version introspection is up to date
RUN rm -f obj/version.o && . ./source_me.sh && CXXFLAGS="$(if [ -z "${TARGETARCH}" ] || [ "${TARGETARCH}" = "amd64" ] ; then echo " -march=nehalem "; fi)" make -j $((THREADS < $(nproc) ? THREADS : $(nproc))) obj/version.o

# Announce the version file, which must exist by now
RUN ls /vg/include && cat /vg/include/vg_git_version.hpp

# Do the final build and link, knowing the version. Trim down the resulting binary but make sure to include enough debug info for profiling.
RUN . ./source_me.sh && CXXFLAGS="$(if [ -z "${TARGETARCH}" ] || [ "${TARGETARCH}" = "amd64" ] ; then echo " -march=nehalem "; fi)" make -j $((THREADS < $(nproc) ? THREADS : $(nproc))) static && strip -d bin/vg

Expand Down
222 changes: 129 additions & 93 deletions Makefile

Large diffs are not rendered by default.

101 changes: 81 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,18 @@ Paths provide stable coordinates for graphs built in different ways from the sam

![example variation graph](https://raw.githubusercontent.com/vgteam/vg/master/doc/figures/smallgraph.png)

## Citing VG

Please cite:

* [The VG Paper](https://doi.org/10.1038/nbt.4227) when using `vg`
* [The VG Giraffe Paper](https://doi.org/10.1126/science.abg8871) when using `vg giraffe`
* [The VG Call Paper](https://doi.org/10.1186/s13059-020-1941-7) when SV genotyping with `vg call`
* [The GBZ Paper](https://doi.org/10.1093/bioinformatics/btac656) when using GBZ
* [The HPRC Paper](https://doi.org/10.1038/s41586-023-05896-x) when using `vg deconstruct`
* [The Snarls Paper](https://doi.org/10.1089/cmb.2017.0251) when using `vg snarls`
* [The Personalized Pangenome Paper](https://doi.org/10.1101/2023.12.13.571553) when using `vg haplotypes` and/or `vg giraffe --haplotype-name`

## Support

We maintain a support forum on biostars: https://www.biostars.org/tag/vg/
Expand All @@ -43,27 +55,41 @@ The easiest way to get vg is to download one of our release builds for Linux. We

If you don't want to or can't use a pre-built release of vg, or if you want to become a vg developer, you can build it from source instead.

#### Linux: Clone VG

First, obtain the repo and its submodules:

git clone --recursive https://github.com/vgteam/vg.git
cd vg

#### Linux: Install Dependencies

Then, install VG's dependencies. You'll need the protobuf and jansson development libraries installed, and to run the tests you will need:
* `jq`, `bc`, `rs`, and `parallel`
* `hexdump` and `column` from `bsdmainutils`
* [`npm` for testing documentation examples](https://github.com/anko/txm)).
* `jq`, `bc`, `rs`, and `parallel`
* `hexdump` and `column` from `bsdmainutils`
* [`npm` for testing documentation examples](https://github.com/anko/txm).

On Ubuntu, you should be able to do:

make get-deps

If you get complaints that `sudo` is not found, install it:

apt update
apt install sudo

If you get a bunch of errors like `E: Unable to locate package build-essential`, make sure your package index files are up to date by running:

sudo apt update

On other distros, you will need to perform the equivalent of:
On other distros, or if you do not have root access, you will need to perform the equivalent of:

sudo apt-get install build-essential git cmake pkg-config libncurses-dev libbz2-dev \
protobuf-compiler libprotoc-dev libprotobuf-dev libjansson-dev \
automake gettext autopoint libtool jq bsdmainutils bc rs parallel \
npm curl unzip redland-utils librdf-dev bison flex gawk lzma-dev \
liblzma-dev liblz4-dev libffi-dev libcairo-dev libboost-all-dev \
libzstd-devel pybind11-dev python3-pybind11
libzstd-dev pybind11-dev python3-pybind11
Note that **Ubuntu 16.04** does not ship a sufficiently new Protobuf; vg requires **Protobuf 3** which will have to be manually installed.

Expand All @@ -73,22 +99,47 @@ Other libraries may be required. Please report any build difficulties.

Note that a 64-bit OS is required. Ubuntu 20.04 should work.

When you are ready, build with `. ./source_me.sh && make`, and run with `./bin/vg`.
#### Linux: Build

When you are ready, build with `. ./source_me.sh && make`. You can use `make -j16` to run 16 build threads at a time, which greatly accelerates the process. If you have more CPU cores, you can use higher numbers.

Note that vg can take anywhere from 10 minutes to more than an hour to compile depending on your machine and the number of threads used.

You can also produce a static binary with `make static`, assuming you have static versions of all the dependencies installed on your system.

#### Linux: Run

Once vg is built, the binary will be at `bin/vg` inside the vg repository directory. You can run it with:

```
./bin/vg
```

You can also add its directory to your `PATH` environment variable, so that you can invoke `vg` from any directory. To do that on Bash, use this command from the vg repository directory:

```
echo 'export PATH="${PATH}:'"$(pwd)"'/bin"' >>~/.bashrc
```

Then close your terminal and open a new one. Run `vg` to make sure it worked.

If it did not work, make sure that you have a `.bash_profile` file in your home directory that will run your `.bashrc`:
```
if [ -f ~/.bashrc ]; then
source ~/.bashrc
fi
```

### Building on MacOS

#### Clone VG
#### Mac: Clone VG

The first step is to clone the vg repository:

git clone --recursive https://github.com/vgteam/vg.git
cd vg

#### Install Dependencies
#### Mac: Install Dependencies

VG depends on a number of packages being installed on the system where it is being built. Dependencies can be installed using either [MacPorts](https://www.macports.org/install.php) or [Homebrew](http://brew.sh/).

Expand All @@ -106,17 +157,35 @@ Homebrew provides another package management solution for OSX, and may be prefer
# Install all the dependencies in the Brewfile
brew bundle

#### Build
#### Mac: Build

With dependencies installed, VG can now be built:

. ./source_me.sh && make

As with Linux, you can add `-j16` or other numbers at the end to run multiple build tasks at once, if your computer can handle them.

**Note that static binaries cannot yet be built for Mac.**

Our team has successfully built vg on Mac with GCC versions 4.9, 5.3, 6, 7, and 7.3, as well as Clang 9.0.
The vg Mac build targets whatever the current version of Apple Clang is, and whatever version of Apple Clang is provided by our Github Actions Mac CI system. If your Clang is up to date and vg does not build for you, please open an issue.

#### Mac: Run

Once vg is built, the binary will be at `bin/vg` inside the vg repository directory. You can run it with:

```
./bin/vg
```

#### Migrating to ARM Macs
You can also add its directory to your `PATH` environment variable, so that you can invoke `vg` from any directory. To do that on the default `zsh` Mac shell, use this command from the vg repository directory:

```
echo 'export PATH="${PATH}:'"$(pwd)"'/bin"' >>~/.zshrc
```

Then close your terminal and open a new one. Run `vg` to make sure it worked.

##### Migrate a VG installation from x86 to ARM

The Mac platform is moving to ARM, with Apple's M1, M1 Pro, M1 Max, and subsequent chip designs. The vg codebase supports ARM on Mac as well as on Linux. **The normal installation instructions work on a factory-fresh ARM Mac**.

Expand Down Expand Up @@ -216,7 +285,7 @@ There are multiple read mappers in `vg`:

* `vg giraffe` is designed to be fast for highly accurate short reads, against graphs with haplotype information.
* `vg map` is a general-purpose read mapper.
* `vg mpmap` does "munti-path" mapping, to allow describing local alignment uncertainty. [This is useful for transcriptomics.](#Transcriptomic-analysis)
* `vg mpmap` does "multi-path" mapping, to allow describing local alignment uncertainty. [This is useful for transcriptomics.](#Transcriptomic-analysis)

#### Mapping with `vg giraffe`

Expand Down Expand Up @@ -384,14 +453,6 @@ vg index hla.vg -x hla.xg
vg deconstruct hla.xg -e -p "gi|568815592:29791752-29792749" > hla_variants.vcf
```

Variants can also be inferred strictly from topology by not using `-e`, though unlike the above example, cycles are not supported. "Deconstruct" the VCF variants that were used to construct the graph. The output will be similar but not identical to `small/x.vcf.gz`, as `vg construct` can add edges between adjacent alts and/or do some normalization:

<!-- !test check Deconstruct from construct -->
```sh
# using the same graph from the `map` example
vg deconstruct x.xg -p x > x.vcf
```

Haplotype paths from `.gbz` or `.gbwt` index inputs can be considered using `-z` and `-g`, respectively.

As with `vg call`, it is best to compute snarls separately and pass them in with `-r` when working with large graphs.
Expand Down
2 changes: 1 addition & 1 deletion deps/dozeu
Submodule dozeu updated 5 files
+1 −1 Makefile
+416 −52 dozeu.h
+21 −18 example.2bit.c
+21 −18 example.c
+21 −18 example.protein.c
2 changes: 1 addition & 1 deletion deps/gbwt
2 changes: 1 addition & 1 deletion deps/gcsa2
2 changes: 1 addition & 1 deletion deps/htslib
Submodule htslib updated 320 files
2 changes: 1 addition & 1 deletion deps/kff-cpp-api
Submodule kff-cpp-api updated 4 files
+1 −0 .gitignore
+5 −3 CMakeLists.txt
+6 −1 kff_io.cpp
+1 −0 main.cpp
2 changes: 1 addition & 1 deletion deps/sublinear-Li-Stephens
2 changes: 1 addition & 1 deletion ontology/vg.html
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ <h2>
<td><a href="http://www.w3.org/2000/01/rdf-schema#comment">
rdfs:comment</a></td>
<td>
"A step along a path in the variant graph. A series of steps along a path represent an assembled sequence that was originally inserted into the the variant graph. A step points to a :Node or the reverse complement of a node and has a rank (step number)."
"A step along a path in the variant graph. A series of steps along a path represent an assembled sequence that was originally inserted into the variant graph. A step points to a :Node or the reverse complement of a node and has a rank (step number)."
<sup><a href="http://www.w3.org/1999/02/22-rdf-syntax-ns#http://www.w3.org/2001/XMLSchema#string">
xsd:string</a></sup>
</td>
Expand Down
2 changes: 1 addition & 1 deletion ontology/vg.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
.
:Step
rdf:type owl:Class ;
rdfs:comment "A step along a path in the variant graph. A series of steps along a path represent an assembled sequence that was originally inserted into the the variant graph. A step points to a :Node or the reverse complement of a node and has a rank (step number)."^^xsd:string ;
rdfs:comment "A step along a path in the variant graph. A series of steps along a path represent an assembled sequence that was originally inserted into the variant graph. A step points to a :Node or the reverse complement of a node and has a rank (step number)."^^xsd:string ;
rdfs:label "Step"^^xsd:string ;
rdfs:subClassOf owl:Thing ;
.
Expand Down
6 changes: 3 additions & 3 deletions scripts/mcmc_Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,10 @@ clean:
rm -r $(TOIL_OS)

CHR21.fa:
wget https://courtyard.gi.ucsc.edu/~anovak/vg-data/bakeoff/CHR21.fa
wget https://public.gi.ucsc.edu/~anovak/vg-data/bakeoff/CHR21.fa

1kg_hg19-CHR21.vcf.gz:
wget https://courtyard.gi.ucsc.edu/~anovak/vg-data/bakeoff/1kg_hg19-CHR21.vcf.gz
wget https://public.gi.ucsc.edu/~anovak/vg-data/bakeoff/1kg_hg19-CHR21.vcf.gz

1kg_hg19-CHR21.vcf.gz.tbi:
wget https://courtyard.gi.ucsc.edu/~anovak/vg-data/bakeoff/1kg_hg19-CHR21.vcf.gz.tbi
wget https://public.gi.ucsc.edu/~anovak/vg-data/bakeoff/1kg_hg19-CHR21.vcf.gz.tbi
Loading

1 comment on commit d4493aa

@adamnovak
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch kojix2-elfutils. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 17386 seconds

Please sign in to comment.