diff --git a/.github/workflows/AT2.yml b/.github/workflows/AT2.yml index 8ecec213094a..5b4cbfc92f08 100644 --- a/.github/workflows/AT2.yml +++ b/.github/workflows/AT2.yml @@ -56,24 +56,44 @@ jobs: working-directory: ./packages/framework run: | bash -l -c "./get_dependencies.sh --container" - - name: Generate PR cmake fragment + - name: PullRequestLinuxDriverTest.py + shell: bash -l {0} working-directory: /home/Trilinos/build run: | - bash -l -c "source ${GITHUB_WORKSPACE}/packages/framework/GenConfig/gen-config.sh rhel8_gcc-openmpi_debug_shared_no-kokkos-arch_no-asan_complex_no-fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_no-package-enables --force --cmake-fragment PR-configuration.cmake ${GITHUB_WORKSPACE}" - - name: Generate enable packages cmake fragment - working-directory: /home/Trilinos/build - run: | - bash -l -c "TRILINOS_DIR=${GITHUB_WORKSPACE} ${GITHUB_WORKSPACE}/commonTools/framework/get-changed-trilinos-packages.sh HEAD origin/develop package-enables.cmake ctest-labels.cmake" || bash -l -c "cat TribitsDumpDepsXmlScript.log" - - name: configure trilinos - working-directory: /home/Trilinos/build - run: | - bash -l -c "cmake -C PR-configuration.cmake -C package-enables.cmake ${GITHUB_WORKSPACE}" - - name: build trilinos - working-directory: /home/Trilinos/build - run: bash -l -c "ninja -j16" - - name: ctest - working-directory: /home/Trilinos/build - run: bash -l -c "ctest -j16" + # python-is-python3 shim: put a "python" symlink to python3 first on PATH (mkdir -p / ln -sf keep re-runs from failing) + mkdir -p bin + pushd bin + ln -sf "$(type -p python3)" python + export PATH=$(pwd):${PATH} + popd + + export TRILINOS_DIR=${GITHUB_WORKSPACE:?} + export PYTHONPATH=${PYTHONPATH}:${GITHUB_WORKSPACE}/packages/framework/GenConfig + export PYTHONPATH=${PYTHONPATH}:${GITHUB_WORKSPACE}/packages/framework/pr_tools + + sed -i "/\[rhel8\]/a $(cat /etc/hostname)" "${GITHUB_WORKSPACE}/packages/framework/ini-files/supported-systems.ini" + printf "\n\n\n" + + echo "image: ${AT2_IMAGE:-unknown}" + + python3 
${GITHUB_WORKSPACE}/packages/framework/pr_tools/PullRequestLinuxDriverTest.py \ + --source-repo-url ${GITHUB_WORKSPACE} \ + --target-repo-url ${GITHUB_WORKSPACE} \ + --target-branch-name ${{ github.event.pull_request.base.ref }} \ + --pullrequest-build-name PR-${{ github.event.pull_request.number }} \ + --genconfig-build-name rhel8_gcc-openmpi_debug_shared_no-kokkos-arch_no-asan_complex_no-fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_no-package-enables \ + --pullrequest-number ${{ github.event.pull_request.number }} \ + --jenkins-job-number 0 \ + --pullrequest-env-config-file ${GITHUB_WORKSPACE}/packages/framework/pr_tools/trilinos_pr.ini \ + --pullrequest-gen-config-file ${GITHUB_WORKSPACE}/packages/framework/GenConfig/src/gen-config.ini \ + --workspace-dir /home/runner/_work/Trilinos \ + --source-dir ${GITHUB_WORKSPACE} \ + --build-dir /home/Trilinos/build \ + --dashboard-build-name "$(cat /etc/hostname)" \ + --ctest-driver ${GITHUB_WORKSPACE}/cmake/SimpleTesting/cmake/ctest-driver.cmake \ + --ctest-drop-site sems-cdash-son.sandia.gov/cdash \ + --filename-subprojects ./package_subproject_list.cmake \ + --filename-packageenables ./packageEnables.cmake gcc830-serial-EXPERIMENTAL: runs-on: [self-hosted, gcc-8.3.0_serial] @@ -114,22 +134,120 @@ jobs: working-directory: ./packages/framework run: | bash -l -c "./get_dependencies.sh --container" - - name: Generate PR cmake fragment + - name: PullRequestLinuxDriverTest.py + shell: bash -l {0} working-directory: /home/Trilinos/build run: | - bash -l -c "source ${GITHUB_WORKSPACE}/packages/framework/GenConfig/gen-config.sh rhel8_gcc-serial_release-debug_shared_no-kokkos-arch_no-asan_no-complex_no-fpic_no-mpi_no-pt_no-rdc_no-uvm_deprecated-on_no-package-enables --force --cmake-fragment PR-configuration.cmake ${GITHUB_WORKSPACE}" - - name: Generate enable packages cmake fragment - working-directory: /home/Trilinos/build + # python-is-python3 shim: put a "python" symlink to python3 first on PATH (mkdir -p / ln -sf keep re-runs from failing) + mkdir -p bin + pushd bin + ln -sf "$(type -p python3)" python 
+ export PATH=$(pwd):${PATH} + popd + + export TRILINOS_DIR=${GITHUB_WORKSPACE:?} + export PYTHONPATH=${PYTHONPATH}:${GITHUB_WORKSPACE}/packages/framework/GenConfig + export PYTHONPATH=${PYTHONPATH}:${GITHUB_WORKSPACE}/packages/framework/pr_tools + + sed -i "/\[rhel8\]/a $(cat /etc/hostname)" "${GITHUB_WORKSPACE}/packages/framework/ini-files/supported-systems.ini" + printf "\n\n\n" + + echo "image: ${AT2_IMAGE:-unknown}" + + python3 ${GITHUB_WORKSPACE}/packages/framework/pr_tools/PullRequestLinuxDriverTest.py \ + --source-repo-url ${GITHUB_WORKSPACE} \ + --target-repo-url ${GITHUB_WORKSPACE} \ + --target-branch-name ${{ github.event.pull_request.base.ref }} \ + --pullrequest-build-name PR-${{ github.event.pull_request.number }} \ + --genconfig-build-name rhel8_gcc-serial_release-debug_shared_no-kokkos-arch_no-asan_no-complex_no-fpic_no-mpi_no-pt_no-rdc_no-uvm_deprecated-on_no-package-enables \ + --pullrequest-number ${{ github.event.pull_request.number }} \ + --jenkins-job-number 0 \ + --pullrequest-env-config-file ${GITHUB_WORKSPACE}/packages/framework/pr_tools/trilinos_pr.ini \ + --pullrequest-gen-config-file ${GITHUB_WORKSPACE}/packages/framework/GenConfig/src/gen-config.ini \ + --workspace-dir /home/runner/_work/Trilinos \ + --source-dir ${GITHUB_WORKSPACE} \ + --build-dir /home/Trilinos/build \ + --dashboard-build-name "$(cat /etc/hostname)" \ + --ctest-driver ${GITHUB_WORKSPACE}/cmake/SimpleTesting/cmake/ctest-driver.cmake \ + --ctest-drop-site sems-cdash-son.sandia.gov/cdash \ + --filename-subprojects ./package_subproject_list.cmake \ + --filename-packageenables ./packageEnables.cmake + + cuda11-uvm-EXPERIMENTAL: + runs-on: [self-hosted, cuda-11.4.2_gcc-10.3.0_openmpi-4.1.6] + if: ${{ github.event.action == 'synchronize' || github.event.action == 'opened' || github.event.review.state == 'APPROVED' }} + steps: + - name: env + env: + GITHUB_CONTEXT: ${{ toJson(github) }} run: | - bash -l -c "TRILINOS_DIR=${GITHUB_WORKSPACE} 
${GITHUB_WORKSPACE}/commonTools/framework/get-changed-trilinos-packages.sh HEAD origin/develop package-enables.cmake ctest-labels.cmake" || bash -l -c "cat TribitsDumpDepsXmlScript.log" - - name: configure trilinos - working-directory: /home/Trilinos/build + env + - name: module list + shell: bash run: | - bash -l -c "cmake -C PR-configuration.cmake -C package-enables.cmake ${GITHUB_WORKSPACE}" - - name: build trilinos - working-directory: /home/Trilinos/build - run: bash -l -c "ninja -j16" - - name: ctest + bash -l -c "module list" + printenv PATH + - name: Cancel Previous Runs + uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1 + with: + access_token: ${{ github.token }} + - name: make dirs + working-directory: / + run: | + mkdir -p /home/Trilinos/src/Trilinos + mkdir -p /home/Trilinos/build + - name: Clone trilinos + uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + with: + fetch-depth: 0 + - name: Repo status + run: | + git fetch --all + pwd + ls -lhat + git status + git branch -vv + git branch -a + - name: get dependencies + working-directory: ./packages/framework + run: | + bash -l -c "./get_dependencies.sh --container" + - name: PullRequestLinuxDriverTest.py + shell: bash -l {0} working-directory: /home/Trilinos/build - run: bash -l -c "ctest -j16" + run: | + mkdir -p bin + pushd bin + ln -sf "$(type -p python3)" python + export PATH=$(pwd):${PATH} + popd + + export TRILINOS_DIR=${GITHUB_WORKSPACE:?} + export PYTHONPATH=${PYTHONPATH}:${GITHUB_WORKSPACE}/packages/framework/GenConfig + export PYTHONPATH=${PYTHONPATH}:${GITHUB_WORKSPACE}/packages/framework/pr_tools + + sed -i "/\[rhel8\]/a $(cat /etc/hostname)" "${GITHUB_WORKSPACE}/packages/framework/ini-files/supported-systems.ini" + printf "\n\n\n" + echo "image: ${AT2_IMAGE:-unknown}" + type python + python3 ${GITHUB_WORKSPACE}/packages/framework/pr_tools/PullRequestLinuxDriverTest.py \ + --source-repo-url ${GITHUB_WORKSPACE} \ + 
--target-repo-url ${GITHUB_WORKSPACE} \ + --target-branch-name ${{ github.event.pull_request.base.ref }} \ + --pullrequest-build-name PR-${{ github.event.pull_request.number }} \ + --genconfig-build-name rhel8_cuda-gcc-openmpi_release_static_Ampere80_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_no-package-enables \ + --pullrequest-number ${{ github.event.pull_request.number }} \ + --jenkins-job-number 0 \ + --pullrequest-env-config-file ${GITHUB_WORKSPACE}/packages/framework/pr_tools/trilinos_pr.ini \ + --pullrequest-gen-config-file ${GITHUB_WORKSPACE}/packages/framework/GenConfig/src/gen-config.ini \ + --workspace-dir /home/runner/_work/Trilinos \ + --source-dir ${GITHUB_WORKSPACE} \ + --build-dir /home/Trilinos/build \ + --dashboard-build-name "$(cat /etc/hostname)" \ + --ctest-driver ${GITHUB_WORKSPACE}/cmake/SimpleTesting/cmake/ctest-driver.cmake \ + --ctest-drop-site sems-cdash-son.sandia.gov/cdash \ + --filename-subprojects ./package_subproject_list.cmake \ + --filename-packageenables ./packageEnables.cmake \ + --max-cores-allowed=96 \ + --num-concurrent-tests=96 diff --git a/.github/workflows/clang_format.yml b/.github/workflows/clang_format.yml index 1aa6c7249eb5..4a094e22f91f 100644 --- a/.github/workflows/clang_format.yml +++ b/.github/workflows/clang_format.yml @@ -4,7 +4,7 @@ on: [pull_request] permissions: contents: read - issues: write + pull-requests: write jobs: build: diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index c97fa64481e6..4a195bbf4e60 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -62,7 +62,7 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@9fdb3e49720b44c48891d036bb502feb25684276 # v3.25.6 + uses: github/codeql-action/init@2e230e8fe0ad3a14a340ad0815ddb96d599d2aff # v3.25.8 with: languages: ${{ matrix.language }} build-mode: ${{ matrix.build-mode }} @@ -85,6 +85,6 @@ jobs: make -j 2 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@9fdb3e49720b44c48891d036bb502feb25684276 # v3.25.6 + uses: github/codeql-action/analyze@2e230e8fe0ad3a14a340ad0815ddb96d599d2aff # v3.25.8 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index 7141d0933730..b9113179704b 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -17,11 +17,11 @@ jobs: runs-on: ubuntu-latest steps: - name: Harden Runner - uses: step-security/harden-runner@f086349bfa2bd1361f7909c78558e816508cdc10 # v2.8.0 + uses: step-security/harden-runner@17d0e2bd7d51742c71671bd19fa12bdc9d40a3d6 # v2.8.1 with: egress-policy: audit - name: 'Checkout Repository' uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - name: 'Dependency Review' - uses: actions/dependency-review-action@0c155c5e8556a497adf53f2c18edabf945ed8e70 # v4.3.2 + uses: actions/dependency-review-action@72eb03d02c7872a771aacd928f3123ac62ad6d3a # v4.3.3 diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index f6cefad9914b..ba2ac51e5ef4 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -66,6 +66,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. 
- name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@9fdb3e49720b44c48891d036bb502feb25684276 # v3.25.6 + uses: github/codeql-action/upload-sarif@2e230e8fe0ad3a14a340ad0815ddb96d599d2aff # v3.25.8 with: sarif_file: results.sarif diff --git a/cmake/SimpleTesting/cmake/ctest-driver.cmake b/cmake/SimpleTesting/cmake/ctest-driver.cmake index 124b3d70381d..9eb4e7e9d07b 100644 --- a/cmake/SimpleTesting/cmake/ctest-driver.cmake +++ b/cmake/SimpleTesting/cmake/ctest-driver.cmake @@ -19,6 +19,10 @@ if(STAGE_CONFIGURE_ERROR OR STAGE_BUILD_ERROR OR STAGE_TEST_ERROR OR STAGE_COVER message(FATAL_ERROR "STAGE_CONFIGURE_ERROR: ${STAGE_CONFIGURE_ERROR}, STAGE_BUILD_ERROR: ${STAGE_BUILD_ERROR}, STAGE_TEST_ERROR: ${STAGE_TEST_ERROR}, STAGE_COVERAGE_ERROR: ${STAGE_COVERAGE_ERROR}") endif() +message(">>> CDash URL1 = ${build_url1}") +message(">>> CDash URL2 = ${build_url2}") +message(">>> CDash URL3 = ${build_url3}") + message("+--------------------------------------+") message("| ctest-driver.cmake FINISH |") -message("+--------------------------------------+") \ No newline at end of file +message("+--------------------------------------+") diff --git a/cmake/ctest/drivers/enigma/CMakeLists.txt b/cmake/ctest/drivers/enigma/CMakeLists.txt deleted file mode 100644 index 6b1c8e808fd9..000000000000 --- a/cmake/ctest/drivers/enigma/CMakeLists.txt +++ /dev/null @@ -1,100 +0,0 @@ -TRILINOS_DRIVER_SETUP() - -TRILINOS_DRIVER_ADD_DASHBOARD( - OPENMPI_1.10.1_RELEASE_DEFAULT - ctest_linux_nightly_mpi_release_muelu_enigma.cmake - CTEST_INSTALLER_TYPE release - RUN_SERIAL - TIMEOUT_MINUTES 330 -) - -TRILINOS_DRIVER_ADD_DASHBOARD( - OPENMPI_1.10.1_DEBUG_NO-SUPERLU_BASKER - ctest_linux_nightly_mpi_debug_muelu_basker_enigma.cmake - CTEST_INSTALLER_TYPE release - RUN_SERIAL - TIMEOUT_MINUTES 330 - ) - -TRILINOS_DRIVER_ADD_DASHBOARD( - OPENMPI_1.10.1_DEBUG_NO-SUPERLU_KLU2 - ctest_linux_nightly_mpi_debug_muelu_klu2_enigma.cmake - CTEST_INSTALLER_TYPE release - RUN_SERIAL - 
TIMEOUT_MINUTES 330 - ) - -TRILINOS_DRIVER_ADD_DASHBOARD( - SERIAL_DEBUG_LONG-LONG - ctest_linux_nightly_mpi_debug_muelu_extratypes_ei_enigma.cmake - CTEST_INSTALLER_TYPE release - RUN_SERIAL - TIMEOUT_MINUTES 330 - ) - -TRILINOS_DRIVER_ADD_DASHBOARD( - SERIAL_DEBUG_LONG-LONG_COMPLEX-DOUBLE - ctest_linux_nightly_serial_debug_muelu_extratypes_enigma.cmake - CTEST_INSTALLER_TYPE release - RUN_SERIAL - TIMEOUT_MINUTES 330 - ) - -TRILINOS_DRIVER_ADD_DASHBOARD( - SERIAL_RELEASE_EXPERIMENTAL - ctest_linux_nightly_serial_release_muelu_experimental_enigma.cmake - CTEST_INSTALLER_TYPE release - RUN_SERIAL - TIMEOUT_MINUTES 330 - ) - -TRILINOS_DRIVER_ADD_DASHBOARD( - OPENMPI-1.10.1_RELEASE_NO-SERIAL_OPENMP_COMPLEX_EXPERIMENTAL - ctest_linux_mpi_release_no_serial_openmp_complex_experimental_enigma.cmake - CTEST_INSTALLER_TYPE release - RUN_SERIAL - TIMEOUT_MINUTES 330 - ) - -TRILINOS_DRIVER_ADD_DASHBOARD( - OPENMPI-1.10.1_RELEASE_NO-EPETRA_NO-INT_OPENMP_COMPLEX_EXPERIMENTAL - ctest_linux_mpi_release_openmp_no_epetra_no_int_complex_experimental_enigma.cmake - CTEST_INSTALLER_TYPE release - RUN_SERIAL - TIMEOUT_MINUTES 330 - ) - -TRILINOS_DRIVER_ADD_DASHBOARD( - OPENMPI-1.10.1_RELEASE_NO-INT_NO-SERIAL_OPENMP_EXPERIMENTAL - ctest_linux_mpi_release_muelu_no_int_no_serial_openmp_experimental_enigma.cmake - CTEST_INSTALLER_TYPE release - RUN_SERIAL - TIMEOUT_MINUTES 330 - ) - -TRILINOS_DRIVER_ADD_DASHBOARD( - OPENMPI-1.10.1_RELEASE_NO-INT_OPENMP_EXPERIMENTAL - ctest_linux_nightly_mpi_release_muelu_no_int_openmp_experimental_enigma.cmake - CTEST_INSTALLER_TYPE release - RUN_SERIAL - TIMEOUT_MINUTES 330 - ) - - -#TRILINOS_DRIVER_ADD_DASHBOARD( -# SERIAL_RELEASE_MueLu_Experimental -# ctest_linux_nightly_serial_release_muelu_experimental_typhon.cmake -# CTEST_INSTALLER_TYPE release -# RUN_SERIAL -# TIMEOUT_MINUTES 330 -#) - -#TRILINOS_DRIVER_ADD_DASHBOARD( -# MPICH2_1.4.1p1_RELEASE_MueLu -# ctest_linux_nightly_mpi_release_muelu_typhon.cmake -# CTEST_INSTALLER_TYPE release -# RUN_SERIAL 
-# TIMEOUT_MINUTES 330 -#) - -TRILINOS_ADD_REQUIRED_CMAKE_INSTALLS() diff --git a/cmake/ctest/drivers/enigma/TrilinosCTestDriverCore.enigma.gcc.cmake b/cmake/ctest/drivers/enigma/TrilinosCTestDriverCore.enigma.gcc.cmake deleted file mode 100644 index 3e77d7d878d3..000000000000 --- a/cmake/ctest/drivers/enigma/TrilinosCTestDriverCore.enigma.gcc.cmake +++ /dev/null @@ -1,125 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 
-# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/../../TrilinosCTestDriverCore.cmake") - -# -# Platform/compiler specific options for enigma using gcc -# - -MACRO(TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER) - - # Base of Trilinos/cmake/ctest then BUILD_DIR_NAME - - IF(COMM_TYPE STREQUAL MPI) - string(TOUPPER $ENV{LMOD_FAMILY_MPI} UC_MPI_NAME) - SET(BUILD_DIR_NAME ${UC_MPI_NAME}-$ENV{LMOD_FAMILY_MPI_VERSION}_${BUILD_TYPE}_${BUILD_NAME_DETAILS}) - ELSE() - SET(BUILD_DIR_NAME ${COMM_TYPE}-${BUILD_TYPE}_${BUILD_NAME_DETAILS}) - ENDIF() - - SET(Trilinos_REPOSITORY_LOCATION_NIGHTLY_DEFAULT "https://github.com/muelu/Trilinos.git") - - SET( CTEST_DASHBOARD_ROOT "${TRILINOS_CMAKE_DIR}/../../${BUILD_DIR_NAME}" ) - - SET( CTEST_NOTES_FILES "${CTEST_SCRIPT_DIRECTORY}/${CTEST_SCRIPT_NAME}" ) - - SET( CTEST_BUILD_FLAGS "-j12 -i" ) - - SET_DEFAULT( CTEST_PARALLEL_LEVEL "12" ) - - SET_DEFAULT( Trilinos_ENABLE_SECONDARY_TESTED_CODE ON) - - SET(Trilinos_CTEST_DO_ALL_AT_ONCE ON) - - SET(Trilinos_SKIP_EXTRAREPOS_FILE TRUE) - - # Only turn on PyTrilinos for shared libraries - SET_DEFAULT(Trilinos_EXCLUDE_PACKAGES ${EXTRA_EXCLUDE_PACKAGES} TriKota Optika) - - SET( EXTRA_SYSTEM_CONFIGURE_OPTIONS - "-DBUILD_SHARED_LIBS:BOOL=ON" - "-DCMAKE_BUILD_TYPE:STRING=${BUILD_TYPE}" - "-DCMAKE_CXX_STANDARD=17" - "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" - "-DCMAKE_AR=/usr/bin/ar" - - "-DTrilinos_ENABLE_ShyLU_Node:BOOL=OFF" - - "-DSuperLU_INCLUDE_DIRS=$ENV{SUPERLU_INC}" - "-DSuperLU_LIBRARY_DIRS=$ENV{SUPERLU_LIB}" - "-DTPL_BLAS_LIBRARIES=/usr/lib64/libblas.so.3" - "-DTPL_LAPACK_LIBRARIES=/usr/lib64/liblapack.so.3" - ) - - SET_DEFAULT(COMPILER_VERSION "$ENV{LMOD_FAMILY_COMPILER}-$ENV{LMOD_FAMILY_COMPILER_VERSION}") - - # no CUDA on this machine, yet... - # "-DCUDA_TOOLKIT_ROOT_DIR=/opt/nvidia/cuda/6.5.14" - - #Ensuring that MPI is on for all parallel builds that might be run. 
- IF(COMM_TYPE STREQUAL MPI) - SET( EXTRA_SYSTEM_CONFIGURE_OPTIONS - ${EXTRA_SYSTEM_CONFIGURE_OPTIONS} - "-DTPL_ENABLE_MPI:BOOL=ON" - "-DMPI_BASE_DIR:PATH=$ENV{OPENMPI_ROOT}" - "-DMPI_EXEC_POST_NUMPROCS_FLAGS:STRING=--bind-to\\\;socket\\\;--map-by\\\;socket" - ) - ENDIF() - - TRILINOS_CTEST_DRIVER() - -ENDMACRO() diff --git a/cmake/ctest/drivers/enigma/cron_driver.sh b/cmake/ctest/drivers/enigma/cron_driver.sh deleted file mode 100755 index b96a3cc7d656..000000000000 --- a/cmake/ctest/drivers/enigma/cron_driver.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -echo -echo "Starting nightly Trilinos development testing on $HOSTNAME: `date`" -echo - -# Undefine the next line while making/testing local driver changes. Otherwise, the nightly -# testing system will pull a fresh version of Trilinos and wipe out your changes. -# export TDD_IN_TESTING_MODE=1 - -# -# TrilinosDriver settings: -# - -export TDD_PARALLEL_LEVEL=2 - -# Trilinos settings: -# - -# Submission mode for the *TrilinosDriver* dashboard -export TDD_CTEST_TEST_TYPE=Nightly -#export TDD_CTEST_TEST_TYPE=Experimental - -export TDD_DEBUG_VERBOSE=1 -export TRIBITS_TDD_USE_SYSTEM_CTEST=1 - -#export CTEST_DO_SUBMIT=FALSE -#export CTEST_START_WITH_EMPTY_BINARY_DIRECTORY=FALSE - -# Machine specific environment -# -. /etc/profile - -export TDD_HTTP_PROXY=$http_proxy -export TDD_HTTPS_PROXY=$https_proxy - -export TDD_FORCE_CMAKE_INSTALL=0 - -. ~/.bashrc - - -# Machine independent cron_driver: -# - -SCRIPT_DIR=`cd "\`dirname \"$0\"\`";pwd` -export MODULEPATH=$SCRIPT_DIR:$MODULEPATH -# Trilinos source repo -export TRILINOS_SOURCE=$SCRIPT_DIR/../../../.. - -# If you update the list of modules, go to ~/code/trilinos-test/trilinos/ and -# do "git pull". 
Otherwise, the tests could fail on the first night, as we -# would first run old cron_driver.sh and only then pull - -module load muelu-gcc -module list -env - -pushd $TRILINOS_SOURCE -ctest -S $SCRIPT_DIR/ctest_linux_nightly_mpi_release_muelu_enigma.cmake -ctest -S $SCRIPT_DIR/ctest_linux_nightly_mpi_debug_muelu_basker_enigma.cmake -ctest -S $SCRIPT_DIR/ctest_linux_nightly_mpi_debug_muelu_klu2_enigma.cmake -ctest -S $SCRIPT_DIR/ctest_linux_nightly_mpi_debug_muelu_extratypes_ei_enigma.cmake -ctest -S $SCRIPT_DIR/ctest_linux_nightly_serial_debug_muelu_extratypes_enigma.cmake -ctest -S $SCRIPT_DIR/ctest_linux_nightly_serial_release_muelu_experimental_enigma.cmake -ctest -S $SCRIPT_DIR/ctest_linux_mpi_release_no_serial_openmp_complex_experimental_enigma.cmake -ctest -S $SCRIPT_DIR/ctest_linux_mpi_release_openmp_no_epetra_no_int_complex_experimental_enigma.cmake -ctest -S $SCRIPT_DIR/ctest_linux_mpi_release_muelu_no_int_no_serial_openmp_experimental_enigma.cmake -ctest -S $SCRIPT_DIR/ctest_linux_nightly_mpi_release_muelu_no_int_openmp_experimental_enigma.cmake -popd - -echo -echo "Ending nightly Trilinos development testing on $HOSTNAME: `date`" -echo diff --git a/cmake/ctest/drivers/enigma/ctest_linux_mpi_release_muelu_no_int_no_serial_openmp_experimental_enigma.cmake b/cmake/ctest/drivers/enigma/ctest_linux_mpi_release_muelu_no_int_no_serial_openmp_experimental_enigma.cmake deleted file mode 100644 index 87b6901ee851..000000000000 --- a/cmake/ctest/drivers/enigma/ctest_linux_mpi_release_muelu_no_int_no_serial_openmp_experimental_enigma.cmake +++ /dev/null @@ -1,105 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. 
Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. 
Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - -# JJH corresponds to do-configure-no_serial_no_int - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.enigma.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. 
-SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS NO-INT_NO-SERIAL_OPENMP_EXPERIMENTAL) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(Trilinos_TRACK Experimental) # Set the CDash track -SET(CTEST_TEST_TIMEOUT 900) - -SET(Trilinos_PACKAGES MueLu Xpetra) - -SET(EXTRA_CONFIGURE_OPTIONS - ### ETI ### - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON" - "-DTpetra_INST_INT_INT:BOOL=OFF" - "-DTpetra_INST_INT_LONG:BOOL=OFF" - "-DTpetra_INST_INT_LONG_LONG:BOOL=ON" - "-DTpetra_INST_SERIAL:BOOL=OFF" - "-DTrilinos_ENABLE_OpenMP=ON" - - ### MISC ### - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS:BOOL=OFF" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS:BOOL=ON" - - ### TPLS ### - "-DTPL_ENABLE_SuperLU:BOOL=ON" - "-DTrilinos_ENABLE_OpenMP:BOOL=ON" - "-DTPL_ENABLE_HWLOC:BOOL=OFF" - - ### PACKAGES CONFIGURATION ### - "-DMueLu_ENABLE_Experimental:BOOL=ON" - "-DXpetra_ENABLE_Experimental:BOOL=ON" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/enigma/ctest_linux_mpi_release_no_serial_openmp_complex_experimental_enigma.cmake b/cmake/ctest/drivers/enigma/ctest_linux_mpi_release_no_serial_openmp_complex_experimental_enigma.cmake deleted file mode 100644 index 590b96f74fc3..000000000000 --- a/cmake/ctest/drivers/enigma/ctest_linux_mpi_release_no_serial_openmp_complex_experimental_enigma.cmake +++ /dev/null @@ -1,105 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. 
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. 
-# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.enigma.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS NO-SERIAL_OPENMP_COMPLEX_EXPERIMENTAL) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE EXPERIMENTAL) -SET(Trilinos_TRACK Experimental) # Set the CDash track -SET(CTEST_TEST_TIMEOUT 900) - -SET(Trilinos_PACKAGES MueLu Xpetra) - -SET(EXTRA_CONFIGURE_OPTIONS - ### ETI ### - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON" - "-DTrilinos_ENABLE_COMPLEX:BOOL=ON" - "-DTeuchos_ENABLE_COMPLEX:BOOL=ON" - "-DTpetra_INST_COMPLEX_DOUBLE:BOOL=ON" - "-DTpetra_INST_COMPLEX_FLOAT:BOOL=OFF" - "-DTpetra_INST_SERIAL:BOOL=OFF" - "-DKokkos_ENABLE_SERIAL:BOOL=ON" - - ### MISC ### - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS:BOOL=OFF" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS:BOOL=ON" - - ### TPLS ### - "-DTPL_ENABLE_SuperLU:BOOL=ON" - "-DTrilinos_ENABLE_OpenMP:BOOL=ON" - "-DTPL_ENABLE_HWLOC:BOOL=OFF" - - ### PACKAGES CONFIGURATION ### - "-DMueLu_ENABLE_Experimental:BOOL=ON" - "-DXpetra_ENABLE_Experimental:BOOL=ON" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git 
a/cmake/ctest/drivers/enigma/ctest_linux_mpi_release_openmp_no_epetra_no_int_complex_experimental_enigma.cmake b/cmake/ctest/drivers/enigma/ctest_linux_mpi_release_openmp_no_epetra_no_int_complex_experimental_enigma.cmake deleted file mode 100644 index ca8edcae36aa..000000000000 --- a/cmake/ctest/drivers/enigma/ctest_linux_mpi_release_openmp_no_epetra_no_int_complex_experimental_enigma.cmake +++ /dev/null @@ -1,108 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.enigma.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. 
-# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS NO-EPETRA_NO-INT_COMPLEX_EXPERIMENTAL) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Experimental) -SET(Trilinos_TRACK Experimental) # Set the CDash track -SET(CTEST_TEST_TIMEOUT 900) - -SET(Trilinos_PACKAGES MueLu Xpetra) - -SET(EXTRA_CONFIGURE_OPTIONS - ### ETI ### - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON" - "-DTrilinos_ENABLE_COMPLEX:BOOL=ON" - "-DTeuchos_ENABLE_COMPLEX:BOOL=ON" - "-DTpetra_INST_INT_INT:BOOL=OFF" - "-DTpetra_INST_INT_LONG_LONG:BOOL=ON" - "-DTpetra_INST_COMPLEX_DOUBLE:BOOL=ON" - "-DTpetra_INST_COMPLEX_FLOAT:BOOL=OFF" - "-DTpetra_INST_SERIAL:BOOL=ON" - "-DKokkos_ENABLE_SERIAL:BOOL=ON" - - ### MISC ### - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS:BOOL=OFF" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS:BOOL=ON" - - ### TPLS ### - "-DTPL_ENABLE_SuperLU:BOOL=ON" - "-DTrilinos_ENABLE_OpenMP:BOOL=ON" - "-DTPL_ENABLE_HWLOC:BOOL=OFF" - - ### PACKAGES CONFIGURATION ### - "-DMueLu_ENABLE_Experimental:BOOL=ON" - "-DXpetra_ENABLE_Experimental:BOOL=ON" - "-DTrilinos_ENABLE_Epetra:BOOL=OFF" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/enigma/ctest_linux_nightly_mpi_debug_muelu_basker_enigma.cmake b/cmake/ctest/drivers/enigma/ctest_linux_nightly_mpi_debug_muelu_basker_enigma.cmake deleted file mode 100644 index 550138005a46..000000000000 --- a/cmake/ctest/drivers/enigma/ctest_linux_nightly_mpi_debug_muelu_basker_enigma.cmake +++ /dev/null @@ -1,91 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. 
Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. 
Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.enigma.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. 
-SET(COMM_TYPE MPI) -SET(BUILD_TYPE DEBUG) -#SET(BUILD_DIR_NAME ${UC_MPI_NAME}_$ENV{SEMS_MPI_VERSION}_${BUILD_TYPE}_DEV_MueLu_Basker) -SET(BUILD_NAME_DETAILS NO-SUPERLU_BASKER) - - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(Trilinos_TRACK Specialized) # Set the CDash Track -SET(CTEST_TEST_TIMEOUT 14400) # twice the default value, for valgrind - -SET(Trilinos_PACKAGES MueLu Xpetra Amesos2) - -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION=ON" - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS=OFF" - "-DTPL_ENABLE_SuperLU=OFF" - "-DAmesos2_ENABLE_Basker=ON" - "-DAmesos2_ENABLE_KLU2:BOOL=OFF" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/enigma/ctest_linux_nightly_mpi_debug_muelu_extratypes_ei_enigma.cmake b/cmake/ctest/drivers/enigma/ctest_linux_nightly_mpi_debug_muelu_extratypes_ei_enigma.cmake deleted file mode 100644 index b08927aeb722..000000000000 --- a/cmake/ctest/drivers/enigma/ctest_linux_nightly_mpi_debug_muelu_extratypes_ei_enigma.cmake +++ /dev/null @@ -1,92 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 
-# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.enigma.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE DEBUG) -#SET(BUILD_DIR_NAME ${UC_MPI_NAME}_$ENV{SEMS_MPI_VERSION}_${BUILD_TYPE}_DEV_MueLu_ExtraTypes_EI) -SET(BUILD_NAME_DETAILS LONG-LONG) - - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(Trilinos_TRACK Experimental) # Set the CDash track. -SET(CTEST_TEST_TIMEOUT 900) - -SET(Trilinos_BRANCH develop) - -SET(Trilinos_PACKAGES MueLu Xpetra Amesos2) - -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON" - - "-DZoltan2_ENABLE_Experimental:BOOL=ON" - "-DTPL_ENABLE_SuperLU:BOOL=ON" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/enigma/ctest_linux_nightly_mpi_debug_muelu_klu2_enigma.cmake b/cmake/ctest/drivers/enigma/ctest_linux_nightly_mpi_debug_muelu_klu2_enigma.cmake deleted file mode 100644 index ef868a5ffe0a..000000000000 --- a/cmake/ctest/drivers/enigma/ctest_linux_nightly_mpi_debug_muelu_klu2_enigma.cmake +++ /dev/null @@ -1,90 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. 
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. 
-# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.enigma.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE DEBUG) -#SET(BUILD_DIR_NAME ${UC_MPI_NAME}_$ENV{SEMS_MPI_VERSION}_${BUILD_TYPE}_DEV_MueLu_KLU2) -SET(BUILD_NAME_DETAILS NO-SUPERLU_KLU2) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(Trilinos_TRACK Specialized) # Set the CDash track. 
-SET(CTEST_TEST_TIMEOUT 14400) # twice the default value, for valgrind - -#SET(Trilinos_PACKAGES MueLu Kokkos Tpetra Xpetra Belos Amesos2 Ifpack2 Zoltan2) -SET(Trilinos_PACKAGES MueLu) - -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON" - "-DMueLu_ENABLE_EXPLICIT_INSTANTIATION=ON" - "-DTPL_ENABLE_SuperLU:BOOL=OFF" - "-DAmesos2_ENABLE_KLU2:BOOL=ON" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/enigma/ctest_linux_nightly_mpi_release_muelu_enigma.cmake b/cmake/ctest/drivers/enigma/ctest_linux_nightly_mpi_release_muelu_enigma.cmake deleted file mode 100644 index 8ea80f4903a8..000000000000 --- a/cmake/ctest/drivers/enigma/ctest_linux_nightly_mpi_release_muelu_enigma.cmake +++ /dev/null @@ -1,89 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 
-# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.enigma.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS DEFAULT) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(Trilinos_TRACK Experimental) # Set CDash board to Nightly -SET(CTEST_TEST_TIMEOUT 900) - -SET(Trilinos_PACKAGES MueLu) - -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION=ON" - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS=ON" - "-DTPL_ENABLE_SuperLU:BOOL=ON" - "-DZoltan2_ENABLE_Experimental=ON" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS=ON" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/enigma/ctest_linux_nightly_mpi_release_muelu_no_int_openmp_experimental_enigma.cmake b/cmake/ctest/drivers/enigma/ctest_linux_nightly_mpi_release_muelu_no_int_openmp_experimental_enigma.cmake deleted file mode 100644 index 0ebccf2b8b0c..000000000000 --- a/cmake/ctest/drivers/enigma/ctest_linux_nightly_mpi_release_muelu_no_int_openmp_experimental_enigma.cmake +++ /dev/null @@ -1,106 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. 
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. 
-# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - -# JJH corresponds to do-configure-no_int - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.enigma.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS NO-INT_OPENMP_EXPERIMENTAL) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(Trilinos_TRACK Experimental) # Set the CDash track -SET(CTEST_TEST_TIMEOUT 900) - -SET(Trilinos_PACKAGES MueLu Xpetra) - -SET(EXTRA_CONFIGURE_OPTIONS - ### ETI ### - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON" - "-DTpetra_INST_INT_INT:BOOL=OFF" - "-DTpetra_INST_INT_LONG:BOOL=OFF" - "-DTpetra_INST_INT_LONG_LONG:BOOL=ON" - "-DTpetra_INST_SERIAL:BOOL=ON" - "-DKokkos_ENABLE_SERIAL:BOOL=ON" - "-DTrilinos_ENABLE_OpenMP=ON" - - ### MISC ### - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS:BOOL=OFF" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS:BOOL=ON" - - ### TPLS ### - "-DTPL_ENABLE_SuperLU:BOOL=ON" - "-DTrilinos_ENABLE_OpenMP:BOOL=ON" - "-DTPL_ENABLE_HWLOC:BOOL=OFF" - - ### PACKAGES CONFIGURATION ### - "-DMueLu_ENABLE_Experimental:BOOL=ON" - "-DXpetra_ENABLE_Experimental:BOOL=ON" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff 
--git a/cmake/ctest/drivers/enigma/ctest_linux_nightly_serial_debug_muelu_extratypes_enigma.cmake b/cmake/ctest/drivers/enigma/ctest_linux_nightly_serial_debug_muelu_extratypes_enigma.cmake deleted file mode 100644 index 7caee7398c3d..000000000000 --- a/cmake/ctest/drivers/enigma/ctest_linux_nightly_serial_debug_muelu_extratypes_enigma.cmake +++ /dev/null @@ -1,92 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.enigma.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. 
-# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE SERIAL) -SET(BUILD_TYPE DEBUG) -#SET(BUILD_DIR_NAME SERIAL_${BUILD_TYPE}_DEV_MueLu_ExtraTypes) -SET(BUILD_NAME_DETAILS LONG-LONG_COMPLEX-DOUBLE) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(Trilinos_TRACK Nightly) # set the CDash track -SET(CTEST_TEST_TIMEOUT 900) - -SET(Trilinos_PACKAGES MueLu Xpetra Amesos2) - -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION=ON" - "-DTpetra_INST_INT_LONG_LONG=ON" - "-DTeuchos_ENABLE_COMPLEX=ON" - "-DTrilinos_ENABLE_COMPLEX_DOUBLE=ON" - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS=OFF" - "-DTPL_ENABLE_SuperLU:BOOL=ON" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS=ON" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/enigma/ctest_linux_nightly_serial_release_muelu_experimental_enigma.cmake b/cmake/ctest/drivers/enigma/ctest_linux_nightly_serial_release_muelu_experimental_enigma.cmake deleted file mode 100644 index 93897ce971e7..000000000000 --- a/cmake/ctest/drivers/enigma/ctest_linux_nightly_serial_release_muelu_experimental_enigma.cmake +++ /dev/null @@ -1,91 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. 
-# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.enigma.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE SERIAL) -SET(BUILD_TYPE RELEASE) -#SET(BUILD_DIR_NAME SERIAL_${BUILD_TYPE}_DEV_MueLu_Experimental) -SET(BUILD_NAME_DETAILS EXPERIMENTAL) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(Trilinos_TRACK Experimental) # Set the CDash track. 
-SET(CTEST_TEST_TIMEOUT 900) - -SET(Trilinos_PACKAGES MueLu Xpetra Amesos2) - -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION=ON" - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS=OFF" - "-DTPL_ENABLE_SuperLU=ON" - "-DMueLu_ENABLE_Experimental=ON" - "-DXpetra_ENABLE_Experimental=ON" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS=ON" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/enigma/muelu-gcc.lua b/cmake/ctest/drivers/enigma/muelu-gcc.lua deleted file mode 100644 index 9e1a19e1c3a4..000000000000 --- a/cmake/ctest/drivers/enigma/muelu-gcc.lua +++ /dev/null @@ -1,28 +0,0 @@ -load("sems-gcc/8.3.0") -load("sems-openmpi") -load("sems-cmake") -load("sems-ninja") -load("sems-git") - -load("sems-superlu") - -load("sems-yaml-cpp") -load("sems-hdf5") -load("sems-netcdf-c") -load("sems-parallel-netcdf") -load("sems-zlib") - -load("sems-boost") -load("sems-python") - -load("sems-metis") -load("sems-parmetis") - -load("sems-cuda") - -pushenv("OMP_NUM_THREADS","2") -pushenv("CUDA_LAUNCH_BLOCKING","1") -pushenv("CUDA_MANAGED_FORCE_DEVICE_ALLOC","1") - --- Only run on the Tesla K40, not the Quadro -- -pushenv("CUDA_VISIBLE_DEVICES","0") \ No newline at end of file diff --git a/cmake/ctest/drivers/enigma/sendTestSummary.sh b/cmake/ctest/drivers/enigma/sendTestSummary.sh deleted file mode 100755 index e5ee3ccd9266..000000000000 --- a/cmake/ctest/drivers/enigma/sendTestSummary.sh +++ /dev/null @@ -1,326 +0,0 @@ -#!/bin/sh - -# Parse command line options. -DEBUGMODE=0 -USAGE="sendTestSummary.sh [-d] " -while getopts d OPT; do - case "$OPT" in - d) - # debug mode, send email summary to me only - DEBUGMODE=1 - ;; - \?) - # getopts issues an error message - echo ${USAGE} - echo - exit 1 - ;; - esac -done - -# Remove the options we parsed above. -shift `expr $OPTIND - 1` -# The logfile is required. Error out if it's not provided. 
-if [ $# -eq 0 ]; then - echo $USAGE >&2 - exit 1 -fi -### end parsing ### - -######################################################################### -# Variables you might want to modify. -######################################################################### - -#Perl script to produce prettified HTML -HTMLPERLSCRIPT="/data/nightlyTesting/Trilinos/packages/muelu/utils/misc/drakify-email.pl" -#root of cdash testing directory -TESTLOCATION="/data/nightlyTesting" -LOGBACKUPDIRECTORY="/data/nightlyTesting/logs" - -#packages to be summarized -PATTERN="(Xpetra|MueLu)" - -#variables to be passed to the perl script -MACHINENAME=`hostname -s` -USER=`whoami` - -#who gets the email summary -if [[ $DEBUGMODE == 1 ]]; then - RECIPIENTS=( - "${USER}@sandia.gov" - ) -else - RECIPIENTS=( - "muelu-regression@software.sandia.gov" - ) -fi -#suffix for all the log files -timeStamp="$(date +%F_%R)" - -#cron driver log file -INFILE=$1 -#root of file to be emailed. The correct suffix must be appended whenever you use this. 
-OUTFILE="test-summary-${timeStamp}" -MAILCOMMAND="/usr/sbin/sendmail" -######################################################################### - -cd ${TESTLOCATION} - -backupFile="cron_driver.log.$timeStamp" -cp cron_driver.log $backupFile - -testStartString=`egrep "Starting nightly Trilinos development" cron_driver.log` -testStartDate=`echo $testStartString | sed "s/:/#/" | cut -f 2 -d#` -ttt=`echo $testStartString | cut -f 1 -d:` -testMachine=${ttt##* } -testEndString=`egrep "Ending nightly Trilinos development" cron_driver.log` -testEndDate=`echo $testEndString | sed "s/:/#/" | cut -f 2 -d#` - -awk -v packagesToMatch="$PATTERN" -v summaryFile="${OUTFILE}.txt" -v machine="$testMachine" -v startTime="$testStartDate" -v endTime="$testEndDate" ' - -################################################### -# Commands to run before the file is processed -################################################### -BEGIN { - print "Machine : " machine > summaryFile - print "Start time : " startTime > summaryFile - print "End time : " endTime > summaryFile - testctr=0 - gitUpdateFailed=0 - dashboardErrors=0 -} - -################################################### -# Commands to run while processing the file -################################################### -{ - - if ($0 ~ "Update command failed") - { - gitUpdateFailed=1 - } - - if ($0 ~ "^test [0-9]*$") - { - #start of test found, e.g., test 4 - FOUND=2 - testNum=$0 - sub(/test /,"",testNum) - testNum=testNum":" - #\x27 is hex code for single quote - packageLibBuild=testNum" Building target: \x27" packagesToMatch "_libs" - packageTestBuild=testNum" Build ALL target for \x27" packagesToMatch "\x27" - runTestPattern=testNum" Running test for package \x27" packagesToMatch "\x27" - next #skip any more processing, go on to next line - } - - if (FOUND==2) - { - FOUND-- - dashboardName=$0 - sub(/Start [ ]*[0-9]*: /,"",dashboardName) - dashboardName=RemoveWhiteSpace(dashboardName) - listOfDashboardNames[testctr] = dashboardName - 
testctr++ - dashBoardPattern="Test [ ]*#[0-9]*: " dashboardName - } - -# Record the "track" for this dashboard, which could be "Nightly", "Experimental", or "Specialized" - if (FOUND && $0 ~ "-- Trilinos_TRACK=") - { - thisLine=$0 - sub(/^[0-9]*: -- Trilinos_TRACK=\x27/,"",thisLine) - sub(/\x27/,"",thisLine) - if (length(thisLine) > 0) { - dashboardTrack[dashboardName] = thisLine - trackTypes[thisLine]++ - } - } - - if (FOUND && $0 ~ dashBoardPattern) - { - thisLine=$0 - thisLine=RemoveWhiteSpace(thisLine) - if (dashboardErrors == 0) - dashBoardSummary[dashboardName] = "passed" - else - dashBoardSummary[dashboardName] = "FAILED" - match(thisLine,"[0-9]*\\.[0-9]* sec$") - timeSummary[dashboardName] = substr(thisLine,RSTART,RLENGTH) - #done with this dashboard, reset error flag - dashboardErrors=0 - } - - # library build - if (FOUND && $0 ~ packageLibBuild) - { - getCompilerSummary=2 - thisLine = $0 - pat = "\x27" packagesToMatch "_libs\x27" - match(thisLine,pat) - currentPackage = substr(thisLine,RSTART+1,RLENGTH-2) - listOfPackages[currentPackage] = currentPackage - } - - # tests build - if (FOUND && $0 ~ packageTestBuild) - { - getCompilerSummary=2 - thisLine = $0 - pat = "\x27" packagesToMatch "\x27" - match(thisLine,pat) - currentPackage = substr(thisLine,RSTART+1,RLENGTH-2) - listOfPackages[currentPackage] = currentPackage - } - - if (getCompilerSummary>0 && $0 ~ " Compiler errors") - { - thisLine=$0 - sub(testNum,"",thisLine) - pat="[0-9]*" - thisLine=RemoveWhiteSpace(thisLine) - - match(thisLine,pat) - numErrors = substr(thisLine,RSTART,RLENGTH) - errorSummary[dashboardName,currentPackage] = numErrors - - getCompilerSummary-- - } - - if (getCompilerSummary>0 && $0 ~ " Compiler warnings") - { - thisLine=$0 - sub(testNum,"",thisLine) - thisLine=RemoveWhiteSpace(thisLine) - pat="[0-9]*" - match(thisLine,pat) - numWarnings = substr(thisLine,RSTART,RLENGTH); - warningSummary[dashboardName,currentPackage] = numWarnings - getCompilerSummary-- - } - - #Look for 
pattern indicating that the tests of interest have in fact run. - if (FOUND && match($0,runTestPattern)) - { - packageTested = substr($0,RSTART,RLENGTH); - sub(testNum,"",packageTested) - packageTested=RemoveWhiteSpace(packageTested) - getTestSummary=1 - } - - if (getTestSummary && $0 ~ "No tests were found!!!") - { - getTestSummary=0 - } - - #Calculate the number of failing, passing, and total tests. - if (getTestSummary && $0 ~ "tests failed out of") - { - thisLine=$0 - sub(testNum,"",thisLine) - thisLine=RemoveWhiteSpace(thisLine) - getTestSummary=0 - pat = "[0-9]* tests failed out of [0-9]*" - match(thisLine,pat) - ttt = substr(thisLine,RSTART,RLENGTH); - pat = "^[0-9]*" - match(ttt,pat) - numFailed = substr(ttt,RSTART,RLENGTH); - pat = "[0-9]*$" - match(ttt,pat) - numTotal = substr(ttt,RSTART,RLENGTH); - failSummary[dashboardName,currentPackage] = numFailed - passSummary[dashboardName,currentPackage] = numTotal+0-numFailed - totalSummary[dashboardName,currentPackage] = numTotal - if (numFailed != 0) - dashboardErrors=1 - } -} - -################################################### -# helper functions -################################################### -function RemoveWhiteSpace(theString) -{ - sub(/^[ ]*/,"",theString); sub(/[ ]*$/,"",theString); - return (theString) -} - -################################################### -# Commands to run after the file is processed -################################################### -END { - - if (gitUpdateFailed == 1) { - print "\n *** git update FAILED ***\n" > summaryFile - } - - # do some nice formatting - numPlusses=73 - thePluses=" " - while (jj++ summaryFile - for (track in trackTypes) { - printf("%s\n",thePluses) > summaryFile - trackNameLength = length(track) - numPlussesToTheRight = numPlusses - trackNameLength - 4 - plussesToTheRight="" - kk = 0 - while (kk++ summaryFile - printf("%s\n",thePluses) > summaryFile - for (i in listOfDashboardNames) { - db=listOfDashboardNames[i] - if (dashboardTrack[db] == track) 
- printf(" %61-s ... %s\n",db,dashBoardSummary[db]) > summaryFile; - } - } - printf("-----------------------------------------------------------------------------\n\n") > summaryFile - - for (i in listOfDashboardNames) { - db=listOfDashboardNames[i] - spaces=" " - printf("%55-s\n%s%8-s, %5.1f seconds\n",db, spaces, dashBoardSummary[db], timeSummary[db]) > summaryFile; - for (k in listOfPackages) { - pat = "_lib" - if (match(k,pat)) isLib = 1; - else isLib = 0; - if ((db,k) in warningSummary) nwarn = warningSummary[db,k] - else nwarn = "-"; - if ((db,k) in errorSummary) nerr = errorSummary[db,k] - else nerr = "-"; - if ((db,k) in failSummary) nfail = failSummary[db,k] - else nfail = "-"; - if ((db,k) in passSummary) npass = passSummary[db,k] - else npass = "-"; - if ((db,k) in totalSummary) ntotal = totalSummary[db,k] - else ntotal = "-"; - if (isLib) { - summaryString = sprintf("%15s | %3d warnings | %3d errors",k,nwarn,nerr); - } - else { - summaryString = sprintf("%15s | %3d warnings | %3d errors | %d/%d passed",k,nwarn,nerr,npass,ntotal); - } - print spaces summaryString > summaryFile - } - } - -} -' $INFILE - -date2=`echo $(date) | sed "s/ /_/g"` -cdashDate="$(date +%F)" -cat ${OUTFILE}.txt | perl ${HTMLPERLSCRIPT} ${date2} ${cdashDate} ${MACHINENAME} ${USER} > ${OUTFILE}.html - -${MAILCOMMAND} -it < /home/jhu/code/trilinos-test/cron_driver.log; cd /home/jhu/code/trilinos-test; /home/jhu/bin/sendTestSummary.sh cron_driver.log - - -# -# Synchronize github.com/muelu/Trilinos with github.com/trilinos/Trilinos -# -0 * * * * eval `python /home/jhu/code/trilinos-test/setSshEnv.py`; /home/jhu/bin/muelu-sync &> /home/jhu/code/trilinos/muelu-sync.log -# -@reboot /home/jhu/bin/send-reboot-email.sh diff --git a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_debug_muelu_coverage_geminga.cmake b/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_debug_muelu_coverage_geminga.cmake deleted file mode 100644 index ff90d7b26234..000000000000 --- 
a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_debug_muelu_coverage_geminga.cmake +++ /dev/null @@ -1,93 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.geminga.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE DEBUG) -SET(BUILD_NAME_DETAILS COVERAGE_EXPERIMENTAL) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(CTEST_TEST_TIMEOUT 14400) # twice the default value, for valgrind -SET(CTEST_DO_COVERAGE_TESTING TRUE) #The quickstart says this is redundant, but I'm setting it just in case. 
- -#SET(Trilinos_PACKAGES Teuchos Kokkos Epetra Tpetra Xpetra Amesos Amesos2 Ifpack Ifpack2 Zoltan Zoltan2) -SET(Trilinos_PACKAGES MueLu) - -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION=ON" - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS=ON" - "-DTrilinos_ENABLE_COVERAGE_TESTING=ON" - "-DTPL_ENABLE_SuperLU=ON" - "-DMueLu_ENABLE_Experimental=ON" - "-DXpetra_ENABLE_Experimental=ON" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS=ON" - "-DTrilinos_EXTRA_LINK_FLAGS='-lgcov'" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_debug_muelu_geminga.cmake b/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_debug_muelu_geminga.cmake deleted file mode 100644 index 45c93367b43f..000000000000 --- a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_debug_muelu_geminga.cmake +++ /dev/null @@ -1,88 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 
-# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.geminga.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE DEBUG) -SET(BUILD_NAME_DETAILS DEFAULT) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(CTEST_TEST_TIMEOUT 14400) # twice the default value, for valgrind -SET(CTEST_MEMORYCHECK_COMMAND /usr/local/bin/valgrind) -SET(CTEST_DO_MEMORY_TESTING FALSE) - -SET(Trilinos_PACKAGES MueLu Xpetra Amesos2) - -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION=ON" - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS=OFF" - "-DTPL_ENABLE_SuperLU=ON" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_debug_muelu_kokkos_refactor_cuda_geminga.cmake b/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_debug_muelu_kokkos_refactor_cuda_geminga.cmake deleted file mode 100644 index 5204be51b7f4..000000000000 --- a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_debug_muelu_kokkos_refactor_cuda_geminga.cmake +++ /dev/null @@ -1,105 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. 
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. 
-# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.geminga.gcc-cuda.cmake") - -# -# Set the options specific to this build case -# - -SET(COMM_TYPE MPI) -SET(BUILD_TYPE DEBUG) -string(TOUPPER $ENV{SEMS_MPI_NAME} UC_MPI_NAME) -SET(BUILD_DIR_NAME ${UC_MPI_NAME}-$ENV{SEMS_MPI_VERSION}_${BUILD_TYPE}_KOKKOS-REFACTOR_CUDA-$ENV{SEMS_CUDA_VERSION}) -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Experimental) -SET(CTEST_TEST_TIMEOUT 900) - -SET(Trilinos_PACKAGES MueLu Xpetra Amesos2) - -SET(EXTRA_CONFIGURE_OPTIONS - ### ETI ### - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON" - "-DTeuchos_ENABLE_COMPLEX:BOOL=OFF" - "-DTpetra_INST_COMPLEX_DOUBLE:BOOL=OFF" - "-DTpetra_INST_COMPLEX_FLOAT:BOOL=OFF" - - ### MISC ### - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS:BOOL=OFF" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS:BOOL=ON" - - ### TPLS ### - "-DTPL_ENABLE_SuperLU:BOOL=ON" - - ### PACKAGES CONFIGURATION ### - "-DTrilinos_ENABLE_TESTS:BOOL=OFF" - "-DMueLu_ENABLE_Experimental:BOOL=ON" - "-DMueLu_ENABLE_Kokkos_Refactor:BOOL=ON" - "-DXpetra_ENABLE_Experimental:BOOL=ON" - "-DXpetra_ENABLE_Kokkos_Refactor:BOOL=ON" - - # Disable Pamgen and Shards due to weird nvcc errors - "-DTPL_ENABLE_Pamgen:BOOL=OFF" - "-DTPL_ENABLE_Shards:BOOL=OFF" - - -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git 
a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_debug_valgrind_muelu_geminga.cmake b/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_debug_valgrind_muelu_geminga.cmake deleted file mode 100644 index b663cad98902..000000000000 --- a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_debug_valgrind_muelu_geminga.cmake +++ /dev/null @@ -1,91 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.geminga.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. 
-# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE DEBUG) -SET(BUILD_NAME_DETAILS VALGRIND) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(Trilinos_TRACK Specialized) # Set CDash track. -SET(CTEST_TEST_TIMEOUT 14400) # twice the default value, for valgrind -SET(CTEST_MEMORYCHECK_COMMAND /usr/bin/valgrind) -SET(CTEST_DO_MEMORY_TESTING TRUE) - -SET(Trilinos_PACKAGES MueLu Xpetra ML) - -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION=ON" - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS=OFF" - "-DTPL_ENABLE_SuperLU=ON" - "-DMueLu_ENABLE_MEMORY_TESTING=ON" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS=ON" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_release_muelu_amgx_cuda_geminga.cmake b/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_release_muelu_amgx_cuda_geminga.cmake deleted file mode 100644 index f9d85f2ca788..000000000000 --- a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_release_muelu_amgx_cuda_geminga.cmake +++ /dev/null @@ -1,111 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. 
-# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.geminga.gcc-cuda.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS AMGX_CUDA-$ENV{SEMS_CUDA_VERSION}) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Experimental) -SET(CTEST_TEST_TIMEOUT 900) - -SET(Trilinos_PACKAGES MueLu Xpetra Amesos2 Tpetra) - -SET(EXTRA_CONFIGURE_OPTIONS - ### ETI ### - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON" - "-DTpetra_INST_COMPLEX_DOUBLE:BOOL=OFF" - "-DTpetra_INST_COMPLEX_FLOAT:BOOL=OFF" - - ### MISC ### - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS:BOOL=OFF" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS:BOOL=ON" - - ### TPLS ### - "-DTPL_ENABLE_SuperLU:BOOL=ON" - "-DTPL_ENABLE_AmgX=ON" - "-DAmgX_LIBRARY_DIRS=/usr/local/amgx/lib" - "-DAmgX_INCLUDE_DIRS=/usr/local/amgx/include" - - ### PACKAGES CONFIGURATION ### - "-DTpetra_INST_INT_INT=ON" - "-DTpetra_INST_INT_LONG_LONG=OFF" - "-DMueLu_ENABLE_Experimental:BOOL=ON" - "-DMueLu_ENABLE_Kokkos_Refactor:BOOL=ON" - "-DXpetra_ENABLE_Experimental:BOOL=ON" - "-DXpetra_ENABLE_Kokkos_Refactor:BOOL=ON" - - # Disable Pamgen due to weird nvcc errors - "-DTrilinos_ENABLE_Pamgen:BOOL=OFF" - - - -) - -# -# Set the rest of the 
system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_release_muelu_cuda_no_uvm_geminga.cmake b/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_release_muelu_cuda_no_uvm_geminga.cmake deleted file mode 100644 index e08324d2e2bc..000000000000 --- a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_release_muelu_cuda_no_uvm_geminga.cmake +++ /dev/null @@ -1,108 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.geminga.gcc-cuda.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. 
-# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELWITHDEBINFO) -SET(BUILD_NAME_DETAILS KOKKOS-REFACTOR_EXPERIMENTAL_CUDA-$ENV{SEMS_CUDA_VERSION}_NO_UVM) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Experimental) -SET(CTEST_TEST_TIMEOUT 900) - -SET(Trilinos_PACKAGES MueLu Xpetra Amesos2 Tpetra Ifpack2 Belos Panzer) - -SET(EXTRA_CONFIGURE_OPTIONS - ### ETI ### - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON" - "-DTrilinos_ENABLE_COMPLEX:BOOL=OFF" - - ### MISC ### - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS:BOOL=OFF" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS:BOOL=ON" - - ### TPLS ### - "-DTPL_ENABLE_SuperLU:BOOL=ON" - "-DTPL_ENABLE_BinUtils:BOOL=ON" - "-DTPL_ENABLE_Matio:BOOL=OFF" - - ### PACKAGES CONFIGURATION ### - "-DMueLu_ENABLE_Experimental:BOOL=ON" - "-DMueLu_ENABLE_Kokkos_Refactor:BOOL=ON" - "-DXpetra_ENABLE_Experimental:BOOL=ON" - "-DXpetra_ENABLE_Kokkos_Refactor:BOOL=ON" - - # Disable Pamgen due to weird nvcc errors - "-DTrilinos_ENABLE_Pamgen:BOOL=OFF" - - ### Disable UVM ### - "-DKokkos_ENABLE_CUDA_UVM:BOOL=OFF" - "-DTpetra_ENABLE_CUDA_UVM:BOOL=OFF" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_release_muelu_kokkos_refactor_cuda_geminga.cmake b/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_release_muelu_kokkos_refactor_cuda_geminga.cmake deleted file mode 100644 index d5d7bec98594..000000000000 --- a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_release_muelu_kokkos_refactor_cuda_geminga.cmake +++ /dev/null @@ -1,104 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. 
Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. 
Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.geminga.gcc-cuda.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. 
-SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS KOKKOS-REFACTOR_EXPERIMENTAL_CUDA-$ENV{SEMS_CUDA_VERSION}) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Experimental) -SET(CTEST_TEST_TIMEOUT 900) - -SET(Trilinos_PACKAGES MueLu Xpetra Amesos2) - -SET(EXTRA_CONFIGURE_OPTIONS - ### ETI ### - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON" - "-DTrilinos_ENABLE_COMPLEX:BOOL=ON" - "-DTeuchos_ENABLE_COMPLEX:BOOL=ON" - "-DTpetra_INST_COMPLEX_DOUBLE:BOOL=ON" - - ### MISC ### - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS:BOOL=OFF" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS:BOOL=ON" - - ### TPLS ### - "-DTPL_ENABLE_SuperLU:BOOL=ON" - - ### PACKAGES CONFIGURATION ### - "-DMueLu_ENABLE_Experimental:BOOL=ON" - "-DMueLu_ENABLE_Kokkos_Refactor:BOOL=ON" - "-DXpetra_ENABLE_Experimental:BOOL=ON" - "-DXpetra_ENABLE_Kokkos_Refactor:BOOL=ON" - - # Disable Pamgen due to weird nvcc errors - "-DTrilinos_ENABLE_Pamgen:BOOL=OFF" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_release_muelu_no_epetra_no_serial_openmp_geminga.cmake b/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_release_muelu_no_epetra_no_serial_openmp_geminga.cmake deleted file mode 100644 index 5772c7d5d33f..000000000000 --- a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_release_muelu_no_epetra_no_serial_openmp_geminga.cmake +++ /dev/null @@ -1,105 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. 
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. 
-# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - -# JJH corresponds to do-configure-no_epetra_no_serial - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.geminga.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS NO-EPETRA_NO-SERIAL_OPENMP_EXPERIMENTAL) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(Trilinos_TRACK Experimental) # Set the CDash track -SET(CTEST_TEST_TIMEOUT 900) - -SET(Trilinos_PACKAGES MueLu Xpetra) - -SET(EXTRA_CONFIGURE_OPTIONS - ### ETI ### - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON" - "-DTpetra_INST_INT_LONG:BOOL=OFF" - "-DTpetra_INST_INT_LONG_LONG:BOOL=ON" - "-DTrilinos_ENABLE_OpenMP=ON" - - ### MISC ### - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS:BOOL=OFF" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS:BOOL=ON" - - ### TPLS ### - "-DTPL_ENABLE_SuperLU:BOOL=ON" - "-DTrilinos_ENABLE_OpenMP:BOOL=ON" - "-DTPL_ENABLE_HWLOC:BOOL=OFF" - - ### PACKAGES CONFIGURATION ### - "-DMueLu_ENABLE_Experimental:BOOL=ON" - "-DXpetra_ENABLE_Experimental:BOOL=ON" - "-DTrilinos_ENABLE_Epetra:BOOL=OFF" - -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git 
a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_release_muelu_tpetra_no_int_no_serial_geminga.cmake b/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_release_muelu_tpetra_no_int_no_serial_geminga.cmake deleted file mode 100644 index e36a6644c244..000000000000 --- a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_release_muelu_tpetra_no_int_no_serial_geminga.cmake +++ /dev/null @@ -1,108 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.geminga.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. 
-# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS NO-EPETRA_NO-INT_COMPLEX_NO-SERIAL_OPENMP_EXPERIMENTAL) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(Trilinos_TRACK Experimental) # Set the CDash track -SET(CTEST_TEST_TIMEOUT 900) - -SET(Trilinos_PACKAGES MueLu Xpetra) - -SET(EXTRA_CONFIGURE_OPTIONS - ### ETI ### - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON" - "-DTrilinos_ENABLE_COMPLEX:BOOL=ON" - "-DTeuchos_ENABLE_COMPLEX:BOOL=ON" - "-DTpetra_INST_INT_INT:BOOL=OFF" - "-DTpetra_INST_INT_LONG_LONG:BOOL=ON" - "-DTpetra_INST_COMPLEX_DOUBLE:BOOL=ON" - "-DTpetra_INST_COMPLEX_FLOAT:BOOL=OFF" - "-DTpetra_INST_SERIAL:BOOL=OFF" - "-DKokkos_ENABLE_SERIAL:BOOL=ON" - - ### MISC ### - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS:BOOL=OFF" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS:BOOL=ON" - - ### TPLS ### - "-DTPL_ENABLE_SuperLU:BOOL=ON" - "-DTrilinos_ENABLE_OpenMP:BOOL=ON" - "-DTPL_ENABLE_HWLOC:BOOL=OFF" - - ### PACKAGES CONFIGURATION ### - "-DMueLu_ENABLE_Experimental:BOOL=ON" - "-DXpetra_ENABLE_Experimental:BOOL=ON" - "-DTrilinos_ENABLE_Epetra:BOOL=OFF" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_release_tpetra_no_int_experimental_geminga.cmake b/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_release_tpetra_no_int_experimental_geminga.cmake deleted file mode 100644 index 8327a2379df5..000000000000 --- a/cmake/ctest/drivers/geminga/ctest_linux_nightly_mpi_release_tpetra_no_int_experimental_geminga.cmake +++ /dev/null @@ -1,103 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia 
Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. 
Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.geminga.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. 
-SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS NO-EPETRA_NO-INT_EXPERIMENTAL) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(Trilinos_TRACK Experimental) # Set the CDash track -SET(CTEST_TEST_TIMEOUT 900) - -SET(Trilinos_PACKAGES Tpetra) - -SET(EXTRA_CONFIGURE_OPTIONS - ### ETI ### - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON" - "-DTpetra_INST_INT_INT:BOOL=OFF" - "-DTpetra_INST_INT_LONG_LONG:BOOL=ON" - "-DTpetra_INST_COMPLEX_FLOAT:BOOL=OFF" - "-DTpetra_INST_SERIAL:BOOL=ON" - "-DKokkos_ENABLE_SERIAL:BOOL=ON" - - ### MISC ### - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS:BOOL=OFF" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS:BOOL=ON" - - ### TPLS ### - "-DTPL_ENABLE_SuperLU:BOOL=ON" - "-DTPL_ENABLE_HWLOC:BOOL=OFF" - - ### PACKAGES CONFIGURATION ### - "-DTpetra_ENABLE_Experimental:BOOL=ON" - "-DTrilinos_ENABLE_Epetra:BOOL=OFF" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/geminga/ctest_linux_nightly_serial_debug_muelu_epetra_geminga.cmake b/cmake/ctest/drivers/geminga/ctest_linux_nightly_serial_debug_muelu_epetra_geminga.cmake deleted file mode 100644 index c7d80e890e5d..000000000000 --- a/cmake/ctest/drivers/geminga/ctest_linux_nightly_serial_debug_muelu_epetra_geminga.cmake +++ /dev/null @@ -1,89 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. 
-# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.geminga.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE SERIAL) -SET(BUILD_TYPE DEBUG) -SET(BUILD_NAME_DETAILS NO-TPETRA) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(Trilinos_TRACK Nightly) # Set the CDash board to Nightly -SET(CTEST_TEST_TIMEOUT 900) - -SET(Trilinos_PACKAGES MueLu Xpetra) - -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS=OFF" - "-DTrilinos_ENABLE_Tpetra=OFF" - "-DTrilinos_ENABLE_ML=OFF" - "-DTPL_ENABLE_SuperLU=ON" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS=ON" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/geminga/ctest_linux_nightly_serial_debug_muelu_geminga.cmake b/cmake/ctest/drivers/geminga/ctest_linux_nightly_serial_debug_muelu_geminga.cmake deleted file mode 100644 index 6ec729163646..000000000000 --- a/cmake/ctest/drivers/geminga/ctest_linux_nightly_serial_debug_muelu_geminga.cmake +++ /dev/null @@ -1,89 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright 
(2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. 
Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.geminga.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. 
-SET(COMM_TYPE SERIAL) -SET(BUILD_TYPE DEBUG) -SET(BUILD_NAME_DETAILS DEFAULT) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(Trilinos_TRACK Nightly) # Set the CDash track to Nightly -SET(CTEST_TEST_TIMEOUT 14400) # twice the default value, for valgrind -SET(CTEST_MEMORYCHECK_COMMAND /usr/local/bin/valgrind) -SET(CTEST_DO_MEMORY_TESTING FALSE) - -SET(Trilinos_PACKAGES MueLu Xpetra Amesos2) - -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=ON" - "-DTPL_ENABLE_SuperLU:BOOL=ON" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS=ON" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/geminga/ctest_linux_nightly_serial_debug_muelu_tpetra_geminga.cmake b/cmake/ctest/drivers/geminga/ctest_linux_nightly_serial_debug_muelu_tpetra_geminga.cmake deleted file mode 100644 index f9cf4e62c8cd..000000000000 --- a/cmake/ctest/drivers/geminga/ctest_linux_nightly_serial_debug_muelu_tpetra_geminga.cmake +++ /dev/null @@ -1,90 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 
-# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.geminga.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE SERIAL) -SET(BUILD_TYPE DEBUG) -SET(BUILD_NAME_DETAILS NO-EPETRA) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(Trilinos_TRACK Nightly) # Set the CDash track to Nightly -SET(CTEST_TEST_TIMEOUT 900) - -SET(Trilinos_PACKAGES MueLu Xpetra) - -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION=ON" - "-DTrilinos_ENABLE_Epetra=OFF" - "-DTrilinos_ENABLE_ML=OFF" - "-DTrilinos_ENABLE_Zoltan=OFF" - "-DTPL_ENABLE_SuperLU=ON" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS=ON" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/geminga/ctest_linux_nightly_serial_debug_valgrind_muelu_geminga.cmake b/cmake/ctest/drivers/geminga/ctest_linux_nightly_serial_debug_valgrind_muelu_geminga.cmake deleted file mode 100644 index ccfeceb6a6d7..000000000000 --- a/cmake/ctest/drivers/geminga/ctest_linux_nightly_serial_debug_valgrind_muelu_geminga.cmake +++ /dev/null @@ -1,101 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. 
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. 
-# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.geminga.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE SERIAL) -SET(BUILD_TYPE DEBUG) -SET(BUILD_NAME_DETAILS VALGRIND) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(Trilinos_TRACK Specialized) # Set CDash track. -SET(CTEST_TEST_TIMEOUT 14400) # twice the default value, for valgrind -SET(CTEST_MEMORYCHECK_COMMAND /usr/bin/valgrind) -SET(CTEST_DO_MEMORY_TESTING TRUE) - -SET(Trilinos_PACKAGES MueLu Xpetra ML) - -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION=ON" - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS=OFF" - "-DTPL_ENABLE_SuperLU=ON" - "-DTeuchos_GLOBALLY_REDUCE_UNITTEST_RESULTS=ON" - "-DTeuchosCore_MemoryManagement_RCP_Abort_Verify_DISABLE=ON" - "-DTeuchosCore_testDisablePrintActiveRcpNodesOnExit_noprint_DISABLE=ON" - "-DTeuchosNumerics_DenseMatrix_example_DISABLE=ON" - "-DTeuchosCore_TypeConversions_UnitTest_DISABLE=ON" - "-DML_ValidateParameters_compareTestOutput_DISABLE=ON" -) - -# NOTE: We cannot SET() the test disables directly in here since those get ignored by -# the calling file. They have to be disabled through EXTRA_CONFIGURE_OPTIONS above. 
-# - The Teuchos tests don't valgrind cleanly and probably shouldn't --- they're purposely -# triggering error cases. -# - The ML test won't valgrind cleanly because of python on geminga. - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/geminga/muelu-gcc.lua b/cmake/ctest/drivers/geminga/muelu-gcc.lua deleted file mode 100644 index 9e1a19e1c3a4..000000000000 --- a/cmake/ctest/drivers/geminga/muelu-gcc.lua +++ /dev/null @@ -1,28 +0,0 @@ -load("sems-gcc/8.3.0") -load("sems-openmpi") -load("sems-cmake") -load("sems-ninja") -load("sems-git") - -load("sems-superlu") - -load("sems-yaml-cpp") -load("sems-hdf5") -load("sems-netcdf-c") -load("sems-parallel-netcdf") -load("sems-zlib") - -load("sems-boost") -load("sems-python") - -load("sems-metis") -load("sems-parmetis") - -load("sems-cuda") - -pushenv("OMP_NUM_THREADS","2") -pushenv("CUDA_LAUNCH_BLOCKING","1") -pushenv("CUDA_MANAGED_FORCE_DEVICE_ALLOC","1") - --- Only run on the Tesla K40, not the Quadro -- -pushenv("CUDA_VISIBLE_DEVICES","0") \ No newline at end of file diff --git a/cmake/ctest/drivers/geminga/setSshEnv.py b/cmake/ctest/drivers/geminga/setSshEnv.py deleted file mode 100755 index 238f9bd86825..000000000000 --- a/cmake/ctest/drivers/geminga/setSshEnv.py +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function -import os -import pwd -import sys -# support major versions 2 and 3 -if sys.version_info < (3,0): - import commands as subp -else: - import subprocess as subp -import re - -# -# Utility to find out if you have an ssh-agent running that is holding your -# private key. To use this in bash: -# -# eval $(python ./setSetSshEnv.python) -# -# It assumes that ssh creates files of the form /tmp/ssh-Abcdefg12345/agent.12345 . -# -# Fingerprint of the ssh keypair that you will use. This is the only line of this script that you need to modify. 
-# You can find the fingerprint with the command "ssh-keygen -lf /path/to/your/private/key/file". -# Important: You must delete the string after the fingerprint itself that contains the key filename and type. -keyFingerprint="4096 db:7b:ce:13:b4:88:64:59:b2:e9:b8:17:f0:a6:97:d5" - -# socket query tool -socketCommand="/usr/sbin/ss" - -shell = os.environ["SHELL"] -if shell == "/bin/bash" or shell == "/bin/sh": - envCmd="export SSH_AUTH_SOCK=" -elif shell == "/bin/tcsh" or shell == "/bin/csh": - envCmd="setenv SSH_AUTH_SOCK " -else: - print ("Only bash, csh, and tcsh are supported.") - quit() - -# Your username. -userid = pwd.getpwuid(os.getuid())[0] -[status,charlist]=subp.getstatusoutput(socketCommand + " -xl | grep -o '/tmp/ssh-[[:alnum:]]*/agent.[[:digit:]]*'") -# convert raw characters into list -agentList=[s.strip() for s in charlist.splitlines()] -agentFound=0 -keyFound=0 -myagent = "" -for agent in agentList: - # See if this is your agent by checking ownership of root of lock directory - # Check only the root, because if it's not yours, you can't see down into it. - pieces=agent.split("/") - rootDir = "/" + pieces[1] + "/" + pieces[2] - # JJH: On redsky, the socket command returned nonexistent directories - # So I check for existence first to avoid an exception when calling os.stat - # on a nonexistent directory. 
- if os.path.isdir(rootDir): - st = os.stat(rootDir) - dirOwner = pwd.getpwuid(st.st_uid).pw_name - if dirOwner == userid: - agentFound=1 - myagent = agent - # Your ssh agent has been found - sshAgentCmd="SSH_AUTH_SOCK=" + agent + " ssh-add -l" - [status,result]=subp.getstatusoutput(sshAgentCmd) - keyList=[s.strip() for s in result.splitlines()] - - # Check whether this key's fingerprint matches the desired key's - for key in keyList: - if keyFingerprint in key: - keyFound=1 - print (envCmd + myagent) - break - -# If no key matches, just use the last owned agent found -if keyFound == 0 and agentFound == 1: - #print ("export SSH_AUTH_SOCK=" + myagent) - print (envCmd + myagent) diff --git a/cmake/ctest/drivers/geminga/valgrind_suppressions.txt b/cmake/ctest/drivers/geminga/valgrind_suppressions.txt deleted file mode 100644 index 54cc3d139269..000000000000 --- a/cmake/ctest/drivers/geminga/valgrind_suppressions.txt +++ /dev/null @@ -1,1042 +0,0 @@ - -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:vasprintf - fun:asprintf - fun:orte_odls_base_default_launch_local - fun:opal_libevent2021_event_base_loop - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:opal_argv_join - fun:orte_odls_base_default_launch_local - fun:opal_libevent2021_event_base_loop - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:opal_argv_join - fun:orte_odls_base_default_launch_local - fun:opal_libevent2021_event_base_loop - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:strdup - fun:opal_basename - obj:* - obj:* - obj:* - fun:orte_init - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:opal_os_path - fun:opal_path_access - fun:opal_path_findv - obj:* - obj:* - obj:* - fun:orte_init - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: 
definite - fun:malloc - fun:opal_dss_unpack_byte_object - fun:opal_dss_unpack_buffer - fun:opal_dss_unpack - fun:orte_grpcomm_base_xcast_recv - fun:orte_rml_base_process_msg - fun:opal_libevent2021_event_base_loop - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:opal_dss_unpack_byte_object - fun:opal_dss_unpack_buffer - fun:opal_dss_unpack - fun:orte_grpcomm_base_xcast_recv - fun:orte_rml_base_process_msg - fun:opal_libevent2021_event_base_loop - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:opal_dss_unpack_byte_object - fun:opal_dss_unpack_buffer - fun:opal_dss_unpack - fun:orte_odls_base_default_construct_child_list - obj:* - fun:orte_daemon_recv - fun:orte_rml_base_process_msg - fun:opal_libevent2021_event_base_loop - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:strdup - fun:opal_basename - fun:orte_util_check_context_app - fun:setup_path - fun:orte_odls_base_default_launch_local - fun:opal_libevent2021_event_base_loop - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:realloc - fun:opal_argv_append_nosize - obj:* - obj:* - obj:* - fun:orte_init - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - obj:* - obj:* - fun:orte_iof_base_select - obj:* - fun:orte_init - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:realloc - fun:vasprintf - fun:asprintf - fun:orte_oob_base_get_addr - obj:* - obj:* - fun:orte_init - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - fun:prq_cons - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - fun:prq_cons - obj:* - obj:* - fun:orte_dfs_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - 
fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - fun:prq_cons - obj:* - obj:* - fun:orte_dfs_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - fun:prq_cons - obj:* - obj:* - fun:orte_filem_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - fun:prq_cons - obj:* - obj:* - fun:orte_filem_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - fun:prq_cons - obj:* - obj:* - fun:orte_iof_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - fun:prq_cons - obj:* - fun:orte_grpcomm_base_comm_stop - fun:orte_grpcomm_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - fun:prq_cons - obj:* - fun:orte_grpcomm_base_comm_stop - fun:orte_grpcomm_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - fun:prq_cons - obj:* - fun:orte_grpcomm_base_comm_stop - fun:orte_grpcomm_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - fun:prq_cons - obj:* - fun:orte_grpcomm_base_comm_stop - fun:orte_grpcomm_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - fun:prq_cons - obj:* - fun:orte_grpcomm_base_comm_stop - 
fun:orte_grpcomm_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - fun:prq_cons - obj:* - fun:orte_plm_base_comm_stop - obj:* - fun:orte_plm_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - fun:prq_cons - obj:* - fun:orte_plm_base_comm_stop - obj:* - fun:orte_plm_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:vasprintf - fun:asprintf - fun:set_dest - fun:opal_cmd_line_parse - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:vasprintf - fun:asprintf - fun:set_dest - fun:opal_cmd_line_parse - fun:create_app - fun:parse_locals - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - obj:* - obj:* - fun:orte_dfs_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - obj:* - obj:* - fun:orte_dfs_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - obj:* - obj:* - fun:orte_filem_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - obj:* - obj:* - fun:orte_filem_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - obj:* - 
obj:* - fun:orte_iof_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - obj:* - fun:orte_grpcomm_base_comm_stop - fun:orte_grpcomm_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - obj:* - fun:orte_grpcomm_base_comm_stop - fun:orte_grpcomm_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - obj:* - fun:orte_grpcomm_base_comm_stop - fun:orte_grpcomm_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - obj:* - fun:orte_grpcomm_base_comm_stop - fun:orte_grpcomm_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - obj:* - fun:orte_grpcomm_base_comm_stop - fun:orte_grpcomm_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - obj:* - fun:orte_plm_base_comm_stop - obj:* - fun:orte_plm_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: possible - fun:malloc - obj:* - fun:orte_plm_base_comm_stop - obj:* - fun:orte_plm_base_close - fun:mca_base_framework_close - obj:* - fun:orte_finalize - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:strdup - fun:parse_locals - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:show_help - fun:orte_show_help_norender - fun:orte_show_help - 
fun:bind_in_place.isra.0 - fun:orte_rmaps_base_compute_bindings - fun:orte_rmaps_base_map_job - fun:opal_libevent2021_event_base_loop - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:realloc - fun:opal_argv_append_nosize - fun:opal_argv_append - fun:opal_setenv - fun:orte_plm_base_setup_job - fun:opal_libevent2021_event_base_loop - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:strdup - fun:opal_show_help_vstring - fun:orte_show_help - fun:bind_in_place.isra.0 - fun:orte_rmaps_base_compute_bindings - fun:orte_rmaps_base_map_job - fun:opal_libevent2021_event_base_loop - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:orte_plm_base_launch_apps - fun:opal_libevent2021_event_base_loop - fun:orterun - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:orte_iof_base_write_output - obj:* - fun:opal_libevent2021_event_base_loop - fun:orterun - fun:(below main) -} - -{ - - Memcheck:Leak - match-leak-kinds: definite - ... - fun:orterun - ... -} - -{ - - Memcheck:Leak - match-leak-kinds: possible - ... - fun:orterun - ... -} - -{ - - Memcheck:Param - write(buf) - ... - fun:orte_finalize - fun:orterun - ... 
-} - -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_savepvn - fun:perl_construct - obj:/usr/bin/perl - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_savepv - fun:Perl_new_collate - fun:Perl_init_i18nl10n - fun:perl_construct - obj:/usr/bin/perl - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_savepv - fun:Perl_new_numeric - fun:Perl_init_i18nl10n - fun:perl_construct - obj:/usr/bin/perl - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_sv_grow - fun:Perl_sv_setpv - fun:perl_construct - obj:/usr/bin/perl - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_sv_grow - fun:Perl_sv_setpv - fun:perl_construct - obj:/usr/bin/perl - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_sv_grow - fun:Perl_sv_setpvn - fun:perl_construct - obj:/usr/bin/perl - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_sv_grow - fun:Perl_sv_setpvn - fun:perl_construct - obj:/usr/bin/perl - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_sv_grow - fun:Perl_sv_setpvn - fun:perl_construct - obj:/usr/bin/perl - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_savepv - fun:Perl_find_script - fun:perl_parse - obj:/usr/bin/perl - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_sv_vcatpvfn - fun:Perl_vnewSVpvf - fun:Perl_newSVpvf - fun:Perl_pp_gmtime - fun:Perl_runops_standard - fun:perl_run - obj:/usr/bin/perl - fun:(below 
main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:calloc - fun:Perl_newSVOP - fun:Perl_yylex - fun:Perl_yyparse - fun:perl_parse - obj:/usr/bin/perl - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:realloc - fun:Perl_safesysrealloc - obj:/usr/lib64/perl5/CORE/libperl.so - fun:Perl_regexec_flags - fun:Perl_pp_match - fun:Perl_runops_standard - fun:perl_run - obj:/usr/bin/perl - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_init_stacks - fun:perl_construct - obj:/usr/bin/perl - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_my_cxt_init - fun:boot_DynaLoader - fun:Perl_pp_entersub - fun:Perl_runops_standard - fun:Perl_call_sv - fun:Perl_call_list - obj:/usr/lib64/perl5/CORE/libperl.so - fun:Perl_newATTRSUB_flags - fun:Perl_newATTRSUB - fun:Perl_utilize -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_savepv - fun:Perl_gv_init_pvn - fun:Perl_gv_fetchpvn_flags - fun:Perl_gv_fetchsv - fun:Perl_sv_2cv - fun:Perl_pp_rv2cv - fun:Perl_runops_standard - fun:Perl_call_sv - fun:Perl_call_list - obj:/usr/lib64/perl5/CORE/libperl.so -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_savepv - fun:Perl_gv_init_pvn - fun:Perl_gv_fetchpvn_flags - fun:Perl_gv_fetchsv - fun:Perl_ck_rvconst - fun:Perl_newUNOP - fun:Perl_yylex - fun:Perl_yyparse - obj:/usr/lib64/perl5/CORE/libperl.so - fun:Perl_pp_require -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_savepv - fun:Perl_gv_init_pvn - fun:Perl_gv_fetchpvn_flags - fun:Perl_gv_fetchsv - fun:Perl_newATTRSUB_flags - fun:Perl_newATTRSUB - fun:Perl_yyparse - obj:/usr/lib64/perl5/CORE/libperl.so - fun:Perl_pp_require - fun:Perl_runops_standard -} -{ - - Memcheck:Leak - match-leak-kinds: definite - 
fun:realloc - fun:Perl_safesysrealloc - fun:Perl_push_scope - fun:Perl_pp_entersub - fun:Perl_runops_standard - fun:Perl_call_sv - fun:Perl_call_list - obj:/usr/lib64/perl5/CORE/libperl.so - fun:Perl_newATTRSUB_flags - fun:Perl_newATTRSUB - fun:Perl_utilize - fun:Perl_yyparse -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:calloc - fun:Perl_safesyscalloc - fun:Perl_gv_fetchpvn_flags - fun:Perl_yylex - fun:Perl_yyparse - obj:/usr/lib64/perl5/CORE/libperl.so - fun:Perl_pp_require - fun:Perl_runops_standard - fun:Perl_call_sv - fun:Perl_call_list - obj:/usr/lib64/perl5/CORE/libperl.so - fun:Perl_newATTRSUB_flags -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:calloc - fun:Perl_newSVOP - fun:Perl_yylex - fun:Perl_yyparse - obj:/usr/lib64/perl5/CORE/libperl.so - fun:Perl_pp_require - fun:Perl_runops_standard - fun:Perl_call_sv - fun:Perl_call_list - obj:/usr/lib64/perl5/CORE/libperl.so - fun:Perl_newATTRSUB_flags - fun:Perl_newATTRSUB -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_init_stacks - fun:perl_construct - obj:/usr/bin/perl - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:calloc - fun:Perl_safesyscalloc - fun:Perl_gv_fetchpvn_flags - fun:Perl_yylex - fun:Perl_yyparse - obj:/usr/lib64/perl5/CORE/libperl.so - fun:Perl_pp_require - fun:Perl_runops_standard - fun:Perl_call_sv - fun:Perl_call_list - obj:/usr/lib64/perl5/CORE/libperl.so - fun:Perl_newATTRSUB_flags -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_savepv - fun:Perl_newXS_len_flags - fun:Perl_newCONSTSUB_flags - fun:Perl_gv_init_pvn - fun:Perl_gv_fetchpvn_flags - fun:Perl_gv_fetchsv - fun:Perl_ck_rvconst - fun:Perl_newUNOP - fun:Perl_yylex - fun:Perl_yyparse -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:realloc - fun:Perl_safesysrealloc - fun:Perl_savestack_grow - fun:Perl_save_pushptrptr - fun:Perl_yylex - fun:Perl_yyparse - 
obj:/usr/lib64/perl5/CORE/libperl.so - fun:Perl_pp_require - fun:Perl_runops_standard - fun:Perl_call_sv - fun:Perl_call_list - obj:/usr/lib64/perl5/CORE/libperl.so -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_new_stackinfo - fun:Perl_init_stacks - fun:perl_construct - obj:/usr/bin/perl - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:malloc - fun:Perl_safesysmalloc - fun:Perl_reentrant_init - fun:perl_construct - obj:/usr/bin/perl - fun:(below main) -} -{ - - Memcheck:Leak - match-leak-kinds: definite - fun:calloc - fun:Perl_safesyscalloc - fun:Perl_more_bodies - fun:Perl_sv_upgrade - fun:Perl_sv_setpv - fun:perl_construct - obj:/usr/bin/perl - fun:(below main) -} diff --git a/cmake/ctest/drivers/lightsaber/CMakeLists.txt b/cmake/ctest/drivers/lightsaber/CMakeLists.txt deleted file mode 100644 index d0561703f1ef..000000000000 --- a/cmake/ctest/drivers/lightsaber/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -TRILINOS_DRIVER_SETUP() - -TRILINOS_DRIVER_ADD_DASHBOARD( - SERIAL_RELEASE_DEFAULT - ctest_linux_experimental_mpi_release_avatar_lightsaber.cmake - CTEST_INSTALLER_TYPE release - RUN_SERIAL - TIMEOUT_MINUTES 330 - ) - -TRILINOS_DRIVER_ADD_DASHBOARD( - SERIAL_RELEASE_FLOAT - ctest_linux_experimental_mpi_release_float_lightsaber.cmake - CTEST_INSTALLER_TYPE release - RUN_SERIAL - TIMEOUT_MINUTES 330 - - -TRILINOS_DRIVER_ADD_DASHBOARD( - SYCL_CPU_RELEASE - ctest_linux_experimental_mpi_release_sycl_cpu_lightsaber.cmake - CTEST_INSTALLER_TYPE release - TIMEOUT_MINUTES 330 - ) - - -TRILINOS_ADD_REQUIRED_CMAKE_INSTALLS() diff --git a/cmake/ctest/drivers/lightsaber/TrilinosCTestDriverCore.lightsaber.gcc.cmake b/cmake/ctest/drivers/lightsaber/TrilinosCTestDriverCore.lightsaber.gcc.cmake deleted file mode 100644 index 3c3ff4aa104a..000000000000 --- a/cmake/ctest/drivers/lightsaber/TrilinosCTestDriverCore.lightsaber.gcc.cmake +++ /dev/null @@ -1,138 +0,0 @@ -# @HEADER -# 
************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 
-# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/../../TrilinosCTestDriverCore.cmake") - -# -# Platform/compiler specific options for rocketman using gcc -# - -MACRO(TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER) - SET(CTEST_DROP_SITE "sems-cdash-son.sandia.gov") - # Base of Trilinos/cmake/ctest then BUILD_DIR_NAME - - IF(COMM_TYPE STREQUAL MPI) - string(TOUPPER $ENV{LMOD_FAMILY_MPI} UC_MPI_NAME) - SET(BUILD_DIR_NAME ${UC_MPI_NAME}-$ENV{LMOD_FAMILY_MPI_VERSION}_${BUILD_TYPE}_${BUILD_NAME_DETAILS}) - ELSE() - SET(BUILD_DIR_NAME ${COMM_TYPE}-${BUILD_TYPE}_${BUILD_NAME_DETAILS}) - ENDIF() - - SET(Trilinos_REPOSITORY_LOCATION_NIGHTLY_DEFAULT "https://github.com/muelu/Trilinos.git") - - SET(CTEST_DASHBOARD_ROOT "${TRILINOS_CMAKE_DIR}/../../${BUILD_DIR_NAME}" ) - SET(CTEST_NOTES_FILES "${CTEST_SCRIPT_DIRECTORY}/${CTEST_SCRIPT_NAME}" ) - SET(CTEST_BUILD_FLAGS "-j14 -i" ) - - SET_DEFAULT(CTEST_PARALLEL_LEVEL "14" ) - SET_DEFAULT(Trilinos_ENABLE_SECONDARY_TESTED_CODE ON) - SET(Trilinos_CTEST_DO_ALL_AT_ONCE ON) - SET_DEFAULT(Trilinos_EXCLUDE_PACKAGES ${EXTRA_EXCLUDE_PACKAGES} TriKota Optika) - - SET(EXTRA_SYSTEM_CONFIGURE_OPTIONS - "-DBUILD_SHARED_LIBS=ON" - "-DCMAKE_BUILD_TYPE=${BUILD_TYPE}" - "-DCMAKE_CXX_STANDARD=17" - "-DCMAKE_VERBOSE_MAKEFILE=ON" - - - "-DTrilinos_ENABLE_Fortran=OFF" - - "-DSuperLU_INCLUDE_DIRS=$ENV{SUPERLU_INC}" - "-DSuperLU_LIBRARY_DIRS=$ENV{SUPERLU_LIB}" - - "-DBoost_INCLUDE_DIRS:STRING=$ENV{BOOST_INC}" - "-DBoost_LIBRARY_DIRS:STRING=$ENV{BOOST_LIB}" - "-DBoostLib_INCLUDE_DIRS:STRING=$ENV{BOOST_INC}" - "-DBoostLib_LIBRARY_DIRS:STRING=$ENV{BOOST_LIB}" - - "-DNetcdf_LIBRARY_DIRS:STRING=$ENV{NETCDF_C_LIB}" - "-DNetcdf_INCLUDE_DIRS:STRING=$ENV{NETCDF_C_INC}" - - ### PACKAGE CONFIGURATION ### - - ### MISC ### - "-DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" - ) - SET_DEFAULT(COMPILER_VERSION "$ENV{LMOD_FAMILY_COMPILER}-$ENV{LMOD_FAMILY_COMPILER_VERSION}") - - - # Ensure that MPI is on for all 
parallel builds that might be run. - IF(COMM_TYPE STREQUAL MPI) - - SET(EXTRA_SYSTEM_CONFIGURE_OPTIONS - ${EXTRA_SYSTEM_CONFIGURE_OPTIONS} - "-DTPL_ENABLE_MPI=ON" - "-DMPI_BASE_DIR:PATH=$ENV{OPENMPI_ROOT}" - "-DMPI_EXEC_POST_NUMPROCS_FLAGS:STRING=--bind-to\\\;socket\\\;--map-by\\\;socket" - ) - - SET(CTEST_MEMORYCHECK_COMMAND_OPTIONS - "--gen-suppressions=all --error-limit=no --log-file=nightly_suppressions.txt" ${CTEST_MEMORYCHECK_COMMAND_OPTIONS} ) - - ELSE() - - SET( EXTRA_SYSTEM_CONFIGURE_OPTIONS - ${EXTRA_SYSTEM_CONFIGURE_OPTIONS} - "-DCMAKE_CXX_COMPILER=$ENV{GCC_ROOT}/bin/g++" - "-DCMAKE_C_COMPILER=$ENV{GCC_ROOT}/bin/gcc" - ) - - ENDIF() - - TRILINOS_CTEST_DRIVER() - -ENDMACRO() diff --git a/cmake/ctest/drivers/lightsaber/cron_driver.sh b/cmake/ctest/drivers/lightsaber/cron_driver.sh deleted file mode 100755 index 216c6855bf64..000000000000 --- a/cmake/ctest/drivers/lightsaber/cron_driver.sh +++ /dev/null @@ -1,100 +0,0 @@ -#!/bin/bash - -echo -echo "Starting nightly Trilinos development testing on lightsaber: `date`" -echo - -# -# TrilinosDriver settings: -# - -export TDD_PARALLEL_LEVEL=2 - -# Trilinos settings: -# - -# Submission mode for the *TrilinosDriver* dashboard -export TDD_CTEST_TEST_TYPE=Nightly - - -# Machine specific environment -# - -export TDD_HTTP_PROXY="http://wwwproxy.sandia.gov:80" -export TDD_HTTPS_PROXY="https://wwwproxy.sandia.gov:80" -export http_proxy="http://wwwproxy.sandia.gov:80" -export https_proxy="https://wwwproxy.sandia.gov:80" -export TDD_FORCE_CMAKE_INSTALL=1 -export TDD_DEBUG_VERBOSE=1 - -. /etc/profile -source ~/.bashrc - - -# Machine independent cron_driver: -SCRIPT_DIR=`cd "\`dirname \"$0\"\`";pwd` - -# Trilinos source repo -export TRILINOS_SOURCE=$SCRIPT_DIR/../../../.. 
- -# folder with the machine specific build info -export BUILDS_DIR=$TRILINOS_SOURCE/cmake/ctest/drivers/$HOSTNAME - -# OneAPI -export MODULEPATH="$MODULEPATH":/opt/intel/oneapi/modulefiles:/opt/apps/modulefiles - -# If you update the list of modules, go to ~/code/trilinos-test/trilinos/ and -# do "git pull". Otherwise, the tests could fail on the first night, as we -# would first run old cron_driver.sh and only then pull - -# =========================================================================== -# GCC family -echo "GREP: *** GCC Family Tests ***" -export CTEST_CONFIGURATION="default" -module purge -module load sems-gcc/10.1.0 -module load sems-openmpi/4.0.5 -module load sems-cmake -module load sems-superlu/4.3 -module load sems-zlib -module load sems-boost -module load sems-hdf5 -module load sems-netcdf-c -module load sems-parallel-netcdf - -# Remove colors (-fdiagnostics-color) from OMPI flags -# It may result in non-XML characters on the Dashboard -#setenv OMPI_CFLAGS="`echo $OMPI_CFLAGS | sed 's/-fdiagnostics-color//'`" -#setenv OMPI_CXXFLAGS="`echo $OMPI_CXXFLAGS | sed 's/-fdiagnostics-color//'`" - -echo "Configuration = $CTEST_CONFIGURATION" -env - -export OMP_NUM_THREADS=2 - -# Update Avatar -(cd /home/nightlyTesting/avatar; git pull --rebase ) - -# Set variables to work aroun TriBITS problems -#setenv TDD_FORCE_CMAKE_INSTALL 0 -export TRIBITS_TDD_USE_SYSTEM_CTEST=1 - -# Actually run stuff -ctest -S $BUILDS_DIR/ctest_linux_experimental_mpi_release_avatar_lightsaber.cmake -ctest -S $BUILDS_DIR/ctest_linux_experimental_mpi_release_float_lightsaber.cmake - -module unload sems-parallel-netcdf -module unload sems-netcdf-c -module unload sems-hdf5 -module unload sems-boost -module unload sems-zlib -module unload sems-superlu -module unload sems-cmake -module unload sems-openmpi -module unload sems-gcc -# =========================================================================== - - -echo -echo "Ending nightly Trilinos development testing on lightsaber: 
`date`" -echo diff --git a/cmake/ctest/drivers/lightsaber/crontab b/cmake/ctest/drivers/lightsaber/crontab deleted file mode 100644 index 429e545d5557..000000000000 --- a/cmake/ctest/drivers/lightsaber/crontab +++ /dev/null @@ -1,31 +0,0 @@ -SHELL=/bin/bash -# * * * * * command to execute -# │ │ │ │ │ -# │ │ │ │ │ -# │ │ │ │ └───── day of week (0 - 6) (0 to 6 are Sunday to Saturday, or use names; 7 is Sunday, the same as 0) -# │ │ │ └────────── month (1 - 12) -# │ │ └─────────────── day of month (1 - 31) -# │ └──────────────────── hour (0 - 23) -# └───────────────────────── min (0 - 59) - -# MIN HOUR DAY MONTH - -# nightly tests - -0 22 * * * cd /home/nightlyTesting/Trilinos/cmake/ctest/drivers/lightsaber; git pull ; ./cron_driver.csh >&! /home/nightlyTesting/cron_driver.log; cd /home/nightlyTesting; /home/nightlyTesting/Trilinos/cmake/ctest/drivers/rocketman/sendTestSummary.sh cron_driver.log - -# -#@reboot /home/jhu/bin/send-reboot-email.sh - - -#description of fields -#minute This controls what minute of the hour the command will run on, -# and is between '0' and '59' -#hour This controls what hour the command will run on, and is specified in -# the 24 hour clock, values must be between 0 and 23 (0 is midnight) -#dom This is the Day of Month, that you want the command run on, e.g. to -# run a command on the 19th of each month, the dom would be 19. -#month This is the month a specified command will run on, it may be specified -# numerically (0-12), or as the name of the month (e.g. May) -#dow This is the Day of Week that you want a command to be run on, it can -# also be numeric (0-7) or as the name of the day (e.g. sun). 
diff --git a/cmake/ctest/drivers/lightsaber/ctest_linux_experimental_mpi_release_avatar_lightsaber.cmake b/cmake/ctest/drivers/lightsaber/ctest_linux_experimental_mpi_release_avatar_lightsaber.cmake deleted file mode 100644 index aa5e2da40295..000000000000 --- a/cmake/ctest/drivers/lightsaber/ctest_linux_experimental_mpi_release_avatar_lightsaber.cmake +++ /dev/null @@ -1,89 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.lightsaber.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. 
-# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS AvatarExternalRepo) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Experimental) -SET(Trilinos_TRACK Experimental) # Set the CDash track to Nightly -SET(CTEST_TEST_TIMEOUT 14400) # twice the default value, for valgrind -SET(CTEST_DO_MEMORY_TESTING FALSE) -SET(Trilinos_PACKAGES TrilinosCouplings MueLu AvatarT) - - -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_COMPLEX:BOOL=OFF" - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION=ON" - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS=OFF" - "-DTPL_ENABLE_SuperLU=ON" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/lightsaber/ctest_linux_experimental_mpi_release_float_lightsaber.cmake b/cmake/ctest/drivers/lightsaber/ctest_linux_experimental_mpi_release_float_lightsaber.cmake deleted file mode 100644 index a5b555c21e6b..000000000000 --- a/cmake/ctest/drivers/lightsaber/ctest_linux_experimental_mpi_release_float_lightsaber.cmake +++ /dev/null @@ -1,113 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. 
-# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.lightsaber.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS Float) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Experimental) -SET(Trilinos_TRACK Experimental) # Set the CDash track to Nightly -SET(CTEST_TEST_TIMEOUT 14400) # twice the default value, for valgrind -SET(CTEST_DO_MEMORY_TESTING FALSE) -SET(Trilinos_PACKAGES MueLu Tpetra) - -# Disable the Epetra stack, Stratimikos & Thyra -set (Trilinos_ENABLE_Amesos OFF CACHE BOOL "We do not want Amesos" FORCE) -set (Trilinos_ENABLE_AztecOO OFF CACHE BOOL "We do not want AztecOO" FORCE) -set (Trilinos_ENABLE_Epetra OFF CACHE BOOL "We do not want Epetra" FORCE) -set (Trilinos_ENABLE_EpetraExt OFF CACHE BOOL "We do not want EpetraExt" FORCE) -set (Trilinos_ENABLE_Ifpack OFF CACHE BOOL "We do not want Ifpack" FORCE) -set (Trilinos_ENABLE_ML OFF CACHE BOOL "We do not want ML" FORCE) -set (Trilinos_ENABLE_Zoltan OFF CACHE BOOL "We do not want Zoltan" FORCE) - - - -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_Epetra=OFF" - "-DTrilinos_ENABLE_EpetraExt=OFF" - "-DTrilinos_ENABLE_ML=OFF" - "-DTrilinos_ENABLE_COMPLEX:BOOL=OFF" - 
"-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION=ON" - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS=OFF" - "-DTPL_ENABLE_SuperLU=ON" - "-DTeuchos_ENABLE_FLOAT=ON" - "-DTeuchos_ENABLE_COMPLEX=OFF" - "-DTpetra_INST_INT_INT=OFF" - "-DTpetra_INST_INT_LONG_LONG=ON" - "-DTpetra_INST_FLOAT=ON" - "-DTpetra_INST_COMPLEX_FLOAT=OFF" - "-DTpetra_INST_DOUBLE=ON" - "-DTrilinos_ENABLE_Stratimikos=ON" - "-DStratimikos_ENABLE_TESTS=ON" - "-DStratimikos_ENABLE_EXAMPLES=ON" - "-DTrilinos_ENABLE_Thyra=ON" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/lightsaber/sendTestSummary.sh b/cmake/ctest/drivers/lightsaber/sendTestSummary.sh deleted file mode 100755 index b48f437bb431..000000000000 --- a/cmake/ctest/drivers/lightsaber/sendTestSummary.sh +++ /dev/null @@ -1,327 +0,0 @@ -#!/bin/sh - -# Parse command line options. -DEBUGMODE=0 -USAGE="sendTestSummary.sh [-d] " -while getopts d OPT; do - case "$OPT" in - d) - # debug mode, send email summary to me only - DEBUGMODE=1 - ;; - \?) - # getopts issues an error message - echo ${USAGE} - echo - exit 1 - ;; - esac -done - -# Remove the options we parsed above. -shift `expr $OPTIND - 1` -# The logfile is required. Error out if it's not provided. -if [ $# -eq 0 ]; then - echo $USAGE >&2 - exit 1 -fi -### end parsing ### - -######################################################################### -# Variables you might want to modify. 
-######################################################################### - -#Perl script to produce prettified HTML -HTMLPERLSCRIPT="/home/nightlyTesting/Trilinos/packages/muelu/utils/misc/drakify-email.pl" -#root of cdash testing directory -TESTLOCATION="/home/nightlyTesting" -LOGBACKUPDIRECTORY="/home/nightlyTesting/logs" - -#packages to be summarized -PATTERN="(Avatar|Xpetra|MueLu|TrilinosCouplings)" - -#variables to be passed to the perl script -MACHINENAME=`hostname -s` -USER=`whoami` - -#who gets the email summary -if [[ $DEBUGMODE == 1 ]]; then - RECIPIENTS=( - "${USER}@sandia.gov" - ) -else - RECIPIENTS=( - "muelu-regression@software.sandia.gov" - ) -fi -#suffix for all the log files -timeStamp="$(date +%F_%R)" - -#cron driver log file -INFILE=$1 -#root of file to be emailed. The correct suffix must be appended whenever you use this. -OUTFILE="test-summary-${timeStamp}" -#MAILCOMMAND="/usr/sbin/sendmail" -MAILCOMMAND="/bin/mail" -######################################################################### - -cd ${TESTLOCATION} - -backupFile="cron_driver.log.$timeStamp" -cp cron_driver.log $backupFile - -testStartString=`egrep "Starting nightly Trilinos development" cron_driver.log` -testStartDate=`echo $testStartString | sed "s/:/#/" | cut -f 2 -d#` -ttt=`echo $testStartString | cut -f 1 -d:` -testMachine=${ttt##* } -testEndString=`egrep "Ending nightly Trilinos development" cron_driver.log` -testEndDate=`echo $testEndString | sed "s/:/#/" | cut -f 2 -d#` - -awk -v packagesToMatch="$PATTERN" -v summaryFile="${OUTFILE}.txt" -v machine="$testMachine" -v startTime="$testStartDate" -v endTime="$testEndDate" ' - -################################################### -# Commands to run before the file is processed -################################################### -BEGIN { - print "Machine : " machine > summaryFile - print "Start time : " startTime > summaryFile - print "End time : " endTime > summaryFile - testctr=0 - gitUpdateFailed=0 - dashboardErrors=0 -} - 
-################################################### -# Commands to run while processing the file -################################################### -{ - - if ($0 ~ "Update command failed") - { - gitUpdateFailed=1 - } - - if ($0 ~ "^test [0-9]*$") - { - #start of test found, e.g., test 4 - FOUND=2 - testNum=$0 - sub(/test /,"",testNum) - testNum=testNum":" - #\x27 is hex code for single quote - packageLibBuild=testNum" Building target: \x27" packagesToMatch "_libs" - packageTestBuild=testNum" Build ALL target for \x27" packagesToMatch "\x27" - runTestPattern=testNum" Running test for package \x27" packagesToMatch "\x27" - next #skip any more processing, go on to next line - } - - if (FOUND==2) - { - FOUND-- - dashboardName=$0 - sub(/Start [ ]*[0-9]*: /,"",dashboardName) - dashboardName=RemoveWhiteSpace(dashboardName) - listOfDashboardNames[testctr] = dashboardName - testctr++ - dashBoardPattern="Test [ ]*#[0-9]*: " dashboardName - } - -# Record the "track" for this dashboard, which could be "Nightly", "Experimental", or "Specialized" - if (FOUND && $0 ~ "-- Trilinos_TRACK=") - { - thisLine=$0 - sub(/^[0-9]*: -- Trilinos_TRACK=\x27/,"",thisLine) - sub(/\x27/,"",thisLine) - if (length(thisLine) > 0) { - dashboardTrack[dashboardName] = thisLine - trackTypes[thisLine]++ - } - } - - if (FOUND && $0 ~ dashBoardPattern) - { - thisLine=$0 - thisLine=RemoveWhiteSpace(thisLine) - if (dashboardErrors == 0) - dashBoardSummary[dashboardName] = "passed" - else - dashBoardSummary[dashboardName] = "FAILED" - match(thisLine,"[0-9]*\\.[0-9]* sec$") - timeSummary[dashboardName] = substr(thisLine,RSTART,RLENGTH) - #done with this dashboard, reset error flag - dashboardErrors=0 - } - - # library build - if (FOUND && $0 ~ packageLibBuild) - { - getCompilerSummary=2 - thisLine = $0 - pat = "\x27" packagesToMatch "_libs\x27" - match(thisLine,pat) - currentPackage = substr(thisLine,RSTART+1,RLENGTH-2) - listOfPackages[currentPackage] = currentPackage - } - - # tests build - if (FOUND && 
$0 ~ packageTestBuild) - { - getCompilerSummary=2 - thisLine = $0 - pat = "\x27" packagesToMatch "\x27" - match(thisLine,pat) - currentPackage = substr(thisLine,RSTART+1,RLENGTH-2) - listOfPackages[currentPackage] = currentPackage - } - - if (getCompilerSummary>0 && $0 ~ " Compiler errors") - { - thisLine=$0 - sub(testNum,"",thisLine) - pat="[0-9]*" - thisLine=RemoveWhiteSpace(thisLine) - - match(thisLine,pat) - numErrors = substr(thisLine,RSTART,RLENGTH) - errorSummary[dashboardName,currentPackage] = numErrors - - getCompilerSummary-- - } - - if (getCompilerSummary>0 && $0 ~ " Compiler warnings") - { - thisLine=$0 - sub(testNum,"",thisLine) - thisLine=RemoveWhiteSpace(thisLine) - pat="[0-9]*" - match(thisLine,pat) - numWarnings = substr(thisLine,RSTART,RLENGTH); - warningSummary[dashboardName,currentPackage] = numWarnings - getCompilerSummary-- - } - - #Look for pattern indicating that the tests of interest have in fact run. - if (FOUND && match($0,runTestPattern)) - { - packageTested = substr($0,RSTART,RLENGTH); - sub(testNum,"",packageTested) - packageTested=RemoveWhiteSpace(packageTested) - getTestSummary=1 - } - - if (getTestSummary && $0 ~ "No tests were found!!!") - { - getTestSummary=0 - } - - #Calculate the number of failing, passing, and total tests. 
- if (getTestSummary && $0 ~ "tests failed out of") - { - thisLine=$0 - sub(testNum,"",thisLine) - thisLine=RemoveWhiteSpace(thisLine) - getTestSummary=0 - pat = "[0-9]* tests failed out of [0-9]*" - match(thisLine,pat) - ttt = substr(thisLine,RSTART,RLENGTH); - pat = "^[0-9]*" - match(ttt,pat) - numFailed = substr(ttt,RSTART,RLENGTH); - pat = "[0-9]*$" - match(ttt,pat) - numTotal = substr(ttt,RSTART,RLENGTH); - failSummary[dashboardName,currentPackage] = numFailed - passSummary[dashboardName,currentPackage] = numTotal+0-numFailed - totalSummary[dashboardName,currentPackage] = numTotal - if (numFailed != 0) - dashboardErrors=1 - } -} - -################################################### -# helper functions -################################################### -function RemoveWhiteSpace(theString) -{ - sub(/^[ ]*/,"",theString); sub(/[ ]*$/,"",theString); - return (theString) -} - -################################################### -# Commands to run after the file is processed -################################################### -END { - - if (gitUpdateFailed == 1) { - print "\n *** git update FAILED ***\n" > summaryFile - } - - # do some nice formatting - numPlusses=73 - thePluses=" " - while (jj++ summaryFile - for (track in trackTypes) { - printf("%s\n",thePluses) > summaryFile - trackNameLength = length(track) - numPlussesToTheRight = numPlusses - trackNameLength - 4 - plussesToTheRight="" - kk = 0 - while (kk++ summaryFile - printf("%s\n",thePluses) > summaryFile - for (i in listOfDashboardNames) { - db=listOfDashboardNames[i] - if (dashboardTrack[db] == track) - printf(" %61-s ... 
%s\n",db,dashBoardSummary[db]) > summaryFile; - } - } - printf("-----------------------------------------------------------------------------\n\n") > summaryFile - - for (i in listOfDashboardNames) { - db=listOfDashboardNames[i] - spaces=" " - printf("%55-s\n%s%8-s, %5.1f seconds\n",db, spaces, dashBoardSummary[db], timeSummary[db]) > summaryFile; - for (k in listOfPackages) { - pat = "_lib" - if (match(k,pat)) isLib = 1; - else isLib = 0; - if ((db,k) in warningSummary) nwarn = warningSummary[db,k] - else nwarn = "-"; - if ((db,k) in errorSummary) nerr = errorSummary[db,k] - else nerr = "-"; - if ((db,k) in failSummary) nfail = failSummary[db,k] - else nfail = "-"; - if ((db,k) in passSummary) npass = passSummary[db,k] - else npass = "-"; - if ((db,k) in totalSummary) ntotal = totalSummary[db,k] - else ntotal = "-"; - if (isLib) { - summaryString = sprintf("%15s | %3d warnings | %3d errors",k,nwarn,nerr); - } - else { - summaryString = sprintf("%15s | %3d warnings | %3d errors | %d/%d passed",k,nwarn,nerr,npass,ntotal); - } - print spaces summaryString > summaryFile - } - } - -} -' $INFILE - -date2=`echo $(date) | sed "s/ /_/g"` -cdashDate="$(date +%F)" -cat ${OUTFILE}.txt | perl ${HTMLPERLSCRIPT} ${date2} ${cdashDate} ${MACHINENAME} ${USER} > ${OUTFILE}.html - -${MAILCOMMAND} -it < /dev/null 2>&1; cd /home/nightlyTesting/trilinos/cmake/ctest/drivers/rocketman; ./cron_driver_tpetra.sh &> /home/nightlyTesting/cron_driver_tpetra.log; echo -e "Testing results for Tpetra with deprecated code disabled\nhttps://testing-dev.sandia.gov/cdash/index.php?project=Trilinos&filtercount=2&showfilters=1&filtercombine=and&field1=site&compare1=63&value1=rocketman&field2=buildname&compare2=63&value2=DEPRECATED" | mail -s "$(date) Tpetra testing is done on rocketman" tpetra-developers@software.sandia.gov - -# MueLu & Tpetra testing - -#0 22 * * * cd /home/nightlyTesting/trilinos/cmake/ctest/drivers/rocketman; eval `python ./setSshEnv.py`; cd /home/nightlyTesting/trilinos; git pull > 
/dev/null 2>&1; cd /home/nightlyTesting/trilinos/cmake/ctest/drivers/rocketman; ./cron_driver.sh &> /home/nightlyTesting/cron_driver.log; cd /home/nightlyTesting; /home/nightlyTesting/trilinos/cmake/ctest/drivers/rocketman/sendTestSummary.sh cron_driver.log; cd /home/nightlyTesting/trilinos/cmake/ctest/drivers/rocketman; ./cron_driver_tpetra.sh &> /home/nightlyTesting/cron_driver_tpetra.log; echo -e "Testing results for Tpetra with deprecated code disabled\nhttps://testing-dev.sandia.gov/cdash/index.php?project=Trilinos&filtercount=2&showfilters=1&filtercombine=and&field1=site&compare1=63&value1=rocketman&field2=buildname&compare2=63&value2=DEPRECATED" | mail -s "$(date) Tpetra testing is done on rocketman" tpetra-developers@software.sandia.gov - -0 22 * * * cd /data/testing/trilinos/cmake/ctest/drivers/rocketman; eval `python ./setSshEnv.py`; cd /data/testing/trilinos; git pull > /dev/null 2>&1; cd /data/testing/trilinos/cmake/ctest/drivers/rocketman; ./cron_driver.sh &> /data/testing/cron_driver.log; cd /data/testing; /data/testing/trilinos/cmake/ctest/drivers/rocketman/sendTestSummary.sh cron_driver.log; - -# -@reboot /home/jhu/bin/send-reboot-email.sh - - -#description of fields -#minute This controls what minute of the hour the command will run on, -# and is between '0' and '59' -#hour This controls what hour the command will run on, and is specified in -# the 24 hour clock, values must be between 0 and 23 (0 is midnight) -#dom This is the Day of Month, that you want the command run on, e.g. to -# run a command on the 19th of each month, the dom would be 19. -#month This is the month a specified command will run on, it may be specified -# numerically (0-12), or as the name of the month (e.g. May) -#dow This is the Day of Week that you want a command to be run on, it can -# also be numeric (0-7) or as the name of the day (e.g. sun). 
diff --git a/cmake/ctest/drivers/rocketman/ctest_linux_experimental_mpi_release_avatar_rocketman.cmake b/cmake/ctest/drivers/rocketman/ctest_linux_experimental_mpi_release_avatar_rocketman.cmake deleted file mode 100644 index a983d34a6f7d..000000000000 --- a/cmake/ctest/drivers/rocketman/ctest_linux_experimental_mpi_release_avatar_rocketman.cmake +++ /dev/null @@ -1,107 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.rocketman.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. 
-# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS AVATAR) -SET(CTEST_BUILD_FLAGS "-j35 -i" ) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Experimental) -SET(Trilinos_TRACK Experimental) # Set the CDash track to Nightly -SET(CTEST_TEST_TIMEOUT 14400) # twice the default value, for valgrind -SET(CTEST_DO_MEMORY_TESTING FALSE) - -SET(Trilinos_PACKAGES TrilinosCouplings MueLu ) -SET(Trilinos_EXCLUDE_PACKAGES Epetra Domi PyTrilinos Moertel) - -# If true, this option yields faster builds. In that case, however, it won't disable any upstream package that fails to compile. -SET(Trilinos_CTEST_DO_ALL_AT_ONCE TRUE) - -# GH 20230111: changing Rocketman to pull from the sandialabs Avatar repo, https://github.com/sandialabs/avatar.git -# GH : since Avatar depends on gsl version 1.16, pull the gsl source from https://ftp.gnu.org/gnu/gsl/gsl-1.16.tar.gz -# GH : it doesn't seem like Avatar actually need gsl, despite listing it as required in the cmake configure script... 
- -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION=ON" - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS=OFF" - "-DTPL_ENABLE_SuperLU=ON" - "-DTPL_ENABLE_Netcdf=ON" - "-DTPL_ENABLE_HDF5=ON" - "-DTPL_ENABLE_Avatar=ON" - "-DTPL_ENABLE_Matio=OFF" - "-DTPL_ENABLE_Boost=ON" - "-DTPL_ENABLE_BoostLib=ON" - "-DTPL_ENABLE_X11=OFF" - "-DAvatar_INCLUDE_DIRS=/home/gbharpe/Programming/cpp/avatar/avatar-source/src" - "-DAvatar_LIBRARY_DIRS=/home/gbharpe/Programming/cpp/avatar/avatar-build/src" - "-DTPL_Avatar_LIBRARIES=/home/gbharpe/Programming/cpp/avatar/avatar-build/src/libavatar.a" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/rocketman/ctest_linux_experimental_mpi_release_tpetra_performance_rocketman.cmake b/cmake/ctest/drivers/rocketman/ctest_linux_experimental_mpi_release_tpetra_performance_rocketman.cmake deleted file mode 100644 index 9520f1ef6aa8..000000000000 --- a/cmake/ctest/drivers/rocketman/ctest_linux_experimental_mpi_release_tpetra_performance_rocketman.cmake +++ /dev/null @@ -1,99 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 
-# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.rocketman.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS PERFORMANCE) -SET(CTEST_BUILD_FLAGS "-j35 -i" ) - -SET(CTEST_PARALLEL_LEVEL 36) -SET(CTEST_TEST_TYPE Experimental) -SET(Trilinos_TRACK Experimental) # Set the CDash track to Nightly -SET(CTEST_TEST_TIMEOUT 14400) # twice the default value, for valgrind -SET(CTEST_DO_MEMORY_TESTING FALSE) -SET(Trilinos_ENABLE_ALL_FORWARD_DEP_PACKAGES FALSE) -SET(Trilinos_DISABLE_ENABLED_FORWARD_DEP_PACKAGES TRUE) -SET(Trilinos_PACKAGES "Tpetra;Galeri;MueLu") -SET(Trilinos_EXCLUDE_PACKAGES "Sacado;RTOp;Stratimikos;Shards;TrilinosSS;Epetra") - -# If true, this option yields faster builds. In that case, however, it won't disable any upstream package that fails to compile. 
-SET(Trilinos_CTEST_DO_ALL_AT_ONCE TRUE) - -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS=OFF" - "-DTrilinos_TEST_CATEGORIES:STRING=PERFORMANCE" - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION=ON" - "-DXpetra_ENABLE_Experimental:BOOL=ON" - "-DMueLu_ENABLE_Experimental:BOOL=ON" - "-DMueLu_ENABLE_TESTS=ON" - "-DMPI_EXEC_MAX_NUMPROCS=28" - "-DMPI_EXEC_DEFAULT_NUMPROCS=28" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/rocketman/ctest_linux_nightly_mpi_release_muelu_rocketman.cmake b/cmake/ctest/drivers/rocketman/ctest_linux_nightly_mpi_release_muelu_rocketman.cmake deleted file mode 100644 index f15ef6d4ae93..000000000000 --- a/cmake/ctest/drivers/rocketman/ctest_linux_nightly_mpi_release_muelu_rocketman.cmake +++ /dev/null @@ -1,93 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 
-# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.rocketman.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS DEFAULT) -SET(CTEST_BUILD_FLAGS "-j35 -i" ) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(Trilinos_TRACK Nightly) # Set the CDash track to Nightly -SET(CTEST_TEST_TIMEOUT 14400) # twice the default value, for valgrind -SET(CTEST_DO_MEMORY_TESTING FALSE) - -SET(Trilinos_PACKAGES MueLu Xpetra Amesos2) -SET(Trilinos_EXCLUDE_PACKAGES Epetra Domi PyTrilinos Moertel) - -# If true, this option yields faster builds. In that case, however, it won't disable any upstream package that fails to compile. -SET(Trilinos_CTEST_DO_ALL_AT_ONCE TRUE) - -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION=ON" - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS=OFF" - "-DTPL_ENABLE_SuperLU=ON" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/rocketman/ctest_linux_nightly_mpi_release_tpetra_rocketman.cmake b/cmake/ctest/drivers/rocketman/ctest_linux_nightly_mpi_release_tpetra_rocketman.cmake deleted file mode 100644 index 9d58436ad0de..000000000000 --- a/cmake/ctest/drivers/rocketman/ctest_linux_nightly_mpi_release_tpetra_rocketman.cmake +++ /dev/null @@ -1,92 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. 
Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. 
Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.rocketman.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS TPETRA_DEPRECATED_CODE_OFF) -SET(CTEST_BUILD_FLAGS "-j35 -i" ) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Nightly) -SET(Trilinos_TRACK Nightly) # Set the CDash track to Nightly -SET(CTEST_TEST_TIMEOUT 14400) # twice the default value, for valgrind -SET(CTEST_DO_MEMORY_TESTING FALSE) - -SET(Trilinos_PACKAGES Tpetra) - -# If true, this option yields faster builds. In that case, however, it won't disable any upstream package that fails to compile. 
-SET(Trilinos_CTEST_DO_ALL_AT_ONCE TRUE) - -SET(EXTRA_CONFIGURE_OPTIONS - "-DTpetra_ENABLE_DEPRECATED_CODE=OFF" - "-DKOKKOS_ENABLE_DEPRECATED_CODE=OFF" - "-DTrilinos_ENABLE_DEPENDENCY_UNIT_TESTS=OFF" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/rocketman/mpi_release_tpetra_deprecated_code_off_downstream_enabled.cmake b/cmake/ctest/drivers/rocketman/mpi_release_tpetra_deprecated_code_off_downstream_enabled.cmake deleted file mode 100644 index 0b63f28f95ef..000000000000 --- a/cmake/ctest/drivers/rocketman/mpi_release_tpetra_deprecated_code_off_downstream_enabled.cmake +++ /dev/null @@ -1,104 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. -# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.rocketman.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. 
-# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS TPETRA_DEPRECATED_CODE_OFF_ENABLE_DOWNSTREAM) -SET(CTEST_BUILD_FLAGS "-j20 -i" ) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Experimental) -SET(Trilinos_TRACK Experimental) # Set the CDash track to Nightly -SET(CTEST_TEST_TIMEOUT 14400) # twice the default value, for valgrind -SET(CTEST_DO_MEMORY_TESTING FALSE) - -SET(Trilinos_PACKAGES Tpetra) -SET(Trilinos_EXCLUDE_PACKAGES Epetra Domi PyTrilinos Moertel) -SET(Trilinos_ENABLE_ALL_FORWARD_DEP_PACKAGES ON) - -# If true, this option yields faster builds. In that case, however, it won't disable any upstream package that fails to compile. -SET(Trilinos_CTEST_DO_ALL_AT_ONCE TRUE) - -# Because Trilinos_CTEST_DO_ALL_AT_ONCE is set to OFF above, -# the packages in Trilinos_EXCLUDE_PACKAGES above must also be disabled explicitly in EXTRA_CONFIGURE_OPTIONS -# below. 
- -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION=ON" - "-DTpetra_ENABLE_DEPRECATED_CODE=OFF" - "-DKOKKOS_ENABLE_DEPRECATED_CODE=OFF" - "-DTPL_ENABLE_Matio=OFF" - "-DTPL_ENABLE_X11=OFF" - "-DTPL_ENABLE_BoostLib=ON" - "-DTrilinos_ENABLE_Domi:BOOL=OFF" - "-DTrilinos_ENABLE_PyTrilinos:BOOL=OFF" - "-DTrilinos_ENABLE_Moertel:BOOL=OFF" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/rocketman/mpi_release_tpetra_deprecated_code_off_downstream_enabled_GO_int.cmake b/cmake/ctest/drivers/rocketman/mpi_release_tpetra_deprecated_code_off_downstream_enabled_GO_int.cmake deleted file mode 100644 index 780f1f47031f..000000000000 --- a/cmake/ctest/drivers/rocketman/mpi_release_tpetra_deprecated_code_off_downstream_enabled_GO_int.cmake +++ /dev/null @@ -1,105 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 
-# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.rocketman.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS TPETRA_DEPRECATED_CODE_OFF_ENABLE_DOWNSTREAM_GO_INT) -SET(CTEST_BUILD_FLAGS "-j20 -i" ) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Experimental) -SET(Trilinos_TRACK Experimental) # Set the CDash track to Nightly -SET(CTEST_TEST_TIMEOUT 14400) # twice the default value, for valgrind -SET(CTEST_DO_MEMORY_TESTING FALSE) - -SET(Trilinos_PACKAGES Tpetra) -SET(Trilinos_EXCLUDE_PACKAGES Epetra Domi PyTrilinos Moertel) -SET(Trilinos_ENABLE_ALL_FORWARD_DEP_PACKAGES ON) - -# If true, this option yields faster builds. In that case, however, it won't disable any upstream package that fails to compile. -SET(Trilinos_CTEST_DO_ALL_AT_ONCE TRUE) - -# Because Trilinos_CTEST_DO_ALL_AT_ONCE is set to OFF above, -# the packages in Trilinos_EXCLUDE_PACKAGES above must also be disabled explicitly in EXTRA_CONFIGURE_OPTIONS -# below. 
- -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION=ON" - "-DTpetra_INST_INT_INT=ON" - "-DTpetra_ENABLE_DEPRECATED_CODE=OFF" - "-DKOKKOS_ENABLE_DEPRECATED_CODE=OFF" - "-DTPL_ENABLE_Matio=OFF" - "-DTPL_ENABLE_X11=OFF" - "-DTPL_ENABLE_BoostLib=ON" - "-DTrilinos_ENABLE_Domi:BOOL=OFF" - "-DTrilinos_ENABLE_PyTrilinos:BOOL=OFF" - "-DTrilinos_ENABLE_Moertel:BOOL=OFF" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/rocketman/mpi_release_tpetra_deprecated_code_off_downstream_enabled_no_epetra.cmake b/cmake/ctest/drivers/rocketman/mpi_release_tpetra_deprecated_code_off_downstream_enabled_no_epetra.cmake deleted file mode 100644 index e2493a754ce9..000000000000 --- a/cmake/ctest/drivers/rocketman/mpi_release_tpetra_deprecated_code_off_downstream_enabled_no_epetra.cmake +++ /dev/null @@ -1,105 +0,0 @@ -# @HEADER -# ************************************************************************ -# -# Trilinos: An Object-Oriented Solver Framework -# Copyright (2001) Sandia Corporation -# -# -# Copyright (2001) Sandia Corporation. Under the terms of Contract -# DE-AC04-94AL85000, there is a non-exclusive license for use of this -# work by or on behalf of the U.S. Government. Export of this program -# may require a license from the United States Government. -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# NOTICE: The United States Government is granted for itself and others -# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide -# license in this data to reproduce, prepare derivative works, and -# perform publicly and display publicly. Beginning five (5) years from -# July 25, 2001, the United States Government is granted for itself and -# others acting on its behalf a paid-up, nonexclusive, irrevocable -# worldwide license in this data to reproduce, prepare derivative works, -# distribute copies to the public, perform publicly and display -# publicly, and to permit others to do so. -# -# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT -# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES -# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR -# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY -# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS -# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 
-# -# ************************************************************************ -# @HEADER - - -INCLUDE("${CTEST_SCRIPT_DIRECTORY}/TrilinosCTestDriverCore.rocketman.gcc.cmake") - -# -# Set the options specific to this build case -# - -# The variable BUILD_DIR_NAME is based COMM_TYPE, BUILD_TYPE, and BUILD_NAME_DETAILS. -# Tribits creates the variable listed under "Build Name" by prepending the OS type and compiler -# details to BUILD_DIR_NAME. -SET(COMM_TYPE MPI) -SET(BUILD_TYPE RELEASE) -SET(BUILD_NAME_DETAILS TPETRA_DEPRECATED_CODE_OFF_ENABLE_DOWNSTREAM_NO_EPETRA) -SET(CTEST_BUILD_FLAGS "-j20 -i" ) - -SET(CTEST_PARALLEL_LEVEL 8) -SET(CTEST_TEST_TYPE Experimental) -SET(Trilinos_TRACK Experimental) # Set the CDash track to Nightly -SET(CTEST_TEST_TIMEOUT 14400) # twice the default value, for valgrind -SET(CTEST_DO_MEMORY_TESTING FALSE) - -SET(Trilinos_PACKAGES Tpetra) -SET(Trilinos_EXCLUDE_PACKAGES Epetra Domi PyTrilinos Moertel) -SET(Trilinos_ENABLE_ALL_FORWARD_DEP_PACKAGES ON) - -# If true, this option yields faster builds. In that case, however, it won't disable any upstream package that fails to compile. -SET(Trilinos_CTEST_DO_ALL_AT_ONCE TRUE) - -# Because Trilinos_CTEST_DO_ALL_AT_ONCE is set to OFF above, -# the packages in Trilinos_EXCLUDE_PACKAGES above must also be disabled explicitly in EXTRA_CONFIGURE_OPTIONS -# below. 
- -SET(EXTRA_CONFIGURE_OPTIONS - "-DTrilinos_ENABLE_EXPLICIT_INSTANTIATION=ON" - "-DTpetra_ENABLE_DEPRECATED_CODE=OFF" - "-DKOKKOS_ENABLE_DEPRECATED_CODE=OFF" - "-DTPL_ENABLE_Matio=OFF" - "-DTPL_ENABLE_X11=OFF" - "-DTPL_ENABLE_BoostLib=ON" - "-DTrilinos_ENABLE_Epetra:BOOL=OFF" - "-DTrilinos_ENABLE_Domi:BOOL=OFF" - "-DTrilinos_ENABLE_PyTrilinos:BOOL=OFF" - "-DTrilinos_ENABLE_Moertel:BOOL=OFF" -) - -# -# Set the rest of the system-specific options and run the dashboard build/test -# - -TRILINOS_SYSTEM_SPECIFIC_CTEST_DRIVER() diff --git a/cmake/ctest/drivers/rocketman/muelu-gcc.lua b/cmake/ctest/drivers/rocketman/muelu-gcc.lua deleted file mode 100644 index 9e1a19e1c3a4..000000000000 --- a/cmake/ctest/drivers/rocketman/muelu-gcc.lua +++ /dev/null @@ -1,28 +0,0 @@ -load("sems-gcc/8.3.0") -load("sems-openmpi") -load("sems-cmake") -load("sems-ninja") -load("sems-git") - -load("sems-superlu") - -load("sems-yaml-cpp") -load("sems-hdf5") -load("sems-netcdf-c") -load("sems-parallel-netcdf") -load("sems-zlib") - -load("sems-boost") -load("sems-python") - -load("sems-metis") -load("sems-parmetis") - -load("sems-cuda") - -pushenv("OMP_NUM_THREADS","2") -pushenv("CUDA_LAUNCH_BLOCKING","1") -pushenv("CUDA_MANAGED_FORCE_DEVICE_ALLOC","1") - --- Only run on the Tesla K40, not the Quadro -- -pushenv("CUDA_VISIBLE_DEVICES","0") \ No newline at end of file diff --git a/cmake/ctest/drivers/rocketman/sendTestSummary.sh b/cmake/ctest/drivers/rocketman/sendTestSummary.sh deleted file mode 100755 index 05844be2b7c7..000000000000 --- a/cmake/ctest/drivers/rocketman/sendTestSummary.sh +++ /dev/null @@ -1,338 +0,0 @@ -#!/bin/sh - -# Parse command line options. -DEBUGMODE=0 -USAGE="sendTestSummary.sh [-d] " -while getopts dp:r: OPT; do - case "$OPT" in - d) - # debug mode, send email summary to me only - DEBUGMODE=1 - ;; - p) - # pattern - PATTERN=$OPTARG - ;; - r) - # recipients - RECIPIENTS=$OPTARG - ;; - \?) 
- # getopts issues an error message - echo ${USAGE} - echo - exit 1 - ;; - esac -done - -# Remove the options we parsed above. -shift `expr $OPTIND - 1` -# The logfile is required. Error out if it's not provided. -if [ $# -eq 0 ]; then - echo $USAGE >&2 - exit 1 -fi -### end parsing ### - -######################################################################### -# Variables you might want to modify. -######################################################################### - -#Perl script to produce prettified HTML -HTMLPERLSCRIPT="/home/nightlyTesting/trilinos/packages/muelu/utils/misc/drakify-email.pl" -#root of cdash testing directory -TESTLOCATION="/home/nightlyTesting" -LOGBACKUPDIRECTORY="/home/nightlyTesting/logs" - -#packages to be summarized -if [[ -z $PATTERN ]]; then - PATTERN="(Xpetra|MueLu)" -fi - -#variables to be passed to the perl script -MACHINENAME=`hostname -s` -USER=`whoami` - -#who gets the email summary -if [[ $DEBUGMODE == 1 ]]; then - RECIPIENTS=( - "${USER}@sandia.gov" - ) -else - if [[ -z $RECIPIENTS ]]; then - RECIPIENTS=( - "muelu-regression@software.sandia.gov" - ) - fi -fi -#suffix for all the log files -timeStamp="$(date +%F_%R)" - -#cron driver log file -INFILE=$1 -#root of file to be emailed. The correct suffix must be appended whenever you use this. 
-OUTFILE="test-summary-${timeStamp}" -MAILCOMMAND="/usr/sbin/sendmail" -######################################################################### - -cd ${TESTLOCATION} - -backupFile="cron_driver.log.$timeStamp" -cp cron_driver.log $backupFile - -testStartString=`egrep "Starting nightly Trilinos development" cron_driver.log` -testStartDate=`echo $testStartString | sed "s/:/#/" | cut -f 2 -d#` -ttt=`echo $testStartString | cut -f 1 -d:` -testMachine=${ttt##* } -testEndString=`egrep "Ending nightly Trilinos development" cron_driver.log` -testEndDate=`echo $testEndString | sed "s/:/#/" | cut -f 2 -d#` - -awk -v packagesToMatch="$PATTERN" -v summaryFile="${OUTFILE}.txt" -v machine="$testMachine" -v startTime="$testStartDate" -v endTime="$testEndDate" ' - -################################################### -# Commands to run before the file is processed -################################################### -BEGIN { - print "Machine : " machine > summaryFile - print "Start time : " startTime > summaryFile - print "End time : " endTime > summaryFile - testctr=0 - gitUpdateFailed=0 - dashboardErrors=0 -} - -################################################### -# Commands to run while processing the file -################################################### -{ - - if ($0 ~ "Update command failed") - { - gitUpdateFailed=1 - } - - if ($0 ~ "^test [0-9]*$") - { - #start of test found, e.g., test 4 - FOUND=2 - testNum=$0 - sub(/test /,"",testNum) - testNum=testNum":" - #\x27 is hex code for single quote - packageLibBuild=testNum" Building target: \x27" packagesToMatch "_libs" - packageTestBuild=testNum" Build ALL target for \x27" packagesToMatch "\x27" - runTestPattern=testNum" Running test for package \x27" packagesToMatch "\x27" - next #skip any more processing, go on to next line - } - - if (FOUND==2) - { - FOUND-- - dashboardName=$0 - sub(/Start [ ]*[0-9]*: /,"",dashboardName) - dashboardName=RemoveWhiteSpace(dashboardName) - listOfDashboardNames[testctr] = dashboardName - 
testctr++ - dashBoardPattern="Test [ ]*#[0-9]*: " dashboardName - } - -# Record the "track" for this dashboard, which could be "Nightly", "Experimental", or "Specialized" - if (FOUND && $0 ~ "-- Trilinos_TRACK=") - { - thisLine=$0 - sub(/^[0-9]*: -- Trilinos_TRACK=\x27/,"",thisLine) - sub(/\x27/,"",thisLine) - if (length(thisLine) > 0) { - dashboardTrack[dashboardName] = thisLine - trackTypes[thisLine]++ - } - } - - if (FOUND && $0 ~ dashBoardPattern) - { - thisLine=$0 - thisLine=RemoveWhiteSpace(thisLine) - if (dashboardErrors == 0) - dashBoardSummary[dashboardName] = "passed" - else - dashBoardSummary[dashboardName] = "FAILED" - match(thisLine,"[0-9]*\\.[0-9]* sec$") - timeSummary[dashboardName] = substr(thisLine,RSTART,RLENGTH) - #done with this dashboard, reset error flag - dashboardErrors=0 - } - - # library build - if (FOUND && $0 ~ packageLibBuild) - { - getCompilerSummary=2 - thisLine = $0 - pat = "\x27" packagesToMatch "_libs\x27" - match(thisLine,pat) - currentPackage = substr(thisLine,RSTART+1,RLENGTH-2) - listOfPackages[currentPackage] = currentPackage - } - - # tests build - if (FOUND && $0 ~ packageTestBuild) - { - getCompilerSummary=2 - thisLine = $0 - pat = "\x27" packagesToMatch "\x27" - match(thisLine,pat) - currentPackage = substr(thisLine,RSTART+1,RLENGTH-2) - listOfPackages[currentPackage] = currentPackage - } - - if (getCompilerSummary>0 && $0 ~ " Compiler errors") - { - thisLine=$0 - sub(testNum,"",thisLine) - pat="[0-9]*" - thisLine=RemoveWhiteSpace(thisLine) - - match(thisLine,pat) - numErrors = substr(thisLine,RSTART,RLENGTH) - errorSummary[dashboardName,currentPackage] = numErrors - - getCompilerSummary-- - } - - if (getCompilerSummary>0 && $0 ~ " Compiler warnings") - { - thisLine=$0 - sub(testNum,"",thisLine) - thisLine=RemoveWhiteSpace(thisLine) - pat="[0-9]*" - match(thisLine,pat) - numWarnings = substr(thisLine,RSTART,RLENGTH); - warningSummary[dashboardName,currentPackage] = numWarnings - getCompilerSummary-- - } - - #Look for 
pattern indicating that the tests of interest have in fact run. - if (FOUND && match($0,runTestPattern)) - { - packageTested = substr($0,RSTART,RLENGTH); - sub(testNum,"",packageTested) - packageTested=RemoveWhiteSpace(packageTested) - getTestSummary=1 - } - - if (getTestSummary && $0 ~ "No tests were found!!!") - { - getTestSummary=0 - } - - #Calculate the number of failing, passing, and total tests. - if (getTestSummary && $0 ~ "tests failed out of") - { - thisLine=$0 - sub(testNum,"",thisLine) - thisLine=RemoveWhiteSpace(thisLine) - getTestSummary=0 - pat = "[0-9]* tests failed out of [0-9]*" - match(thisLine,pat) - ttt = substr(thisLine,RSTART,RLENGTH); - pat = "^[0-9]*" - match(ttt,pat) - numFailed = substr(ttt,RSTART,RLENGTH); - pat = "[0-9]*$" - match(ttt,pat) - numTotal = substr(ttt,RSTART,RLENGTH); - failSummary[dashboardName,currentPackage] = numFailed - passSummary[dashboardName,currentPackage] = numTotal+0-numFailed - totalSummary[dashboardName,currentPackage] = numTotal - if (numFailed != 0) - dashboardErrors=1 - } -} - -################################################### -# helper functions -################################################### -function RemoveWhiteSpace(theString) -{ - sub(/^[ ]*/,"",theString); sub(/[ ]*$/,"",theString); - return (theString) -} - -################################################### -# Commands to run after the file is processed -################################################### -END { - - if (gitUpdateFailed == 1) { - print "\n *** git update FAILED ***\n" > summaryFile - } - - # do some nice formatting - numPlusses=73 - thePluses=" " - while (jj++ summaryFile - for (track in trackTypes) { - printf("%s\n",thePluses) > summaryFile - trackNameLength = length(track) - numPlussesToTheRight = numPlusses - trackNameLength - 4 - plussesToTheRight="" - kk = 0 - while (kk++ summaryFile - printf("%s\n",thePluses) > summaryFile - for (i in listOfDashboardNames) { - db=listOfDashboardNames[i] - if (dashboardTrack[db] == track) 
- printf(" %61-s ... %s\n",db,dashBoardSummary[db]) > summaryFile; - } - } - printf("-----------------------------------------------------------------------------\n\n") > summaryFile - - for (i in listOfDashboardNames) { - db=listOfDashboardNames[i] - spaces=" " - printf("%55-s\n%s%8-s, %5.1f seconds\n",db, spaces, dashBoardSummary[db], timeSummary[db]) > summaryFile; - for (k in listOfPackages) { - pat = "_lib" - if (match(k,pat)) isLib = 1; - else isLib = 0; - if ((db,k) in warningSummary) nwarn = warningSummary[db,k] - else nwarn = "-"; - if ((db,k) in errorSummary) nerr = errorSummary[db,k] - else nerr = "-"; - if ((db,k) in failSummary) nfail = failSummary[db,k] - else nfail = "-"; - if ((db,k) in passSummary) npass = passSummary[db,k] - else npass = "-"; - if ((db,k) in totalSummary) ntotal = totalSummary[db,k] - else ntotal = "-"; - if (isLib) { - summaryString = sprintf("%15s | %3d warnings | %3d errors",k,nwarn,nerr); - } - else { - summaryString = sprintf("%15s | %3d warnings | %3d errors | %d/%d passed",k,nwarn,nerr,npass,ntotal); - } - print spaces summaryString > summaryFile - } - } - -} -' $INFILE - -date2=`echo $(date) | sed "s/ /_/g"` -cdashDate="$(date +%F)" -cat ${OUTFILE}.txt | perl ${HTMLPERLSCRIPT} ${date2} ${cdashDate} ${MACHINENAME} ${USER} > ${OUTFILE}.html - -${MAILCOMMAND} -it <; -# close(FILE); - -my @LINES; -while () -{ - push @LINES, $_; -} - -$numLines = scalar @LINES; - -use Class::Struct; -struct Entry => { - name => '$', - XpetraBuildWarnings => '$', - XpetraBuildErrors => '$', - XpetraTestPasses => '$', - XpetraTestFailures => '$', - MueLuBuildWarnings => '$', - MueLuBuildErrors => '$', - MueLuTestPasses => '$', - MueLuTestFailures => '$', - XpetraLibBuildWarnings => '$', - XpetraLibBuildErrors => '$', - MueLuLibBuildWarnings => '$', - MueLuLibBuildErrors => '$' -}; - -sub extractLeadingNumber -{ - my $str = shift; -# print "str is $str.\n"; - # maybe not the best way to do this, but this is what I know how to do without Google's 
help - my @fields = split /\s+/, $str; - return $fields[0]; -} - -sub trim { my $s = shift; $s =~ s/^\s+|\s+$//g; return $s }; - -# my $trimTest = " string with trailing spaces "; -# my $trimmed = trim $trimTest; -# print "Trimmed string: \"$trimmed\"\n"; - -my @entries, my @entriesNightly, my @entriesExperimental, my@entriesSpecialized; -my $gitFailure = ""; - -my %testType; -$testType = "Nightly"; -my $pastSummary = 0; - -for ($i=0; $i<$numLines; $i++) -{ - my $line = $LINES[$i]; - #print $line; - if ($pastSummary == 0) - { -# print "NOT past summary.\n"; - if ($line =~ /git update FAILED/) - { - $gitFailure = "*** git update FAILED ***"; - } - if ($line =~ /\++ Nightly \++/) - { - $testType = "Nightly"; - } - elsif ($line =~ /\++ Experimental \++/) - { - $testType = "Experimental"; - } - elsif ($line =~ /\++ Specialized \++/) - { - $testType = "Specialized"; - } - - if ($line =~ /(.*)\s+\.\.\. [pF]/) - { - $testName = trim $1; - $testType{$testName} = $testType; - } - else - { - #print "line did not match the test listing regex: $line.\n"; - } - if ($line =~ /-----------------------------------------------------------------------------/) - { - $pastSummary = 1; - #print "FOUND THE END OF THE SUMMARY"; - $i++; # blank line follows the --- line: we ignore this one. 
- $i++; - } - } - - if ($pastSummary == 1) - { -#entries are of the following form: -# OPENMPI_1.10.0_RELEASE_DEV_MueLu_NO_SERIAL -# passed , 14.3 seconds -# Xpetra | 0 warnings | 0 errors | 0/0 passed -# MueLu_libs | 0 warnings | 0 errors -# MueLu | 0 warnings | 0 errors | 0/0 passed -# Xpetra_libs | 0 warnings | 0 errors - my $entry = Entry->new(); - my $name = trim $LINES[$i]; -# print "name: $name.\n"; - $entry->name($name); #trim gets rid of any leading/trailing whitespace - $i++; - $line = $LINES[$i]; # timing/overall result line -- for now, we ignore - my $numEntryLines = 4; - for (my $j=0; $j<$numEntryLines; $j++) - { - $i++; - $line = $LINES[$i]; - my @fields = split /\s*\|\s*/, $line; -# print "field 0: " . $fields[0] . "\n"; -# print "num fields: " . scalar @fields . "\n"; - my $whichTests = trim $fields[0]; - if ($whichTests =~ /Xpetra_libs/) - { - # get numWarnings from $fields[1] = "\d+ warnings" - $numWarnings = extractLeadingNumber $fields[1]; - $entry->XpetraLibBuildWarnings($numWarnings); - # get numErrors from $fields[2] = "\d+ errors" - $numErrors = extractLeadingNumber $fields[2]; - $entry->XpetraLibBuildErrors($numErrors); - } - elsif ($whichTests =~ /MueLu_libs/) - { - # get numWarnings from $fields[1] = "\d+ warnings" - $numWarnings = extractLeadingNumber $fields[1]; - $entry->MueLuLibBuildWarnings($numWarnings); - # get numErrors from $fields[2] = "\d+ errors" - $numErrors = extractLeadingNumber $fields[2]; - $entry->MueLuLibBuildErrors($numErrors); - } - elsif ( $whichTests =~ /Xpetra/) - { - # get numWarnings from $fields[1] = "\d+ warnings" -# print "getting numWarnings from fields 1: $fields[1]\n"; - $numWarnings = extractLeadingNumber $fields[1]; - $entry->XpetraBuildWarnings($numWarnings); - # get numErrors from $fields[2] = "\d+ errors" - # print "getting numErrors from fields 2: $fields[2]\n"; - $numErrors = extractLeadingNumber $fields[2]; - $entry->XpetraBuildErrors($numErrors); - # get pass/total - my ($pass,$totalString) = split 
/\//, $fields[3]; - # print "getting total from totalString: $totalString\n"; - my $total = extractLeadingNumber $totalString; - my $fail = $total - $pass; - $entry->XpetraTestPasses($pass); - $entry->XpetraTestFailures($fail); - } - elsif ($whichTests =~ /MueLu/) - { - # get numWarnings from $fields[1] = "\d+ warnings" - $numWarnings = extractLeadingNumber $fields[1]; - $entry->MueLuBuildWarnings($numWarnings); - # get numErrors from $fields[2] = "\d+ errors" - $numErrors = extractLeadingNumber $fields[2]; - $entry->MueLuBuildErrors($numErrors); - # get pass/total - my ($pass,$totalString) = split /\//, $fields[3]; - my $total = extractLeadingNumber $totalString; - my $fail = $total - $pass; - $entry->MueLuTestPasses($pass); - $entry->MueLuTestFailures($fail); - } - else - { - print "UNMATCHED whichTests: $whichTests.\n"; - } - } - push @entries, $entry; - $name = $entry->name; - if ($testType{$name} eq "Nightly") - { - push @entriesNightly, $entry; - } - elsif ($testType{$name} eq "Experimental") - { - push @entriesExperimental, $entry; - } - elsif ($testType{$name} eq "Specialized") - { - push @entriesSpecialized, $entry; - } - } -} - -# print "There are " . (scalar @entries) . " entries.\n"; - -my $theDate = $ARGV[0]; -my $cdashDate = $ARGV[1]; -my $cdashMachine = $ARGV[2]; -my $capMachine = ucfirst($cdashMachine); -my $senderName = $ARGV[3]; -$theDate =~ s/_/ /g; - -print < - - - - - Dashboard - - - - -Go to the full report -
- - -

$capMachine Test Summary

-

$gitFailure

-EOF - -sub printTableHeader -{ - print < - - -MueLu -MueLu_libs -Xpetra -Xpetra_libs - - - -Test -pass -fail/notrun -errors -warnings -errors -warnings -pass -fail -errors -warnings -errors -warnings - -EOF -} -sub printTableFooter -{ - print < -EOF -} - -sub printEntries { - my @entriesToPrint = @{$_[0]}; - for $entry (@entriesToPrint) - { - $name = $entry->name; - $XpetraBuildWarnings = $entry->XpetraBuildWarnings; - $XpetraBuildErrors = $entry->XpetraBuildErrors; - $XpetraTestPasses = $entry->XpetraTestPasses; - $XpetraTestFailures = $entry->XpetraTestFailures; - $XpetraLibBuildWarnings = $entry->XpetraLibBuildWarnings; - $XpetraLibBuildErrors = $entry->XpetraLibBuildErrors; - $MueLuBuildWarnings = $entry->MueLuBuildWarnings; - $MueLuBuildErrors = $entry->MueLuBuildErrors; - $MueLuTestPasses = $entry->MueLuTestPasses; - $MueLuTestFailures = $entry->MueLuTestFailures; - $MueLuLibBuildWarnings = $entry->MueLuLibBuildWarnings; - $MueLuLibBuildErrors = $entry->MueLuLibBuildErrors; - my $MueLuTestPassColor = 'g'; - my $MueLuTestFailureColor = 'g'; - if ($MueLuTestPasses + $MueLuTestFailures == 0) - { - $MueLuTestPassColor = 'y'; - $MueLuTestFailureColor = 'y'; - } - elsif ($MueLuTestFailures > 0) - { - $MueLuTestFailureColor = 'r'; - } - my $MueLuBuildErrorColor = ($MueLuBuildErrors > 0) ? 'r' : 'g'; - my $MueLuBuildWarningColor = ($MueLuBuildWarnings > 0) ? 'y' : 'g'; - my $MueLuLibBuildErrorColor = ($MueLuLibBuildErrors > 0) ? 'r' : 'g'; - my $MueLuLibBuildWarningColor = ($MueLuLibBuildWarnings > 0) ? 'y' : 'g'; - my $XpetraTestPassColor = 'g'; - my $XpetraTestFailureColor = 'g'; - if ($XpetraTestPasses + $XpetraTestFailures == 0) - { - $XpetraTestPassColor = 'y'; - $XpetraTestFailureColor = 'y'; - } - elsif ($XpetraTestFailures > 0) - { - $XpetraTestFailureColor = 'r'; - } - my $XpetraBuildErrorColor = ($XpetraBuildErrors > 0) ? 'r' : 'g'; - my $XpetraBuildWarningColor = ($XpetraBuildWarnings > 0) ? 
'y' : 'g'; - my $XpetraLibBuildErrorColor = ($XpetraLibBuildErrors > 0) ? 'r' : 'g'; - my $XpetraLibBuildWarningColor = ($XpetraLibBuildWarnings > 0) ? 'y' : 'g'; - print < -$name -$MueLuTestPasses -$MueLuTestFailures -$MueLuBuildErrors -$MueLuBuildWarnings -$MueLuLibBuildErrors -$MueLuLibBuildWarnings -$XpetraTestPasses -$XpetraTestFailures -$XpetraBuildErrors -$XpetraBuildWarnings -$XpetraLibBuildErrors -$XpetraLibBuildWarnings - -EOE - } -} - -print "

Nightly Tests

\n"; -if (@entriesNightly > 0) -{ - printTableHeader; - printEntries(\@entriesNightly); - printTableFooter; -} -else -{ - print "no results reported\n"; -} - -print "

Specialized Tests

\n"; -if (@entriesSpecialized > 0) -{ - printTableHeader; - printEntries(\@entriesSpecialized); - printTableFooter; -} -else -{ - print "no results reported\n"; -} - -print "

Experimental Tests

\n"; -if (@entriesExperimental > 0) -{ - printTableHeader; - printEntries(\@entriesExperimental); - printTableFooter; -} -else -{ - print "no results reported\n"; -} - -print < -
- - -
-EOF
-
-foreach $line (@LINES)
-{
-  print $line;
-}
-
-print <<"EOF";
-
- - -EOF diff --git a/cmake/ctest/drivers/trappist/muelu-clang.lua b/cmake/ctest/drivers/trappist/muelu-clang.lua deleted file mode 100644 index bdf9a8b31664..000000000000 --- a/cmake/ctest/drivers/trappist/muelu-clang.lua +++ /dev/null @@ -1,27 +0,0 @@ -load("sems-clang/11.0.1") -load("sems-openmpi") -load("sems-cmake") -load("sems-ninja") -load("sems-git") - -load("sems-superlu") - -load("sems-yaml-cpp") -load("sems-hdf5") -load("sems-parallel-netcdf") -load("sems-zlib") - -load("sems-boost") -load("sems-python") - -load("sems-metis") -load("sems-parmetis") - -load("sems-cuda") - -pushenv("OMP_NUM_THREADS","2") -pushenv("CUDA_LAUNCH_BLOCKING","1") -pushenv("CUDA_MANAGED_FORCE_DEVICE_ALLOC","1") - --- Only run on the Tesla K40, not the Quadro -- -pushenv("CUDA_VISIBLE_DEVICES","0") \ No newline at end of file diff --git a/cmake/ctest/drivers/trappist/muelu-gcc.lua b/cmake/ctest/drivers/trappist/muelu-gcc.lua deleted file mode 100644 index 9e1a19e1c3a4..000000000000 --- a/cmake/ctest/drivers/trappist/muelu-gcc.lua +++ /dev/null @@ -1,28 +0,0 @@ -load("sems-gcc/8.3.0") -load("sems-openmpi") -load("sems-cmake") -load("sems-ninja") -load("sems-git") - -load("sems-superlu") - -load("sems-yaml-cpp") -load("sems-hdf5") -load("sems-netcdf-c") -load("sems-parallel-netcdf") -load("sems-zlib") - -load("sems-boost") -load("sems-python") - -load("sems-metis") -load("sems-parmetis") - -load("sems-cuda") - -pushenv("OMP_NUM_THREADS","2") -pushenv("CUDA_LAUNCH_BLOCKING","1") -pushenv("CUDA_MANAGED_FORCE_DEVICE_ALLOC","1") - --- Only run on the Tesla K40, not the Quadro -- -pushenv("CUDA_VISIBLE_DEVICES","0") \ No newline at end of file diff --git a/cmake/ctest/drivers/trappist/sendTestSummary.sh b/cmake/ctest/drivers/trappist/sendTestSummary.sh deleted file mode 100755 index a0824b2838bb..000000000000 --- a/cmake/ctest/drivers/trappist/sendTestSummary.sh +++ /dev/null @@ -1,326 +0,0 @@ -#!/bin/sh - -# Parse command line options. 
-DEBUGMODE=0 -USAGE="sendTestSummary.sh [-d] " -while getopts d OPT; do - case "$OPT" in - d) - # debug mode, send email summary to me only - DEBUGMODE=1 - ;; - \?) - # getopts issues an error message - echo ${USAGE} - echo - exit 1 - ;; - esac -done - -# Remove the options we parsed above. -shift `expr $OPTIND - 1` -# The logfile is required. Error out if it's not provided. -if [ $# -eq 0 ]; then - echo $USAGE >&2 - exit 1 -fi -### end parsing ### - -######################################################################### -# Variables you might want to modify. -######################################################################### - -#Perl script to produce prettified HTML -HTMLPERLSCRIPT="/storage/lberge/nightlyTests/Trilinos/cmake/ctest/drivers/trappist/drakify-email.pl" -#root of cdash testing directory -TESTLOCATION="/storage/lberge/nightlyTests" -LOGBACKUPDIRECTORY="/storage/lberge/nightlyTests/logs" - -#packages to be summarized -PATTERN="(Xpetra|MueLu)" - -#variables to be passed to the perl script -MACHINENAME=`hostname -s` -USER=`whoami` - -#who gets the email summary -if [[ $DEBUGMODE == 1 ]]; then - RECIPIENTS=( - "${USER}@sandia.gov" - ) -else - RECIPIENTS=( - "muelu-regression@software.sandia.gov" - ) -fi -#suffix for all the log files -timeStamp="$(date +%F_%R)" - -#cron driver log file -INFILE=$1 -#root of file to be emailed. The correct suffix must be appended whenever you use this. 
-OUTFILE="test-summary-${timeStamp}" -MAILCOMMAND="/usr/sbin/sendmail" -######################################################################### - -cd ${TESTLOCATION} - -backupFile="cron_driver.log.$timeStamp" -cp cron_driver.log $backupFile - -testStartString=`egrep "Starting nightly Trilinos development" cron_driver.log` -testStartDate=`echo $testStartString | sed "s/:/#/" | cut -f 2 -d#` -ttt=`echo $testStartString | cut -f 1 -d:` -testMachine=${ttt##* } -testEndString=`egrep "Ending nightly Trilinos development" cron_driver.log` -testEndDate=`echo $testEndString | sed "s/:/#/" | cut -f 2 -d#` - -awk -v packagesToMatch="$PATTERN" -v summaryFile="${OUTFILE}.txt" -v machine="$testMachine" -v startTime="$testStartDate" -v endTime="$testEndDate" ' - -################################################### -# Commands to run before the file is processed -################################################### -BEGIN { - print "Machine : " machine > summaryFile - print "Start time : " startTime > summaryFile - print "End time : " endTime > summaryFile - testctr=0 - gitUpdateFailed=0 - dashboardErrors=0 -} - -################################################### -# Commands to run while processing the file -################################################### -{ - - if ($0 ~ "Update command failed") - { - gitUpdateFailed=1 - } - - if ($0 ~ "^test [0-9]*$") - { - #start of test found, e.g., test 4 - FOUND=2 - testNum=$0 - sub(/test /,"",testNum) - testNum=testNum":" - #\x27 is hex code for single quote - packageLibBuild=testNum" Building target: \x27" packagesToMatch "_libs" - packageTestBuild=testNum" Build ALL target for \x27" packagesToMatch "\x27" - runTestPattern=testNum" Running test for package \x27" packagesToMatch "\x27" - next #skip any more processing, go on to next line - } - - if (FOUND==2) - { - FOUND-- - dashboardName=$0 - sub(/Start [ ]*[0-9]*: /,"",dashboardName) - dashboardName=RemoveWhiteSpace(dashboardName) - listOfDashboardNames[testctr] = dashboardName - 
testctr++ - dashBoardPattern="Test [ ]*#[0-9]*: " dashboardName - } - -# Record the "track" for this dashboard, which could be "Nightly", "Experimental", or "Specialized" - if (FOUND && $0 ~ "-- Trilinos_TRACK=") - { - thisLine=$0 - sub(/^[0-9]*: -- Trilinos_TRACK=\x27/,"",thisLine) - sub(/\x27/,"",thisLine) - if (length(thisLine) > 0) { - dashboardTrack[dashboardName] = thisLine - trackTypes[thisLine]++ - } - } - - if (FOUND && $0 ~ dashBoardPattern) - { - thisLine=$0 - thisLine=RemoveWhiteSpace(thisLine) - if (dashboardErrors == 0) - dashBoardSummary[dashboardName] = "passed" - else - dashBoardSummary[dashboardName] = "FAILED" - match(thisLine,"[0-9]*\\.[0-9]* sec$") - timeSummary[dashboardName] = substr(thisLine,RSTART,RLENGTH) - #done with this dashboard, reset error flag - dashboardErrors=0 - } - - # library build - if (FOUND && $0 ~ packageLibBuild) - { - getCompilerSummary=2 - thisLine = $0 - pat = "\x27" packagesToMatch "_libs\x27" - match(thisLine,pat) - currentPackage = substr(thisLine,RSTART+1,RLENGTH-2) - listOfPackages[currentPackage] = currentPackage - } - - # tests build - if (FOUND && $0 ~ packageTestBuild) - { - getCompilerSummary=2 - thisLine = $0 - pat = "\x27" packagesToMatch "\x27" - match(thisLine,pat) - currentPackage = substr(thisLine,RSTART+1,RLENGTH-2) - listOfPackages[currentPackage] = currentPackage - } - - if (getCompilerSummary>0 && $0 ~ " Compiler errors") - { - thisLine=$0 - sub(testNum,"",thisLine) - pat="[0-9]*" - thisLine=RemoveWhiteSpace(thisLine) - - match(thisLine,pat) - numErrors = substr(thisLine,RSTART,RLENGTH) - errorSummary[dashboardName,currentPackage] = numErrors - - getCompilerSummary-- - } - - if (getCompilerSummary>0 && $0 ~ " Compiler warnings") - { - thisLine=$0 - sub(testNum,"",thisLine) - thisLine=RemoveWhiteSpace(thisLine) - pat="[0-9]*" - match(thisLine,pat) - numWarnings = substr(thisLine,RSTART,RLENGTH); - warningSummary[dashboardName,currentPackage] = numWarnings - getCompilerSummary-- - } - - #Look for 
pattern indicating that the tests of interest have in fact run. - if (FOUND && match($0,runTestPattern)) - { - packageTested = substr($0,RSTART,RLENGTH); - sub(testNum,"",packageTested) - packageTested=RemoveWhiteSpace(packageTested) - getTestSummary=1 - } - - if (getTestSummary && $0 ~ "No tests were found!!!") - { - getTestSummary=0 - } - - #Calculate the number of failing, passing, and total tests. - if (getTestSummary && $0 ~ "tests failed out of") - { - thisLine=$0 - sub(testNum,"",thisLine) - thisLine=RemoveWhiteSpace(thisLine) - getTestSummary=0 - pat = "[0-9]* tests failed out of [0-9]*" - match(thisLine,pat) - ttt = substr(thisLine,RSTART,RLENGTH); - pat = "^[0-9]*" - match(ttt,pat) - numFailed = substr(ttt,RSTART,RLENGTH); - pat = "[0-9]*$" - match(ttt,pat) - numTotal = substr(ttt,RSTART,RLENGTH); - failSummary[dashboardName,currentPackage] = numFailed - passSummary[dashboardName,currentPackage] = numTotal+0-numFailed - totalSummary[dashboardName,currentPackage] = numTotal - if (numFailed != 0) - dashboardErrors=1 - } -} - -################################################### -# helper functions -################################################### -function RemoveWhiteSpace(theString) -{ - sub(/^[ ]*/,"",theString); sub(/[ ]*$/,"",theString); - return (theString) -} - -################################################### -# Commands to run after the file is processed -################################################### -END { - - if (gitUpdateFailed == 1) { - print "\n *** git update FAILED ***\n" > summaryFile - } - - # do some nice formatting - numPlusses=73 - thePluses=" " - while (jj++ summaryFile - for (track in trackTypes) { - printf("%s\n",thePluses) > summaryFile - trackNameLength = length(track) - numPlussesToTheRight = numPlusses - trackNameLength - 4 - plussesToTheRight="" - kk = 0 - while (kk++ summaryFile - printf("%s\n",thePluses) > summaryFile - for (i in listOfDashboardNames) { - db=listOfDashboardNames[i] - if (dashboardTrack[db] == track) 
- printf(" %61-s ... %s\n",db,dashBoardSummary[db]) > summaryFile; - } - } - printf("-----------------------------------------------------------------------------\n\n") > summaryFile - - for (i in listOfDashboardNames) { - db=listOfDashboardNames[i] - spaces=" " - printf("%55-s\n%s%8-s, %5.1f seconds\n",db, spaces, dashBoardSummary[db], timeSummary[db]) > summaryFile; - for (k in listOfPackages) { - pat = "_lib" - if (match(k,pat)) isLib = 1; - else isLib = 0; - if ((db,k) in warningSummary) nwarn = warningSummary[db,k] - else nwarn = "-"; - if ((db,k) in errorSummary) nerr = errorSummary[db,k] - else nerr = "-"; - if ((db,k) in failSummary) nfail = failSummary[db,k] - else nfail = "-"; - if ((db,k) in passSummary) npass = passSummary[db,k] - else npass = "-"; - if ((db,k) in totalSummary) ntotal = totalSummary[db,k] - else ntotal = "-"; - if (isLib) { - summaryString = sprintf("%15s | %3d warnings | %3d errors",k,nwarn,nerr); - } - else { - summaryString = sprintf("%15s | %3d warnings | %3d errors | %d/%d passed",k,nwarn,nerr,npass,ntotal); - } - print spaces summaryString > summaryFile - } - } - -} -' $INFILE - -date2=`echo $(date) | sed "s/ /_/g"` -cdashDate="$(date +%F)" -cat ${OUTFILE}.txt | perl ${HTMLPERLSCRIPT} ${date2} ${cdashDate} ${MACHINENAME} ${USER} > ${OUTFILE}.html - -${MAILCOMMAND} -it <,int,int); +#endif +#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE + AMESOS2_SOLVER_TPETRA_INST(CssMKL,std::complex,int,int); +#endif +} +#endif //END INST_INT_INT + +#ifdef HAVE_TPETRA_INST_INT_UNSIGNED +namespace Amesos2 { +#ifdef HAVE_TPETRA_INST_FLOAT + AMESOS2_SOLVER_TPETRA_INST(CssMKL,float,int,unsigned int); +#endif +#ifdef HAVE_TPETRA_INST_DOUBLE + AMESOS2_SOLVER_TPETRA_INST(CssMKL,double,int,unsigned int); +#endif +#ifdef HAVE_TPETRA_INST_COMPLEX_FLOAT + AMESOS2_SOLVER_TPETRA_INST(CssMKL,std::complex,int,unsigned int); +#endif +#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE + AMESOS2_SOLVER_TPETRA_INST(CssMKL,std::complex,int,unsigned int); +#endif +} +#endif //END 
INST_INST_UNSIGNED + + +#ifdef HAVE_TPETRA_INST_INT_LONG +namespace Amesos2 { +#ifdef HAVE_TPETRA_INST_FLOAT + AMESOS2_SOLVER_TPETRA_INST(CssMKL,float,int,long); +#endif +#ifdef HAVE_TPETRA_INST_DOUBLE + AMESOS2_SOLVER_TPETRA_INST(CssMKL,double,int,long); +#endif +#ifdef HAVE_TPETRA_INST_COMPLEX_FLOAT + AMESOS2_SOLVER_TPETRA_INST(CssMKL,std::complex,int,long); +#endif +#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE + AMESOS2_SOLVER_TPETRA_INST(CssMKL,std::complex,int,long); +#endif +} +#endif + +#ifdef HAVE_TPETRA_INST_INT_LONG_LONG +namespace Amesos2 { +#ifdef HAVE_TPETRA_INST_FLOAT + AMESOS2_SOLVER_TPETRA_INST(CssMKL,float,int,long long); +#endif +#ifdef HAVE_TPETRA_INST_DOUBLE + AMESOS2_SOLVER_TPETRA_INST(CssMKL,double,int,long long); +#endif +#ifdef HAVE_TPETRA_INST_COMPLEX_FLOAT + AMESOS2_SOLVER_TPETRA_INST(CssMKL,std::complex,int,long long); +#endif +#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE + AMESOS2_SOLVER_TPETRA_INST(CssMKL,std::complex,int,long long); +#endif +} +#endif + +#include +#include "TpetraCore_ETIHelperMacros.h" + +#define AMESOS2_CSSMKL_LOCAL_INSTANT(S,LO,GO,N) \ + template class Amesos2::CssMKL, \ + Tpetra::MultiVector >; + +TPETRA_ETI_MANGLING_TYPEDEFS() + +#if defined(HAVE_TPETRA_INST_SERIAL) && !defined(HAVE_TPETRA_DEFAULTNODE_SERIALWRAPPERNODE) && defined(HAVE_TPETRA_INST_DOUBLE) +#define NODETYPE Tpetra_KokkosCompat_KokkosSerialWrapperNode +#ifdef HAVE_TPETRA_INST_FLOAT + #ifdef HAVE_TPETRA_INST_INT_INT + AMESOS2_CSSMKL_LOCAL_INSTANT(float, int, int, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(float, int, long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(float, int, long long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_UNSIGNED + AMESOS2_CSSMKL_LOCAL_INSTANT(float, int, unsigned int, NODETYPE) + #endif +#endif +#ifdef HAVE_TPETRA_INST_DOUBLE + #ifdef HAVE_TPETRA_INST_INT_INT + AMESOS2_CSSMKL_LOCAL_INSTANT(double, int, int, NODETYPE) + #endif + #ifdef 
HAVE_TPETRA_INST_INT_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(double, int, long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(double, int, long long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_UNSIGNED + AMESOS2_CSSMKL_LOCAL_INSTANT(double, int, unsigned int, NODETYPE) + #endif +#endif +#ifdef HAVE_TPETRA_INST_COMPLEX_FLOAT + #ifdef HAVE_TPETRA_INST_INT_INT + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, int, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, long long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_UNSIGNED + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, unsigned int, NODETYPE) + #endif +#endif +#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE + #ifdef HAVE_TPETRA_INST_INT_INT + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, int, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, long long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_UNSIGNED + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, unsigned int, NODETYPE) + #endif +#endif +#undef NODETYPE +#endif + +#if defined(HAVE_TPETRA_INST_PTHREAD) && !defined(HAVE_TPETRA_DEFAULTNODE_THREADSWRAPPERNODE) && defined(HAVE_TPETRA_INST_DOUBLE) +#define NODETYPE Tpetra_KokkosCompat_KokkosThreadsWrapperNode +#ifdef HAVE_TPETRA_INST_FLOAT + #ifdef HAVE_TPETRA_INST_INT_INT + AMESOS2_CSSMKL_LOCAL_INSTANT(float, int, int, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(float, int, long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(float, int, long long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_UNSIGNED + AMESOS2_CSSMKL_LOCAL_INSTANT(float, int, 
unsigned int, NODETYPE) + #endif +#endif +#ifdef HAVE_TPETRA_INST_DOUBLE + #ifdef HAVE_TPETRA_INST_INT_INT + AMESOS2_CSSMKL_LOCAL_INSTANT(double, int, int, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(double, int, long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(double, int, long long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_UNSIGNED + AMESOS2_CSSMKL_LOCAL_INSTANT(double, int, unsigned int, NODETYPE) + #endif +#endif +#ifdef HAVE_TPETRA_INST_COMPLEX_FLOAT + #ifdef HAVE_TPETRA_INST_INT_INT + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, int, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, long long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_UNSIGNED + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, unsigned int, NODETYPE) + #endif +#endif +#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE + #ifdef HAVE_TPETRA_INST_INT_INT + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, int, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, long long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_UNSIGNED + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, unsigned int, NODETYPE) + #endif +#endif +#undef NODETYPE +#endif + +#if defined(HAVE_TPETRA_INST_OPENMP) && !defined(HAVE_TPETRA_DEFAULTNODE_OPENMPWRAPPERNODE) && defined(HAVE_TPETRA_INST_DOUBLE) +#define NODETYPE Tpetra_KokkosCompat_KokkosOpenMPWrapperNode +#ifdef HAVE_TPETRA_INST_FLOAT + #ifdef HAVE_TPETRA_INST_INT_INT + AMESOS2_CSSMKL_LOCAL_INSTANT(float, int, int, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(float, int, long, NODETYPE) + #endif + #ifdef 
HAVE_TPETRA_INST_INT_LONG_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(float, int, long long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_UNSIGNED + AMESOS2_CSSMKL_LOCAL_INSTANT(float, int, unsigned int, NODETYPE) + #endif +#endif +#ifdef HAVE_TPETRA_INST_DOUBLE + #ifdef HAVE_TPETRA_INST_INT_INT + AMESOS2_CSSMKL_LOCAL_INSTANT(double, int, int, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(double, int, long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(double, int, long long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_UNSIGNED + AMESOS2_CSSMKL_LOCAL_INSTANT(double, int, unsigned int, NODETYPE) + #endif +#endif +#ifdef HAVE_TPETRA_INST_COMPLEX_FLOAT + #ifdef HAVE_TPETRA_INST_INT_INT + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, int, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, long long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_UNSIGNED + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, unsigned int, NODETYPE) + #endif +#endif +#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE + #ifdef HAVE_TPETRA_INST_INT_INT + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, int, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, long long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_UNSIGNED + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, unsigned int, NODETYPE) + #endif +#endif +#undef NODETYPE +#endif + +#if defined(HAVE_TPETRA_INST_CUDA) && !defined(HAVE_TPETRA_DEFAULTNODE_CUDAWRAPPERNODE) && defined(HAVE_TPETRA_INST_DOUBLE) +#define NODETYPE Tpetra_KokkosCompat_KokkosCudaWrapperNode +#ifdef HAVE_TPETRA_INST_FLOAT + #ifdef HAVE_TPETRA_INST_INT_INT + 
AMESOS2_CSSMKL_LOCAL_INSTANT(float, int, int, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(float, int, long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(float, int, long long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_UNSIGNED + AMESOS2_CSSMKL_LOCAL_INSTANT(float, int, unsigned int, NODETYPE) + #endif +#endif +#ifdef HAVE_TPETRA_INST_DOUBLE + #ifdef HAVE_TPETRA_INST_INT_INT + AMESOS2_CSSMKL_LOCAL_INSTANT(double, int, int, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(double, int, long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(double, int, long long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_UNSIGNED + AMESOS2_CSSMKL_LOCAL_INSTANT(double, int, unsigned int, NODETYPE) + #endif +#endif +#ifdef HAVE_TPETRA_INST_COMPLEX_FLOAT + #ifdef HAVE_TPETRA_INST_INT_INT + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, int, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, long long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_UNSIGNED + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, unsigned int, NODETYPE) + #endif +#endif +#ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE + #ifdef HAVE_TPETRA_INST_INT_INT + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, int, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_LONG_LONG + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, long long, NODETYPE) + #endif + #ifdef HAVE_TPETRA_INST_INT_UNSIGNED + AMESOS2_CSSMKL_LOCAL_INSTANT(std::complex, int, unsigned int, NODETYPE) + #endif +#endif +#undef NODETYPE +#endif +#endif // HAVE_AMESOS2_EXPLICIT_INSTANTIATION diff --git 
a/packages/amesos2/src/Amesos2_CssMKL.hpp b/packages/amesos2/src/Amesos2_CssMKL.hpp new file mode 100644 index 000000000000..21eb18d15a99 --- /dev/null +++ b/packages/amesos2/src/Amesos2_CssMKL.hpp @@ -0,0 +1,54 @@ +// @HEADER +// +// *********************************************************************** +// +// Amesos2: Templated Direct Sparse Solver Package +// Copyright 2011 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// *********************************************************************** +// +// @HEADER + + +#ifndef AMESOS2_CSSMKL_HPP +#define AMESOS2_CSSMKL_HPP + +#include "Amesos2_CssMKL_decl.hpp" + +#ifndef HAVE_AMESOS2_EXPLICIT_INSTANTIATION +# include "Amesos2_CssMKL_def.hpp" +#endif + +#endif // AMESOS2_CSSMKL_HPP diff --git a/packages/amesos2/src/Amesos2_CssMKL_FunctionMap.hpp b/packages/amesos2/src/Amesos2_CssMKL_FunctionMap.hpp new file mode 100644 index 000000000000..a4a3fa61d35d --- /dev/null +++ b/packages/amesos2/src/Amesos2_CssMKL_FunctionMap.hpp @@ -0,0 +1,123 @@ +// @HEADER +// +// *********************************************************************** +// +// Amesos2: Templated Direct Sparse Solver Package +// Copyright 2011 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// *********************************************************************** +// +// @HEADER + + +/** + \file Amesos2_NewSolver_FunctionMap.hpp + \author Eric Bavier + \date Wed Jul 27 12:53:10 MDT 2011 + + \brief Template for providing a mechanism to map function calls to the + correct Solver function based on the scalar type of Matrices and + MultiVectors +*/ + +#ifndef AMESOS2_CSSMKL_FUNCTIONMAP_HPP +#define AMESOS2_CSSMKL_FUNCTIONMAP_HPP + +#ifdef HAVE_TEUCHOS_COMPLEX +#include +#endif + +#include "Amesos2_FunctionMap.hpp" +#include "Amesos2_CssMKL_TypeMap.hpp" + + +namespace Amesos2 { + + namespace PMKL { + #ifdef __MKL_PARDISO_H + #undef __MKL_PARDISO_H + #endif + #include "mkl_pardiso.h" + #ifdef __MKL_CLUSTER_SPARSE_SOLVER_H + #undef __MKL_CLUSTER_SPARSE_SOLVER_H + #endif + #include "mkl_cluster_sparse_solver.h" + } + + /** \internal + * + * For Pardiso we bind to the library functions based on the local + * ordinal type. If the local ordinal type is bigger than int, then + * we use pardiso_64 instead. The void* arrays are interpreted by + * the function based on the value of mtype and iparm(28) as + * single/double and complex/real. 
+ */ + template <> + struct FunctionMap + { + static void cluster_sparse_solver( void* pt, + PMKL::_INTEGER_t* maxfct, PMKL::_INTEGER_t* mnum, + PMKL::_INTEGER_t* mtype , PMKL::_INTEGER_t* phase, + PMKL::_INTEGER_t* n , void* a, PMKL::_INTEGER_t* ia, + PMKL::_INTEGER_t* ja , PMKL::_INTEGER_t* perm, + PMKL::_INTEGER_t* nrhs , PMKL::_INTEGER_t* iparm, + PMKL::_INTEGER_t* msglvl, void* b, void* x, + const MPI_Fint * comm , PMKL::_INTEGER_t* error) + { + PMKL::cluster_sparse_solver(pt, maxfct, mnum, mtype, phase, n, a, ia, ja, + perm, nrhs, iparm, msglvl, b, x, comm, error); + } + }; + + + template <> + struct FunctionMap + { + static void cluster_sparse_solver( void* pt, + long long int* maxfct, long long int* mnum, + long long int* mtype , long long int* phase, + long long int* n , void* a, long long int* ia, + long long int* ja , long long int* perm, + long long int* nrhs , long long int* iparm, + long long int* msglvl, void* b, void* x, + const MPI_Fint* comm , long long int* error) + { + PMKL::cluster_sparse_solver_64(pt, maxfct, mnum, mtype, phase, n, a, ia, ja, + perm, nrhs, iparm, msglvl, b, x, comm, error); + } + }; +} // end namespace Amesos2 + +#endif // AMESOS2_NEWSOLVER_FUNCTIONMAP_HPP diff --git a/packages/amesos2/src/Amesos2_CssMKL_TypeMap.hpp b/packages/amesos2/src/Amesos2_CssMKL_TypeMap.hpp new file mode 100644 index 000000000000..d6411e217583 --- /dev/null +++ b/packages/amesos2/src/Amesos2_CssMKL_TypeMap.hpp @@ -0,0 +1,169 @@ +// @HEADER +// +// *********************************************************************** +// +// Amesos2: Templated Direct Sparse Solver Package +// Copyright 2011 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// *********************************************************************** +// +// @HEADER + + +/** + \file Amesos2_CssMKL_TypeMap.hpp + \author John Doe + \date + + \brief Provides definition of CssMKL types as well as + conversions and type traits. For the purpose of + demonstration, we assume that CssMKL has defined its own + complex data-types called `complex' and `doublecomplex'. 
+*/ + +#ifndef AMESOS2_CLUSTERSPARSEMKL_TYPEMAP_HPP +#define AMESOS2_CLUSTERSPARSEMKL_TYPEMAP_HPP + +#ifdef HAVE_TEUCHOS_COMPLEX +#include +#endif + +#include +#include + +#include +#ifdef HAVE_TEUCHOS_COMPLEX +#include +#endif + +#include "Amesos2_TypeMap.hpp" +#include "Amesos2_PardisoMKL_TypeMap.hpp" + +namespace Amesos2 { + + // forward declaration due to circular reference + template class CssMKL; + + /* Specialize the Amesos::TypeMap struct for CssMKL types. + * + * Additional nested types may be added without harm. For an example, look at + * Amesos2_Superlu_TypeMap.hpp + */ + + template <> + struct TypeMap + { + typedef PMKL::_REAL_t type; + typedef PMKL::_REAL_t magnitude_type; + }; + + + template <> + struct TypeMap + { + typedef PMKL::_DOUBLE_PRECISION_t type; + typedef PMKL::_DOUBLE_PRECISION_t magnitude_type; + }; + +#ifdef HAVE_TEUCHOS_COMPLEX + + /* + * We map the std complex types to the appropriate CssMKL complex + * types. + */ + + template <> + struct TypeMap > + { + typedef PMKL::_MKL_Complex8 type; + typedef PMKL::_REAL_t magnitude_type; + }; + + + template <> + struct TypeMap > + { + typedef PMKL::_DOUBLE_COMPLEX_t type; + typedef PMKL::_DOUBLE_PRECISION_t magnitude_type; + }; + + + template <> + struct TypeMap + { + typedef PMKL::_MKL_Complex8 type; + typedef PMKL::_REAL_t magnitude_type; + }; + + + template <> + struct TypeMap + { + typedef PMKL::_DOUBLE_COMPLEX_t type; + typedef PMKL::_DOUBLE_PRECISION_t magnitude_type; + }; +#endif // HAVE_TEUCHOS_COMPLEX + + template <> + struct TypeMap + { + typedef PMKL::_INTEGER_t type; + //typedef int type; + }; + + template <> + struct TypeMap + { + typedef long long int type; + }; + + /* + * We check whether the size of long int is bigger than an int. If + * it is, then long int should be the same size as a long long int, + * so we can safely promote. Otherwise, long int will probably be + * the same size as int, and we can safely treat it as such. 
+ */ + template <> + struct TypeMap + { + typedef std::conditional_t< + sizeof(int) < sizeof(long int), + TypeMap::type, + TypeMap::type > type; + }; + +} // end namespace Amesos + +#endif // AMESOS2_CLUSTERSPARSEMKL_TYPEMAP_HPP diff --git a/packages/amesos2/src/Amesos2_CssMKL_decl.hpp b/packages/amesos2/src/Amesos2_CssMKL_decl.hpp new file mode 100644 index 000000000000..73872f3e99a6 --- /dev/null +++ b/packages/amesos2/src/Amesos2_CssMKL_decl.hpp @@ -0,0 +1,348 @@ +// @HEADER +// +// *********************************************************************** +// +// Amesos2: Templated Direct Sparse Solver Package +// Copyright 2011 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// *********************************************************************** +// +// @HEADER + + +/** + \file Amesos2_CssMKL_decl.hpp + \author Eric Bavier + \date Wed Jul 27 12:52:30 MDT 2011 + + \brief A template class that does nothing useful besides show developers + what, in general, needs to be done to add a new solver interface to + the Amesos2 collection. +*/ + + +#ifndef AMESOS2_CSSMKL_DECL_HPP +#define AMESOS2_CSSMKL_DECL_HPP + +#include + +#include + +#include "Amesos2_SolverTraits.hpp" +#include "Amesos2_SolverCore.hpp" +#include "Amesos2_CssMKL_FunctionMap.hpp" + + +namespace Amesos2 { + + + /** \brief Amesos2 interface to the CssMKL package. + * + * This class provides access to the Pardiso (MKL version 10.3 and + * compatible) sparse direct solver with out-of-core solve support. + * Access is provided for \c float and \c double scalar types, in + * both real and complex. Access to to Pardiso's 64-bit integer + * routines is also provided. + * + * \ingroup amesos2_solver_interfaces + */ + template + class CssMKL : public SolverCore + { + friend class SolverCore; // Give our base access + // to our private + // implementation funcs + public: + + /// The name of this solver interface + static const char* name; // declaration. Initialization outside. 
+ + typedef CssMKL type; + typedef SolverCore super_type; + + // Since typedef's are not inheritted, go grab them + typedef typename super_type::scalar_type scalar_type; + typedef typename super_type::local_ordinal_type local_ordinal_type; + typedef typename super_type::global_ordinal_type global_ordinal_type; + typedef typename super_type::global_size_type global_size_type; + typedef typename super_type::node_type node_type; + typedef Tpetra::Map map_type; + + typedef TypeMap type_map; + + typedef typename type_map::type solver_scalar_type; + typedef typename type_map::magnitude_type solver_magnitude_type; + + // This may be PMKL::_INTEGER_t or long long int depending on the + // mapping and input ordinal + typedef typename TypeMap::type int_t; + + /* For CssMKL we dispatch based on the integer type instead of + * the scalar type: + * - _INTEGER_t => use the cluster_sparse_solver(...) method + * - long long int => use the cluster_sparse_solver_64(...) method + */ + typedef FunctionMap function_map; + + typedef Kokkos::DefaultHostExecutionSpace HostExecSpaceType; + typedef Kokkos::View host_size_type_array; + typedef Kokkos::View host_ordinal_type_array; + typedef Kokkos::View host_value_type_array; + + /// \name Constructor/Destructor methods + //@{ + + /** + * \brief Initialize from Teuchos::RCP. + * + * \warning Should not be called directly! Use instead + * Amesos2::create() to initialize a CssMKL interface. + */ + CssMKL(Teuchos::RCP A, + Teuchos::RCP X, + Teuchos::RCP B); + + + /// Destructor + ~CssMKL( ); + + //@} + + private: + + /** + * \brief Performs pre-ordering on the matrix to increase efficiency. + * + * CssMKL does reordering internally during symbolic + * factorization. Please refer to the \c "IPARM(2)" parameter for + * some reordering options. + */ + int preOrdering_impl(); + + + /** + * \brief Perform symbolic factorization of the matrix using CssMKL. + * + * Called the sequence before numericFactorization. 
+ * + * \throw std::runtime_error CssMKL is not able to factor the matrix. + */ + int symbolicFactorization_impl(); + + + /** + * \brief CssMKL specific numeric factorization + * + * \throw std::runtime_error CssMKL is not able to factor the matrix + */ + int numericFactorization_impl(); + + + /** + * \brief CssMKL specific solve. + * + * Uses the symbolic and numeric factorizations, along with the RHS vector + * \c B to solve the sparse system of equations. + * + * The solution of the system is placed in X. + * + * \throw std::runtime_error CssMKL is not able to solve the system. + */ + int solve_impl(const Teuchos::Ptr > X, + const Teuchos::Ptr > B) const; + + + /** + * \brief Determines whether the shape of the matrix is OK for this solver. + * + * Pardiso MKL handles square matrices. + */ + bool matrixShapeOK_impl() const; + + + /** + * The Pardiso MKL parameters that are currently recognized are: + * + *
    + *
  • \c "IPARM(2)"
  • + *
  • \c "IPARM(4)"
  • + *
  • \c "IPARM(8)"
  • + *
  • \c "IPARM(10)"
  • + *
  • \c "IPARM(12)"
  • + *
  • \c "IPARM(18)"
  • + *
  • \c "IPARM(24)"
  • + *
  • \c "IPARM(25)"
  • + *
  • \c "IPARM(60)"
  • + *
+ * + * Please see the Pardiso MKL documentation for a summary of the + * meaning and valid values for each parameter. + */ + void setParameters_impl(const Teuchos::RCP & parameterList ); + + + /** + * \return a const Teuchos::ParameterList of all valid parameters + * (set to their default values) for this solver. + */ + Teuchos::RCP getValidParameters_impl() const; + + + /** + * \brief Reads matrix data into internal structures + * + * \param [in] current_phase an indication of which solution phase this + * load is being performed for. + * + * \return \c true if the matrix was loaded, \c false if not + */ + bool loadA_impl(EPhase current_phase); + + + ////////// Internal routines (not called from outside) ////////// + + /** \internal + * + * \brief Throws an appropriate runtime error in the event that + * error < 0 . + * + * \param phase the phase for which this error is being checked. + * The meaning of a particular error value may depend + * on which phase was last performed + * + * \param error the error value returned by CssMKL for the + * given phase. + * + * We broadcast the input value from the rank=0 image to all + * others before checking the value. Before doing this we convert + * the error into an \c int value which allow us to easily + * broadcast its value to all process images without having to + * enable Teuchos long long support in the case where the user is + * making use of pardiso_64. The valid values of error certainly + * fit within an int. + */ + void check_css_mkl_error(EPhase phase, int_t error) const; + + /** \internal + * + * Sets the internal mtype_ member. Errors are thrown for + * unacceptable scalar/mtype combinations. + * + * \param mtype the type of the matrix. This may come as input + * from the interface user, or may be set to the default value in + * case mtype == 0 on entry to this function. 
+ */ + void set_css_mkl_matrix_type(int_t mtype = 0); + void set_css_mkl_default_parameters(void* pt[], int_t iparm[]) const; + + + /* Declare private variables necessary for interaction with the + * CssMKL TPL. + * + * For example, the following Arrays are persisting storage arrays + * for A, X, and B that can be used with solvers expecting a + * compressed-row representation of the matrix A. + */ + + /// Stores the values of the nonzero entries for CssMKL + host_value_type_array nzvals_view_; + host_value_type_array nzvals_temp_; + /// Stores the location in \c Ai_ and Aval_ that starts row j + host_ordinal_type_array colind_view_; + /// Stores the row indices of the nonzero entries + host_size_type_array rowptr_view_; + /// Persisting, contiguous, 1D store for X + mutable Teuchos::Array xvals_; + /// Persisting, contiguous, 1D store for B + mutable Teuchos::Array bvals_; + + /// CssMKL internal data address pointer + mutable void* pt_[64]; + /// The matrix type. We deal only with unsymmetrix matrices + int_t mtype_; + /// Number of equations in the sparse linear system + int_t n_; + /// Permutation vector + Teuchos::Array perm_; + /// number of righthand-side vectors + mutable int_t nrhs_; + + bool css_initialized_; + bool is_contiguous_; + + /// CssMKL parameter vector. Note that the documentation uses + /// 1-based indexing, but our interface must use 0-based indexing + int_t iparm_[64]; + + /// The messaging level. Set to 1 if you wish for Pardiso MKL to print statistical info + static const int_t msglvl_; + + // We will deal with 1 factor at a time + static const int_t maxfct_; + static const int_t mnum_; + + + static const bool complex_ + = Meta::or_, + std::is_same_v>::value; + + MPI_Fint CssComm_; + Teuchos::RCP css_rowmap_; + +}; // End class CssMKL + + +// Specialize the solver_traits struct for CssMKL. 
+template <> +struct solver_traits { +#ifdef HAVE_TEUCHOS_COMPLEX + typedef Meta::make_list6, + std::complex, + PMKL::_MKL_Complex8, + PMKL::_DOUBLE_COMPLEX_t> supported_scalars; +#else +typedef Meta::make_list2 supported_scalars; +#endif +}; + +} // end namespace Amesos + +#endif // AMESOS2_CSSMKL_DECL_HPP diff --git a/packages/amesos2/src/Amesos2_CssMKL_def.hpp b/packages/amesos2/src/Amesos2_CssMKL_def.hpp new file mode 100644 index 000000000000..87cc1c0a0bd9 --- /dev/null +++ b/packages/amesos2/src/Amesos2_CssMKL_def.hpp @@ -0,0 +1,636 @@ +// @HEADER +// +// *********************************************************************** +// +// Amesos2: Templated Direct Sparse Solver Package +// Copyright 2011 Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// *********************************************************************** +// +// @HEADER + + +/** + \file Amesos2_CssMKL_def.hpp + \author Eric Bavier + \date Wed Jul 27 12:52:00 MDT 2011 + + \brief Definitions for the Amesos2 CssMKL interface. +*/ + +#ifndef AMESOS2_CSSMKL_DEF_HPP +#define AMESOS2_CSSMKL_DEF_HPP + +#include + +#include +#include +#include + +#include "Amesos2_SolverCore_def.hpp" +#include "Amesos2_CssMKL_decl.hpp" + + +namespace Amesos2 { + + namespace PMKL { +# include +# include + } + + template + CssMKL::CssMKL(Teuchos::RCP A, + Teuchos::RCP X, + Teuchos::RCP B) + : SolverCore(A, X, B) // instantiate superclass + , n_(Teuchos::as(this->globalNumRows_)) + , perm_(this->globalNumRows_) + , nrhs_(0) + , css_initialized_(false) + , is_contiguous_(true) + { + // set the default matrix type + set_css_mkl_matrix_type(); + set_css_mkl_default_parameters(pt_, iparm_); + + // index base + const global_ordinal_type indexBase = this->matrixA_->getRowMap ()->getIndexBase (); + iparm_[34] = (indexBase == 0 ? 1 : 0); /* Use one or zero-based indexing */ + // 1D block-row distribution + auto frow = this->matrixA_->getRowMap()->getMinGlobalIndex(); + auto nrows = this->matrixA_->getLocalNumRows(); + iparm_[39] = 2; /* Matrix input format. */ + iparm_[40] = frow; /* > Beginning of input domain. */ + iparm_[41] = frow+nrows-1; /* > End of input domain. 
*/ + + // get MPI Comm + Teuchos::RCP > matComm = this->matrixA_->getComm (); + TEUCHOS_TEST_FOR_EXCEPTION( + matComm.is_null (), std::logic_error, "Amesos2::CssMKL " + "constructor: The matrix's communicator is null!"); + Teuchos::RCP > matMpiComm = + Teuchos::rcp_dynamic_cast > (matComm); + TEUCHOS_TEST_FOR_EXCEPTION( + matMpiComm.is_null (), std::logic_error, "Amesos2::CssMKL " + "constructor: The matrix's communicator is not an MpiComm!"); + TEUCHOS_TEST_FOR_EXCEPTION( + matMpiComm->getRawMpiComm ().is_null (), std::logic_error, "Amesos2::" + "CssMKL constructor: The matrix's communicator claims to be a " + "Teuchos::MpiComm, but its getRawPtrComm() method returns " + "Teuchos::null! This means that the underlying MPI_Comm doesn't even " + "exist, which likely implies that the Teuchos::MpiComm was constructed " + "incorrectly. It means something different than if the MPI_Comm were " + "MPI_COMM_NULL."); + MPI_Comm CssComm = *(matMpiComm->getRawMpiComm ()); + CssComm_ = MPI_Comm_c2f(CssComm); + + // rowmap for loadA (to have locally contiguous) + css_rowmap_ = + Teuchos::rcp (new map_type (this->globalNumRows_, nrows, indexBase, matComm)); + } + + + template + CssMKL::~CssMKL( ) + { + /* + * Free any memory allocated by the CssMKL library functions + */ + int_t error = 0; + if (css_initialized_) + { + int_t phase = -1; // release all internal solver memory + void *bdummy, *xdummy; + const MPI_Fint CssComm = CssComm_; + function_map::cluster_sparse_solver( pt_, const_cast(&maxfct_), + const_cast(&mnum_), &mtype_, &phase, &n_, + nzvals_view_.data(), rowptr_view_.data(), + colind_view_.data(), perm_.getRawPtr(), &nrhs_, iparm_, + const_cast(&msglvl_), &bdummy, &xdummy, &CssComm, &error ); + css_initialized_ = false; + } + check_css_mkl_error(Amesos2::CLEAN, error); + } + + + template + int + CssMKL::preOrdering_impl() + { + // preOrdering done during "Analysis" (aka symbolic + // factorization) phase + return(0); + } + + + template + int + 
CssMKL::symbolicFactorization_impl() + { + int_t error = 0; + { +#ifdef HAVE_AMESOS2_TIMERS + Teuchos::TimeMonitor symbFactTimer( this->timers_.symFactTime_ ); +#endif + + int_t phase = 11; // Analysis + void *bdummy, *xdummy; + const MPI_Fint CssComm = CssComm_; + function_map::cluster_sparse_solver( pt_, const_cast(&maxfct_), + const_cast(&mnum_), &mtype_, &phase, &n_, + nzvals_view_.data(), rowptr_view_.data(), + colind_view_.data(), perm_.getRawPtr(), &nrhs_, iparm_, + const_cast(&msglvl_), &bdummy, &xdummy, &CssComm, &error ); + } + check_css_mkl_error(Amesos2::SYMBFACT, error); + + // Pardiso only lets you retrieve the total number of factor + // non-zeros, not for each individually. We should document how + // such a situation is reported. + this->setNnzLU(iparm_[17]); + css_initialized_ = true; + + return(0); + } + + + template + int + CssMKL::numericFactorization_impl() + { + int_t error = 0; + { +#ifdef HAVE_AMESOS2_TIMERS + Teuchos::TimeMonitor numFactTimer( this->timers_.numFactTime_ ); +#endif + + //int_t phase = 12; // Analysis, numerical factorization + int_t phase = 22; // Numerical factorization + void *bdummy, *xdummy; + const MPI_Fint CssComm = CssComm_; + function_map::cluster_sparse_solver( pt_, const_cast(&maxfct_), + const_cast(&mnum_), &mtype_, &phase, &n_, + nzvals_view_.data(), rowptr_view_.data(), + colind_view_.data(), perm_.getRawPtr(), &nrhs_, iparm_, + const_cast(&msglvl_), &bdummy, &xdummy, &CssComm, &error ); + } + check_css_mkl_error(Amesos2::NUMFACT, error); + + return( 0 ); + } + + + template + int + CssMKL::solve_impl(const Teuchos::Ptr > X, + const Teuchos::Ptr > B) const + { + using Teuchos::as; + + // Get B data + const local_ordinal_type ld_rhs = this->matrixA_->getLocalNumRows(); + nrhs_ = as(X->getGlobalNumVectors()); + + const size_t val_store_size = as(ld_rhs * nrhs_); + xvals_.resize(val_store_size); + bvals_.resize(val_store_size); + { +#ifdef HAVE_AMESOS2_TIMERS + Teuchos::TimeMonitor mvConvTimer( 
this->timers_.vecConvTime_ ); + Teuchos::TimeMonitor redistTimer( this->timers_.vecRedistTime_ ); +#endif + + Util::get_1d_copy_helper< + MultiVecAdapter, + solver_scalar_type>::do_get(B, bvals_(), + as(ld_rhs), + DISTRIBUTED_NO_OVERLAP, + this->rowIndexBase_); + } + + int_t error = 0; + { +#ifdef HAVE_AMESOS2_TIMERS + Teuchos::TimeMonitor solveTimer( this->timers_.solveTime_ ); +#endif + + const int_t phase = 33; // Solve, iterative refinement + const MPI_Fint CssComm = CssComm_; + function_map::cluster_sparse_solver( pt_, + const_cast(&maxfct_), + const_cast(&mnum_), + const_cast(&mtype_), + const_cast(&phase), + const_cast(&n_), + const_cast(nzvals_view_.data()), + const_cast(rowptr_view_.data()), + const_cast(colind_view_.data()), + const_cast(perm_.getRawPtr()), + &nrhs_, + const_cast(iparm_), + const_cast(&msglvl_), + as(bvals_.getRawPtr()), + as(xvals_.getRawPtr()), &CssComm, &error ); + } + check_css_mkl_error(Amesos2::SOLVE, error); + + /* Get values to X */ + { +#ifdef HAVE_AMESOS2_TIMERS + Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_); +#endif + + Util::put_1d_data_helper< + MultiVecAdapter, + solver_scalar_type>::do_put(X, xvals_(), + as(ld_rhs), + DISTRIBUTED_NO_OVERLAP); + } + + return( 0 ); +} + + + template + bool + CssMKL::matrixShapeOK_impl() const + { + // CssMKL supports square matrices + return( this->globalNumRows_ == this->globalNumCols_ ); + } + + + template + void + CssMKL::setParameters_impl(const Teuchos::RCP & parameterList ) + { + using Teuchos::RCP; + using Teuchos::getIntegralValue; + using Teuchos::ParameterEntryValidator; + + RCP valid_params = getValidParameters_impl(); + + // Fill-in reducing ordering, as accepted by the validator: 2 = nested dissection from METIS, 3 = parallel nested dissection, 10 = MPI nested dissection and symbolic factorization (the default set by set_css_mkl_default_parameters) + if( parameterList->isParameter("IPARM(2)") ) + { + RCP fillin_validator = valid_params->getEntry("IPARM(2)").validator(); + parameterList->getEntry("IPARM(2)").setValidator(fillin_validator); + iparm_[1] = getIntegralValue(*parameterList, "IPARM(2)"); 
+ } + + // Max numbers of iterative refinement steps + if( parameterList->isParameter("IPARM(8)") ) + { + RCP refine_validator = valid_params->getEntry("IPARM(8)").validator(); + parameterList->getEntry("IPARM(8)").setValidator(refine_validator); + iparm_[7] = getIntegralValue(*parameterList, "IPARM(8)"); + } + + // Perturb the pivot elements + if( parameterList->isParameter("IPARM(10)") ) + { + RCP pivot_perturb_validator = valid_params->getEntry("IPARM(10)").validator(); + parameterList->getEntry("IPARM(10)").setValidator(pivot_perturb_validator); + iparm_[9] = getIntegralValue(*parameterList, "IPARM(10)"); + } + + // First check if the control object requests a transpose solve. + // Then solver specific options can override this. + iparm_[11] = this->control_.useTranspose_ ? 2 : 0; + // Non-transposed (0), conjugate-transposed (1), or transposed (2) solve + if( parameterList->isParameter("IPARM(12)") ) + { + RCP trans_validator = valid_params->getEntry("IPARM(12)").validator(); + parameterList->getEntry("IPARM(12)").setValidator(trans_validator); + iparm_[11] = getIntegralValue(*parameterList, "IPARM(12)"); + } + + // Weighted matching: 0 = no matching, 1 = use matching (set_css_mkl_default_parameters selects 0) + if( parameterList->isParameter("IPARM(13)") ) + { + RCP trans_validator = valid_params->getEntry("IPARM(13)").validator(); + parameterList->getEntry("IPARM(13)").setValidator(trans_validator); + iparm_[12] = getIntegralValue(*parameterList, "IPARM(13)"); + } + + // Output: Number of nonzeros in the factor LU + if( parameterList->isParameter("IPARM(18)") ) + { + RCP report_validator = valid_params->getEntry("IPARM(18)").validator(); + parameterList->getEntry("IPARM(18)").setValidator(report_validator); + iparm_[17] = getIntegralValue(*parameterList, "IPARM(18)"); + } + + if( parameterList->isParameter("IsContiguous") ){ + is_contiguous_ = parameterList->get("IsContiguous"); + } + } + + +/* + * TODO: It would be nice if the parameters could be expressed as + * either 
all string or as all integers. I see no way of doing this + * at present with the standard validators. However, we could create + * our own validators or kindly ask the Teuchos team to add some + * features for use. + * + * The issue is that with the current validators we cannot specify + * arbitrary sets of numbers that are the only allowed parameters. + * For example the IPARM(2) parameter can take only the values 0, 2, + * and 3. The EnhancedNumberValidator can take a min value, and max + * value, and a step size, but with those options there is no way to + * specify the needed set. + * + * Another missing feature is the ability to give docstrings for such + * numbers. For example IPARM(25) can take on the values 0 and 1. + * This would be easy enough to accomplish with just a number + * validator, but then have no way to document the effect of each + * value. + */ +template +Teuchos::RCP +CssMKL::getValidParameters_impl() const +{ + using std::string; + using Teuchos::as; + using Teuchos::RCP; + using Teuchos::tuple; + using Teuchos::toString; + using Teuchos::EnhancedNumberValidator; + using Teuchos::setStringToIntegralParameter; + using Teuchos::anyNumberParameterEntryValidator; + + static Teuchos::RCP valid_params; + + if( is_null(valid_params) ){ + Teuchos::RCP pl = Teuchos::parameterList(); + + void* pt_temp[64]; + int_t iparm_temp[64]; + set_css_mkl_default_parameters(pt_temp, iparm_temp); + setStringToIntegralParameter("IPARM(2)", toString(iparm_temp[1]), + "Fill-in reducing ordering for the input matrix", + tuple("2", "3", "10"), + tuple("Nested dissection algorithm from METIS", + "Parallel version of the nested dissection algorithm", + "MPI version of the nested dissection and symbolic factorization algorithms"), + tuple(2, 3, 10), + pl.getRawPtr()); + + setStringToIntegralParameter("IPARM(12)", toString(iparm_temp[11]), + "Solve with transposed or conjugate transposed matrix A", + tuple("0", "1", "2"), + tuple("Non-transposed", + 
"Conjugate-transposed", + "Transposed"), + tuple(0, 1, 2), + pl.getRawPtr()); + + setStringToIntegralParameter("IPARM(13)", toString(iparm_temp[12]), + "Use weighted matching", + tuple("0", "1"), + tuple("No matching", "Use matching"), + tuple(0, 1), + pl.getRawPtr()); + + Teuchos::AnyNumberParameterEntryValidator::EPreferredType preferred_int = + Teuchos::AnyNumberParameterEntryValidator::PREFER_INT; + + Teuchos::AnyNumberParameterEntryValidator::AcceptedTypes accept_int( false ); + accept_int.allowInt( true ); + + pl->set("IPARM(8)" , as(iparm_temp[7]) , "Iterative refinement step", + anyNumberParameterEntryValidator(preferred_int, accept_int)); + + pl->set("IPARM(10)", as(iparm_temp[9]) , "Pivoting perturbation", + anyNumberParameterEntryValidator(preferred_int, accept_int)); + + pl->set("IPARM(18)", as(iparm_temp[17]), "Report the number of non-zero elements in the factors", + anyNumberParameterEntryValidator(preferred_int, accept_int)); + + pl->set("IsContiguous", true, "Whether GIDs contiguous"); + + valid_params = pl; + } + + return valid_params; +} + + + +template +bool +CssMKL::loadA_impl(EPhase current_phase) +{ +#ifdef HAVE_AMESOS2_TIMERS + Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_); +#endif + + // CssMKL does not need matrix data in the pre-ordering phase + if( current_phase == PREORDERING ) return( false ); + + EDistribution dist_option = (iparm_[39] != 0 ? DISTRIBUTED_NO_OVERLAP : ((is_contiguous_ == true) ? 
ROOTED : CONTIGUOUS_AND_ROOTED)); + if (current_phase == SYMBFACT) { + if (dist_option == DISTRIBUTED_NO_OVERLAP) { + Kokkos::resize(nzvals_temp_, this->matrixA_->getLocalNNZ()); + Kokkos::resize(nzvals_view_, this->matrixA_->getLocalNNZ()); + Kokkos::resize(colind_view_, this->matrixA_->getLocalNNZ()); + Kokkos::resize(rowptr_view_, this->matrixA_->getLocalNumRows() + 1); + } else { + if( this->root_ ) { + Kokkos::resize(nzvals_temp_, this->matrixA_->getGlobalNNZ()); + Kokkos::resize(nzvals_view_, this->matrixA_->getGlobalNNZ()); + Kokkos::resize(colind_view_, this->matrixA_->getGlobalNNZ()); + Kokkos::resize(rowptr_view_, this->matrixA_->getGlobalNumRows() + 1); + } else { + Kokkos::resize(nzvals_temp_, 0); + Kokkos::resize(nzvals_view_, 0); + Kokkos::resize(colind_view_, 0); + Kokkos::resize(rowptr_view_, 0); + } + } + } + + { +#ifdef HAVE_AMESOS2_TIMERS + Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ ); +#endif + int_t nnz_ret = 0; + Util::get_crs_helper_kokkos_view, + host_value_type_array,host_ordinal_type_array, host_size_type_array >::do_get( + this->matrixA_.ptr(), + nzvals_temp_, colind_view_, rowptr_view_, + nnz_ret, + Teuchos::ptrInArg(*css_rowmap_), + dist_option, + SORTED_INDICES); + Kokkos::deep_copy(nzvals_view_, nzvals_temp_); + } + return( true ); +} + + +template +void +CssMKL::check_css_mkl_error(EPhase phase, + int_t error) const +{ + int error_i = error; + Teuchos::broadcast(*(this->getComm()), 0, &error_i); // We only care about root's value + + if( error == 0 ) return; // No error + + std::string errmsg = "Other error"; + switch( error ){ + case -1: + errmsg = "CssMKL reported error: 'Input inconsistent'"; + break; + case -2: + errmsg = "CssMKL reported error: 'Not enough memory'"; + break; + case -3: + errmsg = "CssMKL reported error: 'Reordering problem'"; + break; + case -4: + errmsg = + "CssMKL reported error: 'Zero pivot, numerical " + "factorization or iterative refinement problem'"; + break; + case -5: + errmsg = 
"CssMKL reported error: 'Unclassified (internal) error'"; + break; + case -6: + errmsg = "CssMKL reported error: 'Reordering failed'"; + break; + case -7: + errmsg = "CssMKL reported error: 'Diagonal matrix is singular'"; + break; + case -8: + errmsg = "CssMKL reported error: '32-bit integer overflow problem'"; + break; + case -9: + errmsg = "CssMKL reported error: 'Not enough memory for OOC'"; + break; + case -10: + errmsg = "CssMKL reported error: 'Problems with opening OOC temporary files'"; + break; + case -11: + errmsg = "CssMKL reported error: 'Read/write problem with OOC data file'"; + break; + } + errmsg += (" at phase = "+std::to_string(phase)); + + TEUCHOS_TEST_FOR_EXCEPTION( true, std::runtime_error, errmsg ); +} + + +template +void +CssMKL::set_css_mkl_matrix_type(int_t mtype) +{ + if( mtype == 0 ){ + if( complex_ ){ + mtype_ = 13; // complex, unsymmetric + } else { + mtype_ = 11; // real, unsymmetric + } + } else { + switch( mtype ){ + case 11: + TEUCHOS_TEST_FOR_EXCEPTION( complex_, + std::invalid_argument, + "Cannot set a real Pardiso matrix type with scalar type complex" ); + mtype_ = 11; break; + case 13: + TEUCHOS_TEST_FOR_EXCEPTION( !complex_, + std::invalid_argument, + "Cannot set a complex Pardiso matrix type with non-complex scalars" ); + mtype_ = 13; break; + default: + TEUCHOS_TEST_FOR_EXCEPTION( true, + std::invalid_argument, + "Symmetric matrices are not yet supported by the Amesos2 interface" ); + } + } +} + +template +void +CssMKL::set_css_mkl_default_parameters(void* pt[], int_t iparm[]) const +{ + for( int i = 0; i < 64; ++i ){ + pt[i] = nullptr; + iparm[i] = 0; + } + iparm[0] = 1; /* No solver default */ + // Reset some of the default parameters + iparm[1] = 10; /* 2: Fill-in reordering from METIS, 3: thread dissection, 10: MPI version of the nested dissection and symbolic factorization*/ + iparm[7] = 0; /* Max numbers of iterative refinement steps */ + iparm[9] = 13; /* Perturb the pivot elements with 1E-13 */ + iparm[10] = 0; /* 
Disable nonsymmetric permutation and scaling MPS */ + iparm[11] = 0; /* Normal solve (0), or a transpose solve (1) */ + iparm[12] = 0; /* Do not use (non-)symmetric matchings */ + iparm[17] = -1; /* Output: Number of nonzeros in the factor LU */ + iparm[20] = -1; /* Pivoting for symmetric indefinite matrices */ + iparm[26] = 1; /* Check input matrix is sorted */ + + // set single or double precision + if constexpr ( std::is_same_v ) { + iparm[27] = 1; // single-precision + } else { + iparm[27] = 0; // double-precision + } + iparm[34] = 1; /* Use zero-based indexing */ +} + +template +const char* CssMKL::name = "CSSMKL"; + +template +const typename CssMKL::int_t +CssMKL::msglvl_ = 0; // set to be one, for more CSS messages + +template +const typename CssMKL::int_t +CssMKL::maxfct_ = 1; + +template +const typename CssMKL::int_t +CssMKL::mnum_ = 1; + + +} // end namespace Amesos + +#endif // AMESOS2_CSSMKL_DEF_HPP diff --git a/packages/amesos2/src/Amesos2_Details_LinearSolverFactory_def.hpp b/packages/amesos2/src/Amesos2_Details_LinearSolverFactory_def.hpp index e244c9d9c84f..213cabeeaaf5 100644 --- a/packages/amesos2/src/Amesos2_Details_LinearSolverFactory_def.hpp +++ b/packages/amesos2/src/Amesos2_Details_LinearSolverFactory_def.hpp @@ -196,6 +196,9 @@ class LinearSolver : else if (Amesos2::query ("pardiso_mkl")) { solverName_ = "pardiso_mkl"; } + else if (Amesos2::query ("css_mkl")) { + solverName_ = "css_mkl"; + } else if (Amesos2::query ("mumps")) { solverName_ = "mumps"; } diff --git a/packages/amesos2/src/Amesos2_Factory.cpp b/packages/amesos2/src/Amesos2_Factory.cpp index 1c2f21ee8795..1dbbf0284bfb 100644 --- a/packages/amesos2/src/Amesos2_Factory.cpp +++ b/packages/amesos2/src/Amesos2_Factory.cpp @@ -128,6 +128,14 @@ namespace Amesos2 { solverName == "pardisomkl") { return true; } +#ifdef HAVE_MPI + if (solverName == "amesos2_css_mkl" || + solverName == "css_mkl" || + solverName == "amesos2_cssmkl" || + solverName == "cssmkl") { + return true; + } +#endif 
#endif #ifdef HAVE_AMESOS2_LAPACK diff --git a/packages/amesos2/src/Amesos2_Factory.hpp b/packages/amesos2/src/Amesos2_Factory.hpp index 96f8b6d30740..3f14e95e08ee 100644 --- a/packages/amesos2/src/Amesos2_Factory.hpp +++ b/packages/amesos2/src/Amesos2_Factory.hpp @@ -126,6 +126,7 @@ #ifdef HAVE_AMESOS2_PARDISO_MKL // MKL version of Pardiso #include "Amesos2_PardisoMKL.hpp" +#include "Amesos2_CssMKL.hpp" #endif #ifdef HAVE_AMESOS2_LAPACK @@ -645,6 +646,14 @@ struct throw_no_matrix_support_exception { (solverName == "pardisomkl")){ return handle_solver_matrix_and_type_support::apply(A, X, B); } +#ifdef HAVE_MPI + if((solverName == "amesos2_css_mkl") || + (solverName == "css_mkl") || + (solverName == "amesos2_cssmkl") || + (solverName == "cssmkl")){ + return handle_solver_matrix_and_type_support::apply(A, X, B); + } +#endif #endif #ifdef HAVE_AMESOS2_LAPACK diff --git a/packages/amesos2/src/Amesos2_PardisoMKL_def.hpp b/packages/amesos2/src/Amesos2_PardisoMKL_def.hpp index 5b6010aa7003..11890394f763 100644 --- a/packages/amesos2/src/Amesos2_PardisoMKL_def.hpp +++ b/packages/amesos2/src/Amesos2_PardisoMKL_def.hpp @@ -491,7 +491,7 @@ PardisoMKL::getValidParameters_impl() const Teuchos::AnyNumberParameterEntryValidator::AcceptedTypes accept_int( false ); accept_int.allowInt( true ); - pl->set("IPARM(8)" , as(iparm_temp[8]) , "Iterative refinement step", + pl->set("IPARM(8)" , as(iparm_temp[7]) , "Iterative refinement step", anyNumberParameterEntryValidator(preferred_int, accept_int)); pl->set("IPARM(10)", as(iparm_temp[9]) , "Pivoting perturbation", @@ -527,12 +527,12 @@ PardisoMKL::loadA_impl(EPhase current_phase) Kokkos::resize(rowptr_view_, this->globalNumRows_ + 1); } - int_t nnz_ret = 0; { #ifdef HAVE_AMESOS2_TIMERS Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ ); #endif + int_t nnz_ret = 0; Util::get_crs_helper_kokkos_view< MatrixAdapter, host_value_type_array, host_ordinal_type_array, host_size_type_array>::do_get( diff --git 
a/packages/amesos2/src/CMakeLists.txt b/packages/amesos2/src/CMakeLists.txt index 49725393f27a..04c1e1590233 100644 --- a/packages/amesos2/src/CMakeLists.txt +++ b/packages/amesos2/src/CMakeLists.txt @@ -394,6 +394,19 @@ IF (${PACKAGE_NAME}_ENABLE_PARDISO_MKL) ) ENDIF() +IF (${PACKAGE_NAME}_ENABLE_PARDISO_MKL AND HAVE_MPI) + APPEND_SET(HEADERS + Amesos2_CssMKL.hpp + Amesos2_CssMKL_FunctionMap.hpp + Amesos2_CssMKL_decl.hpp + Amesos2_CssMKL_def.hpp + ) + + APPEND_SET(SOURCES + Amesos2_CssMKL.cpp + ) +ENDIF() + IF (${PACKAGE_NAME}_ENABLE_MUMPS) APPEND_SET(HEADERS Amesos2_MUMPS.hpp diff --git a/packages/amesos2/test/solvers/CMakeLists.txt b/packages/amesos2/test/solvers/CMakeLists.txt index 9e7245601ce5..16d9cda8621d 100644 --- a/packages/amesos2/test/solvers/CMakeLists.txt +++ b/packages/amesos2/test/solvers/CMakeLists.txt @@ -297,7 +297,7 @@ ENDIF() IF (${PACKAGE_NAME}_ENABLE_PARDISO_MKL) TRIBITS_COPY_FILES_TO_BINARY_DIR(SolverTestCopyPardisoMKLFiles SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} - SOURCE_FILES pardiso_mkl_test.xml + SOURCE_FILES pardiso_mkl_test.xml css_mkl_test.xml EXEDEPS Solver_Test ) @@ -308,6 +308,15 @@ IF (${PACKAGE_NAME}_ENABLE_PARDISO_MKL) STANDARD_PASS_OUTPUT COMM serial mpi ) + + TRIBITS_ADD_TEST( + Solver_Test + NAME Css_MKL_Solver_Test + ARGS "--xml-params=css_mkl_test.xml --filedir=${CMAKE_CURRENT_BINARY_DIR}/../matrices/ --multiple-solves --refactor" + NUM_MPI_PROCS 2 + STANDARD_PASS_OUTPUT + COMM mpi + ) ENDIF() diff --git a/packages/amesos2/test/solvers/Solver_Test.cpp b/packages/amesos2/test/solvers/Solver_Test.cpp index 7b8f4731805b..2a331ef6b216 100644 --- a/packages/amesos2/test/solvers/Solver_Test.cpp +++ b/packages/amesos2/test/solvers/Solver_Test.cpp @@ -66,6 +66,7 @@ #include #include #include +#include #include #include @@ -104,6 +105,7 @@ using Teuchos::CONJ_TRANS; using Teuchos::ParameterList; using Teuchos::Time; using Teuchos::TimeMonitor; +using Teuchos::StackedTimer; using Teuchos::Array; using Teuchos::ArrayView; @@ -211,15 
+213,13 @@ int main(int argc, char*argv[]) { Tpetra::ScopeGuard tpetraScope(&argc,&argv); - TimeMonitor TotalTimer(*total_timer); - - Teuchos::RCP > comm = Tpetra::getDefaultComm(); - int root = 0; + Teuchos::RCP > comm = Tpetra::getDefaultComm(); string xml_file("solvers_test.xml"); // default xml file string src_memory_space_name("Undefined"); // default src memory space (no special testing) bool allprint = false; + bool useStackedTimer = false; Teuchos::CommandLineProcessor cmdp; cmdp.setDocString("A test driver for Amesos2 solvers. It reads parameters\n" "from a given (or default) xml file which describes:\n" @@ -231,6 +231,7 @@ int main(int argc, char*argv[]) cmdp.setOption("xml-params", &xml_file, "XML Parameters file"); cmdp.setOption("all-print","root-print",&allprint,"All processors print to out"); + cmdp.setOption("stacked-timer","no-stacked-timer",&useStackedTimer,"Add Stacked Timers"); cmdp.setOption("filedir", &filedir, "Directory to search for matrix files"); cmdp.setOption("verbosity", &verbosity, "Set verbosity level of output"); cmdp.setOption("multiple-solves","single-solve", &multiple_solves, "Perform multiple solves with different RHS arguments"); @@ -241,52 +242,71 @@ int main(int argc, char*argv[]) return EXIT_SUCCESS; // help was printed, exit gracefully. } - // set up output streams based on command-line parameters - fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); - if( !allprint ) fos->setOutputToRootOnly( root ); - - Teuchos::oblackholestream blackhole; - if( verbosity > 3 ){ - compare_fos = fos; - } else { - compare_fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(blackhole)); + RCP stackedTimer; + if(useStackedTimer) + { + stackedTimer = rcp(new StackedTimer("Amesos2 Solve-Test")); + Teuchos::TimeMonitor::setStackedTimer(stackedTimer); } + bool success = true; + { + TimeMonitor TotalTimer(*total_timer); - //Read the contents of the xml file into a ParameterList. 
- if( verbosity > 0 ){ - *fos << "Every proc reading parameters from xml_file: " - << xml_file << std::endl; - } - ParameterList test_params = - Teuchos::ParameterXMLFileReader(xml_file).getParameters(); + // set up output streams based on command-line parameters + fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout)); + if( !allprint ) fos->setOutputToRootOnly( root ); - // Check the parameterlist for the presence of any of the other params - if( test_params.isParameter("all-print") ){ - allprint = test_params.get("all-print"); - } - if( test_params.isParameter("filedir") ){ - filedir = test_params.get("filedir"); - } - if( test_params.isParameter("verbosity") ){ - verbosity = test_params.get("verbosity"); - } + Teuchos::oblackholestream blackhole; + if( verbosity > 3 ){ + compare_fos = fos; + } else { + compare_fos = Teuchos::fancyOStream(Teuchos::rcpFromRef(blackhole)); + } + //Read the contents of the xml file into a ParameterList. + if( verbosity > 0 ){ + *fos << "Every proc reading parameters from xml_file: " + << xml_file << std::endl; + } + ParameterList test_params = + Teuchos::ParameterXMLFileReader(xml_file).getParameters(); - // Go through the input parameters and execute tests accordingly. - bool success = true; - ParameterList::ConstIterator mat_it; - for( mat_it = test_params.begin(); mat_it != test_params.end(); ++mat_it ){ - if( test_params.entry(mat_it).isList() ){ // each matrix entry must be a list - success &= do_mat_test(Teuchos::getValue(test_params.entry(mat_it))); - } else { - *fos << "unexpected non-list entry in xml input, ignoring..." 
<< std::endl; + // Check the parameterlist for the presence of any of the other params + if( test_params.isParameter("all-print") ){ + allprint = test_params.get("all-print"); + } + if( test_params.isParameter("filedir") ){ + filedir = test_params.get("filedir"); + } + if( test_params.isParameter("verbosity") ){ + verbosity = test_params.get("verbosity"); } - } + // Go through the input parameters and execute tests accordingly. + ParameterList::ConstIterator mat_it; + for( mat_it = test_params.begin(); mat_it != test_params.end(); ++mat_it ){ + if( test_params.entry(mat_it).isList() ){ // each matrix entry must be a list + success &= do_mat_test(Teuchos::getValue(test_params.entry(mat_it))); + } else { + *fos << "unexpected non-list entry in xml input, ignoring..." << std::endl; + } + } + } // The summary table is very verbose if( verbosity > 3 ){ TimeMonitor::summarize(); } + if(useStackedTimer) + { + stackedTimer->stopBaseTimer(); + StackedTimer::OutputOptions options; + options.num_histogram=3; + options.print_warnings = false; + options.output_histogram = true; + options.output_fraction=true; + options.output_minmax = true; + stackedTimer->report(std::cout, comm, options); + } // This output is used to indicate a passed test, the test framework // will parse for it. 
@@ -624,16 +644,31 @@ do_solve_routine(const string& solver_name, solver->setParameters( rcpFromRef(solve_params) ); switch (phase) { case Amesos2::CLEAN: + if (verbosity > 2) { + *fos << endl << " ** CLEAN **" << std::endl << std::flush; + } break; case Amesos2::PREORDERING: + if (verbosity > 2) { + *fos << endl << " ** preOrdering **" << std::endl << std::flush; + } solver->preOrdering (); break; case Amesos2::SYMBFACT: + if (verbosity > 2) { + *fos << endl << " ** symbolicFactorization **" << std::endl << std::flush; + } solver->symbolicFactorization (); break; case Amesos2::NUMFACT: + if (verbosity > 2) { + *fos << endl << " ** numlicFactorization **" << std::endl << std::flush; + } solver->numericFactorization (); } + if (verbosity > 2) { + *fos << endl << " ** done **" << std::endl << std::flush; + } ++phase; } @@ -673,21 +708,33 @@ do_solve_routine(const string& solver_name, // Do first solve according to our current style switch( style ){ case SOLVE_VERBOSE: + if (verbosity > 2) { + *fos << endl << " ++ VERBOSE ++" << std::endl << std::flush; + } solver->preOrdering(); solver->symbolicFactorization(); solver->numericFactorization(); solver->solve(); break; case SOLVE_XB: + if (verbosity > 2) { + *fos << endl << " ++ SOLVE_XB ++" << std::endl << std::flush; + } solver->preOrdering(); solver->symbolicFactorization(); solver->numericFactorization(); solver->solve(outArg(*Xhat), ptrInArg(**rhs_it)); break; case SOLVE_SHORT: + if (verbosity > 2) { + *fos << endl << " ++ SOLVE_SHORT ++" << std::endl << std::flush; + } solver->solve(outArg(*Xhat), ptrInArg(**rhs_it)); break; } + if (verbosity > 2) { + *fos << endl << " ++ DONE ++" << std::endl << std::flush; + } success &= checker(*x_it, Xhat); if (! 
success) { @@ -939,7 +986,7 @@ bool do_tpetra_test_with_types(const string& mm_file, typedef CrsMatrix MAT; typedef MultiVector MV; const size_t numVecs = 5; // arbitrary number - const size_t numRHS = 5; // also arbitrary + const size_t numRHS = 5; // also arbitrary bool transpose = solve_params.get("Transpose", false); diff --git a/packages/amesos2/test/solvers/css_mkl_test.xml b/packages/amesos2/test/solvers/css_mkl_test.xml new file mode 100644 index 000000000000..c39c1d0a3b7e --- /dev/null +++ b/packages/amesos2/test/solvers/css_mkl_test.xml @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/packages/belos/doc/DoxyfileWeb b/packages/belos/doc/DoxyfileWeb index a80c014af482..69e80a6e0752 100755 --- a/packages/belos/doc/DoxyfileWeb +++ b/packages/belos/doc/DoxyfileWeb @@ -118,6 +118,7 @@ EXAMPLE_PATH = ../epetra/example \ ../epetra/example/PCPG/PCPGEpetraExFile.cpp \ ../epetra/example/TFQMR/TFQMREpetraExFile.cpp \ ../epetra/example/TFQMR/PseudoBlockTFQMREpetraExFile.cpp \ + ../epetra/example/SolverFactory/SolverFactoryEpetraGaleriEx.cpp \ ../tpetra/example \ ../tpetra/example/BlockCG/BlockCGTpetraExFile.cpp \ ../tpetra/example/BlockCG/PseudoBlockCGTpetraExFile.cpp \ @@ -128,7 +129,8 @@ EXAMPLE_PATH = ../epetra/example \ ../tpetra/example/PCPG/PCPGTpetraExFile.cpp \ ../tpetra/example/GCRODR/GCRODRTpetraExFile.cpp \ ../tpetra/example/TFQMR/TFQMRTpetraExFile.cpp \ - ../tpetra/example/TFQMR/PseudoBlockTFQMREpetraExFile.cpp + ../tpetra/example/TFQMR/PseudoBlockTFQMREpetraExFile.cpp \ + ../tpetra/example/SolverFactory/SolverFactoryTpetraGaleriEx.cpp EXAMPLE_PATTERNS = *.cpp *.hpp IMAGE_PATH = INPUT_FILTER = diff --git a/packages/belos/epetra/example/CMakeLists.txt b/packages/belos/epetra/example/CMakeLists.txt index ec560161c2b6..a7cb81112d2e 100644 --- a/packages/belos/epetra/example/CMakeLists.txt +++ b/packages/belos/epetra/example/CMakeLists.txt @@ -6,3 +6,4 @@ ADD_SUBDIRECTORY(RCG) 
ADD_SUBDIRECTORY(PCPG) ADD_SUBDIRECTORY(TFQMR) ADD_SUBDIRECTORY(LSQR) +ADD_SUBDIRECTORY(SolverFactory) diff --git a/packages/belos/epetra/example/SolverFactory/CMakeLists.txt b/packages/belos/epetra/example/SolverFactory/CMakeLists.txt new file mode 100644 index 000000000000..8275b696c26e --- /dev/null +++ b/packages/belos/epetra/example/SolverFactory/CMakeLists.txt @@ -0,0 +1,15 @@ + +ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_Galeri) + +IF (${PACKAGE_NAME}_ENABLE_Galeri) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + SolverFactory_Epetra_Galeri_Ex + SOURCES SolverFactoryEpetraGaleriEx.cpp + COMM serial mpi + ARGS --verbose + STANDARD_PASS_OUTPUT + ) + +ENDIF (${PACKAGE_NAME}_ENABLE_Galeri) + diff --git a/packages/belos/epetra/example/SolverFactory/SolverFactoryEpetraGaleriEx.cpp b/packages/belos/epetra/example/SolverFactory/SolverFactoryEpetraGaleriEx.cpp new file mode 100644 index 000000000000..46da5c502138 --- /dev/null +++ b/packages/belos/epetra/example/SolverFactory/SolverFactoryEpetraGaleriEx.cpp @@ -0,0 +1,266 @@ +// @HEADER +// *********************************************************************** +// +// Belos: Block Linear Solvers Package +// Copyright 2004 Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// *********************************************************************** +// @HEADER + +#include "BelosSolverFactory.hpp" +#include "BelosEpetraAdapter.hpp" +#include "Epetra_CrsMatrix.h" +#include "Epetra_MultiVector.h" + +// The Trilinos package Galeri has many example problems. +#include "Galeri_Maps.h" +#include "Galeri_CrsMatrices.h" + +#include +#include +#include +#include +#include + +// Include selected communicator class required by Epetra objects +#ifdef EPETRA_MPI +# include "Epetra_MpiComm.h" +#else +# include "Epetra_SerialComm.h" +#endif // EPETRA_MPI + +// **************************************************************************** +// BEGIN MAIN ROUTINE +// **************************************************************************** + +int +main (int argc, char *argv[]) +{ + int MyPID = 0; + + // Belos solvers have the following template parameters: + // + // - Scalar: The type of dot product results. 
+ // - MV: The type of (multi)vectors. + // - OP: The type of operators (functions from multivector to + // multivector). A matrix (like Epetra_CrsMatrix) is an example + // of an operator; an Ifpack preconditioner is another example. + // + // Here, Scalar is double, MV is Epetra_MultiVector, and OP is + // Epetra_Operator. + typedef double ST; + typedef Teuchos::ScalarTraits SCT; + typedef SCT::magnitudeType MT; + typedef Epetra_MultiVector MV; + typedef Epetra_Operator OP; + typedef Belos::MultiVecTraits MVT; + typedef Belos::OperatorTraits OPT; + + using Teuchos::ParameterList; + using Teuchos::RCP; + using Teuchos::rcp; + +#ifdef EPETRA_MPI + MPI_Init (&argc, &argv); + Epetra_MpiComm Comm (MPI_COMM_WORLD); +#else + Epetra_SerialComm Comm; +#endif // EPETRA_MPI + +bool verbose = false; +bool success = true; +try { + bool proc_verbose = false; + bool debug = false; + int frequency = -1; // frequency of status test output + int blocksize = 1; // blocksize + int numrhs = 1; // number of right-hand sides to solve for + int maxiters = -1; // maximum number of iterations allowed per linear system + int maxsubspace = 50; // maximum number of blocks the solver can use for the subspace + int maxrestarts = 15; // number of restarts allowed + int nx = 10; // number of discretization points in each direction + MT tol = 1.0e-5; // relative residual tolerance + + Teuchos::CommandLineProcessor cmdp(false,true); + cmdp.setOption("verbose","quiet",&verbose,"Print messages and results."); + cmdp.setOption("debug","nondebug",&debug,"Print debugging information from solver."); + cmdp.setOption("frequency",&frequency,"Solvers frequency for printing residuals (#iters)."); + cmdp.setOption("tol",&tol,"Relative residual tolerance used by solver."); + cmdp.setOption("num-rhs",&numrhs,"Number of right-hand sides to be solved for."); + cmdp.setOption("block-size",&blocksize,"Block size used by solver."); + cmdp.setOption("max-iters",&maxiters,"Maximum number of iterations per linear 
system (-1 = adapted to problem/block size)."); + cmdp.setOption("max-subspace",&maxsubspace,"Maximum number of blocks the solver can use for the subspace."); + cmdp.setOption("max-restarts",&maxrestarts,"Maximum number of restarts allowed for solver."); + cmdp.setOption("nx",&nx,"Number of discretization points in each direction of 2D Laplacian."); + if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) { + return -1; + } + if (!verbose) + frequency = -1; // reset frequency if test is not verbose + + // + // Set up the test problem. + // + // We use Trilinos' Galeri package to construct a test problem. + // Here, we use a discretization of the 2-D Laplacian operator. + // The global mesh size is nx * nx. + // + Teuchos::ParameterList GaleriList; + GaleriList.set ("n", nx * nx); + GaleriList.set ("nx", nx); + GaleriList.set ("ny", nx); + RCP Map = rcp (Galeri::CreateMap ("Linear", Comm, GaleriList)); + RCP A = + rcp (Galeri::CreateCrsMatrix ("Laplace2D", &*Map, GaleriList)); + + proc_verbose = verbose && (MyPID==0); /* Only print on the zero processor */ + + // Create RHS using random solution vector + RCP B = rcp (new MV (*Map, numrhs)); + RCP X = rcp (new MV (*Map, numrhs)); + RCP Xexact = rcp (new MV (*Map, numrhs)); + Xexact->Random (); + + A->Apply( *Xexact, *B ); + + // + // ********Other information used by block solver*********** + // *****************(can be user specified)****************** + // + const int NumGlobalElements = B->GlobalLength(); + if (maxiters == -1) + maxiters = NumGlobalElements/blocksize - 1; // maximum number of iterations to run + // + ParameterList belosList; + belosList.set( "Num Blocks", maxsubspace); // Maximum number of blocks in Krylov factorization + belosList.set( "Block Size", blocksize ); // Blocksize to be used by iterative solver + belosList.set( "Maximum Iterations", maxiters ); // Maximum number of iterations allowed + belosList.set( "Maximum Restarts", maxrestarts ); // Maximum number of 
restarts allowed + belosList.set( "Convergence Tolerance", tol ); // Relative convergence tolerance requested + int verbosity = Belos::Errors + Belos::Warnings; + if (verbose) { + verbosity += Belos::TimingDetails + Belos::StatusTestDetails; + if (frequency > 0) + belosList.set( "Output Frequency", frequency ); + } + if (debug) { + verbosity += Belos::Debug; + } + belosList.set( "Verbosity", verbosity ); + // + // Construct an unpreconditioned linear problem instance. + // + Belos::LinearProblem problem( A, X, B ); + bool set = problem.setProblem(); + if (set == false) { + if (proc_verbose) + std::cout << std::endl << "ERROR: Belos::LinearProblem failed to set up correctly!" << std::endl; + return -1; + } + // + // + // ******************************************************************* + // ****************Start the solver iteration************************* + // ******************************************************************* + // + // Create a solver factory + Belos::SolverFactory factory; + + // Create an iterative solver manager + std::string solverName = "Block GMRES"; + RCP< Belos::SolverManager > newSolver = factory.create (solverName, rcp(&belosList,false)); + + // Set the problem on the solver manager + newSolver->setProblem( rcp(&problem,false) ); + // + // **********Print out information about problem******************* + // + if (proc_verbose) { + std::cout << std::endl << std::endl; + std::cout << "Dimension of matrix: " << NumGlobalElements << std::endl; + std::cout << "Number of right-hand sides: " << numrhs << std::endl; + std::cout << "Block size used by solver: " << blocksize << std::endl; + std::cout << "Max number of restarts allowed: " << maxrestarts << std::endl; + std::cout << "Max number of Gmres iterations per linear system: " << maxiters << std::endl; + std::cout << "Relative residual tolerance: " << tol << std::endl; + std::cout << std::endl; + } + // + // Perform solve + // + Belos::ReturnType ret = newSolver->solve(); + // + // Get 
the number of iterations for this solve. + // + int numIters = newSolver->getNumIters(); + if (proc_verbose) + std::cout << "Number of iterations performed for this solve: " << numIters << std::endl; + // + // Compute actual residuals. + // + bool badRes = false; + std::vector actual_resids( numrhs ); + std::vector rhs_norm( numrhs ); + Epetra_MultiVector resid(*Map, numrhs); + OPT::Apply( *A, *X, resid ); + MVT::MvAddMv( -1.0, resid, 1.0, *B, resid ); + MVT::MvNorm( resid, actual_resids ); + MVT::MvNorm( *B, rhs_norm ); + if (proc_verbose) { + std::cout<< "---------- Actual Residuals (normalized) ----------"< tol) badRes = true; + } + } + +if (ret!=Belos::Converged || badRes) { + success = false; + if (proc_verbose) + std::cout << "End Result: TEST FAILED" << std::endl; +} else { + if (proc_verbose) + std::cout << "End Result: TEST PASSED" << std::endl; +} +} +TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success); + +#ifdef EPETRA_MPI +MPI_Finalize(); +#endif + +return success ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/packages/belos/src/BelosSolverFactory_Belos.hpp b/packages/belos/src/BelosSolverFactory_Belos.hpp index 16c33b2df2a6..8a780768915a 100644 --- a/packages/belos/src/BelosSolverFactory_Belos.hpp +++ b/packages/belos/src/BelosSolverFactory_Belos.hpp @@ -54,6 +54,13 @@ namespace Belos { +/** \example epetra/example/SolverFactory/SolverFactoryEpetraGaleriEx.cpp + This is an example of how to use the Belos::SolverFactory with Epetra. +*/ +/** \example tpetra/example/SolverFactory/SolverFactoryTpetraGaleriEx.cpp + This is an example of how to use the Belos::SolverFactory with Tpetra. 
+*/ + class BelosSolverFactory : public Impl::SolverFactoryParent,Operator> { public: diff --git a/packages/belos/tpetra/example/CMakeLists.txt b/packages/belos/tpetra/example/CMakeLists.txt index 33a130aa3251..3d46814d0ba4 100644 --- a/packages/belos/tpetra/example/CMakeLists.txt +++ b/packages/belos/tpetra/example/CMakeLists.txt @@ -7,3 +7,4 @@ ADD_SUBDIRECTORY(BlockCG) ADD_SUBDIRECTORY(RCG) ADD_SUBDIRECTORY(TFQMR) ADD_SUBDIRECTORY(PCPG) +ADD_SUBDIRECTORY(SolverFactory) diff --git a/packages/belos/tpetra/example/SolverFactory/CMakeLists.txt b/packages/belos/tpetra/example/SolverFactory/CMakeLists.txt new file mode 100644 index 000000000000..be54cf5d8a04 --- /dev/null +++ b/packages/belos/tpetra/example/SolverFactory/CMakeLists.txt @@ -0,0 +1,14 @@ +ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_Galeri) +ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_Xpetra) + +IF (${PACKAGE_NAME}_ENABLE_Galeri AND ${PACKAGE_NAME}_ENABLE_Xpetra) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + SolverFactory_Tpetra_Galeri_Ex + SOURCES SolverFactoryTpetraGaleriEx.cpp + COMM serial mpi + ARGS --verbose + STANDARD_PASS_OUTPUT + ) + +ENDIF (${PACKAGE_NAME}_ENABLE_Galeri AND ${PACKAGE_NAME}_ENABLE_Xpetra) diff --git a/packages/belos/tpetra/example/SolverFactory/SolverFactoryTpetraGaleriEx.cpp b/packages/belos/tpetra/example/SolverFactory/SolverFactoryTpetraGaleriEx.cpp new file mode 100644 index 000000000000..348bdd9895ae --- /dev/null +++ b/packages/belos/tpetra/example/SolverFactory/SolverFactoryTpetraGaleriEx.cpp @@ -0,0 +1,278 @@ +// @HEADER +// *********************************************************************** +// +// Belos: Block Linear Solvers Package +// Copyright 2004 Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. 
Heroux (maherou@sandia.gov) +// +// *********************************************************************** +// @HEADER + +// Tpetra +#include +#include +#include + +// Galeri +#include +#include +#include + +// Teuchos +#include +#include +#include +#include +#include "Teuchos_ParameterList.hpp" +#include +#include +#include "Teuchos_StandardCatchMacros.hpp" +#include "Teuchos_CommandLineProcessor.hpp" + +// Belos +#include "BelosTpetraAdapter.hpp" +#include "BelosSolverFactory.hpp" + +// **************************************************************************** +// BEGIN RUN ROUTINE +// **************************************************************************** + +template +int run(int argc, char *argv[]) { + + // Belos solvers have the following template parameters: + // + // - Scalar: The type of dot product results. + // - MV: The type of (multi)vectors. + // - OP: The type of operators (functions from multivector to + // multivector). A matrix (like Tpetra::CrsMatrix) is an example + // of an operator; an Ifpack2 preconditioner is another example. + // + // Here, ST is set by the main function, MV is Tpetra::MultiVector, and OP is + // Tpetra::Operator. 
+ using ST = typename Tpetra::MultiVector::scalar_type; + using LO = typename Tpetra::MultiVector<>::local_ordinal_type; + using GO = typename Tpetra::MultiVector<>::global_ordinal_type; + using NT = typename Tpetra::MultiVector<>::node_type; + + using OP = typename Tpetra::Operator; + using MV = typename Tpetra::MultiVector; + using MT = typename Teuchos::ScalarTraits::magnitudeType; + + using tmap_t = Tpetra::Map; + using tvector_t = Tpetra::Vector; + using trowmatrix_t = Tpetra::RowMatrix; + using tcrsmatrix_t = Tpetra::CrsMatrix; + + using MVT = typename Belos::MultiVecTraits; + using OPT = typename Belos::OperatorTraits; + + using Teuchos::RCP; + using Teuchos::rcp; + using Teuchos::ParameterList; + + Teuchos::GlobalMPISession mpiSession (&argc, &argv, &std::cout); + const auto comm = Tpetra::getDefaultComm(); + const int myPID = comm->getRank(); + + bool verbose = false; + bool success = true; + + try { + bool procVerbose = false; + bool debug = false; + int frequency = -1; // frequency of status test output + int blockSize = 1; // blockSize + int numrhs = 1; // number of right-hand sides to solve for + int maxIters = -1; // maximum number of iterations allowed per linear system + int maxSubspace = 50; // maximum number of blocks the solver can use for the subspace + int maxRestarts = 15; // number of restarts allowed + int nx = 10; // number of discretization points in each direction + MT tol = 1.0e-5; // relative residual tolerance + + Teuchos::CommandLineProcessor cmdp(false,true); + cmdp.setOption("verbose","quiet",&verbose,"Print messages and results."); + cmdp.setOption("debug","nondebug",&debug,"Print debugging information from solver."); + cmdp.setOption("frequency",&frequency,"Solvers frequency for printing residuals (#iters)."); + cmdp.setOption("tol",&tol,"Relative residual tolerance used by GMRES solver."); + cmdp.setOption("num-rhs",&numrhs,"Number of right-hand sides to be solved for."); + cmdp.setOption("block-size",&blockSize,"Block size used 
by GMRES."); + cmdp.setOption("max-iters",&maxIters,"Maximum number of iterations per linear system (-1 = adapted to problem/block size)."); + cmdp.setOption("max-subspace",&maxSubspace,"Maximum number of blocks the solver can use for the subspace."); + cmdp.setOption("max-restarts",&maxRestarts,"Maximum number of restarts allowed for GMRES solver."); + cmdp.setOption("nx",&nx,"Number of discretization points in each direction of 3D Laplacian."); + if (cmdp.parse(argc,argv) != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) { + return -1; + } + if (!verbose) + frequency = -1; // reset frequency if test is not verbose + + procVerbose = ( verbose && (myPID==0) ); // Only print on the zero processor + + if (procVerbose) { + std::cout << Belos::Belos_Version() << std::endl << std::endl; + } + + // Set up the test problem. + // + // We use Trilinos' Galeri package to construct a test problem. + // Here, we use a discretization of the 2-D Laplacian operator. + // The global mesh size is nx * nx. 
+ + Teuchos::ParameterList GaleriList; + GaleriList.set ("n", nx * nx ); + GaleriList.set ("nx", nx); + GaleriList.set ("ny", nx); + + auto Map = RCP{Galeri::Xpetra::CreateMap("Cartesian2D", comm, GaleriList)}; + auto GaleriProblem = Galeri::Xpetra::BuildProblem("Laplace2D", Map, GaleriList); + + // Create matrix from problem + auto A = GaleriProblem->BuildMatrix(); + + // Create RHS using random solution vector + RCP B = rcp (new MV (Map, numrhs)); + RCP X = rcp (new MV (Map, numrhs)); + RCP Xexact = rcp (new MV (Map, numrhs)); + MVT::MvRandom(*Xexact); + + OPT::Apply(*A, *Xexact, *B ); + + // ********Other information used by block solver*********** + // *****************(can be user specified)****************** + + const int numGlobalElements = B->getGlobalLength(); + if (maxIters == -1) + maxIters = numGlobalElements/blockSize - 1; // maximum number of iterations to run + + ParameterList belosList; + belosList.set( "Num Blocks", maxSubspace); // Maximum number of blocks in Krylov factorization + belosList.set( "Block Size", blockSize ); // BlockSize to be used by iterative solver + belosList.set( "Maximum Iterations", maxIters ); // Maximum number of iterations allowed + belosList.set( "Maximum Restarts", maxRestarts ); // Maximum number of restarts allowed + belosList.set( "Convergence Tolerance", tol ); // Relative convergence tolerance requested + int verbosity = Belos::Errors + Belos::Warnings; + if (verbose) { + verbosity += Belos::TimingDetails + Belos::StatusTestDetails; + if (frequency > 0) + belosList.set( "Output Frequency", frequency ); + } + if (debug) { + verbosity += Belos::Debug; + } + belosList.set( "Verbosity", verbosity ); + + // Construct an unpreconditioned linear problem instance. + Belos::LinearProblem problem( A, X, B ); + bool set = problem.setProblem(); + if (set == false) { + if (procVerbose) + std::cout << std::endl << "ERROR: Belos::LinearProblem failed to set up correctly!" 
<< std::endl; + return -1; + } + + // + // ******************************************************************* + // ****************Start the solver iteration************************* + // ******************************************************************* + // + // Create a solver factory + Belos::SolverFactory factory; + + // Create an iterative solver manager + std::string solverName = "Block GMRES"; + RCP< Belos::SolverManager > newSolver = factory.create (solverName, rcp(&belosList,false)); + + // Set the problem on the solver manager + newSolver->setProblem( rcp(&problem,false) ); + + + // **********Print out information about problem******************* + + if (procVerbose) { + std::cout << std::endl << std::endl; + std::cout << "Dimension of matrix: " << numGlobalElements << std::endl; + std::cout << "Number of right-hand sides: " << numrhs << std::endl; + std::cout << "Block size used by solver: " << blockSize << std::endl; + std::cout << "Max number of restarts allowed: " << maxRestarts << std::endl; + std::cout << "Max number of Gmres iterations per linear system: " << maxIters << std::endl; + std::cout << "Relative residual tolerance: " << tol << std::endl; + std::cout << std::endl; + } + + // Perform solve + Belos::ReturnType ret = newSolver->solve(); + + // Get the number of iterations for this solve. + int numIters = newSolver->getNumIters(); + if (procVerbose) + std::cout << "Number of iterations performed for this solve: " << numIters << std::endl; + + // Compute actual residuals. 
+ bool badRes = false; + std::vector actualResids( numrhs ); + std::vector rhsNorm( numrhs ); + MV resid(Map, numrhs); + OPT::Apply( *A, *X, resid ); + MVT::MvAddMv( -1.0, resid, 1.0, *B, resid ); + MVT::MvNorm( resid, actualResids ); + MVT::MvNorm( *B, rhsNorm ); + if (procVerbose) { + std::cout<< "---------- Actual Residuals (normalized) ----------"< tol) badRes = true; + } + } + + if (ret!=Belos::Converged || badRes) { + success = false; + if (procVerbose) + std::cout << "End Result: TEST FAILED" << std::endl; + } else { + if (procVerbose) + std::cout << "End Result: TEST PASSED" << std::endl; + } + } + TEUCHOS_STANDARD_CATCH_STATEMENTS(verbose, std::cerr, success); + + return success ? EXIT_SUCCESS : EXIT_FAILURE; +} + +int main(int argc, char *argv[]) { + // run with different ST + return run(argc,argv); + // run(argc,argv); // FAILS +} diff --git a/packages/framework/ini-files/config-specs.ini b/packages/framework/ini-files/config-specs.ini index f917ccefa47a..7eb86169ae42 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -2463,6 +2463,153 @@ use CUDA11-RUN-SERIAL-TESTS use rhel8_sems-cuda-11.4.2-sems-gnu-10.1.0-sems-openmpi-4.1.4_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_no-package-enables use PACKAGE-ENABLES|ALL-NO-EPETRA +[rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_no-package-enables] +# uses sems-v2 modules +use RHEL8 +use RHEL8_COMPILER|CUDA +use NODE-TYPE|CUDA +use BUILD-TYPE|RELEASE +use RHEL8_SEMS_LIB-TYPE|STATIC +use KOKKOS-ARCH|VOLTA70 + +use USE-ASAN|NO +use USE-FPIC|NO +use USE-MPI|YES +use USE-PT|NO +use USE-COMPLEX|YES +use USE-RDC|NO +use USE-UVM|YES +use USE-DEPRECATED|YES + +use PACKAGE-ENABLES|NO-PACKAGE-ENABLES +use PACKAGE-ENABLES|NO-EPETRA +use COMMON_SPACK_TPLS +use SEMS_COMMON_CUDA_11 + +use RHEL7_POST +use CUDA11-RUN-SERIAL-TESTS + +opt-set-cmake-var 
Trilinos_ENABLE_TESTS BOOL FORCE : OFF + +[rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_all] +use rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_no-package-enables +use PACKAGE-ENABLES|ALL-NO-EPETRA + +[rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_no-package-enables] +# uses sems-v2 modules +use RHEL8 +use RHEL8_COMPILER|CUDA +use NODE-TYPE|CUDA +use BUILD-TYPE|RELEASE +use RHEL8_SEMS_LIB-TYPE|STATIC +use KOKKOS-ARCH|VOLTA70 + +use USE-ASAN|NO +use USE-FPIC|NO +use USE-MPI|YES +use USE-PT|NO +use USE-COMPLEX|YES +use USE-RDC|NO +use USE-UVM|NO +use USE-DEPRECATED|YES + +use PACKAGE-ENABLES|NO-PACKAGE-ENABLES +use COMMON_SPACK_TPLS +use SEMS_COMMON_CUDA_11 + +# TPL ENABLE/DISABLE settings +opt-set-cmake-var TPL_ENABLE_BLAS BOOL FORCE : ON +opt-set-cmake-var TPL_ENABLE_BinUtils BOOL FORCE : OFF +opt-set-cmake-var TPL_ENABLE_Boost BOOL FORCE : ON +opt-set-cmake-var TPL_ENABLE_CGNS BOOL FORCE : ON +opt-set-cmake-var TPL_ENABLE_CUDA BOOL FORCE : ON +opt-set-cmake-var TPL_ENABLE_CUSPARSE BOOL FORCE : ON +opt-set-cmake-var TPL_ENABLE_DLlib BOOL FORCE : OFF +opt-set-cmake-var TPL_ENABLE_HDF5 BOOL FORCE : ON +opt-set-cmake-var TPL_ENABLE_HWLOC BOOL FORCE : OFF +opt-set-cmake-var TPL_ENABLE_LAPACK BOOL FORCE : ON +opt-set-cmake-var TPL_ENABLE_METIS BOOL FORCE : ON +opt-set-cmake-var TPL_ENABLE_Matio BOOL FORCE : OFF +opt-set-cmake-var TPL_ENABLE_MPI BOOL FORCE : ON +opt-set-cmake-var TPL_ENABLE_Netcdf BOOL FORCE : ON +opt-set-cmake-var TPL_ENABLE_ParMETIS BOOL FORCE : OFF +opt-set-cmake-var TPL_ENABLE_Pthread BOOL FORCE : ON +opt-set-cmake-var TPL_ENABLE_Scotch BOOL FORCE : OFF +opt-set-cmake-var TPL_ENABLE_SuperLU BOOL FORCE : OFF +opt-set-cmake-var TPL_ENABLE_SuperLUDist BOOL FORCE : OFF +opt-set-cmake-var TPL_ENABLE_Zlib BOOL 
FORCE : ON + +#TPL_*_LIBRARIES +# see https://github.com/trilinos/Trilinos/issues/11109#issuecomment-1272146298 +opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : /lib64/libblas.so.3 +opt-set-cmake-var TPL_BoostLib_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a +opt-set-cmake-var TPL_Boost_LIBRARIES STRING FORCE : ${BOOST_LIB|ENV}/libboost_program_options.a;${BOOST_LIB|ENV}/libboost_system.a +opt-set-cmake-var TPL_DLlib_LIBRARIES FILEPATH FORCE : "-ldl" +opt-set-cmake-var TPL_HDF5_LIBRARIES STRING FORCE : ${HDF5_LIB|ENV}/libhdf5_hl.so;${HDF5_LIB|ENV}/libhdf5.a;${ZLIB_LIB|ENV}/libz.a;-ldl +# see https://github.com/trilinos/Trilinos/issues/11109#issuecomment-1272146298 +opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : /lib64/liblapack.so.3 +opt-set-cmake-var TPL_METIS_LIBRARIES STRING FORCE : ${METIS_LIB|ENV}/libmetis.so +opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : -L${NETCDF_C_ROOT|ENV}/lib64;${NETCDF_C_ROOT|ENV}/lib/libnetcdf.a;${PARALLEL_NETCDF_ROOT|ENV}/lib/libpnetcdf.a;${TPL_HDF5_LIBRARIES|CMAKE} + +#TPL_[INCLUDE|LIBRARY]_DIRS +opt-set-cmake-var Netcdf_INCLUDE_DIRS STRING FORCE : ${NETCDF_C_INC|ENV} +opt-set-cmake-var ParMETIS_INCLUDE_DIRS STRING FORCE : ${PARMETIS_INC|ENV} +opt-set-cmake-var ParMETIS_LIBRARY_DIRS STRING FORCE : ${PARMETIS_LIB|ENV} +opt-set-cmake-var Scotch_INCLUDE_DIRS STRING FORCE : ${SCOTCH_INC|ENV} +opt-set-cmake-var Scotch_LIBRARY_DIRS STRING FORCE : ${SCOTCH_LIB|ENV} +opt-set-cmake-var SuperLU_INCLUDE_DIRS STRING FORCE : ${SUPERLU_INC|ENV} +opt-set-cmake-var SuperLU_LIBRARY_DIRS STRING FORCE : ${SUPERLU_LIB|ENV} + +#CXX Settings +opt-set-cmake-var CMAKE_CXX_FLAGS STRING : -fPIC -Wall -Warray-bounds -Wchar-subscripts -Wcomment -Wenum-compare -Wformat -Wuninitialized -Wmaybe-uninitialized -Wmain -Wnarrowing -Wnonnull -Wparentheses -Wreorder -Wreturn-type -Wsign-compare -Wsequence-point -Wtrigraphs -Wunused-function -Wunused-but-set-variable -Wunused-variable 
-Wwrite-strings + +#Package Options +opt-set-cmake-var EpetraExt_ENABLE_HDF5 BOOL FORCE : OFF +opt-set-cmake-var Kokkos_ENABLE_CUDA BOOL FORCE : ON +opt-set-cmake-var Kokkos_ENABLE_CUDA_LAMBDA BOOL FORCE : ON +opt-set-cmake-var Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA BOOL FORCE : ON +#opt-set-cmake-var Kokkos_ENABLE_Debug_Bounds_Check BOOL FORCE : ON +opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none +opt-set-cmake-var Panzer_FADTYPE STRING FORCE : "Sacado::Fad::DFad" +opt-set-cmake-var Phalanx_KOKKOS_DEVICE_TYPE STRING FORCE : CUDA +opt-set-cmake-var Sacado_ENABLE_HIERARCHICAL_DFAD BOOL FORCE : ON +opt-set-cmake-var Tpetra_INST_SERIAL BOOL FORCE : ON +opt-set-cmake-var Zoltan_ENABLE_Scotch BOOL FORCE : OFF +opt-set-cmake-var Trilinos_ENABLE_TrilinosFrameworkTests BOOL FORCE : OFF +opt-set-cmake-var Trilinos_ENABLE_TrilinosBuildStats BOOL FORCE : OFF + +# Test failures as of 11-28-22 +opt-set-cmake-var PanzerAdaptersSTK_tQuad8ToQuad4Factory_MPI_2_DISABLE BOOL : ON +opt-set-cmake-var PanzerAdaptersSTK_tQuadraticToLinearMeshFactory_MPI_2_DISABLE BOOL : ON +opt-set-cmake-var Pliris_vector_random_MPI_3_DISABLE BOOL : ON +opt-set-cmake-var Pliris_vector_random_MPI_4_DISABLE BOOL : ON +opt-set-cmake-var ROL_example_PDE-OPT_navier-stokes_example_01_MPI_4_DISABLE BOOL : ON +opt-set-cmake-var ROL_example_PDE-OPT_nonlinear-elliptic_example_01_MPI_4_DISABLE BOOL : ON +opt-set-cmake-var ROL_example_PDE-OPT_nonlinear-elliptic_example_02_MPI_4_DISABLE BOOL : ON +opt-set-cmake-var ROL_example_PDE-OPT_obstacle_example_01_MPI_4_DISABLE BOOL : ON +opt-set-cmake-var ROL_example_PDE-OPT_topo-opt_poisson_example_01_MPI_4_DISABLE BOOL : ON +opt-set-cmake-var ROL_test_elementwise_TpetraMultiVector_MPI_4_DISABLE BOOL : ON +opt-set-cmake-var STKUnit_tests_stk_mesh_unit_tests_MPI_4_DISABLE BOOL : ON +# This was failing fairly reliably and Nate said it should be okay to disable (https://github.com/trilinos/Trilinos/issues/11678) +opt-set-cmake-var 
Kokkos_CoreUnitTest_CudaTimingBased_MPI_1_DISABLE BOOL : ON + +# This is temporarily disabled because it seems to be particularly sensitive to the spack-built +# MPI issue (TRILFRAME-552) +opt-set-cmake-var ROL_example_PinT_parabolic-control_AugmentedSystem_test_MPI_2_DISABLE BOOL FORCE : ON + +use PACKAGE-ENABLES|NO-EPETRA + +use RHEL7_POST + +use CUDA11-RUN-SERIAL-TESTS + +# WIP +[rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_all] +# uses sems-v2 modules +use rhel8_sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6_release_static_Volta70_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_no-package-enables +use PACKAGE-ENABLES|ALL-NO-EPETRA + + [rhel7_ascdo-gnu-10.3.0-serial_debug_shared_no-kokkos-arch_no-asan_no-complex_no-fpic_no-mpi_no-pt_no-rdc_no-uvm_deprecated-on_no-package-enables] # uses asc-do modules use NODE-TYPE|SERIAL @@ -3039,3 +3186,34 @@ use USE-DEPRECATED|YES opt-set-cmake-var Trilinos_ENABLE_Teuchos BOOL : ON opt-set-cmake-var Trilinos_ENABLE_ALL_PACKAGES BOOL FORCE : OFF + +[rhel8_cuda-gcc-openmpi_release_static_Ampere80_no-asan_complex_no-fpic_mpi_pt_no-rdc_no-uvm_deprecated-on_no-package-enables] +use NODE-TYPE|CUDA +use BUILD-TYPE|RELEASE +opt-set-cmake-var BUILD_SHARED_LIBS BOOL : OFF +opt-set-cmake-var TPL_FIND_SHARED_LIBS BOOL : OFF +use KOKKOS-ARCH|AMPERE80 +use RHEL7_SEMS_USE-ASAN|NO_USE-FPIC|NO_USE-MPI|YES_USE-PT|NO +use USE-COMPLEX|YES +use USE-RDC|NO +use USE-UVM|NO +use USE-DEPRECATED|YES +use PACKAGE-ENABLES|NO-EPETRA +use SEMS_COMMON_CUDA_11 + +[rhel8_cuda-gcc-openmpi_release_static_Ampere80_no-asan_complex_no-fpic_mpi_pt_no-rdc_uvm_deprecated-on_no-package-enables] +use NODE-TYPE|CUDA +use BUILD-TYPE|RELEASE +opt-set-cmake-var BUILD_SHARED_LIBS BOOL : OFF +opt-set-cmake-var TPL_FIND_SHARED_LIBS BOOL : OFF +use KOKKOS-ARCH|AMPERE80 +use RHEL7_SEMS_USE-ASAN|NO_USE-FPIC|NO_USE-MPI|YES_USE-PT|NO +use USE-COMPLEX|YES +use USE-RDC|NO +use USE-UVM|YES 
+use USE-DEPRECATED|YES +use PACKAGE-ENABLES|NO-EPETRA +use SEMS_COMMON_CUDA_11 + +opt-set-cmake-var Trilinos_ENABLE_TESTS BOOL FORCE : OFF +opt-set-cmake-var Kokkos_ENABLE_TESTS BOOL FORCE : ON diff --git a/packages/framework/ini-files/environment-specs.ini b/packages/framework/ini-files/environment-specs.ini index 9dae944f28eb..4f91beb46394 100644 --- a/packages/framework/ini-files/environment-specs.ini +++ b/packages/framework/ini-files/environment-specs.ini @@ -156,6 +156,10 @@ envvar-find-in-path MPICC : mpicc envvar-find-in-path MPICXX : mpicxx envvar-find-in-path MPIF90 : mpif90 +[rhel8_cuda-gcc-openmpi] +envvar-set-if-empty TRILINOS_DIR : ${path_to_src} +envvar-set OMPI_CXX: ${TRILINOS_DIR}/packages/kokkos/bin/nvcc_wrapper + [rhel8_gcc-openmpi] [rhel8_gcc-serial] diff --git a/packages/framework/ini-files/supported-envs.ini b/packages/framework/ini-files/supported-envs.ini index d3a27cfcb82e..4f932092e475 100644 --- a/packages/framework/ini-files/supported-envs.ini +++ b/packages/framework/ini-files/supported-envs.ini @@ -174,11 +174,14 @@ gnu [rhel8] oneapi-intelmpi +cuda-gcc-openmpi gcc-openmpi gcc-serial aue-gcc-openmpi sems-cuda-11.4.2-sems-gnu-10.1.0-sems-openmpi-4.1.4 +sems-cuda-11.4.2-gnu-10.1.0-openmpi-4.1.6 sems-clang-11.0.1-openmpi-4.0.5-serial + [ats2] cuda-11.2.152-gnu-8.3.1-spmpi-rolling diff --git a/packages/framework/pr_tools/LaunchDriver.py b/packages/framework/pr_tools/LaunchDriver.py index 325aeac0aa46..72cccc84d7b8 100755 --- a/packages/framework/pr_tools/LaunchDriver.py +++ b/packages/framework/pr_tools/LaunchDriver.py @@ -91,7 +91,7 @@ def main(argv): print("LaunchDriver> INFO: TRILINOS_DIR=\"" + os.environ["TRILINOS_DIR"] + "\"", flush=True) - ds = DetermineSystem(args.build_name, args.supported_systems) + ds = DetermineSystem(args.build_name, args.supported_systems, force_build_name=True) launch_env = get_launch_env(ds.system_name) launch_cmd = get_launch_cmd(ds.system_name) diff --git 
a/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py b/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py index 54cbcd1227f3..99b30697985b 100644 --- a/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py +++ b/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationBase.py @@ -482,7 +482,7 @@ def pullrequest_build_name(self): PR--test-- """ - if self.arg_pullrequest_cdash_track == "Pull Request": + if "Pull Request" in self.arg_pullrequest_cdash_track: output = "PR-{}-test-{}-{}".format(self.arg_pullrequest_number, self.arg_pr_genconfig_job_name, self.arg_jenkins_job_number) elif self.arg_dashboard_build_name != "__UNKNOWN__": output = self.arg_dashboard_build_name @@ -735,7 +735,7 @@ def prepare_test(self): self.message("+" + "-"*68 + "+") self.message("| E N V I R O N M E N T S E T U P S T A R T") self.message("+" + "-"*68 + "+") - tr_env = LoadEnv([self.arg_pr_genconfig_job_name], + tr_env = LoadEnv([self.arg_pr_genconfig_job_name, "--force"], load_env_ini_file=Path(self.arg_pr_env_config_file)) tr_env.load_set_environment() diff --git a/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationStandard.py b/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationStandard.py index cb3e5af13552..49f2c54dfe9c 100644 --- a/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationStandard.py +++ b/packages/framework/pr_tools/trilinosprhelpers/TrilinosPRConfigurationStandard.py @@ -57,7 +57,7 @@ def execute_test(self): # Execute the call to ctest. 
cmd = ['ctest', - "-V", + "-VV", "-S", f"{self.arg_ctest_driver}", f"-Dsource_dir:PATH={self.arg_source_dir}", f"-Dbuild_dir:PATH={self.arg_build_dir}", diff --git a/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py b/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py index 6f933f5d751a..a1774ff565ce 100755 --- a/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py +++ b/packages/framework/pr_tools/trilinosprhelpers/unittests/test_TrilinosPRConfigurationBase.py @@ -348,6 +348,15 @@ def test_TrilinosPRConfigurationBaseBuildNameGCC720(self): expected_build_name = "PR-{}-test-{}-{}".format(args.pullrequest_number, args.genconfig_build_name, args.jenkins_job_number) self.assertEqual(build_name, expected_build_name) + def test_TrilinosPRConfigurationBaseBuildNameContainsPullRequest(self): + """Test that a group containing 'Pull Request' causes the build name to reflect a PR build.""" + args = self.dummy_args_gcc_720() + args.pullrequest_cdash_track = "Pull Request (Non-blocking)" + pr_config = trilinosprhelpers.TrilinosPRConfigurationBase(args) + build_name = pr_config.pullrequest_build_name + print("--- build_name = {}".format(build_name)) + expected_build_name = "PR-{}-test-{}-{}".format(args.pullrequest_number, args.genconfig_build_name, args.jenkins_job_number) + self.assertEqual(build_name, expected_build_name) def test_TrilinosPRConfigurationBaseBuildNameNonPRTrack(self): args = self.dummy_args_non_pr_track() diff --git a/packages/ifpack2/src/CMakeLists.txt b/packages/ifpack2/src/CMakeLists.txt index a0ac4f179d2e..283a027ca150 100644 --- a/packages/ifpack2/src/CMakeLists.txt +++ b/packages/ifpack2/src/CMakeLists.txt @@ -171,19 +171,19 @@ IF(Ifpack2_ENABLE_EXPLICIT_INSTANTIATION) Details::LinearSolverFactory ) -ENDIF() + SET(TEMPLATE_FILE "Ifpack2_Details_LinearSolverFactory_ETI_SC_LO_GO_NT.tmpl") + 
IFPACK2_PROCESS_ETI_TEMPLATE_SLGN(${IFPACK2_ETI_LINEAR_SOLVER_FACTORY_CLASSES} ${TEMPLATE_FILE} IFPACK2_FACTORY_OUTPUT_FILES "${Ifpack2_ETI_SCALARS}" "${Ifpack2_ETI_LORDS}" "${Ifpack2_ETI_GORDS}" "${Ifpack2_ETI_NODES}") + LIST(APPEND IFPACK2_CPP_SOURCES ${IFPACK2_FACTORY_OUTPUT_FILES}) -SET(TEMPLATE_FILE "Ifpack2_Details_LinearSolverFactory_ETI_SC_LO_GO_NT.tmpl") -IFPACK2_PROCESS_ETI_TEMPLATE_SLGN(${IFPACK2_ETI_LINEAR_SOLVER_FACTORY_CLASSES} ${TEMPLATE_FILE} IFPACK2_FACTORY_OUTPUT_FILES "${Ifpack2_ETI_SCALARS}" "${Ifpack2_ETI_LORDS}" "${Ifpack2_ETI_GORDS}" "${Ifpack2_ETI_NODES}") -LIST(APPEND IFPACK2_CPP_SOURCES ${IFPACK2_FACTORY_OUTPUT_FILES}) + SET(TEMPLATE_FILE "Ifpack2_ETI_LO_GO_NT.tmpl") + IFPACK2_PROCESS_ETI_TEMPLATE_LGN("${IFPACK2_ETI_LO_GO_CLASSES}" ${TEMPLATE_FILE} IFPACK2_SRCS "${Ifpack2_ETI_LORDS}" "${Ifpack2_ETI_GORDS}" "${Ifpack2_ETI_NODES}") + LIST(APPEND IFPACK2_CPP_SOURCES ${IFPACK2_SRCS}) -SET(TEMPLATE_FILE "Ifpack2_ETI_LO_GO_NT.tmpl") -IFPACK2_PROCESS_ETI_TEMPLATE_LGN("${IFPACK2_ETI_LO_GO_CLASSES}" ${TEMPLATE_FILE} IFPACK2_SRCS "${Ifpack2_ETI_LORDS}" "${Ifpack2_ETI_GORDS}" "${Ifpack2_ETI_NODES}") -LIST(APPEND IFPACK2_CPP_SOURCES ${IFPACK2_SRCS}) + SET(TEMPLATE_FILE "Ifpack2_ETI_SC_LO_GO_NT.tmpl") + IFPACK2_PROCESS_ETI_TEMPLATE_SLGN("${IFPACK2_ETI_CLASSES}" ${TEMPLATE_FILE} IFPACK2_SRCS "${Ifpack2_ETI_SCALARS}" "${Ifpack2_ETI_LORDS}" "${Ifpack2_ETI_GORDS}" "${Ifpack2_ETI_NODES}") + LIST(APPEND IFPACK2_CPP_SOURCES ${IFPACK2_SRCS}) -SET(TEMPLATE_FILE "Ifpack2_ETI_SC_LO_GO_NT.tmpl") -IFPACK2_PROCESS_ETI_TEMPLATE_SLGN("${IFPACK2_ETI_CLASSES}" ${TEMPLATE_FILE} IFPACK2_SRCS "${Ifpack2_ETI_SCALARS}" "${Ifpack2_ETI_LORDS}" "${Ifpack2_ETI_GORDS}" "${Ifpack2_ETI_NODES}") -LIST(APPEND IFPACK2_CPP_SOURCES ${IFPACK2_SRCS}) +ENDIF() #MESSAGE(DEBUG " *** IFPACK2_CPP_SOURCES = ${IFPACK2_CPP_SOURCES}") diff --git a/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_def.hpp b/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_def.hpp index edfc39b6fdaa..03b34c545bfd 100644 --- 
a/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_def.hpp +++ b/packages/ifpack2/src/Ifpack2_AdditiveSchwarz_def.hpp @@ -55,6 +55,7 @@ #ifndef IFPACK2_ADDITIVESCHWARZ_DEF_HPP #define IFPACK2_ADDITIVESCHWARZ_DEF_HPP +#include "Ifpack2_AdditiveSchwarz_decl.hpp" #include "Trilinos_Details_LinearSolverFactory.hpp" // We need Ifpack2's implementation of LinearSolver, because we use it // to wrap the user-provided Ifpack2::Preconditioner in diff --git a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp index fcc5a62932cc..479299ecf90d 100644 --- a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp +++ b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp @@ -953,9 +953,9 @@ void RBILUK::compute () "streams are not yet supported."); auto lclMtx = A_local_bcrs->getLocalMatrixDevice(); - this->A_local_rowmap_ = lclMtx.graph.row_map; - this->A_local_entries_ = lclMtx.graph.entries; - this->A_local_values_ = lclMtx.values; + auto A_local_rowmap = lclMtx.graph.row_map; + auto A_local_entries = lclMtx.graph.entries; + auto A_local_values = lclMtx.values; // L_block_->resumeFill (); // U_block_->resumeFill (); @@ -978,7 +978,7 @@ void RBILUK::compute () auto U_values = lclU.values; KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), this->LevelOfFill_, - this->A_local_rowmap_, this->A_local_entries_, this->A_local_values_, + A_local_rowmap, A_local_entries, A_local_values, L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values ); } } // Stop timing diff --git a/packages/ifpack2/src/Ifpack2_LocalSparseTriangularSolver_decl.hpp b/packages/ifpack2/src/Ifpack2_LocalSparseTriangularSolver_decl.hpp index 09bf1b98cd52..6face5d9fce4 100644 --- a/packages/ifpack2/src/Ifpack2_LocalSparseTriangularSolver_decl.hpp +++ b/packages/ifpack2/src/Ifpack2_LocalSparseTriangularSolver_decl.hpp @@ -440,6 +440,8 @@ class LocalSparseTriangularSolver : const Teuchos::ETransp mode) const; void 
initializeState(); + + KokkosSparse::Experimental::SPTRSVAlgorithm kokkosKernelsAlgorithm() const; }; } // namespace Ifpack2 diff --git a/packages/ifpack2/src/Ifpack2_LocalSparseTriangularSolver_def.hpp b/packages/ifpack2/src/Ifpack2_LocalSparseTriangularSolver_def.hpp index 4bd919afe6ee..993434204dca 100644 --- a/packages/ifpack2/src/Ifpack2_LocalSparseTriangularSolver_def.hpp +++ b/packages/ifpack2/src/Ifpack2_LocalSparseTriangularSolver_def.hpp @@ -43,6 +43,7 @@ #ifndef IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP #define IFPACK2_LOCALSPARSETRIANGULARSOLVER_DEF_HPP +#include "Ifpack2_LocalSparseTriangularSolver_decl.hpp" #include "Tpetra_CrsMatrix.hpp" #include "Tpetra_Core.hpp" #include "Teuchos_StandardParameterEntryValidators.hpp" @@ -573,11 +574,32 @@ initialize () { if (!isKokkosKernelsStream_) { kh_ = Teuchos::rcp (new k_handle()); - } - else { + const bool is_lower_tri = (this->uplo_ == "L") ? true : false; + + auto A_crs = Teuchos::rcp_dynamic_cast (A_, true); + auto Alocal = A_crs->getLocalMatrixDevice(); + auto ptr = Alocal.graph.row_map; + auto ind = Alocal.graph.entries; + auto val = Alocal.values; + + auto numRows = Alocal.numRows(); + kh_->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows, is_lower_tri); + KokkosSparse::Experimental::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val); + } else { kh_v_ = std::vector< Teuchos::RCP >(num_streams_); for (int i = 0; i < num_streams_; i++) { kh_v_[i] = Teuchos::rcp (new k_handle ()); + auto A_crs_i = Teuchos::rcp_dynamic_cast (A_crs_v_[i], true); + auto Alocal_i = A_crs_i->getLocalMatrixDevice(); + auto ptr_i = Alocal_i.graph.row_map; + auto ind_i = Alocal_i.graph.entries; + auto val_i = Alocal_i.values; + + auto numRows_i = Alocal_i.numRows(); + + const bool is_lower_tri = (this->uplo_ == "L") ? 
true : false; + kh_v_[i]->create_sptrsv_handle(kokkosKernelsAlgorithm(), numRows_i, is_lower_tri); + KokkosSparse::Experimental::sptrsv_symbolic(kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i); } kh_v_nonnull_ = true; } @@ -587,6 +609,26 @@ initialize () ++numInitialize_; } +template +KokkosSparse::Experimental::SPTRSVAlgorithm +LocalSparseTriangularSolver::kokkosKernelsAlgorithm() const +{ +#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) + // CuSparse only supports int type ordinals + // and scalar types of float, double, float complex and double complex + if constexpr (std::is_same::value && + std::is_same::value && + (std::is_same::value || + std::is_same::value || + std::is_same>::value || + std::is_same>::value)) + { + return KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE; + } +#endif + return KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1; +} + template void LocalSparseTriangularSolver:: @@ -631,79 +673,64 @@ compute () "been called by this point, but isInitialized_ is false. " "Please report this bug to the Ifpack2 developers."); - if (! isComputed_) {//Only compute if not computed before - if (Teuchos::nonnull (htsImpl_)) - htsImpl_->compute (*A_crs_, out_); - - if (Teuchos::nonnull(kh_) && this->isKokkosKernelsSptrsv_) { - const bool is_lower_tri = (this->uplo_ == "L") ? 
true : false; - - auto A_crs = Teuchos::rcp_dynamic_cast (A_); - auto Alocal = A_crs->getLocalMatrixDevice(); - auto ptr = Alocal.graph.row_map; - auto ind = Alocal.graph.entries; - auto val = Alocal.values; - - auto numRows = Alocal.numRows(); - - // Destroy existing handle and recreate in case new matrix provided - requires rerunning symbolic analysis - kh_->destroy_sptrsv_handle(); -#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) - // CuSparse only supports int type ordinals - // and scalar types of float, double, float complex and double complex - if (std::is_same::value && - std::is_same::value && - (std::is_same::value || - std::is_same::value || - std::is_same>::value || - std::is_same>::value)) - { - kh_->create_sptrsv_handle(KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE, numRows, is_lower_tri); - } - else -#endif - { - kh_->create_sptrsv_handle(KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1, numRows, is_lower_tri); - } - KokkosSparse::Experimental::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val); - } - else if (kh_v_nonnull_ && this->isKokkosKernelsSptrsv_) { - const bool is_lower_tri = (this->uplo_ == "L") ? 
true : false; - - for (int i = 0; i < num_streams_; i++) { - auto A_crs_i = Teuchos::rcp_dynamic_cast (A_crs_v_[i]); - auto Alocal_i = A_crs_i->getLocalMatrixDevice(); - auto ptr_i = Alocal_i.graph.row_map; - auto ind_i = Alocal_i.graph.entries; - auto val_i = Alocal_i.values; - - auto numRows_i = Alocal_i.numRows(); - - // Destroy existing handle and recreate in case new matrix provided - requires rerunning symbolic analysis - kh_v_[i]->destroy_sptrsv_handle(); -#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) - // CuSparse only supports int type ordinals - // and scalar types of float, double, float complex and double complex - if (std::is_same::value && - std::is_same::value && - (std::is_same::value || - std::is_same::value || - std::is_same>::value || - std::is_same>::value)) - { - kh_v_[i]->create_sptrsv_handle(KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE, numRows_i, is_lower_tri); - } - else -#endif - { - kh_v_[i]->create_sptrsv_handle(KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1, numRows_i, is_lower_tri); +// NOTE (Nov-09-2022): +// For Cuda >= 11.3 (using cusparseSpSV), always call symbolic during compute +// even when matrix values are changed with the same sparsity pattern. +// For Cuda >= 12.1 has a new cusparseSpSV_updateMatrix function just for updating the +// values that is substantially faster. +// This would all be much better handled via a KokkosSparse::Experimental::sptrsv_numeric(...) +// that could hide the Cuda implementation details. 
+#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && (CUDA_VERSION >= 11030) + if constexpr ( std::is_same_v ) + { + if (this->isKokkosKernelsSptrsv_) { + if (Teuchos::nonnull(kh_) && !isKokkosKernelsStream_) { + auto A_crs = Teuchos::rcp_dynamic_cast (A_crs_, true); + auto Alocal = A_crs->getLocalMatrixDevice(); + auto val = Alocal.values; + #if (CUSPARSE_VERSION >= 12100) + auto *sptrsv_handle = kh_->get_sptrsv_handle(); + auto cusparse_handle = sptrsv_handle->get_cuSparseHandle(); + cusparseSpSV_updateMatrix(cusparse_handle->handle, + cusparse_handle->spsvDescr, + val.data(), + CUSPARSE_SPSV_UPDATE_GENERAL); + #else + auto ptr = Alocal.graph.row_map; + auto ind = Alocal.graph.entries; + KokkosSparse::Experimental::sptrsv_symbolic(kh_.getRawPtr(), ptr, ind, val); + #endif + } else if (kh_v_nonnull_) { + for (int i = 0; i < num_streams_; i++) { + auto A_crs_i = Teuchos::rcp_dynamic_cast (A_crs_v_[i], true); + auto Alocal_i = A_crs_i->getLocalMatrixDevice(); + auto val_i = Alocal_i.values; + #if (CUSPARSE_VERSION >= 12100) + auto *sptrsv_handle = kh_v_[i]->get_sptrsv_handle(); + auto cusparse_handle = sptrsv_handle->get_cuSparseHandle(); + KOKKOS_CUSPARSE_SAFE_CALL( + cusparseSetStream(cusparse_handle->handle, exec_space_instances_[i].cuda_stream())); + cusparseSpSV_updateMatrix(cusparse_handle->handle, + cusparse_handle->spsvDescr, + val_i.data(), + CUSPARSE_SPSV_UPDATE_GENERAL); + #else + auto ptr_i = Alocal_i.graph.row_map; + auto ind_i = Alocal_i.graph.entries; + KokkosSparse::Experimental::sptrsv_symbolic(exec_space_instances_[i], kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i); + #endif + } } - KokkosSparse::Experimental::sptrsv_symbolic(kh_v_[i].getRawPtr(), ptr_i, ind_i, val_i); } } +#endif + + if (! 
isComputed_) {//Only compute if not computed before + if (Teuchos::nonnull (htsImpl_)) + htsImpl_->compute (*A_crs_, out_); - isComputed_ = true; - ++numCompute_; + isComputed_ = true; + ++numCompute_; } } @@ -1228,14 +1255,6 @@ setMatrix (const Teuchos::RCP& A) if (Teuchos::nonnull (htsImpl_)) htsImpl_->reset (); } // pointers are not the same - - //NOTE (Nov-09-2022): - //For Cuda >= 11.3 (using cusparseSpSV), always call compute before apply, - //even when matrix values are changed with the same sparsity pattern. - //So, force isComputed_ to FALSE here -#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030) - isComputed_ = false; -#endif } template @@ -1290,14 +1309,6 @@ setMatrices (const std::vector< Teuchos::RCP >& A_crs_v) } } // pointers are not the same } - - //NOTE (Nov-09-2022): - //For Cuda >= 11.3 (using cusparseSpSV), always call compute before apply, - //even when matrix values are changed with the same sparsity pattern. - //So, force isComputed_ to FALSE here -#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030) - isComputed_ = false; -#endif } } // namespace Ifpack2 diff --git a/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_def.hpp b/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_def.hpp index a93692f9e73f..1d3cdad6ab79 100644 --- a/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_def.hpp +++ b/packages/ifpack2/src/Ifpack2_OverlappingRowMatrix_def.hpp @@ -45,6 +45,7 @@ #include +#include #include #include #include @@ -179,9 +180,17 @@ OverlappingRowMatrix (const Teuchos::RCP& A, ExtImporter_ = rcp (new import_type (A_->getRowMap (), ExtMap_)); { - ExtMatrix_ = rcp (new crs_matrix_type (ExtMap_, ColMap_, 0)); - ExtMatrix_->doImport (*A_, *ExtImporter_, Tpetra::INSERT); - ExtMatrix_->fillComplete (A_->getDomainMap (), RowMap_); + auto ExtMatrixDynGraph = rcp (new crs_matrix_type (ExtMap_, ColMap_, 0)); + ExtMatrixDynGraph->doImport (*A_, 
*ExtImporter_, Tpetra::INSERT); + ExtMatrixDynGraph->fillComplete (A_->getDomainMap (), RowMap_); + auto ExtLclMatrix = ExtMatrixDynGraph->getLocalMatrixDevice(); + auto ExtMatrixStaticGraph = rcp (new crs_graph_type(ExtLclMatrix.graph, + ExtMap_, + ColMap_, + ExtMatrixDynGraph->getDomainMap(), + ExtMatrixDynGraph->getRangeMap())); + ExtMatrix_ = rcp (new crs_matrix_type(ExtMatrixStaticGraph, ExtLclMatrix.values)); + ExtMatrix_->fillComplete (); } // fix indices for overlapping matrix @@ -846,11 +855,8 @@ OverlappingRowMatrix::getExtHaloStartsHost() const template void OverlappingRowMatrix::doExtImport() { - //TODO: CrsMatrix can't doImport after resumeFill (see #9720). Ideally, this import could - //happen using combine mode REPLACE without reconstructing the matrix. - //Maybe even without another fillComplete since this doesn't change structure - see #9655. - ExtMatrix_ = rcp (new crs_matrix_type (ExtMap_, ColMap_, 0)); - ExtMatrix_->doImport (*A_, *ExtImporter_, Tpetra::INSERT); + ExtMatrix_->resumeFill(); + ExtMatrix_->doImport (*A_, *ExtImporter_, Tpetra::REPLACE); ExtMatrix_->fillComplete (A_->getDomainMap (), RowMap_); } diff --git a/packages/ifpack2/src/Ifpack2_RILUK_decl.hpp b/packages/ifpack2/src/Ifpack2_RILUK_decl.hpp index 07f95154e909..509e884dc815 100644 --- a/packages/ifpack2/src/Ifpack2_RILUK_decl.hpp +++ b/packages/ifpack2/src/Ifpack2_RILUK_decl.hpp @@ -578,6 +578,17 @@ class RILUK: static void checkOrderingConsistency (const row_matrix_type& A); void initAllValues (const row_matrix_type& A); + void compute_serial(); + void compute_kkspiluk(); +// Workaround Cuda limitation of KOKKOS_LAMBDA in private/protected member functions +#ifdef KOKKOS_ENABLE_CUDA +public: +#endif + void compute_kkspiluk_stream(); +#ifdef KOKKOS_ENABLE_CUDA +private: +#endif + /// \brief Return A, wrapped in a LocalFilter, if necessary. 
/// /// "If necessary" means that if A is already a LocalFilter, or if @@ -599,9 +610,8 @@ class RILUK: /// may be computed using a crs_matrix_type that initialize() constructs /// temporarily. Teuchos::RCP A_local_; - lno_row_view_t A_local_rowmap_; - lno_nonzero_view_t A_local_entries_; - scalar_nonzero_view_t A_local_values_; + Teuchos::RCP A_local_crs_; + Teuchos::RCP A_local_crs_nc_; std::vector A_local_diagblks; std::vector< lno_row_view_t > A_local_diagblks_rowmap_v_; std::vector< lno_nonzero_view_t > A_local_diagblks_entries_v_; @@ -649,6 +659,7 @@ class RILUK: std::vector exec_space_instances_; bool hasStreamReordered_; std::vector perm_v_; + std::vector reverse_perm_v_; }; // NOTE (mfh 11 Feb 2015) This used to exist in order to deal with diff --git a/packages/ifpack2/src/Ifpack2_RILUK_def.hpp b/packages/ifpack2/src/Ifpack2_RILUK_def.hpp index 92770862324c..cc1f6fd0fee7 100644 --- a/packages/ifpack2/src/Ifpack2_RILUK_def.hpp +++ b/packages/ifpack2/src/Ifpack2_RILUK_def.hpp @@ -41,6 +41,7 @@ #ifndef IFPACK2_CRSRILUK_DEF_HPP #define IFPACK2_CRSRILUK_DEF_HPP +#include "Ifpack2_RILUK_decl.hpp" #include "Ifpack2_LocalFilter.hpp" #include "Tpetra_CrsMatrix.hpp" #include "Teuchos_StandardParameterEntryValidators.hpp" @@ -425,6 +426,11 @@ setParameters (const Teuchos::ParameterList& params) LevelOfFill_ = fillLevel; Overalloc_ = overalloc; +#ifdef KOKKOS_ENABLE_OPENMP + if constexpr (std::is_same_v) { + nstreams = std::min(nstreams, execution_space{}.concurrency()); + } +#endif num_streams_ = nstreams; if (num_streams_ >= 1) { @@ -569,14 +575,14 @@ void RILUK::initialize () // we just copy the input matrix if it's not a CrsMatrix. 
{ - RCP A_local_crs = Details::getCrsMatrix(A_local_); - if(A_local_crs.is_null()) { + A_local_crs_ = Details::getCrsMatrix(A_local_); + if(A_local_crs_.is_null()) { local_ordinal_type numRows = A_local_->getLocalNumRows(); Array entriesPerRow(numRows); for(local_ordinal_type i = 0; i < numRows; i++) { entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i); } - RCP A_local_crs_nc = + A_local_crs_nc_ = rcp (new crs_matrix_type (A_local_->getRowMap (), A_local_->getColMap (), entriesPerRow())); @@ -586,28 +592,56 @@ void RILUK::initialize () for(local_ordinal_type i = 0; i < numRows; i++) { size_t numEntries = 0; A_local_->getLocalRowCopy(i, indices, values, numEntries); - A_local_crs_nc->insertLocalValues(i, numEntries, reinterpret_cast(values.data()), indices.data()); + A_local_crs_nc_->insertLocalValues(i, numEntries, reinterpret_cast(values.data()), indices.data()); } - A_local_crs_nc->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ()); - A_local_crs = rcp_const_cast (A_local_crs_nc); + A_local_crs_nc_->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ()); + A_local_crs_ = rcp_const_cast (A_local_crs_nc_); } if (!isKokkosKernelsStream_) { - Graph_ = rcp (new Ifpack2::IlukGraph (A_local_crs->getCrsGraph (), + Graph_ = rcp (new Ifpack2::IlukGraph (A_local_crs_->getCrsGraph (), LevelOfFill_, 0, Overalloc_)); } else { - auto lclMtx = A_local_crs->getLocalMatrixDevice(); - if (!hasStreamReordered_) + std::vector weights(num_streams_); + std::fill(weights.begin(), weights.end(), 1); + exec_space_instances_ = Kokkos::Experimental::partition_space(execution_space(), weights); + + auto lclMtx = A_local_crs_->getLocalMatrixDevice(); + if (!hasStreamReordered_) { KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks); - else + } else { perm_v_ = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks, true); + reverse_perm_v_.resize(perm_v_.size()); + for(int 
istream=0; istream < perm_v_.size(); ++istream) { + using perm_type = typename lno_nonzero_view_t::non_const_type; + const auto perm = perm_v_[istream]; + const auto perm_length = perm.extent(0); + perm_type reverse_perm( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "reverse_perm"), + perm_length); + Kokkos::parallel_for(Kokkos::RangePolicy(exec_space_instances_[istream], 0, perm_length), + KOKKOS_LAMBDA(const local_ordinal_type ii) { + reverse_perm(perm(ii)) = ii; + }); + reverse_perm_v_[istream] = reverse_perm; + } + } + + A_local_diagblks_rowmap_v_ = std::vector(num_streams_); + A_local_diagblks_entries_v_ = std::vector(num_streams_); + A_local_diagblks_values_v_ = std::vector(num_streams_); + for(int i = 0; i < num_streams_; i++) { + A_local_diagblks_rowmap_v_[i] = A_local_diagblks[i].graph.row_map; + A_local_diagblks_entries_v_[i] = A_local_diagblks[i].graph.entries; + A_local_diagblks_values_v_[i] = A_local_diagblks[i].values; + Teuchos::RCP A_local_diagblks_RowMap = rcp (new crs_map_type(A_local_diagblks[i].numRows(), A_local_diagblks[i].numRows(), - A_local_crs->getRowMap()->getComm())); + A_local_crs_->getRowMap()->getComm())); Teuchos::RCP A_local_diagblks_ColMap = rcp (new crs_map_type(A_local_diagblks[i].numCols(), A_local_diagblks[i].numCols(), - A_local_crs->getColMap()->getComm())); + A_local_crs_->getColMap()->getComm())); Teuchos::RCP A_local_diagblks_ = rcp (new crs_matrix_type(A_local_diagblks_RowMap, A_local_diagblks_ColMap, A_local_diagblks[i])); @@ -636,9 +670,6 @@ void RILUK::initialize () 2*A_local_diagblks[i].nnz()*(LevelOfFill_+1) ); Graph_v_[i]->initialize (KernelHandle_v_[i]); // this calls spiluk_symbolic } - std::vector weights(num_streams_); - std::fill(weights.begin(), weights.end(), 1); - exec_space_instances_ = Kokkos::Experimental::partition_space(execution_space(), weights); } } else { @@ -655,12 +686,6 @@ void RILUK::initialize () L_solver_->setMatrices (L_v_); } L_solver_->initialize (); - //NOTE (Nov-09-2022): - //For 
Cuda >= 11.3 (using cusparseSpSV), skip trisolve computes here. - //Instead, call trisolve computes within RILUK compute -#if !defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || !defined(KOKKOS_ENABLE_CUDA) || (CUDA_VERSION < 11030) - L_solver_->compute ();//NOTE: It makes sense to do compute here because only the nonzero pattern is involved in trisolve compute -#endif if (!isKokkosKernelsStream_) { U_solver_->setMatrix (U_); @@ -670,9 +695,6 @@ void RILUK::initialize () U_solver_->setMatrices (U_v_); } U_solver_->initialize (); -#if !defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || !defined(KOKKOS_ENABLE_CUDA) || (CUDA_VERSION < 11030) - U_solver_->compute ();//NOTE: It makes sense to do compute here because only the nonzero pattern is involved in trisolve compute -#endif // Do not call initAllValues. compute() always calls initAllValues to // fill L and U with possibly new numbers. initialize() is concerned @@ -838,321 +860,335 @@ initAllValues (const row_matrix_type& A) isInitialized_ = true; } - template -void RILUK::compute () +void RILUK::compute_serial () { - using Teuchos::RCP; - using Teuchos::rcp; - using Teuchos::rcp_const_cast; - using Teuchos::rcp_dynamic_cast; - using Teuchos::Array; - using Teuchos::ArrayView; - const char prefix[] = "Ifpack2::RILUK::compute: "; + // Fill L and U with numbers. This supports nonzero pattern reuse by calling + // initialize() once and then compute() multiple times. + initAllValues (*A_local_); - // initialize() checks this too, but it's easier for users if the - // error shows them the name of the method that they actually - // called, rather than the name of some internally called method. - TEUCHOS_TEST_FOR_EXCEPTION - (A_.is_null (), std::runtime_error, prefix << "The matrix is null. Please " - "call setMatrix() with a nonnull input before calling this method."); - TEUCHOS_TEST_FOR_EXCEPTION - (! A_->isFillComplete (), std::runtime_error, prefix << "The matrix is not " - "fill complete. 
You may not invoke initialize() or compute() with this " - "matrix until the matrix is fill complete. If your matrix is a " - "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and " - "range Maps, if appropriate) before calling this method."); + // MinMachNum should be officially defined, for now pick something a little + // bigger than IEEE underflow value - if (! isInitialized ()) { - initialize (); // Don't count this in the compute() time - } + const scalar_type MinDiagonalValue = STS::rmin (); + const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue; - Teuchos::Time timer ("RILUK::compute"); + size_t NumIn, NumL, NumU; - // Start timing - Teuchos::TimeMonitor timeMon (timer); - double startTime = timer.wallTime(); + // Get Maximum Row length + const size_t MaxNumEntries = + L_->getLocalMaxNumRowEntries () + U_->getLocalMaxNumRowEntries () + 1; - isComputed_ = false; + Teuchos::Array InI(MaxNumEntries); // Allocate temp space + Teuchos::Array InV(MaxNumEntries); + size_t num_cols = U_->getColMap()->getLocalNumElements(); + Teuchos::Array colflag(num_cols, -1); - if (!this->isKokkosKernelsSpiluk_) { - // Fill L and U with numbers. This supports nonzero pattern reuse by calling - // initialize() once and then compute() multiple times. - initAllValues (*A_local_); + auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0); - // MinMachNum should be officially defined, for now pick something a little - // bigger than IEEE underflow value + // Now start the factorization. 
- const scalar_type MinDiagonalValue = STS::rmin (); - const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue; + using IST = typename row_matrix_type::impl_scalar_type; + for (size_t i = 0; i < L_->getLocalNumRows (); ++i) { + local_ordinal_type local_row = i; + // Need some integer workspace and pointers + size_t NumUU; + local_inds_host_view_type UUI; + values_host_view_type UUV; - size_t NumIn, NumL, NumU; + // Fill InV, InI with current row of L, D and U combined - // Get Maximum Row length - const size_t MaxNumEntries = - L_->getLocalMaxNumRowEntries () + U_->getLocalMaxNumRowEntries () + 1; + NumIn = MaxNumEntries; + nonconst_local_inds_host_view_type InI_v(InI.data(),MaxNumEntries); + nonconst_values_host_view_type InV_v(reinterpret_cast(InV.data()),MaxNumEntries); - Teuchos::Array InI(MaxNumEntries); // Allocate temp space - Teuchos::Array InV(MaxNumEntries); - size_t num_cols = U_->getColMap()->getLocalNumElements(); - Teuchos::Array colflag(num_cols); + L_->getLocalRowCopy (local_row, InI_v , InV_v, NumL); - auto DV = Kokkos::subview(D_->getLocalViewHost(Tpetra::Access::ReadWrite), Kokkos::ALL(), 0); + InV[NumL] = DV(i); // Put in diagonal + InI[NumL] = local_row; - // Now start the factorization. 
+ nonconst_local_inds_host_view_type InI_sub(InI.data()+NumL+1,MaxNumEntries-NumL-1); + nonconst_values_host_view_type InV_sub(reinterpret_cast(InV.data())+NumL+1,MaxNumEntries-NumL-1); - for (size_t j = 0; j < num_cols; ++j) { - colflag[j] = -1; - } - using IST = typename row_matrix_type::impl_scalar_type; - for (size_t i = 0; i < L_->getLocalNumRows (); ++i) { - local_ordinal_type local_row = i; - // Need some integer workspace and pointers - size_t NumUU; - local_inds_host_view_type UUI; - values_host_view_type UUV; + U_->getLocalRowCopy (local_row, InI_sub,InV_sub, NumU); + NumIn = NumL+NumU+1; - // Fill InV, InI with current row of L, D and U combined + // Set column flags + for (size_t j = 0; j < NumIn; ++j) { + colflag[InI[j]] = j; + } - NumIn = MaxNumEntries; - nonconst_local_inds_host_view_type InI_v(InI.data(),MaxNumEntries); - nonconst_values_host_view_type InV_v(reinterpret_cast(InV.data()),MaxNumEntries); + scalar_type diagmod = STS::zero (); // Off-diagonal accumulator - L_->getLocalRowCopy (local_row, InI_v , InV_v, NumL); + for (size_t jj = 0; jj < NumL; ++jj) { + local_ordinal_type j = InI[jj]; + IST multiplier = InV[jj]; // current_mults++; - InV[NumL] = DV(i); // Put in diagonal - InI[NumL] = local_row; + InV[jj] *= static_cast(DV(j)); - nonconst_local_inds_host_view_type InI_sub(InI.data()+NumL+1,MaxNumEntries-NumL-1); - nonconst_values_host_view_type InV_sub(reinterpret_cast(InV.data())+NumL+1,MaxNumEntries-NumL-1); + U_->getLocalRowView(j, UUI, UUV); // View of row above + NumUU = UUI.size(); - U_->getLocalRowCopy (local_row, InI_sub,InV_sub, NumU); - NumIn = NumL+NumU+1; + if (RelaxValue_ == STM::zero ()) { + for (size_t k = 0; k < NumUU; ++k) { + const int kk = colflag[UUI[k]]; + // FIXME (mfh 23 Dec 2013) Wait a second, we just set + // colflag above using size_t (which is generally unsigned), + // but now we're querying it using int (which is signed). 
+ if (kk > -1) { + InV[kk] -= static_cast(multiplier * UUV[k]); + } + } - // Set column flags - for (size_t j = 0; j < NumIn; ++j) { - colflag[InI[j]] = j; } - - scalar_type diagmod = STS::zero (); // Off-diagonal accumulator - - for (size_t jj = 0; jj < NumL; ++jj) { - local_ordinal_type j = InI[jj]; - IST multiplier = InV[jj]; // current_mults++; - - InV[jj] *= static_cast(DV(j)); - - U_->getLocalRowView(j, UUI, UUV); // View of row above - NumUU = UUI.size(); - - if (RelaxValue_ == STM::zero ()) { - for (size_t k = 0; k < NumUU; ++k) { - const int kk = colflag[UUI[k]]; - // FIXME (mfh 23 Dec 2013) Wait a second, we just set - // colflag above using size_t (which is generally unsigned), - // but now we're querying it using int (which is signed). - if (kk > -1) { - InV[kk] -= static_cast(multiplier * UUV[k]); - } + else { + for (size_t k = 0; k < NumUU; ++k) { + // FIXME (mfh 23 Dec 2013) Wait a second, we just set + // colflag above using size_t (which is generally unsigned), + // but now we're querying it using int (which is signed). + const int kk = colflag[UUI[k]]; + if (kk > -1) { + InV[kk] -= static_cast(multiplier*UUV[k]); } - - } - else { - for (size_t k = 0; k < NumUU; ++k) { - // FIXME (mfh 23 Dec 2013) Wait a second, we just set - // colflag above using size_t (which is generally unsigned), - // but now we're querying it using int (which is signed). 
- const int kk = colflag[UUI[k]]; - if (kk > -1) { - InV[kk] -= static_cast(multiplier*UUV[k]); - } - else { - diagmod -= static_cast(multiplier*UUV[k]); - } + else { + diagmod -= static_cast(multiplier*UUV[k]); } } } + } - if (NumL) { - // Replace current row of L - L_->replaceLocalValues (local_row, InI (0, NumL), InV (0, NumL)); - } + if (NumL) { + // Replace current row of L + L_->replaceLocalValues (local_row, InI (0, NumL), InV (0, NumL)); + } - DV(i) = InV[NumL]; // Extract Diagonal value + DV(i) = InV[NumL]; // Extract Diagonal value - if (RelaxValue_ != STM::zero ()) { - DV(i) += RelaxValue_*diagmod; // Add off diagonal modifications - } + if (RelaxValue_ != STM::zero ()) { + DV(i) += RelaxValue_*diagmod; // Add off diagonal modifications + } - if (STS::magnitude (DV(i)) > STS::magnitude (MaxDiagonalValue)) { - if (STS::real (DV(i)) < STM::zero ()) { - DV(i) = -MinDiagonalValue; - } - else { - DV(i) = MinDiagonalValue; - } + if (STS::magnitude (DV(i)) > STS::magnitude (MaxDiagonalValue)) { + if (STS::real (DV(i)) < STM::zero ()) { + DV(i) = -MinDiagonalValue; } else { - DV(i) = static_cast(STS::one ()) / DV(i); // Invert diagonal value + DV(i) = MinDiagonalValue; } + } + else { + DV(i) = static_cast(STS::one ()) / DV(i); // Invert diagonal value + } - for (size_t j = 0; j < NumU; ++j) { - InV[NumL+1+j] *= static_cast(DV(i)); // Scale U by inverse of diagonal - } + for (size_t j = 0; j < NumU; ++j) { + InV[NumL+1+j] *= static_cast(DV(i)); // Scale U by inverse of diagonal + } - if (NumU) { - // Replace current row of L and U - U_->replaceLocalValues (local_row, InI (NumL+1, NumU), InV (NumL+1, NumU)); - } + if (NumU) { + // Replace current row of L and U + U_->replaceLocalValues (local_row, InI (NumL+1, NumU), InV (NumL+1, NumU)); + } - // Reset column flags - for (size_t j = 0; j < NumIn; ++j) { - colflag[InI[j]] = -1; - } + // Reset column flags + for (size_t j = 0; j < NumIn; ++j) { + colflag[InI[j]] = -1; } + } + + // The domain of L and the range of U 
are exactly their own row maps + // (there is no communication). The domain of U and the range of L + // must be the same as those of the original matrix, However if the + // original matrix is a VbrMatrix, these two latter maps are + // translation from a block map to a point map. + // FIXME (mfh 23 Dec 2013) Do we know that the column Map of L_ is + // always one-to-one? + L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ()); + U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ()); + + // If L_solver_ or U_solver store modified factors internally, we need to reset those + L_solver_->setMatrix (L_); + L_solver_->compute ();//NOTE: Only do compute if the pointer changed. Otherwise, do nothing + U_solver_->setMatrix (U_); + U_solver_->compute ();//NOTE: Only do compute if the pointer changed. Otherwise, do nothing - // The domain of L and the range of U are exactly their own row maps - // (there is no communication). The domain of U and the range of L - // must be the same as those of the original matrix, However if the - // original matrix is a VbrMatrix, these two latter maps are - // translation from a block map to a point map. - // FIXME (mfh 23 Dec 2013) Do we know that the column Map of L_ is - // always one-to-one? - L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ()); - U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ()); - - // If L_solver_ or U_solver store modified factors internally, we need to reset those - L_solver_->setMatrix (L_); - L_solver_->compute ();//NOTE: Only do compute if the pointer changed. Otherwise, do nothing - U_solver_->setMatrix (U_); - U_solver_->compute ();//NOTE: Only do compute if the pointer changed. 
Otherwise, do nothing +} + +template +void RILUK::compute_kkspiluk() +{ + L_->resumeFill (); + U_->resumeFill (); + + L_->setAllToScalar (STS::zero ()); // Zero out L and U matrices + U_->setAllToScalar (STS::zero ()); + + using row_map_type = typename crs_matrix_type::local_matrix_device_type::row_map_type; + auto lclL = L_->getLocalMatrixDevice(); + row_map_type L_rowmap = lclL.graph.row_map; + auto L_entries = lclL.graph.entries; + auto L_values = lclL.values; + + auto lclU = U_->getLocalMatrixDevice(); + row_map_type U_rowmap = lclU.graph.row_map; + auto U_entries = lclU.graph.entries; + auto U_values = lclU.values; + + auto lclMtx = A_local_crs_->getLocalMatrixDevice(); + KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), LevelOfFill_, + lclMtx.graph.row_map, lclMtx.graph.entries, lclMtx.values, + L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values ); + + L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ()); + U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ()); + + L_solver_->compute (); + U_solver_->compute (); +} + +template +void RILUK::compute_kkspiluk_stream() +{ + for(int i = 0; i < num_streams_; i++) { + L_v_[i]->resumeFill (); + U_v_[i]->resumeFill (); + + L_v_[i]->setAllToScalar (STS::zero ()); // Zero out L and U matrices + U_v_[i]->setAllToScalar (STS::zero ()); + } + std::vector L_rowmap_v(num_streams_); + std::vector L_entries_v(num_streams_); + std::vector L_values_v(num_streams_); + std::vector U_rowmap_v(num_streams_); + std::vector U_entries_v(num_streams_); + std::vector U_values_v(num_streams_); + std::vector KernelHandle_rawptr_v_(num_streams_); + for(int i = 0; i < num_streams_; i++) { + auto lclL = L_v_[i]->getLocalMatrixDevice(); + L_rowmap_v[i] = lclL.graph.row_map; + L_entries_v[i] = lclL.graph.entries; + L_values_v[i] = lclL.values; + + auto lclU = U_v_[i]->getLocalMatrixDevice(); + U_rowmap_v[i] = lclU.graph.row_map; + U_entries_v[i] = lclU.graph.entries; + U_values_v[i] = 
lclU.values; + KernelHandle_rawptr_v_[i] = KernelHandle_v_[i].getRawPtr(); } - else { - {//Make sure values in A is picked up even in case of pattern reuse - RCP A_local_crs = Details::getCrsMatrix(A_local_); - if(A_local_crs.is_null()) { - local_ordinal_type numRows = A_local_->getLocalNumRows(); - Array entriesPerRow(numRows); - for(local_ordinal_type i = 0; i < numRows; i++) { - entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i); - } - RCP A_local_crs_nc = - rcp (new crs_matrix_type (A_local_->getRowMap (), - A_local_->getColMap (), - entriesPerRow())); - // copy entries into A_local_crs - nonconst_local_inds_host_view_type indices("indices",A_local_->getLocalMaxNumRowEntries()); - nonconst_values_host_view_type values("values",A_local_->getLocalMaxNumRowEntries()); - for(local_ordinal_type i = 0; i < numRows; i++) { - size_t numEntries = 0; - A_local_->getLocalRowCopy(i, indices, values, numEntries); - A_local_crs_nc->insertLocalValues(i, numEntries, reinterpret_cast(values.data()),indices.data()); - } - A_local_crs_nc->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ()); - A_local_crs = rcp_const_cast (A_local_crs_nc); - } - auto lclMtx = A_local_crs->getLocalMatrixDevice(); - if (!isKokkosKernelsStream_) { - A_local_rowmap_ = lclMtx.graph.row_map; - A_local_entries_ = lclMtx.graph.entries; - A_local_values_ = lclMtx.values; - } - else { - if (!hasStreamReordered_) - KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks); - else - perm_v_ = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks, true); - A_local_diagblks_rowmap_v_ = std::vector(num_streams_); - A_local_diagblks_entries_v_ = std::vector(num_streams_); - A_local_diagblks_values_v_ = std::vector(num_streams_); - for(int i = 0; i < num_streams_; i++) { - A_local_diagblks_rowmap_v_[i] = A_local_diagblks[i].graph.row_map; - A_local_diagblks_entries_v_[i] = A_local_diagblks[i].graph.entries; - 
A_local_diagblks_values_v_[i] = A_local_diagblks[i].values; - } - } + { + auto lclMtx = A_local_crs_->getLocalMatrixDevice(); + // A_local_diagblks was already setup during initialize, just copy the corresponding + // values from A_local_crs_ in parallel now. + using TeamPolicy = Kokkos::TeamPolicy; + const auto A_nrows = lclMtx.numRows(); + auto rows_per_block = ((A_nrows % num_streams_) == 0) + ? (A_nrows / num_streams_) + : (A_nrows / num_streams_ + 1); + for(int i = 0; i < num_streams_; i++) { + const auto start_row_offset = i * rows_per_block; + auto rowptrs = A_local_diagblks_rowmap_v_[i]; + auto colindices = A_local_diagblks_entries_v_[i]; + auto values = A_local_diagblks_values_v_[i]; + const bool reordered = hasStreamReordered_; + typename lno_nonzero_view_t::non_const_type reverse_perm = hasStreamReordered_ ? reverse_perm_v_[i] : typename lno_nonzero_view_t::non_const_type{}; + TeamPolicy pol(exec_space_instances_[i], A_local_diagblks_rowmap_v_[i].extent(0) - 1, Kokkos::AUTO); + Kokkos::parallel_for(pol, KOKKOS_LAMBDA (const typename TeamPolicy::member_type &team) { + const auto irow = team.league_rank(); + const auto irow_A = start_row_offset + (reordered ? reverse_perm(irow) : irow); + const auto A_local_crs_row = lclMtx.rowConst(irow_A); + const auto begin_row = rowptrs(irow); + const auto num_entries = rowptrs(irow + 1) - begin_row; + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, num_entries), [&](const int j) { + const auto colidx = colindices(begin_row + j); + const auto colidx_A = start_row_offset + (reordered ? 
reverse_perm(colidx) : colidx); + // Find colidx in A_local_crs_row + const int offset = KokkosSparse::findRelOffset( + &A_local_crs_row.colidx(0), A_local_crs_row.length, colidx_A, 0, false); + values(begin_row + j) = A_local_crs_row.value(offset); + }); + }); } + } - if (!isKokkosKernelsStream_) { - L_->resumeFill (); - U_->resumeFill (); + KokkosSparse::Experimental::spiluk_numeric_streams( exec_space_instances_, KernelHandle_rawptr_v_, LevelOfFill_, + A_local_diagblks_rowmap_v_, A_local_diagblks_entries_v_, A_local_diagblks_values_v_, + L_rowmap_v, L_entries_v, L_values_v, + U_rowmap_v, U_entries_v, U_values_v ); + for(int i = 0; i < num_streams_; i++) { + L_v_[i]->fillComplete (); + U_v_[i]->fillComplete (); + } - if (L_->isStaticGraph () || L_->isLocallyIndexed ()) { - L_->setAllToScalar (STS::zero ()); // Zero out L and U matrices - U_->setAllToScalar (STS::zero ()); - } - } - else { - for(int i = 0; i < num_streams_; i++) { - L_v_[i]->resumeFill (); - U_v_[i]->resumeFill (); + L_solver_->compute (); + U_solver_->compute (); +} - if (L_v_[i]->isStaticGraph () || L_v_[i]->isLocallyIndexed ()) { - L_v_[i]->setAllToScalar (STS::zero ()); // Zero out L and U matrices - U_v_[i]->setAllToScalar (STS::zero ()); - } - } - } +template +void RILUK::compute () +{ + using Teuchos::RCP; + using Teuchos::rcp; + using Teuchos::rcp_const_cast; + using Teuchos::rcp_dynamic_cast; + using Teuchos::Array; + using Teuchos::ArrayView; + const char prefix[] = "Ifpack2::RILUK::compute: "; - using row_map_type = typename crs_matrix_type::local_matrix_device_type::row_map_type; + // initialize() checks this too, but it's easier for users if the + // error shows them the name of the method that they actually + // called, rather than the name of some internally called method. + TEUCHOS_TEST_FOR_EXCEPTION + (A_.is_null (), std::runtime_error, prefix << "The matrix is null. Please " + "call setMatrix() with a nonnull input before calling this method."); + TEUCHOS_TEST_FOR_EXCEPTION + (! 
A_->isFillComplete (), std::runtime_error, prefix << "The matrix is not " + "fill complete. You may not invoke initialize() or compute() with this " + "matrix until the matrix is fill complete. If your matrix is a " + "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and " + "range Maps, if appropriate) before calling this method."); - if (!isKokkosKernelsStream_) { - auto lclL = L_->getLocalMatrixDevice(); - row_map_type L_rowmap = lclL.graph.row_map; - auto L_entries = lclL.graph.entries; - auto L_values = lclL.values; + if (! isInitialized ()) { + initialize (); // Don't count this in the compute() time + } - auto lclU = U_->getLocalMatrixDevice(); - row_map_type U_rowmap = lclU.graph.row_map; - auto U_entries = lclU.graph.entries; - auto U_values = lclU.values; + Teuchos::Time timer ("RILUK::compute"); - KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), LevelOfFill_, - A_local_rowmap_, A_local_entries_, A_local_values_, - L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values ); + // Start timing + Teuchos::TimeMonitor timeMon (timer); + double startTime = timer.wallTime(); - L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ()); - U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ()); + isComputed_ = false; - L_solver_->setMatrix (L_); - U_solver_->setMatrix (U_); - } - else { - std::vector L_rowmap_v(num_streams_); - std::vector L_entries_v(num_streams_); - std::vector L_values_v(num_streams_); - std::vector U_rowmap_v(num_streams_); - std::vector U_entries_v(num_streams_); - std::vector U_values_v(num_streams_); - std::vector KernelHandle_rawptr_v_(num_streams_); - for(int i = 0; i < num_streams_; i++) { - auto lclL = L_v_[i]->getLocalMatrixDevice(); - L_rowmap_v[i] = lclL.graph.row_map; - L_entries_v[i] = lclL.graph.entries; - L_values_v[i] = lclL.values; - - auto lclU = U_v_[i]->getLocalMatrixDevice(); - U_rowmap_v[i] = lclU.graph.row_map; - U_entries_v[i] = lclU.graph.entries; - U_values_v[i] = 
lclU.values; - KernelHandle_rawptr_v_[i] = KernelHandle_v_[i].getRawPtr(); + if (!this->isKokkosKernelsSpiluk_) { + compute_serial(); + } + else { + //Make sure values in A is picked up even in case of pattern reuse + if(!A_local_crs_nc_.is_null()) { + A_local_crs_nc_->resumeFill(); + local_ordinal_type numRows = A_local_->getLocalNumRows(); + Array entriesPerRow(numRows); + for(local_ordinal_type i = 0; i < numRows; i++) { + entriesPerRow[i] = A_local_->getNumEntriesInLocalRow(i); } - KokkosSparse::Experimental::spiluk_numeric_streams( exec_space_instances_, KernelHandle_rawptr_v_, LevelOfFill_, - A_local_diagblks_rowmap_v_, A_local_diagblks_entries_v_, A_local_diagblks_values_v_, - L_rowmap_v, L_entries_v, L_values_v, - U_rowmap_v, U_entries_v, U_values_v ); - for(int i = 0; i < num_streams_; i++) { - L_v_[i]->fillComplete (); - U_v_[i]->fillComplete (); + // copy entries into A_local_crs + nonconst_local_inds_host_view_type indices("indices",A_local_->getLocalMaxNumRowEntries()); + nonconst_values_host_view_type values("values",A_local_->getLocalMaxNumRowEntries()); + for(local_ordinal_type i = 0; i < numRows; i++) { + size_t numEntries = 0; + A_local_->getLocalRowCopy(i, indices, values, numEntries); + A_local_crs_nc_->replaceLocalValues(i, numEntries, reinterpret_cast(values.data()),indices.data()); } - - L_solver_->setMatrices (L_v_); - U_solver_->setMatrices (U_v_); + A_local_crs_nc_->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ()); } - L_solver_->compute ();//NOTE: Only do compute if the pointer changed. Otherwise, do nothing - U_solver_->compute ();//NOTE: Only do compute if the pointer changed. 
Otherwise, do nothing + using row_map_type = typename crs_matrix_type::local_matrix_device_type::row_map_type; + if (!isKokkosKernelsStream_) { + compute_kkspiluk(); + } + else { + compute_kkspiluk_stream(); + } } isComputed_ = true; @@ -1160,7 +1196,6 @@ void RILUK::compute () computeTime_ += (timer.wallTime() - startTime); } - template void RILUK:: @@ -1193,8 +1228,10 @@ apply (const Tpetra::MultiVectornorm1 (); - TEUCHOS_TEST_FOR_EXCEPTION( STM::isnaninf (D_nrm1), std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf."); + if (!isKokkosKernelsStream_) { + const magnitude_type D_nrm1 = D_->norm1 (); + TEUCHOS_TEST_FOR_EXCEPTION( STM::isnaninf (D_nrm1), std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf."); + } Teuchos::Array norms (X.getNumVectors ()); X.norm1 (norms ()); bool good = true; diff --git a/packages/ifpack2/test/belos/CMakeLists.txt b/packages/ifpack2/test/belos/CMakeLists.txt index fc969c2b9dee..1971bfaf8437 100644 --- a/packages/ifpack2/test/belos/CMakeLists.txt +++ b/packages/ifpack2/test/belos/CMakeLists.txt @@ -381,78 +381,76 @@ ENDIF() -IF(Kokkos_ENABLE_CUDA) - TRIBITS_ADD_TEST( - tif_belos - NAME RILUK_2streams_hb_belos - ARGS "--xml_file=test_2_RILUK_2streams_nos1_hb.xml" - COMM serial mpi - NUM_MPI_PROCS 2 - STANDARD_PASS_OUTPUT - ) +TRIBITS_ADD_TEST( + tif_belos + NAME RILUK_2streams_hb_belos + ARGS "--xml_file=test_2_RILUK_2streams_nos1_hb.xml" + COMM serial mpi + NUM_MPI_PROCS 2 + STANDARD_PASS_OUTPUT +) - TRIBITS_ADD_TEST( - tif_belos - NAME RILUK_4streams_hb_belos - ARGS "--xml_file=test_2_RILUK_4streams_nos1_hb.xml" - COMM serial mpi - NUM_MPI_PROCS 2 - STANDARD_PASS_OUTPUT - ) +TRIBITS_ADD_TEST( + tif_belos + NAME RILUK_4streams_hb_belos + ARGS "--xml_file=test_2_RILUK_4streams_nos1_hb.xml" + COMM serial mpi + NUM_MPI_PROCS 2 + STANDARD_PASS_OUTPUT +) - TRIBITS_ADD_TEST( - tif_belos - NAME RILUK_2streams_hb_belos - ARGS 
"--xml_file=test_4_RILUK_2streams_nos1_hb.xml" - COMM serial mpi - NUM_MPI_PROCS 4 - STANDARD_PASS_OUTPUT - ) +TRIBITS_ADD_TEST( + tif_belos + NAME RILUK_2streams_hb_belos + ARGS "--xml_file=test_4_RILUK_2streams_nos1_hb.xml" + COMM serial mpi + NUM_MPI_PROCS 4 + STANDARD_PASS_OUTPUT +) - TRIBITS_ADD_TEST( - tif_belos - NAME RILUK_4streams_hb_belos - ARGS "--xml_file=test_4_RILUK_4streams_nos1_hb.xml" - COMM serial mpi - NUM_MPI_PROCS 4 - STANDARD_PASS_OUTPUT - ) - TRIBITS_ADD_TEST( - tif_belos - NAME RILUK_2streams_rcm_hb_belos - ARGS "--xml_file=test_2_RILUK_2streams_rcm_nos1_hb.xml" - COMM serial mpi - NUM_MPI_PROCS 2 - STANDARD_PASS_OUTPUT - ) +TRIBITS_ADD_TEST( + tif_belos + NAME RILUK_4streams_hb_belos + ARGS "--xml_file=test_4_RILUK_4streams_nos1_hb.xml" + COMM serial mpi + NUM_MPI_PROCS 4 + STANDARD_PASS_OUTPUT +) +TRIBITS_ADD_TEST( + tif_belos + NAME RILUK_2streams_rcm_hb_belos + ARGS "--xml_file=test_2_RILUK_2streams_rcm_nos1_hb.xml" + COMM serial mpi + NUM_MPI_PROCS 2 + STANDARD_PASS_OUTPUT +) - TRIBITS_ADD_TEST( - tif_belos - NAME RILUK_4streams_rcm_hb_belos - ARGS "--xml_file=test_2_RILUK_4streams_rcm_nos1_hb.xml" - COMM serial mpi - NUM_MPI_PROCS 2 - STANDARD_PASS_OUTPUT - ) +TRIBITS_ADD_TEST( + tif_belos + NAME RILUK_4streams_rcm_hb_belos + ARGS "--xml_file=test_2_RILUK_4streams_rcm_nos1_hb.xml" + COMM serial mpi + NUM_MPI_PROCS 2 + STANDARD_PASS_OUTPUT +) - TRIBITS_ADD_TEST( - tif_belos - NAME RILUK_2streams_rcm_hb_belos - ARGS "--xml_file=test_4_RILUK_2streams_rcm_nos1_hb.xml" - COMM serial mpi - NUM_MPI_PROCS 4 - STANDARD_PASS_OUTPUT - ) +TRIBITS_ADD_TEST( + tif_belos + NAME RILUK_2streams_rcm_hb_belos + ARGS "--xml_file=test_4_RILUK_2streams_rcm_nos1_hb.xml" + COMM serial mpi + NUM_MPI_PROCS 4 + STANDARD_PASS_OUTPUT +) - TRIBITS_ADD_TEST( - tif_belos - NAME RILUK_4streams_rcm_hb_belos - ARGS "--xml_file=test_4_RILUK_4streams_rcm_nos1_hb.xml" - COMM serial mpi - NUM_MPI_PROCS 4 - STANDARD_PASS_OUTPUT - ) -ENDIF() +TRIBITS_ADD_TEST( + tif_belos + 
NAME RILUK_4streams_rcm_hb_belos + ARGS "--xml_file=test_4_RILUK_4streams_rcm_nos1_hb.xml" + COMM serial mpi + NUM_MPI_PROCS 4 + STANDARD_PASS_OUTPUT +) ENDIF() diff --git a/packages/kokkos-kernels/sparse/impl/KokkosSparse_spmv_impl.hpp b/packages/kokkos-kernels/sparse/impl/KokkosSparse_spmv_impl.hpp index 5f9cbea04004..29e858f2deaa 100644 --- a/packages/kokkos-kernels/sparse/impl/KokkosSparse_spmv_impl.hpp +++ b/packages/kokkos-kernels/sparse/impl/KokkosSparse_spmv_impl.hpp @@ -32,8 +32,6 @@ namespace KokkosSparse { namespace Impl { -constexpr const char* KOKKOSSPARSE_ALG_NATIVE_MERGE = "native-merge"; - // This TransposeFunctor is functional, but not necessarily performant. template @@ -603,7 +601,8 @@ static void spmv_beta(const execution_space& exec, Handle* handle, typename YVector::const_value_type& beta, const YVector& y) { if (mode[0] == NoTranspose[0]) { - if (handle->algo == SPMV_MERGE_PATH) { + if (handle->algo == SPMV_MERGE_PATH || + handle->algo == SPMV_NATIVE_MERGE_PATH) { SpmvMergeHierarchical::spmv( exec, mode, alpha, A, x, beta, y); } else { @@ -611,7 +610,8 @@ static void spmv_beta(const execution_space& exec, Handle* handle, dobeta, false>(exec, handle, alpha, A, x, beta, y); } } else if (mode[0] == Conjugate[0]) { - if (handle->algo == SPMV_MERGE_PATH) { + if (handle->algo == SPMV_MERGE_PATH || + handle->algo == SPMV_NATIVE_MERGE_PATH) { SpmvMergeHierarchical::spmv( exec, mode, alpha, A, x, beta, y); } else { diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_handle.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_handle.hpp index d930cbdfbe35..b0759bb7a633 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_handle.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spmv_handle.hpp @@ -36,8 +36,11 @@ enum SPMVAlgorithm { /// is only used once. SPMV_NATIVE, /// Use the best KokkosKernels implementation, even if a TPL /// implementation is available. 
- SPMV_MERGE_PATH, /// Use load-balancing merge path algorithm (for CrsMatrix - /// only) + SPMV_MERGE_PATH, /// Use algorithm optimized for matrices with + /// imbalanced/irregular sparsity patterns (merge path or + /// similar). May call a TPL. For CrsMatrix only. + SPMV_NATIVE_MERGE_PATH, /// Use the KokkosKernels implementation of merge + /// path. For CrsMatrix only. SPMV_BSR_V41, /// Use experimental version 4.1 algorithm (for BsrMatrix only) SPMV_BSR_V42, /// Use experimental version 4.2 algorithm (for BsrMatrix only) SPMV_BSR_TC /// Use experimental tensor core algorithm (for BsrMatrix only) @@ -59,6 +62,7 @@ inline const char* get_spmv_algorithm_name(SPMVAlgorithm a) { case SPMV_FAST_SETUP: return "SPMV_FAST_SETUP"; case SPMV_NATIVE: return "SPMV_NATIVE"; case SPMV_MERGE_PATH: return "SPMV_MERGE_PATH"; + case SPMV_NATIVE_MERGE_PATH: return "SPMV_NATIVE_MERGE_PATH"; case SPMV_BSR_V41: return "SPMV_BSR_V41"; case SPMV_BSR_V42: return "SPMV_BSR_V42"; case SPMV_BSR_TC: return "SPMV_BSR_TC"; @@ -73,10 +77,11 @@ inline const char* get_spmv_algorithm_name(SPMVAlgorithm a) { inline bool is_spmv_algorithm_native(SPMVAlgorithm a) { switch (a) { case SPMV_NATIVE: - case SPMV_MERGE_PATH: + case SPMV_NATIVE_MERGE_PATH: case SPMV_BSR_V41: case SPMV_BSR_V42: case SPMV_BSR_TC: return true; + // DEFAULT, FAST_SETUP and MERGE_PATH may call TPLs default: return false; } } @@ -352,6 +357,7 @@ struct SPMVHandle } else { switch (get_algorithm()) { case SPMV_MERGE_PATH: + case SPMV_NATIVE_MERGE_PATH: throw std::invalid_argument(std::string("SPMVHandle: algorithm ") + get_spmv_algorithm_name(get_algorithm()) + " cannot be used if A is a BsrMatrix"); diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spmv.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spmv.hpp index c5107fcf0ab8..88927202dad1 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spmv.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_spmv.hpp @@ -493,7 +493,11 @@ 
template (algo, numRows, nnz, bandwidth, row_size_variance, heavy); } diff --git a/packages/muelu/example/basic/Teko.cpp b/packages/muelu/example/basic/Teko.cpp index 29ab168ef2d6..75000774213b 100644 --- a/packages/muelu/example/basic/Teko.cpp +++ b/packages/muelu/example/basic/Teko.cpp @@ -54,7 +54,6 @@ #include "Teuchos_CommandLineProcessor.hpp" // Tpetra includes -#include "mpi.h" #include "Tpetra_CrsMatrix.hpp" #include "Tpetra_Core.hpp" #include "MatrixMarket_Tpetra.hpp" @@ -64,9 +63,6 @@ #include "Teko_InverseFactory.hpp" #include "Teko_InverseLibrary.hpp" #include "Teko_BlockedTpetraOperator.hpp" -#include "Teko_LSCPreconditionerFactory.hpp" -#include "Teko_InvLSCStrategy.hpp" -#include "Teko_SIMPLEPreconditionerFactory.hpp" #include "Teko_TpetraInverseFactoryOperator.hpp" #include "Thyra_TpetraLinearOp.hpp" diff --git a/packages/muelu/research/caglusa/1d-binary/hierarchical.xml b/packages/muelu/research/caglusa/1d-binary/hierarchical.xml index bf084dbde596..bb46a74f4730 100644 --- a/packages/muelu/research/caglusa/1d-binary/hierarchical.xml +++ b/packages/muelu/research/caglusa/1d-binary/hierarchical.xml @@ -22,5 +22,8 @@ + + + + - \ No newline at end of file diff --git a/packages/muelu/research/caglusa/1d-binary/problem.xml b/packages/muelu/research/caglusa/1d-binary/problem.xml index 404e37b2d9d0..1efcb6b30fd3 100644 --- a/packages/muelu/research/caglusa/1d-binary/problem.xml +++ b/packages/muelu/research/caglusa/1d-binary/problem.xml @@ -13,8 +13,4 @@ - - - - \ No newline at end of file diff --git a/packages/muelu/research/caglusa/1d-mm/hierarchical.xml b/packages/muelu/research/caglusa/1d-mm/hierarchical.xml index 77b137aaf1f4..6e6b7ff6cd50 100644 --- a/packages/muelu/research/caglusa/1d-mm/hierarchical.xml +++ b/packages/muelu/research/caglusa/1d-mm/hierarchical.xml @@ -22,5 +22,8 @@ + + + + - \ No newline at end of file diff --git a/packages/muelu/research/caglusa/1d-mm/problem.xml b/packages/muelu/research/caglusa/1d-mm/problem.xml index 
91826bb3d530..d26344751dda 100644 --- a/packages/muelu/research/caglusa/1d-mm/problem.xml +++ b/packages/muelu/research/caglusa/1d-mm/problem.xml @@ -13,8 +13,4 @@ - - - - \ No newline at end of file diff --git a/packages/muelu/research/caglusa/CMakeLists.txt b/packages/muelu/research/caglusa/CMakeLists.txt index b6a1283a830f..393572e78902 100644 --- a/packages/muelu/research/caglusa/CMakeLists.txt +++ b/packages/muelu/research/caglusa/CMakeLists.txt @@ -20,8 +20,8 @@ IF (${PACKAGE_NAME}_ENABLE_Belos AND ${PACKAGE_NAME}_ENABLE_Ifpack2 AND ${PACKAG HierarchicalDriver NAME "HierarchicalDriver-fractionalLaplacian1D" ARGS - "--xml=1d-mm/hierarchical.xml --xmlProblem=1d-mm/problem.xml" - "--xml=1d-binary/hierarchical.xml --xmlProblem=1d-binary/problem.xml" + "--xmlHierarchical=1d-mm/hierarchical.xml --xmlProblem=1d-mm/problem.xml" + "--xmlHierarchical=1d-binary/hierarchical.xml --xmlProblem=1d-binary/problem.xml" COMM mpi NUM_MPI_PROCS 4 ) diff --git a/packages/muelu/research/caglusa/MueLu_IOhelpers.hpp b/packages/muelu/research/caglusa/MueLu_IOhelpers.hpp index 4bd682c4844a..9da714b26f96 100644 --- a/packages/muelu/research/caglusa/MueLu_IOhelpers.hpp +++ b/packages/muelu/research/caglusa/MueLu_IOhelpers.hpp @@ -3,6 +3,7 @@ #include #include +#include namespace MueLu { diff --git a/packages/muelu/research/caglusa/Tpetra_BlockedMatrix_def.hpp b/packages/muelu/research/caglusa/Tpetra_BlockedMatrix_def.hpp index 392df660e046..da572d2f69e5 100644 --- a/packages/muelu/research/caglusa/Tpetra_BlockedMatrix_def.hpp +++ b/packages/muelu/research/caglusa/Tpetra_BlockedMatrix_def.hpp @@ -1,6 +1,9 @@ #ifndef TPETRA_BLOCKEDMATRIX_DEF_HPP #define TPETRA_BLOCKEDMATRIX_DEF_HPP +#include "Kokkos_DualView.hpp" +#include "Teuchos_Assert.hpp" +#include "Tpetra_Access.hpp" namespace Tpetra { template :: TEUCHOS_ASSERT(pointA_->getDomainMap()->isSameAs(*pointA_->getRangeMap())); TEUCHOS_ASSERT(pointA_->getDomainMap()->isSameAs(*pointA_->getRowMap())); 
TEUCHOS_ASSERT(pointA_->getDomainMap()->isSameAs(*blockMap_->pointMap_)); + + { + auto lcl_blockA = blockA_->getLocalMatrixHost(); + auto lcl_pointA = pointA_->getLocalMatrixHost(); + auto lcl_blockSizesRowMap = blockMap_->blockSizes_->getLocalViewHost(Tpetra::Access::ReadOnly); + auto lcl_offsets = Kokkos::create_mirror_view(blockMap_->offsets_); + Kokkos::deep_copy(lcl_offsets, blockMap_->offsets_); + typename lo_vec_type::dual_view_type::t_host::const_type lcl_blockSizesColMap; + Kokkos::View lcl_ghostedOffsets; + if (!ghosted_blockMap.is_null()) { + lcl_blockSizesColMap = ghosted_blockMap_->blockSizes_->getLocalViewHost(Tpetra::Access::ReadOnly); + lcl_ghostedOffsets = Kokkos::create_mirror_view(ghosted_blockMap_->offsets_); + Kokkos::deep_copy(lcl_ghostedOffsets, ghosted_blockMap_->offsets_); + } else { + lcl_blockSizesColMap = lcl_blockSizesRowMap; + lcl_ghostedOffsets = lcl_offsets; + } + for (LocalOrdinal brlid = 0; brlid < lcl_blockA.numRows(); ++brlid) { + size_t brsize = lcl_blockSizesRowMap(brlid, 0); + auto brow = lcl_blockA.row(brlid); + for (LocalOrdinal k = 0; k < brow.length; ++k) { + LocalOrdinal bclid = brow.colidx(k); + size_t bcsize = lcl_blockSizesColMap(bclid, 0); + + // We expect a dense block of size brsize*bcsize. 
+ const LocalOrdinal row_start = lcl_offsets(brlid); + const LocalOrdinal row_end = lcl_offsets(brlid + 1); + const LocalOrdinal col_start = lcl_ghostedOffsets(bclid); + const LocalOrdinal col_end = lcl_ghostedOffsets(bclid + 1); + + TEUCHOS_ASSERT_EQUALITY(Teuchos::as(row_end - row_start), brsize); + TEUCHOS_ASSERT_EQUALITY(Teuchos::as(col_end - col_start), bcsize); + + size_t entries = 0; + for (LocalOrdinal rlid = row_start; rlid < row_end; ++rlid) { + auto row = lcl_pointA.row(rlid); + size_t entriesInRow = 0; + for (LocalOrdinal n = 0; n < row.length; ++n) { + auto clid = row.colidx(n); + if ((col_start <= clid) && (clid < col_end)) { + ++entriesInRow; + ++entries; + } + } + TEUCHOS_ASSERT_EQUALITY(entriesInRow, bcsize); + } + TEUCHOS_ASSERT_EQUALITY(entries, brsize * bcsize); + } + } + } } template #include #include +#include #include #include +#include "Teuchos_VerbosityLevel.hpp" namespace Tpetra { @@ -52,7 +54,7 @@ template ::scalar_type, class LocalOrdinal = typename Tpetra::Operator::local_ordinal_type, class GlobalOrdinal = typename Tpetra::Operator::global_ordinal_type, class Node = typename Tpetra::Operator::node_type> -class HierarchicalOperator : public Tpetra::RowMatrix { +class HierarchicalOperator : public Tpetra::OperatorWithDiagonal { public: using matrix_type = Tpetra::CrsMatrix; using mv_type = Tpetra::MultiVector; @@ -132,6 +134,61 @@ class HierarchicalOperator : public Tpetra::RowMatrix(nnz) / (getDomainMap()->getGlobalNumElements() * getDomainMap()->getGlobalNumElements()); } + size_t numClusterPairsOnLevelGlobal(size_t level) const { + using vec_type = typename Tpetra::Vector; + using Teuchos::RCP; + const Scalar ONE = Teuchos::ScalarTraits::one(); + const Scalar ZERO = Teuchos::ScalarTraits::zero(); + if (level == 0) { + // RCP tempV = Teuchos::rcp(new vec_type(kernelApproximations_->blockMap_->blockMap_, false)); + // RCP tempV2 = Teuchos::rcp(new vec_type(kernelApproximations_->blockMap_->blockMap_, false)); + + // 
tempV->putScalar(ONE); + // transferMatrices_[0]->blockA_->apply(*tempV, *tempV2, Teuchos::TRANS); + // tempV->putScalar(ZERO); + // kernelApproximations_->blockA_->apply(*tempV2, *tempV); + // return Teuchos::as(tempV->dot(*tempV2)); + return 0; + } else if (level <= transferMatrices_.size()) { + RCP tempV = Teuchos::rcp(new vec_type(kernelApproximations_->blockMap_->blockMap_, false)); + RCP tempV2 = Teuchos::rcp(new vec_type(kernelApproximations_->blockMap_->blockMap_, false)); + + tempV->putScalar(ONE); + transferMatrices_[level - 1]->blockA_->apply(*tempV, *tempV2, Teuchos::TRANS); + tempV->putScalar(ZERO); + kernelApproximations_->blockA_->apply(*tempV2, *tempV); + return Teuchos::as(tempV->dot(*tempV2)); + } else { + TEUCHOS_ASSERT(false); + } + } + + size_t numClustersOnLevelGlobal(size_t level) const { + if (level > 0) + return Teuchos::as(transferMatrices_[level - 1]->blockA_->getGlobalNumEntries()); + else { + const double treeCoarseningFactor = params_->get("treeCoarseningFactor"); + return Teuchos::as(transferMatrices_[0]->blockA_->getGlobalNumEntries() / treeCoarseningFactor); + } + } + + size_t numClustersOnLevelLocal(size_t level) const { + if (level > 0) + return Teuchos::as(transferMatrices_[level - 1]->blockA_->getLocalNumEntries()); + else { + const double treeCoarseningFactor = params_->get("treeCoarseningFactor"); + return Teuchos::as(transferMatrices_[0]->blockA_->getLocalNumEntries() / treeCoarseningFactor); + } + } + + size_t numLevels() const { + return transferMatrices_.size() + 1; + } + + void setDebugOutput(const bool debugOutput) { + debugOutput_ = debugOutput; + } + Teuchos::RCP nearFieldMatrix() { return nearField_; } @@ -181,92 +238,10 @@ class HierarchicalOperator : public Tpetra::RowMatrixgetLocalNumEntries(); } - size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const { - throw std::runtime_error("Not implemented."); - } - - size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const { - throw std::runtime_error("Not 
implemented."); - } - - size_t getGlobalMaxNumRowEntries() const { - throw std::runtime_error("Not implemented."); - } - - LocalOrdinal getBlockSize() const { - throw std::runtime_error("Not implemented."); - } - - size_t getLocalMaxNumRowEntries() const { - throw std::runtime_error("Not implemented."); - } - - bool hasColMap() const { - return false; - } - - bool isLocallyIndexed() const { - return true; - } - - bool isGloballyIndexed() const { - return true; - } - - bool isFillComplete() const { - return true; - } - - bool supportsRowViews() const { - return false; - } - - void - getGlobalRowCopy(GlobalOrdinal GlobalRow, - nonconst_global_inds_host_view_type& Indices, - nonconst_values_host_view_type& Values, - size_t& NumEntries) const { - throw std::runtime_error("Not implemented."); - } - - void - getLocalRowCopy(LocalOrdinal LocalRow, - nonconst_local_inds_host_view_type& Indices, - nonconst_values_host_view_type& Values, - size_t& NumEntries) const { - throw std::runtime_error("Not implemented."); - } - - void - getGlobalRowView(GlobalOrdinal GlobalRow, - global_inds_host_view_type& indices, - values_host_view_type& values) const { - throw std::runtime_error("Not implemented."); - } - - void - getLocalRowView(LocalOrdinal LocalRow, - local_inds_host_view_type& indices, - values_host_view_type& values) const { - throw std::runtime_error("Not implemented."); - } - void getLocalDiagCopy(Vector& diag) const { nearField_->getLocalDiagCopy(diag); } - void leftScale(const Vector& x) { - throw std::runtime_error("Not implemented."); - } - - void rightScale(const Vector& x) { - throw std::runtime_error("Not implemented."); - } - - mag_type getFrobeniusNorm() const { - return 0.; - } - void describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel) const { describe(out, verbLevel, true); } @@ -309,6 +284,18 @@ class HierarchicalOperator : public Tpetra::RowMatrix 0; + return !transferMatrices_.empty(); } bool denserThanDenseMatrix() const { diff 
--git a/packages/muelu/research/caglusa/Tpetra_HierarchicalOperator_def.hpp b/packages/muelu/research/caglusa/Tpetra_HierarchicalOperator_def.hpp index 67af4b36c4e6..f4e820a64286 100644 --- a/packages/muelu/research/caglusa/Tpetra_HierarchicalOperator_def.hpp +++ b/packages/muelu/research/caglusa/Tpetra_HierarchicalOperator_def.hpp @@ -3,6 +3,7 @@ #include #include +#include namespace Tpetra { @@ -18,7 +19,7 @@ removeSmallEntries(Teuchos::RCP ATS; + using ATS = Kokkos::ArithTraits; using impl_SC = typename ATS::val_type; using impl_ATS = Kokkos::ArithTraits; @@ -26,30 +27,22 @@ removeSmallEntries(Teuchos::RCP(0, lclA.numRows()), - KOKKOS_LAMBDA(const LocalOrdinal rlid) { + KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& partial_nnz, bool is_final) { auto row = lclA.row(rlid); for (LocalOrdinal k = 0; k < row.length; ++k) { if (impl_ATS::magnitude(row.value(k)) > tol) { - rowptr(rlid + 1) += 1; + partial_nnz += 1; } } - }); - LocalOrdinal nnz; - Kokkos::parallel_scan( - "removeSmallEntries::rowptr2", - Kokkos::RangePolicy(0, lclA.numRows()), - KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& partial_nnz, bool is_final) { - partial_nnz += rowptr(rlid + 1); if (is_final) rowptr(rlid + 1) = partial_nnz; }, nnz); - // auto nnz = rowptr(lclA.numRows()); - auto idx = col_idx_type("idx", nnz); auto vals = vals_type("vals", nnz); @@ -68,6 +61,57 @@ removeSmallEntries(Teuchos::RCPgetRowMap(), A->getColMap(), rowptr, idx, vals)); + newA->fillComplete(A->getDomainMap(), + A->getRangeMap()); + return newA; +} + +template +Teuchos::RCP > +constructSubMatrix(const Teuchos::RCP >& A, + std::pair lid_range) { + using crs_matrix = Tpetra::CrsMatrix; + using local_graph_type = typename crs_matrix::local_graph_device_type; + using local_matrix_type = typename crs_matrix::local_matrix_device_type; + using row_ptr_type = typename local_graph_type::row_map_type::non_const_type; + using col_idx_type = typename local_graph_type::entries_type::non_const_type; + using vals_type = 
typename local_matrix_type::values_type; + + auto lclA = A->getLocalMatrixDevice(); + + auto old_rowptr = lclA.graph.row_map; + auto rowptr = row_ptr_type("rowptr", lclA.numRows() + 1); + + LocalOrdinal nnz; + Kokkos::parallel_scan( + "constructSubMatrix::rowptr", + Kokkos::RangePolicy(0, lclA.numRows()), + KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& partial_nnz, bool is_final) { + if ((lid_range.first <= rlid) && (rlid < lid_range.second)) { + partial_nnz += old_rowptr(rlid + 1) - old_rowptr(rlid); + } + if (is_final) + rowptr(rlid + 1) = partial_nnz; + }, + nnz); + + typename local_graph_type::size_type start; + typename local_graph_type::size_type end; + Kokkos::deep_copy(start, Kokkos::subview(old_rowptr, lid_range.first)); + Kokkos::deep_copy(end, Kokkos::subview(old_rowptr, lid_range.second)); + auto vals_range = Kokkos::make_pair(start, end); + + TEUCHOS_ASSERT_EQUALITY(nnz, static_cast(end - start)); + + auto idx = Kokkos::subview(lclA.graph.entries, vals_range); + auto vals = Kokkos::subview(lclA.values, vals_range); + auto newA = Teuchos::rcp(new crs_matrix(A->getRowMap(), A->getColMap(), rowptr, idx, vals)); newA->fillComplete(A->getDomainMap(), A->getRangeMap()); @@ -148,6 +192,8 @@ HierarchicalOperator:: defaultParams.set("keepTransfers", -1); defaultParams.set("treeCoarseningFactor", 2.0); defaultParams.set("leftOverFactor", 1.0); + defaultParams.set("batchSize", 50); + defaultParams.set("numBatches", 36); if (params_.is_null()) params_ = Teuchos::rcp(new Teuchos::ParameterList("")); params_->validateParametersAndSetDefaults(defaultParams); @@ -159,7 +205,11 @@ HierarchicalOperator:: sendTypeKernelApproximations = params_->get("Send type kernelApproximations"); coarseningCriterion_ = params_->get("Coarsening criterion"); TEUCHOS_ASSERT((coarseningCriterion_ == "numClusters") || (coarseningCriterion_ == "equivalentDense") || (coarseningCriterion_ == "transferLevels")); - debugOutput_ = params_->get("debugOutput"); + 
setDebugOutput(params_->get("debugOutput")); + + auto comm = getComm(); + if (debugOutput_ && (comm->getRank() == 0)) + std::cout << *params_ << std::endl; if (doDebugChecks) { // near field matrix lives on map and is nonlocal @@ -523,6 +573,8 @@ Teuchos::RCP > const Scalar ZERO = Teuchos::ScalarTraits::zero(); const Scalar HALF = ONE / (ONE + ONE); + RCP coarseParams = rcp(new Teuchos::ParameterList(*params_)); + // newBasisMatrix = P^T * basisMatrix RCP newBasisMatrix = rcp(new matrix_type(P->getDomainMap(), clusterCoeffMap_, 0)); MatrixMatrix::Multiply(*P, true, *basisMatrix_, false, *newBasisMatrix); @@ -576,10 +628,11 @@ Teuchos::RCP > } // coarse cluster pair graph - RCP newKernelBlockGraph = rcp(new matrix_type(kernelApproximations_->blockA_->getCrsGraph())); - newKernelBlockGraph->resumeFill(); + RCP newKernelBlockGraph; // point entries of cluster pairs that should be moved to the near field - RCP diffKernelApprox = rcp(new matrix_type(kernelApproximations_->pointA_->getCrsGraph())); + RCP diffKernelApprox; + // coarse point matrix of cluster pairs + Teuchos::RCP newKernelApprox; // Determine which cluster pairs should be moved to the near field. // We are constructing the coarse block matrix newKernelBlockGraph @@ -622,7 +675,7 @@ Teuchos::RCP > } std::sort(clusterPairSizes.begin(), clusterPairSizes.end()); double coarseningRate = Teuchos::as(P->getGlobalNumCols()) / Teuchos::as(P->getGlobalNumRows()); - tgt_clusterPairSize = clusterPairSizes[Teuchos::as(clusterPairSizes.size() * (1 - coarseningRate))]; + tgt_clusterPairSize = clusterPairSizes[Teuchos::as(static_cast(clusterPairSizes.size()) * (1 - coarseningRate))]; // std::cout << "HERE " << clusterPairSizes[0] << " " << tgt_clusterPairSize << " " << clusterPairSizes[clusterPairSizes.size()-1] << std::endl; } @@ -630,145 +683,180 @@ Teuchos::RCP > // Drop cluster pairs by level in the tree. 
auto comm = getComm(); std::set blidsToDrop; + LocalOrdinal minDrop = kernelApproximations_->blockMap_->blockMap_->getLocalNumElements() + 1; + LocalOrdinal maxDrop = -1; if (coarseningCriterion_ == "transferLevels") { double coarseningRate = Teuchos::as(P->getGlobalNumCols()) / Teuchos::as(P->getGlobalNumRows()); size_t droppedClusterPairs = 0; size_t totalNumClusterPairs = kernelApproximations_->blockA_->getGlobalNumEntries(); RCP tempV = Teuchos::rcp(new vec_type(kernelApproximations_->blockMap_->blockMap_, false)); RCP tempV2 = Teuchos::rcp(new vec_type(kernelApproximations_->blockMap_->blockMap_, false)); - int keepTransfers = params_->get("keepTransfers", -1); + // keepTransfers == transferMatrices_.size(): keep all transfers + // keepTransfers == 0: keep no transfers + int keepTransfers = params_->get("keepTransfers", -1); + const double treeCoarseningFactor = params_->get("treeCoarseningFactor"); if (keepTransfers == -1) { - double leftOverFactor = params_->get("leftOverFactor"); - keepTransfers = transferMatrices_.size(); - double temp = (1.0 / coarseningRate) * leftOverFactor; - const double treeCoarseningFactor = params_->get("treeCoarseningFactor"); - while (temp >= 2.0) { + double leftOverFactor = params_->get("leftOverFactor"); + keepTransfers = transferMatrices_.size(); + double temp = (1.0 / coarseningRate) * leftOverFactor; + while (temp >= 1.0) { --keepTransfers; temp /= treeCoarseningFactor; } keepTransfers = std::max(keepTransfers, 0); - params_->set("leftOverFactor", temp); + coarseParams->set("leftOverFactor", temp); } + coarseParams->set("keepTransfers", -1); + TEUCHOS_ASSERT((0 <= keepTransfers) && (keepTransfers <= Teuchos::as(transferMatrices_.size()))); + size_t droppedTransfers = 0; for (int k = Teuchos::as(transferMatrices_.size()) - 1; k >= 0; --k) { - size_t clustersInLevel = transferMatrices_[k]->blockA_->getGlobalNumEntries(); + size_t numTreeEdgesBetweenLevels = transferMatrices_[k]->blockA_->getGlobalNumEntries(); + size_t 
numClusters_k1 = numTreeEdgesBetweenLevels; + auto numClusters_k = Teuchos::as(static_cast(numTreeEdgesBetweenLevels) / treeCoarseningFactor); if (debugOutput_ && (comm->getRank() == 0)) - std::cout << "level " << k << " clustersInLevel " << clustersInLevel << std::endl; + std::cout << "transfer " << k << " between levels " << k + 1 << " and " << k << " maps " << numClusters_k1 << " to " << numClusters_k << " clusters" << std::endl; tempV->putScalar(ONE); transferMatrices_[k]->blockA_->apply(*tempV, *tempV2, Teuchos::TRANS); - - size_t numClusters = tempV2->norm1(); - if (debugOutput_ && (comm->getRank() == 0)) - std::cout << "numClusters " << numClusters << std::endl; tempV->putScalar(ZERO); kernelApproximations_->blockA_->apply(*tempV2, *tempV); - Scalar numClusterPairs = tempV->dot(*tempV2); if (debugOutput_ && (comm->getRank() == 0)) - std::cout << "numClusterPairs " << numClusterPairs << std::endl; + std::cout << "cluster pairs on level " << k + 1 << ": " << numClusterPairs << std::endl; bool doDrop; if (keepTransfers >= 0) { doDrop = (keepTransfers <= k); } else { - doDrop = (droppedClusterPairs + numClusterPairs < (1.0 - coarseningRate) * totalNumClusterPairs); + doDrop = (droppedClusterPairs + numClusterPairs < (1.0 - coarseningRate) * static_cast(totalNumClusterPairs)); } if (doDrop) { auto lcl_transfer = transferMatrices_[k]->blockA_->getLocalMatrixHost(); auto lcl_transfer_graph = lcl_transfer.graph; - for (LocalOrdinal j = 0; j < lcl_transfer_graph.entries.extent_int(0); j++) + for (LocalOrdinal j = 0; j < lcl_transfer_graph.entries.extent_int(0); j++) { blidsToDrop.insert(lcl_transfer_graph.entries(j)); + minDrop = std::min(minDrop, lcl_transfer_graph.entries(j)); + maxDrop = std::max(maxDrop, lcl_transfer_graph.entries(j)); + } + droppedTransfers += 1; droppedClusterPairs += numClusterPairs; - } else { - if (debugOutput_ && (comm->getRank() == 0)) - std::cout << "Dropped " << transferMatrices_.size() - 1 - k << " transfers of " << 
transferMatrices_.size() << " dropped cp: " << droppedClusterPairs << std::endl; - break; } } + if (debugOutput_ && (comm->getRank() == 0)) + std::cout << "Dropped " << droppedTransfers << " transfers of " << transferMatrices_.size() << ", dropped cluster pairs: " << droppedClusterPairs << std::endl; } + bool dropContiguousRange = (static_cast(maxDrop + 1 - minDrop) == blidsToDrop.size()); + // number of cluster pairs dropped int dropped = 0; // number of cluster pairs we kept int kept = 0; // number of cluster pairs that were no longer present int ignored = 0; - // loop over cluster pairs - // TODO: parallel_for - auto lcl_BlockGraph = kernelApproximations_->blockA_->getLocalMatrixHost(); - auto lcl_newBlockGraph = newKernelBlockGraph->getLocalMatrixHost(); - auto lcl_KernelApprox = kernelApproximations_->pointA_->getLocalMatrixHost(); - auto lcl_diffKernelApprox = diffKernelApprox->getLocalMatrixHost(); - for (LocalOrdinal brlid = 0; brlid < lcl_BlockGraph.numRows(); ++brlid) { - size_t brsize = lcl_clusterSizes(brlid, 0); - auto brow = lcl_BlockGraph.row(brlid); - auto new_brow = lcl_newBlockGraph.row(brlid); - for (LocalOrdinal k = 0; k < brow.length; ++k) { - // Entries of the block matrix for kernelApproximations - // decide whether the cluster pair is present and only take - // values 1 or 0. 
- if (brow.value(k) > HALF) { - LocalOrdinal bclid = brow.colidx(k); - size_t bcsize = lcl_ghosted_clusterSizes(bclid, 0); - - // criterium for removing a cluster pair from the far field - bool removeCluster = false; - if (coarseningCriterion_ == "equivalentDense") { - // Size of the sparse cluster approximation >= size of dense equivalent - removeCluster = (brsize * bcsize >= lcl_numUnknownsPerCluster(brlid, 0) * lcl_ghosted_numUnknownsPerCluster(bclid, 0)); - } else if (coarseningCriterion_ == "numClusters") { - removeCluster = (lcl_numUnknownsPerCluster(brlid, 0) * lcl_ghosted_numUnknownsPerCluster(bclid, 0) < tgt_clusterPairSize); - } else if (coarseningCriterion_ == "transferLevels") { - removeCluster = ((blidsToDrop.find(brlid) != blidsToDrop.end()) || - (blidsToDrop.find(bclid) != blidsToDrop.end())); - } - if (removeCluster) { - // we are dropping the cluster pair from the far field - ++dropped; - new_brow.value(k) = ZERO; - - // loop over the point matrix and add the entries to diffKernelApprox - const LocalOrdinal row_start = lcl_offsets(brlid); - const LocalOrdinal row_end = lcl_offsets(brlid + 1); - const LocalOrdinal col_start = lcl_ghosted_offsets(bclid); - const LocalOrdinal col_end = lcl_ghosted_offsets(bclid + 1); - TEUCHOS_ASSERT_EQUALITY(Teuchos::as(row_end - row_start), brsize); - TEUCHOS_ASSERT_EQUALITY(Teuchos::as(col_end - col_start), bcsize); - for (LocalOrdinal rlid = row_start; rlid < row_end; ++rlid) { - auto diff_row = lcl_diffKernelApprox.row(rlid); - auto row = lcl_KernelApprox.row(rlid); - size_t removed = 0; - for (LocalOrdinal n = 0; n < row.length; ++n) { - if ((col_start <= row.colidx(n)) && (col_end > row.colidx(n))) { - diff_row.value(n) = row.value(n); - ++removed; - } - } - if (removed != bcsize) { - std::ostringstream oss; - oss << "brlid " << brlid << " row " << rlid << std::endl; - oss << "col_start " << col_start << " col_end " << col_end << std::endl; + + if (dropContiguousRange) { + newKernelBlockGraph = 
constructSubMatrix(kernelApproximations_->blockA_, {0, minDrop}); + dropped = maxDrop + 1 - minDrop; + kept = minDrop; + LocalOrdinal minLid = lcl_offsets(minDrop); + LocalOrdinal maxLid = lcl_offsets(maxDrop + 1); + diffKernelApprox = constructSubMatrix(kernelApproximations_->pointA_, {minLid, maxLid}); + newKernelApprox = constructSubMatrix(kernelApproximations_->pointA_, {0, minLid}); + + } else { + newKernelBlockGraph = rcp(new matrix_type(kernelApproximations_->blockA_->getCrsGraph())); + newKernelBlockGraph->resumeFill(); + diffKernelApprox = rcp(new matrix_type(kernelApproximations_->pointA_->getCrsGraph())); + + // loop over cluster pairs + // TODO: parallel_for + auto lcl_BlockGraph = kernelApproximations_->blockA_->getLocalMatrixHost(); + auto lcl_newBlockGraph = newKernelBlockGraph->getLocalMatrixHost(); + auto lcl_KernelApprox = kernelApproximations_->pointA_->getLocalMatrixHost(); + auto lcl_diffKernelApprox = diffKernelApprox->getLocalMatrixHost(); + for (LocalOrdinal brlid = 0; brlid < lcl_BlockGraph.numRows(); ++brlid) { + size_t brsize = lcl_clusterSizes(brlid, 0); + auto brow = lcl_BlockGraph.row(brlid); + auto new_brow = lcl_newBlockGraph.row(brlid); + for (LocalOrdinal k = 0; k < brow.length; ++k) { + // Entries of the block matrix for kernelApproximations + // decide whether the cluster pair is present and only take + // values 1 or 0. 
+ if (brow.value(k) > HALF) { + LocalOrdinal bclid = brow.colidx(k); + size_t bcsize = lcl_ghosted_clusterSizes(bclid, 0); + + // criterium for removing a cluster pair from the far field + bool removeCluster = false; + if (coarseningCriterion_ == "equivalentDense") { + // Size of the sparse cluster approximation >= size of dense equivalent + removeCluster = (brsize * bcsize >= lcl_numUnknownsPerCluster(brlid, 0) * lcl_ghosted_numUnknownsPerCluster(bclid, 0)); + } else if (coarseningCriterion_ == "numClusters") { + removeCluster = (lcl_numUnknownsPerCluster(brlid, 0) * lcl_ghosted_numUnknownsPerCluster(bclid, 0) < tgt_clusterPairSize); + } else if (coarseningCriterion_ == "transferLevels") { + removeCluster = ((blidsToDrop.find(brlid) != blidsToDrop.end()) || + (blidsToDrop.find(bclid) != blidsToDrop.end())); + } + if (removeCluster) { + // we are dropping the cluster pair from the far field + ++dropped; + new_brow.value(k) = ZERO; + + // loop over the point matrix and add the entries to diffKernelApprox + const LocalOrdinal row_start = lcl_offsets(brlid); + const LocalOrdinal row_end = lcl_offsets(brlid + 1); + const LocalOrdinal col_start = lcl_ghosted_offsets(bclid); + const LocalOrdinal col_end = lcl_ghosted_offsets(bclid + 1); + TEUCHOS_ASSERT_EQUALITY(Teuchos::as(row_end - row_start), brsize); + TEUCHOS_ASSERT_EQUALITY(Teuchos::as(col_end - col_start), bcsize); + for (LocalOrdinal rlid = row_start; rlid < row_end; ++rlid) { + auto diff_row = lcl_diffKernelApprox.row(rlid); + auto row = lcl_KernelApprox.row(rlid); + size_t removed = 0; for (LocalOrdinal n = 0; n < row.length; ++n) { - oss << row.colidx(n) << " " << row.value(n) << std::endl; + if ((col_start <= row.colidx(n)) && (col_end > row.colidx(n))) { + diff_row.value(n) = row.value(n); + ++removed; + } + } + if (removed != bcsize) { + std::ostringstream oss; + oss << "brlid " << brlid << " row " << rlid << std::endl; + oss << "col_start " << col_start << " col_end " << col_end << std::endl; + for 
(LocalOrdinal n = 0; n < row.length; ++n) { + oss << row.colidx(n) << " " << row.value(n) << std::endl; + } + std::cout << oss.str(); } - std::cout << oss.str(); + TEUCHOS_ASSERT_EQUALITY(removed, bcsize); } - TEUCHOS_ASSERT_EQUALITY(removed, bcsize); + } else { + // We are keeping the cluster pair. + ++kept; + new_brow.value(k) = brow.value(k); } } else { - // We are keeping the cluster pair. - ++kept; + // The cluster pair has already been dropped on the fine level. + ++ignored; new_brow.value(k) = brow.value(k); } - } else { - // The cluster pair has already been dropped on the fine level. - ++ignored; - new_brow.value(k) = brow.value(k); } } + + newKernelBlockGraph->fillComplete(kernelApproximations_->blockA_->getDomainMap(), + kernelApproximations_->blockA_->getRangeMap()); + newKernelBlockGraph = removeSmallEntries(newKernelBlockGraph, Teuchos::ScalarTraits::eps()); + diffKernelApprox->fillComplete(clusterCoeffMap_, + clusterCoeffMap_); + + { + Teuchos::RCP temp = MatrixMatrix::add(ONE, false, *kernelApproximations_->pointA_, -ONE, false, *diffKernelApprox); + newKernelApprox = removeSmallEntries(temp, Teuchos::ScalarTraits::eps()); + } } if (debugOutput_) { // number of cluster pairs dropped @@ -781,23 +869,10 @@ Teuchos::RCP > Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 1, &kept, &gbl_kept); Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, 1, &ignored, &gbl_ignored); if (comm->getRank() == 0) - std::cout << "dropped " << gbl_dropped << " kept " << gbl_kept << " ignored " << gbl_ignored << std::endl; + std::cout << "dropped " << gbl_dropped << " cluster pairs, kept " << gbl_kept << " cluster pairs, ignored " << gbl_ignored << " cluster pairs" << std::endl; } } - newKernelBlockGraph->fillComplete(kernelApproximations_->blockA_->getDomainMap(), - kernelApproximations_->blockA_->getRangeMap()); - newKernelBlockGraph = removeSmallEntries(newKernelBlockGraph, Teuchos::ScalarTraits::eps()); - diffKernelApprox->fillComplete(clusterCoeffMap_, - clusterCoeffMap_); - 
- // coarse point matrix of cluster pairs - Teuchos::RCP newKernelApprox; - { - Teuchos::RCP temp = MatrixMatrix::add(ONE, false, *kernelApproximations_->pointA_, -ONE, false, *diffKernelApprox); - newKernelApprox = removeSmallEntries(temp, Teuchos::ScalarTraits::eps()); - } - // construct identity on clusterCoeffMap_ Teuchos::RCP identity = buildIdentityMatrix(clusterCoeffMap_); @@ -806,6 +881,8 @@ Teuchos::RCP > // select subset of transfer matrices for coarse operator std::vector > newTransferMatrices; { + Teuchos::TimeMonitor tM_basis_transfer(*Teuchos::TimeMonitor::getNewTimer(std::string("Coarse basis and transfers"))); + auto comm = getComm(); RCP v_temp = rcp(new vec_type(newKernelBlockGraph->getDomainMap())); @@ -841,32 +918,49 @@ Teuchos::RCP > // Coarse near field RCP newNearField; { - // transfer = newBasisMatrix * (identity + newTransferMatrices[K-1]^T) * ... * (identity + newTransferMatrices[0])^T - Teuchos::RCP transfer = rcp(new matrix_type(*newBasisMatrix)); - for (int i = Teuchos::as(newTransferMatrices.size()) - 1; i >= 0; i--) { - Teuchos::RCP temp = MatrixMatrix::add(ONE, false, *identity, ONE, false, *newTransferMatrices[i]->pointA_); - Teuchos::RCP temp2 = rcp(new matrix_type(newBasisMatrix->getRowMap(), 0)); - MatrixMatrix::Multiply(*transfer, false, *temp, true, *temp2); - transfer = temp2; + Teuchos::TimeMonitor tM_near(*Teuchos::TimeMonitor::getNewTimer(std::string("Coarse near field"))); + + // diffKernelApprox := (identity + newTransferMatrices[K-1])^T * ... * (identity + newTransferMatrices[0])^T + // * diffKernelApprox + // * (identity + newTransferMatrices[0]) * ... 
* (identity + newTransferMatrices[K-1]) + { + Teuchos::TimeMonitor tM_near_1(*Teuchos::TimeMonitor::getNewTimer(std::string("Coarse near field 1"))); + for (int i = 0; i < Teuchos::as(newTransferMatrices.size()); ++i) { + // diffKernelApprox := (I + newTransferMatrices[i])^T * diffKernelApprox * (I + newTransferMatrices[i]) + Teuchos::RCP temp = MatrixMatrix::add(ONE, false, *identity, ONE, false, *newTransferMatrices[i]->pointA_); + Teuchos::RCP temp2 = rcp(new matrix_type(clusterCoeffMap_, 0)); + MatrixMatrix::Multiply(*temp, true, *diffKernelApprox, false, *temp2); + MatrixMatrix::Multiply(*temp2, false, *temp, false, *diffKernelApprox); + } } - // diffFarField = transfer * diffKernelApprox * transfer^T - RCP diffFarField; + // diffKernelApprox := (newBasisMatrix * diffKernelApprox) * newBasisMatrix^T { + Teuchos::TimeMonitor tM_near2(*Teuchos::TimeMonitor::getNewTimer(std::string("Coarse near field 2"))); + Teuchos::RCP temp = rcp(new matrix_type(newBasisMatrix->getRowMap(), 0)); - MatrixMatrix::Multiply(*transfer, false, *diffKernelApprox, false, *temp); - diffFarField = rcp(new matrix_type(newBasisMatrix->getRowMap(), 0)); - MatrixMatrix::Multiply(*temp, false, *transfer, true, *diffFarField); + { + Teuchos::TimeMonitor tM_near2a(*Teuchos::TimeMonitor::getNewTimer(std::string("Coarse near field 2a"))); + MatrixMatrix::Multiply(*newBasisMatrix, false, *diffKernelApprox, false, *temp); + } + + { + Teuchos::TimeMonitor tM_near2a(*Teuchos::TimeMonitor::getNewTimer(std::string("Coarse near field 2b"))); + diffKernelApprox = rcp(new matrix_type(newBasisMatrix->getRowMap(), 0)); + MatrixMatrix::Multiply(*temp, false, *newBasisMatrix, true, *diffKernelApprox); + } } - // newNearField = P^T * nearField * P + diffFarField + // newNearField = (P^T * nearField * P) + diffKernelApprox { + Teuchos::TimeMonitor tM_near3(*Teuchos::TimeMonitor::getNewTimer(std::string("Coarse near field 3"))); RCP temp = rcp(new matrix_type(nearField_->getRowMap(), 0)); 
MatrixMatrix::Multiply(*nearField_, false, *P, false, *temp); RCP temp2 = rcp(new matrix_type(P->getDomainMap(), 0)); MatrixMatrix::Multiply(*P, true, *temp, false, *temp2); - newNearField = MatrixMatrix::add(ONE, false, *temp2, ONE, false, *diffFarField); - newNearField = removeSmallEntries(newNearField, Teuchos::ScalarTraits::eps()); + newNearField = MatrixMatrix::add(ONE, false, *temp2, ONE, false, *diffKernelApprox); + diffKernelApprox = Teuchos::null; + // newNearField = removeSmallEntries(newNearField, Teuchos::ScalarTraits::eps()); } } @@ -874,7 +968,7 @@ Teuchos::RCP > newBlockedKernelApproximation, newBasisMatrix, newTransferMatrices, - params_)); + coarseParams)); } template :: const Scalar ONE = Teuchos::ScalarTraits::one(); if (hasFarField()) { - // transfer = basisMatrix_ * (identity + transferMatrices_[K-1]) * ... * (identity + transferMatrices_[0]) - RCP transfer = rcp(new matrix_type(*basisMatrix_)); + RCP kernelApproximations; if (hasTransferMatrices()) { + kernelApproximations = rcp(new matrix_type(*kernelApproximations_->pointA_)); + // construct identity on clusterCoeffMap_ Teuchos::RCP identity = buildIdentityMatrix(clusterCoeffMap_); - for (int i = Teuchos::as(transferMatrices_.size()) - 1; i >= 0; i--) { - RCP temp = MatrixMatrix::add(ONE, false, *identity, ONE, false, *transferMatrices_[i]->pointA_); - RCP temp2 = rcp(new matrix_type(basisMatrix_->getRowMap(), 0)); - MatrixMatrix::Multiply(*transfer, false, *temp, true, *temp2); - transfer = temp2; + Teuchos::TimeMonitor tM_near_1(*Teuchos::TimeMonitor::getNewTimer(std::string("Densify far field 1"))); + for (int i = 0; i < Teuchos::as(transferMatrices_.size()); ++i) { + // kernelApproximations := (I + newTransferMatrices[i])^T * kernelApproximations * (I + newTransferMatrices[i]) + Teuchos::RCP temp = MatrixMatrix::add(ONE, false, *identity, ONE, false, *transferMatrices_[i]->pointA_); + Teuchos::RCP temp2 = rcp(new matrix_type(clusterCoeffMap_, 0)); + MatrixMatrix::Multiply(*temp, true, 
*kernelApproximations, false, *temp2); + Teuchos::RCP temp3 = rcp(new matrix_type(clusterCoeffMap_, 0)); + MatrixMatrix::Multiply(*temp2, false, *temp, false, *temp3); + kernelApproximations = temp3; } + } else { + kernelApproximations = kernelApproximations_->pointA_; } - // farField = transfer * kernelApproximations_ * transfer^T - RCP temp = rcp(new matrix_type(basisMatrix_->getRowMap(), 0)); - MatrixMatrix::Multiply(*transfer, false, *kernelApproximations_->pointA_, false, *temp); - RCP farField = rcp(new matrix_type(basisMatrix_->getRowMap(), 0)); - MatrixMatrix::Multiply(*temp, false, *transfer, true, *farField); + // farField = (basisMatrix_ * kernelApproximations) * basisMatrix_^T + RCP farField; + { + Teuchos::TimeMonitor tM_near_2(*Teuchos::TimeMonitor::getNewTimer(std::string("Densify far field 2"))); + int rank = getComm()->getRank(); + int size = getComm()->getSize(); + int numBatches = params_->get("numBatches"); + if (numBatches < 0) { + int batchSize = params_->get("batchSize"); + numBatches = size / batchSize; + } + numBatches = std::max(numBatches, 1); + for (int batchNo = 0; batchNo < numBatches; ++batchNo) { + RCP kernelApproximationsSlice; + { + Teuchos::TimeMonitor tM_near_2a(*Teuchos::TimeMonitor::getNewTimer(std::string("Densify far field 2 0"))); + + using local_matrix_type = typename matrix_type::local_matrix_device_type; + local_matrix_type lclKernelApproximationsSlice; + + if (rank % numBatches == batchNo) { + lclKernelApproximationsSlice = kernelApproximations->getLocalMatrixDevice(); + + } else { + using local_graph_type = typename matrix_type::local_graph_device_type; + using row_ptr_type = typename local_graph_type::row_map_type::non_const_type; + using col_idx_type = typename local_graph_type::entries_type::non_const_type; + using vals_type = typename local_matrix_type::values_type; + + auto lclKernelApproximations = kernelApproximations->getLocalMatrixDevice(); + auto rowptr = row_ptr_type("rowptr", 
lclKernelApproximations.numRows() + 1); + auto idx = col_idx_type("colidx", 0); + auto vals = vals_type("vals", 0); + + auto lclKernelApproximationsSliceGraph = local_graph_type(idx, rowptr); + lclKernelApproximationsSlice = local_matrix_type("slice", lclKernelApproximations.numCols(), vals, lclKernelApproximationsSliceGraph); + } + kernelApproximationsSlice = rcp(new matrix_type(lclKernelApproximationsSlice, + kernelApproximations->getRowMap(), + kernelApproximations->getColMap(), + kernelApproximations->getDomainMap(), + kernelApproximations->getRangeMap())); + } + { + RCP temp; + { + Teuchos::TimeMonitor tM_near_2a(*Teuchos::TimeMonitor::getNewTimer(std::string("Densify far field 2a"))); + temp = rcp(new matrix_type(basisMatrix_->getRowMap(), 0)); + MatrixMatrix::Multiply(*basisMatrix_, false, *kernelApproximationsSlice, false, *temp); + } + + { + Teuchos::TimeMonitor tM_near_2b(*Teuchos::TimeMonitor::getNewTimer(std::string("Densify far field 2b"))); + auto temp2 = rcp(new matrix_type(basisMatrix_->getRowMap(), 0)); + MatrixMatrix::Multiply(*temp, false, *basisMatrix_, true, *temp2); + temp = Teuchos::null; + if (batchNo == 0) + farField = temp2; + else + farField = MatrixMatrix::add(ONE, false, *farField, ONE, false, *temp2); + } + } + } + } // nearField_ + farField - return MatrixMatrix::add(ONE, false, *nearField_, ONE, false, *farField); + RCP dense; + { + Teuchos::TimeMonitor tM_near_3(*Teuchos::TimeMonitor::getNewTimer(std::string("Densify far field 3"))); + dense = MatrixMatrix::add(ONE, false, *nearField_, ONE, false, *farField); + } + return dense; } else diff --git a/packages/muelu/research/caglusa/Tpetra_OperatorWithDiagonal.hpp b/packages/muelu/research/caglusa/Tpetra_OperatorWithDiagonal.hpp new file mode 100644 index 000000000000..3b4a324e7550 --- /dev/null +++ b/packages/muelu/research/caglusa/Tpetra_OperatorWithDiagonal.hpp @@ -0,0 +1,136 @@ +#ifndef TPETRA_OPERATORWITHDIAGONAL_HPP +#define TPETRA_OPERATORWITHDIAGONAL_HPP + +#include +#include 
+#include +#include +#include "Teuchos_VerbosityLevel.hpp" + +namespace Tpetra { + +template ::scalar_type, + class LocalOrdinal = typename Tpetra::RowMatrix::local_ordinal_type, + class GlobalOrdinal = typename Tpetra::RowMatrix::global_ordinal_type, + class Node = typename Tpetra::RowMatrix::node_type> +class OperatorWithDiagonal : public Tpetra::RowMatrix { + public: + using mv_type = Tpetra::MultiVector; + using map_type = Tpetra::Map; + + using row_matrix_type = RowMatrix; + + using impl_scalar_type = typename row_matrix_type::impl_scalar_type; + using mag_type = typename Kokkos::ArithTraits::mag_type; + + using local_inds_device_view_type = + typename row_matrix_type::local_inds_device_view_type; + using local_inds_host_view_type = + typename row_matrix_type::local_inds_host_view_type; + using nonconst_local_inds_host_view_type = + typename row_matrix_type::nonconst_local_inds_host_view_type; + + using global_inds_device_view_type = + typename row_matrix_type::global_inds_device_view_type; + using global_inds_host_view_type = + typename row_matrix_type::global_inds_host_view_type; + using nonconst_global_inds_host_view_type = + typename row_matrix_type::nonconst_global_inds_host_view_type; + + using values_device_view_type = + typename row_matrix_type::values_device_view_type; + using values_host_view_type = + typename row_matrix_type::values_host_view_type; + using nonconst_values_host_view_type = + typename row_matrix_type::nonconst_values_host_view_type; + + //! 
@name Constructor/Destructor + //@{ + + virtual ~OperatorWithDiagonal() = default; + + size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const { + throw std::runtime_error("Not implemented."); + } + + size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const { + throw std::runtime_error("Not implemented."); + } + + size_t getGlobalMaxNumRowEntries() const { + throw std::runtime_error("Not implemented."); + } + + LocalOrdinal getBlockSize() const { + throw std::runtime_error("Not implemented."); + } + + size_t getLocalMaxNumRowEntries() const { + throw std::runtime_error("Not implemented."); + } + + bool hasColMap() const { + return false; + } + + bool isLocallyIndexed() const { + return true; + } + + bool isGloballyIndexed() const { + return true; + } + + bool isFillComplete() const { + return true; + } + + bool supportsRowViews() const { + return false; + } + + void + getGlobalRowCopy(GlobalOrdinal GlobalRow, + nonconst_global_inds_host_view_type& Indices, + nonconst_values_host_view_type& Values, + size_t& NumEntries) const { + throw std::runtime_error("Not implemented."); + } + + void + getLocalRowCopy(LocalOrdinal LocalRow, + nonconst_local_inds_host_view_type& Indices, + nonconst_values_host_view_type& Values, + size_t& NumEntries) const { + throw std::runtime_error("Not implemented."); + } + + void + getGlobalRowView(GlobalOrdinal GlobalRow, + global_inds_host_view_type& indices, + values_host_view_type& values) const { + throw std::runtime_error("Not implemented."); + } + + void + getLocalRowView(LocalOrdinal LocalRow, + local_inds_host_view_type& indices, + values_host_view_type& values) const { + throw std::runtime_error("Not implemented."); + } + + void leftScale(const Vector& x) { + throw std::runtime_error("Not implemented."); + } + + void rightScale(const Vector& x) { + throw std::runtime_error("Not implemented."); + } + + mag_type getFrobeniusNorm() const { + return 0.; + } +}; +} // namespace Tpetra + +#endif // 
TPETRA_OPERATORWITHDIAGONAL_HPP diff --git a/packages/muelu/research/caglusa/Xpetra_HierarchicalOperator_decl.hpp b/packages/muelu/research/caglusa/Xpetra_HierarchicalOperator_decl.hpp index 98910ac96fe5..8760a76c11c6 100644 --- a/packages/muelu/research/caglusa/Xpetra_HierarchicalOperator_decl.hpp +++ b/packages/muelu/research/caglusa/Xpetra_HierarchicalOperator_decl.hpp @@ -8,6 +8,8 @@ #include #include #include +#include "Xpetra_OperatorWithDiagonal.hpp" +#include "Xpetra_RowMatrix.hpp" namespace Xpetra { @@ -15,7 +17,7 @@ template -class HierarchicalOperator : public TpetraOperator { +class HierarchicalOperator : public OperatorWithDiagonal { public: using tHOp = Tpetra::HierarchicalOperator; using map_type = Xpetra::Map; @@ -28,7 +30,9 @@ class HierarchicalOperator : public TpetraOperator& op) - : op_(op) {} + : op_(op) { + this->setTpetra_RowMatrix(op); + } HierarchicalOperator(const RCP& nearField, const RCP& kernelApproximations, @@ -83,15 +87,19 @@ class HierarchicalOperator : public TpetraOperatorgetCompression(); } + void setDebugOutput(const bool debugOutput) { + op_->setDebugOutput(debugOutput); + } + RCP nearFieldMatrix() { auto tpMat = Teuchos::rcp(new TpetraCrsMatrix(op_->nearFieldMatrix())); return Teuchos::rcp(new CrsMatrixWrap(Teuchos::rcp_dynamic_cast >(tpMat))); } //! 
Gets the operator out - RCP > getOperator() { return op_; } + // RCP > getTpetra_RowMatrix() const override { return op_; } - RCP > getOperatorConst() const { return op_; } + // RCP > getTpetra_RowMatrixNonConst() const override { return op_; } void describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel verbLevel) const { describe(out, verbLevel, true); diff --git a/packages/muelu/research/caglusa/Xpetra_HierarchicalOperator_def.hpp b/packages/muelu/research/caglusa/Xpetra_HierarchicalOperator_def.hpp index f59c48e3738a..ca79a23862a9 100644 --- a/packages/muelu/research/caglusa/Xpetra_HierarchicalOperator_def.hpp +++ b/packages/muelu/research/caglusa/Xpetra_HierarchicalOperator_def.hpp @@ -30,6 +30,7 @@ HierarchicalOperator:: Teuchos::rcp_dynamic_cast(Teuchos::rcp_dynamic_cast(basisMatrix)->getCrsMatrix(), true)->getTpetra_CrsMatrixNonConst(), tTransferMatrices, params)); + this->setTpetra_RowMatrix(op_); } } // namespace Xpetra diff --git a/packages/muelu/research/caglusa/Xpetra_OperatorWithDiagonal.hpp b/packages/muelu/research/caglusa/Xpetra_OperatorWithDiagonal.hpp new file mode 100644 index 000000000000..d6e1d16b8cea --- /dev/null +++ b/packages/muelu/research/caglusa/Xpetra_OperatorWithDiagonal.hpp @@ -0,0 +1,148 @@ +#ifndef XPETRA_OPERATORWITHDIAGONAL_HPP +#define XPETRA_OPERATORWITHDIAGONAL_HPP + +#include +#include +#include + +namespace Xpetra { + +template +class OperatorWithDiagonal : virtual public Xpetra::TpetraRowMatrix { + public: + typedef Scalar scalar_type; + typedef LocalOrdinal local_ordinal_type; + typedef GlobalOrdinal global_ordinal_type; + typedef Node node_type; + + //! @name Constructor/Destructor Methods + //@{ + + //! Destructor. + virtual ~OperatorWithDiagonal() = default; + + //@} + + //! @name Matrix Query Methods + //@{ + + //! Returns the Map that describes the row distribution in this matrix. + const Teuchos::RCP > getRowMap() const { + throw std::runtime_error("Not implemented."); + }; + + //! 
Returns the Map that describes the column distribution in this matrix. + const Teuchos::RCP > getColMap() const { + throw std::runtime_error("Not implemented."); + }; + + //! Returns the number of global rows in this matrix. + global_size_t getGlobalNumRows() const { + return this->getRangeMap()->getGlobalNumElements(); + }; + + //! Returns the number of global columns in this matrix. + global_size_t getGlobalNumCols() const { + return this->getDomainMap()->getGlobalNumElements(); + }; + + //! Returns the number of rows owned on the calling node. + size_t getLocalNumRows() const { + return this->getRangeMap()->getLocalNumElements(); + }; + + //! Returns the number of columns needed to apply the forward operator on this node, i.e., the number of elements listed in the column map. + size_t getLocalNumCols() const { + return this->getDomainMap()->getLocalNumElements(); + }; + + //! Returns the global number of entries in this matrix. + global_size_t getGlobalNumEntries() const { + throw std::runtime_error("Not implemented."); + }; + + //! Returns the local number of entries in this matrix. + size_t getLocalNumEntries() const { + throw std::runtime_error("Not implemented."); + }; + + //! Returns the current number of entries on this node in the specified local row. + size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const { + throw std::runtime_error("Not implemented."); + }; + + //! Returns the maximum number of entries across all rows/columns on all nodes. + size_t getGlobalMaxNumRowEntries() const { + throw std::runtime_error("Not implemented."); + }; + + //! Returns the maximum number of entries across all rows/columns on this node. + size_t getLocalMaxNumRowEntries() const { + throw std::runtime_error("Not implemented."); + }; + + //! If matrix indices are in the local range, this function returns true. Otherwise, this function returns false. */. + bool isLocallyIndexed() const { + return true; + }; + + //! 
If matrix indices are in the global range, this function returns true. Otherwise, this function returns false. */. + bool isGloballyIndexed() const { + return true; + }; + + //! Returns true if fillComplete() has been called. + bool isFillComplete() const { + return true; + }; + + //! Returns true if getLocalRowView() and getGlobalRowView() are valid for this class. + bool supportsRowViews() const { + return false; + }; + + //@} + + //! @name Extraction Methods + //@{ + + //! Extract a list of entries in a specified local row of the graph. Put into storage allocated by calling routine. + void getLocalRowCopy(LocalOrdinal LocalRow, const Teuchos::ArrayView &Indices, const Teuchos::ArrayView &Values, size_t &NumEntries) const { + throw std::runtime_error("Not implemented."); + }; + + //! Extract a const, non-persisting view of global indices in a specified row of the matrix. + void getGlobalRowView(GlobalOrdinal GlobalRow, ArrayView &indices, ArrayView &values) const { + throw std::runtime_error("Not implemented."); + }; + + //! Extract a const, non-persisting view of local indices in a specified row of the matrix. + void getLocalRowView(LocalOrdinal LocalRow, ArrayView &indices, ArrayView &values) const { + throw std::runtime_error("Not implemented."); + }; + + //! Get a copy of the diagonal entries owned by this node, with local row indices. + void getLocalDiagCopy(Vector &diag) const { + throw std::runtime_error("Not implemented."); + }; + + //@} + + //! @name Mathematical Methods + //@{ + + //! Returns the Frobenius norm of the matrix. 
+ typename ScalarTraits::magnitudeType getFrobeniusNorm() const { + throw std::runtime_error("Not implemented."); + }; + + //@} + +}; // RowMatrix class + +} // namespace Xpetra + +#endif diff --git a/packages/muelu/research/caglusa/auxiliaryOperators.hpp b/packages/muelu/research/caglusa/auxiliaryOperators.hpp index a8b274e8b444..ed288e8cd52c 100644 --- a/packages/muelu/research/caglusa/auxiliaryOperators.hpp +++ b/packages/muelu/research/caglusa/auxiliaryOperators.hpp @@ -153,18 +153,27 @@ constructHierarchyFromAuxiliary(RCP > auxH, Teuchos::ParameterList& params, Teuchos::FancyOStream& out) { + using op_type = Xpetra::Operator; + using hop_type = Xpetra::HierarchicalOperator; + using mat_type = Xpetra::Matrix; + params.set("coarse: max size", 1); params.set("max levels", auxH->GetNumLevels()); const bool implicitTranspose = params.get("transpose: use implicit", MueLu::MasterList::getDefault("transpose: use implicit")); - auto hop = rcp_dynamic_cast >(op); + auto hop = rcp_dynamic_cast(op); if (!hop.is_null()) op->describe(out, Teuchos::VERB_EXTREME); RCP > H = rcp(new Hierarchy()); RCP lvl = H->GetLevel(0); - lvl->Set("A", op); + auto op_mat = Teuchos::rcp_dynamic_cast(op); + if (!op_mat.is_null()) + lvl->Set("A", op_mat); + else { + lvl->Set("A", Teuchos::rcp_dynamic_cast(op, true)); + } // lvl->Set("Coordinates", coords); for (int lvlNo = 1; lvlNo < auxH->GetNumLevels(); lvlNo++) { RCP fineLvl = H->GetLevel(lvlNo - 1); @@ -174,21 +183,21 @@ constructHierarchyFromAuxiliary(RCPGetFactoryManager(); // auxLvl->print(std::cout, MueLu::Debug); - RCP > P = auxLvl->Get > >("P"); - RCP > fineAOp = fineLvl->Get > >("A"); + RCP P = auxLvl->Get >("P"); lvl->Set("P", P); params.sublist("level " + std::to_string(lvlNo)).set("P", P); if (!implicitTranspose) { TEUCHOS_ASSERT(auxLvl->IsAvailable("R")); - RCP > R = auxLvl->Get > >("R"); + RCP R = auxLvl->Get >("R"); lvl->Set("R", R); params.sublist("level " + std::to_string(lvlNo)).set("R", R); } - auto fineA = rcp_dynamic_cast 
>(fineAOp); - if (!fineA.is_null()) { - auto coarseA = fineA->restrict(P); + if (fineLvl->IsType >("A")) { + RCP fineAOp = fineLvl->Get >("A"); + auto fineA = rcp_dynamic_cast(fineAOp); + auto coarseA = fineA->restrict(P); #ifdef MUELU_HIERARCHICAL_DEBUG { @@ -251,11 +260,12 @@ constructHierarchyFromAuxiliary(RCPSet("A", matA); } else { coarseA->describe(out, Teuchos::VERB_EXTREME, /*printHeader=*/false); - lvl->Set("A", rcp_dynamic_cast >(coarseA)); + lvl->Set("A", rcp_dynamic_cast(coarseA)); } } else { // classical RAP - auto fineAmat = rcp_dynamic_cast >(fineAOp, true); + RCP fineAmat = fineLvl->Get >("A"); + // auto fineAmat = rcp_dynamic_cast >(fineAOp, true); Level fineLevel, coarseLevel; fineLevel.SetFactoryManager(Teuchos::null); coarseLevel.SetFactoryManager(Teuchos::null); @@ -269,7 +279,7 @@ constructHierarchyFromAuxiliary(RCPSetParameterList(rapList); coarseLevel.Request("A", rapFact.get()); - RCP > matA = coarseLevel.Get > >("A", rapFact.get()); + RCP matA = coarseLevel.Get >("A", rapFact.get()); using std::endl; using std::setw; diff --git a/packages/muelu/research/caglusa/main.cpp b/packages/muelu/research/caglusa/main.cpp index 0de4ac0a8ef6..90acaaf06392 100644 --- a/packages/muelu/research/caglusa/main.cpp +++ b/packages/muelu/research/caglusa/main.cpp @@ -88,8 +88,12 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg clp.setOption("tests", "notests", &doTests, "Test operator using known LHS & RHS."); bool doUnPrecSolve = true; clp.setOption("unPrec", "noUnPrec", &doUnPrecSolve, "Solve unpreconditioned"); + bool failOnUnPrecSolve = true; + clp.setOption("failOnUnPrec", "noFailOnUnPrec", &failOnUnPrecSolve, "Set error condition if unpreconditioned solve does not converge."); bool doPrecSolve = true; clp.setOption("prec", "noPrec", &doPrecSolve, "Solve preconditioned with AMG"); + bool debugOutput = false; + clp.setOption("debug", "noDebug", &debugOutput, "Debug output"); switch (clp.parse(argc, argv)) { case 
Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED: return EXIT_SUCCESS; break; @@ -130,21 +134,30 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg RCP op; RCP hop; { + comm->barrier(); Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("Read hierarchical matrix"))); op = IOhelpers::Read(xmlHierarchical, comm); hop = Teuchos::rcp_dynamic_cast(op); + + comm->barrier(); } - if (!hop.is_null()) + if (!hop.is_null()) { + if (debugOutput) + hop->setDebugOutput(debugOutput); out << "Compression: " << hop->getCompression() << " of dense matrix." << std::endl; + } Teuchos::ParameterList problemParams; Teuchos::updateParametersFromXmlFileAndBroadcast(xmlProblem, Teuchos::Ptr(&problemParams), *comm); RCP map = op->getDomainMap(); - RCP auxOp, mass; - RCP X_ex, RHS, X; + RCP auxOp; + RCP mass; + RCP X_ex; + RCP RHS; + RCP X; RCP coords; { // Read in auxiliary stuff @@ -154,9 +167,12 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg // Auxiliary matrix used for multigrid construction { + comm->barrier(); Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("Construct auxiliary operator"))); auxOp = MueLu::constructAuxiliaryOperator(op, problemParams); + + comm->barrier(); } // Mass matrix for L2 error computation @@ -178,7 +194,9 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg if (doTests) { // Some simple apply tests - Scalar opX_exRHS, MopX_exRHS, MopTX_exRHS; + Scalar opX_exRHS; + Scalar MopX_exRHS; + Scalar MopTX_exRHS; { op->apply(*X_ex, *X); @@ -223,31 +241,37 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg out << "Unpreconditioned Krylov method\n"; out << "*********************************************************\n\n"; - Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("Unpreconditioned solve"))); + Belos::ReturnType ret; + int numIts; + { + comm->barrier(); + 
Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("Unpreconditioned solve"))); - using MV = typename HOp::mv_type; - using OP = Belos::OperatorT; + using MV = typename HOp::mv_type; + using OP = Belos::OperatorT; - X->putScalar(zero); - RCP belosOp = rcp(new Belos::XpetraOp(op)); - RCP > belosProblem = rcp(new Belos::LinearProblem(belosOp, X, RHS)); + X->putScalar(zero); + RCP belosOp = rcp(new Belos::XpetraOp(op)); + RCP > belosProblem = rcp(new Belos::LinearProblem(belosOp, X, RHS)); - std::string belosType = "Pseudoblock CG"; - auto belosSolverList = rcpFromRef(belosParams.sublist(belosType)); + std::string belosType = "Pseudoblock CG"; + auto belosSolverList = rcpFromRef(belosParams.sublist(belosType)); - bool set = belosProblem->setProblem(); - if (set == false) { - throw MueLu::Exceptions::RuntimeError("ERROR: Belos::LinearProblem failed to set up correctly!"); - } + bool set = belosProblem->setProblem(); + if (!set) { + throw MueLu::Exceptions::RuntimeError("ERROR: Belos::LinearProblem failed to set up correctly!"); + } - // Create an iterative solver manager - Belos::SolverFactory solverFactory; - RCP > solver = solverFactory.create(belosType, belosSolverList); - solver->setProblem(belosProblem); + // Create an iterative solver manager + Belos::SolverFactory solverFactory; + RCP > solver = solverFactory.create(belosType, belosSolverList); + solver->setProblem(belosProblem); - // Perform solve - Belos::ReturnType ret = solver->solve(); - int numIts = solver->getNumIters(); + // Perform solve + ret = solver->solve(); + numIts = solver->getNumIters(); + comm->barrier(); + } // Get the number of iterations for this solve. 
out << "Number of iterations performed for this solve: " << numIts << std::endl; @@ -257,14 +281,16 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg out << "|X-X_ex| = " << X->getVector(0)->norm2() << std::endl << std::endl; - success &= (ret == Belos::Converged); + if (failOnUnPrecSolve) + success &= (ret == Belos::Converged); } #endif // HAVE_MUELU_BELOS if (doPrecSolve) { // Solve linear system using a AMG preconditioned Krylov method - RCP auxH, H; + RCP auxH; + RCP H; { //////////////////////////////////////////////////////////////// @@ -273,6 +299,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg out << "Building the auxiliary hierarchy\n"; out << "*********************************************************\n\n"; + comm->barrier(); Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("Construct auxiliary hierarchy"))); Teuchos::ParameterList auxParams; @@ -284,6 +311,7 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg 2 * comm->getSize())); auxH = MueLu::CreateXpetraPreconditioner(auxOp, auxParams); + comm->barrier(); } { @@ -293,13 +321,15 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg out << "Building the main hierarchy\n"; out << "*********************************************************\n\n"; + comm->barrier(); Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("Construct hierarchy"))); Teuchos::ParameterList params; Teuchos::updateParametersFromXmlFileAndBroadcast(xmlMueLu, Teuchos::Ptr(¶ms), *comm); params.sublist("user data").set("Coordinates", coords); - H = MueLu::constructHierarchyFromAuxiliary(Teuchos::rcp_dynamic_cast(op, true), auxH, params, out); + H = MueLu::constructHierarchyFromAuxiliary(op, auxH, params, out); + comm->barrier(); } #ifdef HAVE_MUELU_BELOS @@ -307,34 +337,40 @@ int main_(Teuchos::CommandLineProcessor &clp, Xpetra::UnderlyingLib lib, int arg 
//////////////////////////////////////////////////////////////// // Set up the Krylov solver - Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("Preconditioned solve"))); + Belos::ReturnType ret; + int numIts; + { + comm->barrier(); + Teuchos::TimeMonitor tM(*Teuchos::TimeMonitor::getNewTimer(std::string("Preconditioned solve"))); - using MV = typename HOp::mv_type; - using OP = Belos::OperatorT; + using MV = typename HOp::mv_type; + using OP = Belos::OperatorT; - X->putScalar(zero); - RCP belosOp = rcp(new Belos::XpetraOp(op)); - RCP belosPrec = rcp(new Belos::MueLuOp(H)); - RCP > belosProblem = rcp(new Belos::LinearProblem(belosOp, X, RHS)); + X->putScalar(zero); + RCP belosOp = rcp(new Belos::XpetraOp(op)); + RCP belosPrec = rcp(new Belos::MueLuOp(H)); + RCP > belosProblem = rcp(new Belos::LinearProblem(belosOp, X, RHS)); - std::string belosType = "Pseudoblock CG"; - auto belosSolverList = rcpFromRef(belosParams.sublist(belosType)); + std::string belosType = "Pseudoblock CG"; + auto belosSolverList = rcpFromRef(belosParams.sublist(belosType)); - belosProblem->setRightPrec(belosPrec); + belosProblem->setRightPrec(belosPrec); - bool set = belosProblem->setProblem(); - if (set == false) { - throw MueLu::Exceptions::RuntimeError("ERROR: Belos::LinearProblem failed to set up correctly!"); - } + bool set = belosProblem->setProblem(); + if (!set) { + throw MueLu::Exceptions::RuntimeError("ERROR: Belos::LinearProblem failed to set up correctly!"); + } - // Create an iterative solver manager - Belos::SolverFactory solverFactory; - RCP > solver = solverFactory.create(belosType, belosSolverList); - solver->setProblem(belosProblem); + // Create an iterative solver manager + Belos::SolverFactory solverFactory; + RCP > solver = solverFactory.create(belosType, belosSolverList); + solver->setProblem(belosProblem); - // Perform solve - Belos::ReturnType ret = solver->solve(); - int numIts = solver->getNumIters(); + // Perform solve + ret = solver->solve(); 
+ numIts = solver->getNumIters(); + comm->barrier(); + } // Get the number of iterations for this solve. out << "Number of iterations performed for this solve: " << numIts << std::endl; diff --git a/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_def.hpp b/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_def.hpp index ecbff335f524..07653aef90e3 100644 --- a/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/HybridAggregation/MueLu_HybridAggregationFactory_def.hpp @@ -103,7 +103,9 @@ RCP HybridAggregationFactory::magnitudeType; + public: //! @name Constructors/Destructors. //@{ //! Constructor. - ThresholdAFilterFactory(const std::string& ename, const Scalar threshold, const bool keepDiagonal = true, const GlobalOrdinal expectedNNZperRow = -1); + ThresholdAFilterFactory(const std::string& ename, const magnitudeType threshold, const bool keepDiagonal = true, const GlobalOrdinal expectedNNZperRow = -1); //! 
Input //@{ @@ -96,8 +99,8 @@ class ThresholdAFilterFactory : public SingleLevelFactoryBase { //@} private: - std::string varName_; ///< name of input and output variable - const Scalar threshold_; ///< threshold parameter + std::string varName_; ///< name of input and output variable + const magnitudeType threshold_; ///< threshold parameter const bool keepDiagonal_; const GlobalOrdinal expectedNNZperRow_; diff --git a/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_def.hpp b/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_def.hpp index ad8621640bb4..020fc348b63d 100644 --- a/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_def.hpp +++ b/packages/muelu/src/Misc/MueLu_ThresholdAFilterFactory_def.hpp @@ -57,7 +57,7 @@ namespace MueLu { template -ThresholdAFilterFactory::ThresholdAFilterFactory(const std::string& ename, const Scalar threshold, const bool keepDiagonal, const GlobalOrdinal expectedNNZperRow) +ThresholdAFilterFactory::ThresholdAFilterFactory(const std::string& ename, const magnitudeType threshold, const bool keepDiagonal, const GlobalOrdinal expectedNNZperRow) : varName_(ename) , threshold_(threshold) , keepDiagonal_(keepDiagonal) diff --git a/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp b/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp index 74cced80577e..039b8bef04b6 100644 --- a/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp +++ b/packages/muelu/src/Utils/MueLu_UtilitiesBase_decl.hpp @@ -110,7 +110,7 @@ class UtilitiesBase { NOTE -- it's assumed that A has been fillComplete'd. */ - static RCP GetThresholdedMatrix(const RCP& Ain, const Scalar threshold, const bool keepDiagonal = true, const GlobalOrdinal expectedNNZperRow = -1); + static RCP GetThresholdedMatrix(const RCP& Ain, const Magnitude threshold, const bool keepDiagonal = true, const GlobalOrdinal expectedNNZperRow = -1); /*! 
@brief Threshold a graph diff --git a/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp b/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp index f11695d05936..f5d9f4c40615 100644 --- a/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp +++ b/packages/muelu/src/Utils/MueLu_UtilitiesBase_def.hpp @@ -68,6 +68,7 @@ #include #include "MueLu_Exceptions.hpp" +#include "Xpetra_CrsMatrixFactory.hpp" #include #include @@ -83,10 +84,124 @@ UtilitiesBase:: return rcp(new CrsMatrixWrap(Op)); } +template +Teuchos::RCP> +removeSmallEntries(Teuchos::RCP>& A, + const typename Teuchos::ScalarTraits::magnitudeType threshold, + const bool keepDiagonal) { + using crs_matrix = Xpetra::CrsMatrix; + using row_ptr_type = typename crs_matrix::local_graph_type::row_map_type::non_const_type; + using col_idx_type = typename crs_matrix::local_graph_type::entries_type::non_const_type; + using vals_type = typename crs_matrix::local_matrix_type::values_type; + using execution_space = typename crs_matrix::local_matrix_type::execution_space; + + using ATS = Kokkos::ArithTraits; + using impl_SC = typename ATS::val_type; + using impl_ATS = Kokkos::ArithTraits; + + auto lclA = A->getLocalMatrixDevice(); + + auto rowptr = row_ptr_type("rowptr", lclA.numRows() + 1); + col_idx_type idx; + vals_type vals; + LocalOrdinal nnz; + + if (keepDiagonal) { + auto lclRowMap = A->getRowMap()->getLocalMap(); + auto lclColMap = A->getColMap()->getLocalMap(); + Kokkos::parallel_scan( + "removeSmallEntries::rowptr", + Kokkos::RangePolicy(0, lclA.numRows()), + KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& partial_nnz, bool is_final) { + auto row = lclA.row(rlid); + auto rowInCol = lclColMap.getLocalElement(lclRowMap.getGlobalElement(rlid)); + for (LocalOrdinal k = 0; k < row.length; ++k) { + if ((impl_ATS::magnitude(row.value(k)) > threshold) || (row.colidx(k) == rowInCol)) { + partial_nnz += 1; + } + } + if (is_final) + rowptr(rlid + 1) = partial_nnz; + }, + nnz); + + idx = col_idx_type("idx", nnz); + 
vals = vals_type("vals", nnz); + + Kokkos::parallel_for( + "removeSmallEntries::indicesValues", + Kokkos::RangePolicy(0, lclA.numRows()), + KOKKOS_LAMBDA(const LocalOrdinal rlid) { + auto row = lclA.row(rlid); + auto rowInCol = lclColMap.getLocalElement(lclRowMap.getGlobalElement(rlid)); + auto I = rowptr(rlid); + for (LocalOrdinal k = 0; k < row.length; ++k) { + if ((impl_ATS::magnitude(row.value(k)) > threshold) || (row.colidx(k) == rowInCol)) { + idx(I) = row.colidx(k); + vals(I) = row.value(k); + I += 1; + } + } + }); + + Kokkos::fence(); + } else { + Kokkos::parallel_scan( + "removeSmallEntries::rowptr", + Kokkos::RangePolicy(0, lclA.numRows()), + KOKKOS_LAMBDA(const LocalOrdinal rlid, LocalOrdinal& partial_nnz, bool is_final) { + auto row = lclA.row(rlid); + for (LocalOrdinal k = 0; k < row.length; ++k) { + if (impl_ATS::magnitude(row.value(k)) > threshold) { + partial_nnz += 1; + } + } + if (is_final) + rowptr(rlid + 1) = partial_nnz; + }, + nnz); + + idx = col_idx_type("idx", nnz); + vals = vals_type("vals", nnz); + + Kokkos::parallel_for( + "removeSmallEntries::indicesValues", + Kokkos::RangePolicy(0, lclA.numRows()), + KOKKOS_LAMBDA(const LocalOrdinal rlid) { + auto row = lclA.row(rlid); + auto I = rowptr(rlid); + for (LocalOrdinal k = 0; k < row.length; ++k) { + if (impl_ATS::magnitude(row.value(k)) > threshold) { + idx(I) = row.colidx(k); + vals(I) = row.value(k); + I += 1; + } + } + }); + + Kokkos::fence(); + } + + auto lclNewA = typename crs_matrix::local_matrix_type("thresholdedMatrix", lclA.numRows(), lclA.numCols(), nnz, vals, rowptr, idx); + auto newA = Xpetra::MatrixFactory::Build(lclNewA, A->getRowMap(), A->getColMap(), A->getDomainMap(), A->getRangeMap()); + + return newA; +} + template RCP> UtilitiesBase:: - GetThresholdedMatrix(const RCP>& Ain, const Scalar threshold, const bool keepDiagonal, const GlobalOrdinal expectedNNZperRow) { + GetThresholdedMatrix(const RCP>& Ain, const typename Teuchos::ScalarTraits::magnitudeType threshold, const 
bool keepDiagonal, const GlobalOrdinal expectedNNZperRow) { + auto crsWrap = rcp_dynamic_cast(Ain); + if (!crsWrap.is_null()) { + auto crsMat = crsWrap->getCrsMatrix(); + auto filteredMat = removeSmallEntries(crsMat, threshold, keepDiagonal); + return rcp_static_cast(filteredMat); + } + RCP rowmap = Ain->getRowMap(); RCP colmap = Ain->getColMap(); RCP Aout = rcp(new CrsMatrixWrap(rowmap, expectedNNZperRow <= 0 ? Ain->getGlobalMaxNumRowEntries() : expectedNNZperRow)); diff --git a/packages/panzer/adapters-stk/src/Panzer_STK_ModelEvaluatorFactory.cpp b/packages/panzer/adapters-stk/src/Panzer_STK_ModelEvaluatorFactory.cpp index 22672df3d27d..70f6d62e8968 100644 --- a/packages/panzer/adapters-stk/src/Panzer_STK_ModelEvaluatorFactory.cpp +++ b/packages/panzer/adapters-stk/src/Panzer_STK_ModelEvaluatorFactory.cpp @@ -42,8 +42,6 @@ #include "PanzerAdaptersSTK_config.hpp" -#ifdef PANZER_HAVE_EPETRA_STACK - #include "Panzer_STK_ModelEvaluatorFactory.hpp" #include "Panzer_STK_ModelEvaluatorFactory_impl.hpp" @@ -52,5 +50,3 @@ namespace panzer_stk { template class ModelEvaluatorFactory; } - -#endif // PANZER_HAVE_EPETRA_STACK \ No newline at end of file diff --git a/packages/panzer/adapters-stk/src/Panzer_STK_ModelEvaluatorFactory.hpp b/packages/panzer/adapters-stk/src/Panzer_STK_ModelEvaluatorFactory.hpp index 1e45add02200..baaa034bd725 100644 --- a/packages/panzer/adapters-stk/src/Panzer_STK_ModelEvaluatorFactory.hpp +++ b/packages/panzer/adapters-stk/src/Panzer_STK_ModelEvaluatorFactory.hpp @@ -117,8 +117,8 @@ namespace panzer_stk { Teuchos::RCP getValidParameters() const; //@} +#ifdef PANZER_HAVE_EPETRA_STACK /** \brief Builds the model evaluators for a panzer assembly - \param[in] comm (Required) Teuchos communicator. Must be non-null. \param[in] global_data (Required) A fully constructed (all members allocated) global data object used to control parameter library and output support. Must be non-null. 
\param[in] eqset_factory (Required) Equation set factory to provide user defined equation sets. @@ -131,6 +131,7 @@ namespace panzer_stk { const panzer::BCStrategyFactory & bc_factory, const panzer::ClosureModelFactory_TemplateManager & cm_factory, bool meConstructionOn=true); +#endif Teuchos::RCP > getPhysicsModelEvaluator(); @@ -146,9 +147,11 @@ namespace panzer_stk { template int addResponse(const std::string & responseName,const std::vector & wkstDesc,const BuilderT & builder); +#ifdef PANZER_HAVE_EPETRA_STACK void buildResponses(const panzer::ClosureModelFactory_TemplateManager& cm_factory, const bool write_graphviz_file=false, const std::string& graphviz_file_prefix=""); +#endif Teuchos::RCP > getResponseOnlyModelEvaluator(); @@ -196,6 +199,7 @@ namespace panzer_stk { bool isTransient() const { return m_is_transient; } +#ifdef PANZER_HAVE_EPETRA_STACK /** Clone the internal model evaluator, but use new physics blocks. Note that * the physics blocks must be in some sense compatible with the original set. */ @@ -208,6 +212,7 @@ namespace panzer_stk { bool is_transient,bool is_explicit, const Teuchos::Ptr & bc_list=Teuchos::null, const Teuchos::RCP > & physics_me=Teuchos::null) const; +#endif /** \brief Setup the initial conditions in a model evaluator. Note that this * is entirely self contained. @@ -236,6 +241,7 @@ namespace panzer_stk { const Teuchos::ParameterList & user_data_pl, int workset_size) const; +#ifdef PANZER_HAVE_EPETRA_STACK /** This method is to assist with construction of the model evaluators. 
*/ Teuchos::RCP > @@ -248,6 +254,7 @@ namespace panzer_stk { const Teuchos::RCP > & solverFactory, const Teuchos::RCP & global_data, bool is_transient,double t_init) const; +#endif bool useDynamicCoordinates() const diff --git a/packages/panzer/adapters-stk/src/Panzer_STK_ModelEvaluatorFactory_impl.hpp b/packages/panzer/adapters-stk/src/Panzer_STK_ModelEvaluatorFactory_impl.hpp index 5073e57808c1..f3ab9cf6a107 100644 --- a/packages/panzer/adapters-stk/src/Panzer_STK_ModelEvaluatorFactory_impl.hpp +++ b/packages/panzer/adapters-stk/src/Panzer_STK_ModelEvaluatorFactory_impl.hpp @@ -61,12 +61,9 @@ #include "Panzer_BlockedDOFManagerFactory.hpp" #include "Panzer_LinearObjFactory.hpp" #include "Panzer_TpetraLinearObjFactory.hpp" -#include "Panzer_EpetraLinearObjContainer.hpp" #include "Panzer_ThyraObjContainer.hpp" -#include "Panzer_BlockedEpetraLinearObjFactory.hpp" #include "Panzer_BlockedTpetraLinearObjFactory.hpp" #include "Panzer_InitialCondition_Builder.hpp" -#include "Panzer_ModelEvaluator_Epetra.hpp" #include "Panzer_ModelEvaluator.hpp" #include "Panzer_ElementBlockIdToPhysicsIdMap.hpp" #include "Panzer_WorksetContainer.hpp" @@ -104,7 +101,6 @@ #include // for std::getenv // Piro solver objects -#include "Thyra_EpetraModelEvaluator.hpp" #include "Piro_ConfigDefs.hpp" #include "Piro_NOXSolver.hpp" #include "Piro_LOCASolver.hpp" @@ -112,6 +108,13 @@ #include "Piro_TempusSolverForwardOnly.hpp" #endif +#ifdef PANZER_HAVE_EPETRA_STACK +#include "Panzer_EpetraLinearObjContainer.hpp" +#include "Thyra_EpetraModelEvaluator.hpp" +#include "Panzer_ModelEvaluator_Epetra.hpp" +#include "Panzer_BlockedEpetraLinearObjFactory.hpp" +#endif + #include namespace panzer_stk { @@ -245,6 +248,7 @@ namespace panzer_stk { } } // namespace +#ifdef PANZER_HAVE_EPETRA_STACK template void ModelEvaluatorFactory::buildObjects(const Teuchos::RCP >& comm, const Teuchos::RCP& global_data, @@ -779,6 +783,7 @@ namespace panzer_stk { m_physics_me = thyra_me; } +#endif // ifdef 
PANZER_HAVE_EPETRA_STACK template void ModelEvaluatorFactory:: @@ -1334,6 +1339,7 @@ namespace panzer_stk { return fmb; } +#ifdef PANZER_HAVE_EPETRA_STACK template Teuchos::RCP > ModelEvaluatorFactory:: @@ -1468,7 +1474,9 @@ namespace panzer_stk { return thyra_me; } } +#endif // ifdef PANZER_HAVE_EPETRA_STACK +#ifdef PANZER_HAVE_EPETRA_STACK template Teuchos::RCP > ModelEvaluatorFactory:: @@ -1500,6 +1508,7 @@ namespace panzer_stk { return thyra_me; } +#endif // ifdef PANZER_HAVE_EPETRA_STACK template double ModelEvaluatorFactory:: @@ -1602,6 +1611,7 @@ namespace panzer_stk { ); } +#ifdef PANZER_HAVE_EPETRA_STACK template void ModelEvaluatorFactory:: buildResponses(const panzer::ClosureModelFactory_TemplateManager & cm_factory, @@ -1642,6 +1652,7 @@ namespace panzer_stk { TEUCHOS_ASSERT(false); } +#endif // ifdef PANZER_HAVE_EPETRA_STACK } #endif diff --git a/packages/shylu/shylu_node/tacho/cmake/Tacho_config.h.in b/packages/shylu/shylu_node/tacho/cmake/Tacho_config.h.in index 4ea6b6dfe5b5..75a74ac1fc6a 100644 --- a/packages/shylu/shylu_node/tacho/cmake/Tacho_config.h.in +++ b/packages/shylu/shylu_node/tacho/cmake/Tacho_config.h.in @@ -34,6 +34,9 @@ Sandia National Laboratories, Albuquerque, NM, USA /* Define if want to build with BLAS enabled */ #cmakedefine TACHO_HAVE_BLAS +/* Define if want to build with CuSparse enabled */ +#cmakedefine TACHO_HAVE_CUSPARSE + /* Define if want to build with METIS enabled */ #cmakedefine TACHO_HAVE_METIS diff --git a/packages/shylu/shylu_node/tacho/example/Tacho_ExampleDriver.hpp b/packages/shylu/shylu_node/tacho/example/Tacho_ExampleDriver.hpp index e0905f9d3180..f1407968ea54 100644 --- a/packages/shylu/shylu_node/tacho/example/Tacho_ExampleDriver.hpp +++ b/packages/shylu/shylu_node/tacho/example/Tacho_ExampleDriver.hpp @@ -42,6 +42,7 @@ template int driver(int argc, char *argv[]) { int device_solve_thres = 128; int variant = 0; int nstreams = 8; + bool no_warmup = false; int nfacts = 2; int nsolves = 10; @@ -62,6 +63,7 @@ 
template int driver(int argc, char *argv[]) { opts.set_option("device-solve-thres", "Device function is used above this subproblem size", &device_solve_thres); opts.set_option("variant", "algorithm variant in levelset scheduling; 0, 1 and 2", &variant); opts.set_option("nstreams", "# of streams used in CUDA; on host, it is ignored", &nstreams); + opts.set_option("no-warmup", "Flag to turn off warmup", &no_warmup); opts.set_option("nfacts", "# of factorizations to perform", &nfacts); opts.set_option("nsolves", "# of solves to perform", &nsolves); @@ -92,7 +94,8 @@ template int driver(int argc, char *argv[]) { Tacho::printExecSpaceConfiguration("HostSpace", detail); std::cout << " Method Name:: " << method_name << std::endl; std::cout << " Solver Type:: " << variant << std::endl; - std::cout << " # Streams:: " << nstreams; + std::cout << " # Streams:: " << nstreams << std::endl; + std::cout << " # RHSs:: " << nrhs; std::cout << std::endl << " --------------------- " << std::endl << std::endl; int r_val = 0; @@ -192,6 +195,10 @@ template int driver(int argc, char *argv[]) { double initi_time = timer.seconds(); /// symbolic structure can be reused + if (!no_warmup) { + // warm-up + solver.factorize(values_on_device); + } timer.reset(); for (int i = 0; i < nfacts; ++i) { solver.factorize(values_on_device); @@ -213,8 +220,18 @@ template int driver(int argc, char *argv[]) { } } - std::cout << std::endl; double solve_time = 0.0; + if (!no_warmup) { + // warm-up + timer.reset(); + solver.solve(x, b, t); + solve_time = timer.seconds(); + const double res = solver.computeRelativeResidual(values_on_device, x, b); + std::cout << "TachoSolver (warm-up): residual = " << res << " time " << solve_time << "\n"; + } + std::cout << std::endl; + + solve_time = 0.0; for (int i = 0; i < nsolves; ++i) { timer.reset(); solver.solve(x, b, t); @@ -229,7 +246,6 @@ template int driver(int argc, char *argv[]) { std::cout << " Facto Time " << facto_time / (double)nfacts << std::endl; std::cout 
<< " Solve Time " << solve_time / (double)nsolves << std::endl; std::cout << std::endl; - solver.release(); } catch (const std::exception &e) { std::cerr << "Error: exception is caught: \n" << e.what() << "\n"; diff --git a/packages/shylu/shylu_node/tacho/src/CMakeLists.txt b/packages/shylu/shylu_node/tacho/src/CMakeLists.txt index 228ee88b281d..200158736430 100644 --- a/packages/shylu/shylu_node/tacho/src/CMakeLists.txt +++ b/packages/shylu/shylu_node/tacho/src/CMakeLists.txt @@ -10,6 +10,8 @@ SET(TACHO_HAVE_MKL ${TPL_ENABLE_MKL}) SET(TACHO_HAVE_LAPACK ${TPL_ENABLE_LAPACK}) SET(TACHO_HAVE_BLAS ${TPL_ENABLE_BLAS}) +SET(TACHO_HAVE_CUSPARSE ${TPL_ENABLE_CUSPARSE}) + SET(TACHO_HAVE_METIS ${TPL_ENABLE_METIS}) SET(TACHO_HAVE_METIS_MT ${TPL_ENABLE_MTMETIS}) SET(TACHO_HAVE_SCOTCH ${TPL_ENABLE_Scotch}) diff --git a/packages/shylu/shylu_node/tacho/src/Tacho_CuSolver.hpp b/packages/shylu/shylu_node/tacho/src/Tacho_CuSolver.hpp index 67d7fe2c603b..dec061058314 100644 --- a/packages/shylu/shylu_node/tacho/src/Tacho_CuSolver.hpp +++ b/packages/shylu/shylu_node/tacho/src/Tacho_CuSolver.hpp @@ -57,7 +57,9 @@ class CuSolver { private: cusolverSpHandle_t _handle; csrcholInfo_t _chol_info; +#if defined(TACHO_HAVE_CUSPARSE) cusparseMatDescr_t _desc; +#endif int _status; ordinal_type _m; @@ -81,12 +83,18 @@ class CuSolver { checkStatus("cusolverSpCreate"); _status = cusolverSpCreateCsrcholInfo(&_chol_info); checkStatus("cusolverSpCreateCsrcholInfo"); +#if defined(TACHO_HAVE_CUSPARSE) _status = cusparseCreateMatDescr(&_desc); checkStatus("cusparseCreateMatDescr"); +#else + std::logic_error("CuSparse is not enabled"); +#endif } virtual ~CuSolver() { +#if defined(TACHO_HAVE_CUSPARSE) _status = cusparseDestroyMatDescr(_desc); checkStatus("cusparseDestroyMatDescr"); +#endif _status = cusolverSpDestroyCsrcholInfo(_chol_info); checkStatus("cusolverSpDestroyCsrcholInfo"); _status = cusolverSpDestroy(_handle); @@ -132,7 +140,9 @@ class CuSolver { const double t_copy = timer.seconds(); 
timer.reset(); +#if defined(TACHO_HAVE_CUSPARSE) _status = cusolverSpXcsrcholAnalysis(_handle, _m, _nnz, _desc, _ap_ordinal.data(), _aj.data(), _chol_info); +#endif checkStatus("cusolverSpXcsrcholAnalysis"); Kokkos::fence(); const double t_analyze = timer.seconds(); @@ -160,9 +170,11 @@ class CuSolver { Kokkos::Timer timer; timer.reset(); - size_t internalDataInBytes, workspaceInBytes; + size_t internalDataInBytes = 0, workspaceInBytes = 0; +#if defined(TACHO_HAVE_CUSPARSE) _status = cusolverSpDcsrcholBufferInfo(_handle, _m, _nnz, _desc, ax.data(), _ap_ordinal.data(), _aj.data(), _chol_info, &internalDataInBytes, &workspaceInBytes); +#endif checkStatus("cusolverSpDcsrcholBufferInfo"); const size_t bufsize = workspaceInBytes / sizeof(value_type); @@ -172,8 +184,10 @@ class CuSolver { const double t_alloc = timer.seconds(); timer.reset(); +#if defined(TACHO_HAVE_CUSPARSE) _status = cusolverSpDcsrcholFactor(_handle, _m, _nnz, _desc, ax.data(), _ap_ordinal.data(), _aj.data(), _chol_info, _buf.data()); +#endif checkStatus("cusolverSpDcsrcholFactor"); Kokkos::fence(); const double t_factor = timer.seconds(); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Driver_Impl.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Driver_Impl.hpp index e4f26f30489a..2f0470e9c76e 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_Driver_Impl.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_Driver_Impl.hpp @@ -150,8 +150,13 @@ void Driver::setLevelSetOptionDeviceFunctionThreshold(const ordinal_type } template void Driver::setLevelSetOptionAlgorithmVariant(const ordinal_type variant) { - if (variant > 2 || variant < 0) { - std::logic_error("levelset algorithm variants range from 0 to 2"); +#if !defined(TACHO_HAVE_CUSPARSE) && !defined(KOKKOS_ENABLE_HIP) + if (variant == 3) { + TACHO_TEST_FOR_EXCEPTION(true, std::logic_error, "variant 3 requires CuSparse or rocSparce"); + } +#endif + if (variant > 3 || variant < 0) { + TACHO_TEST_FOR_EXCEPTION(true, 
std::logic_error, "levelset algorithm variants range from 0 to 3"); } _variant = variant; } diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Factory.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Factory.hpp index 4ee33d449ad4..36d4319d6055 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Factory.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_Factory.hpp @@ -135,6 +135,7 @@ template class NumericToolsFactory; using numeric_tools_levelset_var1_type = NumericToolsLevelSet; using numeric_tools_levelset_var2_type = NumericToolsLevelSet; + using numeric_tools_levelset_var3_type = NumericToolsLevelSet; TACHO_NUMERIC_TOOLS_FACTORY_BASE_USING; TACHO_NUMERIC_TOOLS_FACTORY_BASE_MEMBER; @@ -178,6 +179,7 @@ template class NumericToolsFactory; using numeric_tools_levelset_var1_type = NumericToolsLevelSet; using numeric_tools_levelset_var2_type = NumericToolsLevelSet; + using numeric_tools_levelset_var3_type = NumericToolsLevelSet; TACHO_NUMERIC_TOOLS_FACTORY_BASE_USING; TACHO_NUMERIC_TOOLS_FACTORY_BASE_MEMBER; @@ -247,6 +249,7 @@ template class NumericToolsFactory; using numeric_tools_levelset_var1_type = NumericToolsLevelSet; using numeric_tools_levelset_var2_type = NumericToolsLevelSet; + using numeric_tools_levelset_var3_type = NumericToolsLevelSet; TACHO_NUMERIC_TOOLS_FACTORY_BASE_USING; TACHO_NUMERIC_TOOLS_FACTORY_BASE_MEMBER; @@ -288,6 +291,14 @@ template class NumericToolsFactory::type> using numeric_tools_levelset_var0_type = NumericToolsLevelSet; using numeric_tools_levelset_var1_type = NumericToolsLevelSet; using numeric_tools_levelset_var2_type = NumericToolsLevelSet; + using numeric_tools_levelset_var3_type = NumericToolsLevelSet; TACHO_NUMERIC_TOOLS_FACTORY_BASE_USING; TACHO_NUMERIC_TOOLS_FACTORY_BASE_MEMBER; @@ -348,6 +360,14 @@ class NumericToolsFactory::type> TACHO_NUMERIC_TOOLS_FACTORY_LEVELSET_BODY(numeric_tools_levelset_var2_type); break; } + case 3: { + if (_method == 1 
|| _method == 3) { + TACHO_NUMERIC_TOOLS_FACTORY_LEVELSET_BODY(numeric_tools_levelset_var3_type); + } else { + TACHO_TEST_FOR_EXCEPTION(true, std::logic_error, "Invalid variant input"); + } + break; + } default: { TACHO_TEST_FOR_EXCEPTION(true, std::logic_error, "Invalid variant input"); break; @@ -368,6 +388,7 @@ class NumericToolsFactory; using numeric_tools_levelset_var1_type = NumericToolsLevelSet; using numeric_tools_levelset_var2_type = NumericToolsLevelSet; + using numeric_tools_levelset_var3_type = NumericToolsLevelSet; TACHO_NUMERIC_TOOLS_FACTORY_BASE_USING; TACHO_NUMERIC_TOOLS_FACTORY_BASE_MEMBER; diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp index e1eaa30c5743..c6552b8af1b0 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_NumericTools_LevelSet.hpp @@ -56,6 +56,7 @@ Sandia National Laboratories, Albuquerque, NM, USA #include "Tacho_Trsv_OnDevice.hpp" #include "Tacho_SupernodeInfo.hpp" +#include "Tacho_TeamFunctor_ExtractCRS.hpp" #include "Tacho_TeamFunctor_FactorizeChol.hpp" #include "Tacho_TeamFunctor_SolveLowerChol.hpp" @@ -164,10 +165,21 @@ class NumericToolsLevelSet : public NumericToolsBase { // cuda stream int _nstreams; + + // workspace for SpMV + value_type_matrix _w_vec; + value_type_array buffer_U; + value_type_array buffer_L; #if defined(KOKKOS_ENABLE_CUDA) bool _is_cublas_created, _is_cusolver_dn_created; cublasHandle_t _handle_blas; cusolverDnHandle_t _handle_lapack; + #if defined(TACHO_HAVE_CUSPARSE) + // workspace for SpMV + cusparseDnMatDescr_t matT, matW; + cusparseDnVecDescr_t vecT, vecW; + #endif + using blas_handle_type = cublasHandle_t; using lapack_handle_type = cusolverDnHandle_t; using stream_array_host = std::vector; @@ -178,6 +190,10 @@ class NumericToolsLevelSet : public NumericToolsBase { rocblas_handle _handle_blas; 
rocblas_handle _handle_lapack; std::vector _handles; + // workspace for SpMV + rocsparse_dnmat_descr matT, matW; + rocsparse_dnvec_descr vecT, vecW; + using blas_handle_type = rocblas_handle; using lapack_handle_type = rocblas_handle; using stream_array_host = std::vector; @@ -287,7 +303,7 @@ class NumericToolsLevelSet : public NumericToolsBase { flop += DenseFlopCount::Chol(m); if (variant == 1) { flop += DenseFlopCount::Trsm(true, m, m); - } else if (variant == 2) { + } else if (variant == 2 || variant == 3) { flop += DenseFlopCount::Trsm(true, m, m); flop += DenseFlopCount::Trsm(true, m, n); } @@ -303,7 +319,7 @@ class NumericToolsLevelSet : public NumericToolsBase { flop += DenseFlopCount::LDL(m); if (variant == 1) { flop += DenseFlopCount::Trsm(true, m, m); - } else if (variant == 2) { + } else if (variant == 2 || variant == 3) { flop += DenseFlopCount::Trsm(true, m, m); flop += DenseFlopCount::Trsm(true, m, n); } @@ -319,7 +335,7 @@ class NumericToolsLevelSet : public NumericToolsBase { flop += DenseFlopCount::LU(m, m); if (variant == 1) { flop += 2 * DenseFlopCount::Trsm(true, m, m); - } else if (variant == 2) { + } else if (variant == 2 || variant == 3) { flop += 2 * DenseFlopCount::Trsm(true, m, m); flop += 2 * DenseFlopCount::Trsm(true, m, n); } @@ -351,6 +367,7 @@ class NumericToolsLevelSet : public NumericToolsBase { } public: + /// /// initialization / release /// @@ -437,24 +454,29 @@ class NumericToolsLevelSet : public NumericToolsBase { const auto s = _h_supernodes(sid); const ordinal_type m = s.m, n = s.n, n_m = n - m; const ordinal_type schur_work_size = n_m * (n_m + max_factor_team_size); - const ordinal_type chol_factor_work_size_variants[3] = {schur_work_size, max(m * m, schur_work_size), + const ordinal_type chol_factor_work_size_variants[4] = {schur_work_size, max(m * m, schur_work_size), + m * m + schur_work_size, m * m + schur_work_size}; const ordinal_type chol_factor_work_size = chol_factor_work_size_variants[variant]; const ordinal_type 
ldl_factor_work_size_variant_0 = chol_factor_work_size_variants[0] + max(32 * m, m * n); - const ordinal_type ldl_factor_work_size_variants[3] = {ldl_factor_work_size_variant_0, + const ordinal_type ldl_factor_work_size_variants[4] = {ldl_factor_work_size_variant_0, max(m * m, ldl_factor_work_size_variant_0 + m * n_m), + m * m + ldl_factor_work_size_variant_0 + m * n_m, m * m + ldl_factor_work_size_variant_0 + m * n_m}; const ordinal_type ldl_factor_work_size = ldl_factor_work_size_variants[variant]; - const ordinal_type lu_factor_work_size_variants[3] = {schur_work_size, max(m * m, schur_work_size), + const ordinal_type lu_factor_work_size_variants[4] = {schur_work_size, max(m * m, schur_work_size), + m * m + schur_work_size, m * m + schur_work_size}; const ordinal_type lu_factor_work_size = lu_factor_work_size_variants[variant]; - const ordinal_type factor_work_size_variants[3] = {chol_factor_work_size, ldl_factor_work_size, + const ordinal_type factor_work_size_variants[4] = {chol_factor_work_size, ldl_factor_work_size, + lu_factor_work_size, lu_factor_work_size}; const ordinal_type chol_solve_work_size = (variant == 0 ? n_m : n); const ordinal_type ldl_solve_work_size = chol_solve_work_size; const ordinal_type lu_solve_work_size = chol_solve_work_size; - const ordinal_type solve_work_size_variants[3] = {chol_solve_work_size, ldl_solve_work_size, + const ordinal_type solve_work_size_variants[4] = {chol_solve_work_size, ldl_solve_work_size, + lu_solve_work_size, lu_solve_work_size}; const ordinal_type index_work_size = this->getSolutionMethod() - 1; @@ -513,7 +535,7 @@ class NumericToolsLevelSet : public NumericToolsBase { _device_level_cut = min(device_level_cut, _nlevel); _device_factorize_thres = device_factorize_thres; - _device_solve_thres = device_solve_thres; + _device_solve_thres = (variant == 3 ? 
0 : device_solve_thres); _h_factorize_mode = ordinal_type_array_host(do_not_initialize_tag("h_factorize_mode"), _nsupernodes); Kokkos::deep_copy(_h_factorize_mode, -1); @@ -967,7 +989,7 @@ class NumericToolsLevelSet : public NumericToolsBase { factorizeCholeskyOnDeviceVar0(pbeg, pend, h_buf_factor_ptr, work); else if (variant == 1) factorizeCholeskyOnDeviceVar1(pbeg, pend, h_buf_factor_ptr, work); - else if (variant == 2) + else if (variant == 2 || variant == 3) factorizeCholeskyOnDeviceVar2(pbeg, pend, h_buf_factor_ptr, work); else { TACHO_TEST_FOR_EXCEPTION(true, std::logic_error, @@ -1238,7 +1260,7 @@ class NumericToolsLevelSet : public NumericToolsBase { factorizeLDL_OnDeviceVar0(pbeg, pend, h_buf_factor_ptr, work); else if (variant == 1) factorizeLDL_OnDeviceVar1(pbeg, pend, h_buf_factor_ptr, work); - else if (variant == 2) + else if (variant == 2 || variant == 3) factorizeLDL_OnDeviceVar2(pbeg, pend, h_buf_factor_ptr, work); else { TACHO_TEST_FOR_EXCEPTION(true, std::logic_error, @@ -1505,7 +1527,7 @@ class NumericToolsLevelSet : public NumericToolsBase { factorizeLU_OnDeviceVar0(pbeg, pend, h_buf_factor_ptr, work); else if (variant == 1) factorizeLU_OnDeviceVar1(pbeg, pend, h_buf_factor_ptr, work); - else if (variant == 2) + else if (variant == 2 || variant == 3) factorizeLU_OnDeviceVar2(pbeg, pend, h_buf_factor_ptr, work); else { TACHO_TEST_FOR_EXCEPTION(true, std::logic_error, @@ -1513,6 +1535,403 @@ class NumericToolsLevelSet : public NumericToolsBase { } } + inline void extractCRS(bool lu) { +#if (defined(KOKKOS_ENABLE_CUDA) && defined(TACHO_HAVE_CUSPARSE)) || \ + defined(KOKKOS_ENABLE_HIP) + + const ordinal_type nrhs = 1; + const ordinal_type m = _m; + const value_type one(1); + const value_type zero(0); + + // ======================== + // workspace + Kokkos::resize(_w_vec, m, nrhs); + int ldw = _w_vec.stride(1); +#if defined(KOKKOS_ENABLE_CUDA) + cudaDataType computeType; + if (std::is_same::value) { + computeType = CUDA_R_64F; + } else if 
(std::is_same::value) { + computeType = CUDA_R_32F; + } else { + TACHO_TEST_FOR_EXCEPTION(true, std::logic_error, + "LevelSetTools::solveCholeskyLowerOnDevice: ComputeSPMV only supported double or float"); + } + // attach to Cusparse/Rocsparse data struct + cusparseCreateDnMat(&matW, m, nrhs, ldw, (void*)(_w_vec.data()), computeType, CUSPARSE_ORDER_COL); + cusparseCreateDnVec(&vecW, m, (void*)(_w_vec.data()), computeType); + // vectors used for preprocessing +#ifdef USE_SPMM_FOR_WORKSPACE_SIZE + cusparseDnMatDescr_t vecX, vecY; + const ordinal_type ldx = _w_vec.stride(1); + cusparseCreateDnMat(&vecX, m, nrhs, ldx, _w_vec.data(), computeType, CUSPARSE_ORDER_COL); + cusparseCreateDnMat(&vecY, m, nrhs, ldx, _w_vec.data(), computeType, CUSPARSE_ORDER_COL); +#else + cusparseDnVecDescr_t vecX, vecY; + cusparseCreateDnVec(&vecX, m, _w_vec.data(), computeType); + cusparseCreateDnVec(&vecY, m, _w_vec.data(), computeType); +#endif +#elif defined(KOKKOS_ENABLE_HIP) + rocsparse_datatype rocsparse_compute_type = rocsparse_datatype_f64_r; + if (std::is_same::value) { + rocsparse_compute_type = rocsparse_datatype_f32_r; + } + // attach to Cusparse/Rocsparse data struct + rocsparse_create_dnmat_descr(&matW, m, nrhs, ldw, (void*)(_w_vec.data()), rocsparse_compute_type, rocsparse_order_column); + rocsparse_create_dnvec_descr(&vecW, m, (void*)(_w_vec.data()), rocsparse_compute_type); + // vectors used for preprocessing + rocsparse_dnvec_descr vecX, vecY; + rocsparse_create_dnvec_descr(&vecX, m, (void*)_w_vec.data(), rocsparse_compute_type); + rocsparse_create_dnvec_descr(&vecY, m, (void*)_w_vec.data(), rocsparse_compute_type); +#endif + + for (ordinal_type lvl = 0; lvl < _team_serial_level_cut; ++lvl) { + const ordinal_type pbeg = _h_level_ptr(lvl), pend = _h_level_ptr(lvl + 1); + + // the first supernode in this lvl (where the CRS matrix is stored) + auto &s0 = _h_supernodes(_h_level_sids(pbeg)); + + #define TACHO_INSERT_DIAGONALS + // NOTE: this needs extra vector-entry copy for 
the non-active rows at each level for solve (copy t to w, and w back to t) + // but it seems faster on AMD 250X GPU, and not much performance impact on V100 + #define TACHO_INSERT_DIAGONALS + // ======================== + // count nnz / row + Kokkos::resize(s0.rowptrU, 1+m); + typedef TeamFunctor_ExtractCrs functor_type; + + functor_type extractor_crs(_info, _solve_mode, _level_sids); + extractor_crs.setGlobalSize(m); + extractor_crs.setRange(pbeg, pend); + extractor_crs.setRowPtr(s0.rowptrU); + { + using team_policy_type = Kokkos::TeamPolicy, exec_space, + typename functor_type::ExtractPtrTag>; + team_policy_type team_policy((pend-pbeg)+1, Kokkos::AUTO()); + + Kokkos::parallel_for("extract rowptr", team_policy, extractor_crs); + exec_space().fence(); + } + + // ======================== + // shift to generate rowptr + using range_type = Kokkos::pair; + ordinal_type nnz = 0; + { + using range_policy_type = Kokkos::RangePolicy; + Kokkos::parallel_scan("shiftRowptr", range_policy_type(0, m+1), rowptr_sum(s0.rowptrU)); + exec_space().fence(); + // get nnz + auto d_nnz = Kokkos::subview(s0.rowptrU, range_type(m, m+1)); + auto h_nnz = Kokkos::create_mirror_view_and_copy(host_memory_space(), d_nnz); + nnz = h_nnz(0); + } + + // ======================== + // allocate (TODO: move to symbolic) + Kokkos::resize(s0.colindU, nnz); + Kokkos::resize(s0.nzvalsU, nnz); + + // ======================== + // extract nonzero element + extractor_crs.setCrsView(s0.colindU, s0.nzvalsU); + { + using team_policy_type = Kokkos::TeamPolicy, exec_space, + typename functor_type::ExtractValTag>; + team_policy_type team_policy((pend-pbeg)+1, Kokkos::AUTO()); + + // >> launch functor to extract nonzero entries + Kokkos::parallel_for("extract nzvals", team_policy, extractor_crs); + exec_space().fence(); + } + + { + // ======================== + // shift back (TODO: shift first to avoid this) + // copy to CPU, for now + auto h_rowptr = Kokkos::create_mirror_view_and_copy(host_memory_space(), 
s0.rowptrU); + for (ordinal_type i = m; i > 0 ; i--) h_rowptr(i) = h_rowptr(i-1); + h_rowptr(0) = 0; + Kokkos::deep_copy(s0.rowptrU, h_rowptr); + } + +#if defined(KOKKOS_ENABLE_HIP) + s0.spmv_explicit_transpose = true; +#else + s0.spmv_explicit_transpose = false; // okay for SpMV, though may not for SpMM +#endif + if (lu) { + s0.spmv_explicit_transpose = true; + + // get nnz per row (L is stored by column) + Kokkos::resize(s0.rowptrL, 1+m); + Kokkos::deep_copy(s0.rowptrL, 0); + ordinal_type nnz = 0; + { + using team_policy_type = Kokkos::TeamPolicy, exec_space, + typename functor_type::ExtractPtrColTag>; + team_policy_type team_policy((pend-pbeg)+1, Kokkos::AUTO()); + + extractor_crs.setRowPtr(s0.rowptrL); + Kokkos::parallel_for("extract rowptr L", team_policy, extractor_crs); + exec_space().fence(); + } + { + // convert to offset (on CPU for now) + using range_policy_type = Kokkos::RangePolicy; + Kokkos::parallel_scan("shiftRowptr L", range_policy_type(0, m+1), rowptr_sum(s0.rowptrL)); + exec_space().fence(); + // get nnz + auto d_nnz = Kokkos::subview(s0.rowptrL, range_type(m, m+1)); + auto h_nnz = Kokkos::create_mirror_view_and_copy(host_memory_space(), d_nnz); + nnz = h_nnz(0); + } + + // allocate (TODO: move to symbolic) + Kokkos::resize(s0.colindL, nnz); + Kokkos::resize(s0.nzvalsL, nnz); + + // insert nonzeros + extractor_crs.setCrsView(s0.colindL, s0.nzvalsL); + extractor_crs.setPivPtr(_piv); + { + using team_policy_type = Kokkos::TeamPolicy, exec_space, + typename functor_type::ExtractValColTag>; + team_policy_type team_policy((pend-pbeg)+1, Kokkos::AUTO()); + + // >> launch functor to extract nonzero entries + Kokkos::parallel_for("extract nzvals L", team_policy, extractor_crs); + exec_space().fence(); + } + { + // ======================== + // shift back (TODO: shift first to avoid this) + // copy to CPU, for now + auto h_rowptr = Kokkos::create_mirror_view_and_copy(host_memory_space(), s0.rowptrL); + for (ordinal_type i = m; i > 0 ; i--) h_rowptr(i) = 
h_rowptr(i-1); + h_rowptr(0) = 0; + Kokkos::deep_copy(s0.rowptrL, h_rowptr); + } + } else if (s0.spmv_explicit_transpose) { + // ======================== + // transpose + // >> allocate + Kokkos::resize(s0.rowptrL, 1+m); + Kokkos::resize(s0.colindL, nnz); + Kokkos::resize(s0.nzvalsL, nnz); + + Kokkos::deep_copy(s0.rowptrL, 0); + extractor_crs.setRowPtrT(s0.rowptrL); + { + // >> count nnz / row (transpose) + using team_policy_type = Kokkos::RangePolicy; + team_policy_type team_policy(0, m); + Kokkos::parallel_for("transpose pointer", team_policy, extractor_crs); + } + { + // >> accumulate to generate rowptr (transpose) + using range_policy_type = Kokkos::RangePolicy; + Kokkos::parallel_scan("shiftRowptrT", range_policy_type(0, m+1), rowptr_sum(s0.rowptrL)); + exec_space().fence(); + } + extractor_crs.setCrsViewT(s0.colindL, s0.nzvalsL); + { + // >> copy into transpose-matrix + using team_policy_type = Kokkos::RangePolicy; + team_policy_type team_policy(0, m); + Kokkos::parallel_for("transpose pointer", team_policy, extractor_crs); + } + { + // ======================== + // copy to CPU, for now + auto h_rowptr = Kokkos::create_mirror_view_and_copy(host_memory_space(), s0.rowptrL); + for (ordinal_type i = m; i > 0 ; i--) h_rowptr(i) = h_rowptr(i-1); + h_rowptr(0) = 0; + Kokkos::deep_copy(s0.rowptrL, h_rowptr); + } + } + + size_t buffer_size_L = 0; + size_t buffer_size_U = 0; + nnz = s0.colindU.extent(0); + value_type alpha = one; + #ifdef TACHO_INSERT_DIAGONALS + value_type beta = zero; + #else + value_type beta = one; + #endif +#if defined(KOKKOS_ENABLE_CUDA) + cusparseCreate(&s0.cusparseHandle); + // create matrix + cusparseCreateCsr(&s0.U_cusparse, m, m, nnz, + s0.rowptrU.data(), s0.colindU.data(), s0.nzvalsU.data(), + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, computeType); +#ifdef USE_SPMM_FOR_WORKSPACE_SIZE + cusparseSpMM_bufferSize(s0.cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, 
s0.U_cusparse, vecX, &beta, vecY, + computeType, CUSPARSE_MM_ALG_DEFAULT, &buffer_size_U); +#else + cusparseSpMV_bufferSize(s0.cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, s0.U_cusparse, vecX, &beta, vecY, + computeType, CUSPARSE_MV_ALG_DEFAULT, &buffer_size_U); +#endif + if (s0.spmv_explicit_transpose) { + // create matrix (transpose or L) + nnz = s0.colindL.extent(0); + cusparseCreateCsr(&s0.L_cusparse, m, m, nnz, + s0.rowptrL.data(), s0.colindL.data(), s0.nzvalsL.data(), + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, computeType); + // workspace size +#ifdef USE_SPMM_FOR_WORKSPACE_SIZE + cusparseSpMM_bufferSize(s0.cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, s0.L_cusparse, vecX, &beta, vecY, + computeType, CUSPARSE_MM_ALG_DEFAULT, &buffer_size_L); +#else + cusparseSpMV_bufferSize(s0.cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, s0.L_cusparse, vecX, &beta, vecY, + computeType, CUSPARSE_MV_ALG_DEFAULT, &buffer_size_L); +#endif + } else { + // create matrix (L_cusparse stores the same ptrs as descrU, but optimized for trans) + cusparseCreateCsr(&s0.L_cusparse, m, m, nnz, + s0.rowptrU.data(), s0.colindU.data(), s0.nzvalsU.data(), + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, computeType); + // workspace size for transpose SpMV +#ifdef USE_SPMM_FOR_WORKSPACE_SIZE + cusparseSpMM_bufferSize(s0.cusparseHandle, CUSPARSE_OPERATION_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, s0.L_cusparse, vecX, &beta, vecY, + computeType, CUSPARSE_MM_ALG_DEFAULT, &buffer_size_L); +#else + cusparseSpMV_bufferSize(s0.cusparseHandle, CUSPARSE_OPERATION_TRANSPOSE, &alpha, s0.L_cusparse, vecX, &beta, vecY, + computeType, CUSPARSE_MV_ALG_DEFAULT, &buffer_size_L); +#endif + } + // allocate workspace + if (buffer_size_U > buffer_U.extent(0)) { + Kokkos::resize(buffer_U, buffer_size_U); + } + if (buffer_size_L > buffer_L.extent(0)) { + Kokkos::resize(buffer_L, 
buffer_size_L); + } +#elif defined(KOKKOS_ENABLE_HIP) + rocsparse_create_handle(&s0.rocsparseHandle); + // create matrix + rocsparse_create_csr_descr(&(s0.descrU), m, m, nnz, + s0.rowptrU.data(), s0.colindU.data(), s0.nzvalsU.data(), + rocsparse_indextype_i32, rocsparse_indextype_i32, rocsparse_index_base_zero, rocsparse_compute_type); + // workspace + #if ROCM_VERSION >= 50400 + rocsparse_spmv_ex + #else + rocsparse_spmv + #endif + (s0.rocsparseHandle, rocsparse_operation_none, + &alpha, s0.descrU, vecX, &beta, vecY, + rocsparse_compute_type, rocsparse_spmv_alg_default, + #if ROCM_VERSION >= 50400 + rocsparse_spmv_stage_buffer_size, + #endif + &buffer_size_U, nullptr); + // allocate workspace + if (buffer_size_U > buffer_U.extent(0)) { + Kokkos::resize(buffer_U, buffer_size_U); + } + #if ROCM_VERSION >= 50400 + // preprocess + buffer_size_U = buffer_U.extent(0); + rocsparse_spmv_ex + (s0.rocsparseHandle, rocsparse_operation_none, + &alpha, s0.descrU, vecX, &beta, vecY, + rocsparse_compute_type, rocsparse_spmv_alg_default, + rocsparse_spmv_stage_preprocess, + &buffer_size_U, (void*)buffer_U.data()); + #endif + if (s0.spmv_explicit_transpose) { + // create matrix (transpose) + nnz = s0.colindL.extent(0); + rocsparse_create_csr_descr(&(s0.descrL), m, m, nnz, + s0.rowptrL.data(), s0.colindL.data(), s0.nzvalsL.data(), + rocsparse_indextype_i32, rocsparse_indextype_i32, rocsparse_index_base_zero, rocsparse_compute_type); + // workspace + #if ROCM_VERSION >= 50400 + rocsparse_spmv_ex + #else + rocsparse_spmv + #endif + (s0.rocsparseHandle, rocsparse_operation_none, + &alpha, s0.descrL, vecX, &beta, vecY, + rocsparse_compute_type, rocsparse_spmv_alg_default, + #if ROCM_VERSION >= 50400 + rocsparse_spmv_stage_buffer_size, + #endif + &buffer_size_L, nullptr); + // allocate workspace + if (buffer_size_L > buffer_L.extent(0)) { + Kokkos::resize(buffer_L, buffer_size_L); + } + #if ROCM_VERSION >= 50400 + // preprocess + buffer_size_L = buffer_L.extent(0); + rocsparse_spmv_ex + 
(s0.rocsparseHandle, rocsparse_operation_none, + &alpha, s0.descrL, vecX, &beta, vecY, + rocsparse_compute_type, rocsparse_spmv_alg_default, + rocsparse_spmv_stage_preprocess, + &buffer_size_L, (void*)buffer_L.data()); + #endif + } else { + // create matrix, transpose (L_cusparse stores the same ptrs as descrU, but optimized for trans) + nnz = s0.colindL.extent(0); + rocsparse_create_csr_descr(&(s0.descrL), m, m, nnz, + s0.rowptrU.data(), s0.colindU.data(), s0.nzvalsU.data(), + rocsparse_indextype_i32, rocsparse_indextype_i32, rocsparse_index_base_zero, rocsparse_compute_type); + // workspace (transpose) + #if ROCM_VERSION >= 50400 + rocsparse_spmv_ex + #else + rocsparse_spmv + #endif + (s0.rocsparseHandle, rocsparse_operation_transpose, + &alpha, s0.descrL, vecX, &beta, vecY, + rocsparse_compute_type, rocsparse_spmv_alg_default, + #if ROCM_VERSION >= 50400 + rocsparse_spmv_stage_buffer_size, + #endif + &buffer_size_L, nullptr); + // allcate workspace + if (buffer_size_L > buffer_L.extent(0)) { + Kokkos::resize(buffer_L, buffer_size_L); + } + #if ROCM_VERSION >= 50400 + // preprocess + buffer_size_L = buffer_L.extent(0); + rocsparse_spmv_ex + (s0.rocsparseHandle, rocsparse_operation_transpose, + &alpha, s0.descrL, vecX, &beta, vecY, + rocsparse_compute_type, rocsparse_spmv_alg_default, + rocsparse_spmv_stage_preprocess, + &buffer_size_L, (void*)buffer_L.data()); + #endif + } +#endif + } +#if defined(KOKKOS_ENABLE_CUDA) +#ifdef USE_SPMM_FOR_WORKSPACE_SIZE + cusparseDestroyDnMat(vecX); + cusparseDestroyDnMat(vecY); +#else + cusparseDestroyDnVec(vecX); + cusparseDestroyDnVec(vecY); +#endif +#elif defined(KOKKOS_ENABLE_HIP) + rocsparse_destroy_dnvec_descr(vecX); + rocsparse_destroy_dnvec_descr(vecY); +#endif +#endif + } + /// /// Level set factorize /// @@ -1645,8 +2064,12 @@ class NumericToolsLevelSet : public NumericToolsBase { } } // end of Cholesky stat.t_factor = timer.seconds(); - timer.reset(); + if (variant == 3) { + // compress each partitioned inverse at each 
level into CRS matrix + bool lu = false; + extractCRS(lu); + } { #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) track_free(work.span() * sizeof(value_type)); @@ -1762,6 +2185,219 @@ class NumericToolsLevelSet : public NumericToolsBase { } } + inline void solveGenericLowerOnDeviceVar2_SpMV(const ordinal_type lvl, const ordinal_type nlvls, + const ordinal_type pbeg, const ordinal_type pend, + const value_type_matrix &t) { +#if (defined(KOKKOS_ENABLE_CUDA) && defined(TACHO_HAVE_CUSPARSE)) || \ + defined(KOKKOS_ENABLE_HIP) + const ordinal_type m = t.extent(0); + const ordinal_type nrhs = t.extent(1); +#if defined(KOKKOS_ENABLE_CUDA) + cudaDataType computeType = CUDA_R_64F; + if (std::is_same::value) { + computeType = CUDA_R_32F; + } else if (!std::is_same::value) { + TACHO_TEST_FOR_EXCEPTION(true, std::logic_error, + "LevelSetTools::solveCholeskyLowerOnDevice: ComputeSPMV only supported double or float"); + } +#elif defined(KOKKOS_ENABLE_HIP) + rocsparse_datatype rocsparse_compute_type = rocsparse_datatype_f64_r; + if (std::is_same::value) { + rocsparse_compute_type = rocsparse_datatype_f32_r; + } else if (!std::is_same::value) { + TACHO_TEST_FOR_EXCEPTION(true, std::logic_error, + "LevelSetTools::solveCholeskyLowerOnDevice: ComputeSPMV only supported double or float"); + } +#endif + #ifdef TACHO_INSERT_DIAGONALS + // compute t = L^{-1}*w + const value_type alpha (1); + const value_type beta (0); + if (_w_vec.extent(1) != nrhs) { + // expand workspace + Kokkos::resize(_w_vec, m, nrhs); + // attach to Cusparse/Rocsparse data struct + int ldw = _w_vec.stride(1); +#if defined(KOKKOS_ENABLE_CUDA) + cusparseCreateDnMat(&matW, m, nrhs, ldw, (void*)(_w_vec.data()), computeType, CUSPARSE_ORDER_COL); + cusparseCreateDnVec(&vecW, m, (void*)(_w_vec.data()), computeType); +#elif defined(KOKKOS_ENABLE_HIP) + rocsparse_create_dnmat_descr(&matW, m, nrhs, ldw, (void*)(_w_vec.data()), rocsparse_compute_type, rocsparse_order_column); + rocsparse_create_dnvec_descr(&vecW, 
m, (void*)(_w_vec.data()), rocsparse_compute_type); +#endif + } + const ordinal_type ldt = t.stride(1); + const ordinal_type ldw = _w_vec.stride(1); + auto &s0 = _h_supernodes(_h_level_sids(pbeg)); + #else + exit(0); + #endif +#if defined(KOKKOS_ENABLE_CUDA) + cusparseStatus_t status; + if (nrhs > 1) { + if (lvl == nlvls-1) { + // start : create DnMat for T + cusparseCreateDnMat(&matT, m, nrhs, ldt, (void*)(t.data()), computeType, CUSPARSE_ORDER_COL); + } + // create vectors + auto matX = ((nlvls-1-lvl)%2 == 0 ? matT : matW); + auto matY = ((nlvls-1-lvl)%2 == 0 ? matW : matT); + // SpMM + if (s0.spmv_explicit_transpose) { + status = cusparseSpMM(s0.cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, s0.L_cusparse, + matX, + &beta, matY, + computeType, CUSPARSE_MM_ALG_DEFAULT, (void*)buffer_L.data()); + } else { + status = cusparseSpMM(s0.cusparseHandle, CUSPARSE_OPERATION_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, s0.L_cusparse, // L_cusparse stores the same ptrs as descrU, but optimized for trans + matX, + &beta, matY, + computeType, CUSPARSE_MM_ALG_DEFAULT, (void*)buffer_L.data()); + } + } else { + if (lvl == nlvls-1) { + // start : create DnMat for T + cusparseCreateDnVec(&vecT, m, (void*)(t.data()), computeType); + } + // create vectors + auto vecX = ((nlvls-1-lvl)%2 == 0 ? vecT : vecW); + auto vecY = ((nlvls-1-lvl)%2 == 0 ? 
vecW : vecT); + // SpMV + if (s0.spmv_explicit_transpose) { + status = cusparseSpMV(s0.cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, s0.L_cusparse, + vecX, + &beta, vecY, + computeType, CUSPARSE_MV_ALG_DEFAULT, (void*)buffer_L.data()); + } else { + status = cusparseSpMV(s0.cusparseHandle, CUSPARSE_OPERATION_TRANSPOSE, + &alpha, s0.L_cusparse, // L_cusparse stores the same ptrs as descrU, but optimized for trans + vecX, + &beta, vecY, + computeType, CUSPARSE_MV_ALG_DEFAULT, (void*)buffer_L.data()); + } + } + if (CUSPARSE_STATUS_SUCCESS != status) { + printf( " Failed cusparseSpMV for SpMV\n" ); + } +#elif defined(KOKKOS_ENABLE_HIP) + rocsparse_status status; + if (nrhs > 1) { + if (lvl == nlvls-1) { + // start : create DnMat for T + rocsparse_create_dnmat_descr(&matT, m, nrhs, ldt, (void*)(t.data()), rocsparse_compute_type, rocsparse_order_column); + } + // create vectors + auto vecX = ((nlvls-1-lvl)%2 == 0 ? matT : matW); + auto vecY = ((nlvls-1-lvl)%2 == 0 ? matW : matT); + if (s0.spmv_explicit_transpose) { + size_t buffer_size_L = buffer_L.extent(0); + status = rocsparse_spmm(s0.rocsparseHandle, rocsparse_operation_none, rocsparse_operation_none, + &alpha, s0.descrL, vecX, &beta, vecY, + rocsparse_compute_type, rocsparse_spmm_alg_default, + rocsparse_spmm_stage_compute, + &buffer_size_L, (void*)buffer_L.data()); + } else { + size_t buffer_size_L = buffer_L.extent(0); + status = rocsparse_spmm(s0.rocsparseHandle, rocsparse_operation_transpose, rocsparse_operation_none, + &alpha, s0.descrL, vecX, &beta, vecY, // dscrL stores the same ptrs as descrU, but optimized for trans + rocsparse_compute_type, rocsparse_spmm_alg_default, + rocsparse_spmm_stage_compute, + &buffer_size_L, (void*)buffer_L.data()); + } + } else { + if (lvl == nlvls-1) { + // start : create DnVec for T + rocsparse_create_dnvec_descr(&vecT, m, (void*)(t.data()), rocsparse_compute_type); + } + size_t buffer_size_L = buffer_L.extent(0); + auto vecX = ((nlvls-1-lvl)%2 == 0 ? 
vecT : vecW); + auto vecY = ((nlvls-1-lvl)%2 == 0 ? vecW : vecT); + if (s0.spmv_explicit_transpose) { + status = + #if ROCM_VERSION >= 50400 + rocsparse_spmv_ex + #else + rocsparse_spmv + #endif + (s0.rocsparseHandle, rocsparse_operation_none, + &alpha, s0.descrL, vecX, &beta, vecY, + rocsparse_compute_type, rocsparse_spmv_alg_default, + #if ROCM_VERSION >= 50400 + rocsparse_spmv_stage_compute, + #endif + &buffer_size_L, (void*)buffer_L.data()); + } else { + status = + #if ROCM_VERSION >= 50400 + rocsparse_spmv_ex + #else + rocsparse_spmv + #endif + (s0.rocsparseHandle, rocsparse_operation_transpose, + &alpha, s0.descrL, vecX, &beta, vecY, // dscrL stores the same ptrs as descrU, but optimized for trans + rocsparse_compute_type, rocsparse_spmv_alg_default, + #if ROCM_VERSION >= 50400 + rocsparse_spmv_stage_compute, + #endif + &buffer_size_L, (void*)buffer_L.data()); + } + } + if (rocsparse_status_success != status) { + printf( " Failed rocsparse_spmv for L\n" ); + } +#else + const value_type zero(0); + auto h_w = Kokkos::create_mirror_view_and_copy(host_memory_space(), ((nlvls-1-lvl)%2 == 0 ? t : _w_vec)); + auto h_t = Kokkos::create_mirror_view(host_memory_space(), ((nlvls-1-lvl)%2 == 0 ? 
_w_vec : t)); + Kokkos::deep_copy(h_t, zero); + + if (s0.spmv_explicit_transpose) { + auto h_rowptr = Kokkos::create_mirror_view_and_copy(host_memory_space(), s0.rowptrL); + auto h_colind = Kokkos::create_mirror_view_and_copy(host_memory_space(), s0.colindL); + auto h_nzvals = Kokkos::create_mirror_view_and_copy(host_memory_space(), s0.nzvalsL); + for (ordinal_type i = 0; i < m ; i++) { + for (int k = h_rowptr(i); k < h_rowptr(i+1); k++) { + for (int j = 0; j < nrhs; j++) { + h_t(i, j) += h_nzvals(k) * h_w(h_colind(k), j); + } + } + } + } else { + auto h_rowptr = Kokkos::create_mirror_view_and_copy(host_memory_space(), s0.rowptrU); + auto h_colind = Kokkos::create_mirror_view_and_copy(host_memory_space(), s0.colindU); + auto h_nzvals = Kokkos::create_mirror_view_and_copy(host_memory_space(), s0.nzvalsU); + for (ordinal_type i = 0; i < m ; i++) { + for (int k = h_rowptr(i); k < h_rowptr(i+1); k++) { + for (int j = 0; j < nrhs; j++) { + h_t(h_colind(k), j) += h_nzvals(k) * h_w(i, j); + } + } + } + } +#endif + if (lvl == 0) { + // end : destroy vectors +#if defined(KOKKOS_ENABLE_CUDA) && defined(TACHO_HAVE_CUSPARSE) + if (nrhs > 1) + cusparseDestroyDnMat(matT); + else + cusparseDestroyDnVec(vecT); +#elif defined(KOKKOS_ENABLE_HIP) + if (nrhs > 1) + rocsparse_destroy_dnmat_descr(matT); + else + rocsparse_destroy_dnvec_descr(vecT); +#endif + if ((nlvls-1)%2 == 0) { + Kokkos::deep_copy(t, _w_vec); + } + } +#endif + } + inline void solveCholeskyLowerOnDeviceVar2(const ordinal_type pbeg, const ordinal_type pend, const size_type_array_host &h_buf_solve_ptr, const value_type_matrix &t) { const ordinal_type nrhs = t.extent(1); @@ -1802,7 +2438,8 @@ class NumericToolsLevelSet : public NumericToolsBase { } } - inline void solveCholeskyLowerOnDevice(const ordinal_type pbeg, const ordinal_type pend, + inline void solveCholeskyLowerOnDevice(const ordinal_type lvl, const ordinal_type nlvls, + const ordinal_type pbeg, const ordinal_type pend, const size_type_array_host 
&h_buf_solve_ptr, const value_type_matrix &t) { if (variant == 0) solveCholeskyLowerOnDeviceVar0(pbeg, pend, h_buf_solve_ptr, t); @@ -1810,7 +2447,9 @@ class NumericToolsLevelSet : public NumericToolsBase { solveCholeskyLowerOnDeviceVar1(pbeg, pend, h_buf_solve_ptr, t); else if (variant == 2) solveCholeskyLowerOnDeviceVar2(pbeg, pend, h_buf_solve_ptr, t); - else { + else if (variant == 3) { + solveGenericLowerOnDeviceVar2_SpMV(lvl, nlvls, pbeg, pend, t); + } else { TACHO_TEST_FOR_EXCEPTION(true, std::logic_error, "LevelSetTools::solveCholeskyLowerOnDevice, algorithm variant is not supported"); } @@ -1915,6 +2554,151 @@ class NumericToolsLevelSet : public NumericToolsBase { } } + inline void solveGenericUpperOnDeviceVar2_SpMV(const ordinal_type lvl, const ordinal_type nlvls, + const ordinal_type pbeg, const ordinal_type pend, + const value_type_matrix &t) { +#if (defined(KOKKOS_ENABLE_CUDA) && defined(TACHO_HAVE_CUSPARSE)) || \ + defined(KOKKOS_ENABLE_HIP) + const ordinal_type m = t.extent(0); + const ordinal_type nrhs = t.extent(1); + + auto &s0 = _h_supernodes(_h_level_sids(pbeg)); + + #ifdef TACHO_INSERT_DIAGONALS + // x = t & y = w (lvl = 0,2,4) + // compute t = L^{-1}*w + const value_type alpha (1); + const value_type beta (0); + const ordinal_type ldt = t.stride(1); + const ordinal_type ldw = _w_vec.stride(1); + #else + exit(0); + #endif +#if defined(KOKKOS_ENABLE_CUDA) + cudaDataType computeType = CUDA_R_64F; + if (std::is_same::value) { + computeType = CUDA_R_32F; + } else if (!std::is_same::value) { + TACHO_TEST_FOR_EXCEPTION(true, std::logic_error, + "LevelSetTools::solveCholeskyLowerOnDevice: ComputeSPMV only supported double or float"); + } + + cusparseStatus_t status; + if (nrhs > 1) { + if (lvl == 0) { + // start : create DnMat for T + cusparseCreateDnMat(&matT, m, nrhs, ldt, (void*)(t.data()), computeType, CUSPARSE_ORDER_COL); + } + auto vecX = (lvl%2 == 0 ? matT : matW); + auto vecY = (lvl%2 == 0 ? 
matW : matT); + // SpMM + status = cusparseSpMM(s0.cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, s0.U_cusparse, + vecX, + &beta, vecY, + computeType, CUSPARSE_MM_ALG_DEFAULT, (void*)buffer_U.data()); + } else { + if (lvl == 0) { + // start : create DnMat for T + cusparseCreateDnVec(&vecT, m, (void*)(t.data()), computeType); + } + auto vecX = (lvl%2 == 0 ? vecT : vecW); + auto vecY = (lvl%2 == 0 ? vecW : vecT); + // SpMV + status = cusparseSpMV(s0.cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, s0.U_cusparse, + vecX, + &beta, vecY, + computeType, CUSPARSE_MV_ALG_DEFAULT, (void*)buffer_U.data()); + } + if (CUSPARSE_STATUS_SUCCESS != status) { + printf( " Failed cusparseSpMV for SpMV\n" ); + } +#elif defined(KOKKOS_ENABLE_HIP) + rocsparse_datatype rocsparse_compute_type = rocsparse_datatype_f64_r; + if (std::is_same::value) { + rocsparse_compute_type = rocsparse_datatype_f32_r; + } else if (!std::is_same::value) { + TACHO_TEST_FOR_EXCEPTION(true, std::logic_error, + "LevelSetTools::solveCholeskyLowerOnDevice: ComputeSPMV only supported double or float"); + } + size_t buffer_size_U = buffer_U.extent(0); + rocsparse_status status; + if (nrhs > 1) { + if (lvl == 0) { + // start : create DnMat for T + rocsparse_create_dnmat_descr(&matT, m, nrhs, ldt, (void*)(t.data()), rocsparse_compute_type, rocsparse_order_column); + } + auto vecX = (lvl%2 == 0 ? matT : matW); + auto vecY = (lvl%2 == 0 ? matW : matT); + status = rocsparse_spmm(s0.rocsparseHandle, rocsparse_operation_none, rocsparse_operation_none, + &alpha, s0.descrU, vecX, &beta, vecY, + rocsparse_compute_type, rocsparse_spmm_alg_default, + rocsparse_spmm_stage_compute, + &buffer_size_U, (void*)buffer_U.data()); + } else { + if (lvl == 0) { + // start : create DnVec for T + rocsparse_create_dnvec_descr(&vecT, m, (void*)(t.data()), rocsparse_compute_type); + } + auto vecX = (lvl%2 == 0 ? vecT : vecW); + auto vecY = (lvl%2 == 0 ? 
vecW : vecT); + status = + #if ROCM_VERSION >= 50400 + rocsparse_spmv_ex + #else + rocsparse_spmv + #endif + (s0.rocsparseHandle, rocsparse_operation_none, + &alpha, s0.descrU, vecX, &beta, vecY, + rocsparse_compute_type, rocsparse_spmv_alg_default, + #if ROCM_VERSION >= 50400 + rocsparse_spmv_stage_compute, + #endif + &buffer_size_U, (void*)buffer_U.data()); + } + if (rocsparse_status_success != status) { + printf( " Failed rocsparse_spmv for U\n" ); + } +#else + const value_type zero(0); + auto h_w = Kokkos::create_mirror_view_and_copy(host_memory_space(), (lvl%2 == 0 ? t : _w_vec)); + auto h_t = Kokkos::create_mirror_view(host_memory_space(), (lvl%2 == 0 ? _w_vec : t)); + Kokkos::deep_copy(h_t, zero); + + auto h_rowptr = Kokkos::create_mirror_view_and_copy(host_memory_space(), s0.rowptrU); + auto h_colind = Kokkos::create_mirror_view_and_copy(host_memory_space(), s0.colindU); + auto h_nzvals = Kokkos::create_mirror_view_and_copy(host_memory_space(), s0.nzvalsU); + + for (ordinal_type i = 0; i < m ; i++) { + for (int k = h_rowptr(i); k < h_rowptr(i+1); k++) { + for (int j = 0; j < nrhs; j++) { + h_t(i, j) += h_nzvals(k) * h_w(h_colind(k), j); + } + } + } + Kokkos::deep_copy(t, h_t); +#endif + if (lvl == nlvls-1) { + // end : destroy vectors +#if defined(KOKKOS_ENABLE_CUDA) && defined(TACHO_HAVE_CUSPARSE) + if (nrhs > 1) + cusparseDestroyDnMat(matT); + else + cusparseDestroyDnVec(vecT); +#elif defined(KOKKOS_ENABLE_HIP) + if (nrhs > 1) + rocsparse_destroy_dnmat_descr(matT); + else + rocsparse_destroy_dnvec_descr(vecT); +#endif + if (lvl%2 == 0) { + Kokkos::deep_copy(t, _w_vec); + } + } +#endif + } + inline void solveCholeskyUpperOnDeviceVar2(const ordinal_type pbeg, const ordinal_type pend, const size_type_array_host &h_buf_solve_ptr, const value_type_matrix &t) { const ordinal_type nrhs = t.extent(1); @@ -1954,7 +2738,8 @@ class NumericToolsLevelSet : public NumericToolsBase { } } - inline void solveCholeskyUpperOnDevice(const ordinal_type pbeg, const 
ordinal_type pend, + inline void solveCholeskyUpperOnDevice(const ordinal_type lvl, const ordinal_type nlvls, + const ordinal_type pbeg, const ordinal_type pend, const size_type_array_host &h_buf_solve_ptr, const value_type_matrix &t) { if (variant == 0) solveCholeskyUpperOnDeviceVar0(pbeg, pend, h_buf_solve_ptr, t); @@ -1962,7 +2747,9 @@ class NumericToolsLevelSet : public NumericToolsBase { solveCholeskyUpperOnDeviceVar1(pbeg, pend, h_buf_solve_ptr, t); else if (variant == 2) solveCholeskyUpperOnDeviceVar2(pbeg, pend, h_buf_solve_ptr, t); - else { + else if (variant == 3) { + solveGenericUpperOnDeviceVar2_SpMV(lvl, nlvls, pbeg, pend, t); + } else { TACHO_TEST_FOR_EXCEPTION(true, std::logic_error, "LevelSetTools::solveCholeskyUpperOnDevice, algorithm variant is not supported"); } @@ -2485,7 +3272,6 @@ class NumericToolsLevelSet : public NumericToolsBase { _status = ApplyPermutation::invoke(exec_instance, bT, perm, tT); } - _status = Gemv::invoke(handle_blas, one, AL, tT, zero, b); checkDeviceBlasStatus("gemv"); } @@ -2494,7 +3280,8 @@ class NumericToolsLevelSet : public NumericToolsBase { } } - inline void solveLU_LowerOnDevice(const ordinal_type pbeg, const ordinal_type pend, + inline void solveLU_LowerOnDevice(const ordinal_type lvl, const ordinal_type nlvls, + const ordinal_type pbeg, const ordinal_type pend, const size_type_array_host &h_buf_solve_ptr, const value_type_matrix &t) { if (variant == 0) solveLU_LowerOnDeviceVar0(pbeg, pend, h_buf_solve_ptr, t); @@ -2502,7 +3289,9 @@ class NumericToolsLevelSet : public NumericToolsBase { solveLU_LowerOnDeviceVar1(pbeg, pend, h_buf_solve_ptr, t); else if (variant == 2) solveLU_LowerOnDeviceVar2(pbeg, pend, h_buf_solve_ptr, t); - else { + else if (variant == 3) { + solveGenericLowerOnDeviceVar2_SpMV(lvl, nlvls, pbeg, pend, t); + } else { TACHO_TEST_FOR_EXCEPTION(true, std::logic_error, "LevelSetTools::solveLU_LowerOnDevice, algorithm variant is not supported"); } @@ -2644,7 +3433,8 @@ class NumericToolsLevelSet : 
public NumericToolsBase { } } - inline void solveLU_UpperOnDevice(const ordinal_type pbeg, const ordinal_type pend, + inline void solveLU_UpperOnDevice(const ordinal_type lvl, const ordinal_type nlvls, + const ordinal_type pbeg, const ordinal_type pend, const size_type_array_host &h_buf_solve_ptr, const value_type_matrix &t) { if (variant == 0) solveLU_UpperOnDeviceVar0(pbeg, pend, h_buf_solve_ptr, t); @@ -2652,28 +3442,33 @@ class NumericToolsLevelSet : public NumericToolsBase { solveLU_UpperOnDeviceVar1(pbeg, pend, h_buf_solve_ptr, t); else if (variant == 2) solveLU_UpperOnDeviceVar2(pbeg, pend, h_buf_solve_ptr, t); - else { + else if (variant == 3) { + solveGenericUpperOnDeviceVar2_SpMV(lvl, nlvls, pbeg, pend, t); + } else { TACHO_TEST_FOR_EXCEPTION(true, std::logic_error, "LevelSetTools::solveLU_UpperOnDevice, algorithm variant is not supported"); } } inline void allocateWorkspaceSolve(const ordinal_type nrhs) { - const size_type buf_extent = _bufsize_solve * nrhs; - const size_type buf_span = _buf.span(); - - if (buf_extent != buf_span) { - _buf = value_type_array(do_not_initialize_tag("buf"), buf_extent); - track_free(buf_span * sizeof(value_type)); - track_alloc(_buf.span() * sizeof(value_type)); - { - const Kokkos::RangePolicy policy(0, _buf_solve_ptr.extent(0)); - const auto buf_solve_nrhs_ptr = _buf_solve_nrhs_ptr; - const auto buf_solve_ptr = _buf_solve_ptr; - Kokkos::parallel_for( - policy, KOKKOS_LAMBDA(const ordinal_type &i) { buf_solve_nrhs_ptr(i) = nrhs * buf_solve_ptr(i); }); + if (variant == 3) { + } else { + const size_type buf_extent = _bufsize_solve * nrhs; + const size_type buf_span = _buf.span(); + + if (buf_extent != buf_span) { + _buf = value_type_array(do_not_initialize_tag("buf"), buf_extent); + track_free(buf_span * sizeof(value_type)); + track_alloc(_buf.span() * sizeof(value_type)); + { + const Kokkos::RangePolicy policy(0, _buf_solve_ptr.extent(0)); + const auto buf_solve_nrhs_ptr = _buf_solve_nrhs_ptr; + const auto buf_solve_ptr = 
_buf_solve_ptr; + Kokkos::parallel_for( + policy, KOKKOS_LAMBDA(const ordinal_type &i) { buf_solve_nrhs_ptr(i) = nrhs * buf_solve_ptr(i); }); + } + Kokkos::deep_copy(_h_buf_solve_nrhs_ptr, _buf_solve_nrhs_ptr); } - Kokkos::deep_copy(_h_buf_solve_nrhs_ptr, _buf_solve_nrhs_ptr); } } @@ -2768,12 +3563,14 @@ class NumericToolsLevelSet : public NumericToolsBase { ++stat_level.n_kernel_launching; } const auto h_buf_solve_ptr = Kokkos::subview(_h_buf_solve_nrhs_ptr, range_solve_buf_ptr); - solveCholeskyLowerOnDevice(pbeg, pend, h_buf_solve_ptr, t); - Kokkos::fence(); + solveCholeskyLowerOnDevice(lvl, _team_serial_level_cut, pbeg, pend, h_buf_solve_ptr, t); - Kokkos::parallel_for("update lower", policy_update_with_work_property, functor); - ++stat_level.n_kernel_launching; - exec_space().fence(); + if (variant != 3) { + Kokkos::fence(); + Kokkos::parallel_for("update lower", policy_update_with_work_property, functor); + ++stat_level.n_kernel_launching; + exec_space().fence(); + } } } } // end of lower tri solve @@ -2824,21 +3621,24 @@ class NumericToolsLevelSet : public NumericToolsBase { const auto policy_solve_with_work_property = policy_solve; const auto policy_update_with_work_property = policy_update; #endif - Kokkos::parallel_for("update upper", policy_update_with_work_property, functor); - ++stat_level.n_kernel_launching; - exec_space().fence(); - - if (lvl < _device_level_cut) { - // do nothing - // Kokkos::parallel_for("solve upper", policy_solve, functor); - } else { - Kokkos::parallel_for("solve upper", policy_solve_with_work_property, functor); + if (variant != 3) { + Kokkos::parallel_for("update upper", policy_update_with_work_property, functor); ++stat_level.n_kernel_launching; + exec_space().fence(); + + if (lvl < _device_level_cut) { + // do nothing + // Kokkos::parallel_for("solve upper", policy_solve, functor); + } else { + Kokkos::parallel_for("solve upper", policy_solve_with_work_property, functor); + ++stat_level.n_kernel_launching; + } } - const auto 
h_buf_solve_ptr = Kokkos::subview(_h_buf_solve_nrhs_ptr, range_solve_buf_ptr); - solveCholeskyUpperOnDevice(pbeg, pend, h_buf_solve_ptr, t); - Kokkos::fence(); + solveCholeskyUpperOnDevice(lvl, _team_serial_level_cut, pbeg, pend, h_buf_solve_ptr, t); + if (variant != 3) { + Kokkos::fence(); + } } } } /// end of upper tri solve @@ -3001,7 +3801,6 @@ class NumericToolsLevelSet : public NumericToolsBase { } } // end of LDL stat.t_factor = timer.seconds(); - timer.reset(); { #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) @@ -3355,8 +4154,12 @@ class NumericToolsLevelSet : public NumericToolsBase { } } // end of LU stat.t_factor = timer.seconds(); - timer.reset(); + if (variant == 3) { + // compress each partitioned inverse at each level into CRS matrix + bool lu = true; + extractCRS(lu); + } { #if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) track_free(work.span() * sizeof(value_type)); @@ -3446,7 +4249,6 @@ class NumericToolsLevelSet : public NumericToolsBase { { for (ordinal_type lvl = (_team_serial_level_cut - 1); lvl >= 0; --lvl) { const ordinal_type pbeg = _h_level_ptr(lvl), pend = _h_level_ptr(lvl + 1), pcnt = pend - pbeg; - const range_type range_solve_buf_ptr(_h_buf_level_ptr(lvl), _h_buf_level_ptr(lvl + 1)); const auto solve_buf_ptr = Kokkos::subview(_buf_solve_nrhs_ptr, range_solve_buf_ptr); @@ -3469,20 +4271,24 @@ class NumericToolsLevelSet : public NumericToolsBase { const auto policy_solve_with_work_property = policy_solve; const auto policy_update_with_work_property = policy_update; #endif - if (lvl < _device_level_cut) { - // do nothing - // Kokkos::parallel_for("solve lower", policy_solve, functor); - } else { - Kokkos::parallel_for("solve lower", policy_solve_with_work_property, functor); - ++stat_level.n_kernel_launching; + if (variant != 3) { + if (lvl < _device_level_cut) { + // do nothing + // Kokkos::parallel_for("solve lower", policy_solve, functor); + } else { + Kokkos::parallel_for("solve lower", 
policy_solve_with_work_property, functor); + ++stat_level.n_kernel_launching; + } } const auto h_buf_solve_ptr = Kokkos::subview(_h_buf_solve_nrhs_ptr, range_solve_buf_ptr); - solveLU_LowerOnDevice(pbeg, pend, h_buf_solve_ptr, t); - Kokkos::fence(); + solveLU_LowerOnDevice(lvl, _team_serial_level_cut, pbeg, pend, h_buf_solve_ptr, t); + if (variant != 3) { + Kokkos::fence(); - Kokkos::parallel_for("update lower", policy_update_with_work_property, functor); - ++stat_level.n_kernel_launching; - exec_space().fence(); + Kokkos::parallel_for("update lower", policy_update_with_work_property, functor); + ++stat_level.n_kernel_launching; + exec_space().fence(); + } } } } // end of lower tri solve @@ -3533,21 +4339,24 @@ class NumericToolsLevelSet : public NumericToolsBase { const auto policy_solve_with_work_property = policy_solve; const auto policy_update_with_work_property = policy_update; #endif - Kokkos::parallel_for("update upper", policy_update_with_work_property, functor); - ++stat_level.n_kernel_launching; - exec_space().fence(); - - if (lvl < _device_level_cut) { - // do nothing - // Kokkos::parallel_for("solve upper", policy_solve, functor); - } else { - Kokkos::parallel_for("solve upper", policy_solve_with_work_property, functor); + if (variant != 3) { + Kokkos::parallel_for("update upper", policy_update_with_work_property, functor); ++stat_level.n_kernel_launching; + exec_space().fence(); + + if (lvl < _device_level_cut) { + // do nothing + // Kokkos::parallel_for("solve upper", policy_solve, functor); + } else { + Kokkos::parallel_for("solve upper", policy_solve_with_work_property, functor); + ++stat_level.n_kernel_launching; + } } - const auto h_buf_solve_ptr = Kokkos::subview(_h_buf_solve_nrhs_ptr, range_solve_buf_ptr); - solveLU_UpperOnDevice(pbeg, pend, h_buf_solve_ptr, t); - Kokkos::fence(); + solveLU_UpperOnDevice(lvl, _team_serial_level_cut, pbeg, pend, h_buf_solve_ptr, t); + if (variant != 3) { + Kokkos::fence(); + } } } } /// end of upper tri solve 
diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_SupernodeInfo.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_SupernodeInfo.hpp index cb634215da08..8b1f69bbea8f 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_SupernodeInfo.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_SupernodeInfo.hpp @@ -20,6 +20,13 @@ Sandia National Laboratories, Albuquerque, NM, USA #define __TACHO_SUPERNODE_INFO_HPP__ #include "Tacho_Util.hpp" +#if defined(KOKKOS_ENABLE_CUDA) + #include +#elif defined(KOKKOS_ENABLE_HIP) + #include + #include + #define ROCM_VERSION ROCM_VERSION_MAJOR * 10000 + ROCM_VERSION_MINOR * 100 + ROCM_VERSION_PATCH +#endif /// \file Tacho_SupernodeInfo.hpp /// \author Kyungjoo Kim (kyukim@sandia.gov) @@ -99,6 +106,9 @@ template struct SupernodeInfo { using ordinal_pair_type_array = Kokkos::View; using value_type_matrix = Kokkos::View; + using rowptr_view = Kokkos::View; + using colind_view = Kokkos::View; + using nzvals_view = Kokkos::View; using range_type = Kokkos::pair; struct Supernode { @@ -118,6 +128,26 @@ template struct SupernodeInfo { bool do_not_apply_pivots; + // for using SpMV + rowptr_view rowptrU; + colind_view colindU; + nzvals_view nzvalsU; + + rowptr_view rowptrL; + colind_view colindL; + nzvals_view nzvalsL; + + bool spmv_explicit_transpose; +#if defined(KOKKOS_ENABLE_CUDA) + cusparseHandle_t cusparseHandle; + cusparseSpMatDescr_t U_cusparse; + cusparseSpMatDescr_t L_cusparse; +#elif defined(KOKKOS_ENABLE_HIP) + rocsparse_handle rocsparseHandle; + rocsparse_spmat_descr descrU; + rocsparse_spmat_descr descrL; +#endif + KOKKOS_INLINE_FUNCTION Supernode() : lock(0), row_begin(0), m(0), n(0), gid_col_begin(0), gid_col_end(0), sid_col_begin(0), sid_col_end(0), diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_ExtractCRS.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_ExtractCRS.hpp new file mode 100644 index 000000000000..99d7aa2b6fa7 --- /dev/null +++ 
b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_ExtractCRS.hpp @@ -0,0 +1,388 @@ +// clang-format off +/* ===================================================================================== +Copyright 2022 National Technology & Engineering Solutions of Sandia, LLC (NTESS). +Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government retains +certain rights in this software. + +SCR#:2790.0 + +This file is part of Tacho. Tacho is open source software: you can redistribute it +and/or modify it under the terms of BSD 2-Clause License +(https://opensource.org/licenses/BSD-2-Clause). A copy of the license is also +provided under the main directory + +Questions? Kyungjoo Kim at kyukim@sandia.gov + +Sandia National Laboratories, Albuquerque, NM, USA +===================================================================================== */ +// clang-format on +#ifndef __TACHO_TEAMFUNCTOR_EXTRACT_CRS_HPP__ +#define __TACHO_TEAMFUNCTOR_EXTRACT_CRS_HPP__ + +/// \file Tacho_TeamFunctor_ExtractCRS.hpp +/// \author Kyungjoo Kim (kyukim@sandia.gov) + +#include "Tacho_Util.hpp" + +#include "Tacho_SupernodeInfo.hpp" + +namespace Tacho { + + +template +struct rowptr_sum { + rowptr_view _rowptr; + + rowptr_sum(rowptr_view rowptr) + : _rowptr(rowptr) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const ordinal_type i, ordinal_type& update, + const bool is_final) const { + const ordinal_type val_i = _rowptr(i); + update += val_i; + if (is_final) { + _rowptr(i) = update; + } + } + + ordinal_type nnz() { return _rowptr(_rowptr.extent(0)); } +}; + +template struct TeamFunctor_ExtractCrs { +public: + typedef Kokkos::pair range_type; + + typedef SupernodeInfoType supernode_info_type; + typedef typename supernode_info_type::supernode_type supernode_type; + + typedef typename supernode_info_type::ordinal_type_array ordinal_type_array; + typedef typename supernode_info_type::size_type_array size_type_array; + + typedef typename supernode_info_type::value_type value_type; + typedef
typename supernode_info_type::value_type_array value_type_array; + typedef typename supernode_info_type::value_type_matrix value_type_matrix; + + typedef typename supernode_info_type::rowptr_view rowptr_view; + typedef typename supernode_info_type::colind_view colind_view; + typedef typename supernode_info_type::nzvals_view nzvals_view; + +private: + supernode_info_type _info; + ordinal_type_array _compute_mode, _level_sids; + ordinal_type _pbeg, _pend; + ordinal_type _m; + + // in CRS format + rowptr_view _rowptr; + colind_view _colind; + nzvals_view _nzvals; + // in CRS format, transpose + rowptr_view _rowptrT; + colind_view _colindT; + nzvals_view _nzvalsT; + // pivot + ordinal_type_array _piv; + +public: + KOKKOS_INLINE_FUNCTION + TeamFunctor_ExtractCrs() = delete; + + KOKKOS_INLINE_FUNCTION + TeamFunctor_ExtractCrs(const supernode_info_type &info, const ordinal_type_array &compute_mode, + const ordinal_type_array &level_sids) + : _info(info), _compute_mode(compute_mode), _level_sids(level_sids) {} + + inline void setGlobalSize(const ordinal_type m) { + _m = m; + } + + inline void setRange(const ordinal_type pbeg, const ordinal_type pend) { + _pbeg = pbeg; + _pend = pend; + } + + inline void setRowPtr(rowptr_view &rowptr) { _rowptr = rowptr; } + inline void setCrsView(colind_view &colind, nzvals_view &nzvals) { + _colind = colind; + _nzvals = nzvals; + } + inline void setRowPtrT(rowptr_view &rowptrT) { _rowptrT = rowptrT; } + inline void setCrsViewT(colind_view &colindT, nzvals_view &nzvalsT) { + _colindT = colindT; + _nzvalsT = nzvalsT; + } + inline void setPivPtr(ordinal_type_array &piv) { _piv = piv; } + + struct ExtractPtrTag {}; + struct ExtractValTag {}; + struct ExtractPtrColTag {}; + struct ExtractValColTag {}; + struct TransPtrTag {}; + struct TransMatTag {}; + + + // --------------------------------------- + // Functors to convert to CRS format + // from row-major + template + KOKKOS_INLINE_FUNCTION void operator()(const ExtractPtrTag &, const 
MemberType &member) const { + const ordinal_type id = member.league_rank(); + const ordinal_type p = _pbeg + id; + + const value_type zero(0); + const ordinal_type sid = (p == _pend ? 0 : _level_sids(p)); + const ordinal_type mode = (p == _pend ? 0 : _compute_mode(sid)); + if (mode == 0) { + // extract this supernode + const auto &s = _info.supernodes(sid); + const ordinal_type offm = (p == _pend ? _m : s.row_begin); + #define TACHO_INSERT_DIAGONALS + #ifdef TACHO_INSERT_DIAGONALS + // last row of previous supernode + ordinal_type row_id = 0; + if (p > _pbeg) { + const ordinal_type prev_sid = _level_sids(p-1); + const auto &prev_s = _info.supernodes(prev_sid); + row_id = prev_s.row_begin + prev_s.m; + } + // add diagonal entry + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, offm-row_id), + [&](const int& i) { _rowptr(row_id+i+1) = 1; }); + #endif + if (p < _pend) { + if (s.m > 0) { + // extract this supernode + // stored by row, but checking for nonzero (instead of just taking all s.n nonzeros) + value_type *aptr = s.u_buf; + UnmanagedViewType AT(aptr, s.m, s.n); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, s.m), + [&](const int& i) { + _rowptr(1+i+offm) = 0; + for (ordinal_type j = 0; j < s.n; j++) { + if (AT(i,j) != zero) { + _rowptr(1+i+offm) ++; + } + } + }); + } + } + } + } + + template + KOKKOS_INLINE_FUNCTION void operator()(const ExtractValTag &, const MemberType &member) const { + const ordinal_type id = member.league_rank(); + const ordinal_type p = _pbeg + id; + + const value_type one (1); + const value_type zero(0); + const ordinal_type sid = (p == _pend ? 0 : _level_sids(p)); + const ordinal_type mode = (p == _pend ? 0 : _compute_mode(sid)); + if (mode == 0) { + // extract this supernode + const auto &s = _info.supernodes(sid); + const ordinal_type offm = (p == _pend ? _m : s.row_begin); + const ordinal_type offn = (p == _pend ?
0 : s.gid_col_begin); + #ifdef TACHO_INSERT_DIAGONALS + // last row of previous supernode + ordinal_type row_id = 0; + if (p > _pbeg) { + const ordinal_type prev_sid = _level_sids(p-1); + const auto &prev_s = _info.supernodes(prev_sid); + row_id = prev_s.row_begin + prev_s.m; + } + // insert diagonals for the missing rows between previous and this block + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, offm-row_id), + [&](const int& i) { + int nnz = _rowptr(row_id+i); + _colind(nnz) = row_id+i; + _nzvals(nnz) = one; + _rowptr(row_id+i)++; + }); + #endif + if (p < _pend) { + if (s.m > 0) { + // extract this supernode + value_type *aptr = s.u_buf; + UnmanagedViewType AT(aptr, s.m, s.n); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, s.m), + [&](const int& i) { + // diagonal block + ordinal_type j; + for (ordinal_type j = i; j < s.m; j++) { + if (AT(i,j) != zero) { + int nnz = _rowptr(i+offm); + _colind(nnz) = j+offm; + _nzvals(nnz) = AT(i,j); + _rowptr(i+offm) ++; + } + } + // off-diagonal blocks + j = s.m; + for (ordinal_type id = s.sid_col_begin + 1; id < s.sid_col_end - 1; id++) { + for (ordinal_type k = _info.sid_block_colidx(id).second; k < _info.sid_block_colidx(id + 1).second; k++) { + if (AT(i,j) != zero) { + int nnz = _rowptr(i+offm); + _colind(nnz) = _info.gid_colidx(k+offn); + _nzvals(nnz) = AT(i,j); + _rowptr(i+offm) ++; + } + j++; + } + } + }); + } + } + } + } + + // --------------------------------------- + // Functors to convert to CRS format + // from col-major + template + KOKKOS_INLINE_FUNCTION void operator()(const ExtractPtrColTag &, const MemberType &member) const { + const ordinal_type id = member.league_rank(); + const ordinal_type p = _pbeg + id; + + const value_type zero(0); + const ordinal_type sid = (p == _pend ? 0 : _level_sids(p)); + const ordinal_type mode = (p == _pend ? 0 : _compute_mode(sid)); + if (mode == 0) { + // extract this supernode + const auto &s = _info.supernodes(sid); + const ordinal_type offm = (p == _pend ?
_m : s.row_begin); + const ordinal_type offn = (p == _pend ? 0 : s.gid_col_begin); + #ifdef TACHO_INSERT_DIAGONALS + // last row of previous supernode + ordinal_type row_id = 0; + if (p > _pbeg) { + const ordinal_type prev_sid = _level_sids(p-1); + const auto &prev_s = _info.supernodes(prev_sid); + row_id = prev_s.row_begin + prev_s.m; + } + // add diagonal entry + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, offm-row_id), + [&](const int& i) { Kokkos::atomic_add(&(_rowptr(row_id+i+1)), 1); }); + #endif + if (p < _pend) { + // extract this supernode (AL is stored by col) + if (s.m > 0) { + value_type *aptr = s.l_buf; + UnmanagedViewType AL(aptr, s.n, s.m); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, s.m), + [&](const int& j) { + // first extract diagonal block (each thread extract row in parallel) + for (ordinal_type i = 0; i < s.m; i++) { + if (AL(i,j) != zero) { + Kokkos::atomic_add(&(_rowptr(1+i+offm)), 1); + } + } + // off-diagonals (each thread extract col, needing atomic-add) + ordinal_type i = s.m; + for (ordinal_type id = s.sid_col_begin + 1; id < s.sid_col_end - 1; id++) { + for (ordinal_type k = _info.sid_block_colidx(id).second; k < _info.sid_block_colidx(id + 1).second; k++) { + if (AL(i, j) != zero) { + ordinal_type gid_i = _info.gid_colidx(k+offn); + Kokkos::atomic_add(&(_rowptr(1+gid_i)), 1); + } + i++; + } + } + }); + } + } + } + } + + template + KOKKOS_INLINE_FUNCTION void operator()(const ExtractValColTag &, const MemberType &member) const { + const ordinal_type id = member.league_rank(); + const ordinal_type p = _pbeg + id; + + const value_type zero(0); + const value_type one (1); + const ordinal_type sid = (p == _pend ? 0 : _level_sids(p)); + const ordinal_type mode = (p == _pend ? 0 : _compute_mode(sid)); + if (mode == 0) { + // extract this supernode + const auto &s = _info.supernodes(sid); + const ordinal_type offm = (p == _pend ? _m : s.row_begin); + const ordinal_type offn = (p == _pend ?
0 : s.gid_col_begin); + #ifdef TACHO_INSERT_DIAGONALS + // last row of previous supernode + ordinal_type row_id = 0; + if (p > _pbeg) { + const ordinal_type prev_sid = _level_sids(p-1); + const auto &prev_s = _info.supernodes(prev_sid); + row_id = prev_s.row_begin + prev_s.m; + } + // add diagonal entry + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, offm-row_id), + [&](const int& i) { + ordinal_type nnz = Kokkos::atomic_fetch_add(&(_rowptr(row_id+i)), 1); + _colind(nnz) = row_id+i; + _nzvals(nnz) = one; + }); + #endif + if (p < _pend) { + // extract this supernode + // stored by col + if (s.m > 0) { + bool no_perm = s.do_not_apply_pivots; + ConstUnmanagedViewType perm(_piv.data() + 4 * offm + 2 * s.m, s.m); + + value_type *aptr = s.l_buf; + UnmanagedViewType AL(aptr, s.n, s.m); + Kokkos::parallel_for(Kokkos::TeamThreadRange(member, s.m), + [&](const int& j) { + ordinal_type gid_j = (no_perm ? j+offm : perm(j)+offm); + // diagonal block + for (ordinal_type i = 0; i < s.m; i++) { + if (AL(i,j) != zero) { + ordinal_type nnz = Kokkos::atomic_fetch_add(&(_rowptr(offm+i)), 1); + _colind(nnz) = gid_j; + _nzvals(nnz) = AL(i,j); + } + } + // off-diagonals (each thread extract col, needing atomic-add) + ordinal_type i = s.m; + for (ordinal_type id = s.sid_col_begin + 1; id < s.sid_col_end - 1; id++) { + for (ordinal_type k = _info.sid_block_colidx(id).second; k < _info.sid_block_colidx(id + 1).second; k++) { + if (AL(i, j) != zero) { + ordinal_type gid_i = _info.gid_colidx(k+offn); + ordinal_type nnz = Kokkos::atomic_fetch_add(&(_rowptr(gid_i)), 1); + _colind(nnz) = gid_j; + _nzvals(nnz) = AL(i,j); + } + i++; + } + } + }); + } + } + } + } + + + // --------------------------------------- + // Functors to transpose + KOKKOS_INLINE_FUNCTION void operator()(const TransPtrTag &, const int i) const { + // count offset rowptrT + for (ordinal_type k = _rowptr(i); k < _rowptr(i+1); k++) { + Kokkos::atomic_add(&(_rowptrT(_colind(k)+1)), 1); + } + } + + KOKKOS_INLINE_FUNCTION
void operator()(const TransMatTag &, const int i) const { + // count offset rowptrT + for (ordinal_type k = _rowptr(i); k < _rowptr(i+1); k++) { + int nnz = Kokkos::atomic_fetch_add(&(_rowptrT(_colind(k))), 1); + _colindT(nnz) = i; + _nzvalsT(nnz) = _nzvals(k); + } + } +}; +} // namespace Tacho + +#endif diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeChol.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeChol.hpp index 8dd8ca43112c..f97103664def 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeChol.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeChol.hpp @@ -42,10 +42,15 @@ template struct TeamFunctor_FactorizeChol { typedef typename supernode_info_type::value_type_array value_type_array; typedef typename supernode_info_type::value_type_matrix value_type_matrix; + typedef typename supernode_info_type::rowptr_view rowptr_view; + typedef typename supernode_info_type::colind_view colind_view; + typedef typename supernode_info_type::nzvals_view nzvals_view; + private: supernode_info_type _info; ordinal_type_array _compute_mode, _level_sids; ordinal_type _pbeg, _pend; + ordinal_type _m; size_type_array _buf_ptr; value_type_array _buf; @@ -61,6 +66,10 @@ template struct TeamFunctor_FactorizeChol { const ordinal_type_array &level_sids, const value_type_array buf, int *rval) : _info(info), _compute_mode(compute_mode), _level_sids(level_sids), _buf(buf), _rval(rval) {} + inline void setGlobalSize(const ordinal_type m) { + _m = m; + } + inline void setRange(const ordinal_type pbeg, const ordinal_type pend) { _pbeg = pbeg; _pend = pend; @@ -339,6 +348,8 @@ template struct TeamFunctor_FactorizeChol { struct UpdateTag {}; struct DummyTag {}; + // --------------------------------------- + // Functors to factorize template KOKKOS_INLINE_FUNCTION void operator()(const FactorizeTag &, const MemberType &member) const { const ordinal_type lid = member.league_rank(); 
@@ -358,7 +369,7 @@ template struct TeamFunctor_FactorizeChol { UnmanagedViewType ABR(bufptr, n_m, n_m); UnmanagedViewType T(bufptr, m, m); factorize_var1(member, s, T, ABR); - } else if (factorize_tag_type::variant == 2) { + } else if (factorize_tag_type::variant == 2 || factorize_tag_type::variant == 3) { UnmanagedViewType ABR(bufptr, n_m, n_m); UnmanagedViewType T(bufptr + ABR.span(), m, m); factorize_var2(member, s, T, ABR); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLDL.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLDL.hpp index e93f0e53fdfb..b84f0ade813f 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLDL.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLDL.hpp @@ -435,7 +435,7 @@ template struct TeamFunctor_FactorizeLDL { const ordinal_type used_span = max(ABR.span(), T.span()); UnmanagedViewType W(bufptr + used_span, int(bufend - bufbeg - used_span)); factorize_var1(member, s, P, D, W, T, ABR); - } else if (factorize_tag_type::variant == 2) { + } else if (factorize_tag_type::variant == 2 || factorize_tag_type::variant == 3) { const ordinal_type bufbeg = _buf_ptr(lid), bufend = _buf_ptr(lid + 1); auto bufptr = _buf.data() + bufbeg; UnmanagedViewType ABR(bufptr, n_m, n_m); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLU.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLU.hpp index 5de3ea88e7fe..8c3c13ee3515 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLU.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_FactorizeLU.hpp @@ -429,7 +429,7 @@ template struct TeamFunctor_FactorizeLU { UnmanagedViewType ABR(bufptr, n_m, n_m); UnmanagedViewType T(bufptr, m, m); factorize_var1(member, s, P, T, ABR); - } else if (factorize_tag_type::variant == 2) { + } else if (factorize_tag_type::variant == 2 || factorize_tag_type::variant == 
3) { UnmanagedViewType ABR(bufptr, n_m, n_m); UnmanagedViewType T(bufptr + ABR.span(), m, m); factorize_var2(member, s, P, T, ABR); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveLowerChol.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveLowerChol.hpp index 40fcd030b87d..3a17c480f682 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveLowerChol.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveLowerChol.hpp @@ -304,7 +304,7 @@ template struct TeamFunctor_SolveLowerChol { solve_var0(member, s, bptr); else if (solve_tag_type::variant == 1) solve_var1(member, s, bptr); - else if (solve_tag_type::variant == 2) + else if (solve_tag_type::variant == 2 || solve_tag_type::variant == 3) solve_var2(member, s, bptr); else Kokkos::printf("Error: TeamFunctorSolveLowerChol::SolveTag, algorithm variant is not supported\n"); @@ -328,7 +328,7 @@ template struct TeamFunctor_SolveLowerChol { update_var0(member, s, bptr); else if (update_tag_type::variant == 1) update_var1(member, s, bptr); - else if (update_tag_type::variant == 2) + else if (update_tag_type::variant == 2 || update_tag_type::variant == 3) update_var2(member, s, bptr); else Kokkos::printf("Error: TeamFunctorSolveLowerChol::UpdateTag, algorithm variant is not supported\n"); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveLowerLDL.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveLowerLDL.hpp index ec656cbbedc0..50eb2cdc2b3e 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveLowerLDL.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveLowerLDL.hpp @@ -316,7 +316,7 @@ template struct TeamFunctor_SolveLowerLDL { solve_var0(member, s, bptr); } else if (solve_tag_type::variant == 1) { solve_var1(member, s, bptr); - } else if (solve_tag_type::variant == 2) { + } else if (solve_tag_type::variant == 2 || solve_tag_type::variant == 3) 
{ solve_var2(member, s, bptr); } } @@ -340,7 +340,7 @@ template struct TeamFunctor_SolveLowerLDL { update_var0(member, s, bptr); } else if (update_tag_type::variant == 1) { update_var1(member, s, bptr); - } else if (update_tag_type::variant == 2) { + } else if (update_tag_type::variant == 2 || update_tag_type::variant == 3) { update_var2(member, s, bptr); } } else { diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveLowerLU.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveLowerLU.hpp index 08e0919ff6fc..99fde7bd5d79 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveLowerLU.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveLowerLU.hpp @@ -317,7 +317,7 @@ template struct TeamFunctor_SolveLowerLU { solve_var0(member, s, bptr); } else if (solve_tag_type::variant == 1) { solve_var1(member, s, bptr); - } else if (solve_tag_type::variant == 2) { + } else if (solve_tag_type::variant == 2 || solve_tag_type::variant == 3) { solve_var2(member, s, bptr); } } @@ -341,7 +341,7 @@ template struct TeamFunctor_SolveLowerLU { update_var0(member, s, bptr); } else if (update_tag_type::variant == 1) { update_var1(member, s, bptr); - } else if (update_tag_type::variant == 2) { + } else if (update_tag_type::variant == 2 || update_tag_type::variant == 3) { update_var2(member, s, bptr); } } else { diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveUpperChol.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveUpperChol.hpp index 3de8848a166d..8c879dd7f200 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveUpperChol.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveUpperChol.hpp @@ -262,7 +262,7 @@ template struct TeamFunctor_SolveUpperChol { solve_var0(member, s, bptr); else if (solve_tag_type::variant == 1) solve_var1(member, s, bptr); - else if (solve_tag_type::variant == 2) + else if 
(solve_tag_type::variant == 2 || solve_tag_type::variant == 3) solve_var2(member, s, bptr); else Kokkos::printf("Error: TeamFunctorSolveUpperChol::SolveTag, algorithm variant is not supported\n"); @@ -285,7 +285,7 @@ template struct TeamFunctor_SolveUpperChol { update_var0(member, s, bptr); else if (update_tag_type::variant == 1) update_var1(member, s, bptr); - else if (update_tag_type::variant == 2) + else if (update_tag_type::variant == 2 || update_tag_type::variant == 3) update_var2(member, s, bptr); else Kokkos::printf("Error: TeamFunctorUpdateUpperChol::SolveTag, algorithm variant is not supported\n"); diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveUpperLDL.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveUpperLDL.hpp index d7ab533a842e..ea85a97053dc 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveUpperLDL.hpp +++ b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveUpperLDL.hpp @@ -291,7 +291,7 @@ template struct TeamFunctor_SolveUpperLDL { solve_var0(member, s, bptr); } else if (solve_tag_type::variant == 1) { solve_var1(member, s, bptr); - } else if (solve_tag_type::variant == 2) { + } else if (solve_tag_type::variant == 2 || solve_tag_type::variant == 3) { solve_var2(member, s, bptr); } } else if (mode == -1) { @@ -315,7 +315,7 @@ template struct TeamFunctor_SolveUpperLDL { update_var0(member, s, bptr); } else if (update_tag_type::variant == 1) { update_var1(member, s, bptr); - } else if (update_tag_type::variant == 2) { + } else if (update_tag_type::variant == 2 || update_tag_type::variant == 3) { update_var2(member, s, bptr); } } else { diff --git a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveUpperLU.hpp b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveUpperLU.hpp index 5e85c6ce50b9..55e276c2767b 100644 --- a/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveUpperLU.hpp +++ 
b/packages/shylu/shylu_node/tacho/src/impl/Tacho_TeamFunctor_SolveUpperLU.hpp @@ -253,7 +253,7 @@ template struct TeamFunctor_SolveUpperLU { solve_var0(member, s, bptr); } else if (solve_tag_type::variant == 1) { solve_var1(member, s, bptr); - } else if (solve_tag_type::variant == 2) { + } else if (solve_tag_type::variant == 2 || solve_tag_type::variant == 3) { solve_var2(member, s, bptr); } } else if (mode == -1) { @@ -276,7 +276,7 @@ template struct TeamFunctor_SolveUpperLU { update_var0(member, s, bptr); } else if (update_tag_type::variant == 1) { update_var1(member, s, bptr); - } else if (update_tag_type::variant == 2) { + } else if (update_tag_type::variant == 2 || update_tag_type::variant == 3) { update_var2(member, s, bptr); } } else { diff --git a/packages/stk/CHANGELOG.md b/packages/stk/CHANGELOG.md index 644d609cb2b1..6e418aed6c67 100644 --- a/packages/stk/CHANGELOG.md +++ b/packages/stk/CHANGELOG.md @@ -1,5 +1,10 @@ # CHANGELOG +5.19.4 (STK_VERSION 5190401) 5/29/2024 + stk_search: fixed bug in morton: (accessing device view on host) + stk_search: fixed implementations to respect execution-space + stk_mesh: change default bucket capacity from 512 to 16 (now grows as needed) + 5.19.4 (STK_VERSION 5190400) 5/18/2024 Added field_fill with Selector in stk_mesh/base/NgpFieldBLAS.hpp Added field_copy with and without Selector in stk_mesh/base/NgpFieldBLAS.hpp diff --git a/packages/stk/stk_balance/Jamfile b/packages/stk/stk_balance/Jamfile index 9b02239b3d3e..545e04e64fcd 100644 --- a/packages/stk/stk_balance/Jamfile +++ b/packages/stk/stk_balance/Jamfile @@ -75,10 +75,6 @@ local installed-developer-files = # # SECTION 2: Development install # -explicit install-serial-targets ; -alias install-serial-targets - : - ; # Dependencies listed in this target are installed in the developer's project. # This should include all executables and any other files needed for developer use. 
diff --git a/packages/stk/stk_coupling/Jamfile b/packages/stk/stk_coupling/Jamfile index 5cffbebd269b..2fa157eb7681 100644 --- a/packages/stk/stk_coupling/Jamfile +++ b/packages/stk/stk_coupling/Jamfile @@ -73,8 +73,8 @@ local installed-developer-files = # # SECTION 2: Development install # -explicit install-serial-targets ; -alias install-serial-targets ; + + # Dependencies listed in this target are installed in the developer's project. # This should include all executables and any other files needed for developer use. diff --git a/packages/stk/stk_doc_tests/stk_search/howToNgpSearchElemNodeNeighbors.cpp b/packages/stk/stk_doc_tests/stk_search/howToNgpSearchElemNodeNeighbors.cpp index 86eb6fc5ac2e..c3039761e78f 100644 --- a/packages/stk/stk_doc_tests/stk_search/howToNgpSearchElemNodeNeighbors.cpp +++ b/packages/stk/stk_doc_tests/stk_search/howToNgpSearchElemNodeNeighbors.cpp @@ -100,8 +100,8 @@ DomainViewType create_elem_spheres(const stk::mesh::BulkData& mesh, double radiu KOKKOS_LAMBDA(const unsigned& i) { stk::mesh::ConnectedNodes nodes = ngpMesh.get_nodes(stk::topology::ELEM_RANK, elemIndices(i)); stk::search::Point center(0,0,0); - for(unsigned i=0; i coords = ngpCoords(nodeIndex); center[0] += coords[0]; center[1] += coords[1]; diff --git a/packages/stk/stk_emend/Jamfile b/packages/stk/stk_emend/Jamfile index fb9511103633..14ab0156aefe 100644 --- a/packages/stk/stk_emend/Jamfile +++ b/packages/stk/stk_emend/Jamfile @@ -89,9 +89,6 @@ alias install-targets $(installed-developer-files) ; -explicit install-serial-targets ; -alias install-serial-targets : ; - # # SECTION 3: End-user install # diff --git a/packages/stk/stk_expreval/Jamfile b/packages/stk/stk_expreval/Jamfile index 3985cd90beba..48d40cd7e8ce 100644 --- a/packages/stk/stk_expreval/Jamfile +++ b/packages/stk/stk_expreval/Jamfile @@ -76,8 +76,8 @@ local installed-developer-files = local xml-files = ; -explicit install-serial-targets ; -alias install-serial-targets ; + + # Dependencies listed in this 
target are installed in the developer's project. # This should include all executables and any other files needed for developer use. diff --git a/packages/stk/stk_integration_tests/Jamfile b/packages/stk/stk_integration_tests/Jamfile index 0db7289a3542..2965ffe2181a 100644 --- a/packages/stk/stk_integration_tests/Jamfile +++ b/packages/stk/stk_integration_tests/Jamfile @@ -71,8 +71,8 @@ local installed-developer-files = # # SECTION 2: Development install # -explicit install-serial-targets ; -alias install-serial-targets ; + + # Dependencies listed in this target are installed in the developer's project. # This should include all executables and any other files needed for developer use. diff --git a/packages/stk/stk_integration_tests/cmake_install_test/cuda_recipe b/packages/stk/stk_integration_tests/cmake_install_test/cuda_recipe index bbc35c40c1b8..20e5460a0b3a 100755 --- a/packages/stk/stk_integration_tests/cmake_install_test/cuda_recipe +++ b/packages/stk/stk_integration_tests/cmake_install_test/cuda_recipe @@ -1,7 +1,7 @@ #! /bin/bash -#this may be user-specific or system-specific: -sierra_code=/scratch/$USER/code_v +#this is user-specific... 
+sierra_code=/fgs/$USER/code #depending on the system, you may need sierra-devel or sierra-devel/nvidia module purge @@ -12,9 +12,9 @@ module load sierra-devel/nvidia export mpicxx_path=`type -p mpicxx` export MPI_ROOT=$(dirname ${mpicxx_path}) export LLNL_USE_OMPI_VARS=y -export OMPI_CXX=/scratch/william/stk-cmake-testing/Trilinos/packages/kokkos/bin/nvcc_wrapper +export OMPI_CXX=/fgs/$USER/stk-cmake-testing/Trilinos/packages/kokkos/bin/nvcc_wrapper cd ${sierra_code} -CUDA=ON CLEAR_CACHE=OFF ./stk/stk_integration_tests/cmake_install_test/build_stk_using_cmake +CUDA=ON CLEAR_CACHE=ON ./stk/stk_integration_tests/cmake_install_test/build_stk_using_cmake diff --git a/packages/stk/stk_integration_tests/cmake_install_test/run_cmake_stk b/packages/stk/stk_integration_tests/cmake_install_test/run_cmake_stk index 2816adb3ce43..ddc0f06b7a46 100755 --- a/packages/stk/stk_integration_tests/cmake_install_test/run_cmake_stk +++ b/packages/stk/stk_integration_tests/cmake_install_test/run_cmake_stk @@ -73,8 +73,8 @@ cmake \ -DTrilinos_ENABLE_TrilinosCouplings:BOOL=${not_cuda} \ -DTPL_ENABLE_CUDA:BOOL=${cuda_on_or_off} \ -DKokkos_ENABLE_CUDA:BOOL=${cuda_on_or_off} \ --DKokkos_ENABLE_CUDA_UVM:BOOL=${cuda_on_or_off} \ --DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE:BOOL=OFF \ +-DKokkos_ENABLE_CUDA_UVM:BOOL=OFF \ +-DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE:BOOL=ON \ -DKokkos_ARCH_VOLTA70=${cuda_on_or_off} \ -DTrilinos_ENABLE_KokkosKernels:BOOL=ON \ -DTrilinos_ENABLE_Zoltan:BOOL=ON \ diff --git a/packages/stk/stk_io/Jamfile b/packages/stk/stk_io/Jamfile index 67e5e5a5c670..81a8e935fcfd 100644 --- a/packages/stk/stk_io/Jamfile +++ b/packages/stk/stk_io/Jamfile @@ -80,8 +80,8 @@ local installed-developer-files = local xml-files = ; -explicit install-serial-targets ; -alias install-serial-targets ; + + # Dependencies listed in this target are installed in the developer's project. # This should include all executables and any other files needed for developer use. 
diff --git a/packages/stk/stk_math/Jamfile b/packages/stk/stk_math/Jamfile index 8bc826c40059..e402fb2fa1d4 100644 --- a/packages/stk/stk_math/Jamfile +++ b/packages/stk/stk_math/Jamfile @@ -72,8 +72,8 @@ local installed-developer-files = # # SECTION 2: Development install # -explicit install-serial-targets ; -alias install-serial-targets ; + + # Dependencies listed in this target are installed in the developer's project. # This should include all executables and any other files needed for developer use. diff --git a/packages/stk/stk_math/stk_math/.StkVector.hpp.swo b/packages/stk/stk_math/stk_math/.StkVector.hpp.swo deleted file mode 100644 index 01f61b326298..000000000000 Binary files a/packages/stk/stk_math/stk_math/.StkVector.hpp.swo and /dev/null differ diff --git a/packages/stk/stk_mesh/Jamfile b/packages/stk/stk_mesh/Jamfile index 8f0b8e743d6c..13f9fe9dac2b 100644 --- a/packages/stk/stk_mesh/Jamfile +++ b/packages/stk/stk_mesh/Jamfile @@ -82,8 +82,8 @@ local installed-developer-files = local xml-files = ; -explicit install-serial-targets ; -alias install-serial-targets ; + + # Dependencies listed in this target are installed in the developer's project. # This should include all executables and any other files needed for developer use. 
diff --git a/packages/stk/stk_mesh/stk_mesh/base/CMakeLists.txt b/packages/stk/stk_mesh/stk_mesh/base/CMakeLists.txt index b3892eb6cb0d..f97cc5eb019c 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/CMakeLists.txt +++ b/packages/stk/stk_mesh/stk_mesh/base/CMakeLists.txt @@ -54,14 +54,21 @@ if(HAVE_STK_Trilinos) ) else() find_package(Shards REQUIRED) - find_package(BLAS REQUIRED) add_library(stk_mesh_base ${SOURCES} ${SOURCES_IMPL} ${SOURCES_ELEMGRAPH}) target_link_libraries(stk_mesh_base ${Shards_LIBRARIES}) - target_link_libraries(stk_mesh_base BLAS::BLAS) + target_link_libraries(stk_mesh_base sierra_blas_lapack) target_link_libraries(stk_mesh_base stk_topology) target_link_libraries(stk_mesh_base stk_util_diag) target_link_libraries(stk_mesh_base stk_util_env) target_link_libraries(stk_mesh_base stk_util_parallel) + + if(USE_SIERRA_BLAS_LAPACK) + target_link_libraries(stk_mesh_base sierra_blas_lapack) + else() + find_package(BLAS REQUIRED) + target_link_libraries(stk_mesh_base BLAS::BLAS) + endif() + endif() target_include_directories(stk_mesh_base PUBLIC diff --git a/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.cpp b/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.cpp index d8b566a5e1f8..571f7f8a1850 100644 --- a/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.cpp +++ b/packages/stk/stk_mesh/stk_mesh/base/DeviceMesh.cpp @@ -150,7 +150,6 @@ void DeviceMesh::update_mesh() require_ngp_mesh_rank_limit(bulk->mesh_meta_data()); Kokkos::Profiling::pushRegion("DeviceMesh::update_mesh"); - const bool anyChanges = fill_buckets(*bulk); if (anyChanges) { @@ -294,6 +293,7 @@ void DeviceMesh::fill_sparse_connectivities(const stk::mesh::BulkData& bulk_in) unsigned totalNumConnectedEntities[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS] = {{0}, {0}, {0}, {0}, {0}}; unsigned totalNumPermutations[stk::topology::NUM_RANKS][stk::topology::NUM_RANKS] = {{0}, {0}, {0}, {0}, {0}}; + for(stk::mesh::EntityRank rank=stk::topology::NODE_RANK; rank 0) { + + const stk::mesh::Entity* 
connectedEntities = stkBucket.begin(iEntity, connectedRank); + const stk::mesh::ConnectivityOrdinal* connectedOrdinals = stkBucket.begin_ordinals(iEntity, connectedRank); + const stk::mesh::Permutation* permutations = hasPermutation ? stkBucket.begin_permutations(iEntity, connectedRank) : nullptr; + for(unsigned i=0; isync_to_host(); + stkField->modify_on_host(); if (stkField->has_ngp_field()) { impl::get_ngp_field(*stkField)->debug_modification_begin(); } diff --git a/packages/stk/stk_middle_mesh/Jamfile b/packages/stk/stk_middle_mesh/Jamfile index 48b752a7ad12..32b1ca559c81 100644 --- a/packages/stk/stk_middle_mesh/Jamfile +++ b/packages/stk/stk_middle_mesh/Jamfile @@ -78,8 +78,8 @@ local installed-developer-files = local xml-files = ; -explicit install-serial-targets ; -alias install-serial-targets ; + + # Dependencies listed in this target are installed in the developer's project. # This should include all executables and any other files needed for developer use. diff --git a/packages/stk/stk_middle_mesh/stk_middle_mesh/CDTInterface.cpp b/packages/stk/stk_middle_mesh/stk_middle_mesh/CDTInterface.cpp index a35d032d51d1..a0f4e5b4f435 100644 --- a/packages/stk/stk_middle_mesh/stk_middle_mesh/CDTInterface.cpp +++ b/packages/stk/stk_middle_mesh/stk_middle_mesh/CDTInterface.cpp @@ -10,7 +10,7 @@ void CDTInterface::triangulate(const utils::impl::Projection& proj) { assert(m_mesh->get_elements().size() == 0); - CDT::Triangulation tri(CDT::FindingClosestPoint::ClosestRandom); + CDT::Triangulation tri(CDT::VertexInsertionOrder::Enum::AsProvided); // make vertices tri.insertVertices( diff --git a/packages/stk/stk_middle_mesh_util/Jamfile b/packages/stk/stk_middle_mesh_util/Jamfile index 2ad2e9a8da28..d97edcd78b3c 100644 --- a/packages/stk/stk_middle_mesh_util/Jamfile +++ b/packages/stk/stk_middle_mesh_util/Jamfile @@ -67,8 +67,8 @@ local installed-developer-files = # # SECTION 2: Development install # -explicit install-serial-targets ; -alias install-serial-targets ; + + 
# Dependencies listed in this target are installed in the developer's project. # This should include all executables and any other files needed for developer use. diff --git a/packages/stk/stk_ngp_test/Jamfile b/packages/stk/stk_ngp_test/Jamfile index b96afd8edf4a..213728060c00 100644 --- a/packages/stk/stk_ngp_test/Jamfile +++ b/packages/stk/stk_ngp_test/Jamfile @@ -79,8 +79,8 @@ local installed-developer-files = local xml-files = ; -explicit install-serial-targets ; -alias install-serial-targets ; + + # Dependencies listed in this target are installed in the developer's project. # This should include all executables and any other files needed for developer use. diff --git a/packages/stk/stk_performance_tests/Jamfile b/packages/stk/stk_performance_tests/Jamfile index eb31103ae66d..8681f54cb3b6 100644 --- a/packages/stk/stk_performance_tests/Jamfile +++ b/packages/stk/stk_performance_tests/Jamfile @@ -67,8 +67,8 @@ local installed-developer-files = # # SECTION 2: Development install # -explicit install-serial-targets ; -alias install-serial-targets ; + + # Dependencies listed in this target are installed in the developer's project. # This should include all executables and any other files needed for developer use. 
diff --git a/packages/stk/stk_performance_tests/stk_io/perfMeshRead.cpp b/packages/stk/stk_performance_tests/stk_io/perfMeshRead.cpp index a48a7e611fe5..92052255fe0a 100644 --- a/packages/stk/stk_performance_tests/stk_io/perfMeshRead.cpp +++ b/packages/stk/stk_performance_tests/stk_io/perfMeshRead.cpp @@ -53,8 +53,8 @@ TEST(StkIo, meshRead_hex_noAura) if (stk::parallel_machine_size(MPI_COMM_WORLD) > 16) { GTEST_SKIP(); } const unsigned NUM_RUNS = 5; - const unsigned NUM_ITERS = 5; - int ELEMS_PER_DIM = stk::unit_test_util::simple_fields::get_command_line_option("--ne", 50); + const unsigned NUM_ITERS = 10; + int ELEMS_PER_DIM = stk::unit_test_util::simple_fields::get_command_line_option("--ne", 80); std::string elems = std::to_string(ELEMS_PER_DIM); std::string meshSpec = "generated:"+elems+"x"+elems+"x"+elems; @@ -89,8 +89,8 @@ TEST(StkIo, meshRead_hex_shells_sidesets_aura) if (stk::parallel_machine_size(MPI_COMM_WORLD) > 16) { GTEST_SKIP(); } const unsigned NUM_RUNS = 5; - const unsigned NUM_ITERS = 5; - int ELEMS_PER_DIM = stk::unit_test_util::simple_fields::get_command_line_option("--ne", 50); + const unsigned NUM_ITERS = 10; + int ELEMS_PER_DIM = stk::unit_test_util::simple_fields::get_command_line_option("--ne", 80); std::string elems = std::to_string(ELEMS_PER_DIM); std::string meshSpec = "generated:"+elems+"x"+elems+"x"+elems+"|shell:xyzXYZ|sideset:xyzXYZ"; diff --git a/packages/stk/stk_performance_tests/stk_search/VolumeToSurface.cpp b/packages/stk/stk_performance_tests/stk_search/VolumeToSurface.cpp index 4c6271aece7b..16c820018756 100644 --- a/packages/stk/stk_performance_tests/stk_search/VolumeToSurface.cpp +++ b/packages/stk/stk_performance_tests/stk_search/VolumeToSurface.cpp @@ -429,10 +429,11 @@ void distributed_arborx_coarse_search(Kokkos::View elem Kokkos::View sideBoxes, MPI_Comm comm) { - + std::cerr << "start of distributed raw ArborX test" << std::endl; ExecSpace execSpace; ArborX::DistributedTree tree(comm, execSpace, elemBoxes); + std::cerr 
<< "after DistributedTree construction" << std::endl; const int numQueries = sideBoxes.extent(0); Kokkos::View *, MemSpace> queries(Kokkos::ViewAllocateWithoutInitializing("queries"), numQueries); @@ -440,22 +441,25 @@ void distributed_arborx_coarse_search(Kokkos::View elem Kokkos::parallel_for("setup_queries", Kokkos::RangePolicy(0, numQueries), KOKKOS_LAMBDA(int i) { queries(i) = ArborX::intersects(sideBoxes(i)); }); Kokkos::fence(); + std::cerr << "after queries population" << std::endl; Kokkos::View values("indicesAndRanks", 0); Kokkos::View offsets("offsets", 0); - + + std::cerr << "before tree query" << std::endl; tree.query(execSpace, queries, values, offsets); + std::cerr << "after tree query" << std::endl; } void run_search_test_distributed_arborx(const std::string& meshFileName) { const unsigned NUM_RUNS = 5; - const unsigned NUM_ITERS = 100; + const unsigned NUM_ITERS = 5; stk::unit_test_util::BatchTimer batchTimer(MPI_COMM_WORLD); batchTimer.initialize_batch_timer(); for (unsigned j = 0; j < NUM_RUNS; j++) { - + std::cerr << "batch timer iteration " << j << std::endl; stk::mesh::MeshBuilder builder(MPI_COMM_WORLD); std::shared_ptr bulkPtr = builder.create(); @@ -466,6 +470,7 @@ void run_search_test_distributed_arborx(const std::string& meshFileName) batchTimer.start_batch_timer(); for (unsigned i = 0; i < NUM_ITERS; ++i) { + std::cerr << "inner iteration " << i << std::endl; distributed_arborx_coarse_search(elemBoxes, sideBoxes, MPI_COMM_WORLD); } batchTimer.stop_batch_timer(); diff --git a/packages/stk/stk_search/Jamfile b/packages/stk/stk_search/Jamfile index f9de4d0e2a40..88d9adb1dbe3 100644 --- a/packages/stk/stk_search/Jamfile +++ b/packages/stk/stk_search/Jamfile @@ -69,8 +69,8 @@ local installed-developer-files = # # SECTION 2: Development install # -explicit install-serial-targets ; -alias install-serial-targets ; + + # Dependencies listed in this target are installed in the developer's project. 
# This should include all executables and any other files needed for developer use. diff --git a/packages/stk/stk_search/stk_search/CoarseSearch.hpp b/packages/stk/stk_search/stk_search/CoarseSearch.hpp index cd5c83fb104e..1b3dba2b55b8 100644 --- a/packages/stk/stk_search/stk_search/CoarseSearch.hpp +++ b/packages/stk/stk_search/stk_search/CoarseSearch.hpp @@ -120,14 +120,14 @@ void coarse_search(Kokkos::View SearchMethod method, stk::ParallelMachine comm, Kokkos::View*, ExecutionSpace>& intersections, - ExecutionSpace const& execSpace = Kokkos::DefaultExecutionSpace{}, + ExecutionSpace const& execSpace = ExecutionSpace{}, bool enforceSearchResultSymmetry = true, bool autoSwapDomainAndRange = true) { switch (method) { case ARBORX: { #ifdef STK_HAS_ARBORX - coarse_search_arborx(domain, range, comm, intersections, enforceSearchResultSymmetry); + coarse_search_arborx(domain, range, comm, intersections, execSpace, enforceSearchResultSymmetry); #else STK_ThrowErrorMsg("STK(stk_search) was not configured with ARBORX enabled. 
Please use KDTREE or MORTON_LBVH."); #endif @@ -138,7 +138,7 @@ void coarse_search(Kokkos::View break; } case MORTON_LBVH: { - coarse_search_morton_lbvh(domain, range, comm, intersections, enforceSearchResultSymmetry); + coarse_search_morton_lbvh(domain, range, comm, intersections, execSpace, enforceSearchResultSymmetry); break; } default: { diff --git a/packages/stk/stk_search/stk_search/CommonSearchUtil.hpp b/packages/stk/stk_search/stk_search/CommonSearchUtil.hpp index 8089cabda876..eee1434aaece 100644 --- a/packages/stk/stk_search/stk_search/CommonSearchUtil.hpp +++ b/packages/stk/stk_search/stk_search/CommonSearchUtil.hpp @@ -43,6 +43,7 @@ #include "stk_util/parallel/CommSparse.hpp" #include "stk_util/parallel/ParallelComm.hpp" #include "stk_util/util/SortAndUnique.hpp" +#include "stk_search/BoxIdent.hpp" #include "stk_search/kdtree/KDTree_BoundingBox.hpp" #include "stk_search/kdtree/KDTree.hpp" @@ -303,6 +304,20 @@ void communicate_views(stk::ParallelMachine arg_comm, } } +namespace impl { +template +bool constexpr is_stk_box = + std::is_same_v> || std::is_base_of_v, T>; + +template +bool constexpr is_stk_sphere = + std::is_same_v> || std::is_base_of_v, T>; + +template +bool constexpr is_stk_point = + std::is_same_v> || std::is_base_of_v, T>; +} + } // end namespace stk::search #endif diff --git a/packages/stk/stk_search/stk_search/LocalCoarseSearch.hpp b/packages/stk/stk_search/stk_search/LocalCoarseSearch.hpp index b2c88ab53ea6..8edd8d2c75f5 100644 --- a/packages/stk/stk_search/stk_search/LocalCoarseSearch.hpp +++ b/packages/stk/stk_search/stk_search/LocalCoarseSearch.hpp @@ -86,12 +86,12 @@ void local_coarse_search( Kokkos::View*, ExecutionSpace> const & range, SearchMethod method, Kokkos::View*, ExecutionSpace> & intersections, - ExecutionSpace const& execSpace = Kokkos::DefaultExecutionSpace{}) + ExecutionSpace const& execSpace = ExecutionSpace{}) { switch (method) { case ARBORX: { #ifdef STK_HAS_ARBORX - local_coarse_search_arborx(domain, range, 
intersections); + local_coarse_search_arborx(domain, range, intersections, execSpace); #else STK_ThrowErrorMsg("STK(stk_search) was not configured with ARBORX. Please use KDTREE or MORTON_LBVH."); #endif @@ -102,7 +102,7 @@ void local_coarse_search( break; } case MORTON_LBVH: { - local_coarse_search_morton_lbvh(domain, range, intersections); + local_coarse_search_morton_lbvh(domain, range, intersections, execSpace); break; } default: { diff --git a/packages/stk/stk_search/stk_search/arborx/AccessTraits.hpp b/packages/stk/stk_search/stk_search/arborx/AccessTraits.hpp index d62bafbc1906..08ccaf7b20bb 100644 --- a/packages/stk/stk_search/stk_search/arborx/AccessTraits.hpp +++ b/packages/stk/stk_search/stk_search/arborx/AccessTraits.hpp @@ -97,15 +97,17 @@ struct AccessTraits, ArborX::PrimitivesTag> static KOKKOS_INLINE_FUNCTION ArborXShape get(ViewWrapperForArborXTraits const& primitives, std::size_t i) { + StkShape stkBox; if constexpr (stk::search::impl::is_pair_v) { - return stk::search::impl::StkToArborX(primitives.view(i).first); + stkBox = primitives.view(i).first; } else { - return stk::search::impl::StkToArborX(primitives.view(i).box); + stkBox = primitives.view(i).box; } - } + return stk::search::impl::StkToArborX(stkBox); + } }; @@ -124,19 +126,22 @@ struct AccessTraits, ArborX::PredicatesTag> static KOKKOS_FUNCTION ArborXPredicateWithIndex get(ViewWrapperForArborXTraits const & predicates, std::size_t i) { + + ArborXShape arborXBox; + if constexpr (stk::search::impl::is_pair_v) { - ArborXShape arborXBox = stk::search::impl::StkToArborX(predicates.view(i).first); - return ArborXPredicateWithIndex(arborXBox, i); + arborXBox = stk::search::impl::StkToArborX(predicates.view(i).first); } else { - ArborXShape arborXBox = stk::search::impl::StkToArborX(predicates.view(i).box); - return ArborXPredicateWithIndex(arborXBox, i); + arborXBox = stk::search::impl::StkToArborX(predicates.view(i).box); } + + return ArborXPredicateWithIndex(arborXBox, i); } }; } -#endif \ 
No newline at end of file +#endif diff --git a/packages/stk/stk_search/stk_search/arborx/CoarseSearchArborX.hpp b/packages/stk/stk_search/stk_search/arborx/CoarseSearchArborX.hpp index 34e8fcbdbb86..8e5d2f537b04 100644 --- a/packages/stk/stk_search/stk_search/arborx/CoarseSearchArborX.hpp +++ b/packages/stk/stk_search/stk_search/arborx/CoarseSearchArborX.hpp @@ -200,7 +200,6 @@ inline void coarse_search_arborx(std::vector::ArborXType; - using ArborXRangeType = typename impl::StkToArborX::ArborXType; using DomainView = Kokkos::View*, Kokkos::HostSpace, Kokkos::MemoryTraits>; using RangeView = Kokkos::View*, Kokkos::HostSpace, Kokkos::MemoryTraits>; @@ -226,14 +225,12 @@ inline void coarse_search_arborx(std::vector*, ExecutionSpace> const& localRange, MPI_Comm comm, Kokkos::View*, ExecutionSpace>& searchResults, + ExecutionSpace const& execSpace = ExecutionSpace{}, bool enforceSearchResultSymmetry = true) { using HostSpace = Kokkos::DefaultHostExecutionSpace; - using ExecSpace = Kokkos::DefaultExecutionSpace; + using ExecSpace = ExecutionSpace; using MemSpace = typename ExecSpace::memory_space; using DomainValueType = typename DomainBoxType::value_type; using RangeValueType = typename RangeBoxType::value_type; using ArborXDomainType = typename impl::StkToArborX::ArborXType; - using ArborXRangeType = typename impl::StkToArborX::ArborXType; STK_ThrowRequireMsg((std::is_same_v), "The domain and range boxes must have the same floating-point precision"); - auto execSpace = ExecSpace{}; - Kokkos::View values( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Indices_And_Ranks"), 0); Kokkos::View*, MemSpace> queries( diff --git a/packages/stk/stk_search/stk_search/arborx/LocalCoarseSearchArborX.hpp b/packages/stk/stk_search/stk_search/arborx/LocalCoarseSearchArborX.hpp index a148eba7a75f..50e53c068a95 100644 --- a/packages/stk/stk_search/stk_search/arborx/LocalCoarseSearchArborX.hpp +++ b/packages/stk/stk_search/stk_search/arborx/LocalCoarseSearchArborX.hpp @@ -180,7 +180,8 
@@ template *, ExecutionSpace>& localDomain, const Kokkos::View*, ExecutionSpace>& localRange, - Kokkos::View*, ExecutionSpace>& searchResults) + Kokkos::View*, ExecutionSpace>& searchResults, + ExecutionSpace const& execSpace = ExecutionSpace{}) { using ExecSpace = ExecutionSpace; using MemSpace = typename ExecSpace::memory_space; @@ -191,7 +192,6 @@ inline void local_coarse_search_arborx( "The domain and range boxes must have the same floating-point precision"); Kokkos::Profiling::pushRegion("STK call arborx"); - auto execSpace = ExecSpace{}; auto localRangeWrapped = impl::wrap_view_for_arborx(localRange); auto localDomainWrapped = impl::wrap_view_for_arborx(localDomain); diff --git a/packages/stk/stk_search/stk_search/arborx/StkToArborX.hpp b/packages/stk/stk_search/stk_search/arborx/StkToArborX.hpp index 2807527b03eb..a3b063215c50 100644 --- a/packages/stk/stk_search/stk_search/arborx/StkToArborX.hpp +++ b/packages/stk/stk_search/stk_search/arborx/StkToArborX.hpp @@ -52,18 +52,6 @@ namespace stk::search { namespace impl { -template -bool constexpr is_stk_box = - std::is_same_v> || std::is_base_of_v, T>; - -template -bool constexpr is_stk_sphere = - std::is_same_v> || std::is_base_of_v, T>; - -template -bool constexpr is_stk_point = - std::is_same_v> || std::is_base_of_v, T>; - template struct StkToArborX { }; diff --git a/packages/stk/stk_search/stk_search/morton_lbvh/CoarseSearchMortonLBVH.hpp b/packages/stk/stk_search/stk_search/morton_lbvh/CoarseSearchMortonLBVH.hpp index a78d9778a8d8..82d9bd3d4a1c 100644 --- a/packages/stk/stk_search/stk_search/morton_lbvh/CoarseSearchMortonLBVH.hpp +++ b/packages/stk/stk_search/stk_search/morton_lbvh/CoarseSearchMortonLBVH.hpp @@ -61,6 +61,8 @@ inline void coarse_search_morton_lbvh(std::vector> & searchResults, bool enforceSearchResultSymmetry = true) { + using HostSpace = Kokkos::DefaultHostExecutionSpace; + STK_ThrowRequireMsg((std::is_same_v), "The domain and range boxes must have the same floating-point precision"); @@ 
-68,12 +70,12 @@ inline void coarse_search_morton_lbvh(std::vector(localDomain, localRange, comm, HostSpace{}); Kokkos::Profiling::popRegion(); Kokkos::Profiling::pushRegion("Fill domain and range trees"); - stk::search::MortonAabbTree domainTree("Domain Tree", localDomain.size()); - stk::search::MortonAabbTree rangeTree("Range Tree", extendedRangeBoxes.size()); + stk::search::MortonAabbTree domainTree("Domain Tree", localDomain.size()); + stk::search::MortonAabbTree rangeTree("Range Tree", extendedRangeBoxes.size()); stk::search::export_from_box_ident_proc_vec_to_morton_tree(localDomain, domainTree); stk::search::export_from_box_vec_to_morton_tree(extendedRangeBoxes, rangeTree); @@ -82,8 +84,8 @@ inline void coarse_search_morton_lbvh(std::vector collisionList("Collision List"); - stk::search::morton_lbvh_search(domainTree, rangeTree, collisionList); + stk::search::CollisionList collisionList("Collision List"); + stk::search::morton_lbvh_search(domainTree, rangeTree, collisionList, HostSpace{}); collisionList.sync_from_device(); Kokkos::Profiling::popRegion(); @@ -137,12 +139,11 @@ inline void coarse_search_morton_lbvh( Kokkos::View*, ExecutionSpace> const& localRange, MPI_Comm comm, Kokkos::View*, ExecutionSpace>& searchResults, + ExecutionSpace const& execSpace = ExecutionSpace{}, bool enforceSearchResultSymmetry = true) { using HostSpace = Kokkos::DefaultHostExecutionSpace; - auto execSpace = ExecutionSpace{}; - auto hostSpace = HostSpace{}; STK_ThrowRequireMsg((std::is_same_v), "The domain and range boxes must have the same floating-point precision"); @@ -150,8 +151,8 @@ inline void coarse_search_morton_lbvh( using ValueType = typename DomainBoxType::value_type; Kokkos::Profiling::pushRegion("Move device results to host and convert into compatible data type."); - auto localDomainHost = Kokkos::create_mirror_view_and_copy(hostSpace, localDomain); - auto localRangeHost = Kokkos::create_mirror_view_and_copy(hostSpace, localRange); + auto localDomainHost = 
Kokkos::create_mirror_view_and_copy(HostSpace{}, localDomain); + auto localRangeHost = Kokkos::create_mirror_view_and_copy(HostSpace{}, localRange); std::vector> localDomainVec(localDomainHost.size()); std::vector> localRangeVec(localRangeHost.size()); @@ -171,23 +172,23 @@ inline void coarse_search_morton_lbvh( Kokkos::Profiling::pushRegion("Parallel consistency: extend range box list"); const auto [extendedRangeBoxes, remoteRangeIdentProcs] = - morton_extend_local_range_with_remote_boxes_that_might_intersect(localDomainVec, localRangeVec, comm); + morton_extend_local_range_with_remote_boxes_that_might_intersect(localDomainVec, localRangeVec, comm, execSpace); Kokkos::Profiling::popRegion(); Kokkos::Profiling::pushRegion("Fill domain and range trees"); - stk::search::MortonAabbTree domainTree("Domain Tree", localDomainHost.size()); - stk::search::MortonAabbTree rangeTree("Range Tree", extendedRangeBoxes.size()); + stk::search::MortonAabbTree domainTree("Domain Tree", localDomainHost.size()); + stk::search::MortonAabbTree rangeTree("Range Tree", extendedRangeBoxes.size()); stk::search::export_from_box_ident_proc_vec_to_morton_tree(localDomainVec, domainTree); - stk::search::export_from_box_vec_to_morton_tree(extendedRangeBoxes, rangeTree); + stk::search::export_from_box_vec_to_morton_tree(extendedRangeBoxes, rangeTree); domainTree.sync_to_device(); rangeTree.sync_to_device(); Kokkos::Profiling::popRegion(); Kokkos::Profiling::pushRegion("Perform Morton query"); - stk::search::CollisionList collisionList("Collision List"); - stk::search::morton_lbvh_search(domainTree, rangeTree, collisionList); + stk::search::CollisionList collisionList("Collision List"); + stk::search::morton_lbvh_search(domainTree, rangeTree, collisionList, execSpace); collisionList.sync_from_device(); Kokkos::Profiling::popRegion(); @@ -196,7 +197,7 @@ inline void coarse_search_morton_lbvh( searchResults = Kokkos::View*, ExecutionSpace>( 
Kokkos::ViewAllocateWithoutInitializing(searchResults.label()), numCollisions); - auto searchResultsHost = Kokkos::create_mirror_view_and_copy(hostSpace, searchResults); + auto searchResultsHost = Kokkos::create_mirror_view_and_copy(HostSpace{}, searchResults); const unsigned numLocalRange = localRangeHost.size(); unsigned searchResultIndex = 0; @@ -221,7 +222,7 @@ inline void coarse_search_morton_lbvh( insert_into_results(domainIdx, rangeIdx, searchResultIndex); } else { - if (intersects(localDomain(domainIdx).box, extendedRangeBoxes[rangeIdx])) { + if (intersects(localDomainHost(domainIdx).box, extendedRangeBoxes[rangeIdx])) { insert_into_results(domainIdx, rangeIdx, searchResultIndex); } } diff --git a/packages/stk/stk_search/stk_search/morton_lbvh/LocalCoarseSearchMortonLBVH.hpp b/packages/stk/stk_search/stk_search/morton_lbvh/LocalCoarseSearchMortonLBVH.hpp index 23cbf07fae06..7553b9fe9bf2 100644 --- a/packages/stk/stk_search/stk_search/morton_lbvh/LocalCoarseSearchMortonLBVH.hpp +++ b/packages/stk/stk_search/stk_search/morton_lbvh/LocalCoarseSearchMortonLBVH.hpp @@ -34,13 +34,16 @@ #ifndef LOCALCOARSESEARCHMORTONLBVH_HPP #define LOCALCOARSESEARCHMORTONLBVH_HPP -#include "stk_search/BoxIdent.hpp" #include "stk_search/Box.hpp" +#include "stk_search/BoxIdent.hpp" +#include "stk_search/CommonSearchUtil.hpp" #include "stk_search/morton_lbvh/MortonLBVH_Search.hpp" #include "stk_search/morton_lbvh/MortonLBVH_Tree.hpp" -#include "Kokkos_Core.hpp" -#include + #include +#include + +#include "Kokkos_Core.hpp" namespace stk::search { @@ -69,11 +72,12 @@ void insert_intersections_into_results( const Kokkos::View*, ExecutionSpace> & domain, const Kokkos::View*, ExecutionSpace> & range, const stk::search::CollisionList rawIntersections, - Kokkos::View*, ExecutionSpace> & intersections) + Kokkos::View*, ExecutionSpace> & intersections, + ExecutionSpace const& execSpace) { const int numCollisions = rawIntersections.get_num_collisions(); - 
Kokkos::parallel_for(Kokkos::RangePolicy(0, numCollisions), + Kokkos::parallel_for(Kokkos::RangePolicy(execSpace, 0, numCollisions), KOKKOS_LAMBDA(int index) { const unsigned domainIdx = rawIntersections.m_data(index, 0); const unsigned rangeIdx = rawIntersections.m_data(index, 1); @@ -126,40 +130,40 @@ void insert_only_confirmed_intersections_into_results( const Kokkos::View*, ExecutionSpace> & domain, const Kokkos::View*, ExecutionSpace> & range, const stk::search::CollisionList rawIntersections, - Kokkos::View*, ExecutionSpace> & intersections) + Kokkos::View*, ExecutionSpace> & intersections, + ExecutionSpace const& execSpace) { - constexpr DomainIdentType INVALID_DOMAIN_IDENT = std::numeric_limits::max(); - constexpr RangeIdentType INVALID_RANGE_IDENT = std::numeric_limits::max(); + static bool constexpr isSphere = impl::is_stk_sphere || impl::is_stk_sphere; + const int numCollisions = rawIntersections.get_num_collisions(); - Kokkos::parallel_for(Kokkos::RangePolicy(0, numCollisions), + Kokkos::View counter("counter"); + Kokkos::parallel_for(Kokkos::RangePolicy(execSpace, 0, numCollisions), KOKKOS_LAMBDA(int index) { const unsigned domainIdx = rawIntersections.m_data(index, 0); const unsigned rangeIdx = rawIntersections.m_data(index, 1); - const auto & domainBoxIdent = domain[domainIdx]; - const auto & rangeBoxIdent = range[rangeIdx]; - if (intersects(domainBoxIdent.box, rangeBoxIdent.box)) { + const auto domainBoxIdent = domain[domainIdx]; + const auto rangeBoxIdent = range[rangeIdx]; + + if (isSphere) + { + if (intersects(domainBoxIdent.box, rangeBoxIdent.box)) + { + int outputIdx = Kokkos::atomic_fetch_add(&(counter()), 1); + intersections[outputIdx] = {domainBoxIdent.ident, rangeBoxIdent.ident}; + } + } else + { intersections[index] = {domainBoxIdent.ident, rangeBoxIdent.ident}; } - else { - intersections[index] = {INVALID_DOMAIN_IDENT, INVALID_RANGE_IDENT}; - } }); - int numActualIntersections = 0; - Kokkos::parallel_reduce(Kokkos::RangePolicy(0, 1), - 
KOKKOS_LAMBDA(int /*index*/, int & intersectionSum) { - int destIndex = 0; - for (int sourceIndex = 0; sourceIndex < numCollisions; ++sourceIndex) { - if (intersections[sourceIndex].domainIdent != INVALID_DOMAIN_IDENT) { - intersections[destIndex++] = intersections[sourceIndex]; - } - } - intersectionSum = destIndex; - }, - numActualIntersections); - - Kokkos::resize(intersections, numActualIntersections); + if constexpr (isSphere) + { + int numActualIntersections; + Kokkos::deep_copy(numActualIntersections, counter); + Kokkos::resize(intersections, numActualIntersections); + } } template @@ -172,19 +176,19 @@ void local_coarse_search_morton_lbvh( "The domain and range boxes must have the same floating-point precision"); using ValueType = typename DomainBoxType::value_type; - using ExecutionSpace = Kokkos::DefaultHostExecutionSpace; + using HostSpace = Kokkos::DefaultHostExecutionSpace; Kokkos::Profiling::pushRegion("Fill domain and range trees"); const bool supportHostBoxes = false; - stk::search::MortonAabbTree domainTree("Domain Tree", domain.size(), supportHostBoxes); - stk::search::MortonAabbTree rangeTree("Range Tree", range.size(), supportHostBoxes); + stk::search::MortonAabbTree domainTree("Domain Tree", domain.size(), supportHostBoxes); + stk::search::MortonAabbTree rangeTree("Range Tree", range.size(), supportHostBoxes); stk::search::export_from_box_ident_vector_to_morton_tree(domain, domainTree); stk::search::export_from_box_ident_vector_to_morton_tree(range, rangeTree); Kokkos::Profiling::popRegion(); - stk::search::CollisionList collisionList("Collision List"); - stk::search::morton_lbvh_search(domainTree, rangeTree, collisionList); + stk::search::CollisionList collisionList("Collision List"); + stk::search::morton_lbvh_search(domainTree, rangeTree, collisionList, HostSpace{}); Kokkos::Profiling::pushRegion("Aggregate search results"); const int numCollisions = collisionList.get_num_collisions(); @@ -207,7 +211,8 @@ template *, ExecutionSpace> & 
domain, const Kokkos::View*, ExecutionSpace> & range, - Kokkos::View*, ExecutionSpace> & searchResults) + Kokkos::View*, ExecutionSpace> & searchResults, + ExecutionSpace const& execSpace = ExecutionSpace{}) { STK_ThrowRequireMsg((std::is_same_v), "The domain and range boxes must have the same floating-point precision"); @@ -227,8 +232,7 @@ void local_coarse_search_morton_lbvh( Kokkos::Profiling::pushRegion("STK Morton Search"); stk::search::CollisionList collisionList("Collision List"); - stk::search::morton_lbvh_search(domainTree, rangeTree, collisionList); - Kokkos::Profiling::popRegion(); + stk::search::morton_lbvh_search(domainTree, rangeTree, collisionList, execSpace); Kokkos::Profiling::pushRegion("STK Aggregate search results"); const int numCollisions = collisionList.get_num_collisions(); @@ -238,10 +242,10 @@ void local_coarse_search_morton_lbvh( if constexpr ((std::is_same_v> || std::is_same_v>) && (std::is_same_v> || std::is_same_v>)) { - insert_intersections_into_results(domain, range, collisionList, searchResults); + insert_intersections_into_results(domain, range, collisionList, searchResults, execSpace); } else { - insert_only_confirmed_intersections_into_results(domain, range, collisionList, searchResults); + insert_only_confirmed_intersections_into_results(domain, range, collisionList, searchResults, execSpace); } Kokkos::Profiling::popRegion(); diff --git a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_CollisionList.hpp b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_CollisionList.hpp index 94e951731165..904cd1aba356 100644 --- a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_CollisionList.hpp +++ b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_CollisionList.hpp @@ -47,7 +47,6 @@ template struct CollisionList { using LVBH_types = MortonLbvhTypes; - using execution_space = typename LVBH_types::execution_space; using memory_space = typename LVBH_types::memory_space; using data_val_type = typename 
LVBH_types::local_ordinal_pairs_t; using host_data_val_type = typename data_val_type::HostMirror; diff --git a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_CommonTypes.hpp b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_CommonTypes.hpp index 320ff3909b0a..200333dec506 100644 --- a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_CommonTypes.hpp +++ b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_CommonTypes.hpp @@ -54,14 +54,6 @@ struct MortonLbvhMemorySpace using memory_space = typename ExecutionSpace::memory_space; }; -#ifdef KOKKOS_ENABLE_CUDA -template <> -struct MortonLbvhMemorySpace -{ - using memory_space = Kokkos::CudaSpace; -}; -#endif - template inline ViewT no_init(const std::string &name) { @@ -83,8 +75,7 @@ inline ViewT with_init(const std::string &name, unsigned len) template struct MortonLbvhTypes { - using execution_space = ExecutionSpace; - using memory_space = typename MortonLbvhMemorySpace::memory_space; + using memory_space = typename ExecutionSpace::memory_space; // View of a single LocalOrdinal. 
using local_ordinal_scl_t = Kokkos::View; @@ -101,29 +92,27 @@ struct MortonLbvhTypes using local_ordinal_pairs_hmt = typename local_ordinal_pairs_t::HostMirror; using local_ordinal_pairs_tmt = Kokkos::View; - using aabb_morton_codes_t = Kokkos::View; + using aabb_morton_codes_t = Kokkos::View; using aabb_morton_codes_hmt = typename aabb_morton_codes_t::HostMirror; - using aabb_morton_codes_tmt = Kokkos::View; + using aabb_morton_codes_tmt = Kokkos::View; }; template struct MortonAabbTypes { - using execution_space = typename MortonLbvhTypes::execution_space; using memory_space = typename MortonLbvhTypes::memory_space; - using real_type = RealType; // Points - using aabb_points_t = Kokkos::View; + using aabb_points_t = Kokkos::View; using aabb_points_hmt = typename aabb_points_t::HostMirror; - using aabb_const_points_t = Kokkos::View; - using aabb_const_points_tmt = Kokkos::View; + using aabb_const_points_t = Kokkos::View; + using aabb_const_points_tmt = Kokkos::View; // We'll use these when convert from using (min_pt, max_pt) pairs. 
- using bboxes_3d_view_t = Kokkos::View; + using bboxes_3d_view_t = Kokkos::View; using bboxes_3d_view_hmt = typename bboxes_3d_view_t::HostMirror; - using bboxes_const_3d_view_t = Kokkos::View; - using bboxes_3d_view_amt = Kokkos::View; + using bboxes_3d_view_amt = Kokkos::View>; }; diff --git a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_ParallelConsistencyUtils.hpp b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_ParallelConsistencyUtils.hpp index 88a944bd1d6e..08a8cca0ce33 100644 --- a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_ParallelConsistencyUtils.hpp +++ b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_ParallelConsistencyUtils.hpp @@ -30,12 +30,13 @@ gather_all_processor_superset_domain_boxes(const std::vector +template std::pair, std::vector> morton_extend_local_range_with_remote_boxes_that_might_intersect( const std::vector> & localDomain, const std::vector> & localRange, - MPI_Comm comm) + MPI_Comm comm, + ExecutionSpace const& execSpace) { using DomainValueType = typename DomainBoxType::value_type; @@ -44,9 +45,9 @@ morton_extend_local_range_with_remote_boxes_that_might_intersect( const auto globalSupersetBoxes = gather_all_processor_superset_domain_boxes(localDomain, comm); - stk::search::MortonAabbTree domainTree("Proc Domain Tree", + stk::search::MortonAabbTree domainTree("Proc Domain Tree", localRange.size()); - stk::search::MortonAabbTree rangeTree("Proc Range Tree", + stk::search::MortonAabbTree rangeTree("Proc Range Tree", globalSupersetBoxes.size()); export_from_box_ident_proc_vec_to_morton_tree(localRange, domainTree); @@ -54,8 +55,8 @@ morton_extend_local_range_with_remote_boxes_that_might_intersect( domainTree.sync_to_device(); rangeTree.sync_to_device(); - stk::search::CollisionList collisionList("Proc Collision List"); - stk::search::morton_lbvh_search(domainTree, rangeTree, collisionList); + stk::search::CollisionList collisionList("Proc Collision List"); + 
stk::search::morton_lbvh_search(domainTree, rangeTree, collisionList, execSpace); collisionList.sync_from_device(); using GlobalIdType = typename RangeIdentProcType::ident_type; diff --git a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_Search.hpp b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_Search.hpp index ae785ecaf952..2e6e89c79626 100644 --- a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_Search.hpp +++ b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_Search.hpp @@ -160,13 +160,14 @@ inline void export_from_box_vec_to_morton_tree(const std::vector &boxVe template inline void morton_lbvh_search(MortonAabbTree &tree1, MortonAabbTree &tree2, - CollisionList &searchResults) + CollisionList &searchResults, + ExecutionSpace const& execSpace = ExecutionSpace{}) { Kokkos::Profiling::pushRegion("Initialization"); Kokkos::Profiling::pushRegion("Get global bounds"); // Get total bounds - TotalBoundsFunctor::apply(tree1); - TotalBoundsFunctor::apply(tree2); + TotalBoundsFunctor::apply(tree1, execSpace); + TotalBoundsFunctor::apply(tree2, execSpace); Kokkos::fence(); Kokkos::Profiling::popRegion(); @@ -177,8 +178,8 @@ inline void morton_lbvh_search(MortonAabbTree &tree1, // Morton encode the centroids of the leaves Kokkos::Profiling::pushRegion("Morton encoding of leaves"); - MortonEncoder::apply(tree1, sortTree1); - MortonEncoder::apply(tree2, sortTree2); + MortonEncoder::apply(tree1, execSpace, sortTree1); + MortonEncoder::apply(tree2, execSpace, sortTree2); Kokkos::fence(); Kokkos::Profiling::popRegion(); @@ -186,11 +187,11 @@ inline void morton_lbvh_search(MortonAabbTree &tree1, Kokkos::Profiling::pushRegion("Sort the trees"); if (sortTree1) { // printf("Sorting tree with %d leaves\n", tree1.hm_numLeaves()); - SortByCode::apply(tree1); + SortByCode::apply(tree1, execSpace); } if (sortTree2) { // printf("Sorting tree with %d leaves\n", tree1.hm_numLeaves()); - SortByCode::apply(tree2); + SortByCode::apply(tree2, 
execSpace); } Kokkos::fence(); Kokkos::Profiling::popRegion(); @@ -200,10 +201,10 @@ inline void morton_lbvh_search(MortonAabbTree &tree1, bool buildTree1 = (sortTree1 && flipOrder); bool buildTree2 = (sortTree2 && !flipOrder); if (buildTree1) { - BuildRadixTree::apply(tree1); + BuildRadixTree::apply(tree1, execSpace); } if (buildTree2) { - BuildRadixTree::apply(tree2); + BuildRadixTree::apply(tree2, execSpace); } Kokkos::fence(); Kokkos::Profiling::popRegion(); @@ -211,10 +212,10 @@ inline void morton_lbvh_search(MortonAabbTree &tree1, // Augment the trees to be bounding volume (box) hierarchies Kokkos::Profiling::pushRegion("Augment the trees to be bounding volume hierarchies"); if (buildTree1) { - UpdateInteriorNodeBVs::apply(tree1); + UpdateInteriorNodeBVs::apply(tree1, execSpace); } if (buildTree2) { - UpdateInteriorNodeBVs::apply(tree2); + UpdateInteriorNodeBVs::apply(tree2, execSpace); } Kokkos::fence(); Kokkos::Profiling::popRegion(); @@ -223,10 +224,10 @@ inline void morton_lbvh_search(MortonAabbTree &tree1, // Test the boxes from the non-tree against the tree that was built. 
Kokkos::Profiling::pushRegion("Search query"); if (flipOrder) { - Traverse_MASTB_BVH_Functor::apply_tree(tree2, tree1, searchResults, true); + Traverse_MASTB_BVH_Functor::apply_tree(tree2, tree1, searchResults, execSpace, true); } else { - Traverse_MASTB_BVH_Functor::apply_tree(tree1, tree2, searchResults); + Traverse_MASTB_BVH_Functor::apply_tree(tree1, tree2, searchResults, execSpace); } Kokkos::fence(); Kokkos::Profiling::popRegion(); @@ -235,7 +236,8 @@ inline void morton_lbvh_search(MortonAabbTree &tree1, template inline void morton_lbvh_search(const std::vector &boxA, const std::vector &boxB, - CollisionList &searchResults) + CollisionList &searchResults, + ExecutionSpace const& execSpace = ExecutionSpace{}) { Kokkos::Profiling::pushRegion("morton_lbvh_search: export boxes to trees"); MortonAabbTree mlbvhA("a"), mlbvhB("b"); @@ -246,7 +248,7 @@ inline void morton_lbvh_search(const std::vector &boxA, Kokkos::Profiling::popRegion(); Kokkos::Profiling::pushRegion("morton_lbvh_search: execute search"); - morton_lbvh_search(mlbvhA, mlbvhB, searchResults); + morton_lbvh_search(mlbvhA, mlbvhB, searchResults, execSpace); Kokkos::Profiling::popRegion(); } diff --git a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp index b32c3d0883b6..2aca828b2aa0 100644 --- a/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp +++ b/packages/stk/stk_search/stk_search/morton_lbvh/MortonLBVH_TreeManipulationUtils.hpp @@ -81,6 +81,7 @@ #include #include #include +#include "Kokkos_Sort.hpp" #include #include #include @@ -88,16 +89,6 @@ #include #include -#ifdef KOKKOS_ENABLE_CUDA -#include -#include - -#ifdef LENGTH -// The Thrust implementation uses this as the name of a template argument in numerous places! -#error "WHO DEFINED LENGTH IN A MACRO?" -#endif -#endif - // // Cuda and gcc disagree about whether the argument to clz*(.) 
is signed or not! // @@ -117,23 +108,21 @@ namespace stk::search { constexpr size_t COLLISION_SCALE_FACTOR = 16; -template +template struct TotalBoundsFunctor { - using execution_space = ExecutionSpace; - using size_type = typename execution_space::size_type; + using size_type = typename ExecutionSpace::size_type; - using real_type = RealType; - using value_type = MortonAABox; - using kokkos_aabb_types = MortonAabbTypes; + using value_type = MortonAABox; + using kokkos_aabb_types = MortonAabbTypes; using bboxes_const_3d_view_t = typename kokkos_aabb_types::bboxes_const_3d_view_t; - TotalBoundsFunctor(const MortonAabbTree &tree); + TotalBoundsFunctor(const MortonAabbTree &tree); KOKKOS_INLINE_FUNCTION void init(value_type &update) const; - static void apply(MortonAabbTree &tree); + static void apply(MortonAabbTree &tree, ExecutionSpace const& execSpace); KOKKOS_INLINE_FUNCTION void operator()(size_type idx, value_type &update) const; @@ -144,12 +133,12 @@ struct TotalBoundsFunctor bboxes_const_3d_view_t m_minMaxs; }; -template -TotalBoundsFunctor::TotalBoundsFunctor(const MortonAabbTree &tree) +template +TotalBoundsFunctor::TotalBoundsFunctor(const MortonAabbTree &tree) : m_minMaxs(tree.m_minMaxs) {} -template +template KOKKOS_INLINE_FUNCTION void TotalBoundsFunctor::init(value_type &update) const { @@ -162,8 +151,8 @@ void TotalBoundsFunctor::init(value_type &update) cons update.m_max[2] = -FLT_MAX; } -template -void TotalBoundsFunctor::apply(MortonAabbTree &tree) +template +void TotalBoundsFunctor::apply(MortonAabbTree &tree, ExecutionSpace const& execSpace) { value_type retBox; retBox.m_min[0] = FLT_MAX; @@ -177,7 +166,8 @@ void TotalBoundsFunctor::apply(MortonAabbTree 0) { const TotalBoundsFunctor tbf(tree); const size_t numLeaves = tree.hm_numLeaves(); - Kokkos::parallel_reduce(numLeaves, tbf, retBox); + auto policy = Kokkos::RangePolicy(execSpace, 0, numLeaves); + Kokkos::parallel_reduce(policy, tbf, retBox); } tree.m_globalMinPt[0] = retBox.m_min[0]; @@ 
-189,7 +179,7 @@ void TotalBoundsFunctor::apply(MortonAabbTree +template KOKKOS_INLINE_FUNCTION void TotalBoundsFunctor::operator()(size_type idx, value_type &update) const { @@ -202,7 +192,7 @@ void TotalBoundsFunctor::operator()(size_type idx, val update.m_max[2] = fmax(m_minMaxs(idx, 5), update.m_max[2]); } -template +template KOKKOS_INLINE_FUNCTION void TotalBoundsFunctor::join(value_type &update, const value_type &input) const { @@ -216,21 +206,19 @@ void TotalBoundsFunctor::join(value_type &update, cons } -template +template struct MortonEncoder { - using execution_space = ExecutionSpace; using value_type = int; - using real_type = RealType; - using LBVH_types = MortonLbvhTypes; - using kokkos_aabb_types = MortonAabbTypes; + using LBVH_types = MortonLbvhTypes; + using kokkos_aabb_types = MortonAabbTypes; using bboxes_const_3d_view_t = typename kokkos_aabb_types::bboxes_const_3d_view_t; using bboxes_3d_view_amt = typename kokkos_aabb_types::bboxes_3d_view_amt; - MortonEncoder(const MortonAabbTree &tree, bool reallyEncode); + MortonEncoder(const MortonAabbTree &tree, bool reallyEncode); - static void apply(const MortonAabbTree &tree, bool reallyEncode = true); + static void apply(const MortonAabbTree &tree, ExecutionSpace const& execSpace, bool reallyEncode = true); KOKKOS_INLINE_FUNCTION void operator()(unsigned leafIdx) const; @@ -239,17 +227,17 @@ struct MortonEncoder typename LBVH_types::local_ordinals_t m_idsOut; typename LBVH_types::aabb_morton_codes_t m_codesOut; const LocalOrdinal m_numPts; - const real_type m_xWidth; - const real_type m_yWidth; - const real_type m_zWidth; - const real_type m_globalXMin; - const real_type m_globalYMin; - const real_type m_globalZMin; + const RealType m_xWidth; + const RealType m_yWidth; + const RealType m_zWidth; + const RealType m_globalXMin; + const RealType m_globalYMin; + const RealType m_globalZMin; const bool m_reallyDo; }; -template -MortonEncoder::MortonEncoder(const MortonAabbTree &tree, +template 
+MortonEncoder::MortonEncoder(const MortonAabbTree &tree, bool reallyEncode) : m_minMaxs(tree.m_minMaxs), m_idsOut(tree.m_leafIds), @@ -265,24 +253,25 @@ MortonEncoder::MortonEncoder(const MortonAabbTree -void MortonEncoder::apply(const MortonAabbTree &tree, - bool reallyEncode) +template +void MortonEncoder::apply(const MortonAabbTree &tree, + ExecutionSpace const& execSpace, bool reallyEncode) { const MortonEncoder op(tree, reallyEncode); const size_t numLeaves = tree.hm_numLeaves(); - Kokkos::parallel_for(numLeaves, op); + auto policy = Kokkos::RangePolicy(execSpace, 0, numLeaves); + Kokkos::parallel_for(policy, op); } #ifdef SMALL_MORTON // 32 bit Morton code -template +template KOKKOS_INLINE_FUNCTION void MortonEncoder::operator()(unsigned leafIdx) const { - real_type ctdX = 0.5 * (m_minMaxs(leafIdx, 0) + m_minMaxs(leafIdx, 3)); - real_type ctdY = 0.5 * (m_minMaxs(leafIdx, 1) + m_minMaxs(leafIdx, 4)); - real_type ctdZ = 0.5 * (m_minMaxs(leafIdx, 2) + m_minMaxs(leafIdx, 5)); + RealType ctdX = 0.5 * (m_minMaxs(leafIdx, 0) + m_minMaxs(leafIdx, 3)); + RealType ctdY = 0.5 * (m_minMaxs(leafIdx, 1) + m_minMaxs(leafIdx, 4)); + RealType ctdZ = 0.5 * (m_minMaxs(leafIdx, 2) + m_minMaxs(leafIdx, 5)); // std::cout << "box(" << leafIdx << ") = (" << m_minMax(leafIdx, 0) << " " // << m_minMax(leafIdx, 1) << " " << m_minMax(leafIdx, 2) @@ -324,16 +313,16 @@ void MortonEncoder::operator()(unsigned leafIdx) const #else // 64 bit Morton codes -template +template KOKKOS_INLINE_FUNCTION void MortonEncoder::operator()(unsigned leafIdx) const { m_idsOut(leafIdx) = leafIdx; if (m_reallyDo) { - real_type ctdX = 0.5 * (m_minMaxs(leafIdx, 0) + m_minMaxs(leafIdx, 3)); - real_type ctdY = 0.5 * (m_minMaxs(leafIdx, 1) + m_minMaxs(leafIdx, 4)); - real_type ctdZ = 0.5 * (m_minMaxs(leafIdx, 2) + m_minMaxs(leafIdx, 5)); + RealType ctdX = 0.5 * (m_minMaxs(leafIdx, 0) + m_minMaxs(leafIdx, 3)); + RealType ctdY = 0.5 * (m_minMaxs(leafIdx, 1) + m_minMaxs(leafIdx, 4)); + RealType ctdZ = 0.5 * 
(m_minMaxs(leafIdx, 2) + m_minMaxs(leafIdx, 5)); // std::cout << "box(" << leafIdx << ") = (" << m_minMaxs(leafIdx, 0) << " " // << m_minMaxs(leafIdx, 1) << " " << m_minMaxs(leafIdx, 2) @@ -383,13 +372,11 @@ void MortonEncoder::operator()(unsigned leafIdx) const template struct SortByCodeIdPair { - using execution_space = ExecutionSpace; - using LBVH_types = MortonLbvhTypes; - using real_type = RealType; + using LBVH_types = MortonLbvhTypes; - SortByCodeIdPair(const MortonAabbTree &tree); + SortByCodeIdPair(const MortonAabbTree &tree); - static void apply(const MortonAabbTree &tree, bool reallyEncode = true); + static void apply(const MortonAabbTree &tree, bool reallyEncode = true); std::vector m_buffer; typename LBVH_types::local_ordinals_hmt hm_leafIds; @@ -397,7 +384,7 @@ struct SortByCodeIdPair }; template -SortByCodeIdPair::SortByCodeIdPair(const MortonAabbTree &tree) +SortByCodeIdPair::SortByCodeIdPair(const MortonAabbTree &tree) { hm_leafIds = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, tree.m_leafIds); hm_leafCodes = Kokkos::create_mirror_view(Kokkos::WithoutInitializing, tree.m_leafCodes); @@ -414,7 +401,7 @@ SortByCodeIdPair::SortByCodeIdPair(const MortonAabbTre } template -void SortByCodeIdPair::apply(const MortonAabbTree &tree, +void SortByCodeIdPair::apply(const MortonAabbTree &tree, bool reallyEncode) { SortByCodeIdPair tmp(tree); @@ -431,56 +418,28 @@ void SortByCodeIdPair::apply(const MortonAabbTree +template struct SortByCode { - using real_type = RealType; - using execution_space = ExecutionSpace; - - static void apply(const MortonAabbTree &tree) - { - SortByCodeIdPair::apply(tree); - } -}; - -#ifdef KOKKOS_ENABLE_CUDA - -#define SBS_THRUST_SORT_THRESHOLD 2048 - -template -struct SortByCode { - using real_type = RealType; - using execution_space = Kokkos::Cuda; - - static void apply(const MortonAabbTree &tree) + static void apply(const MortonAabbTree &tree, ExecutionSpace const& execSpace) { - if (tree.hm_numLeaves() <= 
SBS_THRUST_SORT_THRESHOLD) { - SortByCodeIdPair::apply(tree); + if constexpr (std::is_same_v) { + SortByCodeIdPair::apply(tree); } else { - int n = tree.m_leafIds.extent(0); - - morton_code_t *rawLeafCodes = tree.m_leafCodes.data(); - thrust::device_ptr rawLeafCodesThr = thrust::device_pointer_cast(rawLeafCodes); - LocalOrdinal *rawLeafIds = tree.m_leafIds.data(); - thrust::device_ptr rawLeafIdsThr = thrust::device_pointer_cast(rawLeafIds); - thrust::stable_sort_by_key(rawLeafCodesThr, rawLeafCodesThr + n, rawLeafIdsThr); + Kokkos::Experimental::sort_by_key(execSpace, tree.m_leafCodes, tree.m_leafIds); } } }; -#endif - - -template +template struct BuildRadixTree { - using execution_space = ExecutionSpace; - using LBVH_types = MortonLbvhTypes; + using LBVH_types = MortonLbvhTypes; - BuildRadixTree(const MortonAabbTree &tree); + BuildRadixTree(const MortonAabbTree &tree); - static void apply(const MortonAabbTree &tree); + static void apply(const MortonAabbTree &tree, ExecutionSpace const& execSpace); KOKKOS_INLINE_FUNCTION void operator()(unsigned argIdx) const; @@ -497,8 +456,8 @@ struct BuildRadixTree typename LBVH_types::local_ordinals_t m_atomicFlags; }; -template -BuildRadixTree::BuildRadixTree(const MortonAabbTree &tree) +template +BuildRadixTree::BuildRadixTree(const MortonAabbTree &tree) : m_numLeaves(tree.hm_numLeaves()), m_numInternalNodes(tree.hm_numInternalNodes()), tm_leafCodes(tree.m_leafCodes), @@ -508,17 +467,18 @@ BuildRadixTree::BuildRadixTree(const MortonAabbTree -void BuildRadixTree::apply(const MortonAabbTree &tree) +template +void BuildRadixTree::apply(const MortonAabbTree &tree, ExecutionSpace const& execSpace) { if (tree.hm_numLeaves() <= 0) { return; } BuildRadixTree op(tree); - Kokkos::parallel_for(static_cast(tree.hm_numInternalNodes()), op); + auto policy = Kokkos::RangePolicy(execSpace, 0, static_cast(tree.hm_numInternalNodes())); + Kokkos::parallel_for(policy, op); } -template +template KOKKOS_INLINE_FUNCTION void 
BuildRadixTree::operator()(unsigned argIdx) const { @@ -593,7 +553,7 @@ void BuildRadixTree::operator()(unsigned argIdx) const #ifdef SMALL_MORTON // 32 bit Morton -template +template KOKKOS_INLINE_FUNCTION int leaves_cpr(LocalOrdinal baseIdx, LocalOrdinal testIdx) const { @@ -612,7 +572,7 @@ int leaves_cpr(LocalOrdinal baseIdx, LocalOrdinal testIdx) const #else // 64 bit Morton -template +template KOKKOS_INLINE_FUNCTION int BuildRadixTree::leaves_cpr(LocalOrdinal baseIdx, LocalOrdinal testIdx) const { @@ -631,25 +591,23 @@ int BuildRadixTree::leaves_cpr(LocalOrdinal baseIdx, L #endif // 64 bit Morton -template +template struct UpdateInteriorNodeBVs { - using real_type = RealType; - using execution_space = ExecutionSpace; - using LBVH_types = MortonLbvhTypes; - using kokkos_aabb_types = MortonAabbTypes; + using LBVH_types = MortonLbvhTypes; + using kokkos_aabb_types = MortonAabbTypes; using bboxes_const_3d_view_t = typename kokkos_aabb_types::bboxes_const_3d_view_t; - UpdateInteriorNodeBVs(const MortonAabbTree &tree); + UpdateInteriorNodeBVs(const MortonAabbTree &tree); - static void apply(const MortonAabbTree &tree); + static void apply(const MortonAabbTree &tree, ExecutionSpace const& execSpace); KOKKOS_INLINE_FUNCTION void operator()(unsigned argIdx) const; template KOKKOS_FORCEINLINE_FUNCTION - void get_box(real_type bvMinMax[6], LocalOrdinal idx, const BBox3dViewType &boxesMinMax) const; + void get_box(RealType bvMinMax[6], LocalOrdinal idx, const BBox3dViewType &boxesMinMax) const; const LocalOrdinal m_numLeaves; const LocalOrdinal m_numInternalNodes; @@ -662,8 +620,8 @@ struct UpdateInteriorNodeBVs typename LBVH_types::local_ordinals_t m_atomicFlags; }; -template -UpdateInteriorNodeBVs::UpdateInteriorNodeBVs(const MortonAabbTree &tree) +template +UpdateInteriorNodeBVs::UpdateInteriorNodeBVs(const MortonAabbTree &tree) : m_numLeaves(tree.hm_numLeaves()), m_numInternalNodes(tree.hm_numInternalNodes()), tm_nodeChildren(tree.m_nodeChildren), @@ -673,27 +631,28 
@@ UpdateInteriorNodeBVs::UpdateInteriorNodeBVs(const Mor m_atomicFlags(tree.m_atomicFlags) {} -template -void UpdateInteriorNodeBVs::apply(const MortonAabbTree &tree) +template +void UpdateInteriorNodeBVs::apply(const MortonAabbTree &tree, ExecutionSpace const& execSpace) { const UpdateInteriorNodeBVs op(tree); const size_t numLeaves = tree.hm_numLeaves(); - Kokkos::parallel_for(numLeaves, op); + auto policy = Kokkos::RangePolicy(execSpace, 0, numLeaves); + Kokkos::parallel_for(policy, op); } -template +template KOKKOS_INLINE_FUNCTION void UpdateInteriorNodeBVs::operator()(unsigned argIdx) const { if (m_numLeaves > 1) { LocalOrdinal idx = static_cast(argIdx); - real_type bvMinMax[6]; + RealType bvMinMax[6]; get_box(bvMinMax, idx, m_leafMinMaxs); LocalOrdinal parent = tm_nodeParents(idx); - real_type sibMinMax[6]; + RealType sibMinMax[6]; while (Kokkos::atomic_fetch_add(&m_atomicFlags(parent - m_numLeaves), 1) == 1) { LocalOrdinal sib = tm_nodeChildren(parent, 0); @@ -721,10 +680,10 @@ void UpdateInteriorNodeBVs::operator()(unsigned argIdx } } -template +template template KOKKOS_FORCEINLINE_FUNCTION -void UpdateInteriorNodeBVs::get_box(real_type bvMinMax[6], LocalOrdinal idx, +void UpdateInteriorNodeBVs::get_box(RealType bvMinMax[6], LocalOrdinal idx, const BBox3dViewType &boxMinMaxs) const { for (LocalOrdinal j = 0; j < 6; ++j) { @@ -733,44 +692,43 @@ void UpdateInteriorNodeBVs::get_box(real_type bvMinMax } -template +template struct Traverse_MASTB_BVH_Functor { - using execution_space = ExecutionSpace; using value_type = int; - using real_type = RealType; - using LBVH_types = MortonLbvhTypes; - using kokkos_aabb_types = MortonAabbTypes; + using LBVH_types = MortonLbvhTypes; + using kokkos_aabb_types = MortonAabbTypes; using local_ordinals_tmt = typename LBVH_types::local_ordinals_tmt; using bboxes_3d_view_t = typename kokkos_aabb_types::bboxes_3d_view_t; using bboxes_const_3d_view_t = typename kokkos_aabb_types::bboxes_const_3d_view_t; - using collision_list_type 
= CollisionList; + using collision_list_type = CollisionList; Traverse_MASTB_BVH_Functor(bboxes_3d_view_t domainMinMaxs, local_ordinals_tmt domainIds, - const MortonAabbTree &rangeTree, + const MortonAabbTree &rangeTree, collision_list_type &collisions, bool flippedResults = false); KOKKOS_INLINE_FUNCTION void init(value_type &update) const { update = 0; } - static void apply_tree(const MortonAabbTree &domainTree, - const MortonAabbTree &rangeTree, + static void apply_tree(const MortonAabbTree &domainTree, + const MortonAabbTree &rangeTree, collision_list_type &collisions, + ExecutionSpace const& execSpace, bool flipOutputPairs = false); KOKKOS_INLINE_FUNCTION void operator()(unsigned domainIdx, value_type &update) const; KOKKOS_FORCEINLINE_FUNCTION - bool overlaps_range(real_type bvMinMax[6], LocalOrdinal rangeIdx) const; + bool overlaps_range(RealType bvMinMax[6], LocalOrdinal rangeIdx) const; KOKKOS_FORCEINLINE_FUNCTION bool is_range_leaf(LocalOrdinal rangeIdx) const{ return (rangeIdx < m_rangeRoot); } KOKKOS_FORCEINLINE_FUNCTION - void get_box(real_type bvMinMax[6], LocalOrdinal idx, const bboxes_const_3d_view_t &boxMinMaxs) const; + void get_box(RealType bvMinMax[6], LocalOrdinal idx, const bboxes_const_3d_view_t &boxMinMaxs) const; KOKKOS_INLINE_FUNCTION void join(value_type &update, const value_type &input) const { update = (input < update ? 
input : update); } @@ -789,11 +747,11 @@ struct Traverse_MASTB_BVH_Functor collision_list_type m_results; }; -template +template Traverse_MASTB_BVH_Functor::Traverse_MASTB_BVH_Functor( bboxes_3d_view_t domainMinMaxs, local_ordinals_tmt domainIds, - const MortonAabbTree &rangeTree, + const MortonAabbTree &rangeTree, collision_list_type &collisions, bool flippedResults) : m_domainMinMaxs(domainMinMaxs), @@ -806,11 +764,12 @@ Traverse_MASTB_BVH_Functor::Traverse_MASTB_BVH_Functor m_results(collisions) {} -template +template void Traverse_MASTB_BVH_Functor::apply_tree( - const MortonAabbTree &domainTree, - const MortonAabbTree &rangeTree, + const MortonAabbTree &domainTree, + const MortonAabbTree &rangeTree, collision_list_type &collisions, + ExecutionSpace const& execSpace, bool flipOutputPairs) { if ((domainTree.hm_numLeaves() == 0) || (rangeTree.hm_numLeaves() == 0)) { @@ -828,7 +787,8 @@ void Traverse_MASTB_BVH_Functor::apply_tree( const Traverse_MASTB_BVH_Functor op(domainTree.m_minMaxs, domainTree.m_leafIds, rangeTree, collisions, flipOutputPairs); - Kokkos::parallel_reduce(numDomainLeaves, op, retCode); + auto policy = Kokkos::RangePolicy(execSpace, 0, numDomainLeaves); + Kokkos::parallel_reduce(policy, op, retCode); int numActualCollisions = collisions.get_num_collisions(); @@ -837,17 +797,17 @@ void Traverse_MASTB_BVH_Functor::apply_tree( retCode = 0; const Traverse_MASTB_BVH_Functor op2(domainTree.m_minMaxs, domainTree.m_leafIds, rangeTree, collisions, flipOutputPairs); - Kokkos::parallel_reduce(numDomainLeaves, op2, retCode); + Kokkos::parallel_reduce(policy, op2, retCode); } } -template +template KOKKOS_INLINE_FUNCTION void Traverse_MASTB_BVH_Functor::operator()(unsigned argDomainIdx, value_type& update) const { LocalOrdinal domainIdx = tm_domainIds(argDomainIdx); - real_type bvMinMax[6]; + RealType bvMinMax[6]; get_box(bvMinMax, domainIdx, m_domainMinMaxs); if (m_rangeRoot > 1) { @@ -918,9 +878,9 @@ KOKKOS_INLINE_FUNCTION void Traverse_MASTB_BVH_Functor } 
} -template +template KOKKOS_FORCEINLINE_FUNCTION -bool Traverse_MASTB_BVH_Functor::overlaps_range(real_type bvMinMax[6], +bool Traverse_MASTB_BVH_Functor::overlaps_range(RealType bvMinMax[6], LocalOrdinal rangeIdx) const { return (bvMinMax[3] < tm_rangeMinMaxs(rangeIdx, 0) || @@ -931,9 +891,9 @@ bool Traverse_MASTB_BVH_Functor::overlaps_range(real_t bvMinMax[2] > tm_rangeMinMaxs(rangeIdx, 5)) ? false : true; } -template +template KOKKOS_FORCEINLINE_FUNCTION -void Traverse_MASTB_BVH_Functor::get_box(real_type bvMinMax[6], LocalOrdinal idx, +void Traverse_MASTB_BVH_Functor::get_box(RealType bvMinMax[6], LocalOrdinal idx, const bboxes_const_3d_view_t &boxMinMaxs) const { bvMinMax[0] = boxMinMaxs(idx, 0); @@ -944,7 +904,7 @@ void Traverse_MASTB_BVH_Functor::get_box(real_type bvM bvMinMax[5] = boxMinMaxs(idx, 5); } -template +template std::ostream &Traverse_MASTB_BVH_Functor::stream_pair(LocalOrdinal domainIdx, bool overlap, LocalOrdinal rangeIdx, std::ostream &os) const { diff --git a/packages/stk/stk_search_util/Jamfile b/packages/stk/stk_search_util/Jamfile index a0fda08b013b..63e5ab0474cf 100644 --- a/packages/stk/stk_search_util/Jamfile +++ b/packages/stk/stk_search_util/Jamfile @@ -67,8 +67,8 @@ local installed-developer-files = # # SECTION 2: Development install # -explicit install-serial-targets ; -alias install-serial-targets ; + + # Dependencies listed in this target are installed in the developer's project. # This should include all executables and any other files needed for developer use. diff --git a/packages/stk/stk_simd/Jamfile b/packages/stk/stk_simd/Jamfile index 7ff831f8cdfe..b2e19b1c3138 100644 --- a/packages/stk/stk_simd/Jamfile +++ b/packages/stk/stk_simd/Jamfile @@ -73,8 +73,8 @@ local installed-developer-files = # # SECTION 2: Development install # -explicit install-serial-targets ; -alias install-serial-targets ; + + # Dependencies listed in this target are installed in the developer's project. 
# This should include all executables and any other files needed for developer use. diff --git a/packages/stk/stk_tools/Jamfile b/packages/stk/stk_tools/Jamfile index d795a513187e..dd3ac218ccde 100644 --- a/packages/stk/stk_tools/Jamfile +++ b/packages/stk/stk_tools/Jamfile @@ -85,8 +85,8 @@ local installed-developer-files = local xml-files = ; -explicit install-serial-targets ; -alias install-serial-targets ; + + # Dependencies listed in this target are installed in the developer's project. # This should include all executables and any other files needed for developer use. diff --git a/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocks.cpp b/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocks.cpp index 6ea261e82a70..73fd8e531503 100644 --- a/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocks.cpp +++ b/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocks.cpp @@ -35,7 +35,7 @@ #include "stk_tools/mesh_tools/DisconnectBlocks.hpp" #include "stk_mesh/base/BulkData.hpp" #include "stk_mesh/base/GetEntities.hpp" -#include "stk_tools/mesh_tools/CustomAura.hpp" +#include "stk_mesh/base/MeshUtils.hpp" #include "stk_tools/mesh_tools/DetectHingesImpl.hpp" #include "stk_tools/mesh_tools/DisconnectBlocksImpl.hpp" #include "stk_util/environment/WallTime.hpp" diff --git a/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocksImpl.cpp b/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocksImpl.cpp index 83ce7b50829e..aa20d13a9f50 100644 --- a/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocksImpl.cpp +++ b/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectBlocksImpl.cpp @@ -1,7 +1,14 @@ #include "stk_io/IossBridge.hpp" #include "stk_mesh/base/BulkData.hpp" +#include "stk_mesh/base/FEMHelpers.hpp" #include "stk_mesh/base/GetEntities.hpp" #include "stk_mesh/base/Types.hpp" +#include "stk_mesh/base/Selector.hpp" +#include "stk_mesh/base/SideSetEntry.hpp" +#include "stk_mesh/base/SideSetUtil.hpp" +#include 
"stk_mesh/base/SkinMeshUtil.hpp" +#include "stk_mesh/base/ExodusTranslator.hpp" +#include "stk_mesh/baseImpl/MeshImplUtils.hpp" #include "stk_tools/mesh_tools/DisconnectBlocks.hpp" #include "stk_util/parallel/CommSparse.hpp" #include "stk_util/util/SortAndUnique.hpp" @@ -85,6 +92,26 @@ ReconnectGroup get_group_for_reconnect_node(const DisconnectGroup& disconnectGro return reconnectGroup; } +bool is_in_disconnected_block(const stk::mesh::BulkData& bulk, const stk::mesh::Entity elem, const BlockPair& blockPair) +{ + return bulk.bucket(elem).member(*blockPair.second); +} + +bool is_in_disconnected_block(const stk::mesh::Bucket & elemBucket, const BlockPair& blockPair) +{ + return elemBucket.member(*blockPair.second); +} + +bool is_in_preserved_block(const stk::mesh::BulkData& bulk, const stk::mesh::Entity elem, const BlockPair& blockPair) +{ + return bulk.bucket(elem).member(*blockPair.first); +} + +bool is_in_preserved_block(const stk::mesh::Bucket & elemBucket, const BlockPair& blockPair) +{ + return elemBucket.member(*blockPair.first); +} + void insert_parts_uniquely(const stk::mesh::PartVector& fromParts, stk::mesh::PartVector& toParts) { stk::mesh::PartLess compare; @@ -108,13 +135,12 @@ void add_to_sharing_lookup(const stk::mesh::BulkData& bulk, stk::mesh::Entity no void create_new_node_map_entry(const stk::mesh::BulkData& bulk, const stk::mesh::Entity node, const BlockPair& blockPair, LinkInfo& info) { - const stk::mesh::Part & secondBlock = *blockPair.second; const stk::mesh::Entity * elems = bulk.begin_elements(node); const unsigned numElems = bulk.num_elements(node); bool needToCloneNode = false; for (unsigned i = 0; i < numElems; ++i) { const stk::mesh::Bucket & elemBucket = bulk.bucket(elems[i]); - if (elemBucket.member(secondBlock) && elemBucket.owned()) { + if (is_in_disconnected_block(elemBucket, blockPair) && elemBucket.owned()) { needToCloneNode = true; break; } @@ -165,6 +191,66 @@ void add_nodes_to_disconnect(const stk::mesh::BulkData & bulk, } } 
+void add_faces_to_disconnect(stk::mesh::BulkData& bulk, + BlockPair const& blockPair, + LinkInfo& info) +{ + const stk::mesh::MetaData & meta = bulk.mesh_meta_data(); + const stk::mesh::Part& firstBlock = *blockPair.first; + const stk::mesh::Part& secondBlock = *blockPair.second; + + auto firstBlockSurfaces = meta.get_surfaces_touched_by_block(blockPair.first); + auto secondBlockSurfaces = meta.get_surfaces_touched_by_block(blockPair.second); + + auto check_is_in_blocks = [](auto& inBlock, auto& surfaces, auto partOrdinal) { + for (auto surface : surfaces) { + if (partOrdinal == surface->mesh_meta_data_ordinal()) { + inBlock = true; + break; + } + }}; + + stk::mesh::Selector blockBoundary = firstBlock & secondBlock & + (meta.locally_owned_part() | meta.globally_shared_part()); + const stk::mesh::BucketVector & onBlockBoundaryFaces = bulk.get_buckets(meta.side_rank(), blockBoundary); + for (auto bucket : onBlockBoundaryFaces) { + stk::mesh::PartVector bucketSidesetParts; + + const stk::mesh::PartVector& superset = bucket->supersets(); + for (stk::mesh::Part* part : superset) { + if (stk::mesh::is_side_set(*part)) { + bucketSidesetParts.push_back(part); + } + } + + for (auto face : *bucket) { + auto elems = bulk.begin_elements(face); + auto numElems = bulk.num_elements(face); + STK_ThrowRequire(numElems == 2u); + + std::pair isInBlocks = {false, false}; + for (auto part : bucketSidesetParts) { + auto ordinal = part->mesh_meta_data_ordinal(); + + check_is_in_blocks(isInBlocks.first, firstBlockSurfaces, ordinal); + check_is_in_blocks(isInBlocks.second, secondBlockSurfaces, ordinal); + } + + auto ordinals = bulk.begin_ordinals(face, stk::topology::ELEM_RANK); + + for (unsigned i = 0; i < numElems; ++i) { + if (is_in_disconnected_block(bulk, elems[i], blockPair)) { + InternalFaceInfo faceInfo(blockPair, + elems[1u-i], ordinals[1u-i], + elems[i], ordinals[i], face, + isInBlocks); + stk::util::insert_keep_sorted_and_unique(faceInfo, info.internalSides); + } + } + } + } +} 
+ void create_new_duplicate_node_IDs(stk::mesh::BulkData & bulk, LinkInfo& info) { std::vector newNodeIDs; @@ -249,43 +335,92 @@ std::vector get_block_pairs_to_disconnect(const stk::mesh::BulkData & return blockPairsToDisconnect; } -void update_disconnected_entity_relation(stk::mesh::BulkData& bulk, stk::mesh::Entity node, stk::mesh::Entity newNode, +bool update_disconnected_entity_relation(stk::mesh::BulkData& bulk, stk::mesh::Entity node, stk::mesh::Entity newNode, stk::mesh::Entity entity) { + bool updatedNode = false; unsigned numNodes = bulk.num_connectivity(entity, stk::topology::NODE_RANK); - const stk::mesh::Entity * elemNodes = bulk.begin(entity, stk::topology::NODE_RANK); + const stk::mesh::Entity * entityNodes = bulk.begin(entity, stk::topology::NODE_RANK); stk::mesh::ConnectivityOrdinal const * nodeOrdinals = bulk.begin_ordinals(entity, stk::topology::NODE_RANK); for (unsigned iNode = 0; iNode < numNodes; ++iNode) { - if (elemNodes[iNode] == node) { + if (entityNodes[iNode] == node) { bulk.destroy_relation(entity, node, nodeOrdinals[iNode]); bulk.declare_relation(entity, newNode, nodeOrdinals[iNode]); + updatedNode = true; + } + } + return updatedNode; +} + +void update_internal_face_node_map(stk::mesh::BulkData& bulk, stk::mesh::Entity node, stk::mesh::Entity newNode, + InternalFaceInfo& faceInfo) +{ + stk::mesh::Entity face = faceInfo.internalFace; + unsigned numNodes = bulk.num_connectivity(face, stk::topology::NODE_RANK); + const stk::mesh::Entity * entityNodes = bulk.begin(face, stk::topology::NODE_RANK); + stk::mesh::ConnectivityOrdinal const * nodeOrdinals = bulk.begin_ordinals(face, stk::topology::NODE_RANK); + for (unsigned iNode = 0; iNode < numNodes; ++iNode) { + if (entityNodes[iNode] == node) { + faceInfo.nodeMap[std::make_pair(node,nodeOrdinals[iNode])] = newNode; } } } -void disconnect_sub_rank(stk::mesh::BulkData& bulk, stk::mesh::EntityRank rank, stk::mesh::Entity newNode, - const stk::mesh::Entity oldNode, stk::mesh::Entity elem) +void 
update_internal_face_entity_relation(stk::mesh::BulkData& bulk, InternalFaceInfo& faceInfo) +{ + stk::mesh::Entity face = faceInfo.internalFace; + + for (auto iter = faceInfo.nodeMap.begin(); iter != faceInfo.nodeMap.end(); ++iter) { + const auto& key = iter->first; + const auto& newNode = iter->second; + + stk::mesh::Entity node = key.first; + stk::mesh::ConnectivityOrdinal ordinal = key.second; + + bulk.destroy_relation(face, node, ordinal); + bulk.declare_relation(face, newNode, ordinal); + } +} + +void disconnect_sub_rank(stk::mesh::BulkData& bulk, LinkInfo& info, stk::mesh::EntityRank rank, + stk::mesh::Entity newNode, const stk::mesh::Entity oldNode, + stk::mesh::Entity elem, stk::tools::BlockPair& blockPair) { + if (blockPair.is_valid() && is_in_preserved_block(bulk, elem, blockPair)) { return; } + + auto sideRank = bulk.mesh_meta_data().side_rank(); const stk::mesh::Entity* subEntities = bulk.begin(elem, rank); unsigned numSubEntities = bulk.num_connectivity(elem, rank); for(unsigned i = 0; i < numSubEntities; i++) { - update_disconnected_entity_relation(bulk, oldNode, newNode, subEntities[i]); + auto entity = subEntities[i]; + + if(sideRank == rank) { + auto iter = std::lower_bound(info.internalSides.begin(), info.internalSides.end(), entity); + if (!(iter == info.internalSides.end()) && !(entity < *iter)) { + update_internal_face_node_map(bulk, oldNode, newNode, *iter); + continue; + } + } + + update_disconnected_entity_relation(bulk, oldNode, newNode, entity); } } -void disconnect_sub_ranks(stk::mesh::BulkData& bulk, stk::mesh::Entity newNode, - const stk::mesh::Entity oldNode, stk::mesh::Entity elem) +void disconnect_sub_ranks(stk::mesh::BulkData& bulk, LinkInfo& info, + stk::mesh::Entity newNode, const stk::mesh::Entity oldNode, + stk::mesh::Entity elem, stk::tools::BlockPair& blockPair) { - disconnect_sub_rank(bulk, bulk.mesh_meta_data().side_rank(), newNode, oldNode, elem); + disconnect_sub_rank(bulk, info, bulk.mesh_meta_data().side_rank(), newNode, 
oldNode, elem, blockPair); if(bulk.mesh_meta_data().side_rank() != stk::topology::EDGE_RANK) { - disconnect_sub_rank(bulk, stk::topology::EDGE_RANK, newNode, oldNode, elem); + disconnect_sub_rank(bulk, info, stk::topology::EDGE_RANK, newNode, oldNode, elem, blockPair); } } void disconnect_elements(stk::mesh::BulkData& bulk, const NodeMapKey& key, NodeMapValue& value, LinkInfo& info) { const DisconnectGroup& group = key.disconnectedGroup; + auto blockPair = group.get_block_pair(); if (group.is_active()) { const stk::mesh::Entity node = key.parentNode; @@ -298,9 +433,11 @@ void disconnect_elements(stk::mesh::BulkData& bulk, const NodeMapKey& key, NodeM for (int sharingProc : value.sharingProcs) { bulk.add_node_sharing(newNode, sharingProc); } - update_disconnected_entity_relation(bulk, node, newNode, elem); + bool updatedNode = update_disconnected_entity_relation(bulk, node, newNode, elem); - disconnect_sub_ranks(bulk, newNode, node, elem); + if(updatedNode) { + disconnect_sub_ranks(bulk, info, newNode, node, elem, blockPair); + } if (bulk.num_elements(node) == 0 && !info.preserveOrphans) { bulk.destroy_entity(node); @@ -322,6 +459,275 @@ void disconnect_elements(stk::mesh::BulkData & bulk, const BlockPair & blockPair } } +void destroy_internal_face(stk::mesh::BulkData& bulk, const InternalFaceInfo& faceInfo) +{ + bulk.destroy_relation(faceInfo.elemInSecondBlock, faceInfo.internalFace, faceInfo.faceOrdinalInSecondBlock); + bulk.destroy_relation(faceInfo.elemInFirstBlock, faceInfo.internalFace, faceInfo.faceOrdinalInFirstBlock); + bulk.destroy_entity(faceInfo.internalFace, false); +} + +void duplicate_and_destroy_internal_face(stk::mesh::BulkData& bulk, + const InternalFaceInfo& faceInfo, + const InternalFaceDisconnectState firstBlockState, + const InternalFaceDisconnectState secondBlockState, + stk::mesh::ConstPartVector& firstBlockSurfaces, + stk::mesh::ConstPartVector& secondBlockSurfaces) +{ + bulk.destroy_relation(faceInfo.elemInSecondBlock, 
faceInfo.internalFace, faceInfo.faceOrdinalInSecondBlock); + bulk.destroy_relation(faceInfo.elemInFirstBlock, faceInfo.internalFace, faceInfo.faceOrdinalInFirstBlock); + + if(secondBlockState == InternalFaceDisconnectState::KEEP) { + stk::mesh::Entity newFace = bulk.declare_element_side(faceInfo.elemInSecondBlock, + faceInfo.faceOrdinalInSecondBlock, + stk::mesh::PartVector{faceInfo.blockPair.second}); + STK_ThrowRequire(newFace != faceInfo.internalFace); + + bulk.copy_entity_fields(faceInfo.internalFace, newFace); + bulk.change_entity_parts(newFace, secondBlockSurfaces, firstBlockSurfaces); + } + + if(firstBlockState == InternalFaceDisconnectState::KEEP) { + stk::mesh::Entity newFace = bulk.declare_element_side(faceInfo.elemInFirstBlock, + faceInfo.faceOrdinalInFirstBlock, + stk::mesh::PartVector{faceInfo.blockPair.first}); + STK_ThrowRequire(newFace != faceInfo.internalFace); + + bulk.copy_entity_fields(faceInfo.internalFace, newFace); + bulk.change_entity_parts(newFace, firstBlockSurfaces, secondBlockSurfaces); + } + + bulk.destroy_entity(faceInfo.internalFace, false); +} + +void connect_element_side_to_internal_face(stk::mesh::BulkData& bulk, + stk::mesh::Entity elem, + stk::mesh::ConnectivityOrdinal sideOrdinal, + stk::mesh::Entity face, + stk::mesh::ConstPartVector& addSurfaces, + stk::mesh::ConstPartVector& removeSurfaces) +{ + auto numElems = bulk.num_elements(face); + auto elems = bulk.begin_elements(face); + auto elemOrdinals = bulk.begin_ordinals(face, stk::topology::ELEM_RANK); + for(unsigned i=0; i +get_surface_parts_for_internal_face_block(stk::mesh::BulkData& bulk, stk::mesh::Part* block, stk::mesh::Entity face) +{ + auto& meta = bulk.mesh_meta_data(); + auto surfaces = meta.get_surfaces_touched_by_block(block); + + const stk::mesh::Bucket& faceBucket = bulk.bucket(face); + + auto is_not_selected = [&](const stk::mesh::Part* surface) { + return !faceBucket.member(*surface) ; + }; + auto end = std::remove_if(surfaces.begin(), surfaces.end(), 
is_not_selected); + surfaces.erase(end, surfaces.end()); + + return surfaces; +} + +void disconnect_internal_face(stk::mesh::BulkData& bulk, const InternalFaceInfo& faceInfo) +{ + auto firstBlockSurfaces = get_surface_parts_for_internal_face_block(bulk, faceInfo.blockPair.first, faceInfo.internalFace); + firstBlockSurfaces.push_back(faceInfo.blockPair.first); + + auto secondBlockSurfaces = get_surface_parts_for_internal_face_block(bulk, faceInfo.blockPair.second, faceInfo.internalFace); + secondBlockSurfaces.push_back(faceInfo.blockPair.second); + + InternalFaceDisconnectState firstBlockState = get_internal_face_disconnect_state(bulk, + faceInfo.elemInFirstBlock, + faceInfo.isInBlockPairSideset.first); + InternalFaceDisconnectState secondBlockState = get_internal_face_disconnect_state(bulk, + faceInfo.elemInSecondBlock, + faceInfo.isInBlockPairSideset.second); + + bool wedgedSidesetAttachedToBothBlocksWithOneLocalElement = + ( firstBlockState == InternalFaceDisconnectState::KEEP && secondBlockState == InternalFaceDisconnectState::DESTROY) || + (secondBlockState == InternalFaceDisconnectState::KEEP && firstBlockState == InternalFaceDisconnectState::DESTROY); + + if(wedgedSidesetAttachedToBothBlocksWithOneLocalElement) { + detach_internal_face(bulk, faceInfo, firstBlockState, secondBlockState, firstBlockSurfaces, secondBlockSurfaces); + } + + bool wedgedSidesetAttachedToOneBlockWithLocalAndRemoteElement = + ( firstBlockState == InternalFaceDisconnectState::NOOP && secondBlockState == InternalFaceDisconnectState::DESTROY) || + (secondBlockState == InternalFaceDisconnectState::NOOP && firstBlockState == InternalFaceDisconnectState::DESTROY); + + if(wedgedSidesetAttachedToOneBlockWithLocalAndRemoteElement) { + destroy_internal_face(bulk, faceInfo); + } + + bool wedgedSidesetAttachedToOneBlockWithLocalElementInSidesetBlock = + ( firstBlockState == InternalFaceDisconnectState::NOOP && secondBlockState == InternalFaceDisconnectState::KEEP) || + (secondBlockState == 
InternalFaceDisconnectState::NOOP && firstBlockState == InternalFaceDisconnectState::KEEP); + + if(wedgedSidesetAttachedToOneBlockWithLocalElementInSidesetBlock) { + detach_internal_face(bulk, faceInfo, firstBlockState, secondBlockState, firstBlockSurfaces, secondBlockSurfaces); + } + + bool wedgedSidesetAttachedToBothBlocksWithBothLocalElements = + (firstBlockState == InternalFaceDisconnectState::KEEP && secondBlockState == InternalFaceDisconnectState::KEEP); + + if(wedgedSidesetAttachedToBothBlocksWithBothLocalElements) { + detach_and_duplicate_internal_face(bulk, faceInfo, firstBlockState, secondBlockState, firstBlockSurfaces, secondBlockSurfaces); + } +} + +void disconnect_faces(stk::mesh::BulkData& bulk, LinkInfo& info) +{ + for (auto faceInfo : info.internalSides) { + update_internal_face_entity_relation(bulk, faceInfo); + } + + if (bulk.has_face_adjacent_element_graph()) { + bulk.get_face_adjacent_element_graph().fill_from_mesh(); + } + + for (auto faceInfo : info.internalSides) { + disconnect_internal_face(bulk, faceInfo); + } +} + std::vector get_node_sharing_for_restoration(stk::mesh::BulkData& bulk, const stk::mesh::Part* blockPart, const DisconnectGroup& group, stk::mesh::Entity destNode, LinkInfo& info) { @@ -473,12 +879,11 @@ bool can_be_reconnected(const DisconnectGroup& disconnectedGroup, const NodeMapV { const stk::mesh::BulkData& bulk = disconnectedGroup.get_bulk(); - if(!bulk.is_valid(currentEntity)) { return false; } const stk::mesh::PartVector& blockMembership = nodeMapValue.oldBlockMembership; bool isOriginalMember = false; - bool isInOneOfBlockPair = bulk.bucket(currentEntity).member(*blockPair.first) != bulk.bucket(currentEntity).member(*blockPair.second); + bool isInOneOfBlockPair = is_in_preserved_block(bulk,currentEntity,blockPair) != is_in_disconnected_block(bulk,currentEntity,blockPair); if (disconnectedGroup.has_block_pair() && isInOneOfBlockPair) { isOriginalMember = std::binary_search(blockMembership.begin(), blockMembership.end(), 
blockPair.first, stk::mesh::PartLess()) && @@ -761,7 +1166,7 @@ void determine_local_reconnect_node_id(stk::mesh::BulkData& bulk, const std::vec auto fill_reconnect_node_info_func = [&](const stk::mesh::PartVector& transitiveBlockList_, NodeMapType::iterator it, stk::mesh::Entity currentEntity_) { - unsigned groupId = it->second.reconnectGroupId; // reconnectGroup.get_id(); + unsigned groupId = it->second.reconnectGroupId; ReconnectMapKey mapKey = std::make_pair(it->second.oldNodeId, groupId); auto reconnectMapIter = info.reconnectMap.find(mapKey); @@ -897,6 +1302,7 @@ void disconnect_block_pairs(stk::mesh::BulkData& bulk, const std::vector 0)) { @@ -916,6 +1322,8 @@ void disconnect_block_pairs(stk::mesh::BulkData& bulk, const std::vector; using PreservedSharingInfo = std::map>; using NodeMapIterator = NodeMapType::iterator; using ReconnectMapKey = std::pair; +using InternalSidePerPair = std::vector>; struct ReconnectNodeInfo { stk::mesh::EntityId reconnectNodeId = stk::mesh::InvalidEntityId; @@ -136,8 +137,82 @@ class NullStream : public std::ostream { NullStream() : std::ostream( &m_nb ) {} }; +enum class InternalFaceDisconnectState {KEEP, DESTROY, NOOP}; + +inline std::ostream &operator<<(std::ostream &out, const InternalFaceDisconnectState &t) +{ + switch (t) { + case InternalFaceDisconnectState::KEEP: return out << "KEEP"; break; + case InternalFaceDisconnectState::DESTROY: return out << "DESTROY"; break; + case InternalFaceDisconnectState::NOOP: return out << "NOOP"; break; + default: return out << "INVALID"; break; + } + return out << "INVALID[" << (unsigned)t << "]"; +} + +struct InternalFaceInfo +{ + BlockPair blockPair; + stk::mesh::Entity elemInFirstBlock; + stk::mesh::ConnectivityOrdinal faceOrdinalInFirstBlock; + stk::mesh::Entity elemInSecondBlock; + stk::mesh::ConnectivityOrdinal faceOrdinalInSecondBlock; + stk::mesh::Entity internalFace; + std::pair isInBlockPairSideset; + std::map, stk::mesh::Entity> nodeMap; + + InternalFaceInfo(const BlockPair& 
blockPair_, + const stk::mesh::Entity elemInFirstBlock_, + const stk::mesh::ConnectivityOrdinal faceOrdinalInFirstBlock_, + const stk::mesh::Entity elemInSecondBlock_, + const stk::mesh::ConnectivityOrdinal faceOrdinalInSecondBlock_, + const stk::mesh::Entity internalFace_, + const std::pair& isInBlockPairSideset_) + : blockPair(blockPair_) + , elemInFirstBlock(elemInFirstBlock_) + , faceOrdinalInFirstBlock(faceOrdinalInFirstBlock_) + , elemInSecondBlock(elemInSecondBlock_) + , faceOrdinalInSecondBlock(faceOrdinalInSecondBlock_) + , internalFace(internalFace_) + , isInBlockPairSideset(isInBlockPairSideset_) {} + + operator stk::mesh::Entity() const { return internalFace; } + + bool operator<(const InternalFaceInfo &rhs) + { + return internalFace < rhs.internalFace; + }; + + bool operator<(const stk::mesh::Entity &rhs) + { + return internalFace < rhs; + }; + + bool operator==(const InternalFaceInfo &rhs) + { + return internalFace == rhs.internalFace; + }; + + bool operator==(const stk::mesh::Entity &rhs) + { + return internalFace == rhs; + }; + + bool operator!=(const InternalFaceInfo &rhs) + { + return internalFace != rhs.internalFace; + }; + + bool operator!=(const stk::mesh::Entity &rhs) + { + return internalFace != rhs; + }; +}; + struct LinkInfo { + using DisconnectInternalFaces = std::vector; + PreservedSharingInfo sharedInfo; NodeMapType clonedNodeMap; NodeMapType originalNodeMap; @@ -147,6 +222,7 @@ struct LinkInfo std::ostringstream os; NullStream ns; ReconnectMap reconnectMap; + DisconnectInternalFaces internalSides; double startTime; double setupTime; @@ -207,6 +283,10 @@ void add_nodes_to_disconnect(const stk::mesh::BulkData & bulk, const BlockPair & blockPair, LinkInfo& info); +void add_sidesets_to_disconnect(stk::mesh::BulkData const& bulk, + BlockPair const& blockPair, + LinkInfo& info); + void create_new_duplicate_node_IDs(stk::mesh::BulkData & bulk, LinkInfo& info); void communicate_shared_node_information(stk::mesh::BulkData & bulk, LinkInfo& 
info); diff --git a/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectTypes.hpp b/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectTypes.hpp index aa97d0cc5200..852baa7d22a5 100644 --- a/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectTypes.hpp +++ b/packages/stk/stk_tools/stk_tools/mesh_tools/DisconnectTypes.hpp @@ -92,6 +92,7 @@ struct BlockPair { const stk::mesh::Part* get_first() const { return first; } const stk::mesh::Part* get_second() const { return second; } bool is_adjacent() const { return true; } + bool is_valid() const {return (first != nullptr) && (second != nullptr) && (first != second); } stk::mesh::Part* first; stk::mesh::Part* second; diff --git a/packages/stk/stk_topology/Jamfile b/packages/stk/stk_topology/Jamfile index a26f5daf85d2..bec8941603d5 100644 --- a/packages/stk/stk_topology/Jamfile +++ b/packages/stk/stk_topology/Jamfile @@ -73,8 +73,8 @@ local installed-developer-files = # # SECTION 2: Development install # -explicit install-serial-targets ; -alias install-serial-targets ; + + # Dependencies listed in this target are installed in the developer's project. # This should include all executables and any other files needed for developer use. diff --git a/packages/stk/stk_transfer/Jamfile b/packages/stk/stk_transfer/Jamfile index af0f82ce1d0b..c6953a181144 100644 --- a/packages/stk/stk_transfer/Jamfile +++ b/packages/stk/stk_transfer/Jamfile @@ -71,8 +71,8 @@ local installed-developer-files = # # SECTION 2: Development install # -explicit install-serial-targets ; -alias install-serial-targets ; + + # Dependencies listed in this target are installed in the developer's project. # This should include all executables and any other files needed for developer use. 
diff --git a/packages/stk/stk_transfer_util/Jamfile b/packages/stk/stk_transfer_util/Jamfile index 7651f6b0fa40..702b38d321ef 100644 --- a/packages/stk/stk_transfer_util/Jamfile +++ b/packages/stk/stk_transfer_util/Jamfile @@ -67,8 +67,8 @@ local installed-developer-files = # # SECTION 2: Development install # -explicit install-serial-targets ; -alias install-serial-targets ; + + # Dependencies listed in this target are installed in the developer's project. # This should include all executables and any other files needed for developer use. diff --git a/packages/stk/stk_transfer_util/stk_transfer_util/CMakeLists.txt b/packages/stk/stk_transfer_util/stk_transfer_util/CMakeLists.txt index 82787aa135ad..e3cd4b9a6654 100644 --- a/packages/stk/stk_transfer_util/stk_transfer_util/CMakeLists.txt +++ b/packages/stk/stk_transfer_util/stk_transfer_util/CMakeLists.txt @@ -51,8 +51,12 @@ else() target_link_libraries(stk_transfer_util PUBLIC stk_search_util) target_link_libraries(stk_transfer_util PUBLIC stk_util_env) - find_package(LAPACK REQUIRED) - target_link_libraries(stk_transfer_util PUBLIC LAPACK::LAPACK) + if(USE_SIERRA_BLAS_LAPACK) + target_link_libraries(stk_transfer_util PUBLIC sierra_blas_lapack) + else() + find_package(LAPACK REQUIRED) + target_link_libraries(stk_transfer_util PUBLIC LAPACK::LAPACK) + endif() target_include_directories(stk_transfer_util PUBLIC $ diff --git a/packages/stk/stk_unit_test_utils/Jamfile b/packages/stk/stk_unit_test_utils/Jamfile index beec402a2f11..d2a13c23d29e 100644 --- a/packages/stk/stk_unit_test_utils/Jamfile +++ b/packages/stk/stk_unit_test_utils/Jamfile @@ -64,8 +64,8 @@ local installed-developer-files = # # SECTION 2: Development install # -explicit install-serial-targets ; -alias install-serial-targets ; + + # Dependencies listed in this target are installed in the developer's project. # This should include all executables and any other files needed for developer use. 
diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp index 0a2ac0b77900..61304a0bbb54 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/NgpDebugFieldSync_Fixtures.hpp @@ -394,6 +394,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::simple_fields::Mesh const int component = 0; stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + ngpField.sync_to_device(); stk::mesh::for_each_entity_run(ngpMesh, stk::topology::ELEM_RANK, selector, KOKKOS_LAMBDA(const stk::mesh::FastMeshIndex& entityIndex) { @@ -412,6 +413,7 @@ class NgpDebugFieldSyncFixture : public stk::unit_test_util::simple_fields::Mesh { stk::mesh::NgpMesh & ngpMesh = stk::mesh::get_updated_ngp_mesh(get_bulk()); stk::mesh::NgpField & ngpField = stk::mesh::get_updated_ngp_field(stkField); + ngpField.sync_to_device(); stk::mesh::for_each_entity_run(ngpMesh, stk::topology::ELEM_RANK, selector, KOKKOS_LAMBDA(const stk::mesh::FastMeshIndex& entityIndex) { diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldBLASTest.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldBLASTest.cpp index 3639d9c776d4..e785a696ac82 100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldBLASTest.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldBLASTest.cpp @@ -100,7 +100,6 @@ class NgpFieldBLAS : public stk::unit_test_util::simple_fields::MeshFixture stk::unit_test_util::simple_fields::setup_text_mesh(get_bulk(), meshDesc); EXPECT_FALSE(stkField1->need_sync_to_host()); - EXPECT_FALSE(stkField1->need_sync_to_device()); } const int numComponent1 = 8; diff --git a/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp b/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp index 0fed3ee6b46a..9263d7397e5a 
100644 --- a/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp +++ b/packages/stk/stk_unit_tests/stk_mesh/ngp/ngpFieldTest.cpp @@ -1032,7 +1032,6 @@ TEST_F(NgpFieldFixture, ModifyOnHostFlagClearedOnInitialNgpFieldConstruction) setup_one_field_one_element_mesh(); stk::mesh::Field& field1 = *get_meta().get_field(stk::topology::ELEM_RANK, "field1"); - EXPECT_FALSE(field1.need_sync_to_device()); field1.modify_on_host(); auto ngpfield = stk::mesh::get_updated_ngp_field(field1); @@ -1046,7 +1045,6 @@ TEST_F(NgpFieldFixture, InvalidModifyFlagCondition) setup_one_field_one_element_mesh(); stk::mesh::Field& field1 = *get_meta().get_field(stk::topology::ELEM_RANK, "field1"); - EXPECT_FALSE(field1.need_sync_to_device()); auto ngpfield = stk::mesh::get_updated_ngp_field(field1); EXPECT_FALSE(field1.need_sync_to_device()); @@ -1063,6 +1061,7 @@ TEST_F(NgpFieldFixture, PersistentModifyOnDeviceFlag) stk::mesh::Field& field1 = *get_meta().get_field(stk::topology::ELEM_RANK, "field1"); EXPECT_FALSE(field1.need_sync_to_host()); + field1.sync_to_device(); field1.modify_on_device(); auto ngpfield = stk::mesh::get_updated_ngp_field(field1); @@ -1141,7 +1140,7 @@ TEST_F(NgpFieldFixture, DeviceField_set_all_after_modified_on_host) auto stkField1 = get_meta().get_field(stk::topology::ELEM_RANK, "variableLengthField1"); EXPECT_FALSE(stkField1->need_sync_to_host()); - EXPECT_FALSE(stkField1->need_sync_to_device()); + EXPECT_TRUE(stkField1->need_sync_to_device()); stk::mesh::NgpField ngpField1 = stk::mesh::get_updated_ngp_field(*stkField1); @@ -1167,8 +1166,8 @@ TEST_F(NgpFieldFixture, blas_field_copy_device_to_device) EXPECT_FALSE(stkField1->need_sync_to_host()); EXPECT_FALSE(stkField2->need_sync_to_host()); - EXPECT_FALSE(stkField1->need_sync_to_device()); - EXPECT_FALSE(stkField2->need_sync_to_device()); + EXPECT_TRUE(stkField1->need_sync_to_device()); + EXPECT_TRUE(stkField2->need_sync_to_device()); const double myConstantValue = 97.9; diff --git 
a/packages/stk/stk_unit_tests/stk_tools/mesh_tools/DisconnectBlocksMeshConstruction.cpp b/packages/stk/stk_unit_tests/stk_tools/mesh_tools/DisconnectBlocksMeshConstruction.cpp index ee0d985c3b6c..1fbc11ee682e 100644 --- a/packages/stk/stk_unit_tests/stk_tools/mesh_tools/DisconnectBlocksMeshConstruction.cpp +++ b/packages/stk/stk_unit_tests/stk_tools/mesh_tools/DisconnectBlocksMeshConstruction.cpp @@ -1344,6 +1344,70 @@ unsigned get_num_total_sides(const stk::mesh::BulkData & bulk) return get_num_total_entities(bulk, bulk.mesh_meta_data().side_rank()); } +stk::mesh::EntityVector get_element_side_nodes(stk::mesh::BulkData & bulk, + stk::mesh::Entity elem, + stk::mesh::ConnectivityOrdinal sideOrdinal) +{ + auto elemTopology = bulk.bucket(elem).topology(); + auto sideTopology = elemTopology.side_topology(sideOrdinal); + + std::vector elementSideNodeOrdinalVector(sideTopology.num_nodes()); + elemTopology.side_node_ordinals(sideOrdinal, elementSideNodeOrdinalVector.data()); + + stk::mesh::EntityVector elementSideNodeVector; + auto elemNodes = bulk.begin_nodes(elem); + for(auto nodeIndex : elementSideNodeOrdinalVector) { + elementSideNodeVector.push_back(elemNodes[nodeIndex]); + } + + return elementSideNodeVector; +} + +bool verify_attached_faces(stk::mesh::BulkData & bulk) +{ + for (stk::mesh::Bucket * bucket : bulk.buckets(bulk.mesh_meta_data().side_rank())) { + for (stk::mesh::Entity face : *bucket) { + auto numElems = bulk.num_elements(face); + auto elems = bulk.begin_elements(face); + auto ordinals = bulk.begin_ordinals(face, stk::topology::ELEM_RANK); + stk::mesh::EntityVector faceNodes(bulk.begin_nodes(face), bulk.begin_nodes(face)+bulk.num_nodes(face)); + std::sort(faceNodes.begin(), faceNodes.end()); + + for (unsigned i = 0; i < numElems; ++i) { + auto elem = elems[i]; + auto sideOrdinal = ordinals[i]; + + stk::mesh::EntityVector elementSideNodeVector = get_element_side_nodes(bulk, elem, sideOrdinal); + std::sort(elementSideNodeVector.begin(), 
elementSideNodeVector.end()); + + if(faceNodes != elementSideNodeVector) { + std::ostringstream oss; + + oss << "P" << bulk.parallel_rank() + << ": Could not match nodes on face: " << bulk.entity_key(face) + << " with element: " << bulk.entity_rank(elem) + << " on ordinal: " << sideOrdinal << std::endl; + + oss << "\tFace nodes\n"; + for(auto node : faceNodes) { + oss << "\t\t" << bulk.entity_key(node) << std::endl; + } + + oss << "\n\tElement side nodes\n"; + for(auto node : elementSideNodeVector) { + oss << "\t\t" << bulk.entity_key(node) << std::endl; + } + + std::cout << oss.str(); + return false; + } + } + } + } + + return true; +} + bool check_orphaned_nodes(stk::mesh::BulkData & bulk) { bool foundOrphanedNode = false; @@ -2262,10 +2326,42 @@ stk::mesh::PartVector setup_mesh_2block_2hex(stk::mesh::BulkData& bulk) return {block1, block2}; } -stk::mesh::PartVector setup_mesh_2block_2hex_with_internal_sides(stk::mesh::BulkData& bulk) +stk::mesh::PartVector setup_mesh_2block_2hex_with_internal_sides(stk::mesh::BulkData& bulk, bool loadMeshFirst) +{ + stk::mesh::Part * block2 = nullptr; + + if(!loadMeshFirst) { + block2 = &create_part(bulk.mesh_meta_data(), stk::topology::HEX_8, "block_2", 2); + create_part(bulk.mesh_meta_data(), stk::topology::QUAD_4, "surface_1", 1); + } + + stk::io::fill_mesh("generated:1x1x2", bulk); + + if(loadMeshFirst) { + block2 = &create_part(bulk.mesh_meta_data(), stk::topology::HEX_8, "block_2", 2); + create_part(bulk.mesh_meta_data(), stk::topology::QUAD_4, "surface_1", 1); + } + + stk::mesh::Part * block1 = bulk.mesh_meta_data().get_part("block_1"); + move_elems_from_block_to_block(bulk, std::vector{2}, "block_1", "block_2"); + + create_sideset(bulk, "surface_1", {"block_1"}); + create_sides_between_blocks(bulk, "block_1", "block_2", "surface_1"); + + EXPECT_EQ( 4u, get_num_intersecting_nodes(bulk, {block1, block2})); + EXPECT_EQ( 1u, get_num_common_sides(bulk, {block1, block2})); + EXPECT_EQ(12u, get_num_total_nodes(bulk)); + 
EXPECT_EQ( 1u, get_num_total_sides(bulk)); + EXPECT_FALSE(check_orphaned_nodes(bulk)); + + return {block1, block2}; +} + +stk::mesh::PartVector setup_mesh_2block_2hex_with_internal_and_external_sides(stk::mesh::BulkData& bulk) { stk::mesh::Part * block2 = &create_part(bulk.mesh_meta_data(), stk::topology::HEX_8, "block_2", 2); create_part(bulk.mesh_meta_data(), stk::topology::QUAD_4, "surface_1", 1); + create_part(bulk.mesh_meta_data(), stk::topology::QUAD_4, "surface_2", 2); stk::io::fill_mesh("generated:1x1x2", bulk); stk::mesh::Part * block1 = bulk.mesh_meta_data().get_part("block_1"); @@ -2274,11 +2370,36 @@ stk::mesh::PartVector setup_mesh_2block_2hex_with_internal_sides(stk::mesh::Bulk create_sideset(bulk, "surface_1", {"block_1"}); create_sides_between_blocks(bulk, "block_1", "block_2", "surface_1"); + create_all_boundary_sides(bulk, "surface_2"); EXPECT_EQ( 4u, get_num_intersecting_nodes(bulk, {block1, block2})); EXPECT_EQ( 1u, get_num_common_sides(bulk, {block1, block2})); EXPECT_EQ(12u, get_num_total_nodes(bulk)); - EXPECT_EQ( 1u, get_num_total_sides(bulk)); + EXPECT_EQ(11u, get_num_total_sides(bulk)); + EXPECT_FALSE(check_orphaned_nodes(bulk)); + + return {block1, block2}; +} + +stk::mesh::PartVector setup_mesh_2block_2hex_with_dual_internal_and_external_sides(stk::mesh::BulkData& bulk) +{ + stk::mesh::Part * block2 = &create_part(bulk.mesh_meta_data(), stk::topology::HEX_8, "block_2", 2); + create_part(bulk.mesh_meta_data(), stk::topology::QUAD_4, "surface_1", 1); + create_part(bulk.mesh_meta_data(), stk::topology::QUAD_4, "surface_2", 2); + + stk::io::fill_mesh("generated:1x1x2", bulk); + stk::mesh::Part * block1 = bulk.mesh_meta_data().get_part("block_1"); + + move_elems_from_block_to_block(bulk, std::vector{2}, "block_1", "block_2"); + + create_sideset(bulk, "surface_1", {"block_1", "block_2"}); + create_sides_between_blocks(bulk, "block_1", "block_2", "surface_1"); + create_all_boundary_sides(bulk, "surface_2"); + + EXPECT_EQ( 4u, 
get_num_intersecting_nodes(bulk, {block1, block2})); + EXPECT_EQ( 1u, get_num_common_sides(bulk, {block1, block2})); + EXPECT_EQ(12u, get_num_total_nodes(bulk)); + EXPECT_EQ(11u, get_num_total_sides(bulk)); EXPECT_FALSE(check_orphaned_nodes(bulk)); return {block1, block2}; diff --git a/packages/stk/stk_unit_tests/stk_tools/mesh_tools/DisconnectBlocksMeshConstruction.hpp b/packages/stk/stk_unit_tests/stk_tools/mesh_tools/DisconnectBlocksMeshConstruction.hpp index a39d647a92af..0b3c512a81c1 100644 --- a/packages/stk/stk_unit_tests/stk_tools/mesh_tools/DisconnectBlocksMeshConstruction.hpp +++ b/packages/stk/stk_unit_tests/stk_tools/mesh_tools/DisconnectBlocksMeshConstruction.hpp @@ -275,6 +275,8 @@ unsigned get_num_total_sides(const stk::mesh::BulkData & bulk); bool check_orphaned_nodes(stk::mesh::BulkData & bulk); +bool verify_attached_faces(stk::mesh::BulkData & bulk); + void output_mesh(stk::mesh::BulkData & bulk, const std::string & fileName); void output_mesh(stk::mesh::BulkData & bulk); @@ -345,7 +347,11 @@ stk::mesh::PartVector setup_mesh_2block_1hex(stk::mesh::BulkData& bulk); stk::mesh::PartVector setup_mesh_2block_2hex(stk::mesh::BulkData& bulk); -stk::mesh::PartVector setup_mesh_2block_2hex_with_internal_sides(stk::mesh::BulkData& bulk); +stk::mesh::PartVector setup_mesh_2block_2hex_with_internal_sides(stk::mesh::BulkData& bulk, bool loadMeshFirst = false); + +stk::mesh::PartVector setup_mesh_2block_2hex_with_internal_and_external_sides(stk::mesh::BulkData& bulk); + +stk::mesh::PartVector setup_mesh_2block_2hex_with_dual_internal_and_external_sides(stk::mesh::BulkData& bulk); stk::mesh::PartVector setup_mesh_2block_2hex_with_external_sides(stk::mesh::BulkData& bulk); diff --git a/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestDisconnectBlocks.cpp b/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestDisconnectBlocks.cpp index 6edec3c29139..cc963e4019e5 100644 --- a/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestDisconnectBlocks.cpp 
+++ b/packages/stk/stk_unit_tests/stk_tools/mesh_tools/UnitTestDisconnectBlocks.cpp @@ -14,6 +14,7 @@ #include #include // for MetaData #include +#include #include #include #include @@ -2043,8 +2044,6 @@ TEST_F(TestDisconnectUserBlocks2D, disconnect_user_blocks_3blocks_4quad_custom_o BlockConnectionVector blockPairsToDisconnectVector{BlockConnection("vl","lateral",0), BlockConnection("radax","lateral",0)}; test_user_block_disconnect(get_bulk(), blockPairsToDisconnectVector, 2u, stk::tools::DisconnectBlocksOption(stk::tools::DISCONNECT_LOCAL, stk::tools::PRESERVE_INITIAL_HINGES)); - - stk::io::write_mesh("custom_ordinal_unit_test_mesh.g", get_bulk()); } @@ -2150,3 +2149,493 @@ TEST(TestNGSDisconnect, jtd_sub_mesh) stk::tools::BlockPairVector disconnectPairs{{block1, block3}, {block2, block3}}; EXPECT_NO_THROW(stk::tools::disconnect_user_blocks(bulk, disconnectPairs, disconnectOption)); } + +void create_2_tet10s_in_2_blocks_sharing_6_nodes(stk::mesh::BulkData &bulk) +{ + std::string meshDesc; + + auto meshDescNp1 + { + "0,1,TET_10,1,2,3,4,6,7,8, 9,10,11,block_1\n" + "0,2,TET_10,3,2,1,5,7,6,8,12,13,14,block_2" + }; + + auto meshDescNp2 + { + "0,1,TET_10,1,2,3,4,6,7,8, 9,10,11,block_1\n" + "1,2,TET_10,3,2,1,5,7,6,8,12,13,14,block_2" + }; + + meshDesc = (bulk.parallel_size() == 2) ? 
meshDescNp2 : meshDescNp1; + + const std::vector coordinates + { + 0, 0, 0, + 1, 0, 0, + 0, 1, 0, + 0, 0, 1, + 0, 0, -1, + 0.5, 0, 0, + 0.5, 0.5, 0, + 0, 0.5, 0, + 0, 0, 0.5, + 0.5, 0, 0.5, + 0, 0.5, 0.5, + 0, 0.5, -0.5, + 0.5, 0, -0.5, + 0, 0, -0.5, + }; + + stk::unit_test_util::setup_text_mesh(bulk, stk::unit_test_util::get_full_text_mesh_desc(meshDesc, coordinates)); +} + +void create_4_hex8s_in_2_blocks_sharing_4_nodes(stk::mesh::BulkData &bulk) +{ + stk::io::fill_mesh("generated:4x1x1", bulk); + stk::mesh::Part* block1 = bulk.mesh_meta_data().get_part("block_1"); + stk::mesh::Part* block2 = &create_part(bulk.mesh_meta_data(), stk::topology::HEX_8, "block_2", 2); + + stk::mesh::EntityVector elems; + for(stk::mesh::EntityId elemId : std::vector{2,3}) { + stk::mesh::Entity elem = bulk.get_entity(stk::topology::ELEM_RANK, elemId); + if (bulk.is_valid(elem) && bulk.bucket(elem).owned()) { + elems.push_back(elem); + } + } + + bulk.batch_change_entity_parts(elems, stk::mesh::PartVector{block2}, stk::mesh::PartVector{block1}); +} + +stk::mesh::Part & create_sideset_part(stk::mesh::MetaData &meta, const size_t sidesetId) +{ + stk::mesh::Part &sidesetPart = meta.declare_part("Sideset"+std::to_string(sidesetId), meta.side_rank()); + meta.set_part_id(sidesetPart, sidesetId); + stk::io::put_io_part_attribute(sidesetPart); + return sidesetPart; +} + +stk::mesh::Part & create_sideset_part_with_topology(stk::mesh::MetaData &meta, const size_t sidesetId, stk::topology::topology_t topo) +{ + stk::mesh::Part &sidesetPart = meta.declare_part_with_topology("Sideset"+std::to_string(sidesetId), topo); + meta.set_part_id(sidesetPart, sidesetId); + stk::io::put_io_part_attribute(sidesetPart); + return sidesetPart; +} + +void expect_num_nodes_faces_elems( + const stk::mesh::BulkData &bulk, + const size_t goldNumNodes, + const size_t goldNumFaces, + const size_t goldNumElems) +{ + const stk::mesh::Selector s(bulk.mesh_meta_data().universal_part()); + EXPECT_EQ(goldNumNodes, 
stk::mesh::count_selected_entities(s, bulk.buckets(stk::topology::NODE_RANK))); + EXPECT_EQ(goldNumFaces, stk::mesh::count_selected_entities(s, bulk.buckets(stk::topology::FACE_RANK))); + EXPECT_EQ(goldNumElems, stk::mesh::count_selected_entities(s, bulk.buckets(stk::topology::ELEM_RANK))); +} + +void expect_one_face_in_sideset_with_adjacent_blocks( + const stk::mesh::BulkData &bulk, + const stk::mesh::Part &sidesetPart, + const stk::mesh::ConstPartVector &goldAdjacentElemBlockParts, + const size_t goldNumAdjElems) +{ + stk::mesh::EntityVector faces; + stk::mesh::get_selected_entities(sidesetPart, bulk.buckets(stk::topology::FACE_RANK), faces); + ASSERT_EQ(1u, faces.size()) << " for sideset " << sidesetPart.name(); + + const size_t numAdjElems = bulk.num_elements(faces[0]); + ASSERT_EQ(goldNumAdjElems, numAdjElems) << " for sideset " << sidesetPart.name(); + const stk::mesh::Entity *elems = bulk.begin_elements(faces[0]); + for(size_t i{0}; i 1) { GTEST_SKIP(); } + + auto& bulk = get_bulk(); + auto& meta = get_meta(); + + create_2_tet10s_in_2_blocks_sharing_6_nodes(bulk); + stk::mesh::Part &elemPart1 = *meta.get_part("block_1"); + stk::mesh::Part &elemPart2 = *meta.get_part("block_2"); + + stk::mesh::Part &facePart1 = create_sideset_part_with_topology(meta, 1, stk::topology::TRI_6); + stk::mesh::Part &facePart2 = create_sideset_part_with_topology(meta, 2, stk::topology::TRI_6); + meta.set_surface_to_block_mapping(&facePart1, {&elemPart1}); + meta.set_surface_to_block_mapping(&facePart2, {&elemPart2}); + stk::mesh::create_interior_block_boundary_sides(bulk, meta.universal_part(), {&facePart1, &facePart2}); + + expect_num_nodes_faces_elems(bulk, 14, 1, 2); + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart1, {&elemPart1, &elemPart2}, 2); + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart2, {&elemPart1, &elemPart2}, 2); + + stk::tools::disconnect_all_blocks(bulk); + + expect_num_nodes_faces_elems(bulk, 20, 2, 2); + 
expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart1, {&elemPart1}, 1); + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart2, {&elemPart2}, 1); + + EXPECT_TRUE(verify_attached_faces(bulk)); +} + +TEST_F(TestDisconnectWithSidesets, twoTetsSharingFaceWith3InternalSidesets_disconnect_blocksWith3Sidesets) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { GTEST_SKIP(); } + + auto& bulk = get_bulk(); + auto& meta = get_meta(); + + create_2_tet10s_in_2_blocks_sharing_6_nodes(bulk); + stk::mesh::Part &elemPart1 = *meta.get_part("block_1"); + stk::mesh::Part &elemPart2 = *meta.get_part("block_2"); + + stk::mesh::Part &facePart1 = create_sideset_part_with_topology(meta, 1, stk::topology::TRI_6); + stk::mesh::Part &facePart2 = create_sideset_part_with_topology(meta, 2, stk::topology::TRI_6); + stk::mesh::Part &facePart3 = create_sideset_part_with_topology(meta, 3, stk::topology::TRI_6); + meta.set_surface_to_block_mapping(&facePart1, {&elemPart1}); + meta.set_surface_to_block_mapping(&facePart2, {&elemPart1}); + meta.set_surface_to_block_mapping(&facePart3, {&elemPart2}); + stk::mesh::create_interior_block_boundary_sides(bulk, meta.universal_part(), {&facePart1, &facePart2, &facePart3}); + + expect_num_nodes_faces_elems(bulk, 14, 1, 2); + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart1, {&elemPart1, &elemPart2}, 2); + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart2, {&elemPart1, &elemPart2}, 2); + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart3, {&elemPart1, &elemPart2}, 2); + + stk::tools::disconnect_all_blocks(bulk); + + expect_num_nodes_faces_elems(bulk, 20, 2, 2); + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart1, {&elemPart1}, 1); + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart2, {&elemPart1}, 1); + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart3, {&elemPart2}, 1); + + EXPECT_TRUE(verify_attached_faces(bulk)); +} + 
+TEST_F(TestDisconnectWithSidesets, twoTetsSharingFaceWith3InternalSidesets_disconnect_blocksWith3Sidesets_reverse) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 1) { GTEST_SKIP(); } + + auto& bulk = get_bulk(); + auto& meta = get_meta(); + + create_2_tet10s_in_2_blocks_sharing_6_nodes(bulk); + stk::mesh::Part &elemPart1 = *meta.get_part("block_1"); + stk::mesh::Part &elemPart2 = *meta.get_part("block_2"); + + stk::mesh::Part &facePart1 = create_sideset_part_with_topology(meta, 1, stk::topology::TRI_6); + stk::mesh::Part &facePart2 = create_sideset_part_with_topology(meta, 2, stk::topology::TRI_6); + stk::mesh::Part &facePart3 = create_sideset_part_with_topology(meta, 3, stk::topology::TRI_6); + meta.set_surface_to_block_mapping(&facePart1, {&elemPart1}); + meta.set_surface_to_block_mapping(&facePart2, {&elemPart2}); + meta.set_surface_to_block_mapping(&facePart3, {&elemPart2}); + stk::mesh::create_interior_block_boundary_sides(bulk, meta.universal_part(), {&facePart1, &facePart2, &facePart3}); + + expect_num_nodes_faces_elems(bulk, 14, 1, 2); + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart1, {&elemPart1, &elemPart2}, 2); + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart2, {&elemPart1, &elemPart2}, 2); + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart3, {&elemPart1, &elemPart2}, 2); + + stk::tools::disconnect_all_blocks(bulk); + + expect_num_nodes_faces_elems(bulk, 20, 2, 2); + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart1, {&elemPart1}, 1); + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart2, {&elemPart2}, 1); + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart3, {&elemPart2}, 1); + + EXPECT_TRUE(verify_attached_faces(bulk)); +} + +TEST_F(TestDisconnectWithSidesets, twoTetsSharingFaceWith2InternalSidesets_two_procs) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 2) { GTEST_SKIP(); } + + auto& bulk = get_bulk(); + auto& meta = get_meta(); + auto rank = 
stk::parallel_machine_rank(MPI_COMM_WORLD); + + create_2_tet10s_in_2_blocks_sharing_6_nodes(bulk); + stk::mesh::Part &elemPart1 = *meta.get_part("block_1"); + stk::mesh::Part &elemPart2 = *meta.get_part("block_2"); + + stk::mesh::Part &facePart1 = create_sideset_part_with_topology(meta, 1, stk::topology::TRI_6); + stk::mesh::Part &facePart2 = create_sideset_part_with_topology(meta, 2, stk::topology::TRI_6); + meta.set_surface_to_block_mapping(&facePart1, {&elemPart1}); + meta.set_surface_to_block_mapping(&facePart2, {&elemPart2}); + stk::mesh::create_interior_block_boundary_sides(bulk, meta.universal_part(), {&facePart1, &facePart2}); + + expect_num_nodes_faces_elems(bulk, 14, 1, 2); + if (rank == 0) + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart1, {&elemPart1}, 2); + if (rank == 1) + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart2, {&elemPart2}, 2); + + stk::tools::disconnect_all_blocks(bulk); + + expect_num_nodes_faces_elems(bulk, 10, 1, 1); + if (rank == 0) + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart1, {&elemPart1}, 1); + if (rank == 1) + expect_one_face_in_sideset_with_adjacent_blocks(bulk, facePart2, {&elemPart2}, 1); + + EXPECT_TRUE(verify_attached_faces(bulk)); +} + +TEST_F(TestDisconnectWithSidesets, twoHexesInTwoBlocks_with_internalSidesets) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 2) { GTEST_SKIP(); } + + auto& bulk = get_bulk(); + + auto blocks = setup_mesh_2block_2hex_with_internal_sides(bulk); + auto block1 = blocks[0]; + auto block2 = blocks[1]; + + stk::tools::disconnect_user_blocks(bulk, {stk::tools::BlockPair{block1, block2}}); + + EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block1, block2})); + EXPECT_EQ(0u, get_num_common_sides(bulk, {block1, block2})); + EXPECT_EQ(16u, get_num_total_nodes(bulk)); + EXPECT_EQ(1u, get_num_total_sides(bulk)); + EXPECT_FALSE(check_orphaned_nodes(bulk)); + EXPECT_TRUE(verify_attached_faces(bulk)); +} + +TEST_F(TestDisconnectWithSidesets, 
fourHexesInThreeBlocks_with_internalSidesets) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 2) { GTEST_SKIP(); } + + auto& bulk = get_bulk(); + auto blocks = setup_mesh_3block_4hex_with_internal_sides(bulk); + + auto block1 = blocks[0]; + auto block2 = blocks[1]; + auto block3 = blocks[2]; + + stk::tools::disconnect_user_blocks(bulk, {stk::tools::BlockPair{block1, block2}, + stk::tools::BlockPair{block1, block3}, + stk::tools::BlockPair{block2, block3}}); + + EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block1, block2})); + EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block2, block3})); + EXPECT_EQ(0u, get_num_common_sides(bulk, {block1, block2})); + EXPECT_EQ(28u, get_num_total_nodes(bulk)); + EXPECT_EQ(3u, get_num_total_sides(bulk)); + EXPECT_FALSE(check_orphaned_nodes(bulk)); + EXPECT_TRUE(verify_attached_faces(bulk)); +} + +TEST_F(TestDisconnectWithSidesets, twoTet4InTwoBlocks_with_internalSidesets) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 2) { GTEST_SKIP(); } + + auto& bulk = get_bulk(); + auto blocks = setup_mesh_2block_2cubeOfTet_with_internal_sides(bulk); + auto block1 = blocks[0]; + auto block2 = blocks[1]; + + stk::tools::disconnect_user_blocks(bulk, {stk::tools::BlockPair{block1, block2}}); + + EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block1, block2})); + EXPECT_EQ(0u, get_num_common_sides(bulk, {block1, block2})); + EXPECT_EQ(16u, get_num_total_nodes(bulk)); + EXPECT_EQ(2u, get_num_total_sides(bulk)); + EXPECT_FALSE(check_orphaned_nodes(bulk)); + EXPECT_TRUE(verify_attached_faces(bulk)); +} + +TEST_F(TestDisconnectWithSidesets, threeTet4InFourBlocks_with_internalSidesets) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 2) { GTEST_SKIP(); } + + auto& bulk = get_bulk(); + auto blocks = setup_mesh_3block_4cubeOfTet_with_internal_sides(bulk); + auto block1 = blocks[0]; + auto block2 = blocks[1]; + auto block3 = blocks[2]; + + stk::tools::disconnect_user_blocks(bulk, {stk::tools::BlockPair{block1, block2}, + 
stk::tools::BlockPair{block1, block3}, + stk::tools::BlockPair{block2, block3}}); + + EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block1, block2})); + EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block2, block3})); + EXPECT_EQ(0u, get_num_common_sides(bulk, {block1, block2})); + EXPECT_EQ(28u, get_num_total_nodes(bulk)); + EXPECT_EQ(6u, get_num_total_sides(bulk)); + EXPECT_FALSE(check_orphaned_nodes(bulk)); + EXPECT_TRUE(verify_attached_faces(bulk)); +} + +TEST_F(TestDisconnectWithSidesets, twoHexesInTwoBlocks_with_internalAndExternalSidesets) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 2) { GTEST_SKIP(); } + + auto& bulk = get_bulk(); + + auto blocks = setup_mesh_2block_2hex_with_internal_and_external_sides(bulk); + auto block1 = blocks[0]; + auto block2 = blocks[1]; + + stk::tools::disconnect_user_blocks(bulk, {stk::tools::BlockPair{block1, block2}}); + + EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block1, block2})); + EXPECT_EQ(0u, get_num_common_sides(bulk, {block1, block2})); + EXPECT_EQ(16u, get_num_total_nodes(bulk)); + EXPECT_EQ(11u, get_num_total_sides(bulk)); + EXPECT_FALSE(check_orphaned_nodes(bulk)); + EXPECT_TRUE(verify_attached_faces(bulk)); +} + +TEST_F(TestDisconnectWithSidesets, twoHexesInTwoBlocks_with_dualInternalAndExternalSidesets) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 2) { GTEST_SKIP(); } + + auto& bulk = get_bulk(); + + auto blocks = setup_mesh_2block_2hex_with_dual_internal_and_external_sides(bulk); + auto block1 = blocks[0]; + auto block2 = blocks[1]; + + stk::tools::disconnect_user_blocks(bulk, {stk::tools::BlockPair{block1, block2}}); + + EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block1, block2})); + EXPECT_EQ(0u, get_num_common_sides(bulk, {block1, block2})); + EXPECT_EQ(16u, get_num_total_nodes(bulk)); + EXPECT_EQ(12u, get_num_total_sides(bulk)); + EXPECT_FALSE(check_orphaned_nodes(bulk)); + EXPECT_TRUE(verify_attached_faces(bulk)); +} + +TEST_F(TestDisconnectWithSidesets, 
twoHexesInTwoBlocks_with_internalSidesetAndEmptySidesets) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) > 2) { GTEST_SKIP(); } + + auto& bulk = get_bulk(); + + stk::mesh::Part * surface2 = &create_part(bulk.mesh_meta_data(), stk::topology::QUAD_4, "surface_2", 2); + stk::mesh::Part * surface3 = &create_part(bulk.mesh_meta_data(), stk::topology::QUAD_4, "surface_3", 3); + + bool loadMeshFirst = true; + auto blocks = setup_mesh_2block_2hex_with_internal_sides(bulk, loadMeshFirst); + auto block1 = blocks[0]; + auto block2 = blocks[1]; + + create_sideset(bulk, "surface_2", {"block_1"}); + create_sideset(bulk, "surface_3", {"block_1"}); + + unsigned numFacesInSurface2 = stk::mesh::count_selected_entities(*surface2, get_bulk().buckets(stk::topology::FACE_RANK)); + unsigned numFacesInSurface3 = stk::mesh::count_selected_entities(*surface3, get_bulk().buckets(stk::topology::FACE_RANK)); + EXPECT_EQ(0u, numFacesInSurface2); + EXPECT_EQ(0u, numFacesInSurface3); + + stk::tools::disconnect_user_blocks(bulk, {stk::tools::BlockPair{block1, block2}}); + + EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block1, block2})); + EXPECT_EQ(0u, get_num_common_sides(bulk, {block1, block2})); + EXPECT_EQ(16u, get_num_total_nodes(bulk)); + EXPECT_EQ(1u, get_num_total_sides(bulk)); + EXPECT_FALSE(check_orphaned_nodes(bulk)); + EXPECT_TRUE(verify_attached_faces(bulk)); + + numFacesInSurface2 = stk::mesh::count_selected_entities(*surface2, get_bulk().buckets(stk::topology::FACE_RANK)); + numFacesInSurface3 = stk::mesh::count_selected_entities(*surface3, get_bulk().buckets(stk::topology::FACE_RANK)); + + EXPECT_EQ(0u, numFacesInSurface2); + EXPECT_EQ(0u, numFacesInSurface3); +} + +TEST_F(TestDisconnectWithSidesets, ALRBC) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 3) { GTEST_SKIP(); } + + auto& bulk = get_bulk(); + + std::string meshDesc = + "0,1,HEX_8, 1, 2, 3, 4, 5, 6, 7, 8,block_1\n" + "1,2,HEX_8, 5, 6, 7, 8, 9,10,11,12,block_2\n" + "2,3,HEX_8, 
9,10,11,12,13,14,15,16,block_3\n" + "|coordinates: 0,0,0, 1,0,0, 1,1,0, 0,1,0, 0,0,1, 1,0,1, 1,1,1, 0,1,1, 0,0,2, 1,0,2, 1,1,2, 0,1,2, 0,0,3, 1,0,3, 1,1,3, 0,1,3" + "|sideset:name=surface_1; data=1,6, 2,5"; + + stk::io::fill_mesh("textmesh:" + meshDesc, bulk); + + auto block1 = bulk.mesh_meta_data().get_part("block_1"); + auto block2 = bulk.mesh_meta_data().get_part("block_2"); + auto block3 = bulk.mesh_meta_data().get_part("block_3"); + + auto surface1 = bulk.mesh_meta_data().get_part("surface_1"); + + unsigned numFacesInSurface1 = stk::mesh::count_selected_entities(*surface1, get_bulk().buckets(stk::topology::FACE_RANK)); + EXPECT_EQ(1u, numFacesInSurface1); + + stk::tools::disconnect_user_blocks(bulk, {stk::tools::BlockPair{block1, block2}, stk::tools::BlockPair{block2, block3}}); + + EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block1, block2})); + EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block2, block3})); + EXPECT_EQ(0u, get_num_common_sides(bulk, {block1, block2})); + EXPECT_EQ(24u, get_num_total_nodes(bulk)); + EXPECT_EQ(2u, get_num_total_sides(bulk)); + EXPECT_FALSE(check_orphaned_nodes(bulk)); + EXPECT_TRUE(verify_attached_faces(bulk)); + + numFacesInSurface1 = stk::mesh::count_selected_entities(*surface1, get_bulk().buckets(stk::topology::FACE_RANK)); + + int myProc = bulk.parallel_rank(); + if(myProc == 0 || myProc == 1) { + EXPECT_EQ(1u, numFacesInSurface1); + } else { + EXPECT_EQ(0u, numFacesInSurface1); + } +} + +TEST_F(TestDisconnectWithSidesets, ALRB) +{ + if (stk::parallel_machine_size(MPI_COMM_WORLD) != 2) { GTEST_SKIP(); } + + auto& bulk = get_bulk(); + + std::string meshDesc = + "1,1,HEX_8, 1, 2, 3, 4, 5, 6, 7, 8,block_1\n" + "0,2,HEX_8, 5, 6, 7, 8, 9,10,11,12,block_2\n" + "|coordinates: 0,0,0, 1,0,0, 1,1,0, 0,1,0, 0,0,1, 1,0,1, 1,1,1, 0,1,1, 0,0,2, 1,0,2, 1,1,2, 0,1,2" + "|sideset:name=surface_1; data=1,6, 2,5"; + + stk::io::fill_mesh("textmesh:" + meshDesc, bulk); + + auto block1 = bulk.mesh_meta_data().get_part("block_1"); + auto 
block2 = bulk.mesh_meta_data().get_part("block_2"); + + auto surface1 = bulk.mesh_meta_data().get_part("surface_1"); + + unsigned numFacesInSurface1 = stk::mesh::count_selected_entities(*surface1, get_bulk().buckets(stk::topology::FACE_RANK)); + EXPECT_EQ(1u, numFacesInSurface1); + + stk::tools::disconnect_user_blocks(bulk, {stk::tools::BlockPair{block1, block2}}); + + EXPECT_EQ(0u, get_num_intersecting_nodes(bulk, {block1, block2})); + EXPECT_EQ(0u, get_num_common_sides(bulk, {block1, block2})); + EXPECT_EQ(16u, get_num_total_nodes(bulk)); + EXPECT_EQ(2u, get_num_total_sides(bulk)); + EXPECT_FALSE(check_orphaned_nodes(bulk)); + EXPECT_TRUE(verify_attached_faces(bulk)); + + numFacesInSurface1 = stk::mesh::count_selected_entities(*surface1, get_bulk().buckets(stk::topology::FACE_RANK)); + + EXPECT_EQ(1u, numFacesInSurface1); +} + diff --git a/packages/stk/stk_unit_tests/stk_util/diag/UnitTestResource2.cpp b/packages/stk/stk_unit_tests/stk_util/diag/UnitTestResource2.cpp index 0807e822f796..1413f412a81b 100644 --- a/packages/stk/stk_unit_tests/stk_util/diag/UnitTestResource2.cpp +++ b/packages/stk/stk_unit_tests/stk_util/diag/UnitTestResource2.cpp @@ -37,6 +37,13 @@ #include "stk_util/diag/Resource2.h" #include +TEST(Resource2, invalid_name) +{ + sierra::String regionName("regionResource"); + sierra::Rsrc2::ResourceRoot regionResource(regionName); + EXPECT_ANY_THROW(regionResource.create("foo.bar", nullptr)); +} + TEST(Resource2, construct_list_and_find) { sierra::String parentName("parentResource"); diff --git a/packages/stk/stk_util/Jamfile b/packages/stk/stk_util/Jamfile index 466684585b9c..83c798152441 100644 --- a/packages/stk/stk_util/Jamfile +++ b/packages/stk/stk_util/Jamfile @@ -86,8 +86,8 @@ local installed-developer-files = # # SECTION 2: Development install # -explicit install-serial-targets ; -alias install-serial-targets ; + + # Dependencies listed in this target are installed in the developer's project. 
# This should include all executables and any other files needed for developer use. diff --git a/packages/stk/stk_util/stk_util/Version.hpp b/packages/stk/stk_util/stk_util/Version.hpp index 93f87622ce20..9ec38172e154 100644 --- a/packages/stk/stk_util/stk_util/Version.hpp +++ b/packages/stk/stk_util/stk_util/Version.hpp @@ -44,7 +44,7 @@ //See the file CHANGELOG.md for a listing that shows the //correspondence between version numbers and API changes. -#define STK_VERSION 5190400 +#define STK_VERSION 5190401 namespace stk diff --git a/packages/stk/stk_util/stk_util/diag/Resource2.cpp b/packages/stk/stk_util/stk_util/diag/Resource2.cpp index 079ea4748b2c..572329afdfca 100644 --- a/packages/stk/stk_util/stk_util/diag/Resource2.cpp +++ b/packages/stk/stk_util/stk_util/diag/Resource2.cpp @@ -135,6 +135,9 @@ Resource::const_iterator Resource::end() const Resource Resource::create(const String& resource_name, AnyData* any_data) { + const auto d = std::find(resource_name.begin(), resource_name.end(), '.'); + STK_ThrowRequireMsg(d == resource_name.end(), "Cannot use a '.' in a global variable name. Offending name = '" << resource_name << "'"); + auto& resource_list = getResourceMap(m_resource_); STK_ThrowRequireMsg(resource_list.find(resource_name) == resource_list.end(), diff --git a/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp b/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp index 3580e98c2ccf..e4eba23b5543 100644 --- a/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp +++ b/packages/stk/stk_util/stk_util/registry/ProductRegistry.cpp @@ -42,7 +42,7 @@ //In Sierra, STK_VERSION_STRING is provided on the compile line by bake. //For Trilinos stk snapshots, the following macro definition gets populated with //the real version string by the trilinos_snapshot.sh script. 
-#define STK_VERSION_STRING "5.19.4-46-gca6c4f7d" +#define STK_VERSION_STRING "5.19.4-573-gfdf674ff" #endif namespace stk { diff --git a/packages/teuchos/CMakeLists.txt b/packages/teuchos/CMakeLists.txt index e08961c1bdba..11d6a6ed6c37 100644 --- a/packages/teuchos/CMakeLists.txt +++ b/packages/teuchos/CMakeLists.txt @@ -433,9 +433,7 @@ IF(TPL_ENABLE_Valgrind) ENDIF() ENDIF() -# Enabling Kokkos profiling hooks in Teuchos timers requires that the -# Kokkos package be enabled - +#Option Teuchos_KOKKOS_PROFILING IF(DEFINED ${PROJECT_NAME}_ENABLE_Kokkos AND ${PROJECT_NAME}_ENABLE_Kokkos) SET(${PACKAGE_NAME}_KOKKOS_PROFILING_DEFAULT ON) ELSE() @@ -448,9 +446,28 @@ TRIBITS_ADD_OPTION_AND_DEFINE(${PACKAGE_NAME}_KOKKOS_PROFILING "Wrap every Teuchos timer with a Kokkos profiling region." "${${PACKAGE_NAME}_KOKKOS_PROFILING_DEFAULT}") IF (${PACKAGE_NAME}_KOKKOS_PROFILING) + # Enabling Kokkos profiling hooks in Teuchos timers requires that the + # Kokkos package be enabled + IF(NOT(DEFINED ${PROJECT_NAME}_ENABLE_Kokkos AND ${PROJECT_NAME}_ENABLE_Kokkos)) + MESSAGE(FATAL_ERROR "Setting Teuchos_KOKKOS_PROFILING=ON requires that the Kokkos package is enabled.") + ENDIF() MESSAGE(STATUS "Wrapping every Teuchos timer with a Kokkos profiling region.") ENDIF() +#Option Teuchos_TIMER_KOKKOS_FENCE +SET(Teuchos_TIMER_KOKKOS_FENCE_DEFAULT OFF) +TRIBITS_ADD_OPTION_AND_DEFINE( + Teuchos_TIMER_KOKKOS_FENCE + HAVE_TEUCHOS_TIMER_KOKKOS_FENCE + "Call Kokkos::fence() whenever a Teuchos timer starts or stops." 
+ "${Teuchos_TIMER_KOKKOS_FENCE_DEFAULT}") +IF (Teuchos_TIMER_KOKKOS_FENCE) + # Enabling Kokkos::fence() in Teuchos timers requires that the Kokkos package be enabled + IF(NOT(DEFINED ${PROJECT_NAME}_ENABLE_Kokkos AND ${PROJECT_NAME}_ENABLE_Kokkos)) + MESSAGE(FATAL_ERROR "Setting Teuchos_TIMER_KOKKOS_FENCE=ON requires that the Kokkos package is enabled.") + ENDIF() + MESSAGE(STATUS "Will call Kokkos::fence() when a Teuchos timer starts or stops.") +ENDIF() # # D) Process the subpackages for Teuchos diff --git a/packages/teuchos/comm/test/Time/TimeMonitor_UnitTests.cpp b/packages/teuchos/comm/test/Time/TimeMonitor_UnitTests.cpp index d0e5053abb96..d26e068bf5b7 100644 --- a/packages/teuchos/comm/test/Time/TimeMonitor_UnitTests.cpp +++ b/packages/teuchos/comm/test/Time/TimeMonitor_UnitTests.cpp @@ -67,6 +67,10 @@ void sleep(int sec) } #endif +#ifdef HAVE_TEUCHOS_TIMER_KOKKOS_FENCE +#include "Kokkos_Core.hpp" +#endif + namespace { void func_time_monitor1() @@ -1078,4 +1082,56 @@ namespace Teuchos { TimeMonitor::clearCounters (); } + // + // Test that Time::start() and Time::stop() call Kokkos::fence(), + // if the option to do that is enabled. + // + #ifdef HAVE_TEUCHOS_TIMER_KOKKOS_FENCE + namespace KokkosFenceCounter + { + static int numFences; + + void reset() + { + numFences = 0; + } + + void begin_fence_callback(const char*, const uint32_t deviceId, uint64_t*) { + using namespace Kokkos::Tools::Experimental; + // Only count global device fences on the default space. Otherwise fences + // could be counted multiple times, depending on how many backends are enabled. + DeviceType fenceDevice = identifier_from_devid(deviceId).type; + DeviceType defaultDevice = DeviceTypeTraits<Kokkos::DefaultExecutionSpace>::id; + if(fenceDevice == defaultDevice) + numFences++; + } + } + + TEUCHOS_UNIT_TEST( TimeMonitor, CheckTimerKokkosFences ) + { + // This test doesn't care about the comm size or rank because Kokkos + // fences and profiling is purely local to each rank. 
+ // + // Set up the fence counter (reset count to 0 and set the callback) + KokkosFenceCounter::reset(); + Kokkos::Tools::Experimental::set_begin_fence_callback(KokkosFenceCounter::begin_fence_callback); + int fenceCountAfterStart = 0; + int fenceCountAfterStop = 0; + + { + RCP