Merge pull request #895 from LLNL/v0.12.1-rc

V0.12.1 rc

rhornung67 authored Sep 9, 2020
2 parents 32d92e3 + a5d6fe1 commit 9cb6370
Showing 28 changed files with 148 additions and 134 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -15,7 +15,7 @@ endif()
# Set version number
set(RAJA_VERSION_MAJOR 0)
set(RAJA_VERSION_MINOR 12)
-set(RAJA_VERSION_PATCHLEVEL 0)
+set(RAJA_VERSION_PATCHLEVEL 1)

if (RAJA_LOADED AND (NOT RAJA_LOADED STREQUAL "${RAJA_VERSION_MAJOR}.${RAJA_VERSION_MINOR}.${RAJA_VERSION_PATCHLEVEL}"))
message(FATAL_ERROR "You are mixing RAJA versions. Loaded is ${RAJA_LOADED}, expected ${RAJA_VERSION_MAJOR}.${RAJA_VERSION_MINOR}.${RAJA_VERSION_PATCHLEVEL}")
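Downstream projects can mirror the RAJA_LOADED configure-time check above at compile time. A minimal sketch, assuming the generated RAJA/config.hpp exposes RAJA_VERSION_MAJOR/MINOR/PATCHLEVEL macros filled in from these CMake variables:

#include "RAJA/config.hpp"

// Fail early if an incompatible RAJA is picked up, mirroring the
// RAJA_LOADED guard that CMake applies at configure time.
static_assert(RAJA_VERSION_MAJOR == 0 && RAJA_VERSION_MINOR == 12,
              "this project expects RAJA v0.12.x");
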
8 changes: 8 additions & 0 deletions RELEASE_NOTES.md
@@ -10,6 +10,14 @@
Version vxx.yy.zz -- Release date 20yy-mm-dd
============================================

+Version v0.12.1 -- Release date 2020-09-09
+============================================
+
+This release fixes compilation errors that occurred when a CUDA-enabled
+build used a non-CUDA compiler, silences compiler warnings, and includes
+other bug fixes related to OpenMP target compilation.
+
+
Version v0.12.0 -- Release date 2020-09-03
============================================

2 changes: 1 addition & 1 deletion docs/sphinx/user_guide/conf.py
@@ -68,7 +68,7 @@
# The short X.Y version.
version = u'0.12'
# The full version, including alpha/beta/rc tags.
-release = u'0.12.0'
+release = u'0.12.1'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
14 changes: 7 additions & 7 deletions examples/CMakeLists.txt
@@ -102,13 +102,13 @@ raja_add_executable(
SOURCES multiview.cpp)

if(ENABLE_TARGET_OPENMP)
-  raja_add_executable(
-    NAME target-kernel
-    SOURCES omp-target-kernel.cpp)
-
-  raja_add_executable(
-    NAME omp-target-ltimes
-    SOURCES omp-target-ltimes.cpp)
+#  raja_add_executable(
+#    NAME target-kernel
+#    SOURCES omp-target-kernel.cpp)
+#
+#  raja_add_executable(
+#    NAME omp-target-ltimes
+#    SOURCES omp-target-ltimes.cpp)
endif()

raja_add_executable(
39 changes: 20 additions & 19 deletions examples/raja-teams.cpp
@@ -102,7 +102,7 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))

std::cout << "\n Running RAJA-Teams examples...\n";
int num_of_backends = 1;
-#if defined(RAJA_ENABLE_DEVICE)
+#if defined(RAJA_DEVICE_ACTIVE)
num_of_backends++;
#endif

@@ -119,13 +119,15 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
// Allocate memory for either host or device
int N_tri = 5;

-  int *Ddat;
-  if (select_cpu_or_gpu == RAJA::expt::HOST)
+  int* Ddat = nullptr;
+  if (select_cpu_or_gpu == RAJA::expt::HOST) {
     Ddat = host_res.allocate<int>(N_tri * N_tri);
+  }

-#if defined(RAJA_ENABLE_DEVICE)
-  if (select_cpu_or_gpu == RAJA::expt::DEVICE)
+#if defined(RAJA_DEVICE_ACTIVE)
+  if (select_cpu_or_gpu == RAJA::expt::DEVICE) {
     Ddat = device_res.allocate<int>(N_tri * N_tri);
+  }
#endif

/*
@@ -143,9 +145,9 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))
*/

if (select_cpu_or_gpu == RAJA::expt::HOST){
-    std::cout << "\n Running Upper triangular pattern example on the host...\n";
+    std::cout << "\n Running upper triangular pattern example on the host...\n";
}else {
-    std::cout << "\n Running Upper triangular pattern example on the device...\n";
+    std::cout << "\n Running upper triangular pattern example on the device...\n";
}


@@ -157,29 +159,28 @@ int main(int RAJA_UNUSED_ARG(argc), char **RAJA_UNUSED_ARG(argv[]))

RAJA::expt::loop<teams_x>(ctx, RAJA::RangeSegment(0, N_tri), [&](int r) {

-      // Array shared within threads of the same team
-      TEAM_SHARED int s_A[1];
-
-      RAJA::expt::loop<threads_x>(ctx, RAJA::RangeSegment(r, N_tri), [&](int c) {
-        if (c == r) s_A[0] = r;
-        D(r, c) = r * N_tri + c;
-      }); // loop j
-
-      ctx.teamSync();
-
-      RAJA::expt::loop<threads_x>(ctx, RAJA::RangeSegment(r, N_tri), [&](int c) {
-        printf("r=%d, c=%d : D=%d : s_A = %d \n", r, c, D(r, c), s_A[0]);
-      }); // loop c
+      // Array shared within threads of the same team
+      RAJA_TEAM_SHARED int s_A[1];
+
+      RAJA::expt::loop<threads_x>(ctx, RAJA::RangeSegment(0, 1), [&](int c) {
+        s_A[c] = r;
+      }); // loop c
+
+      ctx.teamSync();
+
+      RAJA::expt::loop<threads_x>(ctx, RAJA::RangeSegment(r, N_tri), [&](int c) {
+        D(r, c) = r * N_tri + c;
+        printf("r=%d, c=%d : D=%d : s_A = %d \n", r, c, D(r, c), s_A[0]);
+      }); // loop c
}); // loop r
}); // outer lambda

if (select_cpu_or_gpu == RAJA::expt::HOST) {
host_res.deallocate(Ddat);
}

-#if defined(RAJA_ENABLE_DEVICE)
+#if defined(RAJA_DEVICE_ACTIVE)
if (select_cpu_or_gpu == RAJA::expt::DEVICE) {
device_res.deallocate(Ddat);
}
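The reworked loop body follows the usual shared-memory discipline: one thread writes the team-shared value, the team synchronizes, then every thread reads it. Distilled from the new code above (same identifiers as the diff; threads_x is the thread policy the example defines):

// Inside a team: publish a value to team-shared memory, then consume it.
RAJA_TEAM_SHARED int s_A[1];

// A one-iteration loop, so exactly one thread in the team performs the write.
RAJA::expt::loop<threads_x>(ctx, RAJA::RangeSegment(0, 1), [&](int c) {
  s_A[c] = r;
});

ctx.teamSync();  // make the write visible to the whole team before any read

// Every thread may now read s_A[0] safely.
RAJA::expt::loop<threads_x>(ctx, RAJA::RangeSegment(r, N_tri), [&](int c) {
  D(r, c) = r * N_tri + c;
});
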
12 changes: 7 additions & 5 deletions include/RAJA/config.hpp.in
@@ -155,12 +155,14 @@ namespace RAJA {
#endif // _OPENMP
#endif // RAJA_ENABLE_OPENMP

-#if defined(RAJA_ENABLE_CUDA)
-#if not defined(__CUDACC__)
-#error RAJA configured with ENABLE_CUDA, but CUDA not supported by current compiler
-#endif //
-#endif // RAJA_ENABLE_CUDA
+#if defined(RAJA_ENABLE_CUDA) && defined(__CUDACC__)
+#define RAJA_CUDA_ACTIVE
+#endif // RAJA_ENABLE_CUDA && __CUDACC__
+
+#if defined(RAJA_CUDA_ACTIVE) || \
+    defined(RAJA_ENABLE_HIP)
+#define RAJA_DEVICE_ACTIVE
+#endif

/*!
******************************************************************************
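With this change, a CUDA-enabled configuration no longer hard-errors when a non-CUDA compiler processes the headers; code keys off the derived macros instead. A minimal sketch of the guard style, matching how the hunks below use the new macros (backend_name is a hypothetical helper, not RAJA API):

#include "RAJA/config.hpp"

// RAJA_CUDA_ACTIVE is defined only when RAJA_ENABLE_CUDA is set and the
// current compiler actually understands CUDA (__CUDACC__), so CUDA-only
// headers stay out of plain host-compiler passes.
#if defined(RAJA_CUDA_ACTIVE)
#include "RAJA/policy/cuda.hpp"
#endif

// RAJA_DEVICE_ACTIVE covers any GPU backend (active CUDA or HIP).
inline const char* backend_name()
{
#if defined(RAJA_DEVICE_ACTIVE)
  return "gpu";
#else
  return "host";
#endif
}
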
6 changes: 3 additions & 3 deletions include/RAJA/index/ListSegment.hpp
@@ -31,7 +31,7 @@
#include "RAJA/util/Span.hpp"
#include "RAJA/util/types.hpp"

-#if (defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))) && defined(RAJA_ENABLE_CUDA)
+#if defined(RAJA_CUDA_ACTIVE)
#include "RAJA/policy/cuda/raja_cudaerrchk.hpp"
#else
#define cudaErrchk(...)
@@ -70,7 +70,7 @@ class TypedListSegment
* won't see any different usage or behavior.
*/

-#if ((defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))) && defined(RAJA_ENABLE_CUDA)) || defined(RAJA_ENABLE_HIP)
+#if defined(RAJA_DEVICE_ACTIVE)
static constexpr bool Has_GPU = true;
#else
static constexpr bool Has_GPU = false;
@@ -117,7 +117,7 @@ class TypedListSegment
//! specialization for allocation of CPU_memory
void allocate(CPU_memory) { m_data = new T[m_size]; }

-#if (defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))) && defined(RAJA_ENABLE_CUDA)
+#if defined(RAJA_CUDA_ACTIVE)
//! copy data from container using BlockCopy
template <typename Container>
void copy(Container&& src, BlockCopy)
2 changes: 1 addition & 1 deletion include/RAJA/pattern/teams.hpp
@@ -25,7 +25,7 @@
//
#include "RAJA/pattern/teams/teams_sequential.hpp"

-#if (defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))) && defined(RAJA_ENABLE_CUDA)
+#if defined(RAJA_CUDA_ACTIVE)
#include "RAJA/pattern/teams/teams_cuda.hpp"
#endif

20 changes: 7 additions & 13 deletions include/RAJA/pattern/teams/teams_core.hpp
@@ -28,16 +28,10 @@
#include "camp/concepts.hpp"
#include "camp/tuple.hpp"

-#if ((defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))) && \
-     defined(RAJA_ENABLE_CUDA)) || \
-    defined(RAJA_ENABLE_HIP)
-#define RAJA_ENABLE_DEVICE
-#endif
-
 #if defined(RAJA_DEVICE_CODE)
-#define TEAM_SHARED __shared__
+#define RAJA_TEAM_SHARED __shared__
 #else
-#define TEAM_SHARED
+#define RAJA_TEAM_SHARED
#endif

namespace RAJA
Expand All @@ -54,27 +48,27 @@ struct null_launch_t {

// Support for host, and device
template <typename HOST_POLICY
-#if defined(RAJA_ENABLE_DEVICE)
+#if defined(RAJA_DEVICE_ACTIVE)
,
typename DEVICE_POLICY
#endif
>
struct LoopPolicy {
using host_policy_t = HOST_POLICY;
-#if defined(RAJA_ENABLE_DEVICE)
+#if defined(RAJA_DEVICE_ACTIVE)
using device_policy_t = DEVICE_POLICY;
#endif
};

template <typename HOST_POLICY
-#if defined(RAJA_ENABLE_DEVICE)
+#if defined(RAJA_DEVICE_ACTIVE)
,
typename DEVICE_POLICY
#endif
>
struct LaunchPolicy {
using host_policy_t = HOST_POLICY;
-#if defined(RAJA_ENABLE_DEVICE)
+#if defined(RAJA_DEVICE_ACTIVE)
using device_policy_t = DEVICE_POLICY;
#endif
};
@@ -193,7 +187,7 @@ void launch(ExecPlace place, Resources const &team_resources, BODY const &body)
launch_t::exec(LaunchContext(team_resources, HOST), body);
break;
}
-#ifdef RAJA_ENABLE_DEVICE
+#ifdef RAJA_DEVICE_ACTIVE
case DEVICE: {
using launch_t = LaunchExecute<typename POLICY_LIST::device_policy_t>;
launch_t::exec(LaunchContext(team_resources, DEVICE), body);
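LoopPolicy and LaunchPolicy pair a host policy with an optional device policy, and launch() dispatches between them on the runtime ExecPlace. A sketch of the intended declarations, modeled on examples/raja-teams.cpp; the specific CUDA policy names are assumptions for a CUDA build:

#include "RAJA/RAJA.hpp"

// Host policy first; the device policy is compiled in only when
// RAJA_DEVICE_ACTIVE is defined (see the guards above).
using launch_policy = RAJA::expt::LaunchPolicy<RAJA::expt::seq_launch_t
#if defined(RAJA_DEVICE_ACTIVE)
                                               ,
                                               RAJA::expt::cuda_launch_t<false>
#endif
                                               >;

using teams_x = RAJA::expt::LoopPolicy<RAJA::loop_exec
#if defined(RAJA_DEVICE_ACTIVE)
                                       ,
                                       RAJA::cuda_block_x_direct
#endif
                                       >;
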
3 changes: 2 additions & 1 deletion include/RAJA/pattern/teams/teams_sequential.hpp
@@ -35,7 +35,8 @@ struct seq_launch_t {
template <>
struct LaunchExecute<RAJA::expt::null_launch_t> {
template <typename BODY>
-  static void exec(LaunchContext const &ctx, BODY const &body)
+  static void exec(LaunchContext const& RAJA_UNUSED_ARG(ctx),
+                   BODY const& RAJA_UNUSED_ARG(body))
{
RAJA_ABORT_OR_THROW("NULL Launch");
}
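This hunk is one of the compiler-warning fixes called out in the release notes: RAJA_UNUSED_ARG keeps a parameter documented in the source while dropping its name from the compiled signature, which silences unused-parameter warnings. A small illustrative sketch (null_handler is hypothetical, not RAJA API):

#include "RAJA/RAJA.hpp"

// The macro removes the parameter name, so -Wunused-parameter stays quiet
// even though the body never touches event_code.
static void null_handler(int RAJA_UNUSED_ARG(event_code))
{
  // intentionally empty
}
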
2 changes: 1 addition & 1 deletion include/RAJA/policy/cuda.hpp
@@ -22,7 +22,7 @@

#include "RAJA/config.hpp"

-#if (defined(__NVCC__) || (defined(__clang__) && defined(__CUDA__))) && defined(RAJA_ENABLE_CUDA)
+#if defined(RAJA_CUDA_ACTIVE)

#include <cuda.h>
#include <cuda_runtime.h>
2 changes: 1 addition & 1 deletion include/RAJA/policy/cuda/policy.hpp
@@ -20,7 +20,7 @@

#include "RAJA/config.hpp"

-#if defined(RAJA_ENABLE_CUDA)
+#if defined(RAJA_CUDA_ACTIVE)

#include <utility>

3 changes: 0 additions & 3 deletions include/RAJA/policy/openmp/policy.hpp
@@ -66,9 +66,6 @@ namespace internal
struct Parallel {
};

-struct Collapse {
-};
-
struct For {
};

6 changes: 3 additions & 3 deletions include/RAJA/policy/openmp_target/forall.hpp
@@ -68,11 +68,11 @@ RAJA_INLINE resources::EventProxy<resources::Omp> forall_impl(resources::Omp &om
ib(begin_it[i]);
}

-  return resources::EventProxy<resources::Omp>(&res);
+  return resources::EventProxy<resources::Omp>(&omp_res);
}

template <typename Iterable, typename Func>
-RAJA_INLINE resources::EventProxy<resources::Omp> forall_impl(resources::Resource &omp_res,
+RAJA_INLINE resources::EventProxy<resources::Omp> forall_impl(resources::Omp &omp_res,
const omp_target_parallel_for_exec_nt&,
Iterable&& iter,
Func&& loop_body)
@@ -89,7 +89,7 @@ RAJA_INLINE resources::EventProxy<resources::Omp> forall_impl(resources::Resourc
ib(begin_it[i]);
}

-  return RAJA::resources::EventProxy<resources::Omp>(&res);
+  return resources::EventProxy<resources::Omp>(&omp_res);
}

} // namespace omp
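Both overloads now take resources::Omp and build the returned EventProxy from the omp_res parameter they actually received; the old bodies referenced a nonexistent res. A minimal caller-side sketch, assuming the resource-aware forall overload introduced in v0.12 and a build configured with ENABLE_TARGET_OPENMP; saxpy and its pointers are illustrative only:

#include "RAJA/RAJA.hpp"

void saxpy(double* x, double* y, double a, int N)
{
  // An explicit OpenMP-target resource; forall returns an event proxy
  // tied to it, which is what the fixed overloads now construct.
  RAJA::resources::Omp omp_res;

  RAJA::forall<RAJA::omp_target_parallel_for_exec<256>>(
      omp_res, RAJA::RangeSegment(0, N),
      [=](int i) { y[i] += a * x[i]; });
}
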
3 changes: 2 additions & 1 deletion include/RAJA/policy/openmp_target/kernel/For.hpp
@@ -53,7 +53,8 @@ struct StatementExecutor<statement::For<ArgumentId, omp_target_parallel_for_exec
auto len = segment_length<ArgumentId>(data);
using len_t = decltype(len);

-    forall_impl(omp_target_parallel_for_exec<N>{}, TypedRangeSegment<len_t>(0, len), for_wrapper);
+    auto r = resources::Omp::get_default();
+    forall_impl(r, omp_target_parallel_for_exec<N>{}, TypedRangeSegment<len_t>(0, len), for_wrapper);
}
};

3 changes: 3 additions & 0 deletions include/RAJA/policy/openmp_target/policy.hpp
@@ -30,6 +30,9 @@ struct Target {
struct Distribute {
};

+struct Collapse {
+};
+
template <size_t ThreadsPerTeam>
struct omp_target_parallel_for_exec
: make_policy_pattern_t<Policy::target_openmp,