diff --git a/core/src/Cabana_Halo.hpp b/core/src/Cabana_Halo.hpp index d8f6691c3..d18c97a61 100644 --- a/core/src/Cabana_Halo.hpp +++ b/core/src/Cabana_Halo.hpp @@ -203,18 +203,20 @@ struct is_halo : public is_halo_impl::type>::type namespace Impl { +//---------------------------------------------------------------------------// +// Check that the AoSoA/slice is the right size. template void checkSize( const Halo_t& halo, Container_t& container ) { - // Check that the AoSoA/slice is the right size. if ( container.size() != halo.numLocal() + halo.numGhost() ) throw std::runtime_error( "AoSoA/slice is the wrong size for gather!" ); } +//---------------------------------------------------------------------------// +// Fill the data to be sent. AoSoA variant. template -void sendBuffer( const Halo_t& halo, AoSoA_t& aosoa, View_t& send_buffer ) +void fillSends( const Halo_t& halo, AoSoA_t& aosoa, View_t& send_buffer ) { - // Get the steering vector for the sends. auto steering = halo.getExportSteering(); @@ -230,9 +232,10 @@ void sendBuffer( const Halo_t& halo, AoSoA_t& aosoa, View_t& send_buffer ) Kokkos::fence(); } +// Fill the data to be sent with modifications. AoSoA variant. template -void sendBuffer( const Halo_t& halo, AoSoA_t& aosoa, View_t& send_buffer, - const Modify_t& modify_functor ) +void fillSends( const Halo_t& halo, AoSoA_t& aosoa, View_t& send_buffer, + const Modify_t& modify_functor ) { // Get the steering vector for the sends. auto steering = halo.getExportSteering(); @@ -251,8 +254,10 @@ void sendBuffer( const Halo_t& halo, AoSoA_t& aosoa, View_t& send_buffer, Kokkos::fence(); } +// Extract the received data. AoSoA variant. template -void recvBuffer( const Halo_t& halo, AoSoA_t& aosoa, const View_t& send_buffer ) +void sendReceiveExtract( const Halo_t& halo, AoSoA_t& aosoa, + const View_t& send_buffer ) { // Allocate a receive buffer. Kokkos::View @@ -321,6 +326,148 @@ void recvBuffer( const Halo_t& halo, AoSoA_t& aosoa, const View_t& send_buffer ) MPI_Barrier( halo.comm() ); } +//---------------------------------------------------------------------------// +// Fill the data to be sent. Slice variant. +template +void fillSends( const Halo_t& halo, Slice_t& slice, View_t& send_buffer, + std::size_t num_comp ) +{ + // Get the raw slice data. + auto slice_data = slice.data(); + + // Get the steering vector for the sends. + auto steering = halo.getExportSteering(); + + // Gather from the local data into a tuple-contiguous send buffer. + auto gather_send_buffer_func = KOKKOS_LAMBDA( const std::size_t i ) + { + auto s = Slice_t::index_type::s( steering( i ) ); + auto a = Slice_t::index_type::a( steering( i ) ); + std::size_t slice_offset = s * slice.stride( 0 ) + a; + for ( std::size_t n = 0; n < num_comp; ++n ) + send_buffer( i, n ) = + slice_data[slice_offset + n * Slice_t::vector_length]; + }; + Kokkos::RangePolicy + gather_send_buffer_policy( 0, halo.totalNumExport() ); + Kokkos::parallel_for( "Cabana::gather::gather_send_buffer", + gather_send_buffer_policy, gather_send_buffer_func ); + Kokkos::fence(); +} + +// Fill the data to be sent with modifications. Slice variant. +template +void fillSends( const Halo_t& halo, Slice_t& slice, View_t& send_buffer, + std::size_t num_comp, const Modify_t& modify_functor ) +{ + // Get the raw slice data. + auto slice_data = slice.data(); + + // Get the steering vector for the sends. + auto steering = halo.getExportSteering(); + + // Gather from the local data into a tuple-contiguous send buffer. + auto gather_send_buffer_func = KOKKOS_LAMBDA( const std::size_t i ) + { + auto s = Slice_t::index_type::s( steering( i ) ); + auto a = Slice_t::index_type::a( steering( i ) ); + std::size_t slice_offset = s * slice.stride( 0 ) + a; + for ( std::size_t n = 0; n < num_comp; ++n ) + { + send_buffer( i, n ) = + slice_data[slice_offset + n * Slice_t::vector_length]; + modify_functor( send_buffer, i, n ); + } + }; + Kokkos::RangePolicy + gather_send_buffer_policy( 0, halo.totalNumExport() ); + Kokkos::parallel_for( "Cabana::gather::gather_send_buffer", + gather_send_buffer_policy, gather_send_buffer_func ); + Kokkos::fence(); +} + +template +void sendReceiveExtract( const Halo_t& halo, Slice_t& slice, + const View_t& send_buffer, std::size_t num_comp ) +{ + // Allocate a receive buffer. + Kokkos::View + recv_buffer( + Kokkos::ViewAllocateWithoutInitializing( "halo_recv_buffer" ), + halo.totalNumExport(), num_comp ); + + // Get the raw slice data. + auto slice_data = slice.data(); + + // The halo has it's own communication space so choose any mpi tag. + const int mpi_tag = 2345; + + // Post non-blocking receives. + int num_n = halo.numNeighbor(); + std::vector requests( num_n ); + std::pair recv_range = { 0, 0 }; + for ( int n = 0; n < num_n; ++n ) + { + recv_range.second = recv_range.first + halo.numImport( n ); + + auto recv_subview = + Kokkos::subview( recv_buffer, recv_range, Kokkos::ALL ); + + MPI_Irecv( recv_subview.data(), + recv_subview.size() * sizeof( typename Slice_t::value_type ), + MPI_BYTE, halo.neighborRank( n ), mpi_tag, halo.comm(), + &( requests[n] ) ); + + recv_range.first = recv_range.second; + } + + // Do blocking sends. + std::pair send_range = { 0, 0 }; + for ( int n = 0; n < num_n; ++n ) + { + send_range.second = send_range.first + halo.numExport( n ); + + auto send_subview = + Kokkos::subview( send_buffer, send_range, Kokkos::ALL ); + + MPI_Send( send_subview.data(), + send_subview.size() * sizeof( typename Slice_t::value_type ), + MPI_BYTE, halo.neighborRank( n ), mpi_tag, halo.comm() ); + + send_range.first = send_range.second; + } + + // Wait on non-blocking receives. + std::vector status( num_n ); + const int ec = + MPI_Waitall( requests.size(), requests.data(), status.data() ); + if ( MPI_SUCCESS != ec ) + throw std::logic_error( "Failed MPI Communication" ); + + // Extract the receive buffer into the ghosted elements. + std::size_t num_local = halo.numLocal(); + auto extract_recv_buffer_func = KOKKOS_LAMBDA( const std::size_t i ) + { + std::size_t ghost_idx = i + num_local; + auto s = Slice_t::index_type::s( ghost_idx ); + auto a = Slice_t::index_type::a( ghost_idx ); + std::size_t slice_offset = s * slice.stride( 0 ) + a; + for ( std::size_t n = 0; n < num_comp; ++n ) + slice_data[slice_offset + Slice_t::vector_length * n] = + recv_buffer( i, n ); + }; + Kokkos::RangePolicy + extract_recv_buffer_policy( 0, halo.totalNumImport() ); + Kokkos::parallel_for( "Cabana::gather::extract_recv_buffer", + extract_recv_buffer_policy, + extract_recv_buffer_func ); + Kokkos::fence(); + + // Barrier before completing to ensure synchronization. + MPI_Barrier( halo.comm() ); +} + } // namespace Impl //---------------------------------------------------------------------------// @@ -367,8 +514,8 @@ void gather( const Halo_t& halo, AoSoA_t& aosoa, const Modify_t& modify_functor, Kokkos::ViewAllocateWithoutInitializing( "halo_send_buffer" ), halo.totalNumExport() ); - Impl::sendBuffer( halo, aosoa, send_buffer, modify_functor ); - Impl::recvBuffer( halo, aosoa, send_buffer ); + Impl::fillSends( halo, aosoa, send_buffer, modify_functor ); + Impl::sendReceiveExtract( halo, aosoa, send_buffer ); } //---------------------------------------------------------------------------// @@ -409,8 +556,8 @@ void gather( const Halo_t& halo, AoSoA_t& aosoa, Kokkos::ViewAllocateWithoutInitializing( "halo_send_buffer" ), halo.totalNumExport() ); - Impl::sendBuffer( halo, aosoa, send_buffer ); - Impl::recvBuffer( halo, aosoa, send_buffer ); + Impl::fillSends( halo, aosoa, send_buffer ); + Impl::sendReceiveExtract( halo, aosoa, send_buffer ); } //---------------------------------------------------------------------------// @@ -427,11 +574,11 @@ void gather( const Halo_t& halo, AoSoA_t& aosoa, \tparam Halo_t Halo type - must be a Halo. - \tparam Slice_t Slice type - must be a Slice. + \tparam Slice_t Slice type - must be a slice. \param halo The halo to use for the gather. - \param slice The Slice on which to perform the gather. The Slice should have + \param slice The slice on which to perform the gather. The slice should have a size equivalent to halo.numGhost() + halo.numLocal(). The locally owned elements are expected to appear first (i.e. in the first halo.numLocal() elements) and the ghosted elements are expected to appear second (i.e. in @@ -443,7 +590,7 @@ void gather( const Halo_t& halo, Slice_t& slice, is_slice::value ), int>::type* = 0 ) { - // Check that the Slice is the right size. + // Check that the slice is the right size. Impl::checkSize( halo, slice ); // Get the number of components in the slice. @@ -451,9 +598,6 @@ void gather( const Halo_t& halo, Slice_t& slice, for ( std::size_t d = 2; d < slice.rank(); ++d ) num_comp *= slice.extent( d ); - // Get the raw slice data. - auto slice_data = slice.data(); - // Allocate a send buffer. Note this one is layout right so the components // are consecutive. Kokkos::View - gather_send_buffer_policy( 0, halo.totalNumExport() ); - Kokkos::parallel_for( "Cabana::gather::gather_send_buffer", - gather_send_buffer_policy, gather_send_buffer_func ); - Kokkos::fence(); - - // Allocate a receive buffer. Note this one is layout right so the - // components are consecutive. - Kokkos::View - recv_buffer( - Kokkos::ViewAllocateWithoutInitializing( "halo_recv_buffer" ), - halo.totalNumImport(), num_comp ); - - // The halo has it's own communication space so choose any mpi tag. - const int mpi_tag = 2345; + Impl::fillSends( halo, slice, send_buffer, num_comp ); + Impl::sendReceiveExtract( halo, slice, send_buffer, num_comp ); +} - // Post non-blocking receives. - int num_n = halo.numNeighbor(); - std::vector requests( num_n ); - std::pair recv_range = { 0, 0 }; - for ( int n = 0; n < num_n; ++n ) - { - recv_range.second = recv_range.first + halo.numImport( n ); +//---------------------------------------------------------------------------// +/*! + \brief Synchronously gather data from the local decomposition to the ghosts + using the halo forward communication plan. Slice version, where the buffer is + modified before being sent. This is a uniquely-owned to multiply-owned + communication. - auto recv_subview = - Kokkos::subview( recv_buffer, recv_range, Kokkos::ALL ); + A gather sends data from a locally owned elements to one or many ranks on + which they exist as ghosts. A locally owned element may be sent to as many + ranks as desired to be used as a ghost on those ranks. The value of the + element in the locally owned decomposition will be the value assigned to the + element in the ghosted decomposition. - MPI_Irecv( recv_subview.data(), - recv_subview.size() * sizeof( typename Slice_t::value_type ), - MPI_BYTE, halo.neighborRank( n ), mpi_tag, halo.comm(), - &( requests[n] ) ); + \tparam Halo_t Halo type - must be a Halo. - recv_range.first = recv_range.second; - } + \tparam Slice_t Slice type - must be a slice. - // Do blocking sends. - std::pair send_range = { 0, 0 }; - for ( int n = 0; n < num_n; ++n ) - { - send_range.second = send_range.first + halo.numExport( n ); + \tparam Modify_t Buffer modification type. - auto send_subview = - Kokkos::subview( send_buffer, send_range, Kokkos::ALL ); + \param halo The halo to use for the gather. - MPI_Send( send_subview.data(), - send_subview.size() * sizeof( typename Slice_t::value_type ), - MPI_BYTE, halo.neighborRank( n ), mpi_tag, halo.comm() ); + \param slice The slice on which to perform the gather. The slice should have + a size equivalent to halo.numGhost() + halo.numLocal(). The locally owned + elements are expected to appear first (i.e. in the first halo.numLocal() + elements) and the ghosted elements are expected to appear second (i.e. in + the next halo.numGhost() elements()). - send_range.first = send_range.second; - } + \param modify_functor Class containing functor to modify the send buffer + before being sent (e.g. for periodic coordinate update). +*/ +template +void gather( const Halo_t& halo, Slice_t& slice, const Modify_t& modify_functor, + typename std::enable_if<( is_halo::value && + is_slice::value ), + int>::type* = 0 ) +{ + // Check that the slice is the right size. + Impl::checkSize( halo, slice ); - // Wait on non-blocking receives. - std::vector status( num_n ); - const int ec = - MPI_Waitall( requests.size(), requests.data(), status.data() ); - if ( MPI_SUCCESS != ec ) - throw std::logic_error( "Failed MPI Communication" ); + // Get the number of components in the slice. + std::size_t num_comp = 1; + for ( std::size_t d = 2; d < slice.rank(); ++d ) + num_comp *= slice.extent( d ); - // Extract the receive buffer into the ghosted elements. - std::size_t num_local = halo.numLocal(); - auto extract_recv_buffer_func = KOKKOS_LAMBDA( const std::size_t i ) - { - std::size_t ghost_idx = i + num_local; - auto s = Slice_t::index_type::s( ghost_idx ); - auto a = Slice_t::index_type::a( ghost_idx ); - std::size_t slice_offset = s * slice.stride( 0 ) + a; - for ( std::size_t n = 0; n < num_comp; ++n ) - slice_data[slice_offset + Slice_t::vector_length * n] = - recv_buffer( i, n ); - }; - Kokkos::RangePolicy - extract_recv_buffer_policy( 0, halo.totalNumImport() ); - Kokkos::parallel_for( "Cabana::gather::extract_recv_buffer", - extract_recv_buffer_policy, - extract_recv_buffer_func ); - Kokkos::fence(); + // Allocate a send buffer. Note this one is layout right so the components + // are consecutive. + Kokkos::View + send_buffer( + Kokkos::ViewAllocateWithoutInitializing( "halo_send_buffer" ), + halo.totalNumExport(), num_comp ); - // Barrier before completing to ensure synchronization. - MPI_Barrier( halo.comm() ); + Impl::fillSends( halo, slice, send_buffer, num_comp, modify_functor ); + Impl::sendReceiveExtract( halo, slice, send_buffer, num_comp ); } //---------------------------------------------------------------------------// @@ -709,6 +818,6 @@ void scatter( const Halo_t& halo, Slice_t& slice, //---------------------------------------------------------------------------// -} // end namespace Cabana +} // namespace Cabana #endif // end CABANA_HALO_HPP diff --git a/core/src/Cabana_ParticleGridCommunication.hpp b/core/src/Cabana_ParticleGridCommunication.hpp index 5a55df3e5..fe1846232 100644 --- a/core/src/Cabana_ParticleGridCommunication.hpp +++ b/core/src/Cabana_ParticleGridCommunication.hpp @@ -55,18 +55,18 @@ auto getTopology( const LocalGridType& local_grid ) //---------------------------------------------------------------------------// /*! -\brief Check for the number of particles that must be communicated + \brief Check for the number of particles that must be communicated -\tparam LocalGridType Cajita LocalGrid type. + \tparam LocalGridType Cajita LocalGrid type. -\tparam PositionSliceType Particle position type. + \tparam PositionSliceType Particle position type. -\param local_grid The local grid containing periodicity and system bound -information. + \param local_grid The local grid containing periodicity and system bound + information. -\param positions The particle position container, either Slice or View. + \param positions The particle position container, either Slice or View. -\param minimum_halo_width Number of halo mesh widths to include for ghosting. + \param minimum_halo_width Number of halo mesh widths to include for ghosting. */ template int migrateCount( const LocalGridType& local_grid, @@ -372,6 +372,7 @@ void gridMigrate( const LocalGridType& local_grid, namespace Impl { +// Functor to determine which particles should be ghosted with Cajita grid. template struct HaloIds @@ -569,25 +570,69 @@ struct HaloIds } } }; + +//---------------------------------------------------------------------------// +// Determine which particles should be ghosted, reallocating and recounting if +// needed. +template +auto getHaloIDs( + const LocalGridType& local_grid, const PositionSliceType& positions, + const int min_halo_width, const int max_export_guess, + typename std::enable_if::value, int>::type* = + 0 ) +{ + using device_type = typename PositionSliceType::device_type; + using pos_value = typename PositionSliceType::value_type; + + // Get all 26 neighbor ranks. + auto topology = Impl::getTopology( local_grid ); + + using DestinationRankView = typename Kokkos::View; + using ShiftViewType = typename Kokkos::View; + using CountView = + typename Kokkos::View>; + DestinationRankView destinations( + Kokkos::ViewAllocateWithoutInitializing( "destinations" ), + max_export_guess ); + DestinationRankView ids( Kokkos::ViewAllocateWithoutInitializing( "ids" ), + max_export_guess ); + ShiftViewType shifts( Kokkos::ViewAllocateWithoutInitializing( "shifts" ), + max_export_guess, 3 ); + CountView send_count( "halo_send_count" ); + + // Determine which particles need to be ghosted to neighbors. + auto halo_ids = Impl::HaloIds( + local_grid, positions, send_count, destinations, ids, shifts, + min_halo_width ); + // Rebuild if needed. + halo_ids.rebuild( local_grid ); + + // Create the Cabana Halo. + auto halo = + Halo( local_grid.globalGrid().comm(), positions.size(), + ids, destinations, topology ); + return std::make_pair( halo, shifts ); +} + } // namespace Impl //---------------------------------------------------------------------------// /*! \class PeriodicShift - \brief Store and apply periodic shifts for halo communication. + \brief Store periodic shifts for halo communication. \tparam DeviceType Device type for which the data for this class will be allocated and where parallel execution occurs. - \tparam PositionIndex Particle position index within the AoSoA. - Ideally this would inherit from Halo (PeriodicHalo), combining the periodic shift and halo together. This is not currently done because the CommunicationPlan contains std member variables that would be captured on the device (warnings with NVCC). */ -template +template struct PeriodicShift { Kokkos::View _shifts; @@ -604,6 +649,25 @@ struct PeriodicShift : _shifts( shifts ) { } +}; + +/*! + \class PeriodicModifyAoSoA + + \brief Modify AoSoA buffer with periodic shifts during gather. + + \tparam DeviceType Device type for which the data for this class will be + allocated and where parallel execution occurs. + + \tparam PositionIndex Particle position index within the AoSoA. +*/ +template +struct PeriodicModifyAoSoA : PeriodicShift +{ + using PeriodicShift::PeriodicShift; + using PeriodicShift::_shifts; + + std::size_t dim = _shifts.extent( 1 ); /*! \brief Modify the send buffer with periodic shifts. @@ -618,139 +682,180 @@ struct PeriodicShift KOKKOS_INLINE_FUNCTION void operator()( ViewType& send_buffer, const int i ) const { - for ( int d = 0; d < 3; ++d ) + for ( std::size_t d = 0; d < dim; ++d ) get( send_buffer( i ), d ) += _shifts( i, d ); } }; -template +/*! + \class PeriodicModifySlice + + \brief Modify slice buffer with periodic shifts during gather. + + \tparam DeviceType Device type for which the data for this class will be + allocated and where parallel execution occurs. +*/ +template +struct PeriodicModifySlice : PeriodicShift +{ + using PeriodicShift::PeriodicShift; + using PeriodicShift::_shifts; + + /*! + \brief Modify the send buffer with periodic shifts. + + \tparam ViewType The container type for the send buffer. + + \param send_buffer Send buffer of positions being ghosted. + + \param i Particle index. + + \param d Dimension index. + */ + template + KOKKOS_INLINE_FUNCTION void operator()( ViewType& send_buffer, const int i, + const int d ) const + { + send_buffer( i, d ) += _shifts( i, d ); + } +}; + +//---------------------------------------------------------------------------// +/*! + \class GridHalo + + \brief Store communication Halo and Modify functor. + + \tparam HaloType Halo type. + + \tparam ModifyType Modification functor type. +*/ +template class GridHalo { const HaloType _halo; - const ShiftType _shifts; + const ModifyType _modify; public: - GridHalo( const HaloType& halo, const ShiftType& shifts ) + /* + \brief Constructor + + \param halo Halo for gather/scatter. + + \param modify Store and apply buffer modifications. + */ + GridHalo( const HaloType& halo, const ModifyType& modify ) : _halo( halo ) - , _shifts( shifts ) + , _modify( modify ) { } + /* + \brief Return stored halo + + \return Halo for gather/scatter. + */ HaloType getHalo() const { return _halo; } + /* + \brief Return stored modification functor. - ShiftType getShifts() const { return _shifts; } + \return Modify object to access or apply buffer modifications. + */ + ModifyType getModify() const { return _modify; } }; //---------------------------------------------------------------------------// /*! \brief Determine which data should be ghosted on another decomposition, using - bounds of a Cajita grid and taking periodic boundaries into account. Slice + bounds of a Cajita grid and taking periodic boundaries into account. AoSoA variant. \tparam LocalGridType Cajita LocalGrid type. - \tparam PositionSliceType Slice/View type. - - \param local_grid The local grid containing periodicity and system bound - information. + \tparam ParticleContainer AoSoA type. + \param local_grid The local grid for creating halo and periodicity. - \param positions The particle positions. + \param particles The particle AoSoA, containing positions. \param PositionIndex Particle position index within the AoSoA. \param min_halo_width Number of halo mesh widths to include for ghosting. \param max_export_guess The allocation size for halo export ranks, IDs, and - periodic shifts + periodic shifts. - \return GridHalo containing Halo and PeriodicShift. + \return GridHalo containing Halo and PeriodicModify. */ -template auto createGridHalo( - const LocalGridType& local_grid, const PositionSliceType& positions, + const LocalGridType& local_grid, const ParticleContainer& particles, std::integral_constant, const int min_halo_width, const int max_export_guess = 0, - typename std::enable_if::value, int>::type* = + typename std::enable_if::value, int>::type* = 0 ) { - using device_type = typename PositionSliceType::device_type; - using pos_value = typename PositionSliceType::value_type; - - // Get all 26 neighbor ranks. - auto topology = Impl::getTopology( local_grid ); + using device_type = typename ParticleContainer::device_type; - using DestinationRankView = typename Kokkos::View; - using ShiftViewType = typename Kokkos::View; - using CountView = - typename Kokkos::View>; - DestinationRankView destinations( - Kokkos::ViewAllocateWithoutInitializing( "destinations" ), - max_export_guess ); - DestinationRankView ids( Kokkos::ViewAllocateWithoutInitializing( "ids" ), - max_export_guess ); - ShiftViewType shifts( Kokkos::ViewAllocateWithoutInitializing( "shifts" ), - max_export_guess, 3 ); - CountView send_count( "halo_send_count" ); - - // Determine which particles need to be ghosted to neighbors. - auto halo_ids = Impl::HaloIds( - local_grid, positions, send_count, destinations, ids, shifts, - min_halo_width ); - // Rebuild if needed. - halo_ids.rebuild( local_grid ); - - // Create the Cabana Halo. - auto halo = - Halo( local_grid.globalGrid().comm(), positions.size(), - ids, destinations, topology ); - - // Create the Shifts. - auto periodic_shift = PeriodicShift( shifts ); - - // Return Halo and PeriodicShifts together. - GridHalo, PeriodicShift> - grid_halo( halo, periodic_shift ); + auto positions = slice( particles ); + auto pair = Impl::getHaloIDs( local_grid, positions, min_halo_width, + max_export_guess ); + using halo_type = Halo; + halo_type halo = pair.first; + auto shifts = pair.second; + + // Create the functor for modifying the buffer. + using modify_type = PeriodicModifyAoSoA; + auto periodic_modify = modify_type( shifts ); + + // Return Halo and PeriodicModify together. + GridHalo grid_halo( halo, periodic_modify ); return grid_halo; } //---------------------------------------------------------------------------// /*! \brief Determine which data should be ghosted on another decomposition, using - bounds of a Cajita grid and taking periodic boundaries into account. AoSoA + bounds of a Cajita grid and taking periodic boundaries into account. Slice variant. \tparam LocalGridType Cajita LocalGrid type. - \tparam ParticleContainer AoSoA type. - \param local_grid The local grid for creating halo and periodicity. + \tparam PositionSliceType Slice type. - \param particles The particle AoSoA, containing positions. + \param local_grid The local grid for creating halo and periodicity. - \param PositionIndex Particle position index within the AoSoA. + \param positions The position slice. \param min_halo_width Number of halo mesh widths to include for ghosting. \param max_export_guess The allocation size for halo export ranks, IDs, and periodic shifts. - \return GridHalo containing Halo and PeriodicShift. + \return GridHalo containing Halo and PeriodicModify. */ -template +template auto createGridHalo( - const LocalGridType& local_grid, const ParticleContainer& particles, - std::integral_constant, + const LocalGridType& local_grid, const PositionSliceType& positions, const int min_halo_width, const int max_export_guess = 0, - typename std::enable_if::value, int>::type* = + typename std::enable_if::value, int>::type* = 0 ) { - auto positions = slice( particles ); - return createGridHalo( local_grid, positions, - std::integral_constant(), - min_halo_width, max_export_guess ); + using device_type = typename PositionSliceType::device_type; + + auto pair = Impl::getHaloIDs( local_grid, positions, min_halo_width, + max_export_guess ); + using halo_type = Halo; + halo_type halo = pair.first; + auto shifts = pair.second; + + // Create the functor for modifying the buffer. + using modify_type = PeriodicModifySlice; + auto periodic_modify = modify_type( shifts ); + + // Return Halo and PeriodicModify together. + GridHalo grid_halo( halo, periodic_modify ); + return grid_halo; } //---------------------------------------------------------------------------// @@ -759,29 +864,55 @@ auto createGridHalo( using the bounds and periodicity of a Cajita grid to determine which particles should be copied. AoSoA variant. - \tparam HaloType Halo type. - - \tparam PeriodicShiftType Periodic shift type. + \tparam GridHaloType GridHalo type - contained ModifyType must have an + AoSoA-compatible functor to modify the buffer. \tparam ParticleContainer AoSoA type. - \param halo The communication halo. - - \param shift Periodic shift functor. + \param grid_halo The communication halo taking into account periodic + boundaries. \param particles The particle AoSoA, containing positions. */ template -void gridGather( const GridHaloType grid_halo, ParticleContainer& particles ) +void gridGather( const GridHaloType grid_halo, ParticleContainer& particles, + typename std::enable_if::value, + int>::type* = 0 ) { auto halo = grid_halo.getHalo(); - auto shifts = grid_halo.getShifts(); + auto modify = grid_halo.getModify(); particles.resize( halo.numLocal() + halo.numGhost() ); - gather( halo, particles, shifts ); + gather( halo, particles, modify ); } -// TODO: slice version +//---------------------------------------------------------------------------// +/*! + \brief Gather data from one decomposition and ghosts on another decomposition, + using the bounds and periodicity of a Cajita grid to determine which particles + should be copied. Slice variant. + + \tparam GridHaloType GridHalo type - contained ModifyType must have a + slice-compatible functor to modify the buffer. + + \tparam PositionSliceType Slice type. + + \param grid_halo The communication halo taking into account periodic + boundaries. + + \param positions The position slice. +*/ +template +void gridGather( const GridHaloType grid_halo, PositionSliceType& positions, + typename std::enable_if::value, + int>::type* = 0 ) +{ + auto halo = grid_halo.getHalo(); + auto modify = grid_halo.getModify(); + + // Must be resized to match local/ghost externally. + gather( halo, positions, modify ); +} } // namespace Cabana diff --git a/core/unit_test/tstParticleGridCommunication.hpp b/core/unit_test/tstParticleGridCommunication.hpp index af8a78f69..7a9b632c6 100644 --- a/core/unit_test/tstParticleGridCommunication.hpp +++ b/core/unit_test/tstParticleGridCommunication.hpp @@ -275,7 +275,8 @@ void testMigrate( const int halo_width, const int test_halo_width, } //---------------------------------------------------------------------------// -void testGather( const int halo_width, const int test_type ) +void testGather( const int halo_width, const int test_halo_width, + const int test_type ) { PGCommTestData test_data( false, halo_width ); auto data_host = test_data.data_host; @@ -293,15 +294,34 @@ void testGather( const int halo_width, const int test_type ) if ( test_type == 0 ) { - // Do the gather with the AoSoA. + // Do the gather with an AoAoA. auto grid_halo = Cabana::createGridHalo( local_grid, data_src, std::integral_constant(), - halo_width ); + test_halo_width ); gridGather( grid_halo, data_src ); data_host.resize( data_src.size() ); Cabana::deep_copy( data_host, data_src ); } + else if ( test_type == 1 ) + { + // Create the halo with a slice. + auto pos_src = Cabana::slice<1>( data_src ); + auto grid_halo = + Cabana::createGridHalo( local_grid, pos_src, test_halo_width ); + + // Resize (cannot resize slice). + auto halo = grid_halo.getHalo(); + data_src.resize( halo.numLocal() + halo.numGhost() ); + pos_src = Cabana::slice<1>( data_src ); + + // Gather with slice. + gridGather( grid_halo, pos_src ); + + data_host.resize( data_src.size() ); + Cabana::deep_copy( data_host, data_src ); + } + // Check the results. int new_num_data = data_host.size(); auto pos_host = Cabana::slice<1>( data_host ); @@ -318,63 +338,78 @@ void testGather( const int halo_width, const int test_type ) } for ( int i = num_data; i < new_num_data; ++i ) { - bool within_x = true; - bool within_y = true; - bool within_z = true; - // Make sure all ghosts are in halo region in at least one direction. - if ( pos_host( i, Cajita::Dim::I ) < test_data.lo_x ) - { - EXPECT_LE( pos_host( i, Cajita::Dim::I ), test_data.lo_x ); - EXPECT_GE( pos_host( i, Cajita::Dim::I ), - test_data.lo_x - test_data.ghost_x ); - } - else if ( pos_host( i, Cajita::Dim::I ) > test_data.hi_x ) - { - - EXPECT_GE( pos_host( i, Cajita::Dim::I ), test_data.hi_x ); - EXPECT_LE( pos_host( i, Cajita::Dim::I ), - test_data.hi_x + test_data.ghost_x ); - } - else - { - within_x = false; - } - if ( pos_host( i, Cajita::Dim::J ) < test_data.lo_y ) - { - EXPECT_LE( pos_host( i, Cajita::Dim::J ), test_data.lo_y ); - EXPECT_GE( pos_host( i, Cajita::Dim::J ), - test_data.lo_y - test_data.ghost_y ); - } - else if ( pos_host( i, Cajita::Dim::J ) > test_data.hi_y ) - { - EXPECT_GE( pos_host( i, Cajita::Dim::J ), test_data.hi_y ); - EXPECT_LE( pos_host( i, Cajita::Dim::J ), - test_data.hi_y + test_data.ghost_y ); - } - else - { - within_y = false; - } - if ( pos_host( i, Cajita::Dim::K ) < test_data.lo_z ) - { - EXPECT_LE( pos_host( i, Cajita::Dim::K ), test_data.lo_z ); - EXPECT_GE( pos_host( i, Cajita::Dim::K ), - test_data.lo_z - test_data.ghost_z ); - } - else if ( pos_host( i, Cajita::Dim::K ) > test_data.hi_z ) + // Make sure particles haven't moved if within allowable halo mesh. + if ( test_halo_width < halo_width ) { - EXPECT_GE( pos_host( i, Cajita::Dim::K ), test_data.hi_z ); - EXPECT_LE( pos_host( i, Cajita::Dim::K ), - test_data.hi_z + test_data.ghost_z ); + EXPECT_DOUBLE_EQ( pos_host( i, Cajita::Dim::I ), + pos_initial( i, Cajita::Dim::I ) ); + EXPECT_DOUBLE_EQ( pos_host( i, Cajita::Dim::J ), + pos_initial( i, Cajita::Dim::J ) ); + EXPECT_DOUBLE_EQ( pos_host( i, Cajita::Dim::K ), + pos_initial( i, Cajita::Dim::K ) ); } else { - within_z = false; - } - if ( !within_x && !within_y && !within_z ) - { - FAIL() << "Ghost particle outside ghost region." << pos_host( i, 0 ) - << " " << pos_host( i, 1 ) << " " << pos_host( i, 2 ); + bool within_x = true; + bool within_y = true; + bool within_z = true; + // Make sure all ghosts are in halo region in at least one + // direction. + if ( pos_host( i, Cajita::Dim::I ) < test_data.lo_x ) + { + EXPECT_LE( pos_host( i, Cajita::Dim::I ), test_data.lo_x ); + EXPECT_GE( pos_host( i, Cajita::Dim::I ), + test_data.lo_x - test_data.ghost_x ); + } + else if ( pos_host( i, Cajita::Dim::I ) > test_data.hi_x ) + { + + EXPECT_GE( pos_host( i, Cajita::Dim::I ), test_data.hi_x ); + EXPECT_LE( pos_host( i, Cajita::Dim::I ), + test_data.hi_x + test_data.ghost_x ); + } + else + { + within_x = false; + } + if ( pos_host( i, Cajita::Dim::J ) < test_data.lo_y ) + { + EXPECT_LE( pos_host( i, Cajita::Dim::J ), test_data.lo_y ); + EXPECT_GE( pos_host( i, Cajita::Dim::J ), + test_data.lo_y - test_data.ghost_y ); + } + else if ( pos_host( i, Cajita::Dim::J ) > test_data.hi_y ) + { + EXPECT_GE( pos_host( i, Cajita::Dim::J ), test_data.hi_y ); + EXPECT_LE( pos_host( i, Cajita::Dim::J ), + test_data.hi_y + test_data.ghost_y ); + } + else + { + within_y = false; + } + if ( pos_host( i, Cajita::Dim::K ) < test_data.lo_z ) + { + EXPECT_LE( pos_host( i, Cajita::Dim::K ), test_data.lo_z ); + EXPECT_GE( pos_host( i, Cajita::Dim::K ), + test_data.lo_z - test_data.ghost_z ); + } + else if ( pos_host( i, Cajita::Dim::K ) > test_data.hi_z ) + { + EXPECT_GE( pos_host( i, Cajita::Dim::K ), test_data.hi_z ); + EXPECT_LE( pos_host( i, Cajita::Dim::K ), + test_data.hi_z + test_data.ghost_z ); + } + else + { + within_z = false; + } + if ( !within_x && !within_y && !within_z ) + { + FAIL() << "Ghost particle outside ghost region." + << pos_host( i, 0 ) << " " << pos_host( i, 1 ) << " " + << pos_host( i, 2 ); + } } } @@ -385,7 +420,7 @@ void testGather( const int halo_width, const int test_type ) // RUN TESTS //---------------------------------------------------------------------------// -TEST( TEST_CATEGORY, periodic_test_migrate_inplace ) +TEST( TEST_CATEGORY, periodic_test_migrate_aosoa ) { // Call with varied halo data and halo width, without forced communication for ( int i = 1; i < 4; i++ ) @@ -396,7 +431,7 @@ TEST( TEST_CATEGORY, periodic_test_migrate_inplace ) testMigrate( 1, 1, true, 0 ); } -TEST( TEST_CATEGORY, periodic_test_migrate_separate ) +TEST( TEST_CATEGORY, periodic_test_migrate_aosoa_separate ) { for ( int i = 1; i < 4; i++ ) for ( int j = 1; j < 4; j++ ) @@ -414,10 +449,18 @@ TEST( TEST_CATEGORY, periodic_test_migrate_slice ) testMigrate( 1, 1, true, 2 ); } -TEST( TEST_CATEGORY, periodic_test_gather_inplace ) +TEST( TEST_CATEGORY, periodic_test_gather_aosoa ) +{ + for ( int i = 1; i < 4; i++ ) + for ( int j = 1; j < 4; j++ ) + testGather( i, j, 0 ); +} + +TEST( TEST_CATEGORY, periodic_test_gather_slice ) { - for ( int i = 1; i < 2; i++ ) - testGather( i, 0 ); + for ( int i = 1; i < 4; i++ ) + for ( int j = 1; j < 4; j++ ) + testGather( i, j, 1 ); } //---------------------------------------------------------------------------//