Skip to content

Commit

Permalink
testing and backup files
Browse files Browse the repository at this point in the history
  • Loading branch information
mukul1992 committed Sep 5, 2023
1 parent 17fb871 commit 93320c9
Show file tree
Hide file tree
Showing 2 changed files with 634 additions and 0 deletions.
316 changes: 316 additions & 0 deletions Source/Advection/AdvectionSrcForMom_bkup.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,316 @@
#include <AdvectionSrcForMom_N.H>
#include <AdvectionSrcForMom_T.H>

using namespace amrex;

/**
* Function for computing the advective tendency for the momentum equations
* This routine has explicit expressions for all cases (terrain or not) when
* the horizontal and vertial spatial orders are <= 2, and calls more specialized
* functions when either (or both) spatial order(s) is greater than 2.
*
* @param[in] bxx box over which the x-momentum is updated
* @param[in] bxy box over which the y-momentum is updated
* @param[in] bxz box over which the z-momentum is updated
* @param[out] rho_u_rhs tendency for the x-momentum equation
* @param[out] rho_v_rhs tendency for the y-momentum equation
* @param[out] rho_w_rhs tendency for the z-momentum equation
* @param[in] u x-component of the velocity
* @param[in] v y-component of the velocity
* @param[in] w z-component of the velocity
* @param[in] rho_u x-component of the momentum
* @param[in] rho_v y-component of the momentum
* @param[in] Omega component of the momentum normal to the z-coordinate surface
* @param[in] z_nd height coordinate at nodes
* @param[in] detJ Jacobian of the metric transformation (= 1 if use_terrain is false)
* @param[in] cellSizeInv inverse of the mesh spacing
* @param[in] mf_m map factor at cell centers
* @param[in] mf_u map factor at x-faces
* @param[in] mf_v map factor at y-faces
* @param[in] horiz_adv_type sets the spatial order to be used for lateral derivatives
* @param[in] vert_adv_type sets the spatial order to be used for vertical derivatives
* @param[in] use_terrain if true, use the terrain-aware derivatives (with metric terms)
* @param[in] domhi_z maximum k value in the domain
*/
void
AdvectionSrcForMom (const Box& bxx, const Box& bxy, const Box& bxz,
const Array4< Real>& rho_u_rhs,
const Array4< Real>& rho_v_rhs,
const Array4< Real>& rho_w_rhs,
const Array4<const Real>& u,
const Array4<const Real>& v,
const Array4<const Real>& w,
const Array4<const Real>& rho_u,
const Array4<const Real>& rho_v,
const Array4<const Real>& Omega,
const Array4<const Real>& z_nd,
const Array4<const Real>& detJ,
const GpuArray<Real, AMREX_SPACEDIM>& cellSizeInv,
const Array4<const Real>& mf_m,
const Array4<const Real>& mf_u,
const Array4<const Real>& mf_v,
const AdvType horiz_adv_type,
const AdvType vert_adv_type,
const int use_terrain,
const int domhi_z)
{
BL_PROFILE_VAR("AdvectionSrcForMom", AdvectionSrcForMom);

auto dxInv = cellSizeInv[0], dyInv = cellSizeInv[1], dzInv = cellSizeInv[2];

AMREX_ALWAYS_ASSERT(bxz.smallEnd(2) > 0);

// new updates for GPU opt July 2023
// compute mf_u and mf_v inverses beforehand
// smallest 2D box containing bxx and bxy
IntVect bx_hi = amrex::max(bxx.bigEnd(), bxy.bigEnd());
IntVect bx_lo = amrex::min(bxx.smallEnd(), bxy.smallEnd());
Box box2d(bx_lo, bx_hi); box2d.setRange(2,0);
box2d.grow({1,1,0}); // {3,3,0} ??

// now create mf_u_inv and mf_v_inv FABs/arrays
FArrayBox mf_u_invFAB(box2d); FArrayBox mf_v_invFAB(box2d);
Elixir mf_u_inv_eli = mf_u_invFAB.elixir();
Elixir mf_v_inv_eli = mf_v_invFAB.elixir();
const Array4<Real>& mf_u_inv = mf_u_invFAB.array();
const Array4<Real>& mf_v_inv = mf_v_invFAB.array();

// compute inverses
ParallelFor(box2d,
[=] AMREX_GPU_DEVICE (int i, int j, int) noexcept
{
mf_u_inv(i,j,0) = 1. / mf_u(i,j,0);
mf_v_inv(i,j,0) = 1. / mf_v(i,j,0);
});

/*
Print() << "Orig boxes: \n" << bxx << '\n' << bxy << '\n' << bxz << "\n\n";
Print() << "New 2D box: \n" << box2d << "\n\n";
Print() << "Map factors: \n" << mf_u << '\n' << mf_v << "\n\n";
*/

if (!use_terrain) {
// Inline with 2nd order for efficiency
if (horiz_adv_type == AdvType::Centered_2nd && vert_adv_type == AdvType::Centered_2nd)
{
ParallelFor(bxx, bxy, bxz,
[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
{
Real xflux_hi = 0.25 * (rho_u(i, j , k) * mf_u_inv(i,j,0) + rho_u(i+1, j , k) * mf_u_inv(i+1,j,0)) * (u(i+1,j,k) + u(i,j,k));
Real xflux_lo = 0.25 * (rho_u(i, j , k) * mf_u_inv(i,j,0) + rho_u(i-1, j , k) * mf_u_inv(i-1,j,0)) * (u(i-1,j,k) + u(i,j,k));

Real yflux_hi = 0.25 * (rho_v(i, j+1, k) * mf_v_inv(i,j+1,0) + rho_v(i-1, j+1, k) * mf_v_inv(i-1,j+1,0)) * (u(i,j+1,k) + u(i,j,k));
Real yflux_lo = 0.25 * (rho_v(i, j , k) * mf_v_inv(i,j ,0) + rho_v(i-1, j , k) * mf_v_inv(i-1,j ,0)) * (u(i,j-1,k) + u(i,j,k));

Real zflux_hi = 0.25 * (Omega(i, j, k+1) + Omega(i-1, j, k+1)) * (u(i,j,k+1) + u(i,j,k));
Real zflux_lo = 0.25 * (Omega(i, j, k ) + Omega(i-1, j, k )) * (u(i,j,k-1) + u(i,j,k));

Real mfsq = mf_u(i,j,0) * mf_u(i,j,0);

Real advectionSrc = (xflux_hi - xflux_lo) * dxInv * mfsq
+ (yflux_hi - yflux_lo) * dyInv * mfsq
+ (zflux_hi - zflux_lo) * dzInv;
rho_u_rhs(i, j, k) = -advectionSrc;
},
[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
{
Real xflux_hi = 0.25 * (rho_u(i+1, j, k) * mf_u_inv(i+1,j,0) + rho_u(i+1, j-1, k) * mf_u_inv(i+1,j-1,0)) * (v(i+1,j,k) + v(i,j,k));
Real xflux_lo = 0.25 * (rho_u(i , j, k) * mf_u_inv(i ,j,0) + rho_u(i , j-1, k) * mf_u_inv(i ,j-1,0)) * (v(i-1,j,k) + v(i,j,k));

Real yflux_hi = 0.25 * (rho_v(i ,j+1,k) * mf_v_inv(i,j+1,0) + rho_v(i ,j ,k) * mf_v_inv(i,j ,0)) * (v(i,j+1,k) + v(i,j,k));
Real yflux_lo = 0.25 * (rho_v(i ,j ,k) * mf_v_inv(i,j ,0) + rho_v(i ,j-1,k) * mf_v_inv(i,j-1,0) ) * (v(i,j-1,k) + v(i,j,k));

Real zflux_hi = 0.25 * (Omega(i, j, k+1) + Omega(i, j-1, k+1)) * (v(i,j,k+1) + v(i,j,k));
Real zflux_lo = 0.25 * (Omega(i, j, k ) + Omega(i, j-1, k )) * (v(i,j,k-1) + v(i,j,k));

Real mfsq = mf_v(i,j,0) * mf_v(i,j,0);

Real advectionSrc = (xflux_hi - xflux_lo) * dxInv * mfsq
+ (yflux_hi - yflux_lo) * dyInv * mfsq
+ (zflux_hi - zflux_lo) * dzInv;
rho_v_rhs(i, j, k) = -advectionSrc;
},
[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
{
Real xflux_hi = 0.25*(rho_u(i+1,j ,k) + rho_u(i+1, j, k-1)) * mf_u_inv(i+1,j ,0) * (w(i+1,j,k) + w(i,j,k));
Real xflux_lo = 0.25*(rho_u(i ,j ,k) + rho_u(i , j, k-1)) * mf_u_inv(i ,j ,0) * (w(i-1,j,k) + w(i,j,k));

Real yflux_hi = 0.25*(rho_v(i ,j+1,k) + rho_v(i, j+1, k-1)) * mf_v_inv(i ,j+1,0) * (w(i,j+1,k) + w(i,j,k));
Real yflux_lo = 0.25*(rho_v(i ,j ,k) + rho_v(i, j , k-1)) * mf_v_inv(i ,j ,0) * (w(i,j-1,k) + w(i,j,k));

Real zflux_lo = 0.25 * (Omega(i,j,k) + Omega(i,j,k-1)) * (w(i,j,k) + w(i,j,k-1));

Real zflux_hi = (k == domhi_z+1) ? Omega(i,j,k) * w(i,j,k) :
0.25 * (Omega(i,j,k) + Omega(i,j,k+1)) * (w(i,j,k) + w(i,j,k+1));

Real mfsq = mf_m(i,j,0) * mf_m(i,j,0);

Real advectionSrc = (xflux_hi - xflux_lo) * dxInv * mfsq
+ (yflux_hi - yflux_lo) * dyInv * mfsq
+ (zflux_hi - zflux_lo) * dzInv;
rho_w_rhs(i, j, k) = -advectionSrc;
});
// Template higher order methods
} else {
if (horiz_adv_type == AdvType::Centered_2nd) {
AdvectionSrcForMomVert_N<CENTERED2>(bxx, bxy, bxz,
rho_u_rhs, rho_v_rhs, rho_w_rhs,
rho_u, rho_v, Omega, u, v, w,
cellSizeInv, mf_m,
mf_u_inv, mf_v_inv,
vert_adv_type, domhi_z);
} else if (horiz_adv_type == AdvType::Upwind_3rd) {
AdvectionSrcForMomVert_N<UPWIND3>(bxx, bxy, bxz,
rho_u_rhs, rho_v_rhs, rho_w_rhs,
rho_u, rho_v, Omega, u, v, w,
cellSizeInv, mf_m,
mf_u_inv, mf_v_inv,
vert_adv_type, domhi_z);
} else if (horiz_adv_type == AdvType::Centered_4th) {
AdvectionSrcForMomVert_N<CENTERED4>(bxx, bxy, bxz,
rho_u_rhs, rho_v_rhs, rho_w_rhs,
rho_u, rho_v, Omega, u, v, w,
cellSizeInv, mf_m,
mf_u_inv, mf_v_inv,
vert_adv_type, domhi_z);
} else if (horiz_adv_type == AdvType::Upwind_5th) {
AdvectionSrcForMomVert_N<UPWIND5>(bxx, bxy, bxz,
rho_u_rhs, rho_v_rhs, rho_w_rhs,
rho_u, rho_v, Omega, u, v, w,
cellSizeInv, mf_m,
mf_u_inv, mf_v_inv,
vert_adv_type, domhi_z);
} else if (horiz_adv_type == AdvType::Centered_6th) {
AdvectionSrcForMomVert_N<CENTERED6>(bxx, bxy, bxz,
rho_u_rhs, rho_v_rhs, rho_w_rhs,
rho_u, rho_v, Omega, u, v, w,
cellSizeInv, mf_m,
mf_u_inv, mf_v_inv,
vert_adv_type, domhi_z);
} else {
AMREX_ASSERT_WITH_MESSAGE(false, "Unknown advection scheme!");
}
}
} // end of use_terrain == false
else
{ // now do use_terrain == true
// Inline with 2nd order for efficiency
if (horiz_adv_type == AdvType::Centered_2nd && vert_adv_type == AdvType::Centered_2nd)
{
ParallelFor(bxx, bxy, bxz,
[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
{
Real met_h_zeta_xhi = Compute_h_zeta_AtCellCenter(i ,j ,k ,cellSizeInv,z_nd);
Real xflux_hi = 0.25 * (rho_u(i, j , k) * mf_u_inv(i,j,0) + rho_u(i+1, j , k) * mf_u_inv(i+1,j,0)) * (u(i+1,j,k) + u(i,j,k)) * met_h_zeta_xhi;

Real met_h_zeta_xlo = Compute_h_zeta_AtCellCenter(i-1,j ,k ,cellSizeInv,z_nd);
Real xflux_lo = 0.25 * (rho_u(i, j , k) * mf_u_inv(i,j,0) + rho_u(i-1, j , k) * mf_u_inv(i-1,j,0)) * (u(i-1,j,k) + u(i,j,k)) * met_h_zeta_xlo;

Real met_h_zeta_yhi = Compute_h_zeta_AtEdgeCenterK(i ,j+1,k ,cellSizeInv,z_nd);
Real yflux_hi = 0.25 * (rho_v(i, j+1, k) * mf_v_inv(i ,j+1,0) + rho_v(i-1, j+1, k) * mf_v_inv(i-1,j+1,0)) * (u(i,j+1,k) + u(i,j,k)) * met_h_zeta_yhi;

Real met_h_zeta_ylo = Compute_h_zeta_AtEdgeCenterK(i ,j ,k ,cellSizeInv,z_nd);
Real yflux_lo = 0.25 * (rho_v(i, j , k) * mf_v_inv(i ,j ,0) + rho_v(i-1, j , k) * mf_v_inv(i-1,j ,0)) * (u(i,j-1,k) + u(i,j,k)) * met_h_zeta_ylo;

Real zflux_hi = 0.25 * (Omega(i, j, k+1) + Omega(i-1, j, k+1)) * (u(i,j,k+1) + u(i,j,k));
Real zflux_lo = 0.25 * (Omega(i, j, k ) + Omega(i-1, j, k )) * (u(i,j,k-1) + u(i,j,k));

Real mfsq = mf_u(i,j,0) * mf_u(i,j,0);

Real advectionSrc = (xflux_hi - xflux_lo) * dxInv * mfsq
+ (yflux_hi - yflux_lo) * dyInv * mfsq
+ (zflux_hi - zflux_lo) * dzInv;

rho_u_rhs(i, j, k) = -advectionSrc / (0.5 * (detJ(i,j,k) + detJ(i-1,j,k)));
},
[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
{
Real met_h_zeta_xhi = Compute_h_zeta_AtEdgeCenterK(i+1,j ,k ,cellSizeInv,z_nd);
Real xflux_hi = 0.25 * (rho_u(i+1,j ,k) * mf_u_inv(i+1,j ,0) + rho_u(i+1,j-1, k) * mf_u_inv(i+1,j-1,0)) * (v(i+1,j,k) + v(i,j,k)) * met_h_zeta_xhi;

Real met_h_zeta_xlo = Compute_h_zeta_AtEdgeCenterK(i ,j ,k ,cellSizeInv,z_nd);
Real xflux_lo = 0.25 * (rho_u(i, j , k) * mf_u_inv(i ,j ,0) + rho_u(i ,j-1, k) * mf_u_inv(i-1,j ,0)) * (v(i-1,j,k) + v(i,j,k)) * met_h_zeta_xlo;

Real met_h_zeta_yhi = Compute_h_zeta_AtCellCenter(i ,j ,k ,cellSizeInv,z_nd);
Real yflux_hi = 0.25 * (rho_v(i ,j+1, k) * mf_v_inv(i ,j+1,0) + rho_v(i ,j ,k) * mf_v_inv(i ,j ,0)) * (v(i,j+1,k) + v(i,j,k)) * met_h_zeta_yhi;

Real met_h_zeta_ylo = Compute_h_zeta_AtCellCenter(i ,j-1,k ,cellSizeInv,z_nd);
Real yflux_lo = 0.25 * (rho_v(i ,j ,k) * mf_v_inv(i ,j ,0) + rho_v(i , j-1, k) * mf_v_inv(i ,j-1,0)) * (v(i,j-1,k) + v(i,j,k)) * met_h_zeta_ylo;

Real zflux_hi = 0.25 * (Omega(i, j, k+1) + Omega(i, j-1, k+1)) * (v(i,j,k+1) + v(i,j,k));
Real zflux_lo = 0.25 * (Omega(i, j, k ) + Omega(i, j-1, k )) * (v(i,j,k-1) + v(i,j,k));

Real mfsq = mf_v(i,j,0) * mf_v(i,j,0);

Real advectionSrc = (xflux_hi - xflux_lo) * dxInv * mfsq
+ (yflux_hi - yflux_lo) * dyInv * mfsq
+ (zflux_hi - zflux_lo) * dzInv;

rho_v_rhs(i, j, k) = -advectionSrc / (0.5 * (detJ(i,j,k) + detJ(i,j-1,k)));
},
[=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
{
Real met_h_zeta_xhi = Compute_h_zeta_AtEdgeCenterJ(i+1,j ,k ,cellSizeInv,z_nd);
Real xflux_hi = 0.25*(rho_u(i+1,j ,k) + rho_u(i+1, j, k-1)) * mf_u_inv(i+1,j ,0) * (w(i+1,j,k) + w(i,j,k)) * met_h_zeta_xhi;

Real met_h_zeta_xlo = Compute_h_zeta_AtEdgeCenterJ(i ,j ,k ,cellSizeInv,z_nd);
Real xflux_lo = 0.25*(rho_u(i ,j ,k) + rho_u(i , j, k-1)) * mf_u_inv(i ,j ,0) * (w(i-1,j,k) + w(i,j,k)) * met_h_zeta_xlo;

Real met_h_zeta_yhi = Compute_h_zeta_AtEdgeCenterI(i ,j+1,k ,cellSizeInv,z_nd);
Real yflux_hi = 0.25*(rho_v(i ,j+1,k) + rho_v(i, j+1, k-1)) * mf_v_inv(i ,j+1,0) * (w(i,j+1,k) + w(i,j,k)) * met_h_zeta_yhi;

Real met_h_zeta_ylo = Compute_h_zeta_AtEdgeCenterI(i ,j ,k ,cellSizeInv,z_nd);
Real yflux_lo = 0.25*(rho_v(i ,j ,k) + rho_v(i, j , k-1)) * mf_v_inv(i ,j ,0) * (w(i,j-1,k) + w(i,j,k)) * met_h_zeta_ylo;

Real zflux_lo = 0.25 * (Omega(i,j,k) + Omega(i,j,k-1)) * (w(i,j,k) + w(i,j,k-1));

Real zflux_hi = (k == domhi_z+1) ? Omega(i,j,k) * w(i,j,k) :
0.25 * (Omega(i,j,k) + Omega(i,j,k+1)) * (w(i,j,k) + w(i,j,k+1));

Real mfsq = mf_m(i,j,0) * mf_m(i,j,0);

Real advectionSrc = (xflux_hi - xflux_lo) * dxInv * mfsq
+ (yflux_hi - yflux_lo) * dyInv * mfsq
+ (zflux_hi - zflux_lo) * dzInv;

rho_w_rhs(i, j, k) = -advectionSrc / (0.5*(detJ(i,j,k) + detJ(i,j,k-1)));
});
// Template higher order methods
} else {
if (horiz_adv_type == AdvType::Centered_2nd) {
AdvectionSrcForMomVert_T<CENTERED2>(bxx, bxy, bxz,
rho_u_rhs, rho_v_rhs, rho_w_rhs,
rho_u, rho_v, Omega, u, v, w, z_nd, detJ,
cellSizeInv, mf_m, mf_u_inv, mf_v_inv,
vert_adv_type, domhi_z);
} else if (horiz_adv_type == AdvType::Upwind_3rd) {
AdvectionSrcForMomVert_T<UPWIND3>(bxx, bxy, bxz,
rho_u_rhs, rho_v_rhs, rho_w_rhs,
rho_u, rho_v, Omega, u, v, w, z_nd, detJ,
cellSizeInv, mf_m, mf_u_inv, mf_v_inv,
vert_adv_type, domhi_z);
} else if (horiz_adv_type == AdvType::Centered_4th) {
AdvectionSrcForMomVert_T<CENTERED4>(bxx, bxy, bxz,
rho_u_rhs, rho_v_rhs, rho_w_rhs,
rho_u, rho_v, Omega, u, v, w, z_nd, detJ,
cellSizeInv, mf_m, mf_u_inv, mf_v_inv,
vert_adv_type, domhi_z);
} else if (horiz_adv_type == AdvType::Upwind_5th) {
AdvectionSrcForMomVert_T<UPWIND5>(bxx, bxy, bxz,
rho_u_rhs, rho_v_rhs, rho_w_rhs,
rho_u, rho_v, Omega, u, v, w, z_nd, detJ,
cellSizeInv, mf_m, mf_u_inv, mf_v_inv,
vert_adv_type, domhi_z);
} else if (horiz_adv_type == AdvType::Centered_6th) {
AdvectionSrcForMomVert_T<CENTERED6>(bxx, bxy, bxz,
rho_u_rhs, rho_v_rhs, rho_w_rhs,
rho_u, rho_v, Omega, u, v, w, z_nd, detJ,
cellSizeInv, mf_m, mf_u_inv, mf_v_inv,
vert_adv_type, domhi_z);
} else {
AMREX_ASSERT_WITH_MESSAGE(false, "Unknown advection scheme!");
}
}
}
}

Loading

0 comments on commit 93320c9

Please sign in to comment.