diff --git a/include/Residual/ResidualGive/applyAGive.inl b/include/Residual/ResidualGive/applyAGive.inl index 2802e0ef..3692f493 100644 --- a/include/Residual/ResidualGive/applyAGive.inl +++ b/include/Residual/ResidualGive/applyAGive.inl @@ -4,30 +4,33 @@ namespace residual_give { template -static inline void node_apply_a_give(int i_r, int i_theta, double r, double theta, const PolarGrid& grid, - const LevelCacheType& level_cache, bool DirBC_Interior, Vector& result, - ConstVector& x) +static inline void node_apply_a_give(int i_r, int i_theta, const PolarGrid& grid, const LevelCacheType& level_cache, + bool DirBC_Interior, Vector& result, ConstVector& x) { /* ---------------------------------------- */ /* Compute or retrieve stencil coefficients */ /* ---------------------------------------- */ - const int center = grid.index(i_r, i_theta); + const int center = grid.index(i_r, i_theta); + const double radius = grid.radius(i_r); + const double theta = grid.theta(i_theta); + double coeff_beta, arr, att, art, detDF; - level_cache.obtainValues(i_r, i_theta, center, r, theta, coeff_beta, arr, att, art, detDF); + level_cache.obtainValues(i_r, i_theta, center, radius, theta, coeff_beta, arr, att, art, detDF); /* -------------------- */ /* Node in the interior */ /* -------------------- */ - if (i_r > 1 && i_r < grid.nr() - 2) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; + if (i_r > 0 && i_r < grid.nr() - 1) { + const double h1 = grid.radialSpacing(i_r - 1); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); + + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; + const double coeff5 = 0.25 * (h1 + h2) * (k1 + k2); const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); @@ -38,23 +41,28 @@ static inline void node_apply_a_give(int i_r, int i_theta, double r, double thet const int top = grid.index(i_r, i_theta_P1); /* Fill result(i,j) */ - result[center] += (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF) * x[center] /* beta_{i,j} */ + result[center] += (coeff5 * coeff_beta * std::fabs(detDF) * x[center] /* beta_{i,j} */ - coeff1 * arr * x[left] /* Left */ - coeff2 * arr * x[right] /* Right */ - coeff3 * att * x[bottom] /* Bottom */ - coeff4 * att * x[top] /* Top */ + ((coeff1 + coeff2) * arr + (coeff3 + coeff4) * att) * - x[center]) /* Center: (Left, Right, Bottom, Top) */; + x[center]); /* Center: (Left, Right, Bottom, Top) */ + /* Fill result(i-1,j) */ - result[left] += (-coeff1 * arr * x[center] /* Right */ - + coeff1 * arr * x[left] /* Center: (Right) */ - - 0.25 * art * x[top] /* Top Right */ - + 0.25 * art * x[bottom]); /* Bottom Right */ + if (i_r > 1 || (i_r == 1 && !DirBC_Interior)) { /* Don't give to the inner dirichlet boundary! */ + result[left] += (-coeff1 * arr * x[center] /* Right */ + + coeff1 * arr * x[left] /* Center: (Right) */ + - 0.25 * art * x[top] /* Top Right */ + + 0.25 * art * x[bottom]); /* Bottom Right */ + } /* Fill result(i+1,j) */ - result[right] += (-coeff2 * arr * x[center] /* Left */ - + coeff2 * arr * x[right] /* Center: (Left) */ - + 0.25 * art * x[top] /* Top Left */ - - 0.25 * art * x[bottom]); /* Bottom Left */ + if (i_r < grid.nr() - 2) { /* Don't give to the outer dirichlet boundary! */ + result[right] += (-coeff2 * arr * x[center] /* Left */ + + coeff2 * arr * x[right] /* Center: (Left) */ + + 0.25 * art * x[top] /* Top Left */ + - 0.25 * art * x[bottom]); /* Bottom Left */ + } /* Fill result(i,j-1) */ result[bottom] += (-coeff3 * att * x[center] /* Top */ + coeff3 * att * x[bottom] /* Center: (Top) */ @@ -75,14 +83,14 @@ static inline void node_apply_a_give(int i_r, int i_theta, double r, double thet /* ------------------------------------------------ */ if (DirBC_Interior) { /* Fill result(i,j) */ - result[center] += x[center]; + result[center] = x[center]; /* Give value to the interior nodes! */ - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); - double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff2 = 0.5 * (k1 + k2) / h2; const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); @@ -104,15 +112,16 @@ static inline void node_apply_a_give(int i_r, int i_theta, double r, double thet /* h1 gets replaced with 2 * R0. */ /* (i_r-1,i_theta) gets replaced with (i_r, i_theta + (grid.ntheta()/2)). */ /* Some more adjustments from the changing the 9-point stencil to the artifical 7-point stencil. */ - double h1 = 2.0 * grid.radius(0); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); + const double h1 = 2.0 * grid.radius(0); + const double h2 = grid.radialSpacing(i_r); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; + const double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff2 = 0.5 * (k1 + k2) / h2; + const double coeff3 = 0.5 * (h1 + h2) / k1; + const double coeff4 = 0.5 * (h1 + h2) / k2; + const double coeff5 = 0.25 * (h1 + h2) * (k1 + k2); const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); @@ -124,7 +133,7 @@ static inline void node_apply_a_give(int i_r, int i_theta, double r, double thet const int top = grid.index(i_r, i_theta_P1); /* Fill result(i,j) */ - result[center] += (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF) * x[center] /* beta_{i,j} */ + result[center] += (coeff5 * coeff_beta * std::fabs(detDF) * x[center] /* beta_{i,j} */ - coeff1 * arr * x[left] /* Left */ - coeff2 * arr * x[right] /* Right */ - coeff3 * att * x[bottom] /* Bottom */ @@ -154,125 +163,19 @@ static inline void node_apply_a_give(int i_r, int i_theta, double r, double thet /* - 0.25 * art * x[left]; // Bottom Left: REMOVED DUE TO ARTIFICAL 7 POINT STENCIL */ } } - /* ------------------------------- */ - /* Node next to the inner boundary */ - /* ------------------------------- */ - else if (i_r == 1) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - - const int left = grid.index(i_r - 1, i_theta); - const int right = grid.index(i_r + 1, i_theta); - const int bottom = grid.index(i_r, i_theta_M1); - const int top = grid.index(i_r, i_theta_P1); - - /* Fill result(i,j) */ - result[center] += (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF) * x[center] /* beta_{i,j} */ - - coeff1 * arr * x[left] /* Left */ - - coeff2 * arr * x[right] /* Right */ - - coeff3 * att * x[bottom] /* Bottom */ - - coeff4 * att * x[top] /* Top */ - + ((coeff1 + coeff2) * arr + (coeff3 + coeff4) * att) * - x[center]); /* Center: (Left, Right, Bottom, Top) */ - /* Fill result(i-1,j) */ - if (!DirBC_Interior) { /* Don't give to the inner dirichlet boundary! */ - result[left] += (-coeff1 * arr * x[center] /* Right */ - + coeff1 * arr * x[left] /* Center: (Right) */ - - 0.25 * art * x[top] /* Top Right */ - + 0.25 * art * x[bottom]); /* Bottom Right */ - } - /* Fill result(i+1,j) */ - result[right] += (-coeff2 * arr * x[center] /* Left */ - + coeff2 * arr * x[right] /* Center: (Left) */ - + 0.25 * art * x[top] /* Top Left */ - - 0.25 * art * x[bottom]); /* Bottom Left */ - /* Fill result(i,j-1) */ - result[bottom] += (-coeff3 * att * x[center] /* Top */ - + coeff3 * att * x[bottom] /* Center: (Top) */ - - 0.25 * art * x[right] /* Top Right */ - + 0.25 * art * x[left]); /* Top Left */ - /* Fill result(i,j+1) */ - result[top] += (-coeff4 * att * x[center] /* Bottom */ - + coeff4 * att * x[top] /* Center: (Bottom) */ - + 0.25 * art * x[right] /* Bottom Right */ - - 0.25 * art * x[left]); /* Bottom Left */ - } - /* ------------------------------- */ - /* Node next to the outer boundary */ - /* ------------------------------- */ - else if (i_r == grid.nr() - 2) { - double h1 = grid.radialSpacing(i_r - 1); - double h2 = grid.radialSpacing(i_r); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); - - double coeff1 = 0.5 * (k1 + k2) / h1; - double coeff2 = 0.5 * (k1 + k2) / h2; - double coeff3 = 0.5 * (h1 + h2) / k1; - double coeff4 = 0.5 * (h1 + h2) / k2; - - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); - - const int left = grid.index(i_r - 1, i_theta); - const int right = grid.index(i_r + 1, i_theta); - const int bottom = grid.index(i_r, i_theta_M1); - const int top = grid.index(i_r, i_theta_P1); - - /* Fill result(i,j) */ - result[center] += (0.25 * (h1 + h2) * (k1 + k2) * coeff_beta * std::fabs(detDF) * x[center] /* beta_{i,j} */ - - coeff1 * arr * x[left] /* Left */ - - coeff2 * arr * x[right] /* Right */ - - coeff3 * att * x[bottom] /* Bottom */ - - coeff4 * att * x[top] /* Top */ - + ((coeff1 + coeff2) * arr + (coeff3 + coeff4) * att) * - x[center]); /* Center: (Left, Right, Bottom, Top) */ - /* Fill result(i-1,j) */ - result[left] += (-coeff1 * arr * x[center] /* Right */ - + coeff1 * arr * x[left] /* Center: (Right) */ - - 0.25 * art * x[top] /* Top Right */ - + 0.25 * art * x[bottom]); /* Bottom Right */ - /* Don't give to the outer dirichlet boundary! */ - /* Fill result(i+1,j) */ - /* result[right] += ( */ - /* - coeff2 * arr * x[center] // Left */ - /* + coeff2 * arr * x[right] // Center: (Left) */ - /* + 0.25 * art * x[top] // Top Left */ - /* - 0.25 * art * x[bottom]); // Bottom Left */ - /* Fill result(i,j-1) */ - result[bottom] += (-coeff3 * att * x[center] /* Top */ - + coeff3 * att * x[bottom] /* Center: (Top) */ - - 0.25 * art * x[right] /* Top Right */ - + 0.25 * art * x[left]); /* Top Left */ - /* Fill result(i,j+1) */ - result[top] += (-coeff4 * att * x[center] /* Bottom */ - + coeff4 * att * x[top] /* Center: (Bottom) */ - + 0.25 * art * x[right] /* Bottom Right */ - - 0.25 * art * x[left]); /* Bottom Left */ - } /* ----------------------------- */ /* Node on to the outer boundary */ /* ----------------------------- */ else if (i_r == grid.nr() - 1) { /* Fill result of (i,j) */ - result[center] += x[center]; + result[center] = x[center]; /* Give value to the interior nodes! */ - double h1 = grid.radialSpacing(i_r - 1); - double k1 = grid.angularSpacing(i_theta - 1); - double k2 = grid.angularSpacing(i_theta); + const double h1 = grid.radialSpacing(i_r - 1); + const double k1 = grid.angularSpacing(i_theta - 1); + const double k2 = grid.angularSpacing(i_theta); - double coeff1 = 0.5 * (k1 + k2) / h1; + const double coeff1 = 0.5 * (k1 + k2) / h1; const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); @@ -292,32 +195,69 @@ static inline void node_apply_a_give(int i_r, int i_theta, double r, double thet } // namespace residual_give template -void ResidualGive::applyCircleSection(const int i_r, Vector result, ConstVector x) const +void ResidualGive::applySystemOperator(Vector result, ConstVector x) const { - using residual_give::node_apply_a_give; + assert(result.size() == x.size()); - const PolarGrid& grid = Residual::grid_; - - const double r = grid.radius(i_r); - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - const double theta = grid.theta(i_theta); - node_apply_a_give(i_r, i_theta, r, theta, grid, Residual::level_cache_, - Residual::DirBC_Interior_, result, x); - } -} + const PolarGrid& grid = Residual::grid_; + const LevelCacheType& level_cache = Residual::level_cache_; + const bool DirBC_Interior = Residual::DirBC_Interior_; -template -void ResidualGive::applyRadialSection(const int i_theta, Vector result, - ConstVector x) const -{ using residual_give::node_apply_a_give; - const PolarGrid& grid = Residual::grid_; + assign(result, 0.0); + + /* ---------------- */ + /* Circular section */ + /* ---------------- */ + // We parallelize over i_r (step 3) to avoid data race conditions between adjacent circles. + // The i_theta loop is sequential inside the kernel. + const int num_circle_tasks = grid.numberSmootherCircles(); + + for (int start_circle = 0; start_circle < 3; ++start_circle) { + const int num_circular_tasks = (num_circle_tasks - start_circle + 2) / 3; + Kokkos::parallel_for( + "ResidualGive: ApplyA (Circular)", Kokkos::RangePolicy<>(0, num_circular_tasks), + KOKKOS_LAMBDA(const int circle_task) { + const int i_r = start_circle + circle_task * 3; + for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { + node_apply_a_give(i_r, i_theta, grid, level_cache, DirBC_Interior, result, x); + } + }); + Kokkos::fence(); + } + + /* -------------- */ + /* Radial section */ + /* -------------- */ + // We parallelize over i_theta (step 3) to avoid data race conditions between adjacent radial lines. + // The i_r loop is sequential inside the kernel. + // Due to periodicity in the angular direction, handle up to 2 additional + // radial lines (i_theta = 0 and 1) before the parallel passes. + const int additional_radial_tasks = grid.ntheta() % 3; + const int num_radial_tasks = grid.ntheta() - additional_radial_tasks; + + for (int i_theta = 0; i_theta < additional_radial_tasks; i_theta++) { + Kokkos::parallel_for( + "ResidualGive: ApplyA (Radial, additional)", Kokkos::RangePolicy<>(0, 1), KOKKOS_LAMBDA(const int) { + for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { + node_apply_a_give(i_r, i_theta, grid, level_cache, DirBC_Interior, result, x); + } + }); + Kokkos::fence(); + } - const double theta = grid.theta(i_theta); - for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { - const double r = grid.radius(i_r); - node_apply_a_give(i_r, i_theta, r, theta, grid, Residual::level_cache_, - Residual::DirBC_Interior_, result, x); + for (int start_radial = 0; start_radial < 3; ++start_radial) { + const int num_radial_batches = (num_radial_tasks - start_radial + 2) / 3; + Kokkos::parallel_for( + "ResidualGive: ApplyA (Radial)", Kokkos::RangePolicy<>(0, num_radial_batches), + KOKKOS_LAMBDA(const int radial_task) { + const int i_theta = additional_radial_tasks + start_radial + radial_task * 3; + for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { + node_apply_a_give(i_r, i_theta, grid, level_cache, DirBC_Interior, result, x); + } + }); + Kokkos::fence(); } } +// clang-format on \ No newline at end of file diff --git a/include/Residual/ResidualGive/residualGive.h b/include/Residual/ResidualGive/residualGive.h index 37d1040d..4aec6dbb 100644 --- a/include/Residual/ResidualGive/residualGive.h +++ b/include/Residual/ResidualGive/residualGive.h @@ -13,15 +13,11 @@ class ResidualGive : public Residual const int num_omp_threads); ~ResidualGive() override = default; - void computeResidual(Vector result, ConstVector rhs, ConstVector x) const final; - void applySystemOperator(Vector result, ConstVector x) const final; - -private: - void applyCircleSection(const int i_r, Vector result, ConstVector x) const; - void applyRadialSection(const int i_theta, Vector result, ConstVector x) const; + void computeResidual(Vector result, ConstVector rhs, ConstVector x) const final; }; #include "residualGive.inl" #include "applyAGive.inl" + } // namespace gmgpolar diff --git a/include/Residual/ResidualGive/residualGive.inl b/include/Residual/ResidualGive/residualGive.inl index bc50f620..0f1648d2 100644 --- a/include/Residual/ResidualGive/residualGive.inl +++ b/include/Residual/ResidualGive/residualGive.inl @@ -7,105 +7,6 @@ ResidualGive::ResidualGive(const PolarGrid& grid, const LevelCac { } -/* ------------ */ -/* result = A*x */ - -// clang-format off -template -void ResidualGive::applySystemOperator(Vector result, ConstVector x) const -{ - assert(result.size() == x.size()); - - assign(result, 0.0); - - const PolarGrid& grid = Residual::grid_; - - const int num_omp_threads = Residual::num_omp_threads_; - - /* Single-threaded execution */ - if (num_omp_threads == 1) { - for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { - applyCircleSection(i_r, result, x); - } - for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - applyRadialSection(i_theta, result, x); - } - } - /* Multi-threaded execution */ - else { - const int num_circle_tasks = grid.numberSmootherCircles(); - const int additional_radial_tasks = grid.ntheta() % 3; - const int num_radial_tasks = grid.ntheta() - additional_radial_tasks; - - #pragma omp parallel num_threads(num_omp_threads) - { - /* Circle Section 0 */ - #pragma omp for - for (int circle_task = 0; circle_task < num_circle_tasks; circle_task += 3) { - int i_r = grid.numberSmootherCircles() - circle_task - 1; - applyCircleSection(i_r, result, x); - } - /* Circle Section 1 */ - #pragma omp for - for (int circle_task = 1; circle_task < num_circle_tasks; circle_task += 3) { - int i_r = grid.numberSmootherCircles() - circle_task - 1; - applyCircleSection(i_r, result, x); - } - /* Circle Section 2 */ - #pragma omp for nowait - for (int circle_task = 2; circle_task < num_circle_tasks; circle_task += 3) { - int i_r = grid.numberSmootherCircles() - circle_task - 1; - applyCircleSection(i_r, result, x); - } - - /* Radial Section 0 */ - #pragma omp for - for (int radial_task = 0; radial_task < num_radial_tasks; radial_task += 3) { - if (radial_task > 0) { - int i_theta = radial_task + additional_radial_tasks; - applyRadialSection(i_theta, result, x); - } - else { - if (additional_radial_tasks == 0) { - applyRadialSection(0, result, x); - } - else if (additional_radial_tasks >= 1) { - applyRadialSection(0, result, x); - applyRadialSection(1, result, x); - } - } - } - /* Radial Section 1 */ - #pragma omp for - for (int radial_task = 1; radial_task < num_radial_tasks; radial_task += 3) { - if (radial_task > 1) { - int i_theta = radial_task + additional_radial_tasks; - applyRadialSection(i_theta, result, x); - } - else { - if (additional_radial_tasks == 0) { - applyRadialSection(1, result, x); - } - else if (additional_radial_tasks == 1) { - applyRadialSection(2, result, x); - } - else if (additional_radial_tasks == 2) { - applyRadialSection(2, result, x); - applyRadialSection(3, result, x); - } - } - } - /* Radial Section 2 */ - #pragma omp for - for (int radial_task = 2; radial_task < num_radial_tasks; radial_task += 3) { - int i_theta = radial_task + additional_radial_tasks; - applyRadialSection(i_theta, result, x); - } - } - } -} -// clang-format on - /* ------------------ */ /* result = rhs - A*x */ template @@ -117,10 +18,11 @@ void ResidualGive::computeResidual(Vector result, ConstV applySystemOperator(result, x); // Subtract A*x from rhs to get the residual. - const int n = result.size(); - const int num_omp_threads = Residual::num_omp_threads_; -#pragma omp parallel for num_threads(num_omp_threads) - for (int i = 0; i < n; i++) { - result[i] = rhs[i] - result[i]; - } -} + const int n = result.size(); + + Kokkos::parallel_for( + "Residual Give: Subtract A*x from rhs", Kokkos::RangePolicy<>(0, n), + KOKKOS_LAMBDA(const int i) { result[i] = rhs[i] - result[i]; }); + + Kokkos::fence(); +} \ No newline at end of file diff --git a/include/Residual/ResidualTake/applyATake.inl b/include/Residual/ResidualTake/applyATake.inl index 0982b315..3cd44fc3 100644 --- a/include/Residual/ResidualTake/applyATake.inl +++ b/include/Residual/ResidualTake/applyATake.inl @@ -28,12 +28,11 @@ static KOKKOS_INLINE_FUNCTION void node_apply_a_take(const int i_r, const int i_ const double coeff4 = 0.5 * (h1 + h2) / k2; const double coeff5 = 0.25 * (h1 + h2) * (k1 + k2); - const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); - const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_M1 = grid.wrapThetaIndex(i_theta - 1); + const int i_theta_P1 = grid.wrapThetaIndex(i_theta + 1); + const int i_theta_Across = grid.wrapThetaIndex(i_theta + grid.ntheta() / 2); - // Across origin: (i_r - 1, i_theta) gets replaced with (i_r, i_theta + (grid.ntheta() / 2)). - const int left = - (i_r > 0) ? grid.index(i_r - 1, i_theta) : grid.index(i_r, grid.wrapThetaIndex(i_theta + grid.ntheta() / 2)); + const int left = (i_r == 0) ? grid.index(i_r, i_theta_Across) : grid.index(i_r - 1, i_theta); const int right = grid.index(i_r + 1, i_theta); const int bottom = grid.index(i_r, i_theta_M1); const int top = grid.index(i_r, i_theta_P1); diff --git a/include/Residual/ResidualTake/residualTake.h b/include/Residual/ResidualTake/residualTake.h index 2c7cb173..d24b1880 100644 --- a/include/Residual/ResidualTake/residualTake.h +++ b/include/Residual/ResidualTake/residualTake.h @@ -13,11 +13,11 @@ class ResidualTake : public Residual const int num_omp_threads); ~ResidualTake() override = default; - void computeResidual(Vector result, ConstVector rhs, ConstVector x) const override; - - void applySystemOperator(Vector result, ConstVector x) const override; + void applySystemOperator(Vector result, ConstVector x) const final; + void computeResidual(Vector result, ConstVector rhs, ConstVector x) const final; }; #include "residualTake.inl" #include "applyATake.inl" + } // namespace gmgpolar diff --git a/include/Residual/ResidualTake/residualTake.inl b/include/Residual/ResidualTake/residualTake.inl index ab73cbda..dfe32e08 100644 --- a/include/Residual/ResidualTake/residualTake.inl +++ b/include/Residual/ResidualTake/residualTake.inl @@ -15,14 +15,14 @@ void ResidualTake::computeResidual(Vector result, ConstV { assert(result.size() == x.size()); - const int num_omp_threads = Residual::num_omp_threads_; - applySystemOperator(result, x); // Subtract A*x from rhs to get the residual. const int n = result.size(); -#pragma omp parallel for num_threads(num_omp_threads) - for (int i = 0; i < n; i++) { - result[i] = rhs[i] - result[i]; - } + + Kokkos::parallel_for( + "Residual Take: Subtract A*x from rhs", Kokkos::RangePolicy<>(0, n), + KOKKOS_LAMBDA(const int i) { result[i] = rhs[i] - result[i]; }); + + Kokkos::fence(); } diff --git a/include/Residual/residual.h b/include/Residual/residual.h index f1525bd1..0b110ab7 100644 --- a/include/Residual/residual.h +++ b/include/Residual/residual.h @@ -26,10 +26,9 @@ class Residual } virtual ~Residual() = default; + virtual void applySystemOperator(Vector result, ConstVector x) const = 0; virtual void computeResidual(Vector result, ConstVector rhs, ConstVector x) const = 0; - virtual void applySystemOperator(Vector result, ConstVector x) const = 0; - protected: /* ------------------- */ /* Constructor members */