@@ -239,64 +239,18 @@ void ABLStats::compute_zi()
         .create_scratch_field(3, m_temperature.num_grow()[0]);
     fvm::gradient(*gradT, m_temperature);

-    // // Only compute zi using coarsest level
-    // BL_PROFILE_VAR("amr-wind::ABLStats::compute_zi_a", ab);
+    // Only compute zi using coarsest level
     const int lev = 0;
     const int dir = m_normal_dir;
     const auto& geom = (this->m_sim.repo()).mesh().Geom(lev);
     auto const& domain_box = geom.Domain();
-    // const auto& gradT_arrs = (*gradT)(lev).const_arrays();
-    // auto device_tg_fab = amrex::ReduceToPlane<
-    //     amrex::ReduceOpMax, amrex::KeyValuePair<amrex::Real, int>>(
-    //     dir, domain_box, m_temperature(lev),
-    //     [=] AMREX_GPU_DEVICE(int nbx, int i, int j, int k)
-    //         -> amrex::KeyValuePair<amrex::Real, int> {
-    //         const amrex::IntVect iv(i, j, k);
-    //         return {gradT_arrs[nbx](i, j, k, dir), iv[dir]};
-    //     });
-    // BL_PROFILE_VAR_STOP(ab);
-
-    // BL_PROFILE_VAR("amr-wind::ABLStats::compute_zi_b", bb);
-    // #ifdef AMREX_USE_GPU
-    // amrex::BaseFab<amrex::KeyValuePair<amrex::Real, int>> pinned_tg_fab(
-    //     device_tg_fab.box(), device_tg_fab.nComp(), amrex::The_Pinned_Arena());
-    // amrex::Gpu::dtoh_memcpy(
-    //     pinned_tg_fab.dataPtr(), device_tg_fab.dataPtr(),
-    //     pinned_tg_fab.nBytes());
-    // #else
-    // auto& pinned_tg_fab = device_tg_fab;
-    // #endif
-    // BL_PROFILE_VAR_STOP(bb);
-
-    // BL_PROFILE_VAR("amr-wind::ABLStats::compute_zi_barrier", barrier);
-    // amrex::ParallelDescriptor::Barrier();
-    // BL_PROFILE_VAR_STOP(barrier);
-
-    // BL_PROFILE_VAR("amr-wind::ABLStats::compute_zi_c", cb);
-    // amrex::ParallelReduce::Max(
-    //     pinned_tg_fab.dataPtr(), static_cast<int>(pinned_tg_fab.size()),
-    //     amrex::ParallelDescriptor::IOProcessorNumber(),
-    //     amrex::ParallelDescriptor::Communicator());
-    // BL_PROFILE_VAR_STOP(cb);
-
-    // BL_PROFILE_VAR("amr-wind::ABLStats::compute_zi_d", db);
-    // if (amrex::ParallelDescriptor::IOProcessor()) {
-    //     const auto dnval = m_dn;
-    //     auto* p = pinned_tg_fab.dataPtr();
-    //     m_zi = amrex::Reduce::Sum<amrex::Real>(
-    //         pinned_tg_fab.size(),
-    //         [=] AMREX_GPU_DEVICE(int i) noexcept -> amrex::Real {
-    //             return (p[i].second() + 0.5) * dnval;
-    //         },
-    //         0.0);
-    //     m_zi /= static_cast<amrex::Real>(pinned_tg_fab.size());
-    // }
-    // BL_PROFILE_VAR_STOP(db);
-
-    AMREX_ALWAYS_ASSERT(domain_box.smallEnd() == 0); // We could relax this if necessary.
-    amrex::Array<bool,AMREX_SPACEDIM> decomp{AMREX_D_DECL(true,true,true)};
+
+    AMREX_ALWAYS_ASSERT(
+        domain_box.smallEnd() == 0); // We could relax this if necessary.
+    amrex::Array<bool, AMREX_SPACEDIM> decomp{AMREX_D_DECL(true, true, true)};
     decomp[dir] = false; // no domain decompose in the dir direction.
-    auto new_ba = amrex::decompose(domain_box, amrex::ParallelDescriptor::NProcs(), decomp);
+    auto new_ba = amrex::decompose(
+        domain_box, amrex::ParallelDescriptor::NProcs(), decomp);

     amrex::Vector<int> pmap(new_ba.size());
     std::iota(pmap.begin(), pmap.end(), 0);
@@ -310,7 +264,8 @@ void ABLStats::compute_zi()
     if (myproc < new_mf.size()) {
         auto const& a = new_mf.const_array(myproc);
         amrex::Box box2d = amrex::makeSlab(amrex::Box(a), dir, 0);
-        AMREX_ALWAYS_ASSERT(dir == 2); // xxxxx TODO: we can support other directions later
+        AMREX_ALWAYS_ASSERT(
+            dir == 2); // xxxxx TODO: we can support other directions later
         // xxxxx TODO: sycl can be supported in the future.
         // xxxxx TODO: we can support CPU later.
         const int nblocks = box2d.numPts();
@@ -321,35 +276,40 @@ void ABLStats::compute_zi()
         const int loy = box2d.smallEnd(1);
         amrex::Gpu::DeviceVector<int> tmp(nblocks);
         auto* ptmp = tmp.data();
-        amrex::launch<nthreads>(nblocks, amrex::Gpu::gpuStream(),
-        [=] AMREX_GPU_DEVICE ()
-        {
-            const int j = int(blockIdx.x) / lenx + loy;
-            const int i = int(blockIdx.x) - j*lenx + lox;
-            amrex::KeyValuePair<amrex::Real,int> r{std::numeric_limits<amrex::Real>::lowest(),0};
-            for (int k = threadIdx.x; k < lenz; k += nthreads) {
-                if (a(i,j,k) > r.first()) { r.second() = k; r.first() = a(i,j,k); }
-            }
-            r = amrex::Gpu::blockReduceMax<nthreads>(r);
-            if (threadIdx.x == 0) {
-                ptmp[blockIdx.x] = r.second();
-            }
-        });
+        amrex::launch<nthreads>(
+            nblocks, amrex::Gpu::gpuStream(), [=] AMREX_GPU_DEVICE() {
+                const int j = int(blockIdx.x) / lenx + loy;
+                const int i = int(blockIdx.x) - j * lenx + lox;
+                amrex::KeyValuePair<amrex::Real, int> r{
+                    std::numeric_limits<amrex::Real>::lowest(), 0};
+                for (int k = threadIdx.x; k < lenz; k += nthreads) {
+                    if (a(i, j, k) > r.first()) {
+                        r.second() = k;
+                        r.first() = a(i, j, k);
+                    }
+                }
+                r = amrex::Gpu::blockReduceMax<nthreads>(r);
+                if (threadIdx.x == 0) {
+                    ptmp[blockIdx.x] = r.second();
+                }
+            });

         const auto dnval = m_dn;
-        zi_sum = amrex::Reduce::Sum<amrex::Real>
-            (nblocks, [=] AMREX_GPU_DEVICE (int iblock)
-        {
-            return (ptmp[iblock] + amrex::Real(0.5)) * dnval;
-        });
+        zi_sum = amrex::Reduce::Sum<amrex::Real>(
+            nblocks, [=] AMREX_GPU_DEVICE(int iblock) {
+                return (ptmp[iblock] + amrex::Real(0.5)) * dnval;
+            });
     }

-    amrex::ParallelReduce::Sum(zi_sum, amrex::ParallelDescriptor::IOProcessorNumber(),
-        amrex::ParallelDescriptor::Communicator());
+    amrex::ParallelReduce::Sum(
+        zi_sum, amrex::ParallelDescriptor::IOProcessorNumber(),
+        amrex::ParallelDescriptor::Communicator());

     amrex::Long npts = 1;
     for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
-        if (idim != dir) { npts *= domain_box.length(idim); }
+        if (idim != dir) {
+            npts *= domain_box.length(idim);
+        }
     }
     m_zi = zi_sum / static_cast<amrex::Real>(npts);
 }
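
The reduction in this diff is rank-local only because the field is first redistributed onto a `BoxArray` built by `amrex::decompose` with `decomp[dir] = false`: the domain is cut in the two horizontal directions but never along the normal, and the identity `pmap` assigns box n to rank n, so each rank owns complete vertical columns. Below is a toy stand-in for that pencil decomposition; it uses a simple near-square factorization rather than AMReX's actual algorithm, and the grid sizes and rank count are made up.

```cpp
// Toy pencil decomposition: split only the horizontal plane among ranks,
// never cutting in z, so every rank holds whole columns. This mimics the
// intent of amrex::decompose(domain_box, nprocs, decomp) with
// decomp[dir] = false; the factorization here is a simplification.
#include <cmath>
#include <cstdio>

int main()
{
    const int nx = 128, ny = 96, nz = 64; // made-up domain size
    const int nprocs = 12;                // made-up rank count

    // Choose px * py == nprocs with px near sqrt(nprocs).
    int px = static_cast<int>(std::round(std::sqrt(double(nprocs))));
    while (nprocs % px != 0) { --px; }
    const int py = nprocs / px;

    for (int rank = 0; rank < nprocs; ++rank) { // identity pmap: box n -> rank n
        const int bi = rank % px;
        const int bj = rank / px;
        const int ilo = bi * nx / px, ihi = (bi + 1) * nx / px - 1;
        const int jlo = bj * ny / py, jhi = (bj + 1) * ny / py - 1;
        // Full z extent: the per-column reduction needs no communication.
        std::printf("rank %2d owns [%3d:%3d] x [%2d:%2d] x [0:%d]\n",
                    rank, ilo, ihi, jlo, jhi, nz - 1);
    }
    return 0;
}
```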
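Inside each pencil, the kernel launches one GPU block per (i, j) column: threads stride over k, each tracking the largest vertical temperature gradient it has seen as a (value, k) pair, `amrex::Gpu::blockReduceMax` combines the per-thread pairs, and the winning index is converted to a cell-center height (k + 0.5) * dn. Averaging those heights over every column of the coarsest level gives `m_zi`. Here is a serial sketch of the same computation on a plain array, with a fabricated gradient field whose maximum sits at k = 40 in every column.

```cpp
// Serial sketch of the per-column reduction the GPU kernel in the diff
// performs: argmax of dT/dz over k in each (i,j) column, then an average
// of the corresponding cell-center heights. Sizes, dz, and the gradient
// field are fabricated for illustration.
#include <cstdio>
#include <limits>
#include <vector>

int main()
{
    const int nx = 8, ny = 8, nz = 64; // toy grid (assumption)
    const double dz = 10.0;            // cell size in the normal dir (assumption)

    // Fabricated gradT field: peak gradient at k = 40 in every column.
    std::vector<double> gradT(nx * ny * nz);
    auto idx = [=](int i, int j, int k) { return (k * ny + j) * nx + i; };
    for (int k = 0; k < nz; ++k) {
        for (int j = 0; j < ny; ++j) {
            for (int i = 0; i < nx; ++i) {
                gradT[idx(i, j, k)] = -(k - 40) * (k - 40); // max at k == 40
            }
        }
    }

    double zi_sum = 0.0;
    for (int j = 0; j < ny; ++j) {
        for (int i = 0; i < nx; ++i) {
            // One "block" of the GPU kernel: argmax over the column.
            int kmax = 0;
            double gmax = std::numeric_limits<double>::lowest();
            for (int k = 0; k < nz; ++k) {
                if (gradT[idx(i, j, k)] > gmax) {
                    gmax = gradT[idx(i, j, k)];
                    kmax = k;
                }
            }
            zi_sum += (kmax + 0.5) * dz; // cell-center height of the max
        }
    }
    // Average over all columns; with MPI this would be the global sum
    // divided by the global column count, as in the diff.
    const double zi = zi_sum / (nx * ny);
    std::printf("zi = %g\n", zi); // expect 405 for this fabricated field
    return 0;
}
```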