From 8ff0ad7440ac936470b8042b471aaa9b95d81e17 Mon Sep 17 00:00:00 2001
From: Menno <menno.veerman@wur.nl>
Date: Fri, 13 Feb 2026 11:54:31 +0100
Subject: [PATCH 1/5] Modify command line interface to except both ints and
 floats (newly introduced tau_frac_threshold will be used in next commit) and
 simplify command line code

---
 src_test/test_rte_rrtmgp_rt.cu | 54 ++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 25 deletions(-)

diff --git a/src_test/test_rte_rrtmgp_rt.cu b/src_test/test_rte_rrtmgp_rt.cu
index ee9ed017..bd8de66b 100644
--- a/src_test/test_rte_rrtmgp_rt.cu
+++ b/src_test/test_rte_rrtmgp_rt.cu
@@ -131,7 +131,7 @@ void configure_memory_pool(int nlays, int ncols, int nchunks, int ngpts, int nbn
 
 bool parse_command_line_options(
         std::map<std::string, std::pair<bool, std::string>>& command_line_switches,
-        std::map<std::string, std::pair<int, std::string>>& command_line_ints,
+        std::map<std::string, Float>& command_line_numbers,
         int argc, char** argv)
 {
     for (int i=1; i<argc; ++i)
@@ -179,21 +179,21 @@ bool parse_command_line_options(
             command_line_switches.at(argument).first = enable;
         }
 
-        // Check if a is integer is too be expect and if so, supplied
-        if (command_line_ints.find(argument) != command_line_ints.end() && i+1 < argc)
+        // Check if a is number (int of float) and hether that ir is too be expect and if so, supplied
+        if (command_line_numbers.find(argument) != command_line_numbers.end() && i+1 < argc)
         {
-            std::string next_argument(argv[i+1]);
-            boost::trim(next_argument);
-
-            bool arg_is_int = true;
-            for (int j=0; j<next_argument.size(); ++j)
-                arg_is_int *= std::isdigit(next_argument[j]);
-
-            if (arg_is_int)
+            try
             {
-                command_line_ints.at(argument).first = std::stoi(argv[i+1]);
-                ++i;
+                size_t p;
+                Float val = Float(std::stod(argv[i+1], &p));
+                if (p == strlen(argv[i+1]))
+                {
+                    command_line_numbers.at(argument) = Float(std::stod(argv[i+1]));
+                    ++i;
+                }
+
             }
+            catch(...){}
         }
     }
 
@@ -202,15 +202,15 @@ bool parse_command_line_options(
 
 void print_command_line_options(
         const std::map<std::string, std::pair<bool, std::string>>& command_line_switches,
-        const std::map<std::string, std::pair<int, std::string>>& command_line_ints)
+        const std::map<std::string, Float>& command_line_numbers)
 {
     Status::print_message("Solver settings:");
     for (const auto& option : command_line_switches)
     {
         std::ostringstream ss;
         ss << std::left << std::setw(20) << (option.first);
-        if (command_line_ints.find(option.first) != command_line_ints.end() && option.second.first)
-            ss << " = " << std::boolalpha << command_line_ints.at(option.first).first << std::endl;
+        if (command_line_numbers.find(option.first) != command_line_numbers.end() && option.second.first)
+            ss << " = " << std::boolalpha << command_line_numbers.at(option.first) << std::endl;
         else
             ss << " = " << std::boolalpha << option.second.first << std::endl;
         Status::print_message(ss);
@@ -229,7 +229,7 @@ void solve_radiation(int argc, char** argv)
         {"longwave"          , { false, "Enable computation of longwave radiation." }},
         {"fluxes"            , { true,  "Enable computation of fluxes."             }},
         {"two-stream"        , { true, "Run two-stream solver for to obtain 1D fluxes" }},
-        {"raytracing"        , { true,  "Use raytracing for flux computation. '--raytracing 256': use 256 rays per pixel" }},
+        {"raytracing"        , { true,  "Use raytracing for flux computation. '--raytracing 256': use 256 rays per pixel per spectral quadrature point" }},
         {"independent-column", { false, "run raytracer in independent column mode"}},
         {"cloud-optics"      , { false, "Enable cloud optics (both liquid and ice)."}},
         {"liq-cloud-optics"  , { false, "liquid only cloud optics."                 }},
@@ -241,13 +241,15 @@ void solve_radiation(int argc, char** argv)
         {"profiling"         , { false, "Perform additional profiling run."         }},
         {"delta-cloud"       , { false, "delta-scaling of cloud optical properties"   }},
         {"delta-aerosol"     , { false, "delta-scaling of aerosol optical properties"   }},
+        {"tau-frac-threshold", { true,  "Skip ray tracing and use 1D solution above given ratio between highest per-cell gas and cloud optical depths. Default: '--tau-frac-threshold 0.1'" }},
         {"tica"              , { false, "attenuate path when doing an overhead 1D calculation of tilted input"   }}};
 
-    std::map<std::string, std::pair<int, std::string>> command_line_ints {
-        {"raytracing", {32, "Number of rays initialised at TOD per pixel per quadraute."}},
-        {"single-gpt", {1 , "g-point to store optical properties and fluxes of" }}};
+    std::map<std::string, Float> command_line_numbers {
+        {"raytracing", 32},
+        {"single-gpt", 1},
+        {"tau-frac-threshold", 0.1}};
 
-    if (parse_command_line_options(command_line_switches, command_line_ints, argc, argv))
+    if (parse_command_line_options(command_line_switches, command_line_numbers, argc, argv))
         return;
 
     const bool switch_shortwave         = command_line_switches.at("shortwave"         ).first;
@@ -260,6 +262,7 @@ void solve_radiation(int argc, char** argv)
     bool switch_liq_cloud_optics  = command_line_switches.at("liq-cloud-optics"  ).first;
     bool switch_ice_cloud_optics  = command_line_switches.at("ice-cloud-optics"  ).first;
     const bool switch_lw_scattering     = command_line_switches.at("lw-scattering"     ).first;
+    const bool switch_tau_frac_threshold= command_line_switches.at("tau-frac-threshold").first;
     const bool switch_cloud_mie         = command_line_switches.at("cloud-mie"         ).first;
     const bool switch_aerosol_optics    = command_line_switches.at("aerosol-optics"    ).first;
     const bool switch_single_gpt        = command_line_switches.at("single-gpt"        ).first;
@@ -268,8 +271,7 @@ void solve_radiation(int argc, char** argv)
     const bool switch_delta_aerosol     = command_line_switches.at("delta-aerosol"     ).first;
     const bool switch_tica              = command_line_switches.at("tica"     ).first;
 
-    Int photons_per_pixel = Int(command_line_ints.at("raytracing").first);
-
+    Int photons_per_pixel = Int(command_line_numbers.at("raytracing"));
     if (Float(int(std::log2(Float(photons_per_pixel)))) != std::log2(Float(photons_per_pixel)))
     {
         std::string error = "number of photons per pixel should be a power of 2 ";
@@ -303,9 +305,11 @@ void solve_radiation(int argc, char** argv)
     }
 
     // Print the options to the screen.
-    print_command_line_options(command_line_switches, command_line_ints);
+    print_command_line_options(command_line_switches, command_line_numbers);
+
+    int single_gpt = int(command_line_numbers.at("single-gpt"));
 
-    int single_gpt = command_line_ints.at("single-gpt").first;
+    const Float tau_frac_threshold = switch_tau_frac_threshold ? command_line_numbers.at("tau-frac-threshold") : Float(-1.);
 
     Status::print_message("Using "+ std::to_string(photons_per_pixel) + " rays per pixel");
 

From bfbd8c9b2d3d1855d2d5a2cf2b42307a4e050c55 Mon Sep 17 00:00:00 2001
From: Menno <menno.veerman@wur.nl>
Date: Fri, 13 Feb 2026 11:58:48 +0100
Subject: [PATCH 2/5] move zeroing of per g-point fluxes outside of trace_rays

---
 include_test/radiation_solver_rt.h | 1 +
 src_cuda_rt/raytracer_lw.cu        | 6 ------
 src_test/radiation_solver_rt.cu    | 5 +++++
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include_test/radiation_solver_rt.h b/include_test/radiation_solver_rt.h
index 898dcefb..20beede2 100644
--- a/include_test/radiation_solver_rt.h
+++ b/include_test/radiation_solver_rt.h
@@ -54,6 +54,7 @@ class Radiation_solver_longwave
                 const bool switch_lw_scattering,
                 const bool switch_independent_column,
                 const int single_gpt,
+                const Float tau_frac_threshold,
                 const Int ray_count,
                 const Vector<int> grid_cells,
                 const Vector<Float> grid_d,
diff --git a/src_cuda_rt/raytracer_lw.cu b/src_cuda_rt/raytracer_lw.cu
index 7b254f45..1b9dfdae 100644
--- a/src_cuda_rt/raytracer_lw.cu
+++ b/src_cuda_rt/raytracer_lw.cu
@@ -297,12 +297,6 @@ void Raytracer_lw::trace_rays(
     Array_gpu<Float,2> surface_up_count({grid_cells.x, grid_cells.y});
     Array_gpu<Float,3> atmos_count({grid_cells.x, grid_cells.y, grid_cells.z});
 
-    Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, flux_tod_dn.ptr());
-    Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, flux_tod_up.ptr());
-    Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, flux_sfc_dn.ptr());
-    Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, flux_sfc_up.ptr());
-    Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, grid_cells.z, flux_abs.ptr());
-
     // domain sizes
     const Vector<Float> grid_size = grid_d * grid_cells;
 
diff --git a/src_test/radiation_solver_rt.cu b/src_test/radiation_solver_rt.cu
index 4696b1ee..9152d5fb 100644
--- a/src_test/radiation_solver_rt.cu
+++ b/src_test/radiation_solver_rt.cu
@@ -643,6 +643,11 @@ void Radiation_solver_longwave::solve_gpu(
 
             if (switch_raytracing)
             {
+                Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, (*fluxes).get_flux_tod_dn().ptr());
+                Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, (*fluxes).get_flux_tod_up().ptr());
+                Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, (*fluxes).get_flux_sfc_dif().ptr());
+                Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, (*fluxes).get_flux_sfc_up().ptr());
+                Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, grid_cells.z, (*fluxes).get_flux_abs_dif().ptr());
 
                 raytracer_lw.trace_rays(
                         igpt,

From 5188ba36a641b5e9aa5152e0f8e1ef3574772287 Mon Sep 17 00:00:00 2001
From: Menno <menno.veerman@wur.nl>
Date: Fri, 13 Feb 2026 11:59:59 +0100
Subject: [PATCH 3/5] remove Float casts of random numbers for alias table,
 that is fully double precision now

---
 src_kernels_cuda_rt/raytracer_kernels_lw.cu | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src_kernels_cuda_rt/raytracer_kernels_lw.cu b/src_kernels_cuda_rt/raytracer_kernels_lw.cu
index a6ba0731..0c85decd 100644
--- a/src_kernels_cuda_rt/raytracer_kernels_lw.cu
+++ b/src_kernels_cuda_rt/raytracer_kernels_lw.cu
@@ -146,7 +146,7 @@ namespace
             {
                 const int idx = sample_alias_table(
                         alias_prob, alias_idx, alias_n,
-                        Float(rng()), Float(rng()));
+                        alias_rng(), alias_rng());
 
                 const int i = idx % grid_cells.x ;
                 const int j = idx / grid_cells.x ;
@@ -166,7 +166,7 @@ namespace
             {
                 const int idx = sample_alias_table(
                         alias_prob, alias_idx, alias_n,
-                        Float(rng()), Float(rng()));
+                        alias_rng(), alias_rng());
 
                 const int i = idx % grid_cells.x ;
                 const int j = idx / grid_cells.x ;

From c1f7491625b062d67ad588ba610ca2f032bacf97 Mon Sep 17 00:00:00 2001
From: Menno <menno.veerman@wur.nl>
Date: Fri, 13 Feb 2026 12:02:11 +0100
Subject: [PATCH 4/5] compute ratio of highest-per cell gas and cloud optical
 depth for each g-point. If this ratio is above a provided threshold value
 (command line argumen, with a default of 0.1), compute surface and tod fluxes
 and flux absorption rates based on the 1D computations

---
 src_test/radiation_solver_rt.cu | 186 +++++++++++++++++++++++++-------
 src_test/test_rte_rrtmgp_rt.cu  |   1 +
 2 files changed, 150 insertions(+), 37 deletions(-)

diff --git a/src_test/radiation_solver_rt.cu b/src_test/radiation_solver_rt.cu
index 9152d5fb..a1b6c35b 100644
--- a/src_test/radiation_solver_rt.cu
+++ b/src_test/radiation_solver_rt.cu
@@ -34,7 +34,7 @@
 #include "fluxes_rt.h"
 #include "rte_lw_rt.h"
 #include "rte_sw_rt.h"
-
+#include "cub/cub.cuh"
 #include "subset_kernels_cuda.h"
 #include "gas_optics_rrtmgp_kernels_cuda_rt.h"
 #include "gpt_combine_kernels_cuda_rt.h"
@@ -42,28 +42,94 @@
 
 namespace
 {
-        __global__
-        void scale_tau_kernel(Float* tau, const int ncol, const int nlay, Float scale_factor) {
-                const int icol = blockIdx.x*blockDim.x + threadIdx.x;
-                const int ilay = blockIdx.y*blockDim.y + threadIdx.y;
+    __global__
+    void convert_1d_to_rt_hr_kernels(
+        const int ncol,
+        const int nz,
+        const Float dz,
+        const Float* flux_net,
+        Float* flux_rt_abs)
+    {
+        const int icol = blockIdx.x*blockDim.x + threadIdx.x;
+        const int iz = blockIdx.y*blockDim.y + threadIdx.y;
 
-                if ( (icol < ncol) && (ilay < nlay) )
-                {
-                const int idx = icol + ilay*ncol;
-                tau[idx] = tau[idx] * scale_factor;
-                }
+        if ( (icol < ncol) && (iz < nz) )
+        {
+            const int idx = icol + iz*ncol;
+            const int idx_p = icol + (iz+1)*ncol;
+
+            flux_rt_abs[idx] = (flux_net[idx_p] - flux_net[idx])/dz;
         }
+    }
 
-        void scale_tau(Float* tau, const int ncol, const int nlay, Float scale_factor) {
-                const int block_col = 64;
-                const int block_lay = 1;
-                const int grid_col  = ncol/block_col + (ncol%block_col > 0);
-                const int grid_lay  = nlay/block_lay + (nlay%block_lay > 0);
+    __global__
+    void convert_1d_to_rt_flx_kernels(
+        const int ncol,
+        const int iz,
+        const Float* flux_1d_dn,
+        const Float* flux_1d_up,
+        Float* flux_rt_dn,
+        Float* flux_rt_up)
+    {
+        const int icol = blockIdx.x*blockDim.x + threadIdx.x;
 
-                dim3 grid_gpu(grid_col, grid_lay);
-                dim3 block_gpu(block_col, block_lay);
-                scale_tau_kernel<<<grid_gpu, block_gpu>>>(tau, ncol, nlay, scale_factor);
+        if (icol < ncol)
+        {
+            const int idx_in = icol + iz*ncol;
+            flux_rt_dn[icol] = flux_1d_dn[idx_in];
+            flux_rt_up[icol] = flux_1d_up[idx_in];
         }
+    }
+
+    void convert_1d_to_rt_output(
+        const int ncol,
+        const int nlay,
+        const int nz,
+        const Float dz,
+        const Array_gpu<Float,2>& flux_up,
+        const Array_gpu<Float,2>& flux_dn,
+        const Array_gpu<Float,2>& flux_net,
+        Array_gpu<Float,2>& flux_tod_dn,
+        Array_gpu<Float,2>& flux_tod_up,
+        Array_gpu<Float,2>& flux_sfc_dn,
+        Array_gpu<Float,2>& flux_sfc_up,
+        Array_gpu<Float,3>& flux_abs)
+    {
+        const int block_col = 64;
+        const int grid_col = ncol/block_col + (ncol%block_col > 0);
+
+        convert_1d_to_rt_flx_kernels<<<grid_col, block_col>>>(ncol, 0, flux_dn.ptr(), flux_up.ptr(), flux_sfc_dn.ptr(), flux_sfc_up.ptr());
+        convert_1d_to_rt_flx_kernels<<<grid_col, block_col>>>(ncol, nz+1, flux_dn.ptr(), flux_up.ptr(), flux_tod_dn.ptr(), flux_tod_up.ptr());
+
+        const dim3 block_2d(block_col, 1, 1);
+        const dim3 grid_2d(grid_col, nz, 1);
+        convert_1d_to_rt_hr_kernels<<<grid_2d, block_2d>>>(
+            ncol, nz, dz, flux_net.ptr(), flux_abs.ptr());
+    }
+
+
+    __global__
+    void scale_tau_kernel(Float* tau, const int ncol, const int nlay, Float scale_factor) {
+            const int icol = blockIdx.x*blockDim.x + threadIdx.x;
+            const int ilay = blockIdx.y*blockDim.y + threadIdx.y;
+
+            if ( (icol < ncol) && (ilay < nlay) )
+            {
+                const int idx = icol + ilay*ncol;
+                tau[idx] = tau[idx] * scale_factor;
+            }
+    }
+
+    void scale_tau(Float* tau, const int ncol, const int nlay, Float scale_factor) {
+            const int block_col = 64;
+            const int block_lay = 1;
+            const int grid_col  = ncol/block_col + (ncol%block_col > 0);
+            const int grid_lay  = nlay/block_lay + (nlay%block_lay > 0);
+
+            dim3 grid_gpu(grid_col, grid_lay);
+            dim3 block_gpu(block_col, block_lay);
+            scale_tau_kernel<<<grid_gpu, block_gpu>>>(tau, ncol, nlay, scale_factor);
+    }
 
     std::vector<std::string> get_variable_string(
             const std::string& var_name,
@@ -421,6 +487,7 @@ void Radiation_solver_longwave::solve_gpu(
         const bool switch_lw_scattering,
         const bool switch_independent_column,
         const int single_gpt,
+        const Float tau_frac_threshold,
         const Int ray_count,
         const Vector<int> grid_cells,
         const Vector<Float> grid_d,
@@ -534,6 +601,29 @@ void Radiation_solver_longwave::solve_gpu(
         }
 
 
+        // Allocate temporary storage
+        void* d_temp_storage = nullptr;
+        size_t temp_storage_bytes = 0;
+
+        const int max_size = n_col * grid_cells.z;
+
+        Float* max_tau_gas_g = Tools_gpu::allocate_gpu<Float>(1);
+        Float* max_tau_cld_g = Tools_gpu::allocate_gpu<Float>(1);
+        Float max_tau_gas = 0;
+        Float max_tau_cld = 0;
+
+        // Get required temp storage size
+        cub::DeviceReduce::Max(d_temp_storage, temp_storage_bytes,
+                               optical_props->get_tau().ptr(), max_tau_gas_g, max_size);
+
+        // Allocate temp storage
+        cudaMalloc(&d_temp_storage, temp_storage_bytes);
+
+        // Compute max
+        cub::DeviceReduce::Max(d_temp_storage, temp_storage_bytes,
+                               optical_props->get_tau().ptr(), max_tau_gas_g, max_size);
+
+        cudaMemcpy(&max_tau_gas, max_tau_gas_g, sizeof(Float), cudaMemcpyDeviceToHost);
 
         if (switch_cloud_optics)
         {
@@ -550,6 +640,11 @@ void Radiation_solver_longwave::solve_gpu(
                 // cloud->delta_scale();
 
             }
+            // Compute max
+            cub::DeviceReduce::Max(d_temp_storage, temp_storage_bytes,
+                                   cloud_optical_props->get_tau().ptr(), max_tau_cld_g, max_size);
+            cudaMemcpy(&max_tau_cld, max_tau_cld_g, sizeof(Float), cudaMemcpyDeviceToHost);
+
             // Add the cloud optical props to the gas optical properties.
             add_to(
                     dynamic_cast<Optical_props_2str_rt&>(*optical_props),
@@ -649,31 +744,48 @@ void Radiation_solver_longwave::solve_gpu(
                 Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, (*fluxes).get_flux_sfc_up().ptr());
                 Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, grid_cells.z, (*fluxes).get_flux_abs_dif().ptr());
 
-                raytracer_lw.trace_rays(
-                        igpt,
-                        switch_independent_column,
-                        ray_count,
-                        grid_cells,
-                        grid_d,
-                        kn_grid,
-                        dynamic_cast<Optical_props_2str_rt&>(*optical_props).get_tau(),
-                        dynamic_cast<Optical_props_2str_rt&>(*optical_props).get_ssa(),
-                        dynamic_cast<Optical_props_2str_rt&>(*cloud_optical_props).get_tau(),
-                        dynamic_cast<Optical_props_2str_rt&>(*cloud_optical_props).get_ssa(),
-                        dynamic_cast<Optical_props_2str_rt&>(*cloud_optical_props).get_g(),
-                        dynamic_cast<Optical_props_2str_rt&>(*aerosol_optical_props).get_tau(),
-                        dynamic_cast<Optical_props_2str_rt&>(*aerosol_optical_props).get_ssa(),
-                        dynamic_cast<Optical_props_2str_rt&>(*aerosol_optical_props).get_g(),
-                        (*sources).get_lay_source(),
-                        (*sources).get_sfc_source(),
-                        emis_sfc,
-                        (*fluxes).get_flux_dn()({1, grid_cells.z}),
+                if ( ((max_tau_gas / max_tau_cld) < tau_frac_threshold) || (tau_frac_threshold < 0) )
+                {
+                    raytracer_lw.trace_rays(
+                            igpt,
+                            switch_independent_column,
+                            ray_count,
+                            grid_cells,
+                            grid_d,
+                            kn_grid,
+                            dynamic_cast<Optical_props_2str_rt&>(*optical_props).get_tau(),
+                            dynamic_cast<Optical_props_2str_rt&>(*optical_props).get_ssa(),
+                            dynamic_cast<Optical_props_2str_rt&>(*cloud_optical_props).get_tau(),
+                            dynamic_cast<Optical_props_2str_rt&>(*cloud_optical_props).get_ssa(),
+                            dynamic_cast<Optical_props_2str_rt&>(*cloud_optical_props).get_g(),
+                            dynamic_cast<Optical_props_2str_rt&>(*aerosol_optical_props).get_tau(),
+                            dynamic_cast<Optical_props_2str_rt&>(*aerosol_optical_props).get_ssa(),
+                            dynamic_cast<Optical_props_2str_rt&>(*aerosol_optical_props).get_g(),
+                            (*sources).get_lay_source(),
+                            (*sources).get_sfc_source(),
+                            emis_sfc,
+                            (*fluxes).get_flux_dn()({1, grid_cells.z}),
+                            (*fluxes).get_flux_tod_dn(),
+                            (*fluxes).get_flux_tod_up(),
+                            (*fluxes).get_flux_sfc_dif(),
+                            (*fluxes).get_flux_sfc_up(),
+                            (*fluxes).get_flux_abs_dif());
+                }
+                else
+                {
+                    convert_1d_to_rt_output(
+                        n_col, n_lay, grid_cells.z, grid_d.z,
+                        (*fluxes).get_flux_up(),
+                        (*fluxes).get_flux_dn(),
+                        (*fluxes).get_flux_net(),
                         (*fluxes).get_flux_tod_dn(),
                         (*fluxes).get_flux_tod_up(),
                         (*fluxes).get_flux_sfc_dif(),
                         (*fluxes).get_flux_sfc_up(),
                         (*fluxes).get_flux_abs_dif());
 
+                }
+
                 Gpt_combine_kernels_cuda_rt::add_from_gpoint(
                         grid_cells.x, grid_cells.y,
                         rt_flux_tod_dn.ptr(), rt_flux_tod_up.ptr(), rt_flux_sfc_dn.ptr(), rt_flux_sfc_up.ptr(),
diff --git a/src_test/test_rte_rrtmgp_rt.cu b/src_test/test_rte_rrtmgp_rt.cu
index bd8de66b..df0f9c97 100644
--- a/src_test/test_rte_rrtmgp_rt.cu
+++ b/src_test/test_rte_rrtmgp_rt.cu
@@ -731,6 +731,7 @@ void solve_radiation(int argc, char** argv)
                     switch_lw_scattering,
                     switch_independent_column,
                     single_gpt,
+                    tau_frac_threshold,
                     photons_per_pixel,
                     grid_cells,
                     grid_d,

From 42d384a17acf170631f360a2275d9a8fbf0c9bea Mon Sep 17 00:00:00 2001
From: Menno <menno.veerman@wur.nl>
Date: Sun, 15 Feb 2026 13:06:50 +0100
Subject: [PATCH 5/5] instead of the tau_gas/tau_frac ratio, use the ratio
 between the lowest mean free path and the small horizontal grid dimension to
 decide whether to use 3D monte carlo or simply the 1D solution

---
 include_test/radiation_solver_rt.h |  2 +-
 src_test/radiation_solver_rt.cu    | 15 +++++++--------
 src_test/test_rte_rrtmgp_rt.cu     | 10 +++++-----
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/include_test/radiation_solver_rt.h b/include_test/radiation_solver_rt.h
index 20beede2..283d7187 100644
--- a/include_test/radiation_solver_rt.h
+++ b/include_test/radiation_solver_rt.h
@@ -54,7 +54,7 @@ class Radiation_solver_longwave
                 const bool switch_lw_scattering,
                 const bool switch_independent_column,
                 const int single_gpt,
-                const Float tau_frac_threshold,
+                const Float min_mfp_grid_ratio,
                 const Int ray_count,
                 const Vector<int> grid_cells,
                 const Vector<Float> grid_d,
diff --git a/src_test/radiation_solver_rt.cu b/src_test/radiation_solver_rt.cu
index a1b6c35b..fb70dfb2 100644
--- a/src_test/radiation_solver_rt.cu
+++ b/src_test/radiation_solver_rt.cu
@@ -487,7 +487,7 @@ void Radiation_solver_longwave::solve_gpu(
         const bool switch_lw_scattering,
         const bool switch_independent_column,
         const int single_gpt,
-        const Float tau_frac_threshold,
+        const Float min_mfp_grid_ratio,
         const Int ray_count,
         const Vector<int> grid_cells,
         const Vector<Float> grid_d,
@@ -518,6 +518,8 @@ void Radiation_solver_longwave::solve_gpu(
 
     const Bool top_at_1 = p_lay({1, 1}) < p_lay({1, n_lay});
 
+    const Float grid_d_xy_min = min(grid_d.x, grid_d.y);
+
     optical_props = std::make_unique<Optical_props_2str_rt>(n_col, n_lay, *kdist_gpu);
     cloud_optical_props = std::make_unique<Optical_props_2str_rt>(n_col, n_lay, *cloud_optics_gpu);
     aerosol_optical_props = std::make_unique<Optical_props_2str_rt>(n_col, n_lay, *aerosol_optics_gpu);
@@ -608,9 +610,7 @@ void Radiation_solver_longwave::solve_gpu(
         const int max_size = n_col * grid_cells.z;
 
         Float* max_tau_gas_g = Tools_gpu::allocate_gpu<Float>(1);
-        Float* max_tau_cld_g = Tools_gpu::allocate_gpu<Float>(1);
         Float max_tau_gas = 0;
-        Float max_tau_cld = 0;
 
         // Get required temp storage size
         cub::DeviceReduce::Max(d_temp_storage, temp_storage_bytes,
@@ -625,6 +625,8 @@ void Radiation_solver_longwave::solve_gpu(
 
         cudaMemcpy(&max_tau_gas, max_tau_gas_g, sizeof(Float), cudaMemcpyDeviceToHost);
 
+        const Float lowest_gas_mean_free_path = grid_d.z / max_tau_gas;
+
         if (switch_cloud_optics)
         {
             if (band > previous_band)
@@ -640,10 +642,6 @@ void Radiation_solver_longwave::solve_gpu(
                 // cloud->delta_scale();
 
             }
-            // Compute max
-            cub::DeviceReduce::Max(d_temp_storage, temp_storage_bytes,
-                                   cloud_optical_props->get_tau().ptr(), max_tau_cld_g, max_size);
-            cudaMemcpy(&max_tau_cld, max_tau_cld_g, sizeof(Float), cudaMemcpyDeviceToHost);
 
             // Add the cloud optical props to the gas optical properties.
             add_to(
@@ -744,7 +742,7 @@ void Radiation_solver_longwave::solve_gpu(
                 Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, (*fluxes).get_flux_sfc_up().ptr());
                 Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, grid_cells.z, (*fluxes).get_flux_abs_dif().ptr());
 
-                if ( ((max_tau_gas / max_tau_cld) < tau_frac_threshold) || (tau_frac_threshold < 0) )
+                if ( (lowest_gas_mean_free_path / grid_d_xy_min) > min_mfp_grid_ratio)
                 {
                     raytracer_lw.trace_rays(
                             igpt,
@@ -796,6 +794,7 @@ void Radiation_solver_longwave::solve_gpu(
             }
 
         }
+        Tools_gpu::free_gpu<Float>(max_tau_gas_g);
     }
 }
 
diff --git a/src_test/test_rte_rrtmgp_rt.cu b/src_test/test_rte_rrtmgp_rt.cu
index df0f9c97..4e5880d9 100644
--- a/src_test/test_rte_rrtmgp_rt.cu
+++ b/src_test/test_rte_rrtmgp_rt.cu
@@ -241,13 +241,13 @@ void solve_radiation(int argc, char** argv)
         {"profiling"         , { false, "Perform additional profiling run."         }},
         {"delta-cloud"       , { false, "delta-scaling of cloud optical properties"   }},
         {"delta-aerosol"     , { false, "delta-scaling of aerosol optical properties"   }},
-        {"tau-frac-threshold", { true,  "Skip ray tracing and use 1D solution above given ratio between highest per-cell gas and cloud optical depths. Default: '--tau-frac-threshold 0.1'" }},
+        {"min-mfp-grid-ratio", { true,  "Lowest ratio between the shortest gasous mean free path and the smallest grid dimension at which we still do ray tracing. Below this ratio, the 1D solution is used. '--min-mfp-grid-ratio 1'" }},
         {"tica"              , { false, "attenuate path when doing an overhead 1D calculation of tilted input"   }}};
 
     std::map<std::string, Float> command_line_numbers {
         {"raytracing", 32},
         {"single-gpt", 1},
-        {"tau-frac-threshold", 0.1}};
+        {"min-mfp-grid-ratio", 1}};
 
     if (parse_command_line_options(command_line_switches, command_line_numbers, argc, argv))
         return;
@@ -262,7 +262,7 @@ void solve_radiation(int argc, char** argv)
     bool switch_liq_cloud_optics  = command_line_switches.at("liq-cloud-optics"  ).first;
     bool switch_ice_cloud_optics  = command_line_switches.at("ice-cloud-optics"  ).first;
     const bool switch_lw_scattering     = command_line_switches.at("lw-scattering"     ).first;
-    const bool switch_tau_frac_threshold= command_line_switches.at("tau-frac-threshold").first;
+    const bool switch_min_mfp_grid_ratio= command_line_switches.at("min-mfp-grid-ratio").first;
     const bool switch_cloud_mie         = command_line_switches.at("cloud-mie"         ).first;
     const bool switch_aerosol_optics    = command_line_switches.at("aerosol-optics"    ).first;
     const bool switch_single_gpt        = command_line_switches.at("single-gpt"        ).first;
@@ -309,7 +309,7 @@ void solve_radiation(int argc, char** argv)
 
     int single_gpt = int(command_line_numbers.at("single-gpt"));
 
-    const Float tau_frac_threshold = switch_tau_frac_threshold ? command_line_numbers.at("tau-frac-threshold") : Float(-1.);
+    const Float min_mfp_grid_ratio = switch_min_mfp_grid_ratio ? command_line_numbers.at("min-mfp-grid-ratio") : Float(0.);
 
     Status::print_message("Using "+ std::to_string(photons_per_pixel) + " rays per pixel");
 
@@ -731,7 +731,7 @@ void solve_radiation(int argc, char** argv)
                     switch_lw_scattering,
                     switch_independent_column,
                     single_gpt,
-                    tau_frac_threshold,
+                    min_mfp_grid_ratio,
                     photons_per_pixel,
                     grid_cells,
                     grid_d,