From 8ff0ad7440ac936470b8042b471aaa9b95d81e17 Mon Sep 17 00:00:00 2001 From: Menno Date: Fri, 13 Feb 2026 11:54:31 +0100 Subject: [PATCH 1/5] Modify command line interface to except both ints and floats (newly introduced tau_frac_threshold will be used in next commit) and simplify command line code --- src_test/test_rte_rrtmgp_rt.cu | 54 ++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/src_test/test_rte_rrtmgp_rt.cu b/src_test/test_rte_rrtmgp_rt.cu index ee9ed017..bd8de66b 100644 --- a/src_test/test_rte_rrtmgp_rt.cu +++ b/src_test/test_rte_rrtmgp_rt.cu @@ -131,7 +131,7 @@ void configure_memory_pool(int nlays, int ncols, int nchunks, int ngpts, int nbn bool parse_command_line_options( std::map>& command_line_switches, - std::map>& command_line_ints, + std::map& command_line_numbers, int argc, char** argv) { for (int i=1; i>& command_line_switches, - const std::map>& command_line_ints) + const std::map& command_line_numbers) { Status::print_message("Solver settings:"); for (const auto& option : command_line_switches) { std::ostringstream ss; ss << std::left << std::setw(20) << (option.first); - if (command_line_ints.find(option.first) != command_line_ints.end() && option.second.first) - ss << " = " << std::boolalpha << command_line_ints.at(option.first).first << std::endl; + if (command_line_numbers.find(option.first) != command_line_numbers.end() && option.second.first) + ss << " = " << std::boolalpha << command_line_numbers.at(option.first) << std::endl; else ss << " = " << std::boolalpha << option.second.first << std::endl; Status::print_message(ss); @@ -229,7 +229,7 @@ void solve_radiation(int argc, char** argv) {"longwave" , { false, "Enable computation of longwave radiation." }}, {"fluxes" , { true, "Enable computation of fluxes." }}, {"two-stream" , { true, "Run two-stream solver for to obtain 1D fluxes" }}, - {"raytracing" , { true, "Use raytracing for flux computation. '--raytracing 256': use 256 rays per pixel" }}, + {"raytracing" , { true, "Use raytracing for flux computation. '--raytracing 256': use 256 rays per pixel per spectral quadrature point" }}, {"independent-column", { false, "run raytracer in independent column mode"}}, {"cloud-optics" , { false, "Enable cloud optics (both liquid and ice)."}}, {"liq-cloud-optics" , { false, "liquid only cloud optics." }}, @@ -241,13 +241,15 @@ void solve_radiation(int argc, char** argv) {"profiling" , { false, "Perform additional profiling run." }}, {"delta-cloud" , { false, "delta-scaling of cloud optical properties" }}, {"delta-aerosol" , { false, "delta-scaling of aerosol optical properties" }}, + {"tau-frac-threshold", { true, "Skip ray tracing and use 1D solution above given ratio between highest per-cell gas and cloud optical depths. Default: '--tau-frac-threshold 0.1'" }}, {"tica" , { false, "attenuate path when doing an overhead 1D calculation of tilted input" }}}; - std::map> command_line_ints { - {"raytracing", {32, "Number of rays initialised at TOD per pixel per quadraute."}}, - {"single-gpt", {1 , "g-point to store optical properties and fluxes of" }}}; + std::map command_line_numbers { + {"raytracing", 32}, + {"single-gpt", 1}, + {"tau-frac-threshold", 0.1}}; - if (parse_command_line_options(command_line_switches, command_line_ints, argc, argv)) + if (parse_command_line_options(command_line_switches, command_line_numbers, argc, argv)) return; const bool switch_shortwave = command_line_switches.at("shortwave" ).first; @@ -260,6 +262,7 @@ void solve_radiation(int argc, char** argv) bool switch_liq_cloud_optics = command_line_switches.at("liq-cloud-optics" ).first; bool switch_ice_cloud_optics = command_line_switches.at("ice-cloud-optics" ).first; const bool switch_lw_scattering = command_line_switches.at("lw-scattering" ).first; + const bool switch_tau_frac_threshold= command_line_switches.at("tau-frac-threshold").first; const bool switch_cloud_mie = command_line_switches.at("cloud-mie" ).first; const bool switch_aerosol_optics = command_line_switches.at("aerosol-optics" ).first; const bool switch_single_gpt = command_line_switches.at("single-gpt" ).first; @@ -268,8 +271,7 @@ void solve_radiation(int argc, char** argv) const bool switch_delta_aerosol = command_line_switches.at("delta-aerosol" ).first; const bool switch_tica = command_line_switches.at("tica" ).first; - Int photons_per_pixel = Int(command_line_ints.at("raytracing").first); - + Int photons_per_pixel = Int(command_line_numbers.at("raytracing")); if (Float(int(std::log2(Float(photons_per_pixel)))) != std::log2(Float(photons_per_pixel))) { std::string error = "number of photons per pixel should be a power of 2 "; @@ -303,9 +305,11 @@ void solve_radiation(int argc, char** argv) } // Print the options to the screen. - print_command_line_options(command_line_switches, command_line_ints); + print_command_line_options(command_line_switches, command_line_numbers); + + int single_gpt = int(command_line_numbers.at("single-gpt")); - int single_gpt = command_line_ints.at("single-gpt").first; + const Float tau_frac_threshold = switch_tau_frac_threshold ? command_line_numbers.at("tau-frac-threshold") : Float(-1.); Status::print_message("Using "+ std::to_string(photons_per_pixel) + " rays per pixel"); From bfbd8c9b2d3d1855d2d5a2cf2b42307a4e050c55 Mon Sep 17 00:00:00 2001 From: Menno Date: Fri, 13 Feb 2026 11:58:48 +0100 Subject: [PATCH 2/5] move zeroing of per g-point fluxes outside of trace_rays --- include_test/radiation_solver_rt.h | 1 + src_cuda_rt/raytracer_lw.cu | 6 ------ src_test/radiation_solver_rt.cu | 5 +++++ 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include_test/radiation_solver_rt.h b/include_test/radiation_solver_rt.h index 898dcefb..20beede2 100644 --- a/include_test/radiation_solver_rt.h +++ b/include_test/radiation_solver_rt.h @@ -54,6 +54,7 @@ class Radiation_solver_longwave const bool switch_lw_scattering, const bool switch_independent_column, const int single_gpt, + const Float tau_frac_threshold, const Int ray_count, const Vector grid_cells, const Vector grid_d, diff --git a/src_cuda_rt/raytracer_lw.cu b/src_cuda_rt/raytracer_lw.cu index 7b254f45..1b9dfdae 100644 --- a/src_cuda_rt/raytracer_lw.cu +++ b/src_cuda_rt/raytracer_lw.cu @@ -297,12 +297,6 @@ void Raytracer_lw::trace_rays( Array_gpu surface_up_count({grid_cells.x, grid_cells.y}); Array_gpu atmos_count({grid_cells.x, grid_cells.y, grid_cells.z}); - Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, flux_tod_dn.ptr()); - Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, flux_tod_up.ptr()); - Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, flux_sfc_dn.ptr()); - Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, flux_sfc_up.ptr()); - Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, grid_cells.z, flux_abs.ptr()); - // domain sizes const Vector grid_size = grid_d * grid_cells; diff --git a/src_test/radiation_solver_rt.cu b/src_test/radiation_solver_rt.cu index 4696b1ee..9152d5fb 100644 --- a/src_test/radiation_solver_rt.cu +++ b/src_test/radiation_solver_rt.cu @@ -643,6 +643,11 @@ void Radiation_solver_longwave::solve_gpu( if (switch_raytracing) { + Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, (*fluxes).get_flux_tod_dn().ptr()); + Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, (*fluxes).get_flux_tod_up().ptr()); + Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, (*fluxes).get_flux_sfc_dif().ptr()); + Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, (*fluxes).get_flux_sfc_up().ptr()); + Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, grid_cells.z, (*fluxes).get_flux_abs_dif().ptr()); raytracer_lw.trace_rays( igpt, From 5188ba36a641b5e9aa5152e0f8e1ef3574772287 Mon Sep 17 00:00:00 2001 From: Menno Date: Fri, 13 Feb 2026 11:59:59 +0100 Subject: [PATCH 3/5] remove Float casts of random numbers for alias table, that is fully double precision now --- src_kernels_cuda_rt/raytracer_kernels_lw.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src_kernels_cuda_rt/raytracer_kernels_lw.cu b/src_kernels_cuda_rt/raytracer_kernels_lw.cu index a6ba0731..0c85decd 100644 --- a/src_kernels_cuda_rt/raytracer_kernels_lw.cu +++ b/src_kernels_cuda_rt/raytracer_kernels_lw.cu @@ -146,7 +146,7 @@ namespace { const int idx = sample_alias_table( alias_prob, alias_idx, alias_n, - Float(rng()), Float(rng())); + alias_rng(), alias_rng()); const int i = idx % grid_cells.x ; const int j = idx / grid_cells.x ; @@ -166,7 +166,7 @@ namespace { const int idx = sample_alias_table( alias_prob, alias_idx, alias_n, - Float(rng()), Float(rng())); + alias_rng(), alias_rng()); const int i = idx % grid_cells.x ; const int j = idx / grid_cells.x ; From c1f7491625b062d67ad588ba610ca2f032bacf97 Mon Sep 17 00:00:00 2001 From: Menno Date: Fri, 13 Feb 2026 12:02:11 +0100 Subject: [PATCH 4/5] compute ratio of highest-per cell gas and cloud optical depth for each g-point. If this ratio is above a provided threshold value (command line argumen, with a default of 0.1), compute surface and tod fluxes and flux absorption rates based on the 1D computations --- src_test/radiation_solver_rt.cu | 186 +++++++++++++++++++++++++------- src_test/test_rte_rrtmgp_rt.cu | 1 + 2 files changed, 150 insertions(+), 37 deletions(-) diff --git a/src_test/radiation_solver_rt.cu b/src_test/radiation_solver_rt.cu index 9152d5fb..a1b6c35b 100644 --- a/src_test/radiation_solver_rt.cu +++ b/src_test/radiation_solver_rt.cu @@ -34,7 +34,7 @@ #include "fluxes_rt.h" #include "rte_lw_rt.h" #include "rte_sw_rt.h" - +#include "cub/cub.cuh" #include "subset_kernels_cuda.h" #include "gas_optics_rrtmgp_kernels_cuda_rt.h" #include "gpt_combine_kernels_cuda_rt.h" @@ -42,28 +42,94 @@ namespace { - __global__ - void scale_tau_kernel(Float* tau, const int ncol, const int nlay, Float scale_factor) { - const int icol = blockIdx.x*blockDim.x + threadIdx.x; - const int ilay = blockIdx.y*blockDim.y + threadIdx.y; + __global__ + void convert_1d_to_rt_hr_kernels( + const int ncol, + const int nz, + const Float dz, + const Float* flux_net, + Float* flux_rt_abs) + { + const int icol = blockIdx.x*blockDim.x + threadIdx.x; + const int iz = blockIdx.y*blockDim.y + threadIdx.y; - if ( (icol < ncol) && (ilay < nlay) ) - { - const int idx = icol + ilay*ncol; - tau[idx] = tau[idx] * scale_factor; - } + if ( (icol < ncol) && (iz < nz) ) + { + const int idx = icol + iz*ncol; + const int idx_p = icol + (iz+1)*ncol; + + flux_rt_abs[idx] = (flux_net[idx_p] - flux_net[idx])/dz; } + } - void scale_tau(Float* tau, const int ncol, const int nlay, Float scale_factor) { - const int block_col = 64; - const int block_lay = 1; - const int grid_col = ncol/block_col + (ncol%block_col > 0); - const int grid_lay = nlay/block_lay + (nlay%block_lay > 0); + __global__ + void convert_1d_to_rt_flx_kernels( + const int ncol, + const int iz, + const Float* flux_1d_dn, + const Float* flux_1d_up, + Float* flux_rt_dn, + Float* flux_rt_up) + { + const int icol = blockIdx.x*blockDim.x + threadIdx.x; - dim3 grid_gpu(grid_col, grid_lay); - dim3 block_gpu(block_col, block_lay); - scale_tau_kernel<<>>(tau, ncol, nlay, scale_factor); + if (icol < ncol) + { + const int idx_in = icol + iz*ncol; + flux_rt_dn[icol] = flux_1d_dn[idx_in]; + flux_rt_up[icol] = flux_1d_up[idx_in]; } + } + + void convert_1d_to_rt_output( + const int ncol, + const int nlay, + const int nz, + const Float dz, + const Array_gpu& flux_up, + const Array_gpu& flux_dn, + const Array_gpu& flux_net, + Array_gpu& flux_tod_dn, + Array_gpu& flux_tod_up, + Array_gpu& flux_sfc_dn, + Array_gpu& flux_sfc_up, + Array_gpu& flux_abs) + { + const int block_col = 64; + const int grid_col = ncol/block_col + (ncol%block_col > 0); + + convert_1d_to_rt_flx_kernels<<>>(ncol, 0, flux_dn.ptr(), flux_up.ptr(), flux_sfc_dn.ptr(), flux_sfc_up.ptr()); + convert_1d_to_rt_flx_kernels<<>>(ncol, nz+1, flux_dn.ptr(), flux_up.ptr(), flux_tod_dn.ptr(), flux_tod_up.ptr()); + + const dim3 block_2d(block_col, 1, 1); + const dim3 grid_2d(grid_col, nz, 1); + convert_1d_to_rt_hr_kernels<<>>( + ncol, nz, dz, flux_net.ptr(), flux_abs.ptr()); + } + + + __global__ + void scale_tau_kernel(Float* tau, const int ncol, const int nlay, Float scale_factor) { + const int icol = blockIdx.x*blockDim.x + threadIdx.x; + const int ilay = blockIdx.y*blockDim.y + threadIdx.y; + + if ( (icol < ncol) && (ilay < nlay) ) + { + const int idx = icol + ilay*ncol; + tau[idx] = tau[idx] * scale_factor; + } + } + + void scale_tau(Float* tau, const int ncol, const int nlay, Float scale_factor) { + const int block_col = 64; + const int block_lay = 1; + const int grid_col = ncol/block_col + (ncol%block_col > 0); + const int grid_lay = nlay/block_lay + (nlay%block_lay > 0); + + dim3 grid_gpu(grid_col, grid_lay); + dim3 block_gpu(block_col, block_lay); + scale_tau_kernel<<>>(tau, ncol, nlay, scale_factor); + } std::vector get_variable_string( const std::string& var_name, @@ -421,6 +487,7 @@ void Radiation_solver_longwave::solve_gpu( const bool switch_lw_scattering, const bool switch_independent_column, const int single_gpt, + const Float tau_frac_threshold, const Int ray_count, const Vector grid_cells, const Vector grid_d, @@ -534,6 +601,29 @@ void Radiation_solver_longwave::solve_gpu( } + // Allocate temporary storage + void* d_temp_storage = nullptr; + size_t temp_storage_bytes = 0; + + const int max_size = n_col * grid_cells.z; + + Float* max_tau_gas_g = Tools_gpu::allocate_gpu(1); + Float* max_tau_cld_g = Tools_gpu::allocate_gpu(1); + Float max_tau_gas = 0; + Float max_tau_cld = 0; + + // Get required temp storage size + cub::DeviceReduce::Max(d_temp_storage, temp_storage_bytes, + optical_props->get_tau().ptr(), max_tau_gas_g, max_size); + + // Allocate temp storage + cudaMalloc(&d_temp_storage, temp_storage_bytes); + + // Compute max + cub::DeviceReduce::Max(d_temp_storage, temp_storage_bytes, + optical_props->get_tau().ptr(), max_tau_gas_g, max_size); + + cudaMemcpy(&max_tau_gas, max_tau_gas_g, sizeof(Float), cudaMemcpyDeviceToHost); if (switch_cloud_optics) { @@ -550,6 +640,11 @@ void Radiation_solver_longwave::solve_gpu( // cloud->delta_scale(); } + // Compute max + cub::DeviceReduce::Max(d_temp_storage, temp_storage_bytes, + cloud_optical_props->get_tau().ptr(), max_tau_cld_g, max_size); + cudaMemcpy(&max_tau_cld, max_tau_cld_g, sizeof(Float), cudaMemcpyDeviceToHost); + // Add the cloud optical props to the gas optical properties. add_to( dynamic_cast(*optical_props), @@ -649,31 +744,48 @@ void Radiation_solver_longwave::solve_gpu( Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, (*fluxes).get_flux_sfc_up().ptr()); Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, grid_cells.z, (*fluxes).get_flux_abs_dif().ptr()); - raytracer_lw.trace_rays( - igpt, - switch_independent_column, - ray_count, - grid_cells, - grid_d, - kn_grid, - dynamic_cast(*optical_props).get_tau(), - dynamic_cast(*optical_props).get_ssa(), - dynamic_cast(*cloud_optical_props).get_tau(), - dynamic_cast(*cloud_optical_props).get_ssa(), - dynamic_cast(*cloud_optical_props).get_g(), - dynamic_cast(*aerosol_optical_props).get_tau(), - dynamic_cast(*aerosol_optical_props).get_ssa(), - dynamic_cast(*aerosol_optical_props).get_g(), - (*sources).get_lay_source(), - (*sources).get_sfc_source(), - emis_sfc, - (*fluxes).get_flux_dn()({1, grid_cells.z}), + if ( ((max_tau_gas / max_tau_cld) < tau_frac_threshold) || (tau_frac_threshold < 0) ) + { + raytracer_lw.trace_rays( + igpt, + switch_independent_column, + ray_count, + grid_cells, + grid_d, + kn_grid, + dynamic_cast(*optical_props).get_tau(), + dynamic_cast(*optical_props).get_ssa(), + dynamic_cast(*cloud_optical_props).get_tau(), + dynamic_cast(*cloud_optical_props).get_ssa(), + dynamic_cast(*cloud_optical_props).get_g(), + dynamic_cast(*aerosol_optical_props).get_tau(), + dynamic_cast(*aerosol_optical_props).get_ssa(), + dynamic_cast(*aerosol_optical_props).get_g(), + (*sources).get_lay_source(), + (*sources).get_sfc_source(), + emis_sfc, + (*fluxes).get_flux_dn()({1, grid_cells.z}), + (*fluxes).get_flux_tod_dn(), + (*fluxes).get_flux_tod_up(), + (*fluxes).get_flux_sfc_dif(), + (*fluxes).get_flux_sfc_up(), + (*fluxes).get_flux_abs_dif()); + } + else + { + convert_1d_to_rt_output( + n_col, n_lay, grid_cells.z, grid_d.z, + (*fluxes).get_flux_up(), + (*fluxes).get_flux_dn(), + (*fluxes).get_flux_net(), (*fluxes).get_flux_tod_dn(), (*fluxes).get_flux_tod_up(), (*fluxes).get_flux_sfc_dif(), (*fluxes).get_flux_sfc_up(), (*fluxes).get_flux_abs_dif()); + } + Gpt_combine_kernels_cuda_rt::add_from_gpoint( grid_cells.x, grid_cells.y, rt_flux_tod_dn.ptr(), rt_flux_tod_up.ptr(), rt_flux_sfc_dn.ptr(), rt_flux_sfc_up.ptr(), diff --git a/src_test/test_rte_rrtmgp_rt.cu b/src_test/test_rte_rrtmgp_rt.cu index bd8de66b..df0f9c97 100644 --- a/src_test/test_rte_rrtmgp_rt.cu +++ b/src_test/test_rte_rrtmgp_rt.cu @@ -731,6 +731,7 @@ void solve_radiation(int argc, char** argv) switch_lw_scattering, switch_independent_column, single_gpt, + tau_frac_threshold, photons_per_pixel, grid_cells, grid_d, From 42d384a17acf170631f360a2275d9a8fbf0c9bea Mon Sep 17 00:00:00 2001 From: Menno Date: Sun, 15 Feb 2026 13:06:50 +0100 Subject: [PATCH 5/5] instead of the tau_gas/tau_frac ratio, use the ratio between the lowest mean free path and the small horizontal grid dimension to decide whether to use 3D monte carlo or simply the 1D solution --- include_test/radiation_solver_rt.h | 2 +- src_test/radiation_solver_rt.cu | 15 +++++++-------- src_test/test_rte_rrtmgp_rt.cu | 10 +++++----- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/include_test/radiation_solver_rt.h b/include_test/radiation_solver_rt.h index 20beede2..283d7187 100644 --- a/include_test/radiation_solver_rt.h +++ b/include_test/radiation_solver_rt.h @@ -54,7 +54,7 @@ class Radiation_solver_longwave const bool switch_lw_scattering, const bool switch_independent_column, const int single_gpt, - const Float tau_frac_threshold, + const Float min_mfp_grid_ratio, const Int ray_count, const Vector grid_cells, const Vector grid_d, diff --git a/src_test/radiation_solver_rt.cu b/src_test/radiation_solver_rt.cu index a1b6c35b..fb70dfb2 100644 --- a/src_test/radiation_solver_rt.cu +++ b/src_test/radiation_solver_rt.cu @@ -487,7 +487,7 @@ void Radiation_solver_longwave::solve_gpu( const bool switch_lw_scattering, const bool switch_independent_column, const int single_gpt, - const Float tau_frac_threshold, + const Float min_mfp_grid_ratio, const Int ray_count, const Vector grid_cells, const Vector grid_d, @@ -518,6 +518,8 @@ void Radiation_solver_longwave::solve_gpu( const Bool top_at_1 = p_lay({1, 1}) < p_lay({1, n_lay}); + const Float grid_d_xy_min = min(grid_d.x, grid_d.y); + optical_props = std::make_unique(n_col, n_lay, *kdist_gpu); cloud_optical_props = std::make_unique(n_col, n_lay, *cloud_optics_gpu); aerosol_optical_props = std::make_unique(n_col, n_lay, *aerosol_optics_gpu); @@ -608,9 +610,7 @@ void Radiation_solver_longwave::solve_gpu( const int max_size = n_col * grid_cells.z; Float* max_tau_gas_g = Tools_gpu::allocate_gpu(1); - Float* max_tau_cld_g = Tools_gpu::allocate_gpu(1); Float max_tau_gas = 0; - Float max_tau_cld = 0; // Get required temp storage size cub::DeviceReduce::Max(d_temp_storage, temp_storage_bytes, @@ -625,6 +625,8 @@ void Radiation_solver_longwave::solve_gpu( cudaMemcpy(&max_tau_gas, max_tau_gas_g, sizeof(Float), cudaMemcpyDeviceToHost); + const Float lowest_gas_mean_free_path = grid_d.z / max_tau_gas; + if (switch_cloud_optics) { if (band > previous_band) @@ -640,10 +642,6 @@ void Radiation_solver_longwave::solve_gpu( // cloud->delta_scale(); } - // Compute max - cub::DeviceReduce::Max(d_temp_storage, temp_storage_bytes, - cloud_optical_props->get_tau().ptr(), max_tau_cld_g, max_size); - cudaMemcpy(&max_tau_cld, max_tau_cld_g, sizeof(Float), cudaMemcpyDeviceToHost); // Add the cloud optical props to the gas optical properties. add_to( @@ -744,7 +742,7 @@ void Radiation_solver_longwave::solve_gpu( Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, (*fluxes).get_flux_sfc_up().ptr()); Gas_optics_rrtmgp_kernels_cuda_rt::zero_array(grid_cells.x, grid_cells.y, grid_cells.z, (*fluxes).get_flux_abs_dif().ptr()); - if ( ((max_tau_gas / max_tau_cld) < tau_frac_threshold) || (tau_frac_threshold < 0) ) + if ( (lowest_gas_mean_free_path / grid_d_xy_min) > min_mfp_grid_ratio) { raytracer_lw.trace_rays( igpt, @@ -796,6 +794,7 @@ void Radiation_solver_longwave::solve_gpu( } } + Tools_gpu::free_gpu(max_tau_gas_g); } } diff --git a/src_test/test_rte_rrtmgp_rt.cu b/src_test/test_rte_rrtmgp_rt.cu index df0f9c97..4e5880d9 100644 --- a/src_test/test_rte_rrtmgp_rt.cu +++ b/src_test/test_rte_rrtmgp_rt.cu @@ -241,13 +241,13 @@ void solve_radiation(int argc, char** argv) {"profiling" , { false, "Perform additional profiling run." }}, {"delta-cloud" , { false, "delta-scaling of cloud optical properties" }}, {"delta-aerosol" , { false, "delta-scaling of aerosol optical properties" }}, - {"tau-frac-threshold", { true, "Skip ray tracing and use 1D solution above given ratio between highest per-cell gas and cloud optical depths. Default: '--tau-frac-threshold 0.1'" }}, + {"min-mfp-grid-ratio", { true, "Lowest ratio between the shortest gasous mean free path and the smallest grid dimension at which we still do ray tracing. Below this ratio, the 1D solution is used. '--min-mfp-grid-ratio 1'" }}, {"tica" , { false, "attenuate path when doing an overhead 1D calculation of tilted input" }}}; std::map command_line_numbers { {"raytracing", 32}, {"single-gpt", 1}, - {"tau-frac-threshold", 0.1}}; + {"min-mfp-grid-ratio", 1}}; if (parse_command_line_options(command_line_switches, command_line_numbers, argc, argv)) return; @@ -262,7 +262,7 @@ void solve_radiation(int argc, char** argv) bool switch_liq_cloud_optics = command_line_switches.at("liq-cloud-optics" ).first; bool switch_ice_cloud_optics = command_line_switches.at("ice-cloud-optics" ).first; const bool switch_lw_scattering = command_line_switches.at("lw-scattering" ).first; - const bool switch_tau_frac_threshold= command_line_switches.at("tau-frac-threshold").first; + const bool switch_min_mfp_grid_ratio= command_line_switches.at("min-mfp-grid-ratio").first; const bool switch_cloud_mie = command_line_switches.at("cloud-mie" ).first; const bool switch_aerosol_optics = command_line_switches.at("aerosol-optics" ).first; const bool switch_single_gpt = command_line_switches.at("single-gpt" ).first; @@ -309,7 +309,7 @@ void solve_radiation(int argc, char** argv) int single_gpt = int(command_line_numbers.at("single-gpt")); - const Float tau_frac_threshold = switch_tau_frac_threshold ? command_line_numbers.at("tau-frac-threshold") : Float(-1.); + const Float min_mfp_grid_ratio = switch_min_mfp_grid_ratio ? command_line_numbers.at("min-mfp-grid-ratio") : Float(0.); Status::print_message("Using "+ std::to_string(photons_per_pixel) + " rays per pixel"); @@ -731,7 +731,7 @@ void solve_radiation(int argc, char** argv) switch_lw_scattering, switch_independent_column, single_gpt, - tau_frac_threshold, + min_mfp_grid_ratio, photons_per_pixel, grid_cells, grid_d,