From d88b8f1fb485b48fa68e1634315381ecfd2a7073 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 26 Jun 2025 13:47:45 -0600 Subject: [PATCH 001/141] Add some additional timers around decomp level activity --- src/cpl/nuopc/lnd_comp_nuopc.F90 | 2 ++ src/main/clm_initializeMod.F90 | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index b7ef7216d9..af1426bf9b 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -642,8 +642,10 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return call ESMF_GridCompGet(gcomp, vm=vm, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return + call t_startf ('lc_lnd_set_decomp_and_domain_from_readmesh') call lnd_set_decomp_and_domain_from_readmesh(driver='cmeps', vm=vm, & meshfile_lnd=model_meshfile, meshfile_mask=meshfile_mask, mesh_ctsm=mesh, ni=ni, nj=nj, rc=rc) + call t_stopf ('lc_lnd_set_decomp_and_domain_from_readmesh') if (ChkErr(rc,__LINE__,u_FILE_u)) return end if diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index 8c0b50230b..da8185be31 100644 --- a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -279,7 +279,9 @@ subroutine initialize2(ni,nj, currtime) end if ! Determine decomposition of subgrid scale landunits, columns, patches + call t_startf('clm_decompInit_clumps') call decompInit_clumps(ni, nj, glc_behavior) + call t_stopf('clm_decompInit_clumps') ! *** Get ALL processor bounds - for gridcells, landunit, columns and patches *** call get_proc_bounds(bounds_proc) @@ -304,7 +306,9 @@ subroutine initialize2(ni,nj, currtime) !$OMP END PARALLEL DO ! Set global seg maps for gridcells, landlunits, columns and patches + call t_startf('clm_decompInit_glcp') call decompInit_glcp(ni, nj, glc_behavior) + call t_stopf('clm_decompInit_glcp') if (use_hillslope) then ! 
Initialize hillslope properties From 6cf4b8e5e40a340e21a1f68898a1041dad8b8f6b Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 26 Jun 2025 15:36:14 -0600 Subject: [PATCH 002/141] Start adding control for decomp_init self tests and add ability for self tests to exit after running in initialization --- src/main/clm_varctl.F90 | 6 ++++++ src/self_tests/SelfTestDriver.F90 | 26 ++++++++++++++++++++++---- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/main/clm_varctl.F90 b/src/main/clm_varctl.F90 index 41978ae695..b6037c7795 100644 --- a/src/main/clm_varctl.F90 +++ b/src/main/clm_varctl.F90 @@ -52,6 +52,12 @@ module clm_varctl ! true => run tests of ncdio_pio logical, public :: for_testing_run_ncdiopio_tests = .false. + ! true => run tests of decompInit + logical, public :: for_testing_run_decomp_init_tests = .false. + + ! true => exit after the self-tests run + logical, public :: for_testing_after_self_tests_run = .false. + ! true => allocate memory for and use a second grain pool. This is meant only for ! software testing of infrastructure to support the AgSys crop model integration. This ! option can be dropped once AgSys is integrated and we have tests of it. diff --git a/src/self_tests/SelfTestDriver.F90 b/src/self_tests/SelfTestDriver.F90 index d109a27827..28994bc20b 100644 --- a/src/self_tests/SelfTestDriver.F90 +++ b/src/self_tests/SelfTestDriver.F90 @@ -6,10 +6,6 @@ module SelfTestDriver ! ! See the README file in this directory for a high-level overview of these self-tests. - use clm_varctl, only : for_testing_run_ncdiopio_tests - use decompMod, only : bounds_type - use TestNcdioPio, only : test_ncdio_pio - implicit none private save @@ -32,17 +28,39 @@ subroutine self_test_driver(bounds) ! This subroutine should be called all the time, but each set of self tests is only ! run if the appropriate flag is set. ! + ! 
!USES: + use clm_varctl, only : for_testing_run_ncdiopio_tests, for_testing_run_decomp_init_tests + use clm_varctl, only : for_testing_exit_after_self_tests, iulog + use decompMod, only : bounds_type + use TestNcdioPio, only : test_ncdio_pio + use ESMF, only : ESMF_LogWrite, ESMF_LOGMSG_INFO, ESMF_Finalize + use shr_sys_mod, only : shr_sys_flush ! !ARGUMENTS: type(bounds_type), intent(in) :: bounds ! ! !LOCAL VARIABLES: character(len=*), parameter :: subname = 'self_test_driver' + integer :: ntests = 0 !----------------------------------------------------------------------- if (for_testing_run_ncdiopio_tests) then + ntests = ntests + 1 call test_ncdio_pio(bounds) end if + if (for_testing_run_decomp_init_tests) then + ntests = ntests + 1 + end if + if (for_testing_exit_after_self_tests) then + if ( ntests == 0 )then + write(iulog,*) 'WARNING: You are exiting after self tests were run -- but no self tests were run.' + else + write(iulog,*) 'Exiting after running ', ntests, ' self tests.' + end if + call shr_sys_flush(iulog) + call ESMF_LogWrite(' exiting after running self tests', ESMF_LOGMSG_INFO) + call ESMF_Finalize() + end if end subroutine self_test_driver From a27de36fe7f0f1cb5b14515feb41f0ea55449d9d Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 27 Jun 2025 16:54:13 -0600 Subject: [PATCH 003/141] Add namelist controls for self testing --- .../namelist_definition_ctsm.xml | 11 +++++++++++ src/cpl/nuopc/lnd_comp_nuopc.F90 | 18 ++++++++++++++++++ src/main/clm_varctl.F90 | 2 +- src/main/controlMod.F90 | 6 +++++- src/self_tests/SelfTestDriver.F90 | 19 +++++++++++-------- 5 files changed, 46 insertions(+), 10 deletions(-) diff --git a/bld/namelist_files/namelist_definition_ctsm.xml b/bld/namelist_files/namelist_definition_ctsm.xml index 820975655d..e116d51b21 100644 --- a/bld/namelist_files/namelist_definition_ctsm.xml +++ b/bld/namelist_files/namelist_definition_ctsm.xml @@ -1242,12 +1242,23 @@ Whether to use subgrid fluxes for snow Whether snow on the 
vegetation canopy affects the radiation/albedo calculations + +Whether to exit early after the initialization self tests are run. This is typically only used in automated tests. + + Whether to run some tests of ncdio_pio as part of the model run. This is typically only used in automated tests. + +Whether to run some tests of decompInit (to get the gridcell to MPI task decomposition) as part of the model run. This is +typically only used in automated tests. + + If true, allocate memory for and use a second crop grain pool. This is diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index af1426bf9b..0287e1fce2 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -39,6 +39,7 @@ module lnd_comp_nuopc use clm_varctl , only : single_column, clm_varctl_set, iulog use clm_varctl , only : nsrStartup, nsrContinue, nsrBranch use clm_varctl , only : FL => fname_len + use clm_varctl , only : for_testing_exit_after_self_tests use clm_time_manager , only : set_timemgr_init, advance_timestep use clm_time_manager , only : update_rad_dtime use clm_time_manager , only : get_nstep, get_step_size @@ -492,6 +493,12 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) else single_column = .false. end if + if ( for_testing_exit_after_self_tests) then + ! ******************* + ! *** RETURN HERE *** + ! ******************* + RETURN + end if !---------------------------------------------------------------------------- ! Reset shr logging to my log file @@ -662,6 +669,9 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return call initialize2(ni, nj, currtime) + if (for_testing_exit_after_self_tests) then + RETURN + end if !-------------------------------- ! Create land export state @@ -771,6 +781,9 @@ subroutine ModelAdvance(gcomp, rc) if (single_column .and. .not. 
scol_valid) then RETURN end if + if (for_testing_exit_after_self_tests) then + RETURN + end if !$ call omp_set_num_threads(nthrds) @@ -1002,6 +1015,7 @@ subroutine ModelSetRunClock(gcomp, rc) rc = ESMF_SUCCESS call ESMF_LogWrite(subname//' called', ESMF_LOGMSG_INFO) if (.not. scol_valid) return + if (for_testing_exit_after_self_tests) return ! query the Component for its clocks call NUOPC_ModelGet(gcomp, driverClock=dclock, modelClock=mclock, rc=rc) @@ -1285,6 +1299,7 @@ subroutine clm_orbital_update(clock, logunit, mastertask, eccen, obliqr, lambm0 end subroutine clm_orbital_update subroutine CheckImport(gcomp, rc) + use clm_varctl, only : for_testing_exit_after_self_tests type(ESMF_GridComp) :: gcomp integer, intent(out) :: rc character(len=*) , parameter :: subname = "("//__FILE__//":CheckImport)" @@ -1313,6 +1328,9 @@ subroutine CheckImport(gcomp, rc) if (single_column .and. .not. scol_valid) then RETURN end if + if (for_testing_exit_after_self_tests) then + RETURN + end if ! The remander of this should be equivalent to the NUOPC internal routine ! from NUOPC_ModeBase.F90 diff --git a/src/main/clm_varctl.F90 b/src/main/clm_varctl.F90 index b6037c7795..e51ce4d33c 100644 --- a/src/main/clm_varctl.F90 +++ b/src/main/clm_varctl.F90 @@ -56,7 +56,7 @@ module clm_varctl logical, public :: for_testing_run_decomp_init_tests = .false. ! true => exit after the self-tests run - logical, public :: for_testing_after_self_tests_run = .false. + logical, public :: for_testing_exit_after_self_tests = .false. ! true => allocate memory for and use a second grain pool. This is meant only for ! software testing of infrastructure to support the AgSys crop model integration. 
This diff --git a/src/main/controlMod.F90 b/src/main/controlMod.F90 index 6d363a9a6e..51bc9ce5c0 100644 --- a/src/main/controlMod.F90 +++ b/src/main/controlMod.F90 @@ -211,6 +211,7 @@ subroutine control_init(dtime) snow_thermal_cond_lake_method, snow_cover_fraction_method, & irrigate, run_zero_weight_urban, all_active, & crop_fsat_equals_zero, for_testing_run_ncdiopio_tests, & + for_testing_run_decomp_init_tests, for_testing_exit_after_self_tests, & for_testing_use_second_grain_pool, for_testing_use_repr_structure_pool, & for_testing_no_crop_seed_replenishment, & z0param_method, use_z0m_snowmelt @@ -764,8 +765,11 @@ subroutine control_spmd() ! Crop saturated excess runoff call mpi_bcast(crop_fsat_equals_zero, 1, MPI_LOGICAL, 0, mpicom, ier) - ! Whether to run tests of ncdio_pio + ! Whether to run self tests call mpi_bcast(for_testing_run_ncdiopio_tests, 1, MPI_LOGICAL, 0, mpicom, ier) + call mpi_bcast(for_testing_run_decomp_init_tests, 1, MPI_LOGICAL, 0, mpicom, ier) + + call mpi_bcast(for_testing_exit_after_self_tests, 1, MPI_LOGICAL, 0, mpicom, ier) ! Various flags used for testing infrastructure for having multiple crop reproductive pools call mpi_bcast(for_testing_use_second_grain_pool, 1, MPI_LOGICAL, 0, mpicom, ier) diff --git a/src/self_tests/SelfTestDriver.F90 b/src/self_tests/SelfTestDriver.F90 index 28994bc20b..dd44a185df 100644 --- a/src/self_tests/SelfTestDriver.F90 +++ b/src/self_tests/SelfTestDriver.F90 @@ -35,6 +35,7 @@ subroutine self_test_driver(bounds) use TestNcdioPio, only : test_ncdio_pio use ESMF, only : ESMF_LogWrite, ESMF_LOGMSG_INFO, ESMF_Finalize use shr_sys_mod, only : shr_sys_flush + use spmdMod, only : masterproc ! !ARGUMENTS: type(bounds_type), intent(in) :: bounds ! @@ -52,14 +53,16 @@ subroutine self_test_driver(bounds) ntests = ntests + 1 end if if (for_testing_exit_after_self_tests) then - if ( ntests == 0 )then - write(iulog,*) 'WARNING: You are exiting after self tests were run -- but no self tests were run.' 
- else - write(iulog,*) 'Exiting after running ', ntests, ' self tests.' - end if - call shr_sys_flush(iulog) - call ESMF_LogWrite(' exiting after running self tests', ESMF_LOGMSG_INFO) - call ESMF_Finalize() + ! Print out some messaging if we are exiting after self tests. + if ( masterproc ) then + if ( ntests == 0 )then + write(iulog,*) 'WARNING: You are exiting after self tests were run -- but no self tests were run.' + else + write(iulog,*) 'Exiting after running ', ntests, ' self tests.' + end if + call shr_sys_flush(iulog) + call ESMF_LogWrite(' exiting after running self tests', ESMF_LOGMSG_INFO) + end if end if end subroutine self_test_driver From 2fd081b544cc00b2f426fd9d44b12b9d995bf240 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 2 Jul 2025 10:11:49 -0600 Subject: [PATCH 004/141] Changes to exit early when self test namelist option used for_testing_exit_after_self_tests, change the self tests testmod so that its about initialization, this works with a compset with SATM, but hangs -- because nothing stops the run --- .../testmods_dirs/clm/run_self_tests/README | 9 +++++++-- .../clm/run_self_tests/shell_commands | 4 ++++ .../clm/run_self_tests/user_nl_clm | 6 ++++++ src/cpl/nuopc/lnd_comp_nuopc.F90 | 15 +++++++++++---- src/cpl/nuopc/lnd_import_export.F90 | 19 ++++++++++++++----- 5 files changed, 42 insertions(+), 11 deletions(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/README b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/README index 938dffbe6f..985b2bfae4 100644 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/README +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/README @@ -1,5 +1,10 @@ -The purpose of this testmod directory is to trigger the runtime -self-tests. This runs a suite of unit/integration tests. +The purpose of this testmod directory is to trigger runtime +initialization self-tests. This runs a set of unit/integration tests +that apply at initialization. 
We use cold start so that we can get through initialization faster, since how we initialize the model is unimportant for these self-tests. +We also exit as early as possible to minimize the time spent. + +There are other self_tests that need to be exercised in the model time stepping +and are done outside of these. diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands index d426269206..9383f70de0 100755 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -3,3 +3,7 @@ # We use this testmod in a _Ln1 test; this requires forcing the ROF coupling frequency to every time step ./xmlchange ROF_NCPL=48 + +# Restarts aren't allowed for these tests, and turn off CPL history +./xmlchange REST_OPTION="never" +./xmlchange HIST_OPTION="never" diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm index 6187386336..3edb8c7fc7 100644 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm @@ -1 +1,7 @@ for_testing_run_ncdiopio_tests = .true. +for_testing_run_decomp_init_tests = .true. +for_testing_exit_after_self_tests = .true. + +! Turn off history, restarts, and output +hist_empty_htapes = .true. +use_noio = .true. 
\ No newline at end of file diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index 0287e1fce2..7245954b4d 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -81,6 +81,7 @@ module lnd_comp_nuopc logical :: glc_present logical :: rof_prognostic + logical :: atm_present logical :: atm_prognostic integer, parameter :: dbug = 0 character(*),parameter :: modName = "(lnd_comp_nuopc)" @@ -285,6 +286,11 @@ subroutine InitializeAdvertise(gcomp, importState, exportState, clock, rc) else atm_prognostic = .true. end if + if (trim(atm_model) == 'satm') then + atm_present = .false. + else + atm_present = .true. + end if call NUOPC_CompAttributeGet(gcomp, name='GLC_model', value=glc_model, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return if (trim(glc_model) == 'sglc') then @@ -311,6 +317,9 @@ subroutine InitializeAdvertise(gcomp, importState, exportState, clock, rc) write(iulog,'(a )')' rof component = '//trim(rof_model) write(iulog,'(a )')' glc component = '//trim(glc_model) write(iulog,'(a,L2)')' atm_prognostic = ',atm_prognostic + if (.not. 
atm_present) then + write(iulog,'(a,L2)')' atm_present = ',atm_present + end if write(iulog,'(a,L2)')' rof_prognostic = ',rof_prognostic write(iulog,'(a,L2)')' glc_present = ',glc_present if (glc_present) then @@ -329,7 +338,8 @@ subroutine InitializeAdvertise(gcomp, importState, exportState, clock, rc) call control_setNL("lnd_in"//trim(inst_suffix)) - call advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, atm_prognostic, rc) + call advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, & + atm_prognostic, atm_present, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return !---------------------------------------------------------------------------- @@ -669,9 +679,6 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return call initialize2(ni, nj, currtime) - if (for_testing_exit_after_self_tests) then - RETURN - end if !-------------------------------- ! Create land export state diff --git a/src/cpl/nuopc/lnd_import_export.F90 b/src/cpl/nuopc/lnd_import_export.F90 index 624590b9a6..b8a5efeb8d 100644 --- a/src/cpl/nuopc/lnd_import_export.F90 +++ b/src/cpl/nuopc/lnd_import_export.F90 @@ -156,7 +156,8 @@ module lnd_import_export contains !=============================================================================== - subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, atm_prognostic, rc) + subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, & + atm_prognostic, atm_present, rc) use shr_carma_mod , only : shr_carma_readnl use shr_ndep_mod , only : shr_ndep_readnl @@ -173,6 +174,7 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r logical , intent(in) :: cism_evolve logical , intent(in) :: rof_prognostic logical , intent(in) :: atm_prognostic + logical , intent(in) :: atm_present integer , intent(out) :: rc ! 
local variables @@ -210,7 +212,9 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r ! Need to determine if there is no land for single column before the advertise call is done - if (atm_prognostic .or. force_send_to_atm) then + if (.not. atm_present)then + send_to_atm = .false. + else if (atm_prognostic .or. force_send_to_atm) then send_to_atm = .true. else send_to_atm = .false. @@ -253,12 +257,11 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r if (shr_megan_mechcomps_n .ne. megan_nflds) call shr_sys_abort('ERROR: megan field count mismatch') ! CARMA volumetric soil water from land - call shr_carma_readnl('drv_flds_in', carma_fields) ! export to atm call fldlist_add(fldsFrLnd_num, fldsFrlnd, trim(flds_scalar_name)) - call fldlist_add(fldsFrLnd_num, fldsFrlnd, 'Sl_lfrin') if (send_to_atm) then + call fldlist_add(fldsFrLnd_num, fldsFrlnd, 'Sl_lfrin') call fldlist_add(fldsFrLnd_num, fldsFrlnd, Sl_t ) call fldlist_add(fldsFrLnd_num, fldsFrlnd, Sl_tref ) call fldlist_add(fldsFrLnd_num, fldsFrlnd, Sl_qref ) @@ -339,6 +342,9 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r call fldlist_add(fldsToLnd_num, fldsToLnd, trim(flds_scalar_name)) + !!!!!!!!!!!!!!!!!!!!!!!!!!! new if section !!!!!!!!!!!!!!!!!!!!!!!!!! + if ( atm_present ) then + ! from atm call fldlist_add(fldsToLnd_num, fldsToLnd, Sa_z ) call fldlist_add(fldsToLnd_num, fldsToLnd, Sa_topo ) @@ -389,6 +395,9 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r call fldlist_add(fldsToLnd_num, fldsToLnd, Sa_co2diag) end if + end if ! atm_present + !!!!!!!!!!!!!!!!!!!!!!!!!!! new if section !!!!!!!!!!!!!!!!!!!!!!!!!! + if (rof_prognostic) then ! from river call fldlist_add(fldsToLnd_num, fldsToLnd, Flrr_flood ) @@ -773,6 +782,7 @@ subroutine export_fields( gcomp, bounds, glc_present, rof_prognostic, & ! output to mediator ! 
----------------------- + if (send_to_atm) then call state_setexport_1d(exportState, Sl_lfrin, ldomain%frac(begg:), init_spval=.false., rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return @@ -780,7 +790,6 @@ subroutine export_fields( gcomp, bounds, glc_present, rof_prognostic, & ! output to atm ! ----------------------- - if (send_to_atm) then call state_setexport_1d(exportState, Sl_t , lnd2atm_inst%t_rad_grc(begg:), & init_spval=.true., rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return From 6eaadd4b82fa1f79bd09b98712507f3e30a25203 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 10 Jul 2025 23:13:06 -0600 Subject: [PATCH 005/141] Bring in the share branch with the memory logger from John Dennis --- .gitmodules | 7 +++++-- share | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index 0d39ebff4e..14e5163b73 100644 --- a/.gitmodules +++ b/.gitmodules @@ -99,8 +99,11 @@ fxDONOTUSEurl = https://github.com/ESCOMP/CDEPS.git [submodule "share"] path = share -url = https://github.com/ESCOMP/CESM_share -fxtag = share1.1.9 +#url = https://github.com/ESCOMP/CESM_share +url = https://github.com/ekluzek/CESM_share +#fxtag = share1.1.9 +#fxtag = add_jdennis_procstatus_module +fxtag = 6fe1530b604fa729a5d363c08272714f95ea6ea1 fxrequired = ToplevelRequired # Standard Fork to compare to with "git fleximod test" to ensure personal forks aren't committed fxDONOTUSEurl = https://github.com/ESCOMP/CESM_share diff --git a/share b/share index 14338bef3f..6fe1530b60 160000 --- a/share +++ b/share @@ -1 +1 @@ -Subproject commit 14338bef3fa604d49160e376257264db1d3313e5 +Subproject commit 6fe1530b604fa729a5d363c08272714f95ea6ea1 From f18e4b034426767869012124f3166c37e1c4996f Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 11 Jul 2025 14:32:08 -0600 Subject: [PATCH 006/141] Update proc_status_vm to use shr_sys_abort, and iulog from shr_log, and use newunit rather than a hardcoded file unit to make it more robust, it wasn't showing any output 
before --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 15578e5da3..18833ee4c2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -103,7 +103,7 @@ path = share url = https://github.com/ekluzek/CESM_share #fxtag = share1.1.9 #fxtag = add_jdennis_procstatus_module -fxtag = 6fe1530b604fa729a5d363c08272714f95ea6ea1 +fxtag = 3cc3770a92b223556fc210e8f686994b3e4241be fxrequired = ToplevelRequired # Standard Fork to compare to with "git fleximod test" to ensure personal forks aren't committed fxDONOTUSEurl = https://github.com/ESCOMP/CESM_share From 71de4c9165761e9ffecaa0831526f76343352dac Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 11 Jul 2025 14:33:32 -0600 Subject: [PATCH 007/141] Turn off restarts and history and add some timer options as well as turning off ncdio_pio testing for the purposes of decompInit work --- .../testmods_dirs/clm/run_self_tests/shell_commands | 9 +++++++++ .../testmods_dirs/clm/run_self_tests/user_nl_clm | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands index d1efa2a23c..ca2a393757 100755 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -12,5 +12,14 @@ ./xmlchange CLM_CONFIGURATION="nwp" # Restarts aren't allowed for these tests, and turn off CPL history +# First change in env_test.xml, then in the standard one so it won't complain there +./xmlchange --force REST_OPTION="never" --file env_test.xml +./xmlchange --force HIST_OPTION="never" --file env_test.xml ./xmlchange REST_OPTION="never" ./xmlchange HIST_OPTION="never" + +# Timer settings +./xmlchange TIMER_DETAIL="2" +./xmlchange SAVE_TIMING="TRUE" +./xmlchange CHECK_TIMING="TRUE" +./xmlchange ESMF_PROFILING_LEVEL="10" diff --git 
a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm index 3edb8c7fc7..4430f92e10 100644 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm @@ -1,4 +1,4 @@ -for_testing_run_ncdiopio_tests = .true. +for_testing_run_ncdiopio_tests = .false. for_testing_run_decomp_init_tests = .true. for_testing_exit_after_self_tests = .true. From efd21296c22974fb1c15e58d806596ed853c416a Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 14 Jul 2025 15:43:08 -0600 Subject: [PATCH 008/141] Fix proc_status_vm from the changes I made, it's now reporting properly --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 18833ee4c2..7615eec0bb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -103,7 +103,7 @@ path = share url = https://github.com/ekluzek/CESM_share #fxtag = share1.1.9 #fxtag = add_jdennis_procstatus_module -fxtag = 3cc3770a92b223556fc210e8f686994b3e4241be +fxtag = 9973692556da54f9562935be43c1d43b0607d24b fxrequired = ToplevelRequired # Standard Fork to compare to with "git fleximod test" to ensure personal forks aren't committed fxDONOTUSEurl = https://github.com/ESCOMP/CESM_share From d9e212bfc85edd49abeb7715f8e252d8533e9a4f Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 14 Jul 2025 15:43:26 -0600 Subject: [PATCH 009/141] Fix proc_status_vm from the changes I made, it's now reporting properly --- share | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share b/share index 6fe1530b60..9973692556 160000 --- a/share +++ b/share @@ -1 +1 @@ -Subproject commit 6fe1530b604fa729a5d363c08272714f95ea6ea1 +Subproject commit 9973692556da54f9562935be43c1d43b0607d24b From 213ff9c9cff62849eff1b85b064db1192aad91cb Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 14 Jul 2025 15:44:38 -0600 Subject: [PATCH 010/141] Add calls for 
evaluating memory --- src/main/decompInitMod.F90 | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index bebcd9d358..930f19c07c 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -12,12 +12,14 @@ module decompInitMod use spmdMod , only : masterproc, iam, npes, mpicom use abortutils , only : endrun use clm_varctl , only : iulog + use proc_status_vm, only : prt_vm_status + use shr_mem_mod , only : shr_mem_init, shr_mem_getusage ! implicit none private ! ! !PUBLIC TYPES: - ! + !b ! !PUBLIC MEMBER FUNCTIONS: public :: decompInit_lnd ! initializes lnd grid decomposition into clumps and processors public :: decompInit_clumps ! initializes atm grid decomposition into clumps @@ -34,6 +36,7 @@ module decompInitMod integer, parameter :: dbug=0 ! 0 = min, 1=normal, 2=much, 3=max character(len=*), parameter :: sourcefile = & __FILE__ + real(r8) :: msize, mrss ! memory usage variables #include ! mpi library include file !------------------------------------------------------------------------------ @@ -72,8 +75,17 @@ subroutine decompInit_lnd(lni, lnj, amask) integer, pointer :: clumpcnt(:) ! clump index counter integer, allocatable :: gdc2glo(:)! used to create gindex_global type(bounds_type) :: bounds ! contains subgrid bounds data + real(r8) :: msize, mrss !------------------------------------------------------------------------------ + call shr_mem_init(prt=.true.) ! initialize memory tracking + if(masterproc) then + call prt_vm_status('CTSM: decompInit_lnd: before') + endif + call shr_mem_getusage( msize, mrss, prt=.true.) 
+ write(iulog,*) 'msize, mrss = ',msize, mrss + call shr_sys_flush(iulog) + lns = lni * lnj !--- set and verify nclumps --- @@ -291,6 +303,13 @@ subroutine decompInit_lnd(lni, lnj, amask) gindex_global(n-procinfo%begg+1) = gdc2glo(n) enddo + if(masterproc) then + call prt_vm_status('CTSM: decompInit_lnd: afterwards before deallocate') + endif + call shr_mem_getusage( msize, mrss, prt=.true.) + write(iulog,*) 'msize, mrss = ',msize, mrss + call shr_sys_flush(iulog) + deallocate(clumpcnt) deallocate(gdc2glo) @@ -349,6 +368,9 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) character(len=32), parameter :: subname = 'decompInit_clumps' !------------------------------------------------------------------------------ + if(masterproc) then + call prt_vm_status('CTSM: decompInit_clumps: before') + endif !--- assign gridcells to clumps (and thus pes) --- call get_proc_bounds(bounds) begg = bounds%begg; endg = bounds%endg @@ -471,6 +493,10 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) endif enddo + if(masterproc) then + call prt_vm_status('CTSM: decompInit_clumps: after before deallocate') + endif + deallocate(allvecg,allvecl) deallocate(lcid) From 0abc15c327ab249a9e84137701b2f8c531245267 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 14 Jul 2025 18:20:21 -0600 Subject: [PATCH 011/141] Put memory stuff only under masterproc to only report on a single task to make it more obvious what is going on because each task should be similar --- src/main/decompInitMod.F90 | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index 930f19c07c..187d074d63 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -78,13 +78,13 @@ subroutine decompInit_lnd(lni, lnj, amask) real(r8) :: msize, mrss !------------------------------------------------------------------------------ - call shr_mem_init(prt=.true.) ! 
initialize memory tracking if(masterproc) then + call shr_mem_init(prt=.true.) ! initialize memory tracking call prt_vm_status('CTSM: decompInit_lnd: before') + call shr_mem_getusage( msize, mrss, prt=.true.) + write(iulog,*) 'msize, mrss = ',msize, mrss + call shr_sys_flush(iulog) endif - call shr_mem_getusage( msize, mrss, prt=.true.) - write(iulog,*) 'msize, mrss = ',msize, mrss - call shr_sys_flush(iulog) lns = lni * lnj @@ -305,10 +305,10 @@ subroutine decompInit_lnd(lni, lnj, amask) if(masterproc) then call prt_vm_status('CTSM: decompInit_lnd: afterwards before deallocate') + call shr_mem_getusage( msize, mrss, prt=.true.) + write(iulog,*) 'msize, mrss = ',msize, mrss + call shr_sys_flush(iulog) endif - call shr_mem_getusage( msize, mrss, prt=.true.) - write(iulog,*) 'msize, mrss = ',msize, mrss - call shr_sys_flush(iulog) deallocate(clumpcnt) deallocate(gdc2glo) From c1bfd8366ffffa58ac2ef5538e8d4067fae7129f Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Tue, 29 Jul 2025 13:25:52 -0600 Subject: [PATCH 012/141] Add a PE layout for mpas13p75 --- cime_config/config_pes.xml | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/cime_config/config_pes.xml b/cime_config/config_pes.xml index bb10b8019c..d0794339db 100644 --- a/cime_config/config_pes.xml +++ b/cime_config/config_pes.xml @@ -2092,6 +2092,44 @@ + + + + + none + + -1 + -80 + -80 + -80 + -80 + -80 + -80 + -80 + -80 + + + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + + + 0 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + + + From 3a325198192b28fe7198256b56061302d833297f Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Tue, 29 Jul 2025 13:27:56 -0600 Subject: [PATCH 013/141] Start adding timers to lnd_set_decomp_and_domain_from_readmesh --- src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index 0b066ceb5b..9a2695f2f7 100644 --- 
a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -20,6 +20,7 @@ module lnd_set_decomp_and_domain use spmdMod , only : masterproc, mpicom use clm_varctl , only : iulog, inst_suffix, FL => fname_len use abortutils , only : endrun + use perf_mod , only : t_startf, t_stopf implicit none private ! except @@ -106,6 +107,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes call lnd_get_global_dims(ni, nj, gsize, isgrid2d) ! Read in the land mesh from the file + call t_startf('lnd_set_decomp_and_domain_from_readmesh: ESMF mesh') mesh_lndinput = ESMF_MeshCreate(filename=trim(meshfile_lnd), fileformat=ESMF_FILEFORMAT_ESMFMESH, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return @@ -142,6 +144,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes else call shr_sys_abort('driver '//trim(driver)//' is not supported, must be lilac or cmeps') end if + call t_stopf('lnd_set_decomp_and_domain_from_readmesh: ESMF mesh') ! Determine lnd decomposition that will be used by ctsm from lndmask_glob call decompInit_lnd(lni=ni, lnj=nj, amask=lndmask_glob) @@ -190,6 +193,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes end do ! Generate a new mesh on the gindex decomposition + call t_startf('lnd_set_decomp_and_domain_from_readmesh: ESMF mesh on new decomposition') distGrid_ctsm = ESMF_DistGridCreate(arbSeqIndexList=gindex_ctsm, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return mesh_ctsm = ESMF_MeshCreate(mesh_lndinput, elementDistGrid=distgrid_ctsm, rc=rc) @@ -198,6 +202,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes ! 
Set ldomain%lonc, ldomain%latc and ldomain%area call lnd_set_ldomain_gridinfo_from_mesh(mesh_ctsm, vm, gindex_ctsm, begg, endg, isgrid2d, ni, nj, ldomain, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return + call t_stopf('lnd_set_decomp_and_domain_from_readmesh: ESMF mesh on new decomposition') ! Set ldomain%lfrac ! Create fields on the input decomp and ctsm decomp @@ -206,6 +211,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes ! Redistribute field_lnd to field_ctsm ! Determine ldomain%frac using ctsm decomposition + call t_startf('lnd_set_decomp_and_domain_from_readmesh: land frac') if (trim(driver) == 'cmeps') then if (trim(meshfile_mask) /= trim(meshfile_lnd)) then @@ -245,6 +251,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes deallocate(lndfrac_glob) end if + call t_stopf('lnd_set_decomp_and_domain_from_readmesh: land frac') ! Deallocate local pointer memory deallocate(gindex_lnd) From d531303d3aaa62013cdae6197df54c49454c4d26 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 11:41:32 -0600 Subject: [PATCH 014/141] Turn off RTM rather than increase the ROF coupling frequency --- .../testdefs/testmods_dirs/clm/run_self_tests/shell_commands | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands index ca2a393757..143acc98e7 100755 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -1,8 +1,8 @@ #!/bin/bash ./xmlchange CLM_FORCE_COLDSTART="on" -# We use this testmod in a _Ln1 test; this requires forcing the ROF coupling frequency to same frequency as DATM -./xmlchange ROF_NCPL='$ATM_NCPL' +# Turn off ROF model when used with compsets that have them +./xmlchange ROF_MODE='NULL' # Turn MEGAN off to run faster ./xmlchange 
CLM_BLDNML_OPTS='--no-megan' --append From 402584e05a4611926d0855ad63bef6e0d3cb1b01 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 11:42:24 -0600 Subject: [PATCH 015/141] Turn off urban HAC completely and minimize urban in gridcells --- .../testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm index 4430f92e10..499770f153 100644 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm @@ -4,4 +4,6 @@ for_testing_exit_after_self_tests = .true. ! Turn off history, restarts, and output hist_empty_htapes = .true. -use_noio = .true. \ No newline at end of file +use_noio = .true. +urban_hac = 'OFF' +toosmall_urban = 98.0d00 ! Minimize urban in gridcells From 31ad84635d27dbb4900c544e7e5da8150bb7db76 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 11:47:35 -0600 Subject: [PATCH 016/141] Add a testmod for mpasa3p75 grid --- .../testdefs/testmods_dirs/clm/mpasa3p75/user_nl_clm | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 cime_config/testdefs/testmods_dirs/clm/mpasa3p75/user_nl_clm diff --git a/cime_config/testdefs/testmods_dirs/clm/mpasa3p75/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/mpasa3p75/user_nl_clm new file mode 100644 index 0000000000..bfdcfb115b --- /dev/null +++ b/cime_config/testdefs/testmods_dirs/clm/mpasa3p75/user_nl_clm @@ -0,0 +1,6 @@ +! Settings currently required to run at the mpasa3p75 grid +! 
urbantv files at that resolution and use a redistribution mapping + +stream_fldfilename_urbantv = '/glade/derecho/scratch/bdobbins/ko/tbuildmax.nc' +stream_meshfile_urbantv = '/glade/derecho/scratch/bdobbins/ko/mesh.nc' +urbantvmapalgo = 'redist' From be81f3ad97f546e94a79cd31bae25bd858ef698b Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 11:53:51 -0600 Subject: [PATCH 017/141] Add decomp initialization test and test list for ultra high resolution (3.75km mpasa) --- cime_config/testdefs/testlist_clm.xml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cime_config/testdefs/testlist_clm.xml b/cime_config/testdefs/testlist_clm.xml index 478dc59bdd..3f232e7518 100644 --- a/cime_config/testdefs/testlist_clm.xml +++ b/cime_config/testdefs/testlist_clm.xml @@ -14,6 +14,7 @@ matrixcn: Tests exercising the matrix-CN capability aux_clm_mpi_serial: aux_clm tests using mpi-serial. Useful for redoing tests that failed due to https://github.com/ESCOMP/CTSM/issues/2916, after having replaced libraries/mpi-serial with a fresh copy. decomp_init: Initialization tests specifically for examining the PE layout decomposition initialization + uhr_decomp_init: Initialization tests at Ultra High Resolution -- specifically for examining the PE layout decomposition initialization --> @@ -4209,6 +4210,15 @@ + + + + + + + + + From 95ab0147925f81eb66da2f5a041fd1bce7202a3f Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 13:03:20 -0600 Subject: [PATCH 018/141] Fix syntax and correct 3p75 resolution grid name for test --- cime_config/testdefs/testlist_clm.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cime_config/testdefs/testlist_clm.xml b/cime_config/testdefs/testlist_clm.xml index 3f232e7518..a3dc4d760f 100644 --- a/cime_config/testdefs/testlist_clm.xml +++ b/cime_config/testdefs/testlist_clm.xml @@ -14,7 +14,7 @@ matrixcn: Tests exercising the matrix-CN capability aux_clm_mpi_serial: aux_clm tests using mpi-serial. 
Useful for redoing tests that failed due to https://github.com/ESCOMP/CTSM/issues/2916, after having replaced libraries/mpi-serial with a fresh copy. decomp_init: Initialization tests specifically for examining the PE layout decomposition initialization - uhr_decomp_init: Initialization tests at Ultra High Resolution -- specifically for examining the PE layout decomposition initialization + uhr_decomp_init: Initialization tests at Ultra High Resolution - specifically for examining the PE layout decomposition initialization --> @@ -4210,7 +4210,7 @@ - + From 6ee90776a4581764afa3bd8f282ce186f4b87e69 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 13:29:15 -0600 Subject: [PATCH 019/141] Still need to set NCPL_ROF --- .../testdefs/testmods_dirs/clm/run_self_tests/shell_commands | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands index 143acc98e7..2cb24928b5 100755 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -1,6 +1,9 @@ #!/bin/bash ./xmlchange CLM_FORCE_COLDSTART="on" +# We use this testmod in a _Ln1 test; this requires forcing the ROF coupling frequency to same frequency as DATM +./xmlchange ROF_NCPL='$ATM_NCPL + # Turn off ROF model when used with compsets that have them ./xmlchange ROF_MODE='NULL' From f03a0ebf9eccdf9b2038d8c247df97870bf5a6f0 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 13:29:50 -0600 Subject: [PATCH 020/141] Fix name of mpasa3p75 testmod in test --- cime_config/testdefs/testlist_clm.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime_config/testdefs/testlist_clm.xml b/cime_config/testdefs/testlist_clm.xml index a3dc4d760f..1a53e25ae9 100644 --- a/cime_config/testdefs/testlist_clm.xml +++ b/cime_config/testdefs/testlist_clm.xml @@ -4210,7 
+4210,7 @@ - + From ed4c49e84f5815dc3d007aca1f869c954ce176bb Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 13:54:10 -0600 Subject: [PATCH 021/141] Fix syntax error --- .../testdefs/testmods_dirs/clm/run_self_tests/shell_commands | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands index 2cb24928b5..f3144c219d 100755 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -2,7 +2,7 @@ ./xmlchange CLM_FORCE_COLDSTART="on" # We use this testmod in a _Ln1 test; this requires forcing the ROF coupling frequency to same frequency as DATM -./xmlchange ROF_NCPL='$ATM_NCPL +./xmlchange ROF_NCPL='$ATM_NCPL' # Turn off ROF model when used with compsets that have them ./xmlchange ROF_MODE='NULL' From b5ab98c4d0e225716fb158fccf516f6f6a2dee88 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 16:15:27 -0600 Subject: [PATCH 022/141] Remove the mpasa15 test from expected fails --- cime_config/testdefs/ExpectedTestFails.xml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cime_config/testdefs/ExpectedTestFails.xml b/cime_config/testdefs/ExpectedTestFails.xml index ac35ad812e..51f85ba8f3 100644 --- a/cime_config/testdefs/ExpectedTestFails.xml +++ b/cime_config/testdefs/ExpectedTestFails.xml @@ -363,11 +363,4 @@ - - - FAIL - #3316 - - - From 8914b12abfe465c97e5a3f7afe23fb72041c6a6d Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 31 Jul 2025 14:08:14 -0600 Subject: [PATCH 023/141] Add timers for clm_initialize2 that cover the whole subroutine --- src/main/clm_initializeMod.F90 | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index da8185be31..0138fba686 100644 --- 
a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -225,6 +225,7 @@ subroutine initialize2(ni,nj, currtime) call t_startf('clm_init2') + call t_startf('clm_init2_part1') ! Get processor bounds for gridcells call get_proc_bounds(bounds_proc) begg = bounds_proc%begg; endg = bounds_proc%endg @@ -277,12 +278,14 @@ subroutine initialize2(ni,nj, currtime) call CLMFatesGlobals2() end if + call t_stopf('clm_init2_part1') ! Determine decomposition of subgrid scale landunits, columns, patches call t_startf('clm_decompInit_clumps') call decompInit_clumps(ni, nj, glc_behavior) call t_stopf('clm_decompInit_clumps') + call t_startf('clm_init2_subgrid') ! *** Get ALL processor bounds - for gridcells, landunit, columns and patches *** call get_proc_bounds(bounds_proc) @@ -304,12 +307,14 @@ subroutine initialize2(ni,nj, currtime) call initGridCells(bounds_clump, glc_behavior) end do !$OMP END PARALLEL DO + call t_stopf('clm_init2_subgrid') ! Set global seg maps for gridcells, landlunits, columns and patches call t_startf('clm_decompInit_glcp') call decompInit_glcp(ni, nj, glc_behavior) call t_stopf('clm_decompInit_glcp') + call t_startf('clm_init2_part2') if (use_hillslope) then ! Initialize hillslope properties call InitHillslope(bounds_proc, hillslope_file) @@ -369,15 +374,15 @@ subroutine initialize2(ni,nj, currtime) if (use_fates) call CLMFatesTimesteps() ! Initialize daylength from the previous time step (needed so prev_dayl can be set correctly) - call t_startf('init_orbd') calday = get_curr_calday(reuse_day_365_for_day_366=.true.) call shr_orb_decl( calday, eccen, mvelpp, lambm0, obliqr, declin, eccf ) dtime = get_step_size_real() caldaym1 = get_curr_calday(offset=-int(dtime), reuse_day_365_for_day_366=.true.) 
call shr_orb_decl( caldaym1, eccen, mvelpp, lambm0, obliqr, declinm1, eccf ) - call t_stopf('init_orbd') call InitDaylength(bounds_proc, declin=declin, declinm1=declinm1, obliquity=obliqr) + call t_stopf('clm_init2_part2') + call t_startf('clm_init2_part3') ! Initialize Balance checking (after time-manager) call BalanceCheckInit() @@ -424,13 +429,13 @@ subroutine initialize2(ni,nj, currtime) ! Print history field info to standard out call hist_printflds() + call t_stopf('clm_init2_part3') + call t_startf('clm_init2_part4') ! Initializate dynamic subgrid weights (for prescribed transient Patches, CNDV ! and/or dynamic landunits); note that these will be overwritten in a restart run - call t_startf('init_dyn_subgrid') call init_subgrid_weights_mod(bounds_proc) call dynSubgrid_init(bounds_proc, glc_behavior, crop_inst) - call t_stopf('init_dyn_subgrid') ! Initialize fates LUH2 usage if (use_fates_luh) then @@ -546,10 +551,12 @@ subroutine initialize2(ni,nj, currtime) call restFile_read(bounds_proc, fnamer, glc_behavior, & reset_dynbal_baselines_lake_columns = reset_dynbal_baselines_lake_columns) end if + call t_stopf('clm_init2_part4') ! If appropriate, create interpolated initial conditions if (nsrest == nsrStartup .and. finidat_interp_source /= ' ') then + call t_startf('clm_init2_init_interp') ! Check that finidat is not cold start - abort if it is if (finidat /= ' ') then call endrun(msg='ERROR clm_initializeMod: '//& @@ -599,8 +606,10 @@ subroutine initialize2(ni,nj, currtime) close(iun) write(iulog,'(a)')' Successfully wrote finidat status file '//trim(locfn) end if + call t_stopf('clm_init2_init_interp') end if + call t_startf('clm_init2_part5') ! If requested, reset dynbal baselines ! This needs to happen after reading the restart file (including after reading the ! interpolated restart file, if applicable). @@ -773,7 +782,6 @@ subroutine initialize2(ni,nj, currtime) deallocate(topo_glc_mec, fert_cft, irrig_method) ! 
Write log output for end of initialization - call t_startf('init_wlog') if (masterproc) then write(iulog,*) 'Successfully initialized the land model' if (nsrest == nsrStartup) then @@ -788,15 +796,17 @@ subroutine initialize2(ni,nj, currtime) write(iulog,'(72a1)') ("*",i=1,60) write(iulog,*) endif - call t_stopf('init_wlog') + call t_stopf('clm_init2_part5') if (water_inst%DoConsistencyCheck()) then + call t_startf('tracer_consistency_check') !$OMP PARALLEL DO PRIVATE (nc, bounds_clump) do nc = 1,nclumps call get_clump_bounds(nc, bounds_clump) call water_inst%TracerConsistencyCheck(bounds_clump, 'end of initialization') end do !$OMP END PARALLEL DO + call t_stopf('tracer_consistency_check') end if call t_stopf('clm_init2') From f294b31b68147422b7d3c891df5b59e1e5d6bd70 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 31 Jul 2025 15:03:28 -0600 Subject: [PATCH 024/141] Add another timer within part3, and also turn off some of the history stuff in it when use_noio is TRUE --- src/main/clm_initializeMod.F90 | 43 ++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index 0138fba686..e12b528fef 100644 --- a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -144,6 +144,7 @@ subroutine initialize2(ni,nj, currtime) use clm_varctl , only : use_hillslope use clm_varorb , only : eccen, mvelpp, lambm0, obliqr use clm_varctl , only : use_cropcal_streams + use clm_varctl , only : use_noio use landunit_varcon , only : landunit_varcon_init, max_lunit, numurbl use pftconMod , only : pftcon use decompInitMod , only : decompInit_clumps, decompInit_glcp @@ -382,12 +383,14 @@ subroutine initialize2(ni,nj, currtime) call InitDaylength(bounds_proc, declin=declin, declinm1=declinm1, obliquity=obliqr) call t_stopf('clm_init2_part2') - call t_startf('clm_init2_part3') ! 
Initialize Balance checking (after time-manager) + call t_startf('balance_check_init') call BalanceCheckInit() + call t_stopf('balance_check_init') + call t_startf('clm_init2_part3') ! History file variables - if (use_cn) then + if (use_cn .and. .not. use_noio ) then call hist_addfld1d (fname='DAYL', units='s', & avgflag='A', long_name='daylength', & ptr_gcell=grc%dayl, default='inactive') @@ -403,21 +406,23 @@ subroutine initialize2(ni,nj, currtime) ! First put in history calls for subgrid data structures - these cannot appear in the ! module for the subgrid data definition due to circular dependencies that are introduced - data2dptr => col%dz(:,-nlevsno+1:0) - col%dz(bounds_proc%begc:bounds_proc%endc,:) = spval - call hist_addfld2d (fname='SNO_Z', units='m', type2d='levsno', & - avgflag='A', long_name='Snow layer thicknesses', & - ptr_col=data2dptr, no_snow_behavior=no_snow_normal, default='inactive') - - call hist_addfld2d (fname='SNO_Z_ICE', units='m', type2d='levsno', & - avgflag='A', long_name='Snow layer thicknesses (ice landunits only)', & - ptr_col=data2dptr, no_snow_behavior=no_snow_normal, & - l2g_scale_type='ice', default='inactive') - - col%zii(bounds_proc%begc:bounds_proc%endc) = spval - call hist_addfld1d (fname='ZII', units='m', & - avgflag='A', long_name='convective boundary height', & - ptr_col=col%zii, default='inactive') + if ( .not. 
use_noio )then + data2dptr => col%dz(:,-nlevsno+1:0) + col%dz(bounds_proc%begc:bounds_proc%endc,:) = spval + call hist_addfld2d (fname='SNO_Z', units='m', type2d='levsno', & + avgflag='A', long_name='Snow layer thicknesses', & + ptr_col=data2dptr, no_snow_behavior=no_snow_normal, default='inactive') + + call hist_addfld2d (fname='SNO_Z_ICE', units='m', type2d='levsno', & + avgflag='A', long_name='Snow layer thicknesses (ice landunits only)', & + ptr_col=data2dptr, no_snow_behavior=no_snow_normal, & + l2g_scale_type='ice', default='inactive') + + col%zii(bounds_proc%begc:bounds_proc%endc) = spval + call hist_addfld1d (fname='ZII', units='m', & + avgflag='A', long_name='convective boundary height', & + ptr_col=col%zii, default='inactive') + end if ! Initialize instances of all derived types as well as time constant variables call clm_instInit(bounds_proc) @@ -428,7 +433,9 @@ subroutine initialize2(ni,nj, currtime) call SnowAge_init( ) ! SNICAR aging parameters: ! Print history field info to standard out - call hist_printflds() + if ( .not. use_noio )then + call hist_printflds() + end if call t_stopf('clm_init2_part3') call t_startf('clm_init2_part4') From 1bd240844aa037d2012a089a331f194d4872dd30 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 31 Jul 2025 15:20:33 -0600 Subject: [PATCH 025/141] Balance check doesn't take time, so adjust the timers again for part3 --- src/main/clm_initializeMod.F90 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index e12b528fef..6647af0dda 100644 --- a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -383,12 +383,10 @@ subroutine initialize2(ni,nj, currtime) call InitDaylength(bounds_proc, declin=declin, declinm1=declinm1, obliquity=obliqr) call t_stopf('clm_init2_part2') + call t_startf('clm_init2_part3') ! 
Initialize Balance checking (after time-manager) - call t_startf('balance_check_init') call BalanceCheckInit() - call t_stopf('balance_check_init') - call t_startf('clm_init2_part3') ! History file variables if (use_cn .and. .not. use_noio ) then call hist_addfld1d (fname='DAYL', units='s', & @@ -426,7 +424,9 @@ subroutine initialize2(ni,nj, currtime) ! Initialize instances of all derived types as well as time constant variables call clm_instInit(bounds_proc) + call t_stopf('clm_init2_part3') + call t_startf('clm_init2_snow_soil_init') call CNParamsSetSoilDepth() ! Initialize SNICAR optical and aging parameters call SnowOptics_init( ) ! SNICAR optical parameters: @@ -436,7 +436,7 @@ subroutine initialize2(ni,nj, currtime) if ( .not. use_noio )then call hist_printflds() end if - call t_stopf('clm_init2_part3') + call t_stopf('clm_init2_snow_soil_init') call t_startf('clm_init2_part4') ! Initializate dynamic subgrid weights (for prescribed transient Patches, CNDV From 60bd85e71b379bc4108b2164e4a3953439468080 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 31 Jul 2025 23:04:12 -0600 Subject: [PATCH 026/141] Add memory checking calls through the lnd_set_decomp_and_domain_from_readmesh subroutine --- .../share_esmf/lnd_set_decomp_and_domain.F90 | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index 9a2695f2f7..2e5485f2af 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -56,6 +56,9 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes use decompMod , only : gindex_global, bounds_type, get_proc_bounds use clm_varpar , only : nlevsoi use clm_varctl , only : use_soil_moisture_streams + use proc_status_vm, only : prt_vm_status + use shr_mem_mod , only : shr_mem_getusage + use shr_sys_mod , only : shr_sys_flush ! 
input/output variables character(len=*) , intent(in) :: driver ! cmeps or lilac @@ -86,6 +89,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes real(r8) , pointer :: lndfrac_glob(:) real(r8) , pointer :: lndfrac_loc_input(:) real(r8) , pointer :: dataptr1d(:) + real(r8) :: msize, mrss !------------------------------------------------------------------------------- rc = ESMF_SUCCESS @@ -103,6 +107,13 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes write(iulog,*) end if + if(masterproc) then + call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: before allocate') + call shr_mem_getusage( msize, mrss, prt=.true.) + write(iulog,*) 'msize, mrss = ',msize, mrss + call shr_sys_flush(iulog) + endif + ! Determine global 2d sizes from read of dimensions of surface dataset and allocate global memory call lnd_get_global_dims(ni, nj, gsize, isgrid2d) @@ -191,6 +202,12 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes gindex_ctsm(n) = gindex_ocn(n-nlnd) end if end do + if(masterproc) then + call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: after allocates') + call shr_mem_getusage( msize, mrss, prt=.true.) + write(iulog,*) 'msize, mrss = ',msize, mrss + call shr_sys_flush(iulog) + endif ! Generate a new mesh on the gindex decomposition call t_startf('lnd_set_decomp_and_domain_from_readmesh: ESMF mesh on new decomposition') @@ -252,12 +269,25 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes end if call t_stopf('lnd_set_decomp_and_domain_from_readmesh: land frac') + if(masterproc) then + call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: just before deallocates') + call shr_mem_getusage( msize, mrss, prt=.true.) + write(iulog,*) 'msize, mrss = ',msize, mrss + call shr_sys_flush(iulog) + endif ! 
Deallocate local pointer memory deallocate(gindex_lnd) deallocate(gindex_ocn) deallocate(gindex_ctsm) + if(masterproc) then + call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: after deallocates') + call shr_mem_getusage( msize, mrss, prt=.true.) + write(iulog,*) 'msize, mrss = ',msize, mrss + call shr_sys_flush(iulog) + endif + end subroutine lnd_set_decomp_and_domain_from_readmesh !=============================================================================== From 8c5debbb251b3cd840619064d556730a5c445b0a Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 00:24:04 -0600 Subject: [PATCH 027/141] Remove one of the memory checks as it wasn't needed --- src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index 2e5485f2af..caaa9ca8d3 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -202,12 +202,6 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes gindex_ctsm(n) = gindex_ocn(n-nlnd) end if end do - if(masterproc) then - call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: after allocates') - call shr_mem_getusage( msize, mrss, prt=.true.) - write(iulog,*) 'msize, mrss = ',msize, mrss - call shr_sys_flush(iulog) - endif ! 
Generate a new mesh on the gindex decomposition call t_startf('lnd_set_decomp_and_domain_from_readmesh: ESMF mesh on new decomposition') From 3f5cff59b34aca05dc222a94f77968f04d8ef601 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 10:40:51 -0600 Subject: [PATCH 028/141] Add some timers for clmInstInit --- src/main/clm_instMod.F90 | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/main/clm_instMod.F90 b/src/main/clm_instMod.F90 index 7d9a0f6ad2..b284836159 100644 --- a/src/main/clm_instMod.F90 +++ b/src/main/clm_instMod.F90 @@ -229,6 +229,7 @@ subroutine clm_instInit(bounds) integer :: dummy_to_make_pgi_happy !---------------------------------------------------------------------- + call t_startf('clm_instInit_part1') ! Note: h2osno_col and snow_depth_col are initialized as local variables ! since they are needed to initialize vertical data structures @@ -286,6 +287,9 @@ subroutine clm_instInit(bounds) call setSoilLayerClass(bounds) endif + call t_stopf('clm_instInit_part1') + + call t_startf('clm_instInit_part2') !----------------------------------------------- ! Set cold-start values for snow levels, snow layers and snow interfaces !----------------------------------------------- @@ -338,6 +342,10 @@ subroutine clm_instInit(bounds) call glacier_smb_inst%Init(bounds) + call t_stopf('clm_instInit_part2') + + call t_startf('clm_instInit_part3') + ! COMPILER_BUG(wjs, 2014-11-29, pgi 14.7) Without the following assignment, the ! assertion in energyflux_inst%Init fails with pgi 14.7 on yellowstone, presumably due ! to a compiler bug. 
@@ -370,6 +378,10 @@ subroutine clm_instInit(bounds) call surfrad_inst%Init(bounds) + call t_stopf('clm_instInit_part3') + + call t_startf('clm_instInit_part4') + allocate(dust_emis_inst, source = create_dust_emissions(bounds, NLFilename)) allocate(scf_method, source = CreateAndInitSnowCoverFraction( & @@ -401,6 +413,10 @@ subroutine clm_instInit(bounds) call drydepvel_inst%Init(bounds) + call t_stopf('clm_instInit_part4') + + call t_startf('clm_instInit_part5') + if_decomp: if (decomp_method /= no_soil_decomp) then ! Initialize soilbiogeochem_state_inst @@ -473,6 +489,7 @@ subroutine clm_instInit(bounds) deallocate (h2osno_col) deallocate (snow_depth_col) deallocate (exice_init_conc_col) + call t_stopf('clm_instInit_part5') ! ------------------------------------------------------------------------ ! Initialize accumulated fields From b30d9e03ed5382e0dd30ad81e6d9db3b039139cf Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 11:18:23 -0600 Subject: [PATCH 029/141] Combine timers for part3/4/5 as they are all small --- src/main/clm_instMod.F90 | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/main/clm_instMod.F90 b/src/main/clm_instMod.F90 index b284836159..bc86749d06 100644 --- a/src/main/clm_instMod.F90 +++ b/src/main/clm_instMod.F90 @@ -378,10 +378,6 @@ subroutine clm_instInit(bounds) call surfrad_inst%Init(bounds) - call t_stopf('clm_instInit_part3') - - call t_startf('clm_instInit_part4') - allocate(dust_emis_inst, source = create_dust_emissions(bounds, NLFilename)) allocate(scf_method, source = CreateAndInitSnowCoverFraction( & @@ -413,10 +409,6 @@ subroutine clm_instInit(bounds) call drydepvel_inst%Init(bounds) - call t_stopf('clm_instInit_part4') - - call t_startf('clm_instInit_part5') - if_decomp: if (decomp_method /= no_soil_decomp) then ! 
Initialize soilbiogeochem_state_inst @@ -489,7 +481,7 @@ subroutine clm_instInit(bounds) deallocate (h2osno_col) deallocate (snow_depth_col) deallocate (exice_init_conc_col) - call t_stopf('clm_instInit_part5') + call t_stopf('clm_instInit_part3') ! ------------------------------------------------------------------------ ! Initialize accumulated fields From 373b84ce078d722140bbc5f58c5ab5b4e146e00e Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 11:19:17 -0600 Subject: [PATCH 030/141] Add timers for urbantv Init and InitVertical --- src/cpl/share_esmf/UrbanTimeVarType.F90 | 4 ++++ src/main/initVerticalMod.F90 | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/src/cpl/share_esmf/UrbanTimeVarType.F90 b/src/cpl/share_esmf/UrbanTimeVarType.F90 index 1e6d004e96..cb30cfe02c 100644 --- a/src/cpl/share_esmf/UrbanTimeVarType.F90 +++ b/src/cpl/share_esmf/UrbanTimeVarType.F90 @@ -16,6 +16,7 @@ module UrbanTimeVarType use clm_varcon , only : spval use LandunitType , only : lun use GridcellType , only : grc + use perf_mod , only : t_startf, t_stopf ! implicit none private @@ -143,6 +144,8 @@ subroutine urbantv_init(this, bounds, NLFilename) stream_meshfile_urbantv, & urbantv_tintalgo + call t_startf("urbantv_init") + ! Default values for namelist stream_year_first_urbantv = 1 ! first year in stream to use stream_year_last_urbantv = 1 ! 
last year in stream to use @@ -219,6 +222,7 @@ subroutine urbantv_init(this, bounds, NLFilename) if (ESMF_LogFoundError(rcToCheck=rc, msg=ESMF_LOGERR_PASSTHRU, line=__LINE__, file=__FILE__)) then call ESMF_Finalize(endflag=ESMF_END_ABORT) end if + call t_stopf("urbantv_init") end subroutine urbantv_init diff --git a/src/main/initVerticalMod.F90 b/src/main/initVerticalMod.F90 index 64383e7a7c..4a1177666e 100644 --- a/src/main/initVerticalMod.F90 +++ b/src/main/initVerticalMod.F90 @@ -29,6 +29,7 @@ module initVerticalMod use ColumnType , only : col use glcBehaviorMod , only : glc_behavior_type use abortUtils , only : endrun + use perf_mod , only : t_startf, t_stopf use ncdio_pio ! ! !PUBLIC TYPES: @@ -189,6 +190,7 @@ subroutine initVertical(bounds, glc_behavior, thick_wall, thick_roof) integer :: jmin_bedrock character(len=*), parameter :: subname = 'initVertical' !------------------------------------------------------------------------ + call t_startf('initVertical') begc = bounds%begc; endc= bounds%endc begl = bounds%begl; endl= bounds%endl @@ -669,6 +671,8 @@ subroutine initVertical(bounds, glc_behavior, thick_wall, thick_roof) call ncd_pio_closefile(ncid) + call t_stopf('initVertical') + end subroutine initVertical !----------------------------------------------------------------------- From 4f7de29eb23fd0abcc6c5d970c15e80905362887 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 12:13:44 -0600 Subject: [PATCH 031/141] Add a timer around just the strdata_init --- src/cpl/share_esmf/UrbanTimeVarType.F90 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cpl/share_esmf/UrbanTimeVarType.F90 b/src/cpl/share_esmf/UrbanTimeVarType.F90 index cb30cfe02c..2870ae9b5c 100644 --- a/src/cpl/share_esmf/UrbanTimeVarType.F90 +++ b/src/cpl/share_esmf/UrbanTimeVarType.F90 @@ -198,6 +198,7 @@ subroutine urbantv_init(this, bounds, NLFilename) endif ! 
Initialize the cdeps data type this%sdat_urbantv + call t_startf("str_data_init") call shr_strdata_init_from_inline(this%sdat_urbantv, & my_task = iam, & logunit = iulog, & @@ -222,6 +223,8 @@ subroutine urbantv_init(this, bounds, NLFilename) if (ESMF_LogFoundError(rcToCheck=rc, msg=ESMF_LOGERR_PASSTHRU, line=__LINE__, file=__FILE__)) then call ESMF_Finalize(endflag=ESMF_END_ABORT) end if + call t_stopf("str_data_init") + call t_stopf("urbantv_init") end subroutine urbantv_init From 7f03d77dba888624d79b16085a417d1fdd37c052 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 12:31:18 -0600 Subject: [PATCH 032/141] Make an internal subroutine for deallocation inside of lnd_set_decomp_and_domain_from_readmesh --- .../share_esmf/lnd_set_decomp_and_domain.F90 | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index caaa9ca8d3..1de6ecddde 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -91,7 +91,6 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes real(r8) , pointer :: dataptr1d(:) real(r8) :: msize, mrss !------------------------------------------------------------------------------- - rc = ESMF_SUCCESS ! Write diag info @@ -271,9 +270,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes endif ! Deallocate local pointer memory - deallocate(gindex_lnd) - deallocate(gindex_ocn) - deallocate(gindex_ctsm) + call from_readmesh_dealloc() if(masterproc) then call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: after deallocates') @@ -282,6 +279,20 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes call shr_sys_flush(iulog) endif + + !=============================================================================== + ! 
Internal subroutines for this subroutine + contains + !=============================================================================== + + subroutine from_readmesh_dealloc() + deallocate(gindex_lnd) + deallocate(gindex_ocn) + deallocate(gindex_ctsm) + end subroutine from_readmesh_dealloc + + !------------------------------------------------------------------------------- + end subroutine lnd_set_decomp_and_domain_from_readmesh !=============================================================================== From 02f894e4d4c8f9328318fe4fea49ef5c3a6a3cf1 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 12:46:34 -0600 Subject: [PATCH 033/141] Add release of the ESMF objects in the lnd_set_decomp_and_domain_from_readmesh subroutine --- .../share_esmf/lnd_set_decomp_and_domain.F90 | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index 1de6ecddde..4e3714e125 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -269,8 +269,9 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes call shr_sys_flush(iulog) endif - ! Deallocate local pointer memory - call from_readmesh_dealloc() + ! Deallocate local pointer memory including ESMF objects + call from_readmesh_dealloc( rc ) + if (chkerr(rc,__LINE__,u_FILE_u)) return if(masterproc) then call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: after deallocates') @@ -285,10 +286,29 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes contains !=============================================================================== - subroutine from_readmesh_dealloc() + subroutine from_readmesh_dealloc( rc ) + use ESMF, only : ESMF_FieldRedistRelease, ESMF_DistGridDestroy, ESMF_FieldDestroy, ESMF_MeshDestroy + integer, intent(out) :: rc ! 
ESMF return code to indicate deallocate was successful + + rc = ESMF_SUCCESS + deallocate(gindex_lnd) deallocate(gindex_ocn) deallocate(gindex_ctsm) + ! Destroy or release all of the ESMF objects + call ESMF_FieldRedistRelease( rhandle_lnd2ctsm, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + call ESMF_DistGridDestroy( distgrid_ctsm, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + call ESMF_FieldDestroy( field_lnd, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + call ESMF_FieldDestroy( field_ctsm, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + call ESMF_MeshDestroy( mesh_maskinput, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + call ESMF_MeshDestroy( mesh_lndinput, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + end subroutine from_readmesh_dealloc !------------------------------------------------------------------------------- From 57b04cdbc5b6ec54266a87af2c1c80bf14c92542 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 13:08:24 -0600 Subject: [PATCH 034/141] ESMF tells me that some of these objects are used later and can not be destroyed so remove the destroy for the distgrid, and the two meshes, this runs but doesn't seem to lower memory --- src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index 4e3714e125..bb57b16ee6 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -290,6 +290,8 @@ subroutine from_readmesh_dealloc( rc ) use ESMF, only : ESMF_FieldRedistRelease, ESMF_DistGridDestroy, ESMF_FieldDestroy, ESMF_MeshDestroy integer, intent(out) :: rc ! ESMF return code to indicate deallocate was successful + logical :: no_esmf_garbage = .true. ! If .true. 
release all ESMF data (which can be problematic if referenced again) + rc = ESMF_SUCCESS deallocate(gindex_lnd) @@ -298,16 +300,16 @@ subroutine from_readmesh_dealloc( rc ) ! Destroy or release all of the ESMF objects call ESMF_FieldRedistRelease( rhandle_lnd2ctsm, rc=rc) if (chkerr(rc,__LINE__,u_FILE_u)) return - call ESMF_DistGridDestroy( distgrid_ctsm, rc=rc) - if (chkerr(rc,__LINE__,u_FILE_u)) return + !call ESMF_DistGridDestroy( distgrid_ctsm, rc=rc) + !if (chkerr(rc,__LINE__,u_FILE_u)) return call ESMF_FieldDestroy( field_lnd, rc=rc) if (chkerr(rc,__LINE__,u_FILE_u)) return call ESMF_FieldDestroy( field_ctsm, rc=rc) if (chkerr(rc,__LINE__,u_FILE_u)) return - call ESMF_MeshDestroy( mesh_maskinput, rc=rc) - if (chkerr(rc,__LINE__,u_FILE_u)) return - call ESMF_MeshDestroy( mesh_lndinput, rc=rc) - if (chkerr(rc,__LINE__,u_FILE_u)) return + !call ESMF_MeshDestroy( mesh_maskinput, rc=rc) + !if (chkerr(rc,__LINE__,u_FILE_u)) return + !call ESMF_MeshDestroy( mesh_lndinput, rc=rc) + !if (chkerr(rc,__LINE__,u_FILE_u)) return end subroutine from_readmesh_dealloc From 8cf101afea39160a3b032d2d9352b2b108929914 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 13:52:08 -0600 Subject: [PATCH 035/141] Turn on removing all ESMF garbage for the things deleted, and add note about leaving the distgrid around, and also delete the meshes as it seems to work with this in place --- .../share_esmf/lnd_set_decomp_and_domain.F90 | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index bb57b16ee6..6cbdde43f6 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -72,7 +72,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes ! 
local variables type(ESMF_Mesh) :: mesh_maskinput type(ESMF_Mesh) :: mesh_lndinput - type(ESMF_DistGrid) :: distgrid_ctsm + type(ESMF_DistGrid) :: distgrid_ctsm ! This appears to be local but is used later in lnd_import_export type(ESMF_Field) :: field_lnd type(ESMF_Field) :: field_ctsm type(ESMF_RouteHandle) :: rhandle_lnd2ctsm @@ -203,6 +203,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes end do ! Generate a new mesh on the gindex decomposition + ! NOTE: The distgrid_ctsm will be used later in lnd_import_export, even though it appears to just be local call t_startf('lnd_set_decomp_and_domain_from_readmesh: ESMF mesh on new decomposition') distGrid_ctsm = ESMF_DistGridCreate(arbSeqIndexList=gindex_ctsm, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return @@ -298,18 +299,22 @@ subroutine from_readmesh_dealloc( rc ) deallocate(gindex_ocn) deallocate(gindex_ctsm) ! Destroy or release all of the ESMF objects - call ESMF_FieldRedistRelease( rhandle_lnd2ctsm, rc=rc) + call ESMF_FieldRedistRelease( rhandle_lnd2ctsm, noGarbage=no_esmf_garbage, rc=rc) if (chkerr(rc,__LINE__,u_FILE_u)) return + !-------------------------------------------------------------------------- + ! NOTE: We can't destroy the distgrid -- because it will be used later + ! As such we don't do the following... 
EBK 08/01/2025 !call ESMF_DistGridDestroy( distgrid_ctsm, rc=rc) !if (chkerr(rc,__LINE__,u_FILE_u)) return - call ESMF_FieldDestroy( field_lnd, rc=rc) + !-------------------------------------------------------------------------- + call ESMF_FieldDestroy( field_lnd, noGarbage=no_esmf_garbage, rc=rc) if (chkerr(rc,__LINE__,u_FILE_u)) return - call ESMF_FieldDestroy( field_ctsm, rc=rc) + call ESMF_FieldDestroy( field_ctsm, noGarbage=no_esmf_garbage, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + call ESMF_MeshDestroy( mesh_maskinput, noGarbage=no_esmf_garbage, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + call ESMF_MeshDestroy( mesh_lndinput, noGarbage=no_esmf_garbage, rc=rc) if (chkerr(rc,__LINE__,u_FILE_u)) return - !call ESMF_MeshDestroy( mesh_maskinput, rc=rc) - !if (chkerr(rc,__LINE__,u_FILE_u)) return - !call ESMF_MeshDestroy( mesh_lndinput, rc=rc) - !if (chkerr(rc,__LINE__,u_FILE_u)) return end subroutine from_readmesh_dealloc From 47dbc1beb2aedafea9b21bb8e0e9ce90880628dd Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 16:15:27 -0600 Subject: [PATCH 036/141] Remove the mpasa15 test from expected fails --- cime_config/testdefs/ExpectedTestFails.xml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cime_config/testdefs/ExpectedTestFails.xml b/cime_config/testdefs/ExpectedTestFails.xml index ac35ad812e..51f85ba8f3 100644 --- a/cime_config/testdefs/ExpectedTestFails.xml +++ b/cime_config/testdefs/ExpectedTestFails.xml @@ -363,11 +363,4 @@ - - - FAIL - #3316 - - - From 5bc62fa872fde0e476c1afc82c216d43e6b619cc Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 11 Jul 2025 14:33:32 -0600 Subject: [PATCH 037/141] Turn off restarts and history and add some timer options as well as turning off ncdio_pio testing for the purposes of decompInit work --- .../testmods_dirs/clm/run_self_tests/shell_commands | 9 +++++++++ .../testmods_dirs/clm/run_self_tests/user_nl_clm | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git 
a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands index d1efa2a23c..ca2a393757 100755 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -12,5 +12,14 @@ ./xmlchange CLM_CONFIGURATION="nwp" # Restarts aren't allowed for these tests, and turn off CPL history +# First change in env_test.xml, then in the standard one so it won't complain there +./xmlchange --force REST_OPTION="never" --file env_test.xml +./xmlchange --force HIST_OPTION="never" --file env_test.xml ./xmlchange REST_OPTION="never" ./xmlchange HIST_OPTION="never" + +# Timer settings +./xmlchange TIMER_DETAIL="2" +./xmlchange SAVE_TIMING="TRUE" +./xmlchange CHECK_TIMING="TRUE" +./xmlchange ESMF_PROFILING_LEVEL="10" diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm index 3edb8c7fc7..4430f92e10 100644 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm @@ -1,4 +1,4 @@ -for_testing_run_ncdiopio_tests = .true. +for_testing_run_ncdiopio_tests = .false. for_testing_run_decomp_init_tests = .true. for_testing_exit_after_self_tests = .true. 
From f1a42771536ba4a80437670b9e349d9a28b655f1 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 11:41:32 -0600 Subject: [PATCH 038/141] Turn off RTM rather than increase the ROF coupling frequency --- .../testdefs/testmods_dirs/clm/run_self_tests/shell_commands | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands index ca2a393757..143acc98e7 100755 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -1,8 +1,8 @@ #!/bin/bash ./xmlchange CLM_FORCE_COLDSTART="on" -# We use this testmod in a _Ln1 test; this requires forcing the ROF coupling frequency to same frequency as DATM -./xmlchange ROF_NCPL='$ATM_NCPL' +# Turn off ROF model when used with compsets that have them +./xmlchange ROF_MODE='NULL' # Turn MEGAN off to run faster ./xmlchange CLM_BLDNML_OPTS='--no-megan' --append From 2b9746b828802a38894b226dd495cb26e50a2ab3 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 13:29:15 -0600 Subject: [PATCH 039/141] Still need to set NCPL_ROF --- .../testdefs/testmods_dirs/clm/run_self_tests/shell_commands | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands index 143acc98e7..2cb24928b5 100755 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -1,6 +1,9 @@ #!/bin/bash ./xmlchange CLM_FORCE_COLDSTART="on" +# We use this testmod in a _Ln1 test; this requires forcing the ROF coupling frequency to same frequency as DATM +./xmlchange ROF_NCPL='$ATM_NCPL + # Turn off ROF model when used with compsets that have them ./xmlchange ROF_MODE='NULL' From 
60425cd6ea074b901f7849223ecc02cfc672e42b Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 13:54:10 -0600 Subject: [PATCH 040/141] Fix syntax error --- .../testdefs/testmods_dirs/clm/run_self_tests/shell_commands | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands index 2cb24928b5..f3144c219d 100755 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -2,7 +2,7 @@ ./xmlchange CLM_FORCE_COLDSTART="on" # We use this testmod in a _Ln1 test; this requires forcing the ROF coupling frequency to same frequency as DATM -./xmlchange ROF_NCPL='$ATM_NCPL +./xmlchange ROF_NCPL='$ATM_NCPL' # Turn off ROF model when used with compsets that have them ./xmlchange ROF_MODE='NULL' From ccdd13ceaa9a8ffa1d0d28a51468b7ebc079b7db Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 17:55:14 -0600 Subject: [PATCH 041/141] Fix XML name for RTM_MODE --- .../testdefs/testmods_dirs/clm/run_self_tests/shell_commands | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands index f3144c219d..0d8a5d36e1 100755 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -5,7 +5,7 @@ ./xmlchange ROF_NCPL='$ATM_NCPL' # Turn off ROF model when used with compsets that have them -./xmlchange ROF_MODE='NULL' +./xmlchange RTM_MODE='NULL' # Turn MEGAN off to run faster ./xmlchange CLM_BLDNML_OPTS='--no-megan' --append From 9908a81e357420a06ef855f5495c4c9ae4e363da Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 17:55:14 -0600 Subject: [PATCH 042/141] Fix XML name for 
RTM_MODE --- .../testdefs/testmods_dirs/clm/run_self_tests/shell_commands | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands index f3144c219d..0d8a5d36e1 100755 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -5,7 +5,7 @@ ./xmlchange ROF_NCPL='$ATM_NCPL' # Turn off ROF model when used with compsets that have them -./xmlchange ROF_MODE='NULL' +./xmlchange RTM_MODE='NULL' # Turn MEGAN off to run faster ./xmlchange CLM_BLDNML_OPTS='--no-megan' --append From 8fc95c19ac7aaac7d053e80cb3ce43b00be24c48 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 30 Jul 2025 11:42:24 -0600 Subject: [PATCH 043/141] Turn off urban HAC completely and minimize urban in gridcells --- .../testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm index 4430f92e10..499770f153 100644 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm @@ -4,4 +4,6 @@ for_testing_exit_after_self_tests = .true. ! Turn off history, restarts, and output hist_empty_htapes = .true. -use_noio = .true. \ No newline at end of file +use_noio = .true. +urban_hac = 'OFF' +toosmall_urban = 98.0d00 ! 
Minimize urban in gridcells From 19fb93df06670638d6049295cd14e2eaa7e4bf4e Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 1 Aug 2025 18:08:41 -0600 Subject: [PATCH 044/141] Keep all self tests on --- .../testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm index 499770f153..b7a298b23d 100644 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm @@ -1,4 +1,4 @@ -for_testing_run_ncdiopio_tests = .false. +for_testing_run_ncdiopio_tests = .true. for_testing_run_decomp_init_tests = .true. for_testing_exit_after_self_tests = .true. From 03722cdcb31aeb61c8e01f9f38cc22486472e50b Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 4 Aug 2025 15:07:30 -0600 Subject: [PATCH 045/141] Call shr_malloc_trim so that memory is released by the OS after the dealloc happens, this shows the dealloc actually releasing memory now --- src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index 6cbdde43f6..01580db90d 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -56,7 +56,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes use decompMod , only : gindex_global, bounds_type, get_proc_bounds use clm_varpar , only : nlevsoi use clm_varctl , only : use_soil_moisture_streams - use proc_status_vm, only : prt_vm_status + use proc_status_vm, only : prt_vm_status, shr_malloc_trim use shr_mem_mod , only : shr_mem_getusage use shr_sys_mod , only : shr_sys_flush @@ -273,6 +273,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, 
meshfile_lnd, mes ! Deallocate local pointer memory including ESMF objects call from_readmesh_dealloc( rc ) if (chkerr(rc,__LINE__,u_FILE_u)) return + call shr_malloc_trim() if(masterproc) then call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: after deallocates') From 44839449a103105f781fcc43f6791c5f4805eba9 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 4 Aug 2025 16:42:04 -0600 Subject: [PATCH 046/141] Change memory check to a utility function to call that does everything in one place --- .../share_esmf/lnd_set_decomp_and_domain.F90 | 27 +++------------- src/main/decompInitMod.F90 | 30 ++++++----------- src/utils/ctsm_memcheck.F90 | 32 +++++++++++++++++++ 3 files changed, 45 insertions(+), 44 deletions(-) create mode 100644 src/utils/ctsm_memcheck.F90 diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index 01580db90d..aed472c08d 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -21,6 +21,7 @@ module lnd_set_decomp_and_domain use clm_varctl , only : iulog, inst_suffix, FL => fname_len use abortutils , only : endrun use perf_mod , only : t_startf, t_stopf + use ctsm_memcheck, only : memcheck implicit none private ! except @@ -56,9 +57,6 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes use decompMod , only : gindex_global, bounds_type, get_proc_bounds use clm_varpar , only : nlevsoi use clm_varctl , only : use_soil_moisture_streams - use proc_status_vm, only : prt_vm_status, shr_malloc_trim - use shr_mem_mod , only : shr_mem_getusage - use shr_sys_mod , only : shr_sys_flush ! input/output variables character(len=*) , intent(in) :: driver ! 
cmeps or lilac @@ -106,12 +104,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes write(iulog,*) end if - if(masterproc) then - call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: before allocate') - call shr_mem_getusage( msize, mrss, prt=.true.) - write(iulog,*) 'msize, mrss = ',msize, mrss - call shr_sys_flush(iulog) - endif + call memcheck('lnd_set_decomp_and_domain_from_readmesh: before allocate') ! Determine global 2d sizes from read of dimensions of surface dataset and allocate global memory call lnd_get_global_dims(ni, nj, gsize, isgrid2d) @@ -263,25 +256,13 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes end if call t_stopf('lnd_set_decomp_and_domain_from_readmesh: land frac') - if(masterproc) then - call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: just before deallocates') - call shr_mem_getusage( msize, mrss, prt=.true.) - write(iulog,*) 'msize, mrss = ',msize, mrss - call shr_sys_flush(iulog) - endif + call memcheck('lnd_set_decomp_and_domain_from_readmesh: just before deallocate') ! Deallocate local pointer memory including ESMF objects call from_readmesh_dealloc( rc ) if (chkerr(rc,__LINE__,u_FILE_u)) return - call shr_malloc_trim() - - if(masterproc) then - call prt_vm_status('CTSM: lnd_set_decomp_and_domain_from_readmesh: after deallocates') - call shr_mem_getusage( msize, mrss, prt=.true.) - write(iulog,*) 'msize, mrss = ',msize, mrss - call shr_sys_flush(iulog) - endif + call memcheck('lnd_set_decomp_and_domain_from_readmesh: after deallocate') !=============================================================================== ! 
Internal subroutines for this subroutine diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index 187d074d63..c10e1db1e9 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -12,8 +12,7 @@ module decompInitMod use spmdMod , only : masterproc, iam, npes, mpicom use abortutils , only : endrun use clm_varctl , only : iulog - use proc_status_vm, only : prt_vm_status - use shr_mem_mod , only : shr_mem_init, shr_mem_getusage + use ctsm_memcheck, only : memcheck ! implicit none private @@ -78,13 +77,7 @@ subroutine decompInit_lnd(lni, lnj, amask) real(r8) :: msize, mrss !------------------------------------------------------------------------------ - if(masterproc) then - call shr_mem_init(prt=.true.) ! initialize memory tracking - call prt_vm_status('CTSM: decompInit_lnd: before') - call shr_mem_getusage( msize, mrss, prt=.true.) - write(iulog,*) 'msize, mrss = ',msize, mrss - call shr_sys_flush(iulog) - endif + call memcheck('decompInit_lnd: before allocate') lns = lni * lnj @@ -303,16 +296,13 @@ subroutine decompInit_lnd(lni, lnj, amask) gindex_global(n-procinfo%begg+1) = gdc2glo(n) enddo - if(masterproc) then - call prt_vm_status('CTSM: decompInit_lnd: afterwards before deallocate') - call shr_mem_getusage( msize, mrss, prt=.true.) - write(iulog,*) 'msize, mrss = ',msize, mrss - call shr_sys_flush(iulog) - endif + call memcheck('decompInit_lnd: after allocate before deallocate') deallocate(clumpcnt) deallocate(gdc2glo) + call memcheck('decompInit_lnd: after allocate after deallocate') + ! 
Diagnostic output if (masterproc) then write(iulog,*)' Surface Grid Characteristics' @@ -368,9 +358,7 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) character(len=32), parameter :: subname = 'decompInit_clumps' !------------------------------------------------------------------------------ - if(masterproc) then - call prt_vm_status('CTSM: decompInit_clumps: before') - endif + call memcheck('decompInit_clumps: before alloc') !--- assign gridcells to clumps (and thus pes) --- call get_proc_bounds(bounds) begg = bounds%begg; endg = bounds%endg @@ -493,13 +481,13 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) endif enddo - if(masterproc) then - call prt_vm_status('CTSM: decompInit_clumps: after before deallocate') - endif + call memcheck('decompInit_clumps: before deallocate') deallocate(allvecg,allvecl) deallocate(lcid) + call memcheck('decompInit_clumps: after deallocate') + ! Diagnostic output call get_proc_global(ng=numg, nl=numl, nc=numc, np=nump, nCohorts=numCohort) diff --git a/src/utils/ctsm_memcheck.F90 b/src/utils/ctsm_memcheck.F90 new file mode 100644 index 0000000000..e477c89678 --- /dev/null +++ b/src/utils/ctsm_memcheck.F90 @@ -0,0 +1,32 @@ +module ctsm_memcheck + + use shr_kind_mod, only: r8 => shr_kind_r8 + use clm_varctl, only : iulog + use spmdMod, only : masterproc + implicit none + private + + public :: memcheck +contains + + subroutine memcheck(msg) + use proc_status_vm, only : prt_vm_status, shr_malloc_trim + use shr_mem_mod, only : shr_mem_getusage + use shr_sys_mod, only : shr_sys_flush + character(len=*), intent(in) :: msg + + real(r8) :: msize, mrss ! Memory size and resident set size + + call shr_malloc_trim() ! Make sure the OS trims the memory in response to deallocates + + ! Only output memory on main task as memory usage should be similar between tasks + if (masterproc) then + call prt_vm_status('CTSM(Memory check): ' // trim(msg)) + call shr_mem_getusage( msize, mrss, prt=.true.) 
+ write(iulog,*) ' msize, mrss = ',msize, mrss + call shr_sys_flush(iulog) + end if + + end subroutine memcheck + +end module ctsm_memcheck \ No newline at end of file From 8cd61a519c4b60aaf61ddd0f1f0cc8f7d23b5340 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 4 Aug 2025 19:03:29 -0600 Subject: [PATCH 047/141] Clarify memory check logging --- src/main/decompInitMod.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index c10e1db1e9..9da8c59284 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -301,7 +301,7 @@ subroutine decompInit_lnd(lni, lnj, amask) deallocate(clumpcnt) deallocate(gdc2glo) - call memcheck('decompInit_lnd: after allocate after deallocate') + call memcheck('decompInit_lnd: after deallocate') ! Diagnostic output if (masterproc) then From e930f1d1d866f124154f9be43318e04abdb35bea Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 4 Aug 2025 19:12:48 -0600 Subject: [PATCH 048/141] Deallocate lndfrac_loc_input --- src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index aed472c08d..c637280c4c 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -277,6 +277,7 @@ subroutine from_readmesh_dealloc( rc ) rc = ESMF_SUCCESS + deallocate(lndfrac_loc_input) deallocate(gindex_lnd) deallocate(gindex_ocn) deallocate(gindex_ctsm) From 7b1d3ea8aa8654f0b58f35b4c80771fa0d341a2f Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 6 Aug 2025 10:05:07 -0600 Subject: [PATCH 049/141] Add timers --- src/main/decompInitMod.F90 | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index 9da8c59284..e27edf47a6 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -13,6 +13,7 
@@ module decompInitMod use abortutils , only : endrun use clm_varctl , only : iulog use ctsm_memcheck, only : memcheck + use perf_mod , only : t_startf, t_stopf ! implicit none private @@ -76,6 +77,7 @@ subroutine decompInit_lnd(lni, lnj, amask) type(bounds_type) :: bounds ! contains subgrid bounds data real(r8) :: msize, mrss !------------------------------------------------------------------------------ + call t_startf('decompInit_lnd') call memcheck('decompInit_lnd: before allocate') @@ -314,6 +316,7 @@ subroutine decompInit_lnd(lni, lnj, amask) write(iulog,*) end if call shr_sys_flush(iulog) + call t_stopf('decompInit_lnd') end subroutine decompInit_lnd @@ -358,6 +361,7 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) character(len=32), parameter :: subname = 'decompInit_clumps' !------------------------------------------------------------------------------ + call t_startf('decompInit_clumps') call memcheck('decompInit_clumps: before alloc') !--- assign gridcells to clumps (and thus pes) --- call get_proc_bounds(bounds) @@ -580,6 +584,7 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) call shr_sys_flush(iulog) call mpi_barrier(mpicom,ier) end do + call t_stopf('decompInit_clumps') end subroutine decompInit_clumps @@ -639,6 +644,7 @@ subroutine decompInit_glcp(lni,lnj,glc_behavior) integer :: gsize Character(len=32), parameter :: subname = 'decompInit_glcp' !------------------------------------------------------------------------------ + call t_startf('decompInit_glcp') ! 
Get processor bounds @@ -865,6 +871,8 @@ subroutine decompInit_glcp(lni,lnj,glc_behavior) deallocate(start_global) if (allocated(index_lndgridcells)) deallocate(index_lndgridcells) + call t_stopf('decompInit_glcp') + end subroutine decompInit_glcp end module decompInitMod From 623754254ad94834096fb5fa5991051ffccd23f5 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 6 Aug 2025 10:37:18 -0600 Subject: [PATCH 050/141] Add more timers --- src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index c637280c4c..9954027902 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -121,8 +121,10 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes ! This will get added to the ESMF PET files if DEBUG=TRUE and CREATE_ESMF_PET_FILES=TRUE call ESMF_VMLogMemInfo("clm: Before lnd mesh create in ") #endif + call t_startf('lnd_set_decomp_and_domain_from_readmesh: ESMF_MeshCreate') mesh_maskinput = ESMF_MeshCreate(filename=trim(meshfile_mask), fileformat=ESMF_FILEFORMAT_ESMFMESH, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return + call t_stopf('lnd_set_decomp_and_domain_from_readmesh: ESMF_MeshCreate') #ifdef DEBUG ! This will get added to the ESMF PET files if DEBUG=TRUE and CREATE_ESMF_PET_FILES=TRUE call ESMF_VMLogMemInfo("clm: After lnd mesh create in ") @@ -148,6 +150,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes call shr_sys_abort('driver '//trim(driver)//' is not supported, must be lilac or cmeps') end if call t_stopf('lnd_set_decomp_and_domain_from_readmesh: ESMF mesh') + call t_startf ('lnd_set_decomp_and_domain_from_readmesh: decomp_init') ! 
Determine lnd decomposition that will be used by ctsm from lndmask_glob call decompInit_lnd(lni=ni, lnj=nj, amask=lndmask_glob) @@ -194,6 +197,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes gindex_ctsm(n) = gindex_ocn(n-nlnd) end if end do + call t_stopf ('lnd_set_decomp_and_domain_from_readmesh: decomp_init') ! Generate a new mesh on the gindex decomposition ! NOTE: The distgrid_ctsm will be used later in lnd_import_export, even though it appears to just be local @@ -508,6 +512,7 @@ subroutine lnd_set_lndmask_from_maskmesh(mesh_lnd, mesh_mask, vm, gsize, lndmask character(len=CL) :: flandfrac_status !------------------------------------------------------------------------------- + call t_startf('lnd_set_lndmask_from_maskmesh') rc = ESMF_SUCCESS flandfrac = './init_generated_files/ctsm_landfrac'//trim(inst_suffix)//'.nc' @@ -546,12 +551,14 @@ subroutine lnd_set_lndmask_from_maskmesh(mesh_lnd, mesh_mask, vm, gsize, lndmask if (ChkErr(rc,__LINE__,u_FILE_u)) return ! create route handle to map ocean mask from mask mesh to land mesh + call t_startf('lnd_set_lndmask_from_maskmesh::ESMF_FieldRegridStore') call ESMF_FieldRegridStore(field_mask, field_lnd, routehandle=rhandle_mask2lnd, & srcMaskValues=(/srcMaskValue/), dstMaskValues=(/dstMaskValue/), & regridmethod=ESMF_REGRIDMETHOD_CONSERVE, normType=ESMF_NORMTYPE_DSTAREA, & srcTermProcessing=srcTermProcessing_Value, & ignoreDegenerate=.true., unmappedaction=ESMF_UNMAPPEDACTION_IGNORE, rc=rc) if (chkerr(rc,__LINE__,u_FILE_u)) return + call t_stopf('lnd_set_lndmask_from_maskmesh::ESMF_FieldRegridStore') ! fill in values for field_mask with mask on mask mesh call ESMF_MeshGet(mesh_mask, elementdistGrid=distgrid_mask, rc=rc) @@ -567,9 +574,11 @@ subroutine lnd_set_lndmask_from_maskmesh(mesh_lnd, mesh_mask, vm, gsize, lndmask dataptr1d(:) = maskmask_loc(:) ! 
map mask mask to land mesh + call t_startf('lnd_set_lndmask_from_maskmesh::ESMF_FieldRegrid') call ESMF_FieldRegrid(field_mask, field_lnd, routehandle=rhandle_mask2lnd, & termorderflag=ESMF_TERMORDER_SRCSEQ, checkflag=checkflag, zeroregion=ESMF_REGION_TOTAL, rc=rc) if (chkerr(rc,__LINE__,u_FILE_u)) return + call t_stopf('lnd_set_lndmask_from_maskmesh::ESMF_FieldRegrid') call ESMF_MeshGet(mesh_lnd, spatialDim=spatialDim, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return @@ -597,17 +606,20 @@ subroutine lnd_set_lndmask_from_maskmesh(mesh_lnd, mesh_mask, vm, gsize, lndmask do n = 1,lsize_lnd lndmask_glob(gindex_input(n)) = lndmask_loc(n) end do + call t_startf('lnd_set_lndmask_from_maskmesh::ESMF_VMAllReduce') allocate(itemp_glob(gsize)) call ESMF_VMAllReduce(vm, sendData=lndmask_glob, recvData=itemp_glob, count=gsize, & reduceflag=ESMF_REDUCE_SUM, rc=rc) lndmask_glob(:) = int(itemp_glob(:)) deallocate(itemp_glob) + call t_stopf('lnd_set_lndmask_from_maskmesh::ESMF_VMAllReduce') ! deallocate memory deallocate(maskmask_loc) deallocate(lndmask_loc) end if + call t_stopf('lnd_set_lndmask_from_maskmesh') end subroutine lnd_set_lndmask_from_maskmesh From 7332c8968ae773e3e3937768456843e85a8c1aa2 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 6 Aug 2025 11:06:07 -0600 Subject: [PATCH 051/141] Update cdeps with more timers --- .gitmodules | 7 +++++-- components/cdeps | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index cd4a79fb45..adc92dfd8f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -91,8 +91,11 @@ fxDONOTUSEurl = https://github.com/ESCOMP/CMEPS.git [submodule "cdeps"] path = components/cdeps -url = https://github.com/ESCOMP/CDEPS.git -fxtag = cdeps1.0.79 +#url = https://github.com/ESCOMP/CDEPS.git +url = https://github.com/ekluzek/CDEPS.git +#fxtag = cdeps1.0.79 +#fxtag = add_more_timers +fxtag = 65f7bb0c88420d58a080f9fcc8afbd945c90211a fxrequired = ToplevelRequired # Standard Fork to compare to with "git fleximod 
test" to ensure personal forks aren't committed fxDONOTUSEurl = https://github.com/ESCOMP/CDEPS.git diff --git a/components/cdeps b/components/cdeps index b65f283437..65f7bb0c88 160000 --- a/components/cdeps +++ b/components/cdeps @@ -1 +1 @@ -Subproject commit b65f28343708789f75a0f422a2fb6bc02036474e +Subproject commit 65f7bb0c88420d58a080f9fcc8afbd945c90211a From 44a3b4500ed86c8b9c797488089582ab7a4914d0 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 11 Aug 2025 09:06:36 -0600 Subject: [PATCH 052/141] Add asserts for scalars and also text scalars --- src/self_tests/Assertions.F90.in | 81 ++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/src/self_tests/Assertions.F90.in b/src/self_tests/Assertions.F90.in index 2a4c8cccc6..4a86929a8a 100644 --- a/src/self_tests/Assertions.F90.in +++ b/src/self_tests/Assertions.F90.in @@ -17,6 +17,12 @@ module Assertions public :: assert_equal interface assert_equal + !TYPE double,int,logical + module procedure assert_equal_0d_{TYPE} + + !TYPE text + module procedure assert_equal_0d_{TYPE} + !TYPE double,int,logical module procedure assert_equal_1d_{TYPE} @@ -30,6 +36,8 @@ module Assertions interface vals_are_equal !TYPE double,int,logical module procedure vals_are_equal_{TYPE} + !TYPE text + module procedure vals_are_equal_{TYPE} end interface vals_are_equal contains @@ -75,6 +83,60 @@ contains end subroutine assert_equal_1d_{TYPE} + !----------------------------------------------------------------------- + !TYPE double,int,logical + subroutine assert_equal_0d_{TYPE}(expected, actual, msg, abs_tol) + ! + ! !DESCRIPTION: + ! Assert scalar values are equal + ! + ! !ARGUMENTS: + {VTYPE}, intent(in) :: expected + {VTYPE}, intent(in) :: actual + character(len=*), intent(in) :: msg + + ! absolute tolerance; if not specified, require exact equality; ignored for logicals + real(r8), intent(in), optional :: abs_tol + ! + ! 
!LOCAL VARIABLES: + integer :: i + + character(len=*), parameter :: subname = 'assert_equal_0d_{TYPE}' + !----------------------------------------------------------------------- + + if (.not. vals_are_equal(actual, expected, abs_tol)) then + write(iulog,*) 'ERROR in assert_equal: ', msg + write(iulog,*) 'Actual : ', actual + write(iulog,*) 'Expected: ', expected + call endrun('ERROR in assert_equal') + end if + + end subroutine assert_equal_0d_{TYPE} + + !----------------------------------------------------------------------- + !TYPE text + subroutine assert_equal_0d_{TYPE}(expected, actual, msg) + ! + ! !DESCRIPTION: + ! Assert scalar values are equal + ! + ! !ARGUMENTS: + {VTYPE}, intent(in) :: expected + {VTYPE}, intent(in) :: actual + character(len=*), intent(in) :: msg + ! + ! !LOCAL VARIABLES: + !----------------------------------------------------------------------- + + if (.not. vals_are_equal(actual, expected)) then + write(iulog,*) 'ERROR in assert_equal: ', msg + write(iulog,*) 'Actual : ', actual + write(iulog,*) 'Expected: ', expected + call endrun('ERROR in assert_equal') + end if + + end subroutine assert_equal_0d_{TYPE} + !----------------------------------------------------------------------- !TYPE double,int,logical subroutine assert_equal_2d_{TYPE}(expected, actual, msg, abs_tol) @@ -198,4 +260,23 @@ contains end function vals_are_equal_{TYPE} + !----------------------------------------------------------------------- + !TYPE text + function vals_are_equal_{TYPE}(actual, expected) result(vals_equal) + ! + ! !DESCRIPTION: + ! Returns true if actual is the same as expected, false otherwise + ! + ! !ARGUMENTS: + logical :: vals_equal ! function result + {VTYPE}, intent(in) :: actual + {VTYPE}, intent(in) :: expected + ! + ! 
!LOCAL VARIABLES: + !----------------------------------------------------------------------- + + vals_equal = actual == expected + + end function vals_are_equal_{TYPE} + end module Assertions From 82964a0a863d3ccb93ca5d93bfa6a205aba4f477 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 11 Aug 2025 09:07:23 -0600 Subject: [PATCH 053/141] Add some interfaces and handling for self tests --- src/main/abortutils.F90 | 88 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 84 insertions(+), 4 deletions(-) diff --git a/src/main/abortutils.F90 b/src/main/abortutils.F90 index c93fd761bf..01f1a22c5f 100644 --- a/src/main/abortutils.F90 +++ b/src/main/abortutils.F90 @@ -10,22 +10,74 @@ module abortutils ! in conjunction with aborting the model, or at least issuing a warning. !----------------------------------------------------------------------- + use shr_kind_mod, only: CX => shr_kind_cx + use shr_log_mod, only: shr_log_error implicit none private public :: endrun ! Abort the model for abnormal termination public :: write_point_context ! Write context for the given index, including global index information and more + ! Some interfaces for self-test work + public :: endrun_init ! Set up how endrun will behave (used for self-tests) + public :: get_last_endrun_msg ! Return the last endrun message interface endrun module procedure endrun_vanilla module procedure endrun_write_point_context end interface + ! These two are to enable self tests to have endrun calls that do not abort +#ifdef DEBUG + logical :: abort_on_endrun = .true. ! Whether to abort the model on endrun; set to .false. for self-tests + character(len=CX) :: save_msg = 'none' ! 
string to save from last endrun call +#endif + character(len=*), parameter, private :: sourcefile = & __FILE__ contains + !----------------------------------------------------------------------- + subroutine endrun_init( for_testing_do_not_abort ) + logical , intent(in) :: for_testing_do_not_abort +#ifdef DEBUG + if ( for_testing_do_not_abort )then + save_msg = 'none' ! Reset the saved message + abort_on_endrun = .false. + else + abort_on_endrun = .true. + end if +#else + call shr_log_error( 'ENDRUN: ', errmsg(__FILE__, __LINE__) ) + call endrun( msg='endrun_init called without DEBUG mode, which is not allowed' ) +#endif + end subroutine endrun_init + + !----------------------------------------------------------------------- + function get_last_endrun_msg() + ! + ! !DESCRIPTION: + ! Gives the last message saved from an endrun call that didn't + ! abort due to being in the context of self-tests + ! + ! !USES: + ! + ! !ARGUMENTS: + character(len=:), allocatable :: get_last_endrun_msg ! function result + !----------------------------------------------------------------------- + +#ifdef DEBUG + if (save_msg == 'none') then + call shr_log_error( 'An endrun call was expected, but has not been made yet' ) + end if + get_last_endrun_msg = trim(save_msg) +#else + call shr_log_error( 'ENDRUN: ', errmsg(__FILE__, __LINE__) ) + call endrun( msg='get_last_endrun_msg called without DEBUG mode, which is not allowed' ) +#endif + + end function get_last_endrun_msg + !----------------------------------------------------------------------- subroutine endrun_vanilla(msg, additional_msg) @@ -33,8 +85,10 @@ subroutine endrun_vanilla(msg, additional_msg) ! !DESCRIPTION: ! Abort the model for abnormal termination ! - use shr_sys_mod , only: shr_sys_abort - use clm_varctl , only: iulog + use shr_sys_mod, only: shr_sys_abort + use shr_abort_mod, only: shr_abort_abort + use clm_varctl, only: iulog + use ESMF, only : ESMF_Finalize, ESMF_END_ABORT ! ! !ARGUMENTS: ! 
Generally you want to at least provide msg. The main reason to separate msg from @@ -45,14 +99,35 @@ subroutine endrun_vanilla(msg, additional_msg) character(len=*), intent(in), optional :: msg ! string to be passed to shr_sys_abort character(len=*), intent(in), optional :: additional_msg ! string to be printed, but not passed to shr_sys_abort !----------------------------------------------------------------------- + character(len=CX) :: abort_msg if (present (additional_msg)) then + call shr_log_error( 'ENDRUN: '// trim(additional_msg) ) write(iulog,*)'ENDRUN: ', trim(additional_msg) else write(iulog,*)'ENDRUN:' end if - call shr_sys_abort(msg) +#ifdef DEBUG + if (.not. abort_on_endrun) then + if (save_msg /= 'none') then + abort_msg = 'a previous error was already logged and now a second one is being, done so fully aborting now' + abort_msg = trim(abort_msg) // ' (Call end_run_init after endrun calls to reset this)' + call shr_abort_abort(abort_msg) + end if + ! Just save msg, finalize ESMF and return + save_msg = trim(msg) + if (present (additional_msg)) then + save_msg = trim(msg)//trim(additional_msg) + call shr_log_error( 'ENDRUN: '// trim(additional_msg) ) + end if + call ESMF_Finalize(endflag=ESMF_END_ABORT) + else +#endif + call shr_abort_abort(msg) +#ifdef DEBUG + end if +#endif end subroutine endrun_vanilla @@ -66,6 +141,7 @@ subroutine endrun_write_point_context(subgrid_index, subgrid_level, msg, additio ! This version also prints additional information about the point causing the error. ! use shr_sys_mod , only: shr_sys_abort + use shr_abort_mod, only: shr_abort_abort use clm_varctl , only: iulog use decompMod , only: subgrid_level_unspecified ! 
@@ -95,7 +171,11 @@ subroutine endrun_write_point_context(subgrid_index, subgrid_level, msg, additio write(iulog,*)'ENDRUN:' end if - call shr_sys_abort(msg) + if (abort_on_endrun) then + call shr_sys_abort(msg) + else + call shr_abort_abort(msg) + end if end subroutine endrun_write_point_context From 3e939685903e17271a826311335d9f1ac803d96f Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 11 Aug 2025 09:08:52 -0600 Subject: [PATCH 054/141] Start adding tests for DecompInit --- src/self_tests/SelfTestDriver.F90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/self_tests/SelfTestDriver.F90 b/src/self_tests/SelfTestDriver.F90 index dd44a185df..6a55cec5fc 100644 --- a/src/self_tests/SelfTestDriver.F90 +++ b/src/self_tests/SelfTestDriver.F90 @@ -33,6 +33,7 @@ subroutine self_test_driver(bounds) use clm_varctl, only : for_testing_exit_after_self_tests, iulog use decompMod, only : bounds_type use TestNcdioPio, only : test_ncdio_pio + use TestDecompInit, only : test_decomp_init use ESMF, only : ESMF_LogWrite, ESMF_LOGMSG_INFO, ESMF_Finalize use shr_sys_mod, only : shr_sys_flush use spmdMod, only : masterproc @@ -51,6 +52,7 @@ subroutine self_test_driver(bounds) end if if (for_testing_run_decomp_init_tests) then ntests = ntests + 1 + call test_decomp_init() end if if (for_testing_exit_after_self_tests) then ! Print out some messaging if we are exiting after self tests. From da2fe56f22cc4decb2062f11ab43fc223a9fc77c Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 11 Aug 2025 09:09:11 -0600 Subject: [PATCH 055/141] Start adding tests for DecompInit --- src/self_tests/TestDecompInit.F90 | 126 ++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 src/self_tests/TestDecompInit.F90 diff --git a/src/self_tests/TestDecompInit.F90 b/src/self_tests/TestDecompInit.F90 new file mode 100644 index 0000000000..f77ce24bbe --- /dev/null +++ b/src/self_tests/TestDecompInit.F90 @@ -0,0 +1,126 @@ +module TestDecompInit + + ! 
------------------------------------------------------------------------ + ! !DESCRIPTION: + ! This module contains tests of decomp_init + +#include "shr_assert.h" + use shr_kind_mod, only : r8 => shr_kind_r8, CX => shr_kind_cx + use Assertions, only : assert_equal + use abortutils, only : endrun, endrun_init, get_last_endrun_msg + use spmdMod, only : masterproc, npes + use decompInitMod, only : decompInit_lnd, clump_pproc + use decompMod + + implicit none + private + save + + ! Public routines + + public :: test_decomp_init + + ! Module data used in various tests + + character(len=*), parameter, private :: sourcefile = & + __FILE__ + +contains + + !----------------------------------------------------------------------- + subroutine test_decomp_init() + ! + ! !DESCRIPTION: + ! Drive tests of decomp_init + ! + ! NOTE(wjs, 2020-10-15) Currently, endrun is called when any test assertion fails. I + ! thought about changing this so that, instead, a counter is incremented for each + ! failure, then at the end of the testing (in the higher-level self-test driver), + ! endrun is called if this counter is greater than 0. The benefit of this is that we'd + ! see all test failures, not just the first failure. To do that, we'd need to change + ! the assertions here to increment a counter rather than aborting. However, I'm not + ! spending the time to make this change for now because (1) I'm not sure how much + ! value we'd get from it; (2) even if we made that change, it's still very possible + ! for test code to abort for reasons other than assertions, if something goes wrong + ! inside decomp_init or pio; and (3) some tests here are dependent on earlier tests (for + ! example, the reads depend on the writes having worked), so a failure in an early + ! phase could really muck things up for later testing phases. Migrating to a + ! pFUnit-based unit test would solve this problem, since each pFUnit test is + ! 
independent, though would prevent us from being able to have dependent tests the + ! way we do here (where reads depend on earlier writes), for better or for worse. + ! + ! !ARGUMENTS: + ! + ! !LOCAL VARIABLES: + !----------------------------------------------------------------------- + + call write_to_log('start_test_decomp_init') + + call test_check_nclumps() + call test_decompInit_lnd_abort_on_bad_clump_pproc() + + call clean + + end subroutine test_decomp_init + + !----------------------------------------------------------------------- + subroutine test_decompInit_lnd_abort_on_bad_clump_pproc() + integer, parameter :: ni = 300, nj = 500 + integer :: amask(ni*nj) + character(len=CX) :: expected_msg, actual_msg + + clump_pproc = 0 + call decompInit_lnd( ni, nj, amask ) + expected_msg = 'clump_pproc= 0 must be greater than 0' + actual_msg = get_last_endrun_msg() + call assert_equal( & + expected=expected_msg, actual=actual_msg, & + msg='decompInit_lnd did not abort with clump_pproc=0' ) + end subroutine test_decompInit_lnd_abort_on_bad_clump_pproc + + !----------------------------------------------------------------------- + subroutine test_check_nclumps() + integer :: expected_nclumps + + call endrun_init( .true. ) ! Do not abort on endrun for self-tests + expected_nclumps = npes / clump_pproc + call assert_equal(expected=expected_nclumps, actual=nclumps, & + msg='nclumps are not as expected') + call endrun_init( .false. ) + end subroutine test_check_nclumps + + !----------------------------------------------------------------------- + subroutine write_to_log(msg) + ! + ! !DESCRIPTION: + ! Write a message to the log file, just from the masterproc + ! + ! !ARGUMENTS: + character(len=*), intent(in) :: msg + ! + ! 
!LOCAL VARIABLES: + + character(len=*), parameter :: subname = 'write_to_log' + !----------------------------------------------------------------------- + + if (masterproc) then + write(*,'(a)') msg + end if + + end subroutine write_to_log + + !----------------------------------------------------------------------- + subroutine clean + ! + ! !DESCRIPTION: + ! Do end-of-testing cleanup + ! + ! !ARGUMENTS: + ! + ! !LOCAL VARIABLES: + !----------------------------------------------------------------------- + + end subroutine clean + + +end module TestDecompInit From d4522af4adc64b452b473acda803b2201ae51553 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 11 Aug 2025 15:39:02 -0600 Subject: [PATCH 056/141] Add some more logging about self-tests options, and add a normal termination method to abortutils for an early termination --- src/main/abortutils.F90 | 56 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/src/main/abortutils.F90 b/src/main/abortutils.F90 index 01f1a22c5f..67a2571348 100644 --- a/src/main/abortutils.F90 +++ b/src/main/abortutils.F90 @@ -11,12 +11,14 @@ module abortutils !----------------------------------------------------------------------- use shr_kind_mod, only: CX => shr_kind_cx - use shr_log_mod, only: shr_log_error + use shr_log_mod, only: shr_log_error, errMsg => shr_log_errMsg + use shr_sys_mod , only : shr_sys_flush implicit none private public :: endrun ! Abort the model for abnormal termination public :: write_point_context ! Write context for the given index, including global index information and more + public :: terminate_early_without_error ! Terminate the model without error, but with a message ! Some interfaces for self-test work public :: endrun_init ! Set up how endrun will behave (used for self-tests) public :: get_last_endrun_msg ! 
Return the last endrun message @@ -41,6 +43,10 @@ module abortutils subroutine endrun_init( for_testing_do_not_abort ) logical , intent(in) :: for_testing_do_not_abort #ifdef DEBUG + if (save_msg /= 'none') then + abort_on_endrun = .true. + call endrun( msg='An endrun call happened, but was not handled' ) + end if if ( for_testing_do_not_abort )then save_msg = 'none' ! Reset the saved message abort_on_endrun = .false. @@ -48,7 +54,7 @@ subroutine endrun_init( for_testing_do_not_abort ) abort_on_endrun = .true. end if #else - call shr_log_error( 'ENDRUN: ', errmsg(__FILE__, __LINE__) ) + call shr_log_error( 'ENDRUN: ', errMsg(__FILE__, __LINE__) ) call endrun( msg='endrun_init called without DEBUG mode, which is not allowed' ) #endif end subroutine endrun_init @@ -67,10 +73,15 @@ function get_last_endrun_msg() !----------------------------------------------------------------------- #ifdef DEBUG + if (abort_on_endrun) then + call endrun( msg='Do not call get_last_endrun_msg when abort_on_endrun is true' ) + end if if (save_msg == 'none') then call shr_log_error( 'An endrun call was expected, but has not been made yet' ) end if get_last_endrun_msg = trim(save_msg) + ! Reset endrun_msg to indicate the last error message was handled + save_msg = 'none' #else call shr_log_error( 'ENDRUN: ', errmsg(__FILE__, __LINE__) ) call endrun( msg='get_last_endrun_msg called without DEBUG mode, which is not allowed' ) @@ -89,6 +100,7 @@ subroutine endrun_vanilla(msg, additional_msg) use shr_abort_mod, only: shr_abort_abort use clm_varctl, only: iulog use ESMF, only : ESMF_Finalize, ESMF_END_ABORT + intrinsic :: exit ! ! !ARGUMENTS: ! Generally you want to at least provide msg. The main reason to separate msg from @@ -101,6 +113,7 @@ subroutine endrun_vanilla(msg, additional_msg) !----------------------------------------------------------------------- character(len=CX) :: abort_msg + call shr_sys_flush(iulog) ! 
Flush the I/O buffers always if (present (additional_msg)) then call shr_log_error( 'ENDRUN: '// trim(additional_msg) ) write(iulog,*)'ENDRUN: ', trim(additional_msg) @@ -113,17 +126,20 @@ subroutine endrun_vanilla(msg, additional_msg) if (save_msg /= 'none') then abort_msg = 'a previous error was already logged and now a second one is being, done so fully aborting now' abort_msg = trim(abort_msg) // ' (Call end_run_init after endrun calls to reset this)' + call shr_sys_flush(iulog) ! Flush the I/O buffers always call shr_abort_abort(abort_msg) end if - ! Just save msg, finalize ESMF and return + ! Just save msg and return + ! Don't finalize ESMF or exit since the self tests need to evaluate that save_msg = trim(msg) if (present (additional_msg)) then save_msg = trim(msg)//trim(additional_msg) call shr_log_error( 'ENDRUN: '// trim(additional_msg) ) + call shr_sys_flush(iulog) ! Flush the I/O buffers always end if - call ESMF_Finalize(endflag=ESMF_END_ABORT) else #endif + call shr_sys_flush(iulog) ! Flush the I/O buffers always call shr_abort_abort(msg) #ifdef DEBUG end if @@ -275,4 +291,36 @@ subroutine write_point_context(subgrid_index, subgrid_level) end subroutine write_point_context + !----------------------------------------------------------------------- + subroutine terminate_early_without_error(msg) + + !----------------------------------------------------------------------- + ! !DESCRIPTION: + ! Terminate the model early without an error + ! + use clm_varctl, only: iulog + use shr_abort_mod, only: shr_abort_abort + use ESMF, only : ESMF_Finalize, ESMF_SUCCESS + intrinsic :: exit + ! + ! !ARGUMENTS: + character(len=*), intent(in), optional :: msg ! string to be logged on termination + !----------------------------------------------------------------------- + integer :: rc ! return code from ESMF_Finalize + + call shr_log_error( 'Finishing early: '// trim(msg) ) + write(iulog,*) 'Finishing early: '// trim(msg) + call shr_sys_flush(iulog) ! 
Flush the I/O buffers always + call ESMF_Finalize(rc=rc) + if ( rc /= ESMF_SUCCESS ) then + write(iulog,*) 'ESMF_Finalize returned with error code: ', rc + call shr_sys_flush(iulog) ! Flush the I/O buffers always + call shr_abort_abort('ESMF_Finalize failed ', file=sourcefile, line=__LINE__) + end if + call exit(0) ! Exit with success code + + end subroutine terminate_early_without_error + + !----------------------------------------------------------------------- + end module abortutils From 0481f89e9db58487c9fa313bda3030c5936d770c Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 11 Aug 2025 15:39:48 -0600 Subject: [PATCH 057/141] Add return statements after endrun calls so that the self-tests can trap expected error messaging --- src/main/decompInitMod.F90 | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index e27edf47a6..bef2fa5c63 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -90,10 +90,12 @@ subroutine decompInit_lnd(lni, lnj, amask) write(iulog,*) 'decompInit_lnd(): Number of gridcell clumps= ',nclumps, & ' is less than the number of processes = ', npes call endrun(msg=errMsg(sourcefile, __LINE__)) + return end if else write(iulog,*)'clump_pproc= ',clump_pproc,' must be greater than 0' call endrun(msg=errMsg(sourcefile, __LINE__)) + return end if ! 
allocate and initialize procinfo and clumps @@ -103,6 +105,7 @@ subroutine decompInit_lnd(lni, lnj, amask) if (ier /= 0) then write(iulog,*) 'decompInit_lnd(): allocation error for procinfo%cid' call endrun(msg=errMsg(sourcefile, __LINE__)) + return endif procinfo%nclumps = clump_pproc procinfo%cid(:) = -1 @@ -126,6 +129,7 @@ subroutine decompInit_lnd(lni, lnj, amask) if (ier /= 0) then write(iulog,*) 'decompInit_lnd(): allocation error for clumps' call endrun(msg=errMsg(sourcefile, __LINE__)) + return end if clumps(:)%owner = -1 clumps(:)%ncells = 0 @@ -151,6 +155,7 @@ subroutine decompInit_lnd(lni, lnj, amask) if (pid < 0 .or. pid > npes-1) then write(iulog,*) 'decompInit_lnd(): round robin pid error ',n,pid,npes call endrun(msg=errMsg(sourcefile, __LINE__)) + return endif clumps(n)%owner = pid if (iam == pid) then @@ -158,6 +163,7 @@ subroutine decompInit_lnd(lni, lnj, amask) if (cid < 1 .or. cid > clump_pproc) then write(iulog,*) 'decompInit_lnd(): round robin pid error ',n,pid,npes call endrun(msg=errMsg(sourcefile, __LINE__)) + return endif procinfo%cid(cid) = n endif @@ -175,11 +181,13 @@ subroutine decompInit_lnd(lni, lnj, amask) write(iulog,*) 'decompInit_lnd(): Number of processes exceeds number ', & 'of land grid cells',npes,numg call endrun(msg=errMsg(sourcefile, __LINE__)) + return end if if (nclumps > numg) then write(iulog,*) 'decompInit_lnd(): Number of clumps exceeds number ', & 'of land grid cells',nclumps,numg call endrun(msg=errMsg(sourcefile, __LINE__)) + return end if if (float(numg)/float(nclumps) < float(nsegspc)) then @@ -251,12 +259,14 @@ subroutine decompInit_lnd(lni, lnj, amask) if (ier /= 0) then write(iulog,*) 'decompInit_lnd(): allocation error1 for gdc2glo , etc' call endrun(msg=errMsg(sourcefile, __LINE__)) + return end if gdc2glo(:) = 0 allocate(clumpcnt(nclumps),stat=ier) if (ier /= 0) then write(iulog,*) 'decompInit_lnd(): allocation error1 for clumpcnt' call endrun(msg=errMsg(sourcefile, __LINE__)) + return end if ! 
clumpcnt is the start gdc index of each clump @@ -482,6 +492,7 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) write(iulog ,*) 'decompInit_glcp(): allvecg error cohorts',iam,n,clumps(n)%nCohorts ,allvecg(n,5) call endrun(msg=errMsg(sourcefile, __LINE__)) + return endif enddo From fa64311936900aa58cabfff625bcfded7cea8f9b Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Tue, 12 Aug 2025 09:32:37 -0600 Subject: [PATCH 058/141] Change the endrun message so that it can be tested for --- src/main/decompInitMod.F90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index bef2fa5c63..0d668ee7d2 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -93,8 +93,8 @@ subroutine decompInit_lnd(lni, lnj, amask) return end if else - write(iulog,*)'clump_pproc= ',clump_pproc,' must be greater than 0' - call endrun(msg=errMsg(sourcefile, __LINE__)) + write(iulog,*) 'ERROR: Bad clump_pproc=', clump_pproc, errMsg(sourcefile, __LINE__) + call endrun(msg='clump_pproc must be greater than 0') return end if From b134a3d22d9f46c3fae8107dc998891a8567596b Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Tue, 12 Aug 2025 09:34:43 -0600 Subject: [PATCH 059/141] Add more logging, correct the clump_pproc test, flush log write each time --- src/self_tests/TestDecompInit.F90 | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/self_tests/TestDecompInit.F90 b/src/self_tests/TestDecompInit.F90 index f77ce24bbe..6cf64dc857 100644 --- a/src/self_tests/TestDecompInit.F90 +++ b/src/self_tests/TestDecompInit.F90 @@ -7,6 +7,7 @@ module TestDecompInit #include "shr_assert.h" use shr_kind_mod, only : r8 => shr_kind_r8, CX => shr_kind_cx use Assertions, only : assert_equal + use clm_varctl, only : iulog use abortutils, only : endrun, endrun_init, get_last_endrun_msg use spmdMod, only : masterproc, npes use decompInitMod, only : decompInit_lnd, clump_pproc @@ -56,7 +57,9 @@ 
subroutine test_decomp_init() call write_to_log('start_test_decomp_init') + call write_to_log('test_check_nclumps') call test_check_nclumps() + call write_to_log('test_decompInit_lnd_abort_on_bad_clump_pproc') call test_decompInit_lnd_abort_on_bad_clump_pproc() call clean @@ -69,10 +72,15 @@ subroutine test_decompInit_lnd_abort_on_bad_clump_pproc() integer :: amask(ni*nj) character(len=CX) :: expected_msg, actual_msg + call endrun_init( .true. ) ! Do not abort on endrun for self-tests clump_pproc = 0 + call write_to_log('decompInit_lnd with clump_pproc=0 should abort') call decompInit_lnd( ni, nj, amask ) - expected_msg = 'clump_pproc= 0 must be greater than 0' + call write_to_log('check expected abort message') + expected_msg = 'clump_pproc must be greater than 0' actual_msg = get_last_endrun_msg() + call endrun_init( .false. ) ! Turn back on to abort on the assert + call write_to_log('call assert_equal to check the abort message') call assert_equal( & expected=expected_msg, actual=actual_msg, & msg='decompInit_lnd did not abort with clump_pproc=0' ) @@ -95,6 +103,7 @@ subroutine write_to_log(msg) ! !DESCRIPTION: ! Write a message to the log file, just from the masterproc ! + use shr_sys_mod, only : shr_sys_flush ! !ARGUMENTS: character(len=*), intent(in) :: msg ! @@ -105,6 +114,7 @@ subroutine write_to_log(msg) if (masterproc) then write(*,'(a)') msg + call shr_sys_flush(iulog) ! 
Flush the I/O buffers always end if end subroutine write_to_log From 22088aaa2cd92a352cd49876ddf6da7769ea632e Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Tue, 12 Aug 2025 09:38:01 -0600 Subject: [PATCH 060/141] Remove ending whitespace, add logging about exiting self-tests, and add a terminate early call, which still registers as an error since the timers aren't completed --- src/self_tests/SelfTestDriver.F90 | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/self_tests/SelfTestDriver.F90 b/src/self_tests/SelfTestDriver.F90 index 6a55cec5fc..79a9f89570 100644 --- a/src/self_tests/SelfTestDriver.F90 +++ b/src/self_tests/SelfTestDriver.F90 @@ -37,6 +37,7 @@ subroutine self_test_driver(bounds) use ESMF, only : ESMF_LogWrite, ESMF_LOGMSG_INFO, ESMF_Finalize use shr_sys_mod, only : shr_sys_flush use spmdMod, only : masterproc + use abortutils, only : terminate_early_without_error ! !ARGUMENTS: type(bounds_type), intent(in) :: bounds ! @@ -46,6 +47,12 @@ subroutine self_test_driver(bounds) integer :: ntests = 0 !----------------------------------------------------------------------- + if ( masterproc ) then + write(iulog,*) '-------------------------------' + write(iulog,*) '----- Starting self tests -----' + write(iulog,*) '-------------------------------' + call shr_sys_flush(iulog) + end if if (for_testing_run_ncdiopio_tests) then ntests = ntests + 1 call test_ncdio_pio(bounds) @@ -54,9 +61,15 @@ subroutine self_test_driver(bounds) ntests = ntests + 1 call test_decomp_init() end if + if ( masterproc ) then + write(iulog,*) '-------------------------------' + write(iulog,*) '----- Ending self tests -------' + write(iulog,*) '-------------------------------' + call shr_sys_flush(iulog) + end if if (for_testing_exit_after_self_tests) then ! Print out some messaging if we are exiting after self tests. 
- if ( masterproc ) then + if ( masterproc ) then if ( ntests == 0 )then write(iulog,*) 'WARNING: You are exiting after self tests were run -- but no self tests were run.' else @@ -65,6 +78,7 @@ subroutine self_test_driver(bounds) call shr_sys_flush(iulog) call ESMF_LogWrite(' exiting after running self tests', ESMF_LOGMSG_INFO) end if + call terminate_early_without_error('Exiting after running self tests') end if end subroutine self_test_driver From 4b83e4acb9ffac3901214ed09ae7e460d893fc1d Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Tue, 12 Aug 2025 09:56:09 -0600 Subject: [PATCH 061/141] Remove terminate_early_without_error because it fails with timers that aren't closed --- src/main/abortutils.F90 | 31 ------------------------------- src/self_tests/SelfTestDriver.F90 | 3 --- 2 files changed, 34 deletions(-) diff --git a/src/main/abortutils.F90 b/src/main/abortutils.F90 index 67a2571348..acbbdf0c5c 100644 --- a/src/main/abortutils.F90 +++ b/src/main/abortutils.F90 @@ -18,7 +18,6 @@ module abortutils public :: endrun ! Abort the model for abnormal termination public :: write_point_context ! Write context for the given index, including global index information and more - public :: terminate_early_without_error ! Terminate the model without error, but with a message ! Some interfaces for self-test work public :: endrun_init ! Set up how endrun will behave (used for self-tests) public :: get_last_endrun_msg ! Return the last endrun message @@ -292,35 +291,5 @@ subroutine write_point_context(subgrid_index, subgrid_level) end subroutine write_point_context !----------------------------------------------------------------------- - subroutine terminate_early_without_error(msg) - - !----------------------------------------------------------------------- - ! !DESCRIPTION: - ! Terminate the model early without an error - ! - use clm_varctl, only: iulog - use shr_abort_mod, only: shr_abort_abort - use ESMF, only : ESMF_Finalize, ESMF_SUCCESS - intrinsic :: exit - ! - ! 
!ARGUMENTS: - character(len=*), intent(in), optional :: msg ! string to be logged on termination - !----------------------------------------------------------------------- - integer :: rc ! return code from ESMF_Finalize - - call shr_log_error( 'Finishing early: '// trim(msg) ) - write(iulog,*) 'Finishing early: '// trim(msg) - call shr_sys_flush(iulog) ! Flush the I/O buffers always - call ESMF_Finalize(rc=rc) - if ( rc /= ESMF_SUCCESS ) then - write(iulog,*) 'ESMF_Finalize returned with error code: ', rc - call shr_sys_flush(iulog) ! Flush the I/O buffers always - call shr_abort_abort('ESMF_Finalize failed ', file=sourcefile, line=__LINE__) - end if - call exit(0) ! Exit with success code - - end subroutine terminate_early_without_error - - !----------------------------------------------------------------------- end module abortutils diff --git a/src/self_tests/SelfTestDriver.F90 b/src/self_tests/SelfTestDriver.F90 index 79a9f89570..b0c92dc7c4 100644 --- a/src/self_tests/SelfTestDriver.F90 +++ b/src/self_tests/SelfTestDriver.F90 @@ -37,7 +37,6 @@ subroutine self_test_driver(bounds) use ESMF, only : ESMF_LogWrite, ESMF_LOGMSG_INFO, ESMF_Finalize use shr_sys_mod, only : shr_sys_flush use spmdMod, only : masterproc - use abortutils, only : terminate_early_without_error ! !ARGUMENTS: type(bounds_type), intent(in) :: bounds ! @@ -76,9 +75,7 @@ subroutine self_test_driver(bounds) write(iulog,*) 'Exiting after running ', ntests, ' self tests.' 
end if call shr_sys_flush(iulog) - call ESMF_LogWrite(' exiting after running self tests', ESMF_LOGMSG_INFO) end if - call terminate_early_without_error('Exiting after running self tests') end if end subroutine self_test_driver From 00f83355381c9e40764b1379533b1b0ef6b9eb37 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Tue, 12 Aug 2025 16:42:36 -0600 Subject: [PATCH 062/141] Use iulog at top of module to be available throughout --- src/main/abortutils.F90 | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/abortutils.F90 b/src/main/abortutils.F90 index acbbdf0c5c..87e41e384d 100644 --- a/src/main/abortutils.F90 +++ b/src/main/abortutils.F90 @@ -13,6 +13,7 @@ module abortutils use shr_kind_mod, only: CX => shr_kind_cx use shr_log_mod, only: shr_log_error, errMsg => shr_log_errMsg use shr_sys_mod , only : shr_sys_flush + use clm_varctl, only: iulog implicit none private @@ -47,6 +48,7 @@ subroutine endrun_init( for_testing_do_not_abort ) call endrun( msg='An endrun call happened, but was not handled' ) end if if ( for_testing_do_not_abort )then + write(iulog,*)'Preparing a test that will call endrun' save_msg = 'none' ! Reset the saved message abort_on_endrun = .false. else @@ -97,7 +99,6 @@ subroutine endrun_vanilla(msg, additional_msg) ! use shr_sys_mod, only: shr_sys_abort use shr_abort_mod, only: shr_abort_abort - use clm_varctl, only: iulog use ESMF, only : ESMF_Finalize, ESMF_END_ABORT intrinsic :: exit ! @@ -157,7 +158,6 @@ subroutine endrun_write_point_context(subgrid_index, subgrid_level, msg, additio ! use shr_sys_mod , only: shr_sys_abort use shr_abort_mod, only: shr_abort_abort - use clm_varctl , only: iulog use decompMod , only: subgrid_level_unspecified ! ! Arguments: @@ -204,7 +204,6 @@ subroutine write_point_context(subgrid_index, subgrid_level) ! 
use shr_sys_mod , only : shr_sys_flush, shr_sys_abort use shr_log_mod , only : errMsg => shr_log_errMsg - use clm_varctl , only : iulog use decompMod , only : subgrid_level_gridcell, subgrid_level_landunit, subgrid_level_column, subgrid_level_patch use decompMod , only : get_global_index use GridcellType , only : grc From 808720a9fe32552117377b8774ad68c803ab027d Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Tue, 12 Aug 2025 16:43:51 -0600 Subject: [PATCH 063/141] Add some new tests for fails that currently fail on the first one --- src/self_tests/TestDecompInit.F90 | 78 ++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/src/self_tests/TestDecompInit.F90 b/src/self_tests/TestDecompInit.F90 index 6cf64dc857..25a28c1092 100644 --- a/src/self_tests/TestDecompInit.F90 +++ b/src/self_tests/TestDecompInit.F90 @@ -61,6 +61,10 @@ subroutine test_decomp_init() call test_check_nclumps() call write_to_log('test_decompInit_lnd_abort_on_bad_clump_pproc') call test_decompInit_lnd_abort_on_bad_clump_pproc() + call write_to_log('test_decompInit_lnd_abort_on_too_big_clump_pproc') + call test_decompInit_lnd_abort_on_too_big_clump_pproc() + call write_to_log('test_decompInit_lnd_abort_when_npes_too_large') + call test_decompInit_lnd_abort_when_npes_too_large() call clean @@ -69,7 +73,7 @@ end subroutine test_decomp_init !----------------------------------------------------------------------- subroutine test_decompInit_lnd_abort_on_bad_clump_pproc() integer, parameter :: ni = 300, nj = 500 - integer :: amask(ni*nj) + integer :: amask(ni*nj) character(len=CX) :: expected_msg, actual_msg call endrun_init( .true. ) ! 
Do not abort on endrun for self-tests @@ -86,6 +90,78 @@ subroutine test_decompInit_lnd_abort_on_bad_clump_pproc() msg='decompInit_lnd did not abort with clump_pproc=0' ) end subroutine test_decompInit_lnd_abort_on_bad_clump_pproc + !----------------------------------------------------------------------- + subroutine test_decompInit_lnd_abort_on_too_big_clump_pproc() + integer, parameter :: ni = 300, nj = 500 + integer :: amask(ni*nj) + character(len=CX) :: expected_msg, actual_msg + + call endrun_init( .true. ) ! Do not abort on endrun for self-tests + amask(:) = 1 ! Set all to land + clump_pproc = (ni * nj + 1) / npes + call write_to_log('decompInit_lnd with clump_pproc too large should abort') + call decompInit_lnd( ni, nj, amask ) + call write_to_log('check expected abort message') + expected_msg = 'decompInit_lnd(): Number of clumps exceeds number of land grid cells' + actual_msg = get_last_endrun_msg() + call endrun_init( .false. ) ! Turn back on to abort on the assert + call write_to_log('call assert_equal to check the abort message') + call assert_equal( & + expected=expected_msg, actual=actual_msg, & + msg='decompInit_lnd did not abort with clump_pproc too large' ) + call assert_equal( numg, ni*nj, msg='numg is not as expected' ) + end subroutine test_decompInit_lnd_abort_on_too_big_clump_pproc + + !----------------------------------------------------------------------- + subroutine test_decompInit_lnd_abort_when_npes_too_large() + integer, parameter :: ni = 300, nj = 500 + integer :: amask(ni*nj) + character(len=CX) :: expected_msg, actual_msg + integer :: npes_orig + + ! NOTE: This is arbitrarily modifying the NPES value -- so it MUST be reset set the END! + npes_orig = npes + npes = ni*nj + 1 + + call endrun_init( .true. ) ! Do not abort on endrun for self-tests + amask(:) = 1 ! 
Set all to land + call write_to_log('decompInit_lnd with npes too large should abort') + call decompInit_lnd( ni, nj, amask ) + call write_to_log('check expected abort message') + expected_msg = 'decompInit_lnd(): Number of processes exceeds number of land grid cells' + actual_msg = get_last_endrun_msg() + call endrun_init( .false. ) ! Turn back on to abort on the assert + call write_to_log('call assert_equal to check the abort message') + call assert_equal( & + expected=expected_msg, actual=actual_msg, & + msg='decompInit_lnd did not abort with npes too large' ) + + ! NOTE: Return npes to its original value + npes = npes_orig + end subroutine test_decompInit_lnd_abort_when_npes_too_large + + !----------------------------------------------------------------------- + subroutine test_decompInit_lnd_abort_on_too_small_nsegspc() + use clm_varctl, only : nsegspc + integer, parameter :: ni = 300, nj = 500 + integer :: amask(ni*nj) + character(len=CX) :: expected_msg, actual_msg + + call endrun_init( .true. ) ! Do not abort on endrun for self-tests + amask(:) = 1 ! Set all to land + nsegspc = 0 + call write_to_log('decompInit_lnd with nsegspc too small should abort') + call decompInit_lnd( ni, nj, amask ) + call write_to_log('check expected abort message') + expected_msg = 'decompInit_lnd(): nsegspc must be greater than 0' + actual_msg = get_last_endrun_msg() + call endrun_init( .false. ) ! 
Turn back on to abort on the assert + call write_to_log('call assert_equal to check the abort message') + call assert_equal( & + expected=expected_msg, actual=actual_msg, & + msg='decompInit_lnd did not abort with too nsegspc too small' ) + end subroutine test_decompInit_lnd_abort_on_too_small_nsegspc + !----------------------------------------------------------------------- subroutine test_check_nclumps() integer :: expected_nclumps From 7c098039f478c9c346e32ac3c02dd662aadf985f Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Sun, 17 Aug 2025 13:46:57 -0600 Subject: [PATCH 064/141] Seperate run_self_tests into a test mod that just sets up the testing for speed and bypassing the run phase, and the part that turns the self tests on, so they can be used differently in the testlist --- .../clm/for_testing_fastsetup_bypassrun/README | 8 ++++++++ .../shell_commands | 0 .../clm/for_testing_fastsetup_bypassrun/user_nl_clm | 10 ++++++++++ .../testdefs/testmods_dirs/clm/run_self_tests/README | 4 +--- .../testmods_dirs/clm/run_self_tests/include_user_mods | 1 + .../testmods_dirs/clm/run_self_tests/user_nl_clm | 9 +-------- 6 files changed, 21 insertions(+), 11 deletions(-) create mode 100644 cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/README rename cime_config/testdefs/testmods_dirs/clm/{run_self_tests => for_testing_fastsetup_bypassrun}/shell_commands (100%) create mode 100644 cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/user_nl_clm create mode 100644 cime_config/testdefs/testmods_dirs/clm/run_self_tests/include_user_mods diff --git a/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/README b/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/README new file mode 100644 index 0000000000..5d30cc0d4e --- /dev/null +++ b/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/README @@ -0,0 +1,8 @@ +The purpose of this testmod directory is to setup for running fast 
testing +of initialization. So it bypasses the run phase and exits early. + +We use cold start so that we can get through initialization faster. + +I/O is turned off as much as possible. + +And physics options that make the model run faster are used. diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/shell_commands similarity index 100% rename from cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands rename to cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/shell_commands diff --git a/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/user_nl_clm new file mode 100644 index 0000000000..573df5c02e --- /dev/null +++ b/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/user_nl_clm @@ -0,0 +1,10 @@ +! Exit early and bypass the run phase +for_testing_exit_after_self_tests = .true. + +! Turn off history, restarts, and output +hist_empty_htapes = .true. +use_noio = .true. + +! Turn off urban options, and only do urban in gridcells that are majority urban +urban_hac = 'OFF' +toosmall_urban = 98.0d00 ! Minimize urban in gridcells diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/README b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/README index 985b2bfae4..56457840bf 100644 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/README +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/README @@ -2,9 +2,7 @@ The purpose of this testmod directory is to trigger runtime initialization self-tests. This runs a set of unit/integration tests that apply at initialization. -We use cold start so that we can get through initialization faster, -since how we initialize the model is unimportant for these self-tests. -We also exit as early as possible to minimize the time spent. 
+We inherit the test-mod that sets up for testing and bypassing as much as possible for speed. There are other self_tests that need to be exercised in the model time stepping and are done outside of these. diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/include_user_mods b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/include_user_mods new file mode 100644 index 0000000000..cdf5cc9c81 --- /dev/null +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/include_user_mods @@ -0,0 +1 @@ +../for_testing_fastsetup_bypassrun diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm index 499770f153..ff8468ea50 100644 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm @@ -1,9 +1,2 @@ -for_testing_run_ncdiopio_tests = .false. +for_testing_run_ncdiopio_tests = .true. for_testing_run_decomp_init_tests = .true. -for_testing_exit_after_self_tests = .true. - -! Turn off history, restarts, and output -hist_empty_htapes = .true. -use_noio = .true. -urban_hac = 'OFF' -toosmall_urban = 98.0d00 ! 
Minimize urban in gridcells From 476088cbdfd052ba8575c344cae36f7741f8ab92 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Sun, 17 Aug 2025 13:56:26 -0600 Subject: [PATCH 065/141] Seperate out decomp_init test list into production tests for timing, and DEBUG tests that just run the self-tests --- cime_config/testdefs/testlist_clm.xml | 33 ++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/cime_config/testdefs/testlist_clm.xml b/cime_config/testdefs/testlist_clm.xml index 1a53e25ae9..0d4a56ad54 100644 --- a/cime_config/testdefs/testlist_clm.xml +++ b/cime_config/testdefs/testlist_clm.xml @@ -4165,7 +4165,28 @@ - + + + + + + + + + + + + + + + + + + + + + + @@ -4174,7 +4195,7 @@ - + @@ -4183,7 +4204,7 @@ - + @@ -4192,7 +4213,7 @@ - + @@ -4201,7 +4222,7 @@ - + @@ -4210,7 +4231,7 @@ - + From 8c2ad4413ef1ee1d5f5bc052979e2930eb78328d Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Tue, 19 Aug 2025 20:40:28 -0600 Subject: [PATCH 066/141] Make the grid size much smaller and save it at the top of the module, fix some of the endrun msgs --- src/self_tests/TestDecompInit.F90 | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/self_tests/TestDecompInit.F90 b/src/self_tests/TestDecompInit.F90 index 25a28c1092..a24bec8823 100644 --- a/src/self_tests/TestDecompInit.F90 +++ b/src/self_tests/TestDecompInit.F90 @@ -23,6 +23,9 @@ module TestDecompInit ! Module data used in various tests + integer, parameter :: ni = 10, nj = 5 + integer :: amask(ni*nj) + character(len=*), parameter, private :: sourcefile = & __FILE__ @@ -72,8 +75,6 @@ end subroutine test_decomp_init !----------------------------------------------------------------------- subroutine test_decompInit_lnd_abort_on_bad_clump_pproc() - integer, parameter :: ni = 300, nj = 500 - integer :: amask(ni*nj) character(len=CX) :: expected_msg, actual_msg call endrun_init( .true. ) ! 
Do not abort on endrun for self-tests @@ -92,8 +93,6 @@ end subroutine test_decompInit_lnd_abort_on_bad_clump_pproc !----------------------------------------------------------------------- subroutine test_decompInit_lnd_abort_on_too_big_clump_pproc() - integer, parameter :: ni = 300, nj = 500 - integer :: amask(ni*nj) character(len=CX) :: expected_msg, actual_msg call endrun_init( .true. ) ! Do not abort on endrun for self-tests @@ -102,7 +101,7 @@ subroutine test_decompInit_lnd_abort_on_too_big_clump_pproc() call write_to_log('decompInit_lnd with clump_pproc too large should abort') call decompInit_lnd( ni, nj, amask ) call write_to_log('check expected abort message') - expected_msg = 'decompInit_lnd(): Number of clumps exceeds number of land grid cells' + expected_msg = 'Number of clumps exceeds number of land grid cells' actual_msg = get_last_endrun_msg() call endrun_init( .false. ) ! Turn back on to abort on the assert call write_to_log('call assert_equal to check the abort message') @@ -114,8 +113,6 @@ end subroutine test_decompInit_lnd_abort_on_too_big_clump_pproc !----------------------------------------------------------------------- subroutine test_decompInit_lnd_abort_when_npes_too_large() - integer, parameter :: ni = 300, nj = 500 - integer :: amask(ni*nj) character(len=CX) :: expected_msg, actual_msg integer :: npes_orig @@ -128,7 +125,7 @@ subroutine test_decompInit_lnd_abort_when_npes_too_large() call write_to_log('decompInit_lnd with npes too large should abort') call decompInit_lnd( ni, nj, amask ) call write_to_log('check expected abort message') - expected_msg = 'decompInit_lnd(): Number of processes exceeds number of land grid cells' + expected_msg = 'Number of processes exceeds number of land grid cells' actual_msg = get_last_endrun_msg() call endrun_init( .false. ) ! 
Turn back on to abort on the assert call write_to_log('call assert_equal to check the abort message') @@ -143,8 +140,6 @@ end subroutine test_decompInit_lnd_abort_when_npes_too_large !----------------------------------------------------------------------- subroutine test_decompInit_lnd_abort_on_too_small_nsegspc() use clm_varctl, only : nsegspc - integer, parameter :: ni = 300, nj = 500 - integer :: amask(ni*nj) character(len=CX) :: expected_msg, actual_msg call endrun_init( .true. ) ! Do not abort on endrun for self-tests @@ -153,7 +148,7 @@ subroutine test_decompInit_lnd_abort_on_too_small_nsegspc() call write_to_log('decompInit_lnd with nsegspc too small should abort') call decompInit_lnd( ni, nj, amask ) call write_to_log('check expected abort message') - expected_msg = 'decompInit_lnd(): nsegspc must be greater than 0' + expected_msg = 'nsegspc must be greater than 0' actual_msg = get_last_endrun_msg() call endrun_init( .false. ) ! Turn back on to abort on the assert call write_to_log('call assert_equal to check the abort message') From 9d236caa0fda0e09ab11eee5f36d3eada96adbfc Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Tue, 19 Aug 2025 23:22:55 -0600 Subject: [PATCH 067/141] Change endrun calls to use new format for file and line, and remove the subname, create new internal subroutines in decompInit_lnd for allocate, clean, and check errors, move the check errors part to the first thing done --- src/main/decompInitMod.F90 | 201 +++++++++++++++++++++++-------------- 1 file changed, 125 insertions(+), 76 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index 0d668ee7d2..d6a03d5dba 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -83,30 +83,15 @@ subroutine decompInit_lnd(lni, lnj, amask) lns = lni * lnj - !--- set and verify nclumps --- - if (clump_pproc > 0) then - nclumps = clump_pproc * npes - if (nclumps < npes) then - write(iulog,*) 'decompInit_lnd(): Number of gridcell clumps= ',nclumps, & - 
' is less than the number of processes = ', npes - call endrun(msg=errMsg(sourcefile, __LINE__)) - return - end if - else - write(iulog,*) 'ERROR: Bad clump_pproc=', clump_pproc, errMsg(sourcefile, __LINE__) - call endrun(msg='clump_pproc must be greater than 0') - return - end if + call decompInit_lnd_check_errors() + + call decompInit_lnd_allocate() + + call memcheck('decompInit_lnd: after allocate') - ! allocate and initialize procinfo and clumps + ! Initialize procinfo and clumps ! beg and end indices initialized for simple addition of cells later - allocate(procinfo%cid(clump_pproc), stat=ier) - if (ier /= 0) then - write(iulog,*) 'decompInit_lnd(): allocation error for procinfo%cid' - call endrun(msg=errMsg(sourcefile, __LINE__)) - return - endif procinfo%nclumps = clump_pproc procinfo%cid(:) = -1 procinfo%ncells = 0 @@ -125,12 +110,6 @@ subroutine decompInit_lnd(lni, lnj, amask) procinfo%endp = 0 procinfo%endCohort = 0 - allocate(clumps(nclumps), stat=ier) - if (ier /= 0) then - write(iulog,*) 'decompInit_lnd(): allocation error for clumps' - call endrun(msg=errMsg(sourcefile, __LINE__)) - return - end if clumps(:)%owner = -1 clumps(:)%ncells = 0 clumps(:)%nlunits = 0 @@ -153,43 +132,22 @@ subroutine decompInit_lnd(lni, lnj, amask) do n = 1,nclumps pid = mod(n-1,npes) if (pid < 0 .or. pid > npes-1) then - write(iulog,*) 'decompInit_lnd(): round robin pid error ',n,pid,npes - call endrun(msg=errMsg(sourcefile, __LINE__)) + write(iulog,*) 'Round robin pid error: n, pid, npes = ',n,pid,npes + call endrun(msg="Round robin pid error", file=sourcefile, line=__LINE__) return endif clumps(n)%owner = pid if (iam == pid) then cid = cid + 1 if (cid < 1 .or. 
cid > clump_pproc) then - write(iulog,*) 'decompInit_lnd(): round robin pid error ',n,pid,npes - call endrun(msg=errMsg(sourcefile, __LINE__)) + write(iulog,*) 'round robin pid error ',n,pid,npes + call endrun(msg="round robin pid error", file=sourcefile, line=__LINE__) return endif procinfo%cid(cid) = n endif enddo - ! count total land gridcells - numg = 0 - do ln = 1,lns - if (amask(ln) == 1) then - numg = numg + 1 - endif - enddo - - if (npes > numg) then - write(iulog,*) 'decompInit_lnd(): Number of processes exceeds number ', & - 'of land grid cells',npes,numg - call endrun(msg=errMsg(sourcefile, __LINE__)) - return - end if - if (nclumps > numg) then - write(iulog,*) 'decompInit_lnd(): Number of clumps exceeds number ', & - 'of land grid cells',nclumps,numg - call endrun(msg=errMsg(sourcefile, __LINE__)) - return - end if - if (float(numg)/float(nclumps) < float(nsegspc)) then seglen1 = .true. seglen = 1.0_r8 @@ -206,7 +164,6 @@ subroutine decompInit_lnd(lni, lnj, amask) ! Assign gridcells to clumps (and thus pes) --- - allocate(lcid(lns)) lcid(:) = 0 ng = 0 do ln = 1,lns @@ -255,19 +212,7 @@ subroutine decompInit_lnd(lni, lnj, amask) ! Set gindex_global - allocate(gdc2glo(numg), stat=ier) - if (ier /= 0) then - write(iulog,*) 'decompInit_lnd(): allocation error1 for gdc2glo , etc' - call endrun(msg=errMsg(sourcefile, __LINE__)) - return - end if gdc2glo(:) = 0 - allocate(clumpcnt(nclumps),stat=ier) - if (ier /= 0) then - write(iulog,*) 'decompInit_lnd(): allocation error1 for clumpcnt' - call endrun(msg=errMsg(sourcefile, __LINE__)) - return - end if ! clumpcnt is the start gdc index of each clump @@ -303,15 +248,11 @@ subroutine decompInit_lnd(lni, lnj, amask) nglob_x = lni ! decompMod module variables nglob_y = lnj ! 
decompMod module variables call get_proc_bounds(bounds) - allocate(gindex_global(1:bounds%endg)) do n = procinfo%begg,procinfo%endg gindex_global(n-procinfo%begg+1) = gdc2glo(n) enddo - call memcheck('decompInit_lnd: after allocate before deallocate') - - deallocate(clumpcnt) - deallocate(gdc2glo) + call decompInit_lnd_clean() call memcheck('decompInit_lnd: after deallocate') @@ -328,6 +269,113 @@ subroutine decompInit_lnd(lni, lnj, amask) call shr_sys_flush(iulog) call t_stopf('decompInit_lnd') + !------------------------------------------------------------------------------ + ! Internal subroutines for this subroutine + contains + !------------------------------------------------------------------------------ + + !------------------------------------------------------------------------------ + subroutine decompInit_lnd_allocate() + ! Allocate the temporary and long term variables set here + + ! + ! Long-term: + ! Arrays from decompMod that are allocated here + ! This should move to a method in decompMod + ! as should the deallocates + ! + + ! allocate procinfo + allocate(procinfo%cid(clump_pproc), stat=ier) + if (ier /= 0) then + call endrun(msg='allocation error for procinfo%cid', file=sourcefile, line=__LINE__) + return + endif + allocate(clumps(nclumps), stat=ier) + if (ier /= 0) then + write(iulog,*) 'allocation error for clumps: nclumps=', nclumps + call endrun(msg='allocation error for clumps', file=sourcefile, line=__LINE__) + return + end if + + allocate(gdc2glo(numg), stat=ier) + if (ier /= 0) then + call endrun(msg="allocation error1 for gdc2glo , etc", file=sourcefile, line=__LINE__) + return + end if + allocate(gindex_global(1:bounds%endg)) + + ! 
Temporary arrays that are just used in decompInit_lnd + allocate(lcid(lns)) + allocate(clumpcnt(nclumps),stat=ier) + if (ier /= 0) then + call endrun(msg="allocation error2 for clumpcnt", file=sourcefile, line=__LINE__) + return + end if + + end subroutine decompInit_lnd_allocate + + subroutine decompInit_lnd_clean() + ! Deallocate the temporary variables used in decompInit_lnd + deallocate(clumpcnt) + deallocate(gdc2glo) + !deallocate(lcid) + end subroutine decompInit_lnd_clean + + subroutine decompMod_clean() + ! Deallocate the long-term variables created in decompInit_lnd + ! This should be moved to decompMod + + deallocate(clumps) + deallocate(procinfo%cid) + deallocate(gindex_global) + nclumps = 0 + end subroutine decompMod_clean + + subroutine decompInit_lnd_check_errors() + ! Do some general error checking on input options + + !--- set and verify nclumps --- + if (clump_pproc > 0) then + nclumps = clump_pproc * npes + if (nclumps < npes) then + write(iulog,*) 'Number of gridcell clumps= ',nclumps, & + ' is less than the number of processes = ', npes + call endrun(msg="Number of clumps exceeds number of processes", & + file=sourcefile, line=__LINE__) + return + end if + else + write(iulog,*) 'ERROR: Bad clump_pproc=', clump_pproc + call endrun(msg='clump_pproc must be greater than 0', file=sourcefile, line=__LINE__) + return + end if + + ! 
count total land gridcells + numg = 0 + do ln = 1,lns + if (amask(ln) == 1) then + numg = numg + 1 + endif + enddo + + if (npes > numg) then + write(iulog,*) 'Number of processes > gridcells: npes=',npes,' num gridcells = ', numg + call endrun(msg="Number of processes exceeds number of land grid cells", & + file=sourcefile, line=__LINE__) + return + end if + if (nclumps > numg) then + write(iulog,*) 'Number of clumps > gridcells nclumps = ', & + nclumps, ' num gridcells = ', numg + call endrun(msg="Number of clumps exceeds number of land grid cells", & + file=sourcefile, line=__LINE__) + return + end if + end subroutine decompInit_lnd_check_errors + + !------------------------------------------------------------------------------ + end subroutine decompInit_lnd !------------------------------------------------------------------------------ @@ -485,13 +533,14 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) clumps(n)%npatches /= allvecg(n,4) .or. & clumps(n)%nCohorts /= allvecg(n,5)) then - write(iulog ,*) 'decompInit_glcp(): allvecg error ncells ',iam,n,clumps(n)%ncells ,allvecg(n,1) - write(iulog ,*) 'decompInit_glcp(): allvecg error lunits ',iam,n,clumps(n)%nlunits ,allvecg(n,2) - write(iulog ,*) 'decompInit_glcp(): allvecg error ncols ',iam,n,clumps(n)%ncols ,allvecg(n,3) - write(iulog ,*) 'decompInit_glcp(): allvecg error patches',iam,n,clumps(n)%npatches ,allvecg(n,4) - write(iulog ,*) 'decompInit_glcp(): allvecg error cohorts',iam,n,clumps(n)%nCohorts ,allvecg(n,5) + write(iulog ,*) 'allvecg error: iam,n ',iam,n + write(iulog ,*) 'allvecg error ncells,allvecg ',iam,n,clumps(n)%ncells ,allvecg(n,1) + write(iulog ,*) 'allvecg error lunits,allvecg ',iam,n,clumps(n)%nlunits ,allvecg(n,2) + write(iulog ,*) 'allvecg error ncols,allvecg ',iam,n,clumps(n)%ncols ,allvecg(n,3) + write(iulog ,*) 'allvecg error patches,allvecg',iam,n,clumps(n)%npatches ,allvecg(n,4) + write(iulog ,*) 'allvecg error cohorts,allvecg',iam,n,clumps(n)%nCohorts ,allvecg(n,5) - call 
endrun(msg=errMsg(sourcefile, __LINE__)) + call endrun(msg="allvecg error cohorts", file=sourcefile, line=__LINE__) return endif enddo From d4ef84a361dc324bff7a99c1f5fbd1b0f62009e5 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 20 Aug 2025 13:22:52 -0600 Subject: [PATCH 068/141] Make decomp_init single grid case run with mpi-serial --- cime_config/testdefs/testlist_clm.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cime_config/testdefs/testlist_clm.xml b/cime_config/testdefs/testlist_clm.xml index 0d4a56ad54..6c3253564a 100644 --- a/cime_config/testdefs/testlist_clm.xml +++ b/cime_config/testdefs/testlist_clm.xml @@ -4165,7 +4165,7 @@ - + From d3178e1feb724c5c729a6a7e729a07ce3dd54599 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 21 Aug 2025 14:07:14 -0600 Subject: [PATCH 069/141] Add for_testing options to namelist handling to bypass init and run --- bld/CLMBuildNamelist.pm | 1 + bld/namelist_files/namelist_definition_ctsm.xml | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/bld/CLMBuildNamelist.pm b/bld/CLMBuildNamelist.pm index 4cd02532f8..2f4c3bb3e9 100755 --- a/bld/CLMBuildNamelist.pm +++ b/bld/CLMBuildNamelist.pm @@ -5290,6 +5290,7 @@ sub write_output_files { push @groups, "clm_canopy_inparm"; push @groups, "prigentroughness"; push @groups, "zendersoilerod"; + push @groups, "for_testing_options"; if (remove_leading_and_trailing_quotes($nl->get_value('snow_cover_fraction_method')) eq 'SwensonLawrence2012') { push @groups, "scf_swenson_lawrence_2012_inparm"; } diff --git a/bld/namelist_files/namelist_definition_ctsm.xml b/bld/namelist_files/namelist_definition_ctsm.xml index 03c9ba420e..18cba2a3b8 100644 --- a/bld/namelist_files/namelist_definition_ctsm.xml +++ b/bld/namelist_files/namelist_definition_ctsm.xml @@ -1242,6 +1242,22 @@ Whether to use subgrid fluxes for snow Whether snow on the vegetation canopy affects the radiation/albedo calculations + + + + + + +For testing whether to bypass the rest of 
the initiatlization after the self test driver is run + + + +For testing whether to bypass most of the run phase other than the clock advance + + + Whether to run some tests of ncdio_pio as part of the model run. This is From d65ecfbc213cc88eb5b5f9ba3b3d50f899dcb359 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 21 Aug 2025 16:43:42 -0600 Subject: [PATCH 070/141] Turn the bypass init and run logicals for testing on --- .../testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm index 6187386336..9912c2769d 100644 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm @@ -1 +1,3 @@ +for_testing_bypass_init = .true. +for_testing_bypass_run = .true. for_testing_run_ncdiopio_tests = .true. From 96e0c9447af695d0f3ea53a45b41bdadd8689237 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 21 Aug 2025 16:44:35 -0600 Subject: [PATCH 071/141] Add a namelist read and some logical settings to bypass init and run phases as well as logical functions to do that --- src/main/clm_driver.F90 | 7 +++ src/main/clm_initializeMod.F90 | 10 +++- src/main/clm_instMod.F90 | 5 +- src/self_tests/SelfTestDriver.F90 | 90 ++++++++++++++++++++++++++++++- 4 files changed, 109 insertions(+), 3 deletions(-) diff --git a/src/main/clm_driver.F90 b/src/main/clm_driver.F90 index 2cea242a67..64db2d88f8 100644 --- a/src/main/clm_driver.F90 +++ b/src/main/clm_driver.F90 @@ -85,6 +85,7 @@ module clm_driver use clm_instMod use SoilMoistureStreamMod , only : PrescribedSoilMoistureInterp, PrescribedSoilMoistureAdvance use SoilBiogeochemDecompCascadeConType , only : no_soil_decomp, decomp_method + use SelfTestDriver , only : for_testing_bypass_run_except_clock_advance ! ! 
!PUBLIC TYPES: implicit none @@ -165,6 +166,7 @@ subroutine clm_drv(doalb, nextsw_cday, declinp1, declin, rstwr, nlend, rdate, ro ! CalcIrrigationNeeded. Simply declaring this variable makes the ICE go away. real(r8), allocatable :: dummy1_to_make_pgi_happy(:) !----------------------------------------------------------------------- + if ( for_testing_bypass_run_except_clock_advance() ) return ! Determine processor bounds and clumps for this processor @@ -1576,6 +1578,8 @@ subroutine clm_drv_init(bounds, & integer :: fp, fc ! filter indices !----------------------------------------------------------------------- + if ( for_testing_bypass_run_except_clock_advance() ) return + associate( & snl => col%snl , & ! Input: [integer (:) ] number of snow layers @@ -1657,6 +1661,7 @@ subroutine clm_drv_patch2col (bounds, & ! !LOCAL VARIABLES: integer :: c,fc ! indices ! ----------------------------------------------------------------- + if ( for_testing_bypass_run_except_clock_advance() ) return ! Note: lake points are excluded from many of the following ! averages. For some fields, this is because the field doesn't @@ -1752,6 +1757,8 @@ subroutine write_diagnostic (bounds, nstep, lnd2atm_inst) integer :: status(MPI_STATUS_SIZE) ! 
mpi status !------------------------------------------------------------------------ + if ( for_testing_bypass_run_except_clock_advance() ) return + call get_proc_global(ng=numg) if (masterproc) then diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index da8185be31..80cddde92c 100644 --- a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -30,7 +30,7 @@ module clm_initializeMod use CLMFatesInterfaceMod , only : CLMFatesGlobals1,CLMFatesGlobals2 use CLMFatesInterfaceMod , only : CLMFatesTimesteps use dynSubgridControlMod , only : dynSubgridControl_init, get_reset_dynbal_baselines - use SelfTestDriver , only : self_test_driver + use SelfTestDriver , only : self_test_driver, for_testing_bypass_init_after_self_tests use SoilMoistureStreamMod , only : PrescribedSoilMoistureInit use clm_instMod ! @@ -67,6 +67,7 @@ subroutine initialize1(dtime) use SoilBiogeochemDecompCascadeConType , only : decomp_cascade_par_init use CropReprPoolsMod , only: crop_repr_pools_init use HillslopeHydrologyMod, only: hillslope_properties_init + use SelfTestDriver , only: self_test_readnml ! ! !ARGUMENTS integer, intent(in) :: dtime ! model time step (seconds) @@ -104,6 +105,8 @@ subroutine initialize1(dtime) call surfrd_get_num_patches(fsurdat, actual_maxsoil_patches, actual_numpft, actual_numcft) call surfrd_get_nlevurb(fsurdat, actual_nlevurb) + call self_test_readnml( NLFilename ) + ! If fates is on, we override actual_maxsoil_patches. FATES dictates the ! number of patches per column. We still use numcft from the surface ! file though... @@ -185,6 +188,7 @@ subroutine initialize2(ni,nj, currtime) use FATESFireFactoryMod , only : scalar_lightning use dynFATESLandUseChangeMod , only : dynFatesLandUseInit use HillslopeHydrologyMod , only : InitHillslope + use SelfTestDriver , only : for_testing_bypass_init_after_self_tests ! ! !ARGUMENTS integer, intent(in) :: ni, nj ! 
global grid sizes @@ -467,6 +471,7 @@ subroutine initialize2(ni,nj, currtime) call bgc_vegetation_inst%Init2(bounds_proc, NLFilename) end if + if ( .not. for_testing_bypass_init_after_self_tests() )then if (use_cn) then ! NOTE(wjs, 2016-02-23) Maybe the rest of the body of this conditional should also @@ -510,6 +515,7 @@ subroutine initialize2(ni,nj, currtime) if (nsrest == nsrContinue ) then call htapes_fieldlist() end if + end if ! Read restart/initial info is_cold_start = .false. @@ -684,6 +690,7 @@ subroutine initialize2(ni,nj, currtime) call hist_htapes_build() end if + if ( .not. for_testing_bypass_init_after_self_tests() )then ! Initialize variables that are associated with accumulated fields. ! The following is called for both initial and restart runs and must ! must be called after the restart file is read @@ -767,6 +774,7 @@ subroutine initialize2(ni,nj, currtime) water_inst%waterdiagnosticbulk_inst, canopystate_inst, & soilstate_inst, soilbiogeochem_carbonflux_inst) end if + end if ! topo_glc_mec was allocated in initialize1, but needed to be kept around through ! initialize2 because it is used to initialize other variables; now it can be deallocated diff --git a/src/main/clm_instMod.F90 b/src/main/clm_instMod.F90 index 7d9a0f6ad2..c8213b75ee 100644 --- a/src/main/clm_instMod.F90 +++ b/src/main/clm_instMod.F90 @@ -208,6 +208,7 @@ subroutine clm_instInit(bounds) use HillslopeHydrologyMod , only : SetHillslopeSoilThickness use initVerticalMod , only : setSoilLayerClass use DustEmisFactory , only : create_dust_emissions + use SelfTestDriver , only : for_testing_bypass_init_after_self_tests ! ! !ARGUMENTS type(bounds_type), intent(in) :: bounds ! processor bounds @@ -269,7 +270,9 @@ subroutine clm_instInit(bounds) call humanindex_inst%Init(bounds) ! Initialize urban time varying data - call urbantv_inst%Init(bounds, NLFilename) + if ( .not. for_testing_bypass_init_after_self_tests() )then + call urbantv_inst%Init(bounds, NLFilename) + end if ! 
Initialize vertical data components diff --git a/src/self_tests/SelfTestDriver.F90 b/src/self_tests/SelfTestDriver.F90 index d109a27827..7d784f348e 100644 --- a/src/self_tests/SelfTestDriver.F90 +++ b/src/self_tests/SelfTestDriver.F90 @@ -16,7 +16,14 @@ module SelfTestDriver ! Public routines - public :: self_test_driver + public :: self_test_driver ! Run the self-tests asked for + public :: self_test_readnml ! Read in the general self testing options for overall code flow + public :: for_testing_bypass_init_after_self_tests ! For testing bypass the rest of the initialization after the self test driver was run + public :: for_testing_bypass_run_except_clock_advance ! For testing bypass most of the run phase other than the clock advance + + ! Private module data + logical :: for_testing_bypass_init ! For testing bypass the initialization phase after the self-test driver + logical :: for_testing_bypass_run ! For testing bypass most of the run phase except the time advance character(len=*), parameter, private :: sourcefile = & __FILE__ @@ -46,4 +53,85 @@ subroutine self_test_driver(bounds) end subroutine self_test_driver + !----------------------------------------------------------------------- + subroutine self_test_readnml(NLFileName) + ! + ! !DESCRIPTION: + ! Namelist read for the self-test driver. This includes bypass options + ! that will be used in other parts of the code to bypass bits of the code + ! for testing purposes. + ! + ! !USES: + use shr_nl_mod , only : shr_nl_find_group_name + use spmdMod, only : masterproc, mpicom + use shr_mpi_mod, only : shr_mpi_bcast + use clm_varctl, only : iulog + ! + ! !ARGUMENTS: + character(len=*), intent(in) :: NLFilename ! Namelist filename + ! + ! !LOCAL VARIABLES: + integer :: ierr ! error code + integer :: unitn ! unit for namelist file + + ! 
Namelist name: this has to be matched with the name in the read statement + character(len=*), parameter :: nmlname = 'for_testing_options' + !----------------------------------------------------------------------- + + namelist /for_testing_options/ for_testing_bypass_init, for_testing_bypass_run + + ! Initialize options to default values, in case they are not specified in + ! the namelist + + if (masterproc) then + write(iulog,*) 'Read in '//nmlname//' namelist' + open(newunit=unitn, status='old', file=NLFilename) + call shr_nl_find_group_name(unitn, nmlname, status=ierr) + if (ierr == 0) then + read(unit=unitn, nml=for_testing_options, iostat=ierr) + if (ierr /= 0) then + call endrun(msg="ERROR reading "//nmlname//"namelist", file=sourcefile, line=__LINE__) + end if + else + call endrun(msg="ERROR finding "//nmlname//"namelist", file=sourcefile, line=__LINE__) + end if + close(unitn) + end if + + call shr_mpi_bcast (for_testing_bypass_init, mpicom) + call shr_mpi_bcast (for_testing_bypass_run, mpicom) + + if (masterproc) then + write(iulog,*) ' ' + write(iulog,*) nmlname//' settings:' + write(iulog,nml=for_testing_options) + write(iulog,*) ' ' + end if + + end subroutine self_test_readnml + + !----------------------------------------------------------------------- + + logical function for_testing_bypass_init_after_self_tests() + ! Determine if should exit initialization early after having run the self tests + if ( for_testing_bypass_init ) then + for_testing_bypass_init_after_self_tests = .true. + else + for_testing_bypass_init_after_self_tests = .false. + end if + end function for_testing_bypass_init_after_self_tests + + !----------------------------------------------------------------------- + + logical function for_testing_bypass_run_except_clock_advance() + ! Determine if should skip most of the run phase other than the clock advance + if ( for_testing_bypass_init ) then + for_testing_bypass_run_except_clock_advance = .true.
+ else + for_testing_bypass_run_except_clock_advance = .false. + end if + end function for_testing_bypass_run_except_clock_advance + + !----------------------------------------------------------------------- + end module SelfTestDriver From 70067404e7287c755d3c45b2efdcf126eb3a6a4b Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 21 Aug 2025 16:54:44 -0600 Subject: [PATCH 072/141] Add use of abortutils so can make endrun calls --- src/self_tests/SelfTestDriver.F90 | 1 + 1 file changed, 1 insertion(+) diff --git a/src/self_tests/SelfTestDriver.F90 b/src/self_tests/SelfTestDriver.F90 index 7d784f348e..e19fff58bd 100644 --- a/src/self_tests/SelfTestDriver.F90 +++ b/src/self_tests/SelfTestDriver.F90 @@ -9,6 +9,7 @@ module SelfTestDriver use clm_varctl, only : for_testing_run_ncdiopio_tests use decompMod, only : bounds_type use TestNcdioPio, only : test_ncdio_pio + use abortutils, only : endrun implicit none private From a1fbe32f994e613040afc807ef29fd1bf1e1f797 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 21 Aug 2025 20:56:24 -0600 Subject: [PATCH 073/141] Add another error check test, this should fail, as it isn't handled --- src/self_tests/TestDecompInit.F90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/self_tests/TestDecompInit.F90 b/src/self_tests/TestDecompInit.F90 index a24bec8823..75445cd999 100644 --- a/src/self_tests/TestDecompInit.F90 +++ b/src/self_tests/TestDecompInit.F90 @@ -68,6 +68,8 @@ subroutine test_decomp_init() call test_decompInit_lnd_abort_on_too_big_clump_pproc() call write_to_log('test_decompInit_lnd_abort_when_npes_too_large') call test_decompInit_lnd_abort_when_npes_too_large() + call write_to_log('test_decompInit_lnd_abort_on_too_small_nsegspc') + call test_decompInit_lnd_abort_on_too_small_nsegspc() call clean From 93628b2616a7b6a2dc2b443cc4c748819f262adf Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 22 Aug 2025 03:05:10 -0600 Subject: [PATCH 074/141] Add bypassing the run phase in the for_testing tests, and
remove it from run_self_tests which inherits from it --- .../clm/for_testing_fastsetup_bypassrun/user_nl_clm | 3 +++ .../testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/user_nl_clm index c2a2d14793..7334edff3d 100644 --- a/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/user_nl_clm @@ -1,3 +1,6 @@ +! Skip the run phase +for_testing_bypass_run = .true. + ! Turn off history, restarts, and output hist_empty_htapes = .true. use_noio = .true. diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm index c6d13ae7c5..6ee07df73f 100644 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm @@ -1,5 +1,8 @@ +! Bypass as much of the init phase as can be done +! Bypassing the run phase already was inherited from the for_testing_fastsetup_bypassrun testmod for_testing_bypass_init = .true. -for_testing_bypass_run = .true. + +! Turn on some of the self tests for_testing_run_ncdiopio_tests = .true. ! 
Turn off history, restarts, and output From 208a6d211ca74e7713e3c190a6da1ae78b5f0ca6 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 22 Aug 2025 03:09:37 -0600 Subject: [PATCH 075/141] Remove some of the previous bypassing changes that aren't needed here --- src/cpl/nuopc/lnd_comp_nuopc.F90 | 20 ++++++++++---------- src/cpl/nuopc/lnd_import_export.F90 | 5 +++-- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index 7245954b4d..c48452eae9 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -503,12 +503,12 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) else single_column = .false. end if - if ( for_testing_exit_after_self_tests) then + !if ( for_testing_exit_after_self_tests) then ! ******************* ! *** RETURN HERE *** ! ******************* - RETURN - end if + !RETURN + !end if !---------------------------------------------------------------------------- ! Reset shr logging to my log file @@ -788,9 +788,9 @@ subroutine ModelAdvance(gcomp, rc) if (single_column .and. .not. scol_valid) then RETURN end if - if (for_testing_exit_after_self_tests) then - RETURN - end if + !if (for_testing_exit_after_self_tests) then + ! RETURN + !end if !$ call omp_set_num_threads(nthrds) @@ -1022,7 +1022,7 @@ subroutine ModelSetRunClock(gcomp, rc) rc = ESMF_SUCCESS call ESMF_LogWrite(subname//' called', ESMF_LOGMSG_INFO) if (.not. scol_valid) return - if (for_testing_exit_after_self_tests) return + !if (for_testing_exit_after_self_tests) return ! query the Component for its clocks call NUOPC_ModelGet(gcomp, driverClock=dclock, modelClock=mclock, rc=rc) @@ -1335,9 +1335,9 @@ subroutine CheckImport(gcomp, rc) if (single_column .and. .not. scol_valid) then RETURN end if - if (for_testing_exit_after_self_tests) then - RETURN - end if + !if (for_testing_exit_after_self_tests) then + !RETURN + !end if ! 
The remander of this should be equivalent to the NUOPC internal routine ! from NUOPC_ModeBase.F90 diff --git a/src/cpl/nuopc/lnd_import_export.F90 b/src/cpl/nuopc/lnd_import_export.F90 index b8a5efeb8d..b1a41d0d66 100644 --- a/src/cpl/nuopc/lnd_import_export.F90 +++ b/src/cpl/nuopc/lnd_import_export.F90 @@ -257,11 +257,12 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r if (shr_megan_mechcomps_n .ne. megan_nflds) call shr_sys_abort('ERROR: megan field count mismatch') ! CARMA volumetric soil water from land + call shr_carma_readnl('drv_flds_in', carma_fields) ! export to atm call fldlist_add(fldsFrLnd_num, fldsFrlnd, trim(flds_scalar_name)) + call fldlist_add(fldsFrLnd_num, fldsFrlnd, 'Sl_lfrin') if (send_to_atm) then - call fldlist_add(fldsFrLnd_num, fldsFrlnd, 'Sl_lfrin') call fldlist_add(fldsFrLnd_num, fldsFrlnd, Sl_t ) call fldlist_add(fldsFrLnd_num, fldsFrlnd, Sl_tref ) call fldlist_add(fldsFrLnd_num, fldsFrlnd, Sl_qref ) @@ -782,7 +783,6 @@ subroutine export_fields( gcomp, bounds, glc_present, rof_prognostic, & ! output to mediator ! ----------------------- - if (send_to_atm) then call state_setexport_1d(exportState, Sl_lfrin, ldomain%frac(begg:), init_spval=.false., rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return @@ -790,6 +790,7 @@ subroutine export_fields( gcomp, bounds, glc_present, rof_prognostic, & ! output to atm ! ----------------------- + if (send_to_atm) then call state_setexport_1d(exportState, Sl_t , lnd2atm_inst%t_rad_grc(begg:), & init_spval=.true., rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return From 28834c990c2eed5b1a52d93066b1bbae0495cf8d Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 22 Aug 2025 11:55:37 -0600 Subject: [PATCH 076/141] Update bld/namelist_files/namelist_definition_ctsm.xml Fix spelling from review. 
Co-authored-by: Sam Rabin --- bld/namelist_files/namelist_definition_ctsm.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bld/namelist_files/namelist_definition_ctsm.xml b/bld/namelist_files/namelist_definition_ctsm.xml index 18cba2a3b8..db00c59791 100644 --- a/bld/namelist_files/namelist_definition_ctsm.xml +++ b/bld/namelist_files/namelist_definition_ctsm.xml @@ -1249,7 +1249,7 @@ Whether snow on the vegetation canopy affects the radiation/albedo calculations -For testing whether to bypass the rest of the initiatlization after the self test driver is run +For testing whether to bypass the rest of the initialization after the self test driver is run Date: Mon, 25 Aug 2025 13:03:12 -0600 Subject: [PATCH 077/141] decompInit/decompMod: initialize pointers to null, add checking that array sizes are set before allocates, initialize some decompMod values to invalid for error checking, add error checking to get_proc_bounds/get_proc_clumps, separate out allocate for gindex to own allocate method, as it has to be done later after decomp is done, these are all improvements in #3448 --- src/main/decompInitMod.F90 | 28 ++++++++++++++++- src/main/decompMod.F90 | 62 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 86 insertions(+), 4 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index d6a03d5dba..f446de83e3 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -83,9 +83,15 @@ subroutine decompInit_lnd(lni, lnj, amask) lns = lni * lnj + nclumps = -1 + numg = -1 + + !
Do some error checking and also set nclumps and numg call decompInit_lnd_check_errors() call decompInit_lnd_allocate() + call get_proc_bounds(bounds) + call decompInit_lnd_gindex_global_allocate( bounds ) call memcheck('decompInit_lnd: after allocate') @@ -291,6 +297,10 @@ subroutine decompInit_lnd_allocate() call endrun(msg='allocation error for procinfo%cid', file=sourcefile, line=__LINE__) return endif + if ( nclumps < 1 )then + call endrun(msg="nclumps is NOT set before allocation", file=sourcefile, line=__LINE__) + return + end if allocate(clumps(nclumps), stat=ier) if (ier /= 0) then write(iulog,*) 'allocation error for clumps: nclumps=', nclumps @@ -298,14 +308,21 @@ subroutine decompInit_lnd_allocate() return end if + if ( numg < 1 )then + call endrun(msg="numg is NOT set before allocation", file=sourcefile, line=__LINE__) + return + end if allocate(gdc2glo(numg), stat=ier) if (ier /= 0) then call endrun(msg="allocation error1 for gdc2glo , etc", file=sourcefile, line=__LINE__) return end if - allocate(gindex_global(1:bounds%endg)) ! Temporary arrays that are just used in decompInit_lnd + if ( lns < 1 )then + call endrun(msg="lns is NOT set before allocation", file=sourcefile, line=__LINE__) + return + end if allocate(lcid(lns)) allocate(clumpcnt(nclumps),stat=ier) if (ier /= 0) then @@ -315,6 +332,15 @@ subroutine decompInit_lnd_allocate() end subroutine decompInit_lnd_allocate + subroutine decompInit_lnd_gindex_global_allocate( bounds ) + type(bounds_type), intent(in) :: bounds ! contains subgrid bounds data + if ( bounds%endg < 1 )then + call endrun(msg="endg is NOT set before allocation", file=sourcefile, line=__LINE__) + return + end if + allocate(gindex_global(1:bounds%endg)) + end subroutine decompInit_lnd_gindex_global_allocate + subroutine decompInit_lnd_clean() ! 
Deallocate the temporary variables used in decompInit_lnd deallocate(clumpcnt) diff --git a/src/main/decompMod.F90 b/src/main/decompMod.F90 index 3603f12cbf..2d6389fe11 100644 --- a/src/main/decompMod.F90 +++ b/src/main/decompMod.F90 @@ -9,6 +9,7 @@ module decompMod use shr_kind_mod, only : r8 => shr_kind_r8 use shr_sys_mod , only : shr_sys_abort ! use shr_sys_abort instead of endrun here to avoid circular dependency + use shr_abort_mod , only : shr_abort_abort ! as above use clm_varctl , only : iulog ! ! !PUBLIC TYPES: @@ -66,7 +67,7 @@ module decompMod !---global information on each pe type processor_type integer :: nclumps ! number of clumps for processor_type iam - integer,pointer :: cid(:) ! clump indices + integer,pointer :: cid(:) => null() ! clump indices integer :: ncells ! number of gridcells in proc integer :: nlunits ! number of landunits in proc integer :: ncols ! number of columns in proc @@ -223,8 +224,18 @@ subroutine get_clump_bounds (n, bounds) #ifdef _OPENMP if ( OMP_GET_NUM_THREADS() == 1 .and. OMP_GET_MAX_THREADS() > 1 )then call shr_sys_abort( trim(subname)//' ERROR: Calling from inside a non-threaded region)') + return end if #endif + if ( .not. associated(procinfo%cid) )then + call shr_sys_abort( 'procinfo%cid) is NOT allocated yet', file=sourcefile, line=__LINE__) + return + end if + if ( n < 1 .or. 
n > procinfo%nclumps )then + write(iulog,*) 'Input clump index out of bounds: n = ', n + call shr_sys_abort( 'Input clump is out of bounds', file=sourcefile, line=__LINE__) + return + end if cid = procinfo%cid(n) bounds%begp = clumps(cid)%begp - procinfo%begp + 1 @@ -238,6 +249,28 @@ subroutine get_clump_bounds (n, bounds) bounds%begCohort = clumps(cid)%begCohort - procinfo%begCohort + 1 bounds%endCohort = clumps(cid)%endCohort - procinfo%begCohort + 1 + + if ( bounds%endp <= 0 )then + call shr_sys_abort( 'bounds%endp is not valid', file=sourcefile, line=__LINE__) + return + end if + if ( bounds%endc <= 0 )then + call shr_sys_abort( 'bounds%endc is not valid', file=sourcefile, line=__LINE__) + return + end if + if ( bounds%endl <= 0 )then + call shr_sys_abort( 'bounds%endl is not valid', file=sourcefile, line=__LINE__) + return + end if + if ( bounds%endg <= 0 )then + call shr_sys_abort( 'bounds%endg is not valid', file=sourcefile, line=__LINE__) + return + end if + if ( bounds%endCohort <= 0 )then + call shr_sys_abort( 'bounds%endCohort is not valid', file=sourcefile, line=__LINE__) + return + end if + bounds%level = bounds_level_clump bounds%clump_index = n @@ -274,6 +307,7 @@ subroutine get_proc_bounds (bounds, allow_call_from_threaded_region) #ifdef _OPENMP if ( OMP_GET_NUM_THREADS() > 1 .and. .not. 
l_allow_call_from_threaded_region )then call shr_sys_abort( trim(subname)//' ERROR: Calling from inside a threaded region') + return end if #endif @@ -288,6 +322,27 @@ subroutine get_proc_bounds (bounds, allow_call_from_threaded_region) bounds%begCohort = 1 bounds%endCohort = procinfo%endCohort - procinfo%begCohort + 1 + if ( bounds%endp <= 0 )then + call shr_sys_abort( 'bounds%endp is not valid', file=sourcefile, line=__LINE__) + return + end if + if ( bounds%endc <= 0 )then + call shr_sys_abort( 'bounds%endc is not valid', file=sourcefile, line=__LINE__) + return + end if + if ( bounds%endl <= 0 )then + call shr_sys_abort( 'bounds%endl is not valid', file=sourcefile, line=__LINE__) + return + end if + if ( bounds%endg <= 0 )then + call shr_sys_abort( 'bounds%endg is not valid', file=sourcefile, line=__LINE__) + return + end if + if ( bounds%endCohort <= 0 )then + call shr_sys_abort( 'bounds%endCohort is not valid', file=sourcefile, line=__LINE__) + return + end if + bounds%level = bounds_level_proc bounds%clump_index = -1 ! irrelevant for proc, so assigned a bogus value @@ -380,7 +435,7 @@ integer function get_global_index(subgrid_index, subgrid_level, donot_abort_on_b integer :: beg_index ! beginning proc index for subgrid_level integer :: end_index ! ending proc index for subgrid_level integer :: index ! index of the point to get - integer, pointer :: gindex(:) + integer, pointer :: gindex(:) => null() logical :: abort_on_badindex = .true. !---------------------------------------------------------------- @@ -444,7 +499,7 @@ function get_global_index_array(subgrid_index, bounds1, bounds2, subgrid_level) type(bounds_type) :: bounds_proc ! processor bounds integer :: beg_index ! 
beginning proc index for subgrid_level integer :: i - integer , pointer :: gindex(:) + integer , pointer :: gindex(:) => null() !---------------------------------------------------------------- SHR_ASSERT_ALL_FL((ubound(subgrid_index) == (/bounds2/)), sourcefile, __LINE__) @@ -546,6 +601,7 @@ subroutine get_subgrid_level_gindex (subgrid_level, gindex) integer , pointer :: gindex(:) !---------------------------------------------------------------------- + gindex => null() ! Make sure gindex is initialized to null select case (subgrid_level) case(subgrid_level_lndgrid) gindex => gindex_global From 45903c25313a6247a948861fab614717bef54663 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 25 Aug 2025 13:40:29 -0600 Subject: [PATCH 078/141] Add a standard ERP test with threading on to the decomp_init testlist, so a standard non strange test-setup is run in the testlist --- cime_config/testdefs/testlist_clm.xml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cime_config/testdefs/testlist_clm.xml b/cime_config/testdefs/testlist_clm.xml index ab53ea371d..a7842ea338 100644 --- a/cime_config/testdefs/testlist_clm.xml +++ b/cime_config/testdefs/testlist_clm.xml @@ -1457,9 +1457,12 @@ + + + From 76d1f5fb5ae813d0ea1f597db2a702666cc1b5cc Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 25 Aug 2025 14:27:36 -0600 Subject: [PATCH 079/141] Update the share code with a reset option, which turns out to not be needed now, but could be later --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index b5c2e8dd07..31e2f8a793 100644 --- a/.gitmodules +++ b/.gitmodules @@ -106,7 +106,7 @@ path = share url = https://github.com/ekluzek/CESM_share #fxtag = share1.1.9 #fxtag = add_jdennis_procstatus_module -fxtag = 9973692556da54f9562935be43c1d43b0607d24b +fxtag = 1a871cad0a90f8a361196f045313cca1919c7cbc fxrequired = ToplevelRequired # Standard Fork to compare to with "git fleximod test" to ensure personal forks aren't
committed fxDONOTUSEurl = https://github.com/ESCOMP/CESM_share From 4bc13a36c11833aa7769aeb41472a014ac4edc08 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 25 Aug 2025 19:00:41 -0600 Subject: [PATCH 080/141] Allow get_proc_bounds to exit early if only begg/endg will be needed, add error handling of nsegspc, don't check endCohort in get_proc_bounds and get_clump_bounds as doesn't seem to be set --- .../share_esmf/lnd_set_decomp_and_domain.F90 | 4 +- src/main/clm_initializeMod.F90 | 6 +-- src/main/decompInitMod.F90 | 15 +++++-- src/main/decompMod.F90 | 43 ++++++++++++------- 4 files changed, 43 insertions(+), 25 deletions(-) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index 9954027902..cf47a28724 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -163,7 +163,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes ! Get JUST gridcell processor bounds ! Remaining bounds (landunits, columns, patches) will be set after calling decompInit_glcp ! so get_proc_bounds is called twice and the gridcell information is just filled in twice - call get_proc_bounds(bounds) + call get_proc_bounds(bounds, allow_errors=.true.) begg = bounds%begg endg = bounds%endg @@ -374,7 +374,7 @@ subroutine lnd_set_decomp_and_domain_for_single_column(scol_lon, scol_lat, scol_ call decompInit_lnd(lni=1, lnj=1, amask=(/1/)) ! Initialize processor bounds - call get_proc_bounds(bounds) + call get_proc_bounds(bounds, allow_errors=.true.) ! allow errors since decomp not fully initialized !
Initialize domain data structure call domain_init(domain=ldomain, isgrid2d=.false., ni=1, nj=1, nbeg=1, nend=1) diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index 36c33dc914..5a786860d7 100644 --- a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -231,8 +231,8 @@ subroutine initialize2(ni,nj, currtime) call t_startf('clm_init2') call t_startf('clm_init2_part1') - ! Get processor bounds for gridcells - call get_proc_bounds(bounds_proc) + ! Get processor bounds for gridcells, just for gridcells + call get_proc_bounds(bounds_proc, allow_errors=.true.) ! Just get proc bounds for gridcells, other variables won't be set until after decompInit_clumps begg = bounds_proc%begg; endg = bounds_proc%endg ! Initialize glc behavior @@ -292,7 +292,7 @@ subroutine initialize2(ni,nj, currtime) call t_startf('clm_init2_subgrid') ! *** Get ALL processor bounds - for gridcells, landunit, columns and patches *** - call get_proc_bounds(bounds_proc) + call get_proc_bounds(bounds_proc) ! This has to be done after decompInit_clumps is called ! Allocate memory for subgrid data structures ! This is needed here BEFORE the following call to initGridcells diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index f446de83e3..b5dd8d71a9 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -90,8 +90,6 @@ subroutine decompInit_lnd(lni, lnj, amask) call decompInit_lnd_check_errors() call decompInit_lnd_allocate() - call get_proc_bounds(bounds) - call decompInit_lnd_gindex_global_allocate( bounds ) call memcheck('decompInit_lnd: after allocate') @@ -251,9 +249,11 @@ subroutine decompInit_lnd(lni, lnj, amask) ! Initialize global gindex (non-compressed, includes ocean points) ! Note that gindex_global goes from (1:endg) + call get_proc_bounds(bounds, allow_errors=.true.) ! This has to be done after procinfo is finalized + call decompInit_lnd_gindex_global_allocate( bounds ) !
This HAS to be done after prcoinfo is finalized + nglob_x = lni ! decompMod module variables nglob_y = lnj ! decompMod module variables - call get_proc_bounds(bounds) do n = procinfo%begg,procinfo%endg gindex_global(n-procinfo%begg+1) = gdc2glo(n) enddo @@ -361,6 +361,13 @@ end subroutine decompMod_clean subroutine decompInit_lnd_check_errors() ! Do some general error checking on input options + if (nsegspc < 1) then + write(iulog,*) 'nsegspc bad = ', nsegspc + call endrun(msg="Number of segments per clump (nsegspc) is less than 1 and can NOT be", & + file=sourcefile, line=__LINE__) + return + end if + !--- set and verify nclumps --- if (clump_pproc > 0) then nclumps = clump_pproc * npes @@ -448,7 +455,7 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) call t_startf('decompInit_clumps') call memcheck('decompInit_clumps: before alloc') !--- assign gridcells to clumps (and thus pes) --- - call get_proc_bounds(bounds) + call get_proc_bounds(bounds, allow_errors=.true.) begg = bounds%begg; endg = bounds%endg allocate(allvecl(nclumps,5)) ! local clumps [gcells,lunit,cols,patches,coh] diff --git a/src/main/decompMod.F90 b/src/main/decompMod.F90 index 2d6389fe11..aa7b785170 100644 --- a/src/main/decompMod.F90 +++ b/src/main/decompMod.F90 @@ -266,10 +266,12 @@ subroutine get_clump_bounds (n, bounds) call shr_sys_abort( 'bounds%endg is not valid', file=sourcefile, line=__LINE__) return end if - if ( bounds%endCohort <= 0 )then - call shr_sys_abort( 'bounds%endCohort is not valid', file=sourcefile, line=__LINE__) - return - end if + ! End Cohort isn't necessarily valid, so don't do this error check + !if ( bounds%endCohort <= 0 )then + ! write(iulog,*) 'endCohort = ', bounds%endCohort + ! call shr_sys_abort( 'bounds%endCohort is not valid', file=sourcefile, line=__LINE__) + ! 
return + !end if bounds%level = bounds_level_clump bounds%clump_index = n @@ -277,13 +279,14 @@ subroutine get_clump_bounds (n, bounds) end subroutine get_clump_bounds !------------------------------------------------------------------------------ - subroutine get_proc_bounds (bounds, allow_call_from_threaded_region) + subroutine get_proc_bounds (bounds, allow_call_from_threaded_region, allow_errors) ! ! !DESCRIPTION: ! Retrieve processor bounds ! ! !ARGUMENTS: type(bounds_type), intent(out) :: bounds ! processor bounds bounds + logical, intent(in), optional :: allow_errors ! Don't do the normal error checking ! Normally this routine will abort if it is called from within a threaded region, ! because in most cases you should be calling get_clump_bounds in that situation. If @@ -322,6 +325,20 @@ subroutine get_proc_bounds (bounds, allow_call_from_threaded_region) bounds%begCohort = 1 bounds%endCohort = procinfo%endCohort - procinfo%begCohort + 1 + bounds%level = bounds_level_proc + bounds%clump_index = -1 ! irrelevant for proc, so assigned a bogus value + + ! Some final error checking + ! Always check that gridcells are set + if ( bounds%endg <= 0 )then + call shr_sys_abort( 'bounds%endg is not valid', file=sourcefile, line=__LINE__) + return + end if + + !
Exit before checking if errors should be allowed + if ( present(allow_errors) ) then + if ( allow_errors ) return + end if if ( bounds%endp <= 0 )then call shr_sys_abort( 'bounds%endp is not valid', file=sourcefile, line=__LINE__) return @@ -334,17 +351,11 @@ subroutine get_proc_bounds (bounds, allow_call_from_threaded_region) call shr_sys_abort( 'bounds%endl is not valid', file=sourcefile, line=__LINE__) return end if - if ( bounds%endg <= 0 )then - call shr_sys_abort( 'bounds%endg is not valid', file=sourcefile, line=__LINE__) - return - end if - if ( bounds%endCohort <= 0 )then - call shr_sys_abort( 'bounds%endCohort is not valid', file=sourcefile, line=__LINE__) - return - end if - - bounds%level = bounds_level_proc - bounds%clump_index = -1 ! irrelevant for proc, so assigned a bogus value + ! End Cohort isn't necessarily valid, so don't do this error check + !if ( bounds%endCohort <= 0 )then + !call shr_sys_abort( 'bounds%endCohort is not valid', file=sourcefile, line=__LINE__) + !return + !end if end subroutine get_proc_bounds From 469c1083163a1c9f48e281a3d70a5144da9bf101 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Tue, 26 Aug 2025 20:31:56 -0600 Subject: [PATCH 081/141] Return up the chain in decompInit after endrun, for the testing. Do setup/clean for each DecompInit test, move the decomp_mod_clean to decompMod and use it for the decompInit tests --- src/main/decompInitMod.F90 | 40 +++++++++++-------- src/main/decompMod.F90 | 44 +++++++++++++++++++++ src/self_tests/TestDecompInit.F90 | 66 ++++++++++++++++++++++++++----- 3 files changed, 124 insertions(+), 26 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index b5dd8d71a9..840dd0c8e0 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -87,9 +87,11 @@ subroutine decompInit_lnd(lni, lnj, amask) numg = -1 ! 
Do some error checking and also set nclumps and numg - call decompInit_lnd_check_errors() + call decompInit_lnd_check_errors( ier ) + if (ier /= 0) return - call decompInit_lnd_allocate() + call decompInit_lnd_allocate( ier ) + if (ier /= 0) return call memcheck('decompInit_lnd: after allocate') @@ -250,7 +252,8 @@ subroutine decompInit_lnd(lni, lnj, amask) ! Initialize global gindex (non-compressed, includes ocean points) ! Note that gindex_global goes from (1:endg) call get_proc_bounds(bounds, allow_errors=.true.) ! This has to be done after procinfo is finalized - call decompInit_lnd_gindex_global_allocate( bounds ) ! This HAS to be done after prcoinfo is finalized + call decompInit_lnd_gindex_global_allocate( bounds, ier ) ! This HAS to be done after prcoinfo is finalized + if (ier /= 0) return nglob_x = lni ! decompMod module variables nglob_y = lnj ! decompMod module variables @@ -281,8 +284,9 @@ subroutine decompInit_lnd(lni, lnj, amask) !------------------------------------------------------------------------------ !------------------------------------------------------------------------------ - subroutine decompInit_lnd_allocate() + subroutine decompInit_lnd_allocate( ier ) ! Allocate the temporary and long term variables set here + integer, intent(out) :: ier ! error code ! ! Long-term: @@ -303,7 +307,7 @@ subroutine decompInit_lnd_allocate() end if allocate(clumps(nclumps), stat=ier) if (ier /= 0) then - write(iulog,*) 'allocation error for clumps: nclumps=', nclumps + write(iulog,*) 'allocation error for clumps: nclumps, ier=', nclumps, ier call endrun(msg='allocation error for clumps', file=sourcefile, line=__LINE__) return end if @@ -332,9 +336,14 @@ subroutine decompInit_lnd_allocate() end subroutine decompInit_lnd_allocate - subroutine decompInit_lnd_gindex_global_allocate( bounds ) + subroutine decompInit_lnd_gindex_global_allocate( bounds, ier ) + integer, intent(out) :: ier ! error code + type(bounds_type), intent(in) :: bounds ! 
contains subgrid bounds data + + ier = 0 if ( bounds%endg < 1 )then + ier = 1 call endrun(msg="endg is NOT set before allocation", file=sourcefile, line=__LINE__) return end if @@ -348,20 +357,13 @@ subroutine decompInit_lnd_clean() !deallocate(lcid) end subroutine decompInit_lnd_clean - subroutine decompMod_clean() - ! Deallocate the long-term variables created in decompInit_lnd - ! This should be moved to decompMod - - deallocate(clumps) - deallocate(procinfo%cid) - deallocate(gindex_global) - nclumps = 0 - end subroutine decompMod_clean - - subroutine decompInit_lnd_check_errors() + subroutine decompInit_lnd_check_errors( ier ) ! Do some general error checking on input options + integer, intent(out) :: ier ! error code + ier = 0 if (nsegspc < 1) then + ier = 1 write(iulog,*) 'nsegspc bad = ', nsegspc call endrun(msg="Number of segments per clump (nsegspc) is less than 1 and can NOT be", & file=sourcefile, line=__LINE__) @@ -372,6 +374,7 @@ subroutine decompInit_lnd_check_errors() if (clump_pproc > 0) then nclumps = clump_pproc * npes if (nclumps < npes) then + ier = 1 write(iulog,*) 'Number of gridcell clumps= ',nclumps, & ' is less than the number of processes = ', npes call endrun(msg="Number of clumps exceeds number of processes", & @@ -379,6 +382,7 @@ subroutine decompInit_lnd_check_errors() return end if else + ier = 1 write(iulog,*) 'ERROR: Bad clump_pproc=', clump_pproc call endrun(msg='clump_pproc must be greater than 0', file=sourcefile, line=__LINE__) return @@ -393,12 +397,14 @@ subroutine decompInit_lnd_check_errors() enddo if (npes > numg) then + ier = 1 write(iulog,*) 'Number of processes > gridcells: npes=',npes,' num gridcells = ', numg call endrun(msg="Number of processes exceeds number of land grid cells", & file=sourcefile, line=__LINE__) return end if if (nclumps > numg) then + ier = 1 write(iulog,*) 'Number of clumps > gridcells nclumps = ', & nclumps, ' num gridcells = ', numg call endrun(msg="Number of clumps exceeds number of land grid 
cells", & diff --git a/src/main/decompMod.F90 b/src/main/decompMod.F90 index aa7b785170..19f036bbb1 100644 --- a/src/main/decompMod.F90 +++ b/src/main/decompMod.F90 @@ -47,6 +47,7 @@ module decompMod public :: get_subgrid_level_from_name ! Given a name like nameg, return a subgrid level index like subgrid_level_gridcell public :: get_subgrid_level_gsize ! get global size associated with subgrid_level public :: get_subgrid_level_gindex ! get global index array associated with subgrid_level + public :: decompmod_clean ! Deallocate memory used by decompMod ! !PRIVATE MEMBER FUNCTIONS: ! @@ -633,4 +634,47 @@ subroutine get_subgrid_level_gindex (subgrid_level, gindex) end subroutine get_subgrid_level_gindex + !----------------------------------------------------------------------- + subroutine decompmod_clean() + ! Deallocate the decompMod long-term variables created in decompInit_lnd + + ! Set the total counts to zero + nclumps = 0 + numg = 0 + numl = 0 + numc = 0 + nump = 0 + numCohort = 0 + + ! 
Deallocate and set the pointers to null + if ( allocated(clumps) )then + deallocate(clumps) + end if + if ( associated(procinfo%cid) )then + deallocate(procinfo%cid) + procinfo%cid => null() + end if + if ( associated(gindex_global) )then + deallocate(gindex_global) + gindex_global => null() + end if + if ( associated(gindex_grc) )then + deallocate( gindex_grc ) + gindex_grc => null() + end if + if ( associated(gindex_lun) )then + deallocate( gindex_lun ) + gindex_lun => null() + end if + if ( associated(gindex_col) )then + deallocate( gindex_col ) + gindex_col => null() + end if + if ( associated(gindex_patch) )then + deallocate( gindex_patch ) + gindex_patch => null() + end if + end subroutine decompMod_clean + !----------------------------------------------------------------------- + end module decompMod diff --git a/src/self_tests/TestDecompInit.F90 b/src/self_tests/TestDecompInit.F90 index 75445cd999..f4e86e2724 100644 --- a/src/self_tests/TestDecompInit.F90 +++ b/src/self_tests/TestDecompInit.F90 @@ -26,6 +26,9 @@ module TestDecompInit integer, parameter :: ni = 10, nj = 5 integer :: amask(ni*nj) + integer :: default_npes + integer :: default_clump_pproc + character(len=*), parameter, private :: sourcefile = & __FILE__ @@ -58,6 +61,8 @@ subroutine test_decomp_init() ! 
!LOCAL VARIABLES: !----------------------------------------------------------------------- + default_npes = npes + default_clump_pproc = clump_pproc call write_to_log('start_test_decomp_init') call write_to_log('test_check_nclumps') @@ -70,15 +75,27 @@ subroutine test_decomp_init() call test_decompInit_lnd_abort_when_npes_too_large() call write_to_log('test_decompInit_lnd_abort_on_too_small_nsegspc') call test_decompInit_lnd_abort_on_too_small_nsegspc() - - call clean + call write_to_log('test_decompInit_lnd_check_sizes') + call test_decompInit_lnd_check_sizes() end subroutine test_decomp_init + !----------------------------------------------------------------------- + subroutine setup() + use clm_varctl, only : nsegspc + + clump_pproc = default_clump_pproc + nsegspc = 20 + npes = default_npes + amask(:) = 1 ! Set all to land + + end subroutine setup + !----------------------------------------------------------------------- subroutine test_decompInit_lnd_abort_on_bad_clump_pproc() character(len=CX) :: expected_msg, actual_msg + call setup() call endrun_init( .true. ) ! Do not abort on endrun for self-tests clump_pproc = 0 call write_to_log('decompInit_lnd with clump_pproc=0 should abort') @@ -91,12 +108,14 @@ subroutine test_decompInit_lnd_abort_on_bad_clump_pproc() call assert_equal( & expected=expected_msg, actual=actual_msg, & msg='decompInit_lnd did not abort with clump_pproc=0' ) + call clean() end subroutine test_decompInit_lnd_abort_on_bad_clump_pproc !----------------------------------------------------------------------- subroutine test_decompInit_lnd_abort_on_too_big_clump_pproc() character(len=CX) :: expected_msg, actual_msg + call setup() call endrun_init( .true. ) ! Do not abort on endrun for self-tests amask(:) = 1 ! 
Set all to land clump_pproc = (ni * nj + 1) / npes @@ -110,16 +129,39 @@ subroutine test_decompInit_lnd_abort_on_too_big_clump_pproc() call assert_equal( & expected=expected_msg, actual=actual_msg, & msg='decompInit_lnd did not abort with clump_pproc too large' ) - call assert_equal( numg, ni*nj, msg='numg is not as expected' ) + call clean() end subroutine test_decompInit_lnd_abort_on_too_big_clump_pproc + !----------------------------------------------------------------------- + subroutine test_decompInit_lnd_check_sizes() + use decompMod, only : get_proc_bounds + type(bounds_type) :: bounds + + integer :: expected_endg, expected_numg + + call setup() + expected_numg = ni*nj + if ( expected_numg < npes )then + call endrun( msg="npes is too large for this test", file=sourcefile, line=__LINE__ ) + end if + if ( modulo( expected_numg, npes ) /= 0 )then + call endrun( msg="npes does not evenly divide into numg so this test will not work", file=sourcefile, line=__LINE__ ) + end if + expected_endg = ni*nj / npes + amask(:) = 1 ! Set all to land + call decompInit_lnd( ni, nj, amask ) + call get_proc_bounds(bounds, allow_errors=.true.) + call assert_equal( bounds%begg, 1, msg='begg is not as expected' ) + call assert_equal( bounds%endg, expected_endg, msg='endg is not as expected' ) + call clean() + end subroutine test_decompInit_lnd_check_sizes + !----------------------------------------------------------------------- subroutine test_decompInit_lnd_abort_when_npes_too_large() character(len=CX) :: expected_msg, actual_msg - integer :: npes_orig + call setup() ! NOTE: This is arbitrarily modifying the NPES value -- so it MUST be reset set the END! - npes_orig = npes npes = ni*nj + 1 call endrun_init( .true. ) ! Do not abort on endrun for self-tests @@ -136,7 +178,8 @@ subroutine test_decompInit_lnd_abort_when_npes_too_large() msg='decompInit_lnd did not abort with npes too large' ) ! 
NOTE: Return npes to its original value - npes = npes_orig + npes = default_npes + call clean() end subroutine test_decompInit_lnd_abort_when_npes_too_large !----------------------------------------------------------------------- @@ -144,30 +187,34 @@ subroutine test_decompInit_lnd_abort_on_too_small_nsegspc() use clm_varctl, only : nsegspc character(len=CX) :: expected_msg, actual_msg + call setup() call endrun_init( .true. ) ! Do not abort on endrun for self-tests amask(:) = 1 ! Set all to land nsegspc = 0 call write_to_log('decompInit_lnd with nsegspc too small should abort') call decompInit_lnd( ni, nj, amask ) call write_to_log('check expected abort message') - expected_msg = 'nsegspc must be greater than 0' + expected_msg = 'Number of segments per clump (nsegspc) is less than 1 and can NOT be' actual_msg = get_last_endrun_msg() call endrun_init( .false. ) ! Turn back on to abort on the assert call write_to_log('call assert_equal to check the abort message') call assert_equal( & expected=expected_msg, actual=actual_msg, & msg='decompInit_lnd did not abort with too nsegspc too small' ) + call clean() end subroutine test_decompInit_lnd_abort_on_too_small_nsegspc !----------------------------------------------------------------------- subroutine test_check_nclumps() integer :: expected_nclumps + call setup() call endrun_init( .true. ) ! Do not abort on endrun for self-tests expected_nclumps = npes / clump_pproc call assert_equal(expected=expected_nclumps, actual=nclumps, & msg='nclumps are not as expected') call endrun_init( .false. ) + call clean() end subroutine test_check_nclumps !----------------------------------------------------------------------- @@ -186,7 +233,7 @@ subroutine write_to_log(msg) !----------------------------------------------------------------------- if (masterproc) then - write(*,'(a)') msg + write(iulog,'(a)') msg call shr_sys_flush(iulog) ! 
Flush the I/O buffers always end if @@ -196,12 +243,13 @@ end subroutine write_to_log subroutine clean ! ! !DESCRIPTION: - ! Do end-of-testing cleanup + ! Do end-of-testing cleanup after each test ! ! !ARGUMENTS: ! ! !LOCAL VARIABLES: !----------------------------------------------------------------------- + call decompmod_clean() end subroutine clean From 12da9cb74be889fe9f1c48ae3546a33afd5059af Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 27 Aug 2025 11:11:01 -0600 Subject: [PATCH 082/141] Bypass more code after running the self tests --- src/main/clm_initializeMod.F90 | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index 5a786860d7..892c0ccd8d 100644 --- a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -345,6 +345,7 @@ subroutine initialize2(ni,nj, currtime) ! Run any requested self-tests call self_test_driver(bounds_proc) + if ( .not. for_testing_bypass_init_after_self_tests() )then ! Deallocate surface grid dynamic memory for variables that aren't needed elsewhere. ! Some things are kept until the end of initialize2; urban_valid is kept through the ! end of the run for error checking, pct_urban_max is kept through the end of the run @@ -361,8 +362,10 @@ subroutine initialize2(ni,nj, currtime) allocate(nutrient_competition_method, & source=create_nutrient_competition_method(bounds_proc)) call readParameters(photosyns_inst) + end if ! End of bypass + ! Self test skipping should still do the time manager initialization ! Initialize time manager if (nsrest == nsrStartup) then call timemgr_init() @@ -387,6 +390,7 @@ subroutine initialize2(ni,nj, currtime) call InitDaylength(bounds_proc, declin=declin, declinm1=declinm1, obliquity=obliqr) call t_stopf('clm_init2_part2') + if ( .not. for_testing_bypass_init_after_self_tests() )then call t_startf('clm_init2_part3') !
Initialize Balance checking (after time-manager) call BalanceCheckInit() @@ -483,7 +487,6 @@ subroutine initialize2(ni,nj, currtime) call bgc_vegetation_inst%Init2(bounds_proc, NLFilename) end if - if ( .not. for_testing_bypass_init_after_self_tests() )then if (use_cn) then ! NOTE(wjs, 2016-02-23) Maybe the rest of the body of this conditional should also @@ -527,7 +530,7 @@ subroutine initialize2(ni,nj, currtime) if (nsrest == nsrContinue ) then call htapes_fieldlist() end if - end if + end if ! End of bypass ! Read restart/initial info is_cold_start = .false. @@ -623,6 +626,8 @@ subroutine initialize2(ni,nj, currtime) end if call t_startf('clm_init2_part5') + + if ( .not. for_testing_bypass_init_after_self_tests() )then ! If requested, reset dynbal baselines ! This needs to happen after reading the restart file (including after reading the ! interpolated restart file, if applicable). @@ -706,7 +711,6 @@ subroutine initialize2(ni,nj, currtime) call hist_htapes_build() end if - if ( .not. for_testing_bypass_init_after_self_tests() )then ! Initialize variables that are associated with accumulated fields. ! The following is called for both initial and restart runs and must ! must be called after the restart file is read @@ -790,7 +794,7 @@ subroutine initialize2(ni,nj, currtime) water_inst%waterdiagnosticbulk_inst, canopystate_inst, & soilstate_inst, soilbiogeochem_carbonflux_inst) end if - end if + end if ! end of bypass ! topo_glc_mec was allocated in initialize1, but needed to be kept around through ! 
initialize2 because it is used to initialize other variables; now it can be deallocated From d325b8726c7c479e35e7e85cac6601367ea78aa6 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 27 Aug 2025 13:08:54 -0600 Subject: [PATCH 083/141] Add calls to redo the decomp calls on the full grid so that get_proc_bounds can be called later, and add some bypass in the lnd_comp_nuopc cap layer for initialization --- src/self_tests/TestDecompInit.F90 | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/self_tests/TestDecompInit.F90 b/src/self_tests/TestDecompInit.F90 index f4e86e2724..5f0471714b 100644 --- a/src/self_tests/TestDecompInit.F90 +++ b/src/self_tests/TestDecompInit.F90 @@ -56,9 +56,14 @@ subroutine test_decomp_init() ! independent, though would prevent us from being able to have dependent tests the ! way we do here (where reads depend on earlier writes), for better or for worse. ! + ! !USERS: + use clm_InstMod, only : glc_behavior + use decompInitMod, only : decompInit_clumps, decompInit_glcp + use domainMod, only : ldomain ! !ARGUMENTS: ! ! !LOCAL VARIABLES: + integer, allocatable :: model_amask(:) !----------------------------------------------------------------------- default_npes = npes @@ -78,6 +83,16 @@ subroutine test_decomp_init() call write_to_log('test_decompInit_lnd_check_sizes') call test_decompInit_lnd_check_sizes() + ! + ! Call the decompInit initialization series a last time so that decompMod data can still be used + ! 
+ allocate( model_amask(ldomain%ni*ldomain%nj) ) + model_amask(:) = 1 + call decompInit_lnd( ldomain%ni, ldomain%nj, model_amask ) + call decompInit_clumps(ldomain%ni, ldomain%nj, glc_behavior) + call decompInit_glcp(ldomain%ni, ldomain%nj, glc_behavior) + deallocate( model_amask ) + end subroutine test_decomp_init !----------------------------------------------------------------------- From d92cb69a06cdd431fc2720697afa588db182f6b5 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 27 Aug 2025 13:34:29 -0600 Subject: [PATCH 084/141] Move bypass code around a bit so that most timers aren't half in/half out, and so that the self-tests can run to completion afterwards --- src/cpl/nuopc/lnd_comp_nuopc.F90 | 6 ++++++ src/main/clm_initializeMod.F90 | 2 +- src/main/clm_instMod.F90 | 5 +---- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index 1db7e93b17..be54aa8c90 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -362,6 +362,7 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) use lnd_set_decomp_and_domain , only : lnd_set_decomp_and_domain_from_readmesh use lnd_set_decomp_and_domain , only : lnd_set_mesh_for_single_column use lnd_set_decomp_and_domain , only : lnd_set_decomp_and_domain_for_single_column + use SelfTestDriver , only : for_testing_bypass_init_after_self_tests ! input/output variables type(ESMF_GridComp) :: gcomp @@ -685,10 +686,12 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) !-------------------------------- ! Create land export state !-------------------------------- + if ( .not. for_testing_bypass_init_after_self_tests() ) then call get_proc_bounds(bounds) call export_fields(gcomp, bounds, glc_present, rof_prognostic, & water_inst%waterlnd2atmbulk_inst, lnd2atm_inst, lnd2glc_inst, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return + end if ! 
Set scalars in export state call State_SetScalar(dble(ldomain%ni), flds_scalar_index_nx, exportState, & @@ -735,6 +738,7 @@ subroutine ModelAdvance(gcomp, rc) use clm_instMod , only : water_inst, atm2lnd_inst, glc2lnd_inst, lnd2atm_inst, lnd2glc_inst use decompMod , only : bounds_type, get_proc_bounds use clm_driver , only : clm_drv + use SelfTestDriver, only : for_testing_bypass_init_after_self_tests ! input/output variables type(ESMF_GridComp) :: gcomp @@ -928,11 +932,13 @@ subroutine ModelAdvance(gcomp, rc) ! Pack export state !-------------------------------- + if ( .not. for_testing_bypass_init_after_self_tests() ) then call t_startf ('lc_lnd_export') call export_fields(gcomp, bounds, glc_present, rof_prognostic, & water_inst%waterlnd2atmbulk_inst, lnd2atm_inst, lnd2glc_inst, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return call t_stopf ('lc_lnd_export') + end if !-------------------------------- ! Advance ctsm time step diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index 892c0ccd8d..aa07014288 100644 --- a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -625,9 +625,9 @@ subroutine initialize2(ni,nj, currtime) call t_stopf('clm_init2_init_interp') end if + if ( .not. for_testing_bypass_init_after_self_tests() )then call t_startf('clm_init2_part5') - if ( .not. for_testing_bypass_init_after_self_tests() )then ! If requested, reset dynbal baselines ! This needs to happen after reading the restart file (including after reading the ! interpolated restart file, if applicable). diff --git a/src/main/clm_instMod.F90 b/src/main/clm_instMod.F90 index 0c3143a7cb..bc86749d06 100644 --- a/src/main/clm_instMod.F90 +++ b/src/main/clm_instMod.F90 @@ -208,7 +208,6 @@ subroutine clm_instInit(bounds) use HillslopeHydrologyMod , only : SetHillslopeSoilThickness use initVerticalMod , only : setSoilLayerClass use DustEmisFactory , only : create_dust_emissions - use SelfTestDriver , only : for_testing_bypass_init_after_self_tests ! 
! !ARGUMENTS type(bounds_type), intent(in) :: bounds ! processor bounds @@ -271,9 +270,7 @@ subroutine clm_instInit(bounds) call humanindex_inst%Init(bounds) ! Initialize urban time varying data - if ( .not. for_testing_bypass_init_after_self_tests() )then - call urbantv_inst%Init(bounds, NLFilename) - end if + call urbantv_inst%Init(bounds, NLFilename) ! Initialize vertical data components From 188f229dade2cf324b42688c1f477af946340d56 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 27 Aug 2025 16:16:33 -0600 Subject: [PATCH 085/141] Also bypass the import fields for_testing option, and move the decompInit_lnd timers to around the calls rather than for the entire subroutine, because the things at the top that may abort will then have a broken timer --- src/cpl/nuopc/lnd_comp_nuopc.F90 | 2 ++ src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 | 4 ++++ src/main/decompInitMod.F90 | 3 --- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index be54aa8c90..a6c44e9c1e 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -836,11 +836,13 @@ subroutine ModelAdvance(gcomp, rc) ! Unpack import state !-------------------------------- + if ( .not. for_testing_bypass_init_after_self_tests() ) then call t_startf ('lc_lnd_import') call import_fields( gcomp, bounds, glc_present, rof_prognostic, & atm2lnd_inst, glc2lnd_inst, water_inst%wateratm2lndbulk_inst, rc ) if (ChkErr(rc,__LINE__,u_FILE_u)) return call t_stopf ('lc_lnd_import') + end if !-------------------------------- ! 
Run model diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index cf47a28724..1c592c5290 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -153,7 +153,9 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes call t_startf ('lnd_set_decomp_and_domain_from_readmesh: decomp_init') ! Determine lnd decomposition that will be used by ctsm from lndmask_glob + call t_startf ('decompInit_lnd') call decompInit_lnd(lni=ni, lnj=nj, amask=lndmask_glob) + call t_stopf ('decompInit_lnd') ! Determine ocn decomposition that will be used to create the full mesh ! note that the memory for gindex_ocn will be allocated in the following call @@ -371,7 +373,9 @@ subroutine lnd_set_decomp_and_domain_for_single_column(scol_lon, scol_lat, scol_ !------------------------------------------------------------------------------- ! Determine decomp and ldomain + call t_startf ('decompInit_lnd') call decompInit_lnd(lni=1, lnj=1, amask=(/1/)) + call t_stopf ('decompInit_lnd') ! Initialize processor bounds call get_proc_bounds(bounds, allow_errors=.true.) ! allow errors since decomp not fully initialized diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index 840dd0c8e0..9ac21ceef0 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -77,8 +77,6 @@ subroutine decompInit_lnd(lni, lnj, amask) type(bounds_type) :: bounds ! contains subgrid bounds data real(r8) :: msize, mrss !------------------------------------------------------------------------------ - call t_startf('decompInit_lnd') - call memcheck('decompInit_lnd: before allocate') lns = lni * lnj @@ -276,7 +274,6 @@ subroutine decompInit_lnd(lni, lnj, amask) write(iulog,*) end if call shr_sys_flush(iulog) - call t_stopf('decompInit_lnd') !------------------------------------------------------------------------------ ! 
Internal subroutines for this subroutine From 1c17af420017c498b2c53860c6197775e9ecab4b Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 29 Aug 2025 09:44:16 -0600 Subject: [PATCH 086/141] Add unit_test_shr directory to the main model build --- cime_config/buildlib | 1 + 1 file changed, 1 insertion(+) diff --git a/cime_config/buildlib b/cime_config/buildlib index a4b853924e..3ce5080dc4 100755 --- a/cime_config/buildlib +++ b/cime_config/buildlib @@ -135,6 +135,7 @@ def _main_func(): os.path.join(lnd_root, "src", "dyn_subgrid"), os.path.join(lnd_root, "src", "init_interp"), os.path.join(lnd_root, "src", "self_tests"), + os.path.join(lnd_root, "src", "unit_test_shr"), os.path.join(lnd_root, "src", "fates"), os.path.join(lnd_root, "src", "fates", "main"), os.path.join(lnd_root, "src", "fates", "biogeophys"), From f65b59ba7c4789c93adec3df6f193fbf38f8cc1c Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 29 Aug 2025 09:45:30 -0600 Subject: [PATCH 087/141] Move the get_proc_bounds to inside the bypass --- src/cpl/nuopc/lnd_comp_nuopc.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index a6c44e9c1e..d021ecb068 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -829,14 +829,14 @@ subroutine ModelAdvance(gcomp, rc) flds_scalar_index_nextsw_cday, nextsw_cday, & flds_scalar_name, flds_scalar_num, rc) - ! Get proc bounds - call get_proc_bounds(bounds) - !-------------------------------- ! Unpack import state !-------------------------------- if ( .not. for_testing_bypass_init_after_self_tests() ) then + ! 
Get proc bounds for both import and export + call get_proc_bounds(bounds) + call t_startf ('lc_lnd_import') call import_fields( gcomp, bounds, glc_present, rof_prognostic, & atm2lnd_inst, glc2lnd_inst, water_inst%wateratm2lndbulk_inst, rc ) From 01f26592894cfbe06fdb1c22a0eb507390cc983b Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 29 Aug 2025 09:46:54 -0600 Subject: [PATCH 088/141] Add more tests, remove redoing the decomp with the full grid after the DecompInit self tests --- src/self_tests/TestDecompInit.F90 | 101 +++++++++++++++++++++++++++--- 1 file changed, 92 insertions(+), 9 deletions(-) diff --git a/src/self_tests/TestDecompInit.F90 b/src/self_tests/TestDecompInit.F90 index 5f0471714b..9905582320 100644 --- a/src/self_tests/TestDecompInit.F90 +++ b/src/self_tests/TestDecompInit.F90 @@ -9,8 +9,9 @@ module TestDecompInit use Assertions, only : assert_equal use clm_varctl, only : iulog use abortutils, only : endrun, endrun_init, get_last_endrun_msg - use spmdMod, only : masterproc, npes - use decompInitMod, only : decompInit_lnd, clump_pproc + use spmdMod, only : masterproc, npes, iam + use decompInitMod, only : decompInit_lnd, clump_pproc, decompInit_clumps + use clm_InstMod, only : glc_behavior use decompMod implicit none @@ -57,7 +58,6 @@ subroutine test_decomp_init() ! way we do here (where reads depend on earlier writes), for better or for worse. ! ! !USERS: - use clm_InstMod, only : glc_behavior use decompInitMod, only : decompInit_clumps, decompInit_glcp use domainMod, only : ldomain ! !ARGUMENTS: @@ -82,16 +82,21 @@ subroutine test_decomp_init() call test_decompInit_lnd_abort_on_too_small_nsegspc() call write_to_log('test_decompInit_lnd_check_sizes') call test_decompInit_lnd_check_sizes() + call write_to_log('test_decompInit_clump_gcell_info_correct') + call test_decompInit_clump_gcell_info_correct() + ! 
Comment out for now -- needs some work + !call write_to_log('test_decompMod_get_clump_bounds_correct') + !call test_decompMod_get_clump_bounds_correct() ! ! Call the decompInit initialization series a last time so that decompMod data can still be used ! - allocate( model_amask(ldomain%ni*ldomain%nj) ) - model_amask(:) = 1 - call decompInit_lnd( ldomain%ni, ldomain%nj, model_amask ) - call decompInit_clumps(ldomain%ni, ldomain%nj, glc_behavior) - call decompInit_glcp(ldomain%ni, ldomain%nj, glc_behavior) - deallocate( model_amask ) + !allocate( model_amask(ldomain%ni*ldomain%nj) ) + !model_amask(:) = 1 + !call decompInit_lnd( ldomain%ni, ldomain%nj, model_amask ) + !call decompInit_clumps(ldomain%ni, ldomain%nj, glc_behavior) + !call decompInit_glcp(ldomain%ni, ldomain%nj, glc_behavior) + !deallocate( model_amask ) end subroutine test_decomp_init @@ -232,6 +237,84 @@ subroutine test_check_nclumps() call clean() end subroutine test_check_nclumps +!----------------------------------------------------------------------- + subroutine test_decompMod_get_clump_bounds_correct() + ! Some testing for get_clump_bounds + use decompMod, only : get_clump_bounds, bounds_type + use unittestSimpleSubgridSetupsMod, only : setup_ncells_single_veg_patch + use unittestSubgridMod, only : unittest_subgrid_teardown + use pftconMod, only : noveg + type(bounds_type) :: bounds + integer :: expected_begg, expected_endg, expected_numg, gcell_per_task + integer :: iclump + + call setup() + ! Now setup a singple grid that's just the full test with every point a single baresoil patch + call setup_ncells_single_veg_patch( ncells=ni*nj, pft_type=noveg ) + clump_pproc = 1 ! 
Ensure we are just doing this for one clump per proc for now + expected_numg = ni*nj + if ( expected_numg < npes )then + call endrun( msg="npes is too large for this test", file=sourcefile, line=__LINE__ ) + end if + if ( modulo( expected_numg, npes ) /= 0 )then + call endrun( msg="npes does not evenly divide into numg so this test will not work", file=sourcefile, line=__LINE__ ) + end if + gcell_per_task = expected_numg / npes + expected_begg = gcell_per_task * iam + 1 + expected_endg = expected_begg + gcell_per_task + amask(:) = 1 ! Set all to land + call decompInit_lnd( ni, nj, amask ) + call decompInit_clumps( ni, nj, glc_behavior ) + iclump = 1 ! Clump is just 1 since there's only one clump per task + call get_clump_bounds(iclump, bounds) + call assert_equal( bounds%begg, expected_begg, msg='begg is not as expected' ) + call assert_equal( bounds%endg, expected_endg, msg='endg is not as expected' ) + ! Other subgrtid level information will be the same -- since there's only one landunit, column, and patch per gridcell + call assert_equal( bounds%begl, expected_begg, msg='begl is not as expected' ) + call assert_equal( bounds%endl, expected_endg, msg='endl is not as expected' ) + call assert_equal( bounds%begc, expected_begg, msg='begc is not as expected' ) + call assert_equal( bounds%endc, expected_endg, msg='endc is not as expected' ) + call assert_equal( bounds%begp, expected_begg, msg='begp is not as expected' ) + call assert_equal( bounds%endp, expected_endg, msg='endp is not as expected' ) + call unittest_subgrid_teardown( ) + call clean() + end subroutine test_decompMod_get_clump_bounds_correct + + !----------------------------------------------------------------------- + subroutine test_decompInit_clump_gcell_info_correct() + ! 
Some testing for get_clump_bounds + use decompMod, only : clumps + integer :: expected_gcells, iclump, g, beg_global_index, gcell_per_task + integer :: expected_begg, expected_endg + + call setup() + expected_gcells = ni*nj + if ( expected_gcells < npes )then + call endrun( msg="npes is too large for this test", file=sourcefile, line=__LINE__ ) + end if + if ( modulo( expected_gcells, npes ) /= 0 )then + call endrun( msg="npes does not evenly divide into gcell so this test will not work", file=sourcefile, line=__LINE__ ) + end if + gcell_per_task = expected_gcells / npes + expected_begg = gcell_per_task * iam + 1 + expected_endg = expected_begg + gcell_per_task + amask(:) = 1 ! Set all to land + call decompInit_lnd( ni, nj, amask ) + ! When clump_pproc is one clumps will be the same as PE + call assert_equal( nclumps, npes, msg='nclumps should match numper of proces when clump_pproc is 1' ) + do iclump = 1, nclumps + call assert_equal( clumps(iclump)%owner, iclump-1, msg='clumps owner is not correct' ) + call assert_equal( clumps(iclump)%ncells, gcell_per_task, msg='clumps ncells is not correct' ) + end do + ! Validate gindex_global over the local task + + beg_global_index = gcell_per_task*iam + do g = procinfo%begg, procinfo%endg + call assert_equal( gindex_global(g), g+beg_global_index, msg='clumps owner is not correct' ) + end do + call clean() + end subroutine test_decompInit_clump_gcell_info_correct + !----------------------------------------------------------------------- subroutine write_to_log(msg) ! 
From 9dcf9ab6b8680b51a79eb2bc9ae5d259f3886eb7 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 29 Aug 2025 12:42:01 -0600 Subject: [PATCH 089/141] Move comment over for clarity --- src/main/decompInitMod.F90 | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index 9ac21ceef0..348f3e9c21 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -309,6 +309,7 @@ subroutine decompInit_lnd_allocate( ier ) return end if + ! Temporary arrays that are just used in decompInit_lnd if ( numg < 1 )then call endrun(msg="numg is NOT set before allocation", file=sourcefile, line=__LINE__) return @@ -318,8 +319,6 @@ subroutine decompInit_lnd_allocate( ier ) call endrun(msg="allocation error1 for gdc2glo , etc", file=sourcefile, line=__LINE__) return end if - - ! Temporary arrays that are just used in decompInit_lnd if ( lns < 1 )then call endrun(msg="lns is NOT set before allocation", file=sourcefile, line=__LINE__) return From 9289808d165329563ef950749589d25b2051a3e0 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Tue, 2 Sep 2025 16:01:18 -0600 Subject: [PATCH 090/141] Add a MPI_SCAN as a temporary to start testing how it works and compare to the regular operation --- src/main/decompInitMod.F90 | 49 +++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index 348f3e9c21..85f3e33ea2 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -11,7 +11,7 @@ module decompInitMod use shr_log_mod , only : errMsg => shr_log_errMsg use spmdMod , only : masterproc, iam, npes, mpicom use abortutils , only : endrun - use clm_varctl , only : iulog + use clm_varctl , onoly : iulog use ctsm_memcheck, only : memcheck use perf_mod , only : t_startf, t_stopf ! 
@@ -55,6 +55,9 @@ subroutine decompInit_lnd(lni, lnj, amask) use clm_varctl , only : nsegspc use decompMod , only : gindex_global, nclumps, clumps use decompMod , only : bounds_type, get_proc_bounds, procinfo + ! Temporary testing stuff + use Assertions, only : assert_equal + ! end temporary testing stuff ! ! !ARGUMENTS: integer , intent(in) :: amask(:) @@ -76,6 +79,11 @@ subroutine decompInit_lnd(lni, lnj, amask) integer, allocatable :: gdc2glo(:)! used to create gindex_global type(bounds_type) :: bounds ! contains subgrid bounds data real(r8) :: msize, mrss + ! Temporary testing stuff + integer, pointer :: clumpcnt_mpiscan(:)! clumpcnt for local PE based on the MPI_SCAN + integer, allocatable :: gindex_global_mpiscan(:)! ginfrx_global_mpiscan for the local PE based on the MPI_SCAN + integer :: cell_id_offset + ! end temporary testing stuff !------------------------------------------------------------------------------ call memcheck('decompInit_lnd: before allocate') @@ -218,6 +226,30 @@ subroutine decompInit_lnd(lni, lnj, amask) gdc2glo(:) = 0 + ! Temporary testing for MPI_SCAN, for just the local PE + allocate(clumpcnt_mpiscan(iam:iam+clump_pproc)) + allocate(gindex_global_mpiscan(procinfo%ncells)) + + call MPI_SCAN(procinfo%endg, cell_id_offset, 1, MPI_INTEGER, & + MPI_SUM, mpicom, ier) + if ( ier /= 0 )then + call endrun(msg='Error from MPI_SCAN', file=sourcefile, line=__LINE__) + end if + cell_id_offset = cell_id_offset - procinfo%endg + 1 + ! Assume clumps_pproc is 1 for now... + !if ( clump_pproc > 1 )then + !call endrun(msg='This test assumes clump_pproc is 1', file=sourcefile, line=__LINE__) + !end if + m = 0 + do cid = 1, nclumps + if (clumps(cid)%owner == iam) then + clumpcnt_mpiscan(cid) = cell_id_offset + m + ag = clumpcnt_mpiscan(cid) + m = m + 1 + endif + enddo + ! End temporary testing + ! 
clumpcnt is the start gdc index of each clump ag = 0 @@ -259,6 +291,21 @@ subroutine decompInit_lnd(lni, lnj, amask) gindex_global(n-procinfo%begg+1) = gdc2glo(n) enddo + ! Temporary testing for MPI_SCAN, for just the local PE + do cid = 1,nclumps + if ( clumps(cid)%owner == iam )then + gindex_global_mpiscan(ag) = gdc2glo(clumpcnt_mpiscan(cid)) + clumpcnt_mpiscan = clumpcnt_mpiscan + 1 + call assert_equal(clumpcnt(cid), clumpcnt_mpiscan(cid), & + msg='decompInit_lnd(): clumpcnt MPI_SCAN error') + end if + end do + call assert_equal(gindex_global, gindex_global_mpiscan, & + msg='decompInit_lnd(): clumpcnt MPI_SCAN error') + deallocate(clumpcnt_mpiscan) + deallocate(gindex_global_mpiscan) + ! End temporary testing + call decompInit_lnd_clean() call memcheck('decompInit_lnd: after deallocate') From 62e63f671ecdc6be04b8f5e9bb944f79a96f05e1 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Tue, 2 Sep 2025 16:06:18 -0600 Subject: [PATCH 091/141] Fix accidental typo that made it in, that doesn't compile --- src/main/decompInitMod.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index 85f3e33ea2..377febed54 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -11,7 +11,7 @@ module decompInitMod use shr_log_mod , only : errMsg => shr_log_errMsg use spmdMod , only : masterproc, iam, npes, mpicom use abortutils , only : endrun - use clm_varctl , onoly : iulog + use clm_varctl , only : iulog use ctsm_memcheck, only : memcheck use perf_mod , only : t_startf, t_stopf ! 
From 1a24943b51d7b494e4ee4e669e0a4cb6e3cf582f Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Tue, 2 Sep 2025 16:07:18 -0600 Subject: [PATCH 092/141] Change the test grid total size to 384 so can be divisible by either 128 for Derecho or 48 for Izumi --- src/self_tests/TestDecompInit.F90 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/self_tests/TestDecompInit.F90 b/src/self_tests/TestDecompInit.F90 index 9905582320..58b0ddaab2 100644 --- a/src/self_tests/TestDecompInit.F90 +++ b/src/self_tests/TestDecompInit.F90 @@ -24,7 +24,9 @@ module TestDecompInit ! Module data used in various tests - integer, parameter :: ni = 10, nj = 5 + ! Make the size of the test grid 384 so that it can be divided by 128 or 48 + ! for the number of tasks per node on Derecho or Izumi. + integer, parameter :: ni = 16, nj = 24 integer :: amask(ni*nj) integer :: default_npes From a3eb6bb2a7d666976c1a9a358bfe151d23f90418 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 4 Sep 2025 23:08:49 -0600 Subject: [PATCH 093/141] Start filling other procinfo settings with mpi_scan, this passes for mpi-serial --- src/main/decompInitMod.F90 | 45 +++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index 377febed54..76e8cfcbf4 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -57,6 +57,7 @@ subroutine decompInit_lnd(lni, lnj, amask) use decompMod , only : bounds_type, get_proc_bounds, procinfo ! Temporary testing stuff use Assertions, only : assert_equal + use decompMod , only : processor_type ! end temporary testing stuff ! ! !ARGUMENTS: @@ -82,6 +83,8 @@ subroutine decompInit_lnd(lni, lnj, amask) ! Temporary testing stuff integer, pointer :: clumpcnt_mpiscan(:)! clumpcnt for local PE based on the MPI_SCAN integer, allocatable :: gindex_global_mpiscan(:)! 
ginfrx_global_mpiscan for the local PE based on the MPI_SCAN + type(processor_type) :: procinfo_mpiscan ! procinfo for local PE based on the MPI_SCAN + integer :: mycid0, mycid integer :: cell_id_offset ! end temporary testing stuff !------------------------------------------------------------------------------ @@ -227,30 +230,38 @@ subroutine decompInit_lnd(lni, lnj, amask) gdc2glo(:) = 0 ! Temporary testing for MPI_SCAN, for just the local PE - allocate(clumpcnt_mpiscan(iam:iam+clump_pproc)) - allocate(gindex_global_mpiscan(procinfo%ncells)) - - call MPI_SCAN(procinfo%endg, cell_id_offset, 1, MPI_INTEGER, & + call MPI_SCAN(procinfo%ncells, cell_id_offset, 1, MPI_INTEGER, & MPI_SUM, mpicom, ier) if ( ier /= 0 )then call endrun(msg='Error from MPI_SCAN', file=sourcefile, line=__LINE__) end if - cell_id_offset = cell_id_offset - procinfo%endg + 1 + cell_id_offset = cell_id_offset + 1 + write(iulog,*) 'ncells, cell_id_offset, iam = ', procinfo%ncells, cell_id_offset, iam + allocate(clumpcnt_mpiscan(nclumps)) ! Assume clumps_pproc is 1 for now... !if ( clump_pproc > 1 )then !call endrun(msg='This test assumes clump_pproc is 1', file=sourcefile, line=__LINE__) !end if m = 0 + mycid = 0 do cid = 1, nclumps if (clumps(cid)%owner == iam) then - clumpcnt_mpiscan(cid) = cell_id_offset + m - ag = clumpcnt_mpiscan(cid) - m = m + 1 + if ( mycid == 0 ) mycid0 = cid + mycid = cid + write(iulog,*) 'cid for iam = ', cid, iam + clumpcnt_mpiscan(cid) = cell_id_offset endif enddo + allocate( procinfo_mpiscan%cid(mycid0:mycid) ) + procinfo_mpiscan%ncells = procinfo%ncells + procinfo_mpiscan%begg = cell_id_offset - procinfo%ncells + procinfo_mpiscan%endg = cell_id_offset - 1 + do cid = mycid0, mycid + procinfo_mpiscan%cid(cid) = cell_id_offset + cid - mycid0 -1 + end do ! End temporary testing - ! clumpcnt is the start gdc index of each clump + ! clumpcnt is the ending gdc index of each clump ag = 0 clumpcnt = 0 @@ -292,16 +303,24 @@ subroutine decompInit_lnd(lni, lnj, amask) enddo ! 
Temporary testing for MPI_SCAN, for just the local PE + allocate(gindex_global_mpiscan(1:bounds%endg)) do cid = 1,nclumps if ( clumps(cid)%owner == iam )then - gindex_global_mpiscan(ag) = gdc2glo(clumpcnt_mpiscan(cid)) - clumpcnt_mpiscan = clumpcnt_mpiscan + 1 + ag = clumpcnt(cid) call assert_equal(clumpcnt(cid), clumpcnt_mpiscan(cid), & msg='decompInit_lnd(): clumpcnt MPI_SCAN error') + !call assert_equal(procinfo%cid(cid), procinfo_mpiscan%cid(cid), & + ! msg='decompInit_lnd(): cid MPI_SCAN error') end if end do - call assert_equal(gindex_global, gindex_global_mpiscan, & - msg='decompInit_lnd(): clumpcnt MPI_SCAN error') + !call assert_equal(gindex_global, gindex_global_mpiscan, & + ! msg='decompInit_lnd(): gindex_global MPI_SCAN error') + call assert_equal(procinfo%begg, procinfo_mpiscan%begg, & + msg='decompInit_lnd(): begg MPI_SCAN error') + call assert_equal(procinfo%endg, procinfo_mpiscan%endg, & + msg='decompInit_lnd(): endg MPI_SCAN error') + call assert_equal(procinfo%ncells, procinfo_mpiscan%ncells, & + msg='decompInit_lnd(): ncells MPI_SCAN error') deallocate(clumpcnt_mpiscan) deallocate(gindex_global_mpiscan) ! End temporary testing From 5f97c99e6110caa304dd3138d4a1f85337cc648b Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 5 Sep 2025 10:45:58 -0600 Subject: [PATCH 094/141] Remove the mpiscan bit about clumpcnt --- src/main/decompInitMod.F90 | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index 76e8cfcbf4..1403a3614b 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -81,10 +81,8 @@ subroutine decompInit_lnd(lni, lnj, amask) type(bounds_type) :: bounds ! contains subgrid bounds data real(r8) :: msize, mrss ! Temporary testing stuff - integer, pointer :: clumpcnt_mpiscan(:)! clumpcnt for local PE based on the MPI_SCAN integer, allocatable :: gindex_global_mpiscan(:)! 
ginfrx_global_mpiscan for the local PE based on the MPI_SCAN type(processor_type) :: procinfo_mpiscan ! procinfo for local PE based on the MPI_SCAN - integer :: mycid0, mycid integer :: cell_id_offset ! end temporary testing stuff !------------------------------------------------------------------------------ @@ -237,28 +235,13 @@ subroutine decompInit_lnd(lni, lnj, amask) end if cell_id_offset = cell_id_offset + 1 write(iulog,*) 'ncells, cell_id_offset, iam = ', procinfo%ncells, cell_id_offset, iam - allocate(clumpcnt_mpiscan(nclumps)) ! Assume clumps_pproc is 1 for now... !if ( clump_pproc > 1 )then !call endrun(msg='This test assumes clump_pproc is 1', file=sourcefile, line=__LINE__) !end if - m = 0 - mycid = 0 - do cid = 1, nclumps - if (clumps(cid)%owner == iam) then - if ( mycid == 0 ) mycid0 = cid - mycid = cid - write(iulog,*) 'cid for iam = ', cid, iam - clumpcnt_mpiscan(cid) = cell_id_offset - endif - enddo - allocate( procinfo_mpiscan%cid(mycid0:mycid) ) procinfo_mpiscan%ncells = procinfo%ncells procinfo_mpiscan%begg = cell_id_offset - procinfo%ncells procinfo_mpiscan%endg = cell_id_offset - 1 - do cid = mycid0, mycid - procinfo_mpiscan%cid(cid) = cell_id_offset + cid - mycid0 -1 - end do ! End temporary testing ! clumpcnt is the ending gdc index of each clump @@ -304,15 +287,6 @@ subroutine decompInit_lnd(lni, lnj, amask) ! Temporary testing for MPI_SCAN, for just the local PE allocate(gindex_global_mpiscan(1:bounds%endg)) - do cid = 1,nclumps - if ( clumps(cid)%owner == iam )then - ag = clumpcnt(cid) - call assert_equal(clumpcnt(cid), clumpcnt_mpiscan(cid), & - msg='decompInit_lnd(): clumpcnt MPI_SCAN error') - !call assert_equal(procinfo%cid(cid), procinfo_mpiscan%cid(cid), & - ! msg='decompInit_lnd(): cid MPI_SCAN error') - end if - end do !call assert_equal(gindex_global, gindex_global_mpiscan, & ! 
msg='decompInit_lnd(): gindex_global MPI_SCAN error') call assert_equal(procinfo%begg, procinfo_mpiscan%begg, & @@ -321,7 +295,6 @@ subroutine decompInit_lnd(lni, lnj, amask) msg='decompInit_lnd(): endg MPI_SCAN error') call assert_equal(procinfo%ncells, procinfo_mpiscan%ncells, & msg='decompInit_lnd(): ncells MPI_SCAN error') - deallocate(clumpcnt_mpiscan) deallocate(gindex_global_mpiscan) ! End temporary testing From c592c5b6dcaffb3e67490ffe0f9fae35ea786f43 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Sat, 6 Sep 2025 14:46:01 -0600 Subject: [PATCH 095/141] Don't do the abort testing if not serial as different tasks won't be in sync and doing so was not working --- src/self_tests/TestDecompInit.F90 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/self_tests/TestDecompInit.F90 b/src/self_tests/TestDecompInit.F90 index 58b0ddaab2..526e8e5d37 100644 --- a/src/self_tests/TestDecompInit.F90 +++ b/src/self_tests/TestDecompInit.F90 @@ -117,6 +117,7 @@ end subroutine setup subroutine test_decompInit_lnd_abort_on_bad_clump_pproc() character(len=CX) :: expected_msg, actual_msg + if ( npes > 1 ) return ! error checking testing only works seriallly call setup() call endrun_init( .true. ) ! Do not abort on endrun for self-tests clump_pproc = 0 @@ -137,6 +138,7 @@ end subroutine test_decompInit_lnd_abort_on_bad_clump_pproc subroutine test_decompInit_lnd_abort_on_too_big_clump_pproc() character(len=CX) :: expected_msg, actual_msg + if ( npes > 1 ) return ! error checking testing only works seriallly call setup() call endrun_init( .true. ) ! Do not abort on endrun for self-tests amask(:) = 1 ! Set all to land @@ -182,6 +184,7 @@ end subroutine test_decompInit_lnd_check_sizes subroutine test_decompInit_lnd_abort_when_npes_too_large() character(len=CX) :: expected_msg, actual_msg + if ( npes > 1 ) return ! error checking testing only works seriallly call setup() ! 
NOTE: This is arbitrarily modifying the NPES value -- so it MUST be reset set the END! npes = ni*nj + 1 @@ -209,6 +212,7 @@ subroutine test_decompInit_lnd_abort_on_too_small_nsegspc() use clm_varctl, only : nsegspc character(len=CX) :: expected_msg, actual_msg + if ( npes > 1 ) return ! error checking testing only works seriallly call setup() call endrun_init( .true. ) ! Do not abort on endrun for self-tests amask(:) = 1 ! Set all to land @@ -286,6 +290,8 @@ end subroutine test_decompMod_get_clump_bounds_correct subroutine test_decompInit_clump_gcell_info_correct() ! Some testing for get_clump_bounds use decompMod, only : clumps + use decompMod, only : get_proc_bounds + type(bounds_type) :: bounds integer :: expected_gcells, iclump, g, beg_global_index, gcell_per_task integer :: expected_begg, expected_endg @@ -308,12 +314,6 @@ subroutine test_decompInit_clump_gcell_info_correct() call assert_equal( clumps(iclump)%owner, iclump-1, msg='clumps owner is not correct' ) call assert_equal( clumps(iclump)%ncells, gcell_per_task, msg='clumps ncells is not correct' ) end do - ! 
Validate gindex_global over the local task - - beg_global_index = gcell_per_task*iam - do g = procinfo%begg, procinfo%endg - call assert_equal( gindex_global(g), g+beg_global_index, msg='clumps owner is not correct' ) - end do call clean() end subroutine test_decompInit_clump_gcell_info_correct From ce2cd5148bb57723f0dcd1d03547dab3b0ad2c40 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Sat, 6 Sep 2025 14:49:47 -0600 Subject: [PATCH 096/141] Add global i,j indices for each gridcell on a processor to the processor_type structure, and start adding a couple methods to help get them set --- src/main/decompMod.F90 | 95 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/src/main/decompMod.F90 b/src/main/decompMod.F90 index 19f036bbb1..454e73cc64 100644 --- a/src/main/decompMod.F90 +++ b/src/main/decompMod.F90 @@ -69,6 +69,8 @@ module decompMod type processor_type integer :: nclumps ! number of clumps for processor_type iam integer,pointer :: cid(:) => null() ! clump indices + integer,allocatable :: gi(:) ! global index on the full 2D grid in "x" (longitude for structured) + integer,allocatable :: gj(:) ! global index on the full 2D grid in "y" (latitudef or structured, 1 for unstructured) integer :: ncells ! number of gridcells in proc integer :: nlunits ! number of landunits in proc integer :: ncols ! number of columns in proc @@ -79,6 +81,9 @@ module decompMod integer :: begc, endc ! beginning and ending column index integer :: begp, endp ! beginning and ending patch index integer :: begCohort, endCohort ! beginning and ending cohort indices + contains + procedure, public :: calc_global_index ! Get the global index for the input grid index on this processor + procedure, public :: calc_globalxy_indices end type processor_type public processor_type type(processor_type),public :: procinfo @@ -122,6 +127,96 @@ module decompMod contains + + function calc_global_index( this, g, ni, nj ) result(global_index) + ! 
Returns the full grid global vector index from the gridcell on this processor + ! !ARGUMENTS: + class(processor_type), intent(in) :: this + integer, intent(in) :: g ! gridcell index on this processor + integer, intent(in) :: ni, nj ! Global 2D size of full grid + integer :: global_index ! function result, full vector index on the full global grid + + if ( .not. allocated(this%gi) )then + call shr_sys_abort( 'gi is not allocated yet', file=sourcefile, line=__LINE__) + return + end if + if ( .not. allocated(this%gj) )then + call shr_sys_abort( 'gj is not allocated yet', file=sourcefile, line=__LINE__) + return + end if + if ( (g < this%begg) .or. (g > this%endg) ) then + call shr_sys_abort( 'Input index g is out of bounds of this processor', file=sourcefile, line=__LINE__) + return + end if + if ( (ni < 1) .or. (nj < 1) ) then + call shr_sys_abort( 'Global gridsize ni/nj is not set', file=sourcefile, line=__LINE__) + return + end if + global_index = (this%gj(g)-1)*ni + this%gi(g) + if ( (global_index < 1) .or. (global_index > ni*nj) ) then + call shr_sys_abort( 'global_index is out of bounds for this processor', file=sourcefile, line=__LINE__) + return + end if + + end function calc_global_index + + subroutine calc_ijindices_from_full_global_index( g, ni, nj, i, j ) + ! Local private subroutine to calculate the full 2D grid i,j indices from the 1D global vector index + integer, intent(in) :: g ! Input processor global full 2D vector index + integer, intent(in) :: ni, nj ! Size of the full 2D grid + integer, intent(out) :: i, j ! 2D indices in x and y on the full global 2D grid (j will be 1 for an unstructured grid) + + if ( (g < 1) .or. (g > ni*nj) ) then + call shr_sys_abort( 'Input index g is out of bounds', file=sourcefile, line=__LINE__) + return + end if + if ( (ni < 1) .or. 
(nj < 1) ) then + call shr_sys_abort( 'Global gridsize ni/nj is not set', file=sourcefile, line=__LINE__) + return + end if + j = floor( real(g, r8) / real(ni, r8) ) + 1 + i = g - j*ni + if ( (i < 1) .or. (i > ni) ) then + call shr_sys_abort( 'Computed global i value out of range', file=sourcefile, line=__LINE__) + return + end if + if ( (j < 1) .or. (j > nj) ) then + call shr_sys_abort( 'Computed global j value out of range', file=sourcefile, line=__LINE__) + return + end if + end subroutine calc_ijindices_from_full_global_index + + + subroutine calc_globalxy_indices( this, g, ni, nj, i, j ) + ! !ARGUMENTS: + class(processor_type), intent(in) :: this + integer, intent(in) :: g ! gridcell index on this processor + integer, intent(in) :: ni, nj ! Global 2D size of full grid + integer, intent(out) :: i, j ! 2D indices in x and y on the full global 2D grid (j will be 1 for an unstructured grid) + + integer :: global_index + + if ( .not. allocated(this%gi) )then + call shr_sys_abort( 'gi is not allocated yet', file=sourcefile, line=__LINE__) + return + end if + if ( .not. allocated(this%gj) )then + call shr_sys_abort( 'gj is not allocated yet', file=sourcefile, line=__LINE__) + return + end if + if ( (g < this%begg) .or. (g > this%endg) ) then + call shr_sys_abort( 'Input index g is out of bounds of this processor', file=sourcefile, line=__LINE__) + return + end if + if ( (ni < 1) .or. (nj < 1) ) then + call shr_sys_abort( 'Global gridsize ni/nj is not set', file=sourcefile, line=__LINE__) + return + end if + global_index = this%calc_global_index( g, ni, nj ) + call calc_ijindices_from_full_global_index( global_index, ni, nj, i, j ) + + end subroutine calc_globalxy_indices + !----------------------------------------------------------------------- pure function get_beg(bounds, subgrid_level) result(beg_index) ! 
From 5e0c6d4dc1f84ac92ac2f4b924733ad115c0411a Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Sat, 6 Sep 2025 14:54:43 -0600 Subject: [PATCH 097/141] Add more testing, test new processor_type methods, set gindex_global for mpiscan and verify it, allocate the new procinfo gi and gj indices, make sure they are set, compiles but fails at run --- src/main/decompInitMod.F90 | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index 1403a3614b..1b08e607cb 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -57,7 +57,7 @@ subroutine decompInit_lnd(lni, lnj, amask) use decompMod , only : bounds_type, get_proc_bounds, procinfo ! Temporary testing stuff use Assertions, only : assert_equal - use decompMod , only : processor_type + use decompMod , only : processor_type, get_global_index, subgrid_level_gridcell ! end temporary testing stuff ! ! !ARGUMENTS: @@ -84,6 +84,7 @@ subroutine decompInit_lnd(lni, lnj, amask) integer, allocatable :: gindex_global_mpiscan(:)! ginfrx_global_mpiscan for the local PE based on the MPI_SCAN type(processor_type) :: procinfo_mpiscan ! procinfo for local PE based on the MPI_SCAN integer :: cell_id_offset + integer :: i, j, g ! 
end temporary testing stuff !------------------------------------------------------------------------------ call memcheck('decompInit_lnd: before allocate') @@ -273,6 +274,27 @@ subroutine decompInit_lnd(lni, lnj, amask) end do end do + g = 0 + do aj = 1,lnj + do ai = 1,lni + an = (aj-1)*lni + ai + + cid = lcid(an) + if ( clumps(cid)%owner == iam ) then + g = g + 1 + procinfo%gi(g) = ai + procinfo%gj(g) = aj + call assert_equal( an, procinfo%calc_global_index( g, lni, lnj ), & + msg='decompInit_lnd(): calc_global_index is off') + call assert_equal( an, get_global_index( g, subgrid_level_gridcell ), & + msg='decompInit_lnd(): get_global_index is off') + call procinfo%calc_globalxy_indices( g, lni, lnj, i, j ) + call assert_equal( (/ ai, aj /), (/ i, j /), & + msg='decompInit_lnd(): i, j indices are off') + end if + end do + end do + ! Initialize global gindex (non-compressed, includes ocean points) ! Note that gindex_global goes from (1:endg) call get_proc_bounds(bounds, allow_errors=.true.) ! This has to be done after procinfo is finalized @@ -287,8 +309,8 @@ subroutine decompInit_lnd(lni, lnj, amask) ! Temporary testing for MPI_SCAN, for just the local PE allocate(gindex_global_mpiscan(1:bounds%endg)) - !call assert_equal(gindex_global, gindex_global_mpiscan, & - ! 
msg='decompInit_lnd(): gindex_global MPI_SCAN error') + call assert_equal(gindex_global, gindex_global_mpiscan, & + msg='decompInit_lnd(): gindex_global MPI_SCAN error') call assert_equal(procinfo%begg, procinfo_mpiscan%begg, & msg='decompInit_lnd(): begg MPI_SCAN error') call assert_equal(procinfo%endg, procinfo_mpiscan%endg, & @@ -368,6 +390,16 @@ subroutine decompInit_lnd_allocate( ier ) call endrun(msg="allocation error2 for clumpcnt", file=sourcefile, line=__LINE__) return end if + allocate(procinfo%gi(clump_pproc), stat=ier) + if (ier /= 0) then + call endrun(msg='allocation error for procinfo%gi', file=sourcefile, line=__LINE__) + return + endif + allocate(procinfo%gj(clump_pproc), stat=ier) + if (ier /= 0) then + call endrun(msg='allocation error for procinfo%gj', file=sourcefile, line=__LINE__) + return + endif end subroutine decompInit_lnd_allocate From f87db23e3788157beb6a10e6e9d2d986ca6b60ee Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Sat, 6 Sep 2025 15:06:32 -0600 Subject: [PATCH 098/141] Can't do this test yest, as only gridcells are set at this point and the call expects all subgrid levels to be set --- src/main/decompInitMod.F90 | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index 1b08e607cb..574272800b 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -286,8 +286,9 @@ subroutine decompInit_lnd(lni, lnj, amask) procinfo%gj(g) = aj call assert_equal( an, procinfo%calc_global_index( g, lni, lnj ), & msg='decompInit_lnd(): calc_global_index is off') - call assert_equal( an, get_global_index( g, subgrid_level_gridcell ), & - msg='decompInit_lnd(): get_global_index is off') + ! This can't be done yet, as the other indices aren't set yet + !call assert_equal( an, get_global_index( g, subgrid_level_gridcell ), & + ! 
msg='decompInit_lnd(): get_global_index is off') call procinfo%calc_globalxy_indices( g, lni, lnj, i, j ) call assert_equal( (/ ai, aj /), (/ i, j /), & msg='decompInit_lnd(): i, j indices are off') From 7cb9d202261b3b6556b79e19885b9f5c6faa94e8 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 8 Sep 2025 01:42:59 -0600 Subject: [PATCH 099/141] Fix setting of gi and gj indices, the serial case runs to completion now --- src/main/decompInitMod.F90 | 44 +++++++++++++++++++++++++++----------- src/main/decompMod.F90 | 24 +++++++++++++++------ 2 files changed, 48 insertions(+), 20 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index 574272800b..ad976cac9c 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -274,12 +274,25 @@ subroutine decompInit_lnd(lni, lnj, amask) end do end do + ! Initialize global gindex (non-compressed, includes ocean points) + ! Note that gindex_global goes from (1:endg) + call get_proc_bounds(bounds, allow_errors=.true.) ! This has to be done after procinfo is finalized + call decompInit_lnd_gindex_global_allocate( bounds, ier ) ! This HAS to be done after prcoinfo is finalized + if (ier /= 0) return + + nglob_x = lni ! decompMod module variables + nglob_y = lnj ! decompMod module variables + do n = procinfo%begg,procinfo%endg + gindex_global(n-procinfo%begg+1) = gdc2glo(n) + enddo + + ! Set 2D indices on full grid for each gridcell on this processor g = 0 do aj = 1,lnj do ai = 1,lni an = (aj-1)*lni + ai - cid = lcid(an) + if (cid > 0) then if ( clumps(cid)%owner == iam ) then g = g + 1 procinfo%gi(g) = ai @@ -293,23 +306,18 @@ subroutine decompInit_lnd(lni, lnj, amask) call assert_equal( (/ ai, aj /), (/ i, j /), & msg='decompInit_lnd(): i, j indices are off') end if + end if end do end do - ! Initialize global gindex (non-compressed, includes ocean points) - ! Note that gindex_global goes from (1:endg) - call get_proc_bounds(bounds, allow_errors=.true.) ! 
This has to be done after procinfo is finalized - call decompInit_lnd_gindex_global_allocate( bounds, ier ) ! This HAS to be done after prcoinfo is finalized - if (ier /= 0) return - - nglob_x = lni ! decompMod module variables - nglob_y = lnj ! decompMod module variables - do n = procinfo%begg,procinfo%endg - gindex_global(n-procinfo%begg+1) = gdc2glo(n) - enddo - ! Temporary testing for MPI_SCAN, for just the local PE allocate(gindex_global_mpiscan(1:bounds%endg)) + do n = procinfo%begg,procinfo%endg + call procinfo%calc_globalxy_indices( n, lni, lnj, i, j ) + procinfo%gi(n) = i + procinfo%gj(n) = j + gindex_global_mpiscan(n) = procinfo%calc_global_index( n, lni, lnj ) + end do call assert_equal(gindex_global, gindex_global_mpiscan, & msg='decompInit_lnd(): gindex_global MPI_SCAN error') call assert_equal(procinfo%begg, procinfo_mpiscan%begg, & @@ -416,6 +424,16 @@ subroutine decompInit_lnd_gindex_global_allocate( bounds, ier ) return end if allocate(gindex_global(1:bounds%endg)) + allocate(procinfo%gi(procinfo%begg:procinfo%endg), stat=ier) + if (ier /= 0) then + call endrun(msg='allocation error for procinfo%gi', file=sourcefile, line=__LINE__) + return + endif + allocate(procinfo%gj(procinfo%begg:procinfo%endg), stat=ier) + if (ier /= 0) then + call endrun(msg='allocation error for procinfo%gj', file=sourcefile, line=__LINE__) + return + endif end subroutine decompInit_lnd_gindex_global_allocate subroutine decompInit_lnd_clean() diff --git a/src/main/decompMod.F90 b/src/main/decompMod.F90 index 454e73cc64..4a9f4cd0a0 100644 --- a/src/main/decompMod.F90 +++ b/src/main/decompMod.F90 @@ -69,8 +69,8 @@ module decompMod type processor_type integer :: nclumps ! number of clumps for processor_type iam integer,pointer :: cid(:) => null() ! clump indices - integer,allocatable :: gi(:) ! global index on the full 2D grid in "x" (longitude for structured) - integer,allocatable :: gj(:) ! 
global index on the full 2D grid in "y" (latitudef or structured, 1 for unstructured) + integer,pointer :: gi(:) => null() ! global index on the full 2D grid in "x" (longitude for structured) + integer,pointer :: gj(:) => null() ! global index on the full 2D grid in "y" (latitudef or structured, 1 for unstructured) integer :: ncells ! number of gridcells in proc integer :: nlunits ! number of landunits in proc integer :: ncols ! number of columns in proc @@ -136,11 +136,11 @@ function calc_global_index( this, g, ni, nj ) result(global_index) integer, intent(in) :: ni, nj ! Global 2D size of full grid integer :: global_index ! function result, full vector index on the full global grid - if ( .not. allocated(this%gi) )then + if ( .not. associated(this%gi) )then call shr_sys_abort( 'gi is not allocated yet', file=sourcefile, line=__LINE__) return end if - if ( .not. allocated(this%gj) )then + if ( .not. associated(this%gj) )then call shr_sys_abort( 'gj is not allocated yet', file=sourcefile, line=__LINE__) return end if @@ -175,7 +175,9 @@ subroutine calc_ijindices_from_full_global_index( g, ni, nj, i, j ) return end if j = floor( real(g, r8) / real(ni, r8) ) + 1 - i = g - j*ni + if ( mod(g,ni) == 0 ) j = j - 1 + i = g - (j-1)*ni + write(iulog,*) 'i, j = ', i, j if ( (i < 1) .or. (i > ni) ) then call shr_sys_abort( 'Computed global i value out of range', file=sourcefile, line=__LINE__) return @@ -196,11 +198,11 @@ subroutine calc_globalxy_indices( this, g, ni, nj, i, j ) integer :: global_index - if ( .not. allocated(this%gi) )then + if ( .not. associated(this%gi) )then call shr_sys_abort( 'gi is not allocated yet', file=sourcefile, line=__LINE__) return end if - if ( .not. allocated(this%gj) )then + if ( .not. 
associated(this%gj) )then call shr_sys_abort( 'gj is not allocated yet', file=sourcefile, line=__LINE__) return end if @@ -745,6 +747,14 @@ subroutine decompmod_clean() if ( allocated(clumps) )then deallocate(clumps) end if + if ( associated(procinfo%gi) )then + deallocate(procinfo%gi) + procinfo%gi => null() + end if + if ( associated(procinfo%gj) )then + deallocate(procinfo%gj) + procinfo%gj => null() + end if if ( associated(procinfo%cid) )then deallocate(procinfo%cid) procinfo%cid => null() From 2b15296682cedee99a2866d8e316caad00b86076 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 8 Sep 2025 14:47:55 -0600 Subject: [PATCH 100/141] Change a test to make it valid for clump_pproc or not --- src/self_tests/TestDecompInit.F90 | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/self_tests/TestDecompInit.F90 b/src/self_tests/TestDecompInit.F90 index 526e8e5d37..7705ea80fc 100644 --- a/src/self_tests/TestDecompInit.F90 +++ b/src/self_tests/TestDecompInit.F90 @@ -309,7 +309,11 @@ subroutine test_decompInit_clump_gcell_info_correct() amask(:) = 1 ! Set all to land call decompInit_lnd( ni, nj, amask ) ! 
When clump_pproc is one clumps will be the same as PE - call assert_equal( nclumps, npes, msg='nclumps should match numper of proces when clump_pproc is 1' ) + if ( clump_pproc == 1 ) then + call assert_equal( nclumps, npes, msg='nclumps should match number of processors when clump_pproc is 1' ) + else + call assert_equal( nclumps/clump_pproc, npes, msg='nclumps divided by clump_pproc should match number of processors when clump_pproc > 1' ) + end if do iclump = 1, nclumps call assert_equal( clumps(iclump)%owner, iclump-1, msg='clumps owner is not correct' ) call assert_equal( clumps(iclump)%ncells, gcell_per_task, msg='clumps ncells is not correct' ) From 35730c8adee26d37e833f8d1b0e44c7fb4c423bf Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 8 Sep 2025 16:52:39 -0600 Subject: [PATCH 101/141] Add ggidx global index to the processor_type, allocate, set and deallocate for the local task, this works for the serial case --- src/main/decompInitMod.F90 | 45 +++++++++++++++++++------------------- src/main/decompMod.F90 | 32 ++++++++++++++++++--------- 2 files changed, 44 insertions(+), 33 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index ad976cac9c..58ee2ff5b5 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -286,37 +286,30 @@ subroutine decompInit_lnd(lni, lnj, amask) gindex_global(n-procinfo%begg+1) = gdc2glo(n) enddo - ! Set 2D indices on full grid for each gridcell on this processor + ! Temporary testing for MPI_SCAN, for just the local PE g = 0 - do aj = 1,lnj - do ai = 1,lni - an = (aj-1)*lni + ai - cid = lcid(an) - if (cid > 0) then - if ( clumps(cid)%owner == iam ) then - g = g + 1 - procinfo%gi(g) = ai - procinfo%gj(g) = aj - call assert_equal( an, procinfo%calc_global_index( g, lni, lnj ), & - msg='decompInit_lnd(): calc_global_index is off') - ! This can't be done yet, as the other indices aren't set yet - !call assert_equal( an, get_global_index( g, subgrid_level_gridcell ), & - ! 
msg='decompInit_lnd(): get_global_index is off') - call procinfo%calc_globalxy_indices( g, lni, lnj, i, j ) - call assert_equal( (/ ai, aj /), (/ i, j /), & - msg='decompInit_lnd(): i, j indices are off') - end if + do ln = 1,lns + if (amask(ln) == 1) then + cid = lcid(ln) + if ( cid > 0 )then + if (clumps(cid)%owner == iam) then + g = g + 1 + if ( (g < procinfo%begg) .or. (g > procinfo%endg) )then + call endrun(msg='g out of bounds for MPI_SCAN test', file=sourcefile, line=__LINE__) + end if + procinfo%ggidx(g) = ln + end if + end if end if end do - end do - - ! Temporary testing for MPI_SCAN, for just the local PE + write(iulog,*) ' iam, ggidx = ', iam, procinfo%ggidx(:) allocate(gindex_global_mpiscan(1:bounds%endg)) do n = procinfo%begg,procinfo%endg + write(iulog,*) ' n, lni, lnj, ggidx = ', n, lni, lnj, procinfo%ggidx(n) call procinfo%calc_globalxy_indices( n, lni, lnj, i, j ) procinfo%gi(n) = i procinfo%gj(n) = j - gindex_global_mpiscan(n) = procinfo%calc_global_index( n, lni, lnj ) + gindex_global_mpiscan(n-procinfo%begg+1) = procinfo%ggidx(n) end do call assert_equal(gindex_global, gindex_global_mpiscan, & msg='decompInit_lnd(): gindex_global MPI_SCAN error') @@ -424,6 +417,12 @@ subroutine decompInit_lnd_gindex_global_allocate( bounds, ier ) return end if allocate(gindex_global(1:bounds%endg)) + allocate(procinfo%ggidx(procinfo%begg:procinfo%endg), stat=ier) + if (ier /= 0) then + call endrun(msg='allocation error for procinfo%ggidx', file=sourcefile, line=__LINE__) + return + endif + procinfo%ggidx(:) = -1 allocate(procinfo%gi(procinfo%begg:procinfo%endg), stat=ier) if (ier /= 0) then call endrun(msg='allocation error for procinfo%gi', file=sourcefile, line=__LINE__) diff --git a/src/main/decompMod.F90 b/src/main/decompMod.F90 index 4a9f4cd0a0..618eb9f50b 100644 --- a/src/main/decompMod.F90 +++ b/src/main/decompMod.F90 @@ -69,6 +69,7 @@ module decompMod type processor_type integer :: nclumps ! 
number of clumps for processor_type iam integer,pointer :: cid(:) => null() ! clump indices + integer,pointer :: ggidx(:) => null() ! global vector index on the full 2D grid integer,pointer :: gi(:) => null() ! global index on the full 2D grid in "x" (longitude for structured) integer,pointer :: gj(:) => null() ! global index on the full 2D grid in "y" (latitudef or structured, 1 for unstructured) integer :: ncells ! number of gridcells in proc @@ -82,7 +83,7 @@ module decompMod integer :: begp, endp ! beginning and ending patch index integer :: begCohort, endCohort ! beginning and ending cohort indices contains - procedure, public :: calc_global_index ! Get the global index for the input grid index on this processor + procedure, public :: calc_global_index_fromij ! Get the global index for the input grid i/j index on this processor procedure, public :: calc_globalxy_indices end type processor_type public processor_type @@ -128,7 +129,7 @@ module decompMod contains - function calc_global_index( this, g, ni, nj ) result(global_index) + function calc_global_index_fromij( this, g, ni, nj ) result(global_index) ! Returns the full grid global vector index from the gridcell on this processor ! !ARGUMENTS: class(processor_type), intent(in) :: this @@ -152,13 +153,23 @@ function calc_global_index( this, g, ni, nj ) result(global_index) call shr_sys_abort( 'Global gridsize ni/nj is not set', file=sourcefile, line=__LINE__) return end if + if ( (this%gi(g) < 1) .or. (this%gi(g) > ni) ) then + write(iulog,*) 'this%gi(g) = ', this%gi(g) + call shr_sys_abort( 'Global gi index is out of bounds', file=sourcefile, line=__LINE__) + return + end if + if ( (this%gj(g) < 1) .or. (this%gj(g) > ni) ) then + write(iulog,*) 'this%gj(g) = ', this%gj(g) + call shr_sys_abort( 'Global gj index is out of bounds', file=sourcefile, line=__LINE__) + return + end if global_index = (this%gj(g)-1)*ni + this%gi(g) if ( (global_index < 1) .or. 
(global_index > ni*nj) ) then call shr_sys_abort( 'global_index is out of bounds for this processor', file=sourcefile, line=__LINE__) return end if - end function calc_global_index + end function calc_global_index_fromij subroutine calc_ijindices_from_full_global_index( g, ni, nj, i, j ) ! Local private subroutine to calculate the full 2D grid i,j indices from the 1D global vector index @@ -167,6 +178,7 @@ subroutine calc_ijindices_from_full_global_index( g, ni, nj, i, j ) integer, intent(out) :: i, j ! 2D indices in x and y on the full global 2D grid (j will be 1 for an unstructured grid) if ( (g < 1) .or. (g > ni*nj) ) then + write(iulog,*) 'g, ni, nj = ', g, ni, nj call shr_sys_abort( 'Input index g is out of bounds', file=sourcefile, line=__LINE__) return end if @@ -198,12 +210,8 @@ subroutine calc_globalxy_indices( this, g, ni, nj, i, j ) integer :: global_index - if ( .not. associated(this%gi) )then - call shr_sys_abort( 'gi is not allocated yet', file=sourcefile, line=__LINE__) - return - end if - if ( .not. associated(this%gj) )then - call shr_sys_abort( 'gj is not allocated yet', file=sourcefile, line=__LINE__) + if ( .not. associated(this%ggidx) )then + call shr_sys_abort( 'ggidx is not allocated yet', file=sourcefile, line=__LINE__) return end if if ( (g < this%begg) .or. 
(g > this%endg) ) then @@ -214,7 +222,7 @@ subroutine calc_globalxy_indices( this, g, ni, nj, i, j ) call shr_sys_abort( 'Global gridsize ni/nj is not set', file=sourcefile, line=__LINE__) return end if - global_index = this%calc_global_index( g, ni, nj ) + global_index = this%ggidx(g) call calc_ijindices_from_full_global_index( global_index, ni, nj, i, j ) end subroutine calc_globalxy_indices @@ -747,6 +755,10 @@ subroutine decompmod_clean() if ( allocated(clumps) )then deallocate(clumps) end if + if ( associated(procinfo%ggidx) )then + deallocate(procinfo%ggidx) + procinfo%ggidx => null() + end if if ( associated(procinfo%gi) )then deallocate(procinfo%gi) procinfo%gi => null() From 495efe6ea736042fb0019ff87824945a6283ed3f Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 8 Sep 2025 18:11:12 -0600 Subject: [PATCH 102/141] Correct the indexing for ggidx --- src/main/decompInitMod.F90 | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index 58ee2ff5b5..a0001b0150 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -287,17 +287,18 @@ subroutine decompInit_lnd(lni, lnj, amask) enddo ! Temporary testing for MPI_SCAN, for just the local PE - g = 0 + g = procinfo%begg do ln = 1,lns if (amask(ln) == 1) then cid = lcid(ln) if ( cid > 0 )then if (clumps(cid)%owner == iam) then - g = g + 1 if ( (g < procinfo%begg) .or. 
(g > procinfo%endg) )then + write(iulog,*) ' iam, g = ', iam, g call endrun(msg='g out of bounds for MPI_SCAN test', file=sourcefile, line=__LINE__) end if procinfo%ggidx(g) = ln + g = g + 1 end if end if end if From 418ee1f843bdb4fa9643418fcefb2b79edb6b06b Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 8 Sep 2025 18:12:48 -0600 Subject: [PATCH 103/141] Remove the prints that weren't with an abort --- src/main/decompInitMod.F90 | 2 -- src/main/decompMod.F90 | 1 - 2 files changed, 3 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index a0001b0150..f6d9576588 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -235,7 +235,6 @@ subroutine decompInit_lnd(lni, lnj, amask) call endrun(msg='Error from MPI_SCAN', file=sourcefile, line=__LINE__) end if cell_id_offset = cell_id_offset + 1 - write(iulog,*) 'ncells, cell_id_offset, iam = ', procinfo%ncells, cell_id_offset, iam ! Assume clumps_pproc is 1 for now... !if ( clump_pproc > 1 )then !call endrun(msg='This test assumes clump_pproc is 1', file=sourcefile, line=__LINE__) @@ -303,7 +302,6 @@ subroutine decompInit_lnd(lni, lnj, amask) end if end if end do - write(iulog,*) ' iam, ggidx = ', iam, procinfo%ggidx(:) allocate(gindex_global_mpiscan(1:bounds%endg)) do n = procinfo%begg,procinfo%endg write(iulog,*) ' n, lni, lnj, ggidx = ', n, lni, lnj, procinfo%ggidx(n) diff --git a/src/main/decompMod.F90 b/src/main/decompMod.F90 index 618eb9f50b..3897d61ee7 100644 --- a/src/main/decompMod.F90 +++ b/src/main/decompMod.F90 @@ -189,7 +189,6 @@ subroutine calc_ijindices_from_full_global_index( g, ni, nj, i, j ) j = floor( real(g, r8) / real(ni, r8) ) + 1 if ( mod(g,ni) == 0 ) j = j - 1 i = g - (j-1)*ni - write(iulog,*) 'i, j = ', i, j if ( (i < 1) .or. 
(i > ni) ) then call shr_sys_abort( 'Computed global i value out of range', file=sourcefile, line=__LINE__) return From a69339ae4b683310213ac427cee17740fe695dcf Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Tue, 9 Sep 2025 09:31:49 -0600 Subject: [PATCH 104/141] Get it working for a threaded case, by adding an additional loop over clump_pproc for the setting of ggidx --- src/main/decompInitMod.F90 | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index f6d9576588..afeb687eb9 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -84,7 +84,7 @@ subroutine decompInit_lnd(lni, lnj, amask) integer, allocatable :: gindex_global_mpiscan(:)! ginfrx_global_mpiscan for the local PE based on the MPI_SCAN type(processor_type) :: procinfo_mpiscan ! procinfo for local PE based on the MPI_SCAN integer :: cell_id_offset - integer :: i, j, g + integer :: i, j, g, lc ! end temporary testing stuff !------------------------------------------------------------------------------ call memcheck('decompInit_lnd: before allocate') @@ -235,10 +235,6 @@ subroutine decompInit_lnd(lni, lnj, amask) call endrun(msg='Error from MPI_SCAN', file=sourcefile, line=__LINE__) end if cell_id_offset = cell_id_offset + 1 - ! Assume clumps_pproc is 1 for now... - !if ( clump_pproc > 1 )then - !call endrun(msg='This test assumes clump_pproc is 1', file=sourcefile, line=__LINE__) - !end if procinfo_mpiscan%ncells = procinfo%ncells procinfo_mpiscan%begg = cell_id_offset - procinfo%ncells procinfo_mpiscan%endg = cell_id_offset - 1 @@ -276,39 +272,50 @@ subroutine decompInit_lnd(lni, lnj, amask) ! Initialize global gindex (non-compressed, includes ocean points) ! Note that gindex_global goes from (1:endg) call get_proc_bounds(bounds, allow_errors=.true.) ! This has to be done after procinfo is finalized - call decompInit_lnd_gindex_global_allocate( bounds, ier ) ! 
This HAS to be done after prcoinfo is finalized + call decompInit_lnd_gindex_global_allocate( bounds, ier ) ! This HAS to be done after procinfo is finalized if (ier /= 0) return nglob_x = lni ! decompMod module variables nglob_y = lnj ! decompMod module variables + + do cid = 1, clump_pproc + write(iulog,*) 'iam, cid, clumps(cid)%owner', iam, cid, clumps(cid)%owner + end do do n = procinfo%begg,procinfo%endg + write(iulog,*) ' g, n, gdc2glo, iam = ', n, n-procinfo%begg+1, gdc2glo(n), iam gindex_global(n-procinfo%begg+1) = gdc2glo(n) enddo ! Temporary testing for MPI_SCAN, for just the local PE g = procinfo%begg + do lc = 1, clump_pproc do ln = 1,lns if (amask(ln) == 1) then cid = lcid(ln) if ( cid > 0 )then if (clumps(cid)%owner == iam) then + if ( procinfo%cid(lc) == cid ) then + write(iulog,*) ' cid, clumps(cid)%owner, iam = ', cid, clumps(cid)%owner, iam if ( (g < procinfo%begg) .or. (g > procinfo%endg) )then write(iulog,*) ' iam, g = ', iam, g call endrun(msg='g out of bounds for MPI_SCAN test', file=sourcefile, line=__LINE__) end if + write(iulog,*) ' g, ggidx, iam = ', n, procinfo%ggidx(g), iam procinfo%ggidx(g) = ln g = g + 1 end if end if + end if end if end do + end do allocate(gindex_global_mpiscan(1:bounds%endg)) do n = procinfo%begg,procinfo%endg - write(iulog,*) ' n, lni, lnj, ggidx = ', n, lni, lnj, procinfo%ggidx(n) + gindex_global_mpiscan(n-procinfo%begg+1) = procinfo%ggidx(n) + write(iulog,*) ' n, lni, lnj, ggidx, iam = ', n, lni, lnj, procinfo%ggidx(n), iam call procinfo%calc_globalxy_indices( n, lni, lnj, i, j ) procinfo%gi(n) = i procinfo%gj(n) = j - gindex_global_mpiscan(n-procinfo%begg+1) = procinfo%ggidx(n) end do call assert_equal(gindex_global, gindex_global_mpiscan, & msg='decompInit_lnd(): gindex_global MPI_SCAN error') @@ -391,16 +398,6 @@ subroutine decompInit_lnd_allocate( ier ) call endrun(msg="allocation error2 for clumpcnt", file=sourcefile, line=__LINE__) return end if - allocate(procinfo%gi(clump_pproc), stat=ier) - if (ier /= 0) 
then - call endrun(msg='allocation error for procinfo%gi', file=sourcefile, line=__LINE__) - return - endif - allocate(procinfo%gj(clump_pproc), stat=ier) - if (ier /= 0) then - call endrun(msg='allocation error for procinfo%gj', file=sourcefile, line=__LINE__) - return - endif end subroutine decompInit_lnd_allocate From 151e76b87d0c08a87ee7c0c5dc7cfc960d878bde Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 11 Sep 2025 16:11:27 -0600 Subject: [PATCH 105/141] Add more testing for correctness, add some notes about what can be removed for the final version --- src/main/decompInitMod.F90 | 166 ++++++++++++++++++++++++++++++++----- 1 file changed, 146 insertions(+), 20 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index afeb687eb9..c62797de1f 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -31,7 +31,7 @@ module decompInitMod integer, public :: clump_pproc ! number of clumps per MPI process ! ! !PRIVATE TYPES: - integer, pointer :: lcid(:) ! temporary for setting decomposition + integer, pointer :: lcid(:) ! temporary for setting decomposition, allocated set and used in decompInit_lnd, and used and deallocated in decompInit_clumps (Can make it allocatable) integer :: nglob_x, nglob_y ! global sizes integer, parameter :: dbug=0 ! 0 = min, 1=normal, 2=much, 3=max character(len=*), parameter :: sourcefile = & @@ -58,6 +58,7 @@ subroutine decompInit_lnd(lni, lnj, amask) ! Temporary testing stuff use Assertions, only : assert_equal use decompMod , only : processor_type, get_global_index, subgrid_level_gridcell + use decompMod , only : clump_type ! end temporary testing stuff ! ! !ARGUMENTS: @@ -76,16 +77,24 @@ subroutine decompInit_lnd(lni, lnj, amask) integer :: n,m,ng ! indices integer :: ier ! error code integer :: begg, endg ! beg and end gridcells + !--------------------------------------------------------------------- + ! 
------ Remove these global arrays when ready ------ integer, pointer :: clumpcnt(:) ! clump index counter integer, allocatable :: gdc2glo(:)! used to create gindex_global + ! --- Remove to here ----------------- + !--------------------------------------------------------------------- type(bounds_type) :: bounds ! contains subgrid bounds data - real(r8) :: msize, mrss + !--------------------------------------------------------------------- ! Temporary testing stuff + real(r8) :: msize, mrss integer, allocatable :: gindex_global_mpiscan(:)! ginfrx_global_mpiscan for the local PE based on the MPI_SCAN type(processor_type) :: procinfo_mpiscan ! procinfo for local PE based on the MPI_SCAN - integer :: cell_id_offset - integer :: i, j, g, lc + type(clump_type), allocatable :: clumps_mpiscan(:) ! procinfo for local PE based on the MPI_SCAN ! end temporary testing stuff + !--------------------------------------------------------------------- + integer :: i, j, g, lc, cid_previous ! Indices + integer :: cell_id_offset ! The offset for the starting gridcell number for this processor + integer :: begcid, endcid ! Beginning and ending cid's for this processor !------------------------------------------------------------------------------ call memcheck('decompInit_lnd: before allocate') @@ -150,8 +159,9 @@ subroutine decompInit_lnd(lni, lnj, amask) call endrun(msg="Round robin pid error", file=sourcefile, line=__LINE__) return endif - clumps(n)%owner = pid + clumps(n)%owner = pid ! This line should be able to be removed when clumps is only for the local task if (iam == pid) then + clumps(n)%owner = pid cid = cid + 1 if (cid < 1 .or. 
cid > clump_pproc) then write(iulog,*) 'round robin pid error ',n,pid,npes @@ -193,9 +203,11 @@ subroutine decompInit_lnd(lni, lnj, amask) endif lcid(ln) = cid + !--------------------------------------------------------------------- !--- give gridcell cell to pe that owns cid --- !--- this needs to be done to subsequently use function !--- get_proc_bounds(begg,endg) + ! ---- These lines will be able to be removed ----- if (iam == clumps(cid)%owner) then procinfo%ncells = procinfo%ncells + 1 endif @@ -205,9 +217,18 @@ subroutine decompInit_lnd(lni, lnj, amask) if (iam >= clumps(cid)%owner) then procinfo%endg = procinfo%endg + 1 endif + ! --- Remove to here ----------------- + !--------------------------------------------------------------------- + !--- give gridcell to cid for local processor --- + !--- Comment this out when ready ---------------- + if (iam == clumps(cid)%owner) then + !clumps_mpiscan(cid)%ncells = clumps_mpiscan(cid)%ncells + 1 + end if + !--------------------------------------------------------------------- !--- give gridcell to cid --- !--- increment the beg and end indices --- + ! --- This section will be removed --- clumps(cid)%ncells = clumps(cid)%ncells + 1 do m = 1,nclumps if ((clumps(m)%owner > clumps(cid)%owner) .or. & @@ -220,15 +241,21 @@ subroutine decompInit_lnd(lni, lnj, amask) clumps(m)%endg = clumps(m)%endg + 1 endif enddo + ! --- Remove to here ----------------- + !--------------------------------------------------------------------- end if enddo - - ! Set gindex_global - - gdc2glo(:) = 0 - - ! Temporary testing for MPI_SCAN, for just the local PE + !--------------------------------------------------------------------- + ! + ! Do an MPI_SCAN to get the starting index for each processor ---- + ! [Doing this both simplifies the code, reduces non-scalaable memory + ! and reduces execution time for loops that run over all gridcells + ! for each processor.] + ! 
(Doing the following few lines of coderemoved about 50 lines of complex code + ! as well as loops of size: ni*nj*nclumps, npes*nclumps, and ni*nj + ! that was being done on each processor) + !--------------------------------------------------------------------- call MPI_SCAN(procinfo%ncells, cell_id_offset, 1, MPI_INTEGER, & MPI_SUM, mpicom, ier) if ( ier /= 0 )then @@ -238,8 +265,42 @@ subroutine decompInit_lnd(lni, lnj, amask) procinfo_mpiscan%ncells = procinfo%ncells procinfo_mpiscan%begg = cell_id_offset - procinfo%ncells procinfo_mpiscan%endg = cell_id_offset - 1 + ! Temporary testing for MPI_SCAN, for just the local PE + allocate(procinfo_mpiscan%cid(clump_pproc)) + procinfo_mpiscan%cid = procinfo%cid + cid_previous = 0 + begcid = minval(procinfo_mpiscan%cid(:)) + endcid = maxval(procinfo_mpiscan%cid(:)) + call assert_equal(begcid, procinfo_mpiscan%cid(1), & + msg='decompInit_lnd(): begcid is not the first, MPI_SCAN error') + call assert_equal(endcid, procinfo_mpiscan%cid(clump_pproc), & + msg='decompInit_lnd(): endcid is not the last, MPI_SCAN error') + allocate(clumps_mpiscan(begcid:endcid)) ! End temporary testing + ! ---- Set begg and endg each clump on this processor ---- + do lc = 1, clump_pproc + cid = procinfo_mpiscan%cid(lc) + clumps_mpiscan(cid)%ncells = clumps(cid)%ncells ! This line will be removed + write(iulog,*) 'lc, cid, clumps%ncells', lc, cid, clumps(cid)%ncells + if ( lc == 1 )then + clumps_mpiscan(cid)%begg = procinfo_mpiscan%begg + else + call assert_equal(cid_previous, procinfo_mpiscan%cid(lc-1), & + msg='decompInit_lnd(): cid_previous MPI_SCAN error') + clumps_mpiscan(cid)%begg = clumps_mpiscan(cid_previous)%endg + 1 + end if + clumps_mpiscan(cid)%endg = clumps_mpiscan(cid)%begg + clumps_mpiscan(cid)%ncells - 1 + cid_previous = cid + end do + + !--------------------------------------------------------------------- + ! ------ Remove the following section when ready ------ + ! Set gindex_global + + gdc2glo(:) = 0 + + ! 
clumpcnt is the ending gdc index of each clump ag = 0 @@ -268,6 +329,8 @@ subroutine decompInit_lnd(lni, lnj, amask) end if end do end do + ! --- Remove to here ----------------- + !--------------------------------------------------------------------- ! Initialize global gindex (non-compressed, includes ocean points) ! Note that gindex_global goes from (1:endg) @@ -278,15 +341,19 @@ subroutine decompInit_lnd(lni, lnj, amask) nglob_x = lni ! decompMod module variables nglob_y = lnj ! decompMod module variables + !--------------------------------------------------------------------- + ! -------Remove the following section when ready --------------------- do cid = 1, clump_pproc - write(iulog,*) 'iam, cid, clumps(cid)%owner', iam, cid, clumps(cid)%owner + ! write(iulog,*) 'iam, cid, clumps(cid)%owner', iam, cid, clumps(cid)%owner end do do n = procinfo%begg,procinfo%endg - write(iulog,*) ' g, n, gdc2glo, iam = ', n, n-procinfo%begg+1, gdc2glo(n), iam + ! write(iulog,*) ' g, n, gdc2glo, iam = ', n, n-procinfo%begg+1, gdc2glo(n), iam gindex_global(n-procinfo%begg+1) = gdc2glo(n) enddo + ! --- Remove to here ----------------- + !--------------------------------------------------------------------- - ! Temporary testing for MPI_SCAN, for just the local PE + ! Get the global vector index on the full grid for each local processors gridcell g = procinfo%begg do lc = 1, clump_pproc do ln = 1,lns @@ -295,12 +362,10 @@ subroutine decompInit_lnd(lni, lnj, amask) if ( cid > 0 )then if (clumps(cid)%owner == iam) then if ( procinfo%cid(lc) == cid ) then - write(iulog,*) ' cid, clumps(cid)%owner, iam = ', cid, clumps(cid)%owner, iam if ( (g < procinfo%begg) .or. 
(g > procinfo%endg) )then write(iulog,*) ' iam, g = ', iam, g call endrun(msg='g out of bounds for MPI_SCAN test', file=sourcefile, line=__LINE__) end if - write(iulog,*) ' g, ggidx, iam = ', n, procinfo%ggidx(g), iam procinfo%ggidx(g) = ln g = g + 1 end if @@ -309,14 +374,20 @@ subroutine decompInit_lnd(lni, lnj, amask) end if end do end do + + ! Temporary testing for MPI_SCAN, for just the local PE --- allocate the test array allocate(gindex_global_mpiscan(1:bounds%endg)) + ! End temporary testing + + ! ---- Get the global index for each gridcell and save the i,j incices for ach gridcell on this processor do n = procinfo%begg,procinfo%endg - gindex_global_mpiscan(n-procinfo%begg+1) = procinfo%ggidx(n) - write(iulog,*) ' n, lni, lnj, ggidx, iam = ', n, lni, lnj, procinfo%ggidx(n), iam + gindex_global_mpiscan(n-procinfo%begg+1) = procinfo%ggidx(n) ! Change this to gindex_global when ready call procinfo%calc_globalxy_indices( n, lni, lnj, i, j ) procinfo%gi(n) = i procinfo%gj(n) = j end do + + ! Temporary testing for MPI_SCAN, for just the local PE call assert_equal(gindex_global, gindex_global_mpiscan, & msg='decompInit_lnd(): gindex_global MPI_SCAN error') call assert_equal(procinfo%begg, procinfo_mpiscan%begg, & @@ -325,9 +396,35 @@ subroutine decompInit_lnd(lni, lnj, amask) msg='decompInit_lnd(): endg MPI_SCAN error') call assert_equal(procinfo%ncells, procinfo_mpiscan%ncells, & msg='decompInit_lnd(): ncells MPI_SCAN error') + do lc = 1, clump_pproc + cid = procinfo%cid(lc) + call assert_equal(clumps(cid)%begg, clumps_mpiscan(cid)%begg, & + msg='decompInit_lnd(): clumps begg MPI_SCAN error') + call assert_equal(clumps(cid)%endg, clumps_mpiscan(cid)%endg, & + msg='decompInit_lnd(): clumps endg MPI_SCAN error') + end do deallocate(gindex_global_mpiscan) + deallocate(clumps_mpiscan) + + + ! End temporary testing + ! 
General error checking that the decomposition data is setup correctly + call assert_equal(clumps(begcid)%begg, procinfo%begg, & + msg='decompInit_lnd(): clumps(begcid) begg does not match procinfo begg') + call assert_equal(clumps(endcid)%endg, procinfo%endg, & + msg='decompInit_lnd(): clumps(endcid) endg does not match procinfo endg') + call assert_equal(sum(clumps(begcid:endcid)%ncells), procinfo%ncells, & + msg='decompInit_lnd(): sum of clumps ncells does not match procinfo ncells') + + do cid = begcid, endcid + call assert_equal( (clumps(cid)%endg-clumps(cid)%begg+1), clumps(cid)%ncells, & + msg='decompInit_lnd(): clumps(cid) endg-begg+1 does not match clumps ncells') + end do + call assert_equal( (procinfo%endg-procinfo%begg+1), procinfo%ncells, & + msg='decompInit_lnd(): procinfo endg-begg+1 does not match procinfo ncells') + call decompInit_lnd_clean() call memcheck('decompInit_lnd: after deallocate') @@ -371,6 +468,7 @@ subroutine decompInit_lnd_allocate( ier ) call endrun(msg="nclumps is NOT set before allocation", file=sourcefile, line=__LINE__) return end if + ! This will be moved to the other allocate and for a smaller size ---- allocate(clumps(nclumps), stat=ier) if (ier /= 0) then write(iulog,*) 'allocation error for clumps: nclumps, ier=', nclumps, ier @@ -383,6 +481,10 @@ subroutine decompInit_lnd_allocate( ier ) call endrun(msg="numg is NOT set before allocation", file=sourcefile, line=__LINE__) return end if + allocate(lcid(lns)) + + !--------------------------------------------------------------------- + ! 
---- Section to remove when ready ---- allocate(gdc2glo(numg), stat=ier) if (ier /= 0) then call endrun(msg="allocation error1 for gdc2glo , etc", file=sourcefile, line=__LINE__) @@ -392,12 +494,13 @@ subroutine decompInit_lnd_allocate( ier ) call endrun(msg="lns is NOT set before allocation", file=sourcefile, line=__LINE__) return end if - allocate(lcid(lns)) allocate(clumpcnt(nclumps),stat=ier) if (ier /= 0) then call endrun(msg="allocation error2 for clumpcnt", file=sourcefile, line=__LINE__) return end if + ! --- Remove to here ----------------- + !--------------------------------------------------------------------- end subroutine decompInit_lnd_allocate @@ -435,7 +538,7 @@ subroutine decompInit_lnd_clean() ! Deallocate the temporary variables used in decompInit_lnd deallocate(clumpcnt) deallocate(gdc2glo) - !deallocate(lcid) + !--- NOTE: Can only deallocate after decompInit_clumps ---- end subroutine decompInit_lnd_clean subroutine decompInit_lnd_check_errors( ier ) @@ -672,6 +775,29 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) call memcheck('decompInit_clumps: after deallocate') + + ! ------ Reset the clump type array for all non-local cid's to -1 to show it can be made smaller + do cid = 1, nclumps + if (clumps(cid)%owner /= iam) then + clumps(cid)%owner = -1 + clumps(cid)%ncells = -1 + clumps(cid)%nlunits = -1 + clumps(cid)%ncols = -1 + clumps(cid)%npatches = -1 + clumps(cid)%nCohorts = -1 + clumps(cid)%begg = -1 + clumps(cid)%begl = -1 + clumps(cid)%begc = -1 + clumps(cid)%begp = -1 + clumps(cid)%begCohort = -1 + clumps(cid)%endg = -1 + clumps(cid)%endl = -1 + clumps(cid)%endc = -1 + clumps(cid)%endp = -1 + clumps(cid)%endCohort = -1 + end if + end do + ! 
Diagnostic output call get_proc_global(ng=numg, nl=numl, nc=numc, np=nump, nCohorts=numCohort) From e854b1fbfb153bd088cc88649746d45f7dcb6e63 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 11 Sep 2025 16:13:23 -0600 Subject: [PATCH 106/141] Commit share directory with the latest update --- share | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share b/share index 9973692556..1a871cad0a 160000 --- a/share +++ b/share @@ -1 +1 @@ -Subproject commit 9973692556da54f9562935be43c1d43b0607d24b +Subproject commit 1a871cad0a90f8a361196f045313cca1919c7cbc From dd2a58ecb736b1e014999f28a5e3516f7befa904 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 11 Sep 2025 22:11:16 -0600 Subject: [PATCH 107/141] Get it working for the threaded case --- src/main/decompInitMod.F90 | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index c62797de1f..4a449c8e3e 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -275,6 +275,7 @@ subroutine decompInit_lnd(lni, lnj, amask) msg='decompInit_lnd(): begcid is not the first, MPI_SCAN error') call assert_equal(endcid, procinfo_mpiscan%cid(clump_pproc), & msg='decompInit_lnd(): endcid is not the last, MPI_SCAN error') + write(iulog,*) ' begcid, endcid, procinfo_mpiscan%cid = ', begcid, endcid, procinfo_mpiscan%cid allocate(clumps_mpiscan(begcid:endcid)) ! 
End temporary testing @@ -415,7 +416,10 @@ subroutine decompInit_lnd(lni, lnj, amask) msg='decompInit_lnd(): clumps(begcid) begg does not match procinfo begg') call assert_equal(clumps(endcid)%endg, procinfo%endg, & msg='decompInit_lnd(): clumps(endcid) endg does not match procinfo endg') - call assert_equal(sum(clumps(begcid:endcid)%ncells), procinfo%ncells, & + write(iulog,*) ' iam, clumps ncells = ', iam, clumps(begcid:endcid)%ncells + write(iulog,*) ' iam, sum( clumps ncells ) = ', iam, sum( clumps(procinfo%cid)%ncells ) + write(iulog,*) ' iam, proc ncells = ', iam, procinfo%ncells + call assert_equal(sum(clumps(procinfo%cid)%ncells), procinfo%ncells, & msg='decompInit_lnd(): sum of clumps ncells does not match procinfo ncells') do cid = begcid, endcid From c8c49269b7b144544b55c9f450570e6e2a68da57 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 12 Sep 2025 18:16:40 -0600 Subject: [PATCH 108/141] Comment out the old code and just use the new MPI_SCAN way, this works for serial mode --- src/main/decompInitMod.F90 | 239 ++++++++++++++++++------------------- 1 file changed, 119 insertions(+), 120 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index 4a449c8e3e..e6d3423ac4 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -79,17 +79,17 @@ subroutine decompInit_lnd(lni, lnj, amask) integer :: begg, endg ! beg and end gridcells !--------------------------------------------------------------------- ! ------ Remove these global arrays when ready ------ - integer, pointer :: clumpcnt(:) ! clump index counter - integer, allocatable :: gdc2glo(:)! used to create gindex_global + !integer, pointer :: clumpcnt(:) ! clump index counter + !integer, allocatable :: gdc2glo(:)! used to create gindex_global ! --- Remove to here ----------------- !--------------------------------------------------------------------- type(bounds_type) :: bounds ! 
contains subgrid bounds data !--------------------------------------------------------------------- ! Temporary testing stuff real(r8) :: msize, mrss - integer, allocatable :: gindex_global_mpiscan(:)! ginfrx_global_mpiscan for the local PE based on the MPI_SCAN - type(processor_type) :: procinfo_mpiscan ! procinfo for local PE based on the MPI_SCAN - type(clump_type), allocatable :: clumps_mpiscan(:) ! procinfo for local PE based on the MPI_SCAN + !integer, allocatable :: gindex_global_mpiscan(:)! ginfrx_global_mpiscan for the local PE based on the MPI_SCAN + !type(processor_type) :: procinfo_mpiscan ! procinfo for local PE based on the MPI_SCAN + !type(clump_type), allocatable :: clumps_mpiscan(:) ! procinfo for local PE based on the MPI_SCAN ! end temporary testing stuff !--------------------------------------------------------------------- integer :: i, j, g, lc, cid_previous ! Indices @@ -159,7 +159,7 @@ subroutine decompInit_lnd(lni, lnj, amask) call endrun(msg="Round robin pid error", file=sourcefile, line=__LINE__) return endif - clumps(n)%owner = pid ! This line should be able to be removed when clumps is only for the local task + !clumps(n)%owner = pid ! This line should be able to be removed when clumps is only for the local task if (iam == pid) then clumps(n)%owner = pid cid = cid + 1 @@ -203,44 +203,45 @@ subroutine decompInit_lnd(lni, lnj, amask) endif lcid(ln) = cid + ! Get the total number of gridcells for the local processor + if (iam == clumps(cid)%owner) then + procinfo%ncells = procinfo%ncells + 1 + endif !--------------------------------------------------------------------- !--- give gridcell cell to pe that owns cid --- !--- this needs to be done to subsequently use function !--- get_proc_bounds(begg,endg) ! 
---- These lines will be able to be removed ----- - if (iam == clumps(cid)%owner) then - procinfo%ncells = procinfo%ncells + 1 - endif - if (iam > clumps(cid)%owner) then - procinfo%begg = procinfo%begg + 1 - endif - if (iam >= clumps(cid)%owner) then - procinfo%endg = procinfo%endg + 1 - endif + !if (iam > clumps(cid)%owner) then + ! procinfo%begg = procinfo%begg + 1 + !endif + !if (iam >= clumps(cid)%owner) then + ! procinfo%endg = procinfo%endg + 1 + !endif ! --- Remove to here ----------------- !--------------------------------------------------------------------- !--- give gridcell to cid for local processor --- !--- Comment this out when ready ---------------- if (iam == clumps(cid)%owner) then - !clumps_mpiscan(cid)%ncells = clumps_mpiscan(cid)%ncells + 1 + clumps(cid)%ncells = clumps(cid)%ncells + 1 end if !--------------------------------------------------------------------- !--- give gridcell to cid --- !--- increment the beg and end indices --- ! --- This section will be removed --- - clumps(cid)%ncells = clumps(cid)%ncells + 1 - do m = 1,nclumps - if ((clumps(m)%owner > clumps(cid)%owner) .or. & - (clumps(m)%owner == clumps(cid)%owner .and. m > cid)) then - clumps(m)%begg = clumps(m)%begg + 1 - endif - - if ((clumps(m)%owner > clumps(cid)%owner) .or. & - (clumps(m)%owner == clumps(cid)%owner .and. m >= cid)) then - clumps(m)%endg = clumps(m)%endg + 1 - endif - enddo + !clumps(cid)%ncells = clumps(cid)%ncells + 1 + !do m = 1,nclumps + ! if ((clumps(m)%owner > clumps(cid)%owner) .or. & + ! (clumps(m)%owner == clumps(cid)%owner .and. m > cid)) then + ! clumps(m)%begg = clumps(m)%begg + 1 + ! endif + + ! if ((clumps(m)%owner > clumps(cid)%owner) .or. & + ! (clumps(m)%owner == clumps(cid)%owner .and. m >= cid)) then + ! clumps(m)%endg = clumps(m)%endg + 1 + ! endif + !nddo ! 
--- Remove to here ----------------- !--------------------------------------------------------------------- @@ -262,36 +263,34 @@ subroutine decompInit_lnd(lni, lnj, amask) call endrun(msg='Error from MPI_SCAN', file=sourcefile, line=__LINE__) end if cell_id_offset = cell_id_offset + 1 - procinfo_mpiscan%ncells = procinfo%ncells - procinfo_mpiscan%begg = cell_id_offset - procinfo%ncells - procinfo_mpiscan%endg = cell_id_offset - 1 + write(iulog,*) 'cell_id_offset = ', cell_id_offset + procinfo%begg = cell_id_offset - procinfo%ncells + procinfo%endg = cell_id_offset - 1 + write(iulog,*) 'procinfo: ncells, begg, endg = ', procinfo%ncells, procinfo%begg, procinfo%endg ! Temporary testing for MPI_SCAN, for just the local PE - allocate(procinfo_mpiscan%cid(clump_pproc)) - procinfo_mpiscan%cid = procinfo%cid - cid_previous = 0 - begcid = minval(procinfo_mpiscan%cid(:)) - endcid = maxval(procinfo_mpiscan%cid(:)) - call assert_equal(begcid, procinfo_mpiscan%cid(1), & - msg='decompInit_lnd(): begcid is not the first, MPI_SCAN error') - call assert_equal(endcid, procinfo_mpiscan%cid(clump_pproc), & - msg='decompInit_lnd(): endcid is not the last, MPI_SCAN error') - write(iulog,*) ' begcid, endcid, procinfo_mpiscan%cid = ', begcid, endcid, procinfo_mpiscan%cid - allocate(clumps_mpiscan(begcid:endcid)) + !allocate(procinfo%cid(clump_pproc)) + !cid_previous = 0 + !begcid = minval(procinfo%cid(:)) + !endcid = maxval(procinfo%cid(:)) + !call assert_equal(begcid, procinfo%cid(1), & + !msg='decompInit_lnd(): begcid is not the first, MPI_SCAN error') + !call assert_equal(endcid, procinfo%cid(clump_pproc), & + !msg='decompInit_lnd(): endcid is not the last, MPI_SCAN error') + !write(iulog,*) ' begcid, endcid, procinfo%cid = ', begcid, endcid, procinfo%cid ! End temporary testing ! ---- Set begg and endg each clump on this processor ---- do lc = 1, clump_pproc - cid = procinfo_mpiscan%cid(lc) - clumps_mpiscan(cid)%ncells = clumps(cid)%ncells ! 
This line will be removed - write(iulog,*) 'lc, cid, clumps%ncells', lc, cid, clumps(cid)%ncells + cid = procinfo%cid(lc) + clumps(cid)%ncells = clumps(cid)%ncells ! This line will be removed + !write(iulog,*) 'lc, cid, clumps%ncells', lc, cid, clumps(cid)%ncells if ( lc == 1 )then - clumps_mpiscan(cid)%begg = procinfo_mpiscan%begg + clumps(cid)%begg = procinfo%begg else - call assert_equal(cid_previous, procinfo_mpiscan%cid(lc-1), & - msg='decompInit_lnd(): cid_previous MPI_SCAN error') - clumps_mpiscan(cid)%begg = clumps_mpiscan(cid_previous)%endg + 1 + cid_previous = procinfo%cid(lc-1) + clumps(cid)%begg = clumps(cid_previous)%endg + 1 end if - clumps_mpiscan(cid)%endg = clumps_mpiscan(cid)%begg + clumps_mpiscan(cid)%ncells - 1 + clumps(cid)%endg = clumps(cid)%begg + clumps(cid)%ncells - 1 cid_previous = cid end do @@ -299,42 +298,43 @@ subroutine decompInit_lnd(lni, lnj, amask) ! ------ Remove the following section when ready ------ ! Set gindex_global - gdc2glo(:) = 0 + !dc2glo(:) = 0 ! clumpcnt is the ending gdc index of each clump - ag = 0 - clumpcnt = 0 - ag = 1 - do pid = 0,npes-1 - do cid = 1,nclumps - if (clumps(cid)%owner == pid) then - clumpcnt(cid) = ag - ag = ag + clumps(cid)%ncells - endif - enddo - enddo + !g = 0 + !lumpcnt = 0 + !g = 1 + !o pid = 0,npes-1 + !o cid = 1,nclumps + ! if (clumps(cid)%owner == pid) then + ! clumpcnt(cid) = ag + ! ag = ag + clumps(cid)%ncells + ! endif + !nddo + !nddo ! now go through gridcells one at a time and increment clumpcnt ! in order to set gdc2glo - do aj = 1,lnj - do ai = 1,lni - an = (aj-1)*lni + ai - cid = lcid(an) - if (cid > 0) then - ag = clumpcnt(cid) - gdc2glo(ag) = an - clumpcnt(cid) = clumpcnt(cid) + 1 - end if - end do - end do + !o aj = 1,lnj + !o ai = 1,lni + ! an = (aj-1)*lni + ai + ! cid = lcid(an) + ! if (cid > 0) then + ! ag = clumpcnt(cid) + ! gdc2glo(ag) = an + ! clumpcnt(cid) = clumpcnt(cid) + 1 + ! end if + !nd do + !nd do ! 
--- Remove to here ----------------- !--------------------------------------------------------------------- ! Initialize global gindex (non-compressed, includes ocean points) ! Note that gindex_global goes from (1:endg) + write(iulog,*) 'begg, endg = ', procinfo%begg, procinfo%endg call get_proc_bounds(bounds, allow_errors=.true.) ! This has to be done after procinfo is finalized call decompInit_lnd_gindex_global_allocate( bounds, ier ) ! This HAS to be done after procinfo is finalized if (ier /= 0) return @@ -344,13 +344,13 @@ subroutine decompInit_lnd(lni, lnj, amask) !--------------------------------------------------------------------- ! -------Remove the following section when ready --------------------- - do cid = 1, clump_pproc + !o cid = 1, clump_pproc ! write(iulog,*) 'iam, cid, clumps(cid)%owner', iam, cid, clumps(cid)%owner - end do - do n = procinfo%begg,procinfo%endg + !nd do + !o n = procinfo%begg,procinfo%endg ! write(iulog,*) ' g, n, gdc2glo, iam = ', n, n-procinfo%begg+1, gdc2glo(n), iam - gindex_global(n-procinfo%begg+1) = gdc2glo(n) - enddo + ! gindex_global(n-procinfo%begg+1) = gdc2glo(n) + !nddo ! --- Remove to here ----------------- !--------------------------------------------------------------------- @@ -376,53 +376,52 @@ subroutine decompInit_lnd(lni, lnj, amask) end do end do - ! Temporary testing for MPI_SCAN, for just the local PE --- allocate the test array - allocate(gindex_global_mpiscan(1:bounds%endg)) - ! End temporary testing - ! ---- Get the global index for each gridcell and save the i,j incices for ach gridcell on this processor do n = procinfo%begg,procinfo%endg - gindex_global_mpiscan(n-procinfo%begg+1) = procinfo%ggidx(n) ! Change this to gindex_global when ready + gindex_global(n-procinfo%begg+1) = procinfo%ggidx(n) ! Change this to gindex_global when ready call procinfo%calc_globalxy_indices( n, lni, lnj, i, j ) procinfo%gi(n) = i procinfo%gj(n) = j end do ! 
Temporary testing for MPI_SCAN, for just the local PE - call assert_equal(gindex_global, gindex_global_mpiscan, & - msg='decompInit_lnd(): gindex_global MPI_SCAN error') - call assert_equal(procinfo%begg, procinfo_mpiscan%begg, & - msg='decompInit_lnd(): begg MPI_SCAN error') - call assert_equal(procinfo%endg, procinfo_mpiscan%endg, & - msg='decompInit_lnd(): endg MPI_SCAN error') - call assert_equal(procinfo%ncells, procinfo_mpiscan%ncells, & - msg='decompInit_lnd(): ncells MPI_SCAN error') - do lc = 1, clump_pproc - cid = procinfo%cid(lc) - call assert_equal(clumps(cid)%begg, clumps_mpiscan(cid)%begg, & - msg='decompInit_lnd(): clumps begg MPI_SCAN error') - call assert_equal(clumps(cid)%endg, clumps_mpiscan(cid)%endg, & - msg='decompInit_lnd(): clumps endg MPI_SCAN error') - end do - deallocate(gindex_global_mpiscan) - deallocate(clumps_mpiscan) + !call assert_equal(gindex_global, gindex_global_mpiscan, & + ! msg='decompInit_lnd(): gindex_global MPI_SCAN error') + !call assert_equal(procinfo%begg, procinfo_mpiscan%begg, & + ! msg='decompInit_lnd(): begg MPI_SCAN error') + !call assert_equal(procinfo%endg, procinfo_mpiscan%endg, & + ! msg='decompInit_lnd(): endg MPI_SCAN error') + !call assert_equal(procinfo%ncells, procinfo_mpiscan%ncells, & + ! msg='decompInit_lnd(): ncells MPI_SCAN error') + !do lc = 1, clump_pproc + ! cid = procinfo%cid(lc) + ! call assert_equal(clumps(cid)%begg, clumps_mpiscan(cid)%begg, & + ! msg='decompInit_lnd(): clumps begg MPI_SCAN error') + ! call assert_equal(clumps(cid)%endg, clumps_mpiscan(cid)%endg, & + ! msg='decompInit_lnd(): clumps endg MPI_SCAN error') + !end do + !deallocate(gindex_global_mpiscan) + !deallocate(clumps_mpiscan) ! End temporary testing ! 
General error checking that the decomposition data is setup correctly + begcid = procinfo%cid(1) + endcid = procinfo%cid(clump_pproc) call assert_equal(clumps(begcid)%begg, procinfo%begg, & msg='decompInit_lnd(): clumps(begcid) begg does not match procinfo begg') call assert_equal(clumps(endcid)%endg, procinfo%endg, & msg='decompInit_lnd(): clumps(endcid) endg does not match procinfo endg') - write(iulog,*) ' iam, clumps ncells = ', iam, clumps(begcid:endcid)%ncells - write(iulog,*) ' iam, sum( clumps ncells ) = ', iam, sum( clumps(procinfo%cid)%ncells ) - write(iulog,*) ' iam, proc ncells = ', iam, procinfo%ncells + !write(iulog,*) ' iam, clumps ncells = ', iam, clumps(begcid:endcid)%ncells + !write(iulog,*) ' iam, sum( clumps ncells ) = ', iam, sum( clumps(procinfo%cid)%ncells ) + !write(iulog,*) ' iam, proc ncells = ', iam, procinfo%ncells call assert_equal(sum(clumps(procinfo%cid)%ncells), procinfo%ncells, & msg='decompInit_lnd(): sum of clumps ncells does not match procinfo ncells') - do cid = begcid, endcid + do lc = 1, clump_pproc + cid = procinfo%cid(lc) call assert_equal( (clumps(cid)%endg-clumps(cid)%begg+1), clumps(cid)%ncells, & msg='decompInit_lnd(): clumps(cid) endg-begg+1 does not match clumps ncells') end do @@ -489,20 +488,20 @@ subroutine decompInit_lnd_allocate( ier ) !--------------------------------------------------------------------- ! ---- Section to remove when ready ---- - allocate(gdc2glo(numg), stat=ier) - if (ier /= 0) then - call endrun(msg="allocation error1 for gdc2glo , etc", file=sourcefile, line=__LINE__) - return - end if - if ( lns < 1 )then - call endrun(msg="lns is NOT set before allocation", file=sourcefile, line=__LINE__) - return - end if - allocate(clumpcnt(nclumps),stat=ier) - if (ier /= 0) then - call endrun(msg="allocation error2 for clumpcnt", file=sourcefile, line=__LINE__) - return - end if + !llocate(gdc2glo(numg), stat=ier) + !f (ier /= 0) then + ! 
call endrun(msg="allocation error1 for gdc2glo , etc", file=sourcefile, line=__LINE__) + ! return + !nd if + !f ( lns < 1 )then + ! call endrun(msg="lns is NOT set before allocation", file=sourcefile, line=__LINE__) + ! return + !nd if + !llocate(clumpcnt(nclumps),stat=ier) + !f (ier /= 0) then + ! call endrun(msg="allocation error2 for clumpcnt", file=sourcefile, line=__LINE__) + ! return + !nd if ! --- Remove to here ----------------- !--------------------------------------------------------------------- @@ -540,8 +539,8 @@ end subroutine decompInit_lnd_gindex_global_allocate subroutine decompInit_lnd_clean() ! Deallocate the temporary variables used in decompInit_lnd - deallocate(clumpcnt) - deallocate(gdc2glo) + !deallocate(clumpcnt) + !deallocate(gdc2glo) !--- NOTE: Can only deallocate after decompInit_clumps ---- end subroutine decompInit_lnd_clean From c87adff765126e2d3523b6b33828e9b55988e622 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Sun, 14 Sep 2025 15:29:25 -0600 Subject: [PATCH 109/141] Just do the checking over the local processor clumps and not all the global clumps --- src/main/decompInitMod.F90 | 3 +++ src/self_tests/TestDecompInit.F90 | 8 +++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index e6d3423ac4..f813e16bb8 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -753,6 +753,8 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) enddo do n = 1,nclumps + ! Only do the error checking over the local processor + if (clumps(n)%owner == iam) then if (clumps(n)%ncells /= allvecg(n,1) .or. & clumps(n)%nlunits /= allvecg(n,2) .or. & clumps(n)%ncols /= allvecg(n,3) .or. 
& @@ -769,6 +771,7 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) call endrun(msg="allvecg error cohorts", file=sourcefile, line=__LINE__) return endif + endif enddo call memcheck('decompInit_clumps: before deallocate') diff --git a/src/self_tests/TestDecompInit.F90 b/src/self_tests/TestDecompInit.F90 index 7705ea80fc..b88b62ce85 100644 --- a/src/self_tests/TestDecompInit.F90 +++ b/src/self_tests/TestDecompInit.F90 @@ -293,7 +293,7 @@ subroutine test_decompInit_clump_gcell_info_correct() use decompMod, only : get_proc_bounds type(bounds_type) :: bounds integer :: expected_gcells, iclump, g, beg_global_index, gcell_per_task - integer :: expected_begg, expected_endg + integer :: expected_begg, expected_endg, lc call setup() expected_gcells = ni*nj @@ -314,8 +314,10 @@ subroutine test_decompInit_clump_gcell_info_correct() else call assert_equal( nclumps/clump_pproc, npes, msg='nclumps divided by clump_pproc should match number of processors when clump_pproc > 1' ) end if - do iclump = 1, nclumps - call assert_equal( clumps(iclump)%owner, iclump-1, msg='clumps owner is not correct' ) + ! 
Just test over the local clumps + do lc = 1, clump_pproc + iclump = procinfo%cid(lc) + call assert_equal( clumps(iclump)%owner, iam, msg='clumps owner is not correct' ) call assert_equal( clumps(iclump)%ncells, gcell_per_task, msg='clumps ncells is not correct' ) end do call clean() From 5b713239d19ad734ba7fd5f815f1ed12fac6c049 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 29 Sep 2025 13:53:02 -0600 Subject: [PATCH 110/141] Revert the timers added so going back to 64c700667b41b7631ba77cacf42e1fe6f58ac6f5 for UrbanTimeVarType.F90 --- src/cpl/share_esmf/UrbanTimeVarType.F90 | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/cpl/share_esmf/UrbanTimeVarType.F90 b/src/cpl/share_esmf/UrbanTimeVarType.F90 index 2870ae9b5c..1e6d004e96 100644 --- a/src/cpl/share_esmf/UrbanTimeVarType.F90 +++ b/src/cpl/share_esmf/UrbanTimeVarType.F90 @@ -16,7 +16,6 @@ module UrbanTimeVarType use clm_varcon , only : spval use LandunitType , only : lun use GridcellType , only : grc - use perf_mod , only : t_startf, t_stopf ! implicit none private @@ -144,8 +143,6 @@ subroutine urbantv_init(this, bounds, NLFilename) stream_meshfile_urbantv, & urbantv_tintalgo - call t_startf("urbantv_init") - ! Default values for namelist stream_year_first_urbantv = 1 ! first year in stream to use stream_year_last_urbantv = 1 ! last year in stream to use @@ -198,7 +195,6 @@ subroutine urbantv_init(this, bounds, NLFilename) endif ! 
Initialize the cdeps data type this%sdat_urbantv - call t_startf("str_data_init") call shr_strdata_init_from_inline(this%sdat_urbantv, & my_task = iam, & logunit = iulog, & @@ -223,9 +219,6 @@ subroutine urbantv_init(this, bounds, NLFilename) if (ESMF_LogFoundError(rcToCheck=rc, msg=ESMF_LOGERR_PASSTHRU, line=__LINE__, file=__FILE__)) then call ESMF_Finalize(endflag=ESMF_END_ABORT) end if - call t_stopf("str_data_init") - - call t_stopf("urbantv_init") end subroutine urbantv_init From 46f1e2ae9cf0435bf3949b1edb32340828b41a19 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 29 Sep 2025 16:10:31 -0600 Subject: [PATCH 111/141] Remove extra timers branch in cdeps --- .gitmodules | 7 ++----- components/cdeps | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.gitmodules b/.gitmodules index dce8491864..bad30c6d3c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -91,11 +91,8 @@ fxDONOTUSEurl = https://github.com/ESCOMP/CMEPS.git [submodule "cdeps"] path = components/cdeps -#url = https://github.com/ESCOMP/CDEPS.git -url = https://github.com/ekluzek/CDEPS.git -#fxtag = cdeps1.0.79 -#fxtag = add_more_timers -fxtag = 65f7bb0c88420d58a080f9fcc8afbd945c90211a +url = https://github.com/ESCOMP/CDEPS.git +fxtag = cdeps1.0.79 fxrequired = ToplevelRequired # Standard Fork to compare to with "git fleximod test" to ensure personal forks aren't committed fxDONOTUSEurl = https://github.com/ESCOMP/CDEPS.git diff --git a/components/cdeps b/components/cdeps index 65f7bb0c88..b65f283437 160000 --- a/components/cdeps +++ b/components/cdeps @@ -1 +1 @@ -Subproject commit 65f7bb0c88420d58a080f9fcc8afbd945c90211a +Subproject commit b65f28343708789f75a0f422a2fb6bc02036474e From ad97c280f6c0b1edd0d54e9943f4955726c37d57 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 29 Sep 2025 16:28:06 -0600 Subject: [PATCH 112/141] Fix the placement of the t_stopf for the final readmesh timer was outside the subroutine --- src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index 245844a469..aa4dcc1aaa 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -273,6 +273,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes if (chkerr(rc,__LINE__,u_FILE_u)) return call memcheck('lnd_set_decomp_and_domain_from_readmesh: after deallocate') + call t_stopf('lnd_set_decomp_and_domain_from_readmesh: final') !=============================================================================== ! Internal subroutines for this subroutine @@ -313,7 +314,6 @@ end subroutine from_readmesh_dealloc !------------------------------------------------------------------------------- - call t_stopf('lnd_set_decomp_and_domain_from_readmesh: final') end subroutine lnd_set_decomp_and_domain_from_readmesh From dcdc1ed7bc8ed148b8cc34ecddbf0b3341fbe48b Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 29 Sep 2025 16:30:43 -0600 Subject: [PATCH 113/141] Remove the decompInit_lnd old code commented out, and extra writes to the log that aren't needed anymore --- src/main/decompInitMod.F90 | 148 +------------------------------------ 1 file changed, 3 insertions(+), 145 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index d4f1d5c195..a9e9a960eb 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -78,18 +78,10 @@ subroutine decompInit_lnd(lni, lnj, amask) integer :: ier ! error code integer :: begg, endg ! beg and end gridcells !--------------------------------------------------------------------- - ! ------ Remove these global arrays when ready ------ - !integer, pointer :: clumpcnt(:) ! clump index counter - !integer, allocatable :: gdc2glo(:)! used to create gindex_global - ! 
--- Remove to here ----------------- - !--------------------------------------------------------------------- type(bounds_type) :: bounds ! contains subgrid bounds data !--------------------------------------------------------------------- ! Temporary testing stuff real(r8) :: msize, mrss - !integer, allocatable :: gindex_global_mpiscan(:)! ginfrx_global_mpiscan for the local PE based on the MPI_SCAN - !type(processor_type) :: procinfo_mpiscan ! procinfo for local PE based on the MPI_SCAN - !type(clump_type), allocatable :: clumps_mpiscan(:) ! procinfo for local PE based on the MPI_SCAN ! end temporary testing stuff !--------------------------------------------------------------------- integer :: i, j, g, lc, cid_previous ! Indices @@ -204,43 +196,11 @@ subroutine decompInit_lnd(lni, lnj, amask) if (iam == clumps(cid)%owner) then procinfo%ncells = procinfo%ncells + 1 endif - !--------------------------------------------------------------------- - !--- give gridcell cell to pe that owns cid --- - !--- this needs to be done to subsequently use function - !--- get_proc_bounds(begg,endg) - ! ---- These lines will be able to be removed ----- - !if (iam > clumps(cid)%owner) then - ! procinfo%begg = procinfo%begg + 1 - !endif - !if (iam >= clumps(cid)%owner) then - ! procinfo%endg = procinfo%endg + 1 - !endif - ! --- Remove to here ----------------- - !--------------------------------------------------------------------- !--- give gridcell to cid for local processor --- - !--- Comment this out when ready ---------------- if (iam == clumps(cid)%owner) then clumps(cid)%ncells = clumps(cid)%ncells + 1 end if - !--------------------------------------------------------------------- - !--- give gridcell to cid --- - !--- increment the beg and end indices --- - ! --- This section will be removed --- - !clumps(cid)%ncells = clumps(cid)%ncells + 1 - !do m = 1,nclumps - ! if ((clumps(m)%owner > clumps(cid)%owner) .or. & - ! (clumps(m)%owner == clumps(cid)%owner .and. 
m > cid)) then - ! clumps(m)%begg = clumps(m)%begg + 1 - ! endif - - ! if ((clumps(m)%owner > clumps(cid)%owner) .or. & - ! (clumps(m)%owner == clumps(cid)%owner .and. m >= cid)) then - ! clumps(m)%endg = clumps(m)%endg + 1 - ! endif - !nddo - ! --- Remove to here ----------------- - !--------------------------------------------------------------------- end if enddo @@ -250,7 +210,7 @@ subroutine decompInit_lnd(lni, lnj, amask) ! [Doing this both simplifies the code, reduces non-scalaable memory ! and reduces execution time for loops that run over all gridcells ! for each processor.] - ! (Doing the following few lines of coderemoved about 50 lines of complex code + ! (Doing the following few lines of code removed about 50 lines of complex code ! as well as loops of size: ni*nj*nclumps, npes*nclumps, and ni*nj ! that was being done on each processor) !--------------------------------------------------------------------- @@ -260,27 +220,12 @@ subroutine decompInit_lnd(lni, lnj, amask) call endrun(msg='Error from MPI_SCAN', file=sourcefile, line=__LINE__) end if cell_id_offset = cell_id_offset + 1 - write(iulog,*) 'cell_id_offset = ', cell_id_offset procinfo%begg = cell_id_offset - procinfo%ncells procinfo%endg = cell_id_offset - 1 - write(iulog,*) 'procinfo: ncells, begg, endg = ', procinfo%ncells, procinfo%begg, procinfo%endg - ! Temporary testing for MPI_SCAN, for just the local PE - !allocate(procinfo%cid(clump_pproc)) - !cid_previous = 0 - !begcid = minval(procinfo%cid(:)) - !endcid = maxval(procinfo%cid(:)) - !call assert_equal(begcid, procinfo%cid(1), & - !msg='decompInit_lnd(): begcid is not the first, MPI_SCAN error') - !call assert_equal(endcid, procinfo%cid(clump_pproc), & - !msg='decompInit_lnd(): endcid is not the last, MPI_SCAN error') - !write(iulog,*) ' begcid, endcid, procinfo%cid = ', begcid, endcid, procinfo%cid - ! End temporary testing - ! 
---- Set begg and endg each clump on this processor ---- do lc = 1, clump_pproc cid = procinfo%cid(lc) clumps(cid)%ncells = clumps(cid)%ncells ! This line will be removed - !write(iulog,*) 'lc, cid, clumps%ncells', lc, cid, clumps(cid)%ncells if ( lc == 1 )then clumps(cid)%begg = procinfo%begg else @@ -291,47 +236,8 @@ subroutine decompInit_lnd(lni, lnj, amask) cid_previous = cid end do - !--------------------------------------------------------------------- - ! ------ Remove the following section when ready ------ - ! Set gindex_global - - !gdc2glo(:) = 0 - - - ! clumpcnt is the ending gdc index of each clump - - !g = 0 - !lumpcnt = 0 - !g = 1 - !o pid = 0,npes-1 - !o cid = 1,nclumps - ! if (clumps(cid)%owner == pid) then - ! clumpcnt(cid) = ag - ! ag = ag + clumps(cid)%ncells - ! endif - !nddo - !nddo - - ! now go through gridcells one at a time and increment clumpcnt - ! in order to set gdc2glo - - !o aj = 1,lnj - !o ai = 1,lni - ! an = (aj-1)*lni + ai - ! cid = lcid(an) - ! if (cid > 0) then - ! ag = clumpcnt(cid) - ! gdc2glo(ag) = an - ! clumpcnt(cid) = clumpcnt(cid) + 1 - ! end if - !nd do - !nd do - ! --- Remove to here ----------------- - !--------------------------------------------------------------------- - ! Initialize global gindex (non-compressed, includes ocean points) ! Note that gindex_global goes from (1:endg) - write(iulog,*) 'begg, endg = ', procinfo%begg, procinfo%endg call get_proc_bounds(bounds, allow_errors=.true.) ! This has to be done after procinfo is finalized call decompInit_lnd_gindex_global_allocate( bounds, ier ) ! This HAS to be done after procinfo is finalized if (ier /= 0) return @@ -339,16 +245,6 @@ subroutine decompInit_lnd(lni, lnj, amask) nglob_x = lni ! decompMod module variables nglob_y = lnj ! decompMod module variables - !--------------------------------------------------------------------- - ! -------Remove the following section when ready --------------------- - !o cid = 1, clump_pproc - ! 
write(iulog,*) 'iam, cid, clumps(cid)%owner', iam, cid, clumps(cid)%owner - !nd do - !o n = procinfo%begg,procinfo%endg - ! write(iulog,*) ' g, n, gdc2glo, iam = ', n, n-procinfo%begg+1, gdc2glo(n), iam - ! gindex_global(n-procinfo%begg+1) = gdc2glo(n) - !nddo - ! --- Remove to here ----------------- !--------------------------------------------------------------------- ! Get the global vector index on the full grid for each local processors gridcell @@ -381,39 +277,15 @@ subroutine decompInit_lnd(lni, lnj, amask) procinfo%gj(n) = j end do - ! Temporary testing for MPI_SCAN, for just the local PE - !call assert_equal(gindex_global, gindex_global_mpiscan, & - ! msg='decompInit_lnd(): gindex_global MPI_SCAN error') - !call assert_equal(procinfo%begg, procinfo_mpiscan%begg, & - ! msg='decompInit_lnd(): begg MPI_SCAN error') - !call assert_equal(procinfo%endg, procinfo_mpiscan%endg, & - ! msg='decompInit_lnd(): endg MPI_SCAN error') - !call assert_equal(procinfo%ncells, procinfo_mpiscan%ncells, & - ! msg='decompInit_lnd(): ncells MPI_SCAN error') - !do lc = 1, clump_pproc - ! cid = procinfo%cid(lc) - ! call assert_equal(clumps(cid)%begg, clumps_mpiscan(cid)%begg, & - ! msg='decompInit_lnd(): clumps begg MPI_SCAN error') - ! call assert_equal(clumps(cid)%endg, clumps_mpiscan(cid)%endg, & - ! msg='decompInit_lnd(): clumps endg MPI_SCAN error') - !end do - !deallocate(gindex_global_mpiscan) - !deallocate(clumps_mpiscan) - - - - ! End temporary testing - + !--------------------------------------------------------------------- ! 
General error checking that the decomposition data is setup correctly + !--------------------------------------------------------------------- begcid = procinfo%cid(1) endcid = procinfo%cid(clump_pproc) call assert_equal(clumps(begcid)%begg, procinfo%begg, & msg='decompInit_lnd(): clumps(begcid) begg does not match procinfo begg') call assert_equal(clumps(endcid)%endg, procinfo%endg, & msg='decompInit_lnd(): clumps(endcid) endg does not match procinfo endg') - !write(iulog,*) ' iam, clumps ncells = ', iam, clumps(begcid:endcid)%ncells - !write(iulog,*) ' iam, sum( clumps ncells ) = ', iam, sum( clumps(procinfo%cid)%ncells ) - !write(iulog,*) ' iam, proc ncells = ', iam, procinfo%ncells call assert_equal(sum(clumps(procinfo%cid)%ncells), procinfo%ncells, & msg='decompInit_lnd(): sum of clumps ncells does not match procinfo ncells') @@ -503,20 +375,6 @@ subroutine decompInit_lnd_allocate( ier ) call endrun(msg="allocation error for lcid", file=sourcefile, line=__LINE__) return end if - !--------------------------------------------------------------------- - ! ---- Section to remove when ready ---- - !allocate(gdc2glo(numg), stat=ier) - !if (ier /= 0) then - ! call endrun(msg="allocation error for gdc2glo", file=sourcefile, line=__LINE__) - ! return - !end if - !allocate(clumpcnt(nclumps),stat=ier) - !if (ier /= 0) then - ! call endrun(msg="allocation error for clumpcnt", file=sourcefile, line=__LINE__) - ! return - !end if - ! 
--- Remove to here ----------------- - !--------------------------------------------------------------------- end subroutine decompInit_lnd_allocate From 4e501a9adca018ed7a7502ddb815942acbfdc9f8 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 29 Sep 2025 16:31:48 -0600 Subject: [PATCH 114/141] Initialize the gi/gj arrays to unset --- src/main/decompInitMod.F90 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index a9e9a960eb..04680e1354 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -408,11 +408,13 @@ subroutine decompInit_lnd_gindex_global_allocate( bounds, ier ) call endrun(msg='allocation error for procinfo%gi', file=sourcefile, line=__LINE__) return endif + procinfo%gi(:) = -1 allocate(procinfo%gj(procinfo%begg:procinfo%endg), stat=ier) if (ier /= 0) then call endrun(msg='allocation error for procinfo%gj', file=sourcefile, line=__LINE__) return endif + procinfo%gj(:) = -1 end subroutine decompInit_lnd_gindex_global_allocate !------------------------------------------------------------------------------ From de1ca06caf866de2c72fac3847cf3c7d2ebd44bd Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 27 Jun 2025 16:54:13 -0600 Subject: [PATCH 115/141] Add namelist controls for self testing Conflicts: bld/namelist_files/namelist_definition_ctsm.xml src/cpl/nuopc/lnd_comp_nuopc.F90 src/main/clm_varctl.F90 src/self_tests/SelfTestDriver.F90 --- .../namelist_definition_ctsm.xml | 10 ++++++++++ src/cpl/nuopc/lnd_comp_nuopc.F90 | 18 ++++++++++++++++++ src/main/controlMod.F90 | 6 +++++- src/self_tests/SelfTestDriver.F90 | 8 ++++++++ 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/bld/namelist_files/namelist_definition_ctsm.xml b/bld/namelist_files/namelist_definition_ctsm.xml index 9814fabe5b..c82a8219f1 100644 --- a/bld/namelist_files/namelist_definition_ctsm.xml +++ b/bld/namelist_files/namelist_definition_ctsm.xml @@ -1274,6 +1274,10 @@ For testing whether to 
bypass the rest of the initialization after the self test For testing whether to bypass most of the run phase other than the clock advance + +Whether to exit early after the initialization self tests are run. This is typically only used in automated tests. + @@ -1281,6 +1285,12 @@ Whether to run some tests of ncdio_pio as part of the model run. This is typically only used in automated tests. + +Whether to run some tests of decompInit (to get the gridcell to MPI task decomposition) as part of the model run. This is +typically only used in automated tests. + + If true, allocate memory for and use a second crop grain pool. This is diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index 3db987f2fa..4c07888e84 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -39,6 +39,7 @@ module lnd_comp_nuopc use clm_varctl , only : single_column, clm_varctl_set, iulog use clm_varctl , only : nsrStartup, nsrContinue, nsrBranch use clm_varctl , only : FL => fname_len + use clm_varctl , only : for_testing_exit_after_self_tests use clm_time_manager , only : set_timemgr_init, advance_timestep use clm_time_manager , only : update_rad_dtime use clm_time_manager , only : get_nstep, get_step_size @@ -500,6 +501,12 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) else single_column = .false. end if + if ( for_testing_exit_after_self_tests) then + ! ******************* + ! *** RETURN HERE *** + ! ******************* + RETURN + end if !---------------------------------------------------------------------------- ! Reset shr logging to my log file @@ -676,6 +683,9 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) call t_startf('clm_init2') call initialize2(ni, nj, currtime) call t_stopf('clm_init2') + if (for_testing_exit_after_self_tests) then + RETURN + end if !-------------------------------- ! 
Create land export state @@ -786,6 +796,9 @@ subroutine ModelAdvance(gcomp, rc) if (single_column .and. .not. scol_valid) then RETURN end if + if (for_testing_exit_after_self_tests) then + RETURN + end if !$ call omp_set_num_threads(nthrds) @@ -1009,6 +1022,7 @@ subroutine ModelSetRunClock(gcomp, rc) rc = ESMF_SUCCESS call ESMF_LogWrite(subname//' called', ESMF_LOGMSG_INFO) if (.not. scol_valid) return + if (for_testing_exit_after_self_tests) return ! query the Component for its clocks call NUOPC_ModelGet(gcomp, driverClock=dclock, modelClock=mclock, rc=rc) @@ -1292,6 +1306,7 @@ subroutine clm_orbital_update(clock, logunit, mastertask, eccen, obliqr, lambm0 end subroutine clm_orbital_update subroutine CheckImport(gcomp, rc) + use clm_varctl, only : for_testing_exit_after_self_tests type(ESMF_GridComp) :: gcomp integer, intent(out) :: rc character(len=*) , parameter :: subname = "("//__FILE__//":CheckImport)" @@ -1320,6 +1335,9 @@ subroutine CheckImport(gcomp, rc) if (single_column .and. .not. scol_valid) then RETURN end if + if (for_testing_exit_after_self_tests) then + RETURN + end if ! The remander of this should be equivalent to the NUOPC internal routine ! from NUOPC_ModeBase.F90 diff --git a/src/main/controlMod.F90 b/src/main/controlMod.F90 index 089503dc8b..e43bac5a69 100644 --- a/src/main/controlMod.F90 +++ b/src/main/controlMod.F90 @@ -211,6 +211,7 @@ subroutine control_init(dtime) snow_thermal_cond_lake_method, snow_cover_fraction_method, & irrigate, run_zero_weight_urban, all_active, & crop_fsat_equals_zero, for_testing_run_ncdiopio_tests, & + for_testing_run_decomp_init_tests, for_testing_exit_after_self_tests, & for_testing_use_second_grain_pool, for_testing_use_repr_structure_pool, & for_testing_no_crop_seed_replenishment, & z0param_method, use_z0m_snowmelt @@ -766,8 +767,11 @@ subroutine control_spmd() ! Crop saturated excess runoff call mpi_bcast(crop_fsat_equals_zero, 1, MPI_LOGICAL, 0, mpicom, ier) - ! Whether to run tests of ncdio_pio + ! 
Whether to run self tests call mpi_bcast(for_testing_run_ncdiopio_tests, 1, MPI_LOGICAL, 0, mpicom, ier) + call mpi_bcast(for_testing_run_decomp_init_tests, 1, MPI_LOGICAL, 0, mpicom, ier) + + call mpi_bcast(for_testing_exit_after_self_tests, 1, MPI_LOGICAL, 0, mpicom, ier) ! Various flags used for testing infrastructure for having multiple crop reproductive pools call mpi_bcast(for_testing_use_second_grain_pool, 1, MPI_LOGICAL, 0, mpicom, ier) diff --git a/src/self_tests/SelfTestDriver.F90 b/src/self_tests/SelfTestDriver.F90 index e19fff58bd..97f23c5ae4 100644 --- a/src/self_tests/SelfTestDriver.F90 +++ b/src/self_tests/SelfTestDriver.F90 @@ -40,6 +40,14 @@ subroutine self_test_driver(bounds) ! This subroutine should be called all the time, but each set of self tests is only ! run if the appropriate flag is set. ! + ! !USES: + use clm_varctl, only : for_testing_run_ncdiopio_tests, for_testing_run_decomp_init_tests + use clm_varctl, only : for_testing_exit_after_self_tests, iulog + use decompMod, only : bounds_type + use TestNcdioPio, only : test_ncdio_pio + use ESMF, only : ESMF_LogWrite, ESMF_LOGMSG_INFO, ESMF_Finalize + use shr_sys_mod, only : shr_sys_flush + use spmdMod, only : masterproc ! !ARGUMENTS: type(bounds_type), intent(in) :: bounds ! 
From c1c7ca338d0aea4a27baca4f0f5fc91800e0802f Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 29 Aug 2025 09:44:16 -0600 Subject: [PATCH 116/141] Add unit_test_shr directory to the main model build --- cime_config/buildlib | 1 + 1 file changed, 1 insertion(+) diff --git a/cime_config/buildlib b/cime_config/buildlib index a4b853924e..3ce5080dc4 100755 --- a/cime_config/buildlib +++ b/cime_config/buildlib @@ -135,6 +135,7 @@ def _main_func(): os.path.join(lnd_root, "src", "dyn_subgrid"), os.path.join(lnd_root, "src", "init_interp"), os.path.join(lnd_root, "src", "self_tests"), + os.path.join(lnd_root, "src", "unit_test_shr"), os.path.join(lnd_root, "src", "fates"), os.path.join(lnd_root, "src", "fates", "main"), os.path.join(lnd_root, "src", "fates", "biogeophys"), From db4551c608d413c8d7f5d8547af95ca5ddf9d9f3 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Sat, 23 Aug 2025 11:45:50 -0600 Subject: [PATCH 117/141] Merge remote-tracking branch 'escomp/b4b-dev' into decomp_init_for_testing_work Conflicts: cime_config/testdefs/ExpectedTestFails.xml --- cime_config/testdefs/ExpectedTestFails.xml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cime_config/testdefs/ExpectedTestFails.xml b/cime_config/testdefs/ExpectedTestFails.xml index 47dd00c658..32981676e8 100644 --- a/cime_config/testdefs/ExpectedTestFails.xml +++ b/cime_config/testdefs/ExpectedTestFails.xml @@ -381,13 +381,6 @@ - - - FAIL - #3316 - - - From cb8e7ec721e3d055703020d66bfba2c989db66cf Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 21 Aug 2025 21:07:58 -0600 Subject: [PATCH 118/141] Merge remote-tracking branch 'escomp/b4b-dev' into decomp_init_for_testing_work Conflicts: cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/user_nl_clm cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm --- .../clm/for_testing_fastsetup_bypassrun/user_nl_clm | 4 ++-- .../testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm | 4 ++++ 2 files changed, 6 insertions(+), 2 
deletions(-) diff --git a/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/user_nl_clm index 7334edff3d..573df5c02e 100644 --- a/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/user_nl_clm @@ -1,5 +1,5 @@ -! Skip the run phase -for_testing_bypass_run = .true. +! Exit early and bypass the run phase +for_testing_exit_after_self_tests = .true. ! Turn off history, restarts, and output hist_empty_htapes = .true. diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm index 6ee07df73f..3a71b46936 100644 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm @@ -8,3 +8,7 @@ for_testing_run_ncdiopio_tests = .true. ! Turn off history, restarts, and output hist_empty_htapes = .true. use_noio = .true. +for_testing_run_decomp_init_tests = .true. + +! Exit initialization phase after the self tests +for_testing_bypass_init = .true. From 09aa5acf5f65ece1aef44c08c93de237f7e53f5c Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 31 Jul 2025 15:20:33 -0600 Subject: [PATCH 119/141] Balance check doesn't take time, so adjust the timers again for part3 --- src/main/clm_initializeMod.F90 | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index 4f56ad2284..0051361ae8 100644 --- a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -420,14 +420,28 @@ subroutine initialize2(ni,nj, currtime) ! 
Initialize instances of all derived types as well as time constant variables call clm_instInit(bounds_proc) + call t_stopf('clm_init2_part3') + call t_startf('clm_init2_snow_soil_init') call CNParamsSetSoilDepth() ! Initialize SNICAR optical and aging parameters call SnowOptics_init( ) ! SNICAR optical parameters: call SnowAge_init( ) ! SNICAR aging parameters: ! Print history field info to standard out +<<<<<<< HEAD call hist_printflds() +||||||| parent of 1bd240844 (Balance check doesn't take time, so adjust the timers again for part3) + if ( .not. use_noio )then + call hist_printflds() + end if + call t_stopf('clm_init2_part3') +======= + if ( .not. use_noio )then + call hist_printflds() + end if + call t_stopf('clm_init2_snow_soil_init') +>>>>>>> 1bd240844 (Balance check doesn't take time, so adjust the timers again for part3) ! Initializate dynamic subgrid weights (for prescribed transient Patches, CNDV ! and/or dynamic landunits); note that these will be overwritten in a restart run From bf498ab3511d4a744beda8de2049ca06b63be13f Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 31 Jul 2025 15:03:28 -0600 Subject: [PATCH 120/141] Add another timer within part3, and also turn off some of the history stuff in it when use_noio is TRUE Work on reconciling timers and for_testing bypass code from the mpi_scan branch. Conflicts: src/main/clm_initializeMod.F90 --- src/main/clm_initializeMod.F90 | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index 0051361ae8..d875cfe98a 100644 --- a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -337,6 +337,7 @@ subroutine initialize2(ni,nj, currtime) ! Run any requested self-tests call self_test_driver(bounds_proc) + if ( .not. for_testing_bypass_init_after_self_tests() )then ! Deallocate surface grid dynamic memory for variables that aren't needed elsewhere. ! 
Some things are kept until the end of initialize2; urban_valid is kept through the ! end of the run for error checking, pct_urban_max is kept through the end of the run @@ -353,8 +354,9 @@ subroutine initialize2(ni,nj, currtime) allocate(nutrient_competition_method, & source=create_nutrient_competition_method(bounds_proc)) call readParameters(photosyns_inst) - + end if ! End of bypass + ! Self test skipping should still do the time manager initialization ! Initialize time manager if (nsrest == nsrStartup) then call timemgr_init() @@ -380,6 +382,8 @@ subroutine initialize2(ni,nj, currtime) call t_stopf('clm_init2_part2') call t_startf('clm_init2_part3') + if ( .not. for_testing_bypass_init_after_self_tests() )then + ! Initialize Balance checking (after time-manager) call BalanceCheckInit() @@ -420,28 +424,16 @@ subroutine initialize2(ni,nj, currtime) ! Initialize instances of all derived types as well as time constant variables call clm_instInit(bounds_proc) - call t_stopf('clm_init2_part3') - call t_startf('clm_init2_snow_soil_init') call CNParamsSetSoilDepth() ! Initialize SNICAR optical and aging parameters call SnowOptics_init( ) ! SNICAR optical parameters: call SnowAge_init( ) ! SNICAR aging parameters: ! Print history field info to standard out -<<<<<<< HEAD - call hist_printflds() -||||||| parent of 1bd240844 (Balance check doesn't take time, so adjust the timers again for part3) if ( .not. use_noio )then call hist_printflds() end if - call t_stopf('clm_init2_part3') -======= - if ( .not. use_noio )then - call hist_printflds() - end if - call t_stopf('clm_init2_snow_soil_init') ->>>>>>> 1bd240844 (Balance check doesn't take time, so adjust the timers again for part3) ! Initializate dynamic subgrid weights (for prescribed transient Patches, CNDV ! and/or dynamic landunits); note that these will be overwritten in a restart run @@ -483,7 +475,6 @@ subroutine initialize2(ni,nj, currtime) call bgc_vegetation_inst%Init2(bounds_proc, NLFilename) end if - if ( .not. 
for_testing_bypass_init_after_self_tests() )then if (use_cn) then ! NOTE(wjs, 2016-02-23) Maybe the rest of the body of this conditional should also @@ -527,7 +518,7 @@ subroutine initialize2(ni,nj, currtime) if (nsrest == nsrContinue ) then call htapes_fieldlist() end if - end if + end if ! End of bypass ! Read restart/initial info is_cold_start = .false. @@ -621,6 +612,8 @@ subroutine initialize2(ni,nj, currtime) call t_stopf('clm_init2_init_interp') end if + if ( .not. for_testing_bypass_init_after_self_tests() )then + ! If requested, reset dynbal baselines ! This needs to happen after reading the restart file (including after reading the ! interpolated restart file, if applicable). @@ -700,7 +693,6 @@ subroutine initialize2(ni,nj, currtime) call hist_htapes_build() end if - if ( .not. for_testing_bypass_init_after_self_tests() )then ! Initialize variables that are associated with accumulated fields. ! The following is called for both initial and restart runs and must ! must be called after the restart file is read @@ -780,7 +772,7 @@ subroutine initialize2(ni,nj, currtime) water_inst%waterdiagnosticbulk_inst, canopystate_inst, & soilstate_inst, soilbiogeochem_carbonflux_inst) end if - end if + end if ! end of bypass ! topo_glc_mec was allocated in initialize1, but needed to be kept around through ! 
initialize2 because it is used to initialize other variables; now it can be deallocated From 3c540603b7126be9f99df2755429575b4a7a09d1 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 31 Jul 2025 14:08:14 -0600 Subject: [PATCH 121/141] Add timers for clm_initialize2 that cover the whole subroutine --- src/main/clm_initializeMod.F90 | 45 ++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index d875cfe98a..dc8e8b9c9a 100644 --- a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -224,7 +224,16 @@ subroutine initialize2(ni,nj, currtime) character(len=32) :: subname = 'initialize2' ! subroutine name !----------------------------------------------------------------------- +<<<<<<< HEAD call t_startf('clm_init2_part1') +||||||| parent of 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) + call t_startf('clm_init2') + +======= + call t_startf('clm_init2') + + call t_startf('clm_init2_part1') +>>>>>>> 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) ! Get processor bounds for gridcells call get_proc_bounds(bounds_proc) begg = bounds_proc%begg; endg = bounds_proc%endg @@ -277,14 +286,20 @@ subroutine initialize2(ni,nj, currtime) call CLMFatesGlobals2() end if +<<<<<<< HEAD call t_stopf('clm_init2_part1') call t_startf('clm_init2_part2') +||||||| parent of 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) +======= + call t_stopf('clm_init2_part1') +>>>>>>> 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) ! Determine decomposition of subgrid scale landunits, columns, patches call t_startf('clm_decompInit_clumps') call decompInit_clumps(ni, nj, glc_behavior) call t_stopf('clm_decompInit_clumps') + call t_startf('clm_init2_subgrid') ! 
*** Get ALL processor bounds - for gridcells, landunit, columns and patches *** call get_proc_bounds(bounds_proc) @@ -306,12 +321,14 @@ subroutine initialize2(ni,nj, currtime) call initGridCells(bounds_clump, glc_behavior) end do !$OMP END PARALLEL DO + call t_stopf('clm_init2_subgrid') ! Set global seg maps for gridcells, landlunits, columns and patches call t_startf('clm_decompInit_glcp') call decompInit_glcp(ni, nj, glc_behavior) call t_stopf('clm_decompInit_glcp') + call t_startf('clm_init2_part2') if (use_hillslope) then ! Initialize hillslope properties call InitHillslope(bounds_proc, hillslope_file) @@ -379,11 +396,17 @@ subroutine initialize2(ni,nj, currtime) caldaym1 = get_curr_calday(offset=-int(dtime), reuse_day_365_for_day_366=.true.) call shr_orb_decl( caldaym1, eccen, mvelpp, lambm0, obliqr, declinm1, eccf ) call InitDaylength(bounds_proc, declin=declin, declinm1=declinm1, obliquity=obliqr) +<<<<<<< HEAD call t_stopf('clm_init2_part2') call t_startf('clm_init2_part3') if ( .not. for_testing_bypass_init_after_self_tests() )then +||||||| parent of 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) +======= + call t_stopf('clm_init2_part2') +>>>>>>> 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) + call t_startf('clm_init2_part3') ! Initialize Balance checking (after time-manager) call BalanceCheckInit() @@ -431,10 +454,18 @@ subroutine initialize2(ni,nj, currtime) call SnowAge_init( ) ! SNICAR aging parameters: ! Print history field info to standard out +<<<<<<< HEAD if ( .not. use_noio )then call hist_printflds() end if +||||||| parent of 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) + call hist_printflds() +======= + call hist_printflds() + call t_stopf('clm_init2_part3') +>>>>>>> 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) + call t_startf('clm_init2_part4') ! Initializate dynamic subgrid weights (for prescribed transient Patches, CNDV ! 
and/or dynamic landunits); note that these will be overwritten in a restart run call init_subgrid_weights_mod(bounds_proc) @@ -555,6 +586,7 @@ subroutine initialize2(ni,nj, currtime) call restFile_read(bounds_proc, fnamer, glc_behavior, & reset_dynbal_baselines_lake_columns = reset_dynbal_baselines_lake_columns) end if + call t_stopf('clm_init2_part4') ! If appropriate, create interpolated initial conditions if (nsrest == nsrStartup .and. finidat_interp_source /= ' ') then @@ -612,8 +644,13 @@ subroutine initialize2(ni,nj, currtime) call t_stopf('clm_init2_init_interp') end if +<<<<<<< HEAD if ( .not. for_testing_bypass_init_after_self_tests() )then +||||||| parent of 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) +======= + call t_startf('clm_init2_part5') +>>>>>>> 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) ! If requested, reset dynbal baselines ! This needs to happen after reading the restart file (including after reading the ! interpolated restart file, if applicable). 
@@ -793,14 +830,22 @@ subroutine initialize2(ni,nj, currtime) write(iulog,'(72a1)') ("*",i=1,60) write(iulog,*) endif +<<<<<<< HEAD +||||||| parent of 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) + call t_stopf('init_wlog') +======= + call t_stopf('clm_init2_part5') +>>>>>>> 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) if (water_inst%DoConsistencyCheck()) then + call t_startf('tracer_consistency_check') !$OMP PARALLEL DO PRIVATE (nc, bounds_clump) do nc = 1,nclumps call get_clump_bounds(nc, bounds_clump) call water_inst%TracerConsistencyCheck(bounds_clump, 'end of initialization') end do !$OMP END PARALLEL DO + call t_stopf('tracer_consistency_check') end if call t_stopf('clm_init2_part3') From ce2d68b1fecdfba1bd12e38f141282e13422fa01 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Tue, 2 Sep 2025 16:07:18 -0600 Subject: [PATCH 122/141] Change the test grid total size to 384 so can be divisible by either 128 for Derecho or 48 for Izumi --- src/self_tests/TestDecompInit.F90 | 357 ++++++++++++++++++++++++++++++ 1 file changed, 357 insertions(+) create mode 100644 src/self_tests/TestDecompInit.F90 diff --git a/src/self_tests/TestDecompInit.F90 b/src/self_tests/TestDecompInit.F90 new file mode 100644 index 0000000000..58b0ddaab2 --- /dev/null +++ b/src/self_tests/TestDecompInit.F90 @@ -0,0 +1,357 @@ +module TestDecompInit + + ! ------------------------------------------------------------------------ + ! !DESCRIPTION: + ! This module contains tests of decomp_init + +#include "shr_assert.h" + use shr_kind_mod, only : r8 => shr_kind_r8, CX => shr_kind_cx + use Assertions, only : assert_equal + use clm_varctl, only : iulog + use abortutils, only : endrun, endrun_init, get_last_endrun_msg + use spmdMod, only : masterproc, npes, iam + use decompInitMod, only : decompInit_lnd, clump_pproc, decompInit_clumps + use clm_InstMod, only : glc_behavior + use decompMod + + implicit none + private + save + + ! 
Public routines + + public :: test_decomp_init + + ! Module data used in various tests + + ! Make the size of the test grid 384 so that it can be divided by 128 or 48 + ! for the number of tasks per node on Derecho or Izumi. + integer, parameter :: ni = 16, nj = 24 + integer :: amask(ni*nj) + + integer :: default_npes + integer :: default_clump_pproc + + character(len=*), parameter, private :: sourcefile = & + __FILE__ + +contains + + !----------------------------------------------------------------------- + subroutine test_decomp_init() + ! + ! !DESCRIPTION: + ! Drive tests of decomp_init + ! + ! NOTE(wjs, 2020-10-15) Currently, endrun is called when any test assertion fails. I + ! thought about changing this so that, instead, a counter is incremented for each + ! failure, then at the end of the testing (in the higher-level self-test driver), + ! endrun is called if this counter is greater than 0. The benefit of this is that we'd + ! see all test failures, not just the first failure. To do that, we'd need to change + ! the assertions here to increment a counter rather than aborting. However, I'm not + ! spending the time to make this change for now because (1) I'm not sure how much + ! value we'd get from it; (2) even if we made that change, it's still very possible + ! for test code to abort for reasons other than assertions, if something goes wrong + ! inside decomp_init or pio; and (3) some tests here are dependent on earlier tests (for + ! example, the reads depend on the writes having worked), so a failure in an early + ! phase could really muck things up for later testing phases. Migrating to a + ! pFUnit-based unit test would solve this problem, since each pFUnit test is + ! independent, though would prevent us from being able to have dependent tests the + ! way we do here (where reads depend on earlier writes), for better or for worse. + ! + ! !USES: + use decompInitMod, only : decompInit_clumps, decompInit_glcp + use domainMod, only : ldomain + !
!ARGUMENTS: + ! + ! !LOCAL VARIABLES: + integer, allocatable :: model_amask(:) + !----------------------------------------------------------------------- + + default_npes = npes + default_clump_pproc = clump_pproc + call write_to_log('start_test_decomp_init') + + call write_to_log('test_check_nclumps') + call test_check_nclumps() + call write_to_log('test_decompInit_lnd_abort_on_bad_clump_pproc') + call test_decompInit_lnd_abort_on_bad_clump_pproc() + call write_to_log('test_decompInit_lnd_abort_on_too_big_clump_pproc') + call test_decompInit_lnd_abort_on_too_big_clump_pproc() + call write_to_log('test_decompInit_lnd_abort_when_npes_too_large') + call test_decompInit_lnd_abort_when_npes_too_large() + call write_to_log('test_decompInit_lnd_abort_on_too_small_nsegspc') + call test_decompInit_lnd_abort_on_too_small_nsegspc() + call write_to_log('test_decompInit_lnd_check_sizes') + call test_decompInit_lnd_check_sizes() + call write_to_log('test_decompInit_clump_gcell_info_correct') + call test_decompInit_clump_gcell_info_correct() + ! Comment out for now -- needs some work + !call write_to_log('test_decompMod_get_clump_bounds_correct') + !call test_decompMod_get_clump_bounds_correct() + + ! + ! Call the decompInit initialization series a last time so that decompMod data can still be used + ! + !allocate( model_amask(ldomain%ni*ldomain%nj) ) + !model_amask(:) = 1 + !call decompInit_lnd( ldomain%ni, ldomain%nj, model_amask ) + !call decompInit_clumps(ldomain%ni, ldomain%nj, glc_behavior) + !call decompInit_glcp(ldomain%ni, ldomain%nj, glc_behavior) + !deallocate( model_amask ) + + end subroutine test_decomp_init + + !----------------------------------------------------------------------- + subroutine setup() + use clm_varctl, only : nsegspc + + clump_pproc = default_clump_pproc + nsegspc = 20 + npes = default_npes + amask(:) = 1 ! 
Set all to land + + end subroutine setup + + !----------------------------------------------------------------------- + subroutine test_decompInit_lnd_abort_on_bad_clump_pproc() + character(len=CX) :: expected_msg, actual_msg + + call setup() + call endrun_init( .true. ) ! Do not abort on endrun for self-tests + clump_pproc = 0 + call write_to_log('decompInit_lnd with clump_pproc=0 should abort') + call decompInit_lnd( ni, nj, amask ) + call write_to_log('check expected abort message') + expected_msg = 'clump_pproc must be greater than 0' + actual_msg = get_last_endrun_msg() + call endrun_init( .false. ) ! Turn back on to abort on the assert + call write_to_log('call assert_equal to check the abort message') + call assert_equal( & + expected=expected_msg, actual=actual_msg, & + msg='decompInit_lnd did not abort with clump_pproc=0' ) + call clean() + end subroutine test_decompInit_lnd_abort_on_bad_clump_pproc + + !----------------------------------------------------------------------- + subroutine test_decompInit_lnd_abort_on_too_big_clump_pproc() + character(len=CX) :: expected_msg, actual_msg + + call setup() + call endrun_init( .true. ) ! Do not abort on endrun for self-tests + amask(:) = 1 ! Set all to land + clump_pproc = (ni * nj + 1) / npes + call write_to_log('decompInit_lnd with clump_pproc too large should abort') + call decompInit_lnd( ni, nj, amask ) + call write_to_log('check expected abort message') + expected_msg = 'Number of clumps exceeds number of land grid cells' + actual_msg = get_last_endrun_msg() + call endrun_init( .false. ) ! 
Turn back on to abort on the assert + call write_to_log('call assert_equal to check the abort message') + call assert_equal( & + expected=expected_msg, actual=actual_msg, & + msg='decompInit_lnd did not abort with clump_pproc too large' ) + call clean() + end subroutine test_decompInit_lnd_abort_on_too_big_clump_pproc + + !----------------------------------------------------------------------- + subroutine test_decompInit_lnd_check_sizes() + use decompMod, only : get_proc_bounds + type(bounds_type) :: bounds + + integer :: expected_endg, expected_numg + + call setup() + expected_numg = ni*nj + if ( expected_numg < npes )then + call endrun( msg="npes is too large for this test", file=sourcefile, line=__LINE__ ) + end if + if ( modulo( expected_numg, npes ) /= 0 )then + call endrun( msg="npes does not evenly divide into numg so this test will not work", file=sourcefile, line=__LINE__ ) + end if + expected_endg = ni*nj / npes + amask(:) = 1 ! Set all to land + call decompInit_lnd( ni, nj, amask ) + call get_proc_bounds(bounds, allow_errors=.true.) + call assert_equal( bounds%begg, 1, msg='begg is not as expected' ) + call assert_equal( bounds%endg, expected_endg, msg='endg is not as expected' ) + call clean() + end subroutine test_decompInit_lnd_check_sizes + + !----------------------------------------------------------------------- + subroutine test_decompInit_lnd_abort_when_npes_too_large() + character(len=CX) :: expected_msg, actual_msg + + call setup() + ! NOTE: This is arbitrarily modifying the NPES value -- so it MUST be reset at the END! + npes = ni*nj + 1 + + call endrun_init( .true. ) ! Do not abort on endrun for self-tests + amask(:) = 1 ! Set all to land + call write_to_log('decompInit_lnd with npes too large should abort') + call decompInit_lnd( ni, nj, amask ) + call write_to_log('check expected abort message') + expected_msg = 'Number of processes exceeds number of land grid cells' + actual_msg = get_last_endrun_msg() + call endrun_init( .false. ) !
Turn back on to abort on the assert + call write_to_log('call assert_equal to check the abort message') + call assert_equal( & + expected=expected_msg, actual=actual_msg, & + msg='decompInit_lnd did not abort with npes too large' ) + + ! NOTE: Return npes to its original value + npes = default_npes + call clean() + end subroutine test_decompInit_lnd_abort_when_npes_too_large + + !----------------------------------------------------------------------- + subroutine test_decompInit_lnd_abort_on_too_small_nsegspc() + use clm_varctl, only : nsegspc + character(len=CX) :: expected_msg, actual_msg + + call setup() + call endrun_init( .true. ) ! Do not abort on endrun for self-tests + amask(:) = 1 ! Set all to land + nsegspc = 0 + call write_to_log('decompInit_lnd with nsegspc too small should abort') + call decompInit_lnd( ni, nj, amask ) + call write_to_log('check expected abort message') + expected_msg = 'Number of segments per clump (nsegspc) is less than 1 and can NOT be' + actual_msg = get_last_endrun_msg() + call endrun_init( .false. ) ! Turn back on to abort on the assert + call write_to_log('call assert_equal to check the abort message') + call assert_equal( & + expected=expected_msg, actual=actual_msg, & + msg='decompInit_lnd did not abort with too nsegspc too small' ) + call clean() + end subroutine test_decompInit_lnd_abort_on_too_small_nsegspc + + !----------------------------------------------------------------------- + subroutine test_check_nclumps() + integer :: expected_nclumps + + call setup() + call endrun_init( .true. ) ! Do not abort on endrun for self-tests + expected_nclumps = npes / clump_pproc + call assert_equal(expected=expected_nclumps, actual=nclumps, & + msg='nclumps are not as expected') + call endrun_init( .false. ) + call clean() + end subroutine test_check_nclumps + +!----------------------------------------------------------------------- + subroutine test_decompMod_get_clump_bounds_correct() + ! 
Some testing for get_clump_bounds + use decompMod, only : get_clump_bounds, bounds_type + use unittestSimpleSubgridSetupsMod, only : setup_ncells_single_veg_patch + use unittestSubgridMod, only : unittest_subgrid_teardown + use pftconMod, only : noveg + type(bounds_type) :: bounds + integer :: expected_begg, expected_endg, expected_numg, gcell_per_task + integer :: iclump + + call setup() + ! Now setup a singple grid that's just the full test with every point a single baresoil patch + call setup_ncells_single_veg_patch( ncells=ni*nj, pft_type=noveg ) + clump_pproc = 1 ! Ensure we are just doing this for one clump per proc for now + expected_numg = ni*nj + if ( expected_numg < npes )then + call endrun( msg="npes is too large for this test", file=sourcefile, line=__LINE__ ) + end if + if ( modulo( expected_numg, npes ) /= 0 )then + call endrun( msg="npes does not evenly divide into numg so this test will not work", file=sourcefile, line=__LINE__ ) + end if + gcell_per_task = expected_numg / npes + expected_begg = gcell_per_task * iam + 1 + expected_endg = expected_begg + gcell_per_task + amask(:) = 1 ! Set all to land + call decompInit_lnd( ni, nj, amask ) + call decompInit_clumps( ni, nj, glc_behavior ) + iclump = 1 ! Clump is just 1 since there's only one clump per task + call get_clump_bounds(iclump, bounds) + call assert_equal( bounds%begg, expected_begg, msg='begg is not as expected' ) + call assert_equal( bounds%endg, expected_endg, msg='endg is not as expected' ) + ! 
Other subgrtid level information will be the same -- since there's only one landunit, column, and patch per gridcell + call assert_equal( bounds%begl, expected_begg, msg='begl is not as expected' ) + call assert_equal( bounds%endl, expected_endg, msg='endl is not as expected' ) + call assert_equal( bounds%begc, expected_begg, msg='begc is not as expected' ) + call assert_equal( bounds%endc, expected_endg, msg='endc is not as expected' ) + call assert_equal( bounds%begp, expected_begg, msg='begp is not as expected' ) + call assert_equal( bounds%endp, expected_endg, msg='endp is not as expected' ) + call unittest_subgrid_teardown( ) + call clean() + end subroutine test_decompMod_get_clump_bounds_correct + + !----------------------------------------------------------------------- + subroutine test_decompInit_clump_gcell_info_correct() + ! Some testing for get_clump_bounds + use decompMod, only : clumps + integer :: expected_gcells, iclump, g, beg_global_index, gcell_per_task + integer :: expected_begg, expected_endg + + call setup() + expected_gcells = ni*nj + if ( expected_gcells < npes )then + call endrun( msg="npes is too large for this test", file=sourcefile, line=__LINE__ ) + end if + if ( modulo( expected_gcells, npes ) /= 0 )then + call endrun( msg="npes does not evenly divide into gcell so this test will not work", file=sourcefile, line=__LINE__ ) + end if + gcell_per_task = expected_gcells / npes + expected_begg = gcell_per_task * iam + 1 + expected_endg = expected_begg + gcell_per_task + amask(:) = 1 ! Set all to land + call decompInit_lnd( ni, nj, amask ) + ! When clump_pproc is one clumps will be the same as PE + call assert_equal( nclumps, npes, msg='nclumps should match numper of proces when clump_pproc is 1' ) + do iclump = 1, nclumps + call assert_equal( clumps(iclump)%owner, iclump-1, msg='clumps owner is not correct' ) + call assert_equal( clumps(iclump)%ncells, gcell_per_task, msg='clumps ncells is not correct' ) + end do + ! 
Validate gindex_global over the local task + + beg_global_index = gcell_per_task*iam + do g = procinfo%begg, procinfo%endg + call assert_equal( gindex_global(g), g+beg_global_index, msg='clumps owner is not correct' ) + end do + call clean() + end subroutine test_decompInit_clump_gcell_info_correct + + !----------------------------------------------------------------------- + subroutine write_to_log(msg) + ! + ! !DESCRIPTION: + ! Write a message to the log file, just from the masterproc + ! + use shr_sys_mod, only : shr_sys_flush + ! !ARGUMENTS: + character(len=*), intent(in) :: msg + ! + ! !LOCAL VARIABLES: + + character(len=*), parameter :: subname = 'write_to_log' + !----------------------------------------------------------------------- + + if (masterproc) then + write(iulog,'(a)') msg + call shr_sys_flush(iulog) ! Flush the I/O buffers always + end if + + end subroutine write_to_log + + !----------------------------------------------------------------------- + subroutine clean + ! + ! !DESCRIPTION: + ! Do end-of-testing cleanup after each test + ! + ! !ARGUMENTS: + ! + ! !LOCAL VARIABLES: + !----------------------------------------------------------------------- + call decompmod_clean() + + end subroutine clean + + +end module TestDecompInit From 2fc723fbc84737b1864341f07d148988cf188007 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Sat, 6 Sep 2025 14:46:01 -0600 Subject: [PATCH 123/141] Don't do the abort testing if not serial as different tasks won't be in sync and doing so was not working --- src/self_tests/TestDecompInit.F90 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/self_tests/TestDecompInit.F90 b/src/self_tests/TestDecompInit.F90 index 58b0ddaab2..526e8e5d37 100644 --- a/src/self_tests/TestDecompInit.F90 +++ b/src/self_tests/TestDecompInit.F90 @@ -117,6 +117,7 @@ end subroutine setup subroutine test_decompInit_lnd_abort_on_bad_clump_pproc() character(len=CX) :: expected_msg, actual_msg + if ( npes > 1 ) return ! 
error checking testing only works seriallly call setup() call endrun_init( .true. ) ! Do not abort on endrun for self-tests clump_pproc = 0 @@ -137,6 +138,7 @@ end subroutine test_decompInit_lnd_abort_on_bad_clump_pproc subroutine test_decompInit_lnd_abort_on_too_big_clump_pproc() character(len=CX) :: expected_msg, actual_msg + if ( npes > 1 ) return ! error checking testing only works seriallly call setup() call endrun_init( .true. ) ! Do not abort on endrun for self-tests amask(:) = 1 ! Set all to land @@ -182,6 +184,7 @@ end subroutine test_decompInit_lnd_check_sizes subroutine test_decompInit_lnd_abort_when_npes_too_large() character(len=CX) :: expected_msg, actual_msg + if ( npes > 1 ) return ! error checking testing only works seriallly call setup() ! NOTE: This is arbitrarily modifying the NPES value -- so it MUST be reset set the END! npes = ni*nj + 1 @@ -209,6 +212,7 @@ subroutine test_decompInit_lnd_abort_on_too_small_nsegspc() use clm_varctl, only : nsegspc character(len=CX) :: expected_msg, actual_msg + if ( npes > 1 ) return ! error checking testing only works seriallly call setup() call endrun_init( .true. ) ! Do not abort on endrun for self-tests amask(:) = 1 ! Set all to land @@ -286,6 +290,8 @@ end subroutine test_decompMod_get_clump_bounds_correct subroutine test_decompInit_clump_gcell_info_correct() ! Some testing for get_clump_bounds use decompMod, only : clumps + use decompMod, only : get_proc_bounds + type(bounds_type) :: bounds integer :: expected_gcells, iclump, g, beg_global_index, gcell_per_task integer :: expected_begg, expected_endg @@ -308,12 +314,6 @@ subroutine test_decompInit_clump_gcell_info_correct() call assert_equal( clumps(iclump)%owner, iclump-1, msg='clumps owner is not correct' ) call assert_equal( clumps(iclump)%ncells, gcell_per_task, msg='clumps ncells is not correct' ) end do - ! 
Validate gindex_global over the local task - - beg_global_index = gcell_per_task*iam - do g = procinfo%begg, procinfo%endg - call assert_equal( gindex_global(g), g+beg_global_index, msg='clumps owner is not correct' ) - end do call clean() end subroutine test_decompInit_clump_gcell_info_correct From d8d656bac74999a8778779e1a340f7af09e5ba71 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 8 Sep 2025 14:47:55 -0600 Subject: [PATCH 124/141] Change a test to make it valid for clump_pproc or not --- src/self_tests/TestDecompInit.F90 | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/self_tests/TestDecompInit.F90 b/src/self_tests/TestDecompInit.F90 index 526e8e5d37..7705ea80fc 100644 --- a/src/self_tests/TestDecompInit.F90 +++ b/src/self_tests/TestDecompInit.F90 @@ -309,7 +309,11 @@ subroutine test_decompInit_clump_gcell_info_correct() amask(:) = 1 ! Set all to land call decompInit_lnd( ni, nj, amask ) ! When clump_pproc is one clumps will be the same as PE - call assert_equal( nclumps, npes, msg='nclumps should match numper of proces when clump_pproc is 1' ) + if ( clump_pproc == 1 ) then + call assert_equal( nclumps, npes, msg='nclumps should match number of processors when clump_pproc is 1' ) + else + call assert_equal( nclumps/clump_pproc, npes, msg='nclumps divided by clump_pproc should match number of processors when clump_pproc > 1' ) + end if do iclump = 1, nclumps call assert_equal( clumps(iclump)%owner, iclump-1, msg='clumps owner is not correct' ) call assert_equal( clumps(iclump)%ncells, gcell_per_task, msg='clumps ncells is not correct' ) From 4ce6b5fea206c982319b8954b02d56edef642133 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Sun, 14 Sep 2025 15:29:25 -0600 Subject: [PATCH 125/141] Just do the checking over the local processor clumps and not all the global clumps --- src/main/decompInitMod.F90 | 3 +++ src/self_tests/TestDecompInit.F90 | 8 +++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git 
a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index aa575bd787..cf84a4a7c6 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -574,6 +574,8 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) enddo do n = 1,nclumps + ! Only do the error checking over the local processor + if (clumps(n)%owner == iam) then if (clumps(n)%ncells /= allvecg(n,1) .or. & clumps(n)%nlunits /= allvecg(n,2) .or. & clumps(n)%ncols /= allvecg(n,3) .or. & @@ -588,6 +590,7 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) call endrun(msg=errMsg(sourcefile, __LINE__)) endif + endif enddo deallocate(allvecg,allvecl) diff --git a/src/self_tests/TestDecompInit.F90 b/src/self_tests/TestDecompInit.F90 index 7705ea80fc..b88b62ce85 100644 --- a/src/self_tests/TestDecompInit.F90 +++ b/src/self_tests/TestDecompInit.F90 @@ -293,7 +293,7 @@ subroutine test_decompInit_clump_gcell_info_correct() use decompMod, only : get_proc_bounds type(bounds_type) :: bounds integer :: expected_gcells, iclump, g, beg_global_index, gcell_per_task - integer :: expected_begg, expected_endg + integer :: expected_begg, expected_endg, lc call setup() expected_gcells = ni*nj @@ -314,8 +314,10 @@ subroutine test_decompInit_clump_gcell_info_correct() else call assert_equal( nclumps/clump_pproc, npes, msg='nclumps divided by clump_pproc should match number of processors when clump_pproc > 1' ) end if - do iclump = 1, nclumps - call assert_equal( clumps(iclump)%owner, iclump-1, msg='clumps owner is not correct' ) + ! 
Just test over the local clumps + do lc = 1, clump_pproc + iclump = procinfo%cid(lc) + call assert_equal( clumps(iclump)%owner, iam, msg='clumps owner is not correct' ) call assert_equal( clumps(iclump)%ncells, gcell_per_task, msg='clumps ncells is not correct' ) end do call clean() From 6531dcf982fdeba95dcd214de719ae0e4174eac8 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 1 Oct 2025 01:37:38 -0600 Subject: [PATCH 126/141] Remove the uneeded timers and get back to the 3 part timers as they should be --- src/main/clm_initializeMod.F90 | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index eddb87cd5c..07610ff076 100644 --- a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -314,7 +314,6 @@ subroutine initialize2(ni,nj, currtime) call decompInit_glcp(ni, nj, glc_behavior) call t_stopf('clm_decompInit_glcp') - call t_startf('clm_init2_part2') if (use_hillslope) then ! Initialize hillslope properties call InitHillslope(bounds_proc, hillslope_file) @@ -387,7 +386,6 @@ subroutine initialize2(ni,nj, currtime) call t_startf('clm_init2_part3') if ( .not. for_testing_bypass_init_after_self_tests() )then - call t_startf('clm_init2_part3') ! Initialize Balance checking (after time-manager) call BalanceCheckInit() @@ -428,9 +426,7 @@ subroutine initialize2(ni,nj, currtime) ! Initialize instances of all derived types as well as time constant variables call clm_instInit(bounds_proc) - call t_stopf('clm_init2_part3') - call t_startf('clm_init2_snow_soil_init') call CNParamsSetSoilDepth() ! Initialize SNICAR optical and aging parameters call SnowOptics_init( ) ! SNICAR optical parameters: @@ -440,9 +436,7 @@ subroutine initialize2(ni,nj, currtime) if ( .not. use_noio )then call hist_printflds() end if - call t_stopf('clm_init2_snow_soil_init') - call t_startf('clm_init2_part4') ! Initializate dynamic subgrid weights (for prescribed transient Patches, CNDV ! 
and/or dynamic landunits); note that these will be overwritten in a restart run call init_subgrid_weights_mod(bounds_proc) @@ -563,7 +557,6 @@ subroutine initialize2(ni,nj, currtime) call restFile_read(bounds_proc, fnamer, glc_behavior, & reset_dynbal_baselines_lake_columns = reset_dynbal_baselines_lake_columns) end if - call t_stopf('clm_init2_part4') ! If appropriate, create interpolated initial conditions if (nsrest == nsrStartup .and. finidat_interp_source /= ' ') then @@ -622,7 +615,6 @@ subroutine initialize2(ni,nj, currtime) end if if ( .not. for_testing_bypass_init_after_self_tests() )then - call t_startf('clm_init2_part5') ! If requested, reset dynbal baselines ! This needs to happen after reading the restart file (including after reading the From 7dc7dcd43bf37a7ca081e0b8918d5b43d4f9033d Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 1 Oct 2025 01:51:36 -0600 Subject: [PATCH 127/141] Resolve the conflicts --- src/main/clm_initializeMod.F90 | 37 ---------------------------------- 1 file changed, 37 deletions(-) diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index dc8e8b9c9a..715630ba53 100644 --- a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -224,16 +224,7 @@ subroutine initialize2(ni,nj, currtime) character(len=32) :: subname = 'initialize2' ! subroutine name !----------------------------------------------------------------------- -<<<<<<< HEAD call t_startf('clm_init2_part1') -||||||| parent of 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) - call t_startf('clm_init2') - -======= - call t_startf('clm_init2') - - call t_startf('clm_init2_part1') ->>>>>>> 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) ! 
Get processor bounds for gridcells call get_proc_bounds(bounds_proc) begg = bounds_proc%begg; endg = bounds_proc%endg @@ -286,13 +277,8 @@ subroutine initialize2(ni,nj, currtime) call CLMFatesGlobals2() end if -<<<<<<< HEAD call t_stopf('clm_init2_part1') call t_startf('clm_init2_part2') -||||||| parent of 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) -======= - call t_stopf('clm_init2_part1') ->>>>>>> 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) ! Determine decomposition of subgrid scale landunits, columns, patches call t_startf('clm_decompInit_clumps') @@ -396,15 +382,10 @@ subroutine initialize2(ni,nj, currtime) caldaym1 = get_curr_calday(offset=-int(dtime), reuse_day_365_for_day_366=.true.) call shr_orb_decl( caldaym1, eccen, mvelpp, lambm0, obliqr, declinm1, eccf ) call InitDaylength(bounds_proc, declin=declin, declinm1=declinm1, obliquity=obliqr) -<<<<<<< HEAD call t_stopf('clm_init2_part2') call t_startf('clm_init2_part3') if ( .not. for_testing_bypass_init_after_self_tests() )then -||||||| parent of 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) -======= - call t_stopf('clm_init2_part2') ->>>>>>> 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) call t_startf('clm_init2_part3') ! Initialize Balance checking (after time-manager) @@ -454,16 +435,9 @@ subroutine initialize2(ni,nj, currtime) call SnowAge_init( ) ! SNICAR aging parameters: ! Print history field info to standard out -<<<<<<< HEAD if ( .not. use_noio )then call hist_printflds() end if -||||||| parent of 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) - call hist_printflds() -======= - call hist_printflds() - call t_stopf('clm_init2_part3') ->>>>>>> 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) call t_startf('clm_init2_part4') ! 
Initializate dynamic subgrid weights (for prescribed transient Patches, CNDV @@ -644,13 +618,8 @@ subroutine initialize2(ni,nj, currtime) call t_stopf('clm_init2_init_interp') end if -<<<<<<< HEAD if ( .not. for_testing_bypass_init_after_self_tests() )then -||||||| parent of 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) -======= - call t_startf('clm_init2_part5') ->>>>>>> 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) ! If requested, reset dynbal baselines ! This needs to happen after reading the restart file (including after reading the ! interpolated restart file, if applicable). @@ -830,12 +799,6 @@ subroutine initialize2(ni,nj, currtime) write(iulog,'(72a1)') ("*",i=1,60) write(iulog,*) endif -<<<<<<< HEAD -||||||| parent of 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) - call t_stopf('init_wlog') -======= - call t_stopf('clm_init2_part5') ->>>>>>> 8914b12ab (Add timers for clm_initialize2 that cover the whole subroutine) if (water_inst%DoConsistencyCheck()) then call t_startf('tracer_consistency_check') From 2a46724c30a4d591e134bfe3bdf39132bbcf9db8 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 22 Aug 2025 03:09:37 -0600 Subject: [PATCH 128/141] Remove some of the previous bypassing changes that aren't needed here --- src/cpl/nuopc/lnd_comp_nuopc.F90 | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index 4c07888e84..451207d287 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -501,12 +501,12 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) else single_column = .false. end if - if ( for_testing_exit_after_self_tests) then + !if ( for_testing_exit_after_self_tests) then ! ******************* ! *** RETURN HERE *** ! 
******************* - RETURN - end if + !RETURN + !end if !---------------------------------------------------------------------------- ! Reset shr logging to my log file @@ -796,9 +796,9 @@ subroutine ModelAdvance(gcomp, rc) if (single_column .and. .not. scol_valid) then RETURN end if - if (for_testing_exit_after_self_tests) then - RETURN - end if + !if (for_testing_exit_after_self_tests) then + ! RETURN + !end if !$ call omp_set_num_threads(nthrds) @@ -1022,7 +1022,7 @@ subroutine ModelSetRunClock(gcomp, rc) rc = ESMF_SUCCESS call ESMF_LogWrite(subname//' called', ESMF_LOGMSG_INFO) if (.not. scol_valid) return - if (for_testing_exit_after_self_tests) return + !if (for_testing_exit_after_self_tests) return ! query the Component for its clocks call NUOPC_ModelGet(gcomp, driverClock=dclock, modelClock=mclock, rc=rc) @@ -1335,9 +1335,9 @@ subroutine CheckImport(gcomp, rc) if (single_column .and. .not. scol_valid) then RETURN end if - if (for_testing_exit_after_self_tests) then - RETURN - end if + !if (for_testing_exit_after_self_tests) then + !RETURN + !end if ! The remander of this should be equivalent to the NUOPC internal routine ! 
from NUOPC_ModeBase.F90 From c95b886cf8857daf335e0cf1011b68344d9b1eae Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 27 Aug 2025 13:34:29 -0600 Subject: [PATCH 129/141] Move bypass code around a bit so that most timers aren't half in/half out, and so that the self-tests can run to completion afterwards Conflicts: src/cpl/nuopc/lnd_comp_nuopc.F90 src/main/clm_initializeMod.F90 --- src/cpl/nuopc/lnd_comp_nuopc.F90 | 8 ++++++++ src/main/clm_instMod.F90 | 5 +---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index 451207d287..9dcbf9236c 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -352,6 +352,7 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) use lnd_set_decomp_and_domain , only : lnd_set_decomp_and_domain_from_readmesh use lnd_set_decomp_and_domain , only : lnd_set_mesh_for_single_column use lnd_set_decomp_and_domain , only : lnd_set_decomp_and_domain_for_single_column + use SelfTestDriver , only : for_testing_bypass_init_after_self_tests ! input/output variables type(ESMF_GridComp) :: gcomp @@ -690,10 +691,12 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) !-------------------------------- ! Create land export state !-------------------------------- + if ( .not. for_testing_bypass_init_after_self_tests() ) then call get_proc_bounds(bounds) call export_fields(gcomp, bounds, glc_present, rof_prognostic, & water_inst%waterlnd2atmbulk_inst, lnd2atm_inst, lnd2glc_inst, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return + end if ! 
Set scalars in export state call State_SetScalar(dble(ldomain%ni), flds_scalar_index_nx, exportState, & @@ -741,6 +744,7 @@ subroutine ModelAdvance(gcomp, rc) use clm_instMod , only : water_inst, atm2lnd_inst, glc2lnd_inst, lnd2atm_inst, lnd2glc_inst use decompMod , only : bounds_type, get_proc_bounds use clm_driver , only : clm_drv + use SelfTestDriver, only : for_testing_bypass_init_after_self_tests ! input/output variables type(ESMF_GridComp) :: gcomp @@ -930,9 +934,13 @@ subroutine ModelAdvance(gcomp, rc) ! Pack export state !-------------------------------- + if ( .not. for_testing_bypass_init_after_self_tests() ) then + call t_startf ('lc_lnd_export') call export_fields(gcomp, bounds, glc_present, rof_prognostic, & water_inst%waterlnd2atmbulk_inst, lnd2atm_inst, lnd2glc_inst, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return + call t_stopf ('lc_lnd_export') + end if !-------------------------------- ! Advance ctsm time step diff --git a/src/main/clm_instMod.F90 b/src/main/clm_instMod.F90 index c8213b75ee..7d9a0f6ad2 100644 --- a/src/main/clm_instMod.F90 +++ b/src/main/clm_instMod.F90 @@ -208,7 +208,6 @@ subroutine clm_instInit(bounds) use HillslopeHydrologyMod , only : SetHillslopeSoilThickness use initVerticalMod , only : setSoilLayerClass use DustEmisFactory , only : create_dust_emissions - use SelfTestDriver , only : for_testing_bypass_init_after_self_tests ! ! !ARGUMENTS type(bounds_type), intent(in) :: bounds ! processor bounds @@ -270,9 +269,7 @@ subroutine clm_instInit(bounds) call humanindex_inst%Init(bounds) ! Initialize urban time varying data - if ( .not. for_testing_bypass_init_after_self_tests() )then - call urbantv_inst%Init(bounds, NLFilename) - end if + call urbantv_inst%Init(bounds, NLFilename) ! 
Initialize vertical data components From 86382c673cad60be4406caf0c6a8083340a1f78e Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 27 Aug 2025 16:16:33 -0600 Subject: [PATCH 130/141] Also bypass the import fields for_testing option, and move the decompInit_lnd timers to around the calls rather than for the entire subroutine, because the things at the top that may abort will then have a broken timer Conflicts: src/cpl/nuopc/lnd_comp_nuopc.F90 src/main/decompInitMod.F90 --- src/cpl/nuopc/lnd_comp_nuopc.F90 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index 9dcbf9236c..e462c3d2ed 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -842,9 +842,13 @@ subroutine ModelAdvance(gcomp, rc) ! Unpack import state !-------------------------------- + if ( .not. for_testing_bypass_init_after_self_tests() ) then + call t_startf ('lc_lnd_import') call import_fields( gcomp, bounds, glc_present, rof_prognostic, & atm2lnd_inst, glc2lnd_inst, water_inst%wateratm2lndbulk_inst, rc ) if (ChkErr(rc,__LINE__,u_FILE_u)) return + call t_stopf ('lc_lnd_import') + end if !-------------------------------- ! Run model From dac0ae0f9a0f606cac176f98c1a4c605054bf7f1 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Fri, 29 Aug 2025 09:45:30 -0600 Subject: [PATCH 131/141] Move the get_proc_bounds to inside the bypass --- src/cpl/nuopc/lnd_comp_nuopc.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index e462c3d2ed..2d0abcd823 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -835,14 +835,14 @@ subroutine ModelAdvance(gcomp, rc) flds_scalar_index_nextsw_cday, nextsw_cday, & flds_scalar_name, flds_scalar_num, rc) - ! Get proc bounds - call get_proc_bounds(bounds) - !-------------------------------- ! 
Unpack import state !-------------------------------- if ( .not. for_testing_bypass_init_after_self_tests() ) then + ! Get proc bounds for both import and export + call get_proc_bounds(bounds) + call t_startf ('lc_lnd_import') call import_fields( gcomp, bounds, glc_present, rof_prognostic, & atm2lnd_inst, glc2lnd_inst, water_inst%wateratm2lndbulk_inst, rc ) From e867afe76f3e51ed1185a20aa7b25c61be9049a2 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 2 Jul 2025 10:11:49 -0600 Subject: [PATCH 132/141] Changes to exit early when self test namelist option used for_testing_exit_after_self_tests, change the self tests testmod so that its about initialization, this works with a compset with SATM, but hangs -- because nothing stops the run Conflicts: cime_config/testdefs/testmods_dirs/clm/run_self_tests/README cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm src/cpl/nuopc/lnd_comp_nuopc.F90 --- .../clm/run_self_tests/shell_commands | 9 +++++++++ src/cpl/nuopc/lnd_comp_nuopc.F90 | 12 +++++++++++- src/cpl/nuopc/lnd_import_export.F90 | 19 ++++++++++++++----- 3 files changed, 34 insertions(+), 6 deletions(-) create mode 100755 cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands new file mode 100755 index 0000000000..9383f70de0 --- /dev/null +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -0,0 +1,9 @@ +#!/bin/bash +./xmlchange CLM_FORCE_COLDSTART="on" + +# We use this testmod in a _Ln1 test; this requires forcing the ROF coupling frequency to every time step +./xmlchange ROF_NCPL=48 + +# Restarts aren't allowed for these tests, and turn off CPL history +./xmlchange REST_OPTION="never" +./xmlchange HIST_OPTION="never" diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 
b/src/cpl/nuopc/lnd_comp_nuopc.F90 index 2d0abcd823..4ce0b3d6af 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -81,6 +81,7 @@ module lnd_comp_nuopc logical :: glc_present logical :: rof_prognostic + logical :: atm_present logical :: atm_prognostic integer, parameter :: dbug = 0 character(*),parameter :: modName = "(lnd_comp_nuopc)" @@ -285,6 +286,11 @@ subroutine InitializeAdvertise(gcomp, importState, exportState, clock, rc) else atm_prognostic = .true. end if + if (trim(atm_model) == 'satm') then + atm_present = .false. + else + atm_present = .true. + end if call NUOPC_CompAttributeGet(gcomp, name='GLC_model', value=glc_model, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return if (trim(glc_model) == 'sglc') then @@ -311,6 +317,9 @@ subroutine InitializeAdvertise(gcomp, importState, exportState, clock, rc) write(iulog,'(a )')' rof component = '//trim(rof_model) write(iulog,'(a )')' glc component = '//trim(glc_model) write(iulog,'(a,L2)')' atm_prognostic = ',atm_prognostic + if (.not. 
atm_present) then + write(iulog,'(a,L2)')' atm_present = ',atm_present + end if write(iulog,'(a,L2)')' rof_prognostic = ',rof_prognostic write(iulog,'(a,L2)')' glc_present = ',glc_present if (glc_present) then @@ -329,7 +338,8 @@ subroutine InitializeAdvertise(gcomp, importState, exportState, clock, rc) call control_setNL("lnd_in"//trim(inst_suffix)) - call advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, atm_prognostic, rc) + call advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, & + atm_prognostic, atm_present, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return !---------------------------------------------------------------------------- diff --git a/src/cpl/nuopc/lnd_import_export.F90 b/src/cpl/nuopc/lnd_import_export.F90 index 624590b9a6..b8a5efeb8d 100644 --- a/src/cpl/nuopc/lnd_import_export.F90 +++ b/src/cpl/nuopc/lnd_import_export.F90 @@ -156,7 +156,8 @@ module lnd_import_export contains !=============================================================================== - subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, atm_prognostic, rc) + subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, & + atm_prognostic, atm_present, rc) use shr_carma_mod , only : shr_carma_readnl use shr_ndep_mod , only : shr_ndep_readnl @@ -173,6 +174,7 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r logical , intent(in) :: cism_evolve logical , intent(in) :: rof_prognostic logical , intent(in) :: atm_prognostic + logical , intent(in) :: atm_present integer , intent(out) :: rc ! local variables @@ -210,7 +212,9 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r ! Need to determine if there is no land for single column before the advertise call is done - if (atm_prognostic .or. force_send_to_atm) then + if (.not. atm_present)then + send_to_atm = .false. 
+ else if (atm_prognostic .or. force_send_to_atm) then send_to_atm = .true. else send_to_atm = .false. @@ -253,12 +257,11 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r if (shr_megan_mechcomps_n .ne. megan_nflds) call shr_sys_abort('ERROR: megan field count mismatch') ! CARMA volumetric soil water from land - call shr_carma_readnl('drv_flds_in', carma_fields) ! export to atm call fldlist_add(fldsFrLnd_num, fldsFrlnd, trim(flds_scalar_name)) - call fldlist_add(fldsFrLnd_num, fldsFrlnd, 'Sl_lfrin') if (send_to_atm) then + call fldlist_add(fldsFrLnd_num, fldsFrlnd, 'Sl_lfrin') call fldlist_add(fldsFrLnd_num, fldsFrlnd, Sl_t ) call fldlist_add(fldsFrLnd_num, fldsFrlnd, Sl_tref ) call fldlist_add(fldsFrLnd_num, fldsFrlnd, Sl_qref ) @@ -339,6 +342,9 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r call fldlist_add(fldsToLnd_num, fldsToLnd, trim(flds_scalar_name)) + !!!!!!!!!!!!!!!!!!!!!!!!!!! new if section !!!!!!!!!!!!!!!!!!!!!!!!!! + if ( atm_present ) then + ! from atm call fldlist_add(fldsToLnd_num, fldsToLnd, Sa_z ) call fldlist_add(fldsToLnd_num, fldsToLnd, Sa_topo ) @@ -389,6 +395,9 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r call fldlist_add(fldsToLnd_num, fldsToLnd, Sa_co2diag) end if + end if ! atm_present + !!!!!!!!!!!!!!!!!!!!!!!!!!! new if section !!!!!!!!!!!!!!!!!!!!!!!!!! + if (rof_prognostic) then ! from river call fldlist_add(fldsToLnd_num, fldsToLnd, Flrr_flood ) @@ -773,6 +782,7 @@ subroutine export_fields( gcomp, bounds, glc_present, rof_prognostic, & ! output to mediator ! ----------------------- + if (send_to_atm) then call state_setexport_1d(exportState, Sl_lfrin, ldomain%frac(begg:), init_spval=.false., rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return @@ -780,7 +790,6 @@ subroutine export_fields( gcomp, bounds, glc_present, rof_prognostic, & ! output to atm ! 
----------------------- - if (send_to_atm) then call state_setexport_1d(exportState, Sl_t , lnd2atm_inst%t_rad_grc(begg:), & init_spval=.true., rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return From d19b8940feefccd89b50aa0f5605a5ab1dba0838 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 11 Aug 2025 09:06:36 -0600 Subject: [PATCH 133/141] Add asserts for scalars and also text scalars --- src/self_tests/Assertions.F90.in | 81 ++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/src/self_tests/Assertions.F90.in b/src/self_tests/Assertions.F90.in index 2a4c8cccc6..4a86929a8a 100644 --- a/src/self_tests/Assertions.F90.in +++ b/src/self_tests/Assertions.F90.in @@ -17,6 +17,12 @@ module Assertions public :: assert_equal interface assert_equal + !TYPE double,int,logical + module procedure assert_equal_0d_{TYPE} + + !TYPE text + module procedure assert_equal_0d_{TYPE} + !TYPE double,int,logical module procedure assert_equal_1d_{TYPE} @@ -30,6 +36,8 @@ module Assertions interface vals_are_equal !TYPE double,int,logical module procedure vals_are_equal_{TYPE} + !TYPE text + module procedure vals_are_equal_{TYPE} end interface vals_are_equal contains @@ -75,6 +83,60 @@ contains end subroutine assert_equal_1d_{TYPE} + !----------------------------------------------------------------------- + !TYPE double,int,logical + subroutine assert_equal_0d_{TYPE}(expected, actual, msg, abs_tol) + ! + ! !DESCRIPTION: + ! Assert scalar values are equal + ! + ! !ARGUMENTS: + {VTYPE}, intent(in) :: expected + {VTYPE}, intent(in) :: actual + character(len=*), intent(in) :: msg + + ! absolute tolerance; if not specified, require exact equality; ignored for logicals + real(r8), intent(in), optional :: abs_tol + ! + ! !LOCAL VARIABLES: + integer :: i + + character(len=*), parameter :: subname = 'assert_equal_0d_{TYPE}' + !----------------------------------------------------------------------- + + if (.not. 
vals_are_equal(actual, expected, abs_tol)) then + write(iulog,*) 'ERROR in assert_equal: ', msg + write(iulog,*) 'Actual : ', actual + write(iulog,*) 'Expected: ', expected + call endrun('ERROR in assert_equal') + end if + + end subroutine assert_equal_0d_{TYPE} + + !----------------------------------------------------------------------- + !TYPE text + subroutine assert_equal_0d_{TYPE}(expected, actual, msg) + ! + ! !DESCRIPTION: + ! Assert scalar values are equal + ! + ! !ARGUMENTS: + {VTYPE}, intent(in) :: expected + {VTYPE}, intent(in) :: actual + character(len=*), intent(in) :: msg + ! + ! !LOCAL VARIABLES: + !----------------------------------------------------------------------- + + if (.not. vals_are_equal(actual, expected)) then + write(iulog,*) 'ERROR in assert_equal: ', msg + write(iulog,*) 'Actual : ', actual + write(iulog,*) 'Expected: ', expected + call endrun('ERROR in assert_equal') + end if + + end subroutine assert_equal_0d_{TYPE} + !----------------------------------------------------------------------- !TYPE double,int,logical subroutine assert_equal_2d_{TYPE}(expected, actual, msg, abs_tol) @@ -198,4 +260,23 @@ contains end function vals_are_equal_{TYPE} + !----------------------------------------------------------------------- + !TYPE text + function vals_are_equal_{TYPE}(actual, expected) result(vals_equal) + ! + ! !DESCRIPTION: + ! Returns true if actual is the same as expected, false otherwise + ! + ! !ARGUMENTS: + logical :: vals_equal ! function result + {VTYPE}, intent(in) :: actual + {VTYPE}, intent(in) :: expected + ! + ! 
!LOCAL VARIABLES: + !----------------------------------------------------------------------- + + vals_equal = actual == expected + + end function vals_are_equal_{TYPE} + end module Assertions From a5d5b5cb705be625212fd36539f201ceb9ded2b3 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 1 Oct 2025 09:32:13 -0600 Subject: [PATCH 134/141] Revert most of 2fd081b544 so removing the changes regarding the addition of atm_present and adjustments to how send_to_atm was done --- src/cpl/nuopc/lnd_comp_nuopc.F90 | 11 +---------- src/cpl/nuopc/lnd_import_export.F90 | 16 ++++------------ 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index 4ce0b3d6af..173981341d 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -81,7 +81,6 @@ module lnd_comp_nuopc logical :: glc_present logical :: rof_prognostic - logical :: atm_present logical :: atm_prognostic integer, parameter :: dbug = 0 character(*),parameter :: modName = "(lnd_comp_nuopc)" @@ -286,11 +285,6 @@ subroutine InitializeAdvertise(gcomp, importState, exportState, clock, rc) else atm_prognostic = .true. end if - if (trim(atm_model) == 'satm') then - atm_present = .false. - else - atm_present = .true. - end if call NUOPC_CompAttributeGet(gcomp, name='GLC_model', value=glc_model, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return if (trim(glc_model) == 'sglc') then @@ -317,9 +311,6 @@ subroutine InitializeAdvertise(gcomp, importState, exportState, clock, rc) write(iulog,'(a )')' rof component = '//trim(rof_model) write(iulog,'(a )')' glc component = '//trim(glc_model) write(iulog,'(a,L2)')' atm_prognostic = ',atm_prognostic - if (.not. 
atm_present) then - write(iulog,'(a,L2)')' atm_present = ',atm_present - end if write(iulog,'(a,L2)')' rof_prognostic = ',rof_prognostic write(iulog,'(a,L2)')' glc_present = ',glc_present if (glc_present) then @@ -339,7 +330,7 @@ subroutine InitializeAdvertise(gcomp, importState, exportState, clock, rc) call advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, & - atm_prognostic, atm_present, rc) + atm_prognostic, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return !---------------------------------------------------------------------------- diff --git a/src/cpl/nuopc/lnd_import_export.F90 b/src/cpl/nuopc/lnd_import_export.F90 index b8a5efeb8d..909caf094b 100644 --- a/src/cpl/nuopc/lnd_import_export.F90 +++ b/src/cpl/nuopc/lnd_import_export.F90 @@ -157,7 +157,7 @@ module lnd_import_export !=============================================================================== subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, & - atm_prognostic, atm_present, rc) + atm_prognostic, rc) use shr_carma_mod , only : shr_carma_readnl use shr_ndep_mod , only : shr_ndep_readnl @@ -174,7 +174,6 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r logical , intent(in) :: cism_evolve logical , intent(in) :: rof_prognostic logical , intent(in) :: atm_prognostic - logical , intent(in) :: atm_present integer , intent(out) :: rc ! local variables @@ -212,9 +211,7 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r ! Need to determine if there is no land for single column before the advertise call is done - if (.not. atm_present)then - send_to_atm = .false. - else if (atm_prognostic .or. force_send_to_atm) then + if (atm_prognostic .or. force_send_to_atm) then send_to_atm = .true. else send_to_atm = .false. @@ -257,6 +254,7 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r if (shr_megan_mechcomps_n .ne. 
megan_nflds) call shr_sys_abort('ERROR: megan field count mismatch') ! CARMA volumetric soil water from land + call shr_carma_readnl('drv_flds_in', carma_fields) ! export to atm call fldlist_add(fldsFrLnd_num, fldsFrlnd, trim(flds_scalar_name)) @@ -342,9 +340,6 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r call fldlist_add(fldsToLnd_num, fldsToLnd, trim(flds_scalar_name)) - !!!!!!!!!!!!!!!!!!!!!!!!!!! new if section !!!!!!!!!!!!!!!!!!!!!!!!!! - if ( atm_present ) then - ! from atm call fldlist_add(fldsToLnd_num, fldsToLnd, Sa_z ) call fldlist_add(fldsToLnd_num, fldsToLnd, Sa_topo ) @@ -395,9 +390,6 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r call fldlist_add(fldsToLnd_num, fldsToLnd, Sa_co2diag) end if - end if ! atm_present - !!!!!!!!!!!!!!!!!!!!!!!!!!! new if section !!!!!!!!!!!!!!!!!!!!!!!!!! - if (rof_prognostic) then ! from river call fldlist_add(fldsToLnd_num, fldsToLnd, Flrr_flood ) @@ -782,7 +774,6 @@ subroutine export_fields( gcomp, bounds, glc_present, rof_prognostic, & ! output to mediator ! ----------------------- - if (send_to_atm) then call state_setexport_1d(exportState, Sl_lfrin, ldomain%frac(begg:), init_spval=.false., rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return @@ -790,6 +781,7 @@ subroutine export_fields( gcomp, bounds, glc_present, rof_prognostic, & ! output to atm ! 
----------------------- + if (send_to_atm) then call state_setexport_1d(exportState, Sl_t , lnd2atm_inst%t_rad_grc(begg:), & init_spval=.true., rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return From b3185c0922640df0b21ccefc58aa2aa43b1c538d Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 1 Oct 2025 10:49:11 -0600 Subject: [PATCH 135/141] Move some for_testing namelist items into the selftests driver namelist --- bld/namelist_files/namelist_definition_ctsm.xml | 6 +++--- src/cpl/nuopc/lnd_comp_nuopc.F90 | 6 +++--- src/main/controlMod.F90 | 9 +-------- src/self_tests/SelfTestDriver.F90 | 14 ++++++++++---- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/bld/namelist_files/namelist_definition_ctsm.xml b/bld/namelist_files/namelist_definition_ctsm.xml index c82a8219f1..2b5b4f2e99 100644 --- a/bld/namelist_files/namelist_definition_ctsm.xml +++ b/bld/namelist_files/namelist_definition_ctsm.xml @@ -1275,18 +1275,18 @@ For testing whether to bypass most of the run phase other than the clock advance + group="for_testing_options" > Whether to exit early after the initialization self tests are run. This is typically only used in automated tests. + group="for_testing_options" > Whether to run some tests of ncdio_pio as part of the model run. This is typically only used in automated tests. + group="for_testing_options" > Whether to run some tests of decompInit (to get the gridcell to MPI task decomposition) as part of the model run. This is typically only used in automated tests. 
diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index 173981341d..a6e7efb054 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -39,7 +39,6 @@ module lnd_comp_nuopc use clm_varctl , only : single_column, clm_varctl_set, iulog use clm_varctl , only : nsrStartup, nsrContinue, nsrBranch use clm_varctl , only : FL => fname_len - use clm_varctl , only : for_testing_exit_after_self_tests use clm_time_manager , only : set_timemgr_init, advance_timestep use clm_time_manager , only : update_rad_dtime use clm_time_manager , only : get_nstep, get_step_size @@ -50,6 +49,7 @@ module lnd_comp_nuopc use lnd_import_export , only : advertise_fields, realize_fields, import_fields, export_fields use lnd_comp_shr , only : mesh, model_meshfile, model_clock use perf_mod , only : t_startf, t_stopf, t_barrierf + use SelfTestDriver , only : for_testing_exit_after_self_tests implicit none private ! except @@ -353,7 +353,8 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) use lnd_set_decomp_and_domain , only : lnd_set_decomp_and_domain_from_readmesh use lnd_set_decomp_and_domain , only : lnd_set_mesh_for_single_column use lnd_set_decomp_and_domain , only : lnd_set_decomp_and_domain_for_single_column - use SelfTestDriver , only : for_testing_bypass_init_after_self_tests + use SelfTestDriver , only : for_testing_bypass_init_after_self_tests, & + for_testing_exit_after_self_tests ! 
input/output variables type(ESMF_GridComp) :: gcomp @@ -1319,7 +1320,6 @@ subroutine clm_orbital_update(clock, logunit, mastertask, eccen, obliqr, lambm0 end subroutine clm_orbital_update subroutine CheckImport(gcomp, rc) - use clm_varctl, only : for_testing_exit_after_self_tests type(ESMF_GridComp) :: gcomp integer, intent(out) :: rc character(len=*) , parameter :: subname = "("//__FILE__//":CheckImport)" diff --git a/src/main/controlMod.F90 b/src/main/controlMod.F90 index e43bac5a69..4a956e33b2 100644 --- a/src/main/controlMod.F90 +++ b/src/main/controlMod.F90 @@ -210,8 +210,7 @@ subroutine control_init(dtime) snow_thermal_cond_method, snow_thermal_cond_glc_method, & snow_thermal_cond_lake_method, snow_cover_fraction_method, & irrigate, run_zero_weight_urban, all_active, & - crop_fsat_equals_zero, for_testing_run_ncdiopio_tests, & - for_testing_run_decomp_init_tests, for_testing_exit_after_self_tests, & + crop_fsat_equals_zero, & for_testing_use_second_grain_pool, for_testing_use_repr_structure_pool, & for_testing_no_crop_seed_replenishment, & z0param_method, use_z0m_snowmelt @@ -767,12 +766,6 @@ subroutine control_spmd() ! Crop saturated excess runoff call mpi_bcast(crop_fsat_equals_zero, 1, MPI_LOGICAL, 0, mpicom, ier) - ! Whether to run self tests - call mpi_bcast(for_testing_run_ncdiopio_tests, 1, MPI_LOGICAL, 0, mpicom, ier) - call mpi_bcast(for_testing_run_decomp_init_tests, 1, MPI_LOGICAL, 0, mpicom, ier) - - call mpi_bcast(for_testing_exit_after_self_tests, 1, MPI_LOGICAL, 0, mpicom, ier) - ! 
Various flags used for testing infrastructure for having multiple crop reproductive pools call mpi_bcast(for_testing_use_second_grain_pool, 1, MPI_LOGICAL, 0, mpicom, ier) call mpi_bcast(for_testing_use_repr_structure_pool, 1, MPI_LOGICAL, 0, mpicom, ier) diff --git a/src/self_tests/SelfTestDriver.F90 b/src/self_tests/SelfTestDriver.F90 index 97f23c5ae4..208c60c1db 100644 --- a/src/self_tests/SelfTestDriver.F90 +++ b/src/self_tests/SelfTestDriver.F90 @@ -6,10 +6,10 @@ module SelfTestDriver ! ! See the README file in this directory for a high-level overview of these self-tests. - use clm_varctl, only : for_testing_run_ncdiopio_tests use decompMod, only : bounds_type use TestNcdioPio, only : test_ncdio_pio use abortutils, only : endrun + use clm_varctl, only : iulog implicit none private @@ -25,6 +25,9 @@ module SelfTestDriver ! Private module data logical :: for_testing_bypass_init ! For testing bypass the initialization phase after the self-test driver logical :: for_testing_bypass_run ! For testing bypass most of the run phase except the time advance + logical :: for_testing_run_ncdiopio_tests ! true => run tests of ncdio_pio + logical :: for_testing_run_decomp_init_tests ! true => run tests of decompInit + logical, public :: for_testing_exit_after_self_tests ! true => exit after running self tests character(len=*), parameter, private :: sourcefile = & __FILE__ @@ -41,8 +44,6 @@ subroutine self_test_driver(bounds) ! run if the appropriate flag is set. ! ! 
!USES: - use clm_varctl, only : for_testing_run_ncdiopio_tests, for_testing_run_decomp_init_tests - use clm_varctl, only : for_testing_exit_after_self_tests, iulog use decompMod, only : bounds_type use TestNcdioPio, only : test_ncdio_pio use ESMF, only : ESMF_LogWrite, ESMF_LOGMSG_INFO, ESMF_Finalize @@ -87,7 +88,9 @@ subroutine self_test_readnml(NLFileName) character(len=*), parameter :: nmlname = 'for_testing_options' !----------------------------------------------------------------------- - namelist /for_testing_options/ for_testing_bypass_init, for_testing_bypass_run + namelist /for_testing_options/ for_testing_bypass_init, for_testing_bypass_run, & + for_testing_run_ncdiopio_tests, for_testing_run_decomp_init_tests, & + for_testing_exit_after_self_tests ! Initialize options to default values, in case they are not specified in ! the namelist @@ -109,6 +112,9 @@ subroutine self_test_readnml(NLFileName) call shr_mpi_bcast (for_testing_bypass_init, mpicom) call shr_mpi_bcast (for_testing_bypass_run, mpicom) + call shr_mpi_bcast(for_testing_run_ncdiopio_tests, mpicom) + call shr_mpi_bcast(for_testing_run_decomp_init_tests, mpicom) + call shr_mpi_bcast(for_testing_exit_after_self_tests, mpicom) if (masterproc) then write(iulog,*) ' ' From 1acf630483c81badfc2435ae0963693cf439b81c Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 1 Oct 2025 01:37:38 -0600 Subject: [PATCH 136/141] Remove the uneeded timers and get back to the 3 part timers as they should be Conflicts: src/main/clm_initializeMod.F90 --- src/main/clm_initializeMod.F90 | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index 715630ba53..4ebf88b10c 100644 --- a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -314,7 +314,6 @@ subroutine initialize2(ni,nj, currtime) call decompInit_glcp(ni, nj, glc_behavior) call t_stopf('clm_decompInit_glcp') - call t_startf('clm_init2_part2') if (use_hillslope) then ! 
Initialize hillslope properties call InitHillslope(bounds_proc, hillslope_file) @@ -386,8 +385,6 @@ subroutine initialize2(ni,nj, currtime) call t_startf('clm_init2_part3') if ( .not. for_testing_bypass_init_after_self_tests() )then - - call t_startf('clm_init2_part3') ! Initialize Balance checking (after time-manager) call BalanceCheckInit() @@ -439,7 +436,6 @@ subroutine initialize2(ni,nj, currtime) call hist_printflds() end if - call t_startf('clm_init2_part4') ! Initializate dynamic subgrid weights (for prescribed transient Patches, CNDV ! and/or dynamic landunits); note that these will be overwritten in a restart run call init_subgrid_weights_mod(bounds_proc) @@ -560,7 +556,6 @@ subroutine initialize2(ni,nj, currtime) call restFile_read(bounds_proc, fnamer, glc_behavior, & reset_dynbal_baselines_lake_columns = reset_dynbal_baselines_lake_columns) end if - call t_stopf('clm_init2_part4') ! If appropriate, create interpolated initial conditions if (nsrest == nsrStartup .and. finidat_interp_source /= ' ') then From 2636975bd21457620dea28a5c9e622a7b903ccdb Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Wed, 1 Oct 2025 11:07:52 -0600 Subject: [PATCH 137/141] Remove some changes from the baseline code that aren't needed especially some timers accidentally brought in again --- src/cpl/nuopc/lnd_comp_nuopc.F90 | 3 +-- src/cpl/nuopc/lnd_import_export.F90 | 5 ++--- src/main/clm_initializeMod.F90 | 4 ---- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index a6e7efb054..3e0fc3c2f7 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -329,8 +329,7 @@ subroutine InitializeAdvertise(gcomp, importState, exportState, clock, rc) call control_setNL("lnd_in"//trim(inst_suffix)) - call advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, & - atm_prognostic, rc) + call advertise_fields(gcomp, flds_scalar_name, glc_present, 
cism_evolve, rof_prognostic, atm_prognostic, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return !---------------------------------------------------------------------------- diff --git a/src/cpl/nuopc/lnd_import_export.F90 b/src/cpl/nuopc/lnd_import_export.F90 index 909caf094b..624590b9a6 100644 --- a/src/cpl/nuopc/lnd_import_export.F90 +++ b/src/cpl/nuopc/lnd_import_export.F90 @@ -156,8 +156,7 @@ module lnd_import_export contains !=============================================================================== - subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, & - atm_prognostic, rc) + subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, atm_prognostic, rc) use shr_carma_mod , only : shr_carma_readnl use shr_ndep_mod , only : shr_ndep_readnl @@ -258,8 +257,8 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r ! export to atm call fldlist_add(fldsFrLnd_num, fldsFrlnd, trim(flds_scalar_name)) + call fldlist_add(fldsFrLnd_num, fldsFrlnd, 'Sl_lfrin') if (send_to_atm) then - call fldlist_add(fldsFrLnd_num, fldsFrlnd, 'Sl_lfrin') call fldlist_add(fldsFrLnd_num, fldsFrlnd, Sl_t ) call fldlist_add(fldsFrLnd_num, fldsFrlnd, Sl_tref ) call fldlist_add(fldsFrLnd_num, fldsFrlnd, Sl_qref ) diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index 4ebf88b10c..0ffa7737a8 100644 --- a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -285,7 +285,6 @@ subroutine initialize2(ni,nj, currtime) call decompInit_clumps(ni, nj, glc_behavior) call t_stopf('clm_decompInit_clumps') - call t_startf('clm_init2_subgrid') ! *** Get ALL processor bounds - for gridcells, landunit, columns and patches *** call get_proc_bounds(bounds_proc) @@ -307,7 +306,6 @@ subroutine initialize2(ni,nj, currtime) call initGridCells(bounds_clump, glc_behavior) end do !$OMP END PARALLEL DO - call t_stopf('clm_init2_subgrid') ! 
Set global seg maps for gridcells, landlunits, columns and patches call t_startf('clm_decompInit_glcp') @@ -796,14 +794,12 @@ subroutine initialize2(ni,nj, currtime) endif if (water_inst%DoConsistencyCheck()) then - call t_startf('tracer_consistency_check') !$OMP PARALLEL DO PRIVATE (nc, bounds_clump) do nc = 1,nclumps call get_clump_bounds(nc, bounds_clump) call water_inst%TracerConsistencyCheck(bounds_clump, 'end of initialization') end do !$OMP END PARALLEL DO - call t_stopf('tracer_consistency_check') end if call t_stopf('clm_init2_part3') From def2c979f428d26736004380e7fd90a4f9d38140 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 2 Oct 2025 00:58:04 -0600 Subject: [PATCH 138/141] Remove TestDecompInit for now, bring it in, in another PR --- src/self_tests/TestDecompInit.F90 | 363 ------------------------------ 1 file changed, 363 deletions(-) delete mode 100644 src/self_tests/TestDecompInit.F90 diff --git a/src/self_tests/TestDecompInit.F90 b/src/self_tests/TestDecompInit.F90 deleted file mode 100644 index b88b62ce85..0000000000 --- a/src/self_tests/TestDecompInit.F90 +++ /dev/null @@ -1,363 +0,0 @@ -module TestDecompInit - - ! ------------------------------------------------------------------------ - ! !DESCRIPTION: - ! This module contains tests of decomp_init - -#include "shr_assert.h" - use shr_kind_mod, only : r8 => shr_kind_r8, CX => shr_kind_cx - use Assertions, only : assert_equal - use clm_varctl, only : iulog - use abortutils, only : endrun, endrun_init, get_last_endrun_msg - use spmdMod, only : masterproc, npes, iam - use decompInitMod, only : decompInit_lnd, clump_pproc, decompInit_clumps - use clm_InstMod, only : glc_behavior - use decompMod - - implicit none - private - save - - ! Public routines - - public :: test_decomp_init - - ! Module data used in various tests - - ! Make the size of the test grid 384 so that it can be divided by 128 or 48 - ! for the number of tasks per node on Derecho or Izumi. 
- integer, parameter :: ni = 16, nj = 24 - integer :: amask(ni*nj) - - integer :: default_npes - integer :: default_clump_pproc - - character(len=*), parameter, private :: sourcefile = & - __FILE__ - -contains - - !----------------------------------------------------------------------- - subroutine test_decomp_init() - ! - ! !DESCRIPTION: - ! Drive tests of decomp_init - ! - ! NOTE(wjs, 2020-10-15) Currently, endrun is called when any test assertion fails. I - ! thought about changing this so that, instead, a counter is incremented for each - ! failure, then at the end of the testing (in the higher-level self-test driver), - ! endrun is called if this counter is greater than 0. The benefit of this is that we'd - ! see all test failures, not just the first failure. To do that, we'd need to change - ! the assertions here to increment a counter rather than aborting. However, I'm not - ! spending the time to make this change for now because (1) I'm not sure how much - ! value we'd get from it; (2) even if we made that change, it's still very possible - ! for test code to abort for reasons other than assertions, if something goes wrong - ! inside decomp_init or pio; and (3) some tests here are dependent on earlier tests (for - ! example, the reads depend on the writes having worked), so a failure in an early - ! phase could really muck things up for later testing phases. Migrating to a - ! pFUnit-based unit test would solve this problem, since each pFUnit test is - ! independent, though would prevent us from being able to have dependent tests the - ! way we do here (where reads depend on earlier writes), for better or for worse. - ! - ! !USERS: - use decompInitMod, only : decompInit_clumps, decompInit_glcp - use domainMod, only : ldomain - ! !ARGUMENTS: - ! - ! 
!LOCAL VARIABLES: - integer, allocatable :: model_amask(:) - !----------------------------------------------------------------------- - - default_npes = npes - default_clump_pproc = clump_pproc - call write_to_log('start_test_decomp_init') - - call write_to_log('test_check_nclumps') - call test_check_nclumps() - call write_to_log('test_decompInit_lnd_abort_on_bad_clump_pproc') - call test_decompInit_lnd_abort_on_bad_clump_pproc() - call write_to_log('test_decompInit_lnd_abort_on_too_big_clump_pproc') - call test_decompInit_lnd_abort_on_too_big_clump_pproc() - call write_to_log('test_decompInit_lnd_abort_when_npes_too_large') - call test_decompInit_lnd_abort_when_npes_too_large() - call write_to_log('test_decompInit_lnd_abort_on_too_small_nsegspc') - call test_decompInit_lnd_abort_on_too_small_nsegspc() - call write_to_log('test_decompInit_lnd_check_sizes') - call test_decompInit_lnd_check_sizes() - call write_to_log('test_decompInit_clump_gcell_info_correct') - call test_decompInit_clump_gcell_info_correct() - ! Comment out for now -- needs some work - !call write_to_log('test_decompMod_get_clump_bounds_correct') - !call test_decompMod_get_clump_bounds_correct() - - ! - ! Call the decompInit initialization series a last time so that decompMod data can still be used - ! - !allocate( model_amask(ldomain%ni*ldomain%nj) ) - !model_amask(:) = 1 - !call decompInit_lnd( ldomain%ni, ldomain%nj, model_amask ) - !call decompInit_clumps(ldomain%ni, ldomain%nj, glc_behavior) - !call decompInit_glcp(ldomain%ni, ldomain%nj, glc_behavior) - !deallocate( model_amask ) - - end subroutine test_decomp_init - - !----------------------------------------------------------------------- - subroutine setup() - use clm_varctl, only : nsegspc - - clump_pproc = default_clump_pproc - nsegspc = 20 - npes = default_npes - amask(:) = 1 ! 
Set all to land - - end subroutine setup - - !----------------------------------------------------------------------- - subroutine test_decompInit_lnd_abort_on_bad_clump_pproc() - character(len=CX) :: expected_msg, actual_msg - - if ( npes > 1 ) return ! error checking testing only works seriallly - call setup() - call endrun_init( .true. ) ! Do not abort on endrun for self-tests - clump_pproc = 0 - call write_to_log('decompInit_lnd with clump_pproc=0 should abort') - call decompInit_lnd( ni, nj, amask ) - call write_to_log('check expected abort message') - expected_msg = 'clump_pproc must be greater than 0' - actual_msg = get_last_endrun_msg() - call endrun_init( .false. ) ! Turn back on to abort on the assert - call write_to_log('call assert_equal to check the abort message') - call assert_equal( & - expected=expected_msg, actual=actual_msg, & - msg='decompInit_lnd did not abort with clump_pproc=0' ) - call clean() - end subroutine test_decompInit_lnd_abort_on_bad_clump_pproc - - !----------------------------------------------------------------------- - subroutine test_decompInit_lnd_abort_on_too_big_clump_pproc() - character(len=CX) :: expected_msg, actual_msg - - if ( npes > 1 ) return ! error checking testing only works seriallly - call setup() - call endrun_init( .true. ) ! Do not abort on endrun for self-tests - amask(:) = 1 ! Set all to land - clump_pproc = (ni * nj + 1) / npes - call write_to_log('decompInit_lnd with clump_pproc too large should abort') - call decompInit_lnd( ni, nj, amask ) - call write_to_log('check expected abort message') - expected_msg = 'Number of clumps exceeds number of land grid cells' - actual_msg = get_last_endrun_msg() - call endrun_init( .false. ) ! 
Turn back on to abort on the assert - call write_to_log('call assert_equal to check the abort message') - call assert_equal( & - expected=expected_msg, actual=actual_msg, & - msg='decompInit_lnd did not abort with clump_pproc too large' ) - call clean() - end subroutine test_decompInit_lnd_abort_on_too_big_clump_pproc - - !----------------------------------------------------------------------- - subroutine test_decompInit_lnd_check_sizes() - use decompMod, only : get_proc_bounds - type(bounds_type) :: bounds - - integer :: expected_endg, expected_numg - - call setup() - expected_numg = ni*nj - if ( expected_numg < npes )then - call endrun( msg="npes is too large for this test", file=sourcefile, line=__LINE__ ) - end if - if ( modulo( expected_numg, npes ) /= 0 )then - call endrun( msg="npes does not evenly divide into numg so this test will not work", file=sourcefile, line=__LINE__ ) - end if - expected_endg = ni*nj / npes - amask(:) = 1 ! Set all to land - call decompInit_lnd( ni, nj, amask ) - call get_proc_bounds(bounds, allow_errors=.true.) - call assert_equal( bounds%begg, 1, msg='begg is not as expected' ) - call assert_equal( bounds%endg, expected_endg, msg='endg is not as expected' ) - call clean() - end subroutine test_decompInit_lnd_check_sizes - - !----------------------------------------------------------------------- - subroutine test_decompInit_lnd_abort_when_npes_too_large() - character(len=CX) :: expected_msg, actual_msg - - if ( npes > 1 ) return ! error checking testing only works seriallly - call setup() - ! NOTE: This is arbitrarily modifying the NPES value -- so it MUST be reset set the END! - npes = ni*nj + 1 - - call endrun_init( .true. ) ! Do not abort on endrun for self-tests - amask(:) = 1 ! 
Set all to land - call write_to_log('decompInit_lnd with npes too large should abort') - call decompInit_lnd( ni, nj, amask ) - call write_to_log('check expected abort message') - expected_msg = 'Number of processes exceeds number of land grid cells' - actual_msg = get_last_endrun_msg() - call endrun_init( .false. ) ! Turn back on to abort on the assert - call write_to_log('call assert_equal to check the abort message') - call assert_equal( & - expected=expected_msg, actual=actual_msg, & - msg='decompInit_lnd did not abort with npes too large' ) - - ! NOTE: Return npes to its original value - npes = default_npes - call clean() - end subroutine test_decompInit_lnd_abort_when_npes_too_large - - !----------------------------------------------------------------------- - subroutine test_decompInit_lnd_abort_on_too_small_nsegspc() - use clm_varctl, only : nsegspc - character(len=CX) :: expected_msg, actual_msg - - if ( npes > 1 ) return ! error checking testing only works seriallly - call setup() - call endrun_init( .true. ) ! Do not abort on endrun for self-tests - amask(:) = 1 ! Set all to land - nsegspc = 0 - call write_to_log('decompInit_lnd with nsegspc too small should abort') - call decompInit_lnd( ni, nj, amask ) - call write_to_log('check expected abort message') - expected_msg = 'Number of segments per clump (nsegspc) is less than 1 and can NOT be' - actual_msg = get_last_endrun_msg() - call endrun_init( .false. ) ! Turn back on to abort on the assert - call write_to_log('call assert_equal to check the abort message') - call assert_equal( & - expected=expected_msg, actual=actual_msg, & - msg='decompInit_lnd did not abort with too nsegspc too small' ) - call clean() - end subroutine test_decompInit_lnd_abort_on_too_small_nsegspc - - !----------------------------------------------------------------------- - subroutine test_check_nclumps() - integer :: expected_nclumps - - call setup() - call endrun_init( .true. ) ! 
Do not abort on endrun for self-tests - expected_nclumps = npes / clump_pproc - call assert_equal(expected=expected_nclumps, actual=nclumps, & - msg='nclumps are not as expected') - call endrun_init( .false. ) - call clean() - end subroutine test_check_nclumps - -!----------------------------------------------------------------------- - subroutine test_decompMod_get_clump_bounds_correct() - ! Some testing for get_clump_bounds - use decompMod, only : get_clump_bounds, bounds_type - use unittestSimpleSubgridSetupsMod, only : setup_ncells_single_veg_patch - use unittestSubgridMod, only : unittest_subgrid_teardown - use pftconMod, only : noveg - type(bounds_type) :: bounds - integer :: expected_begg, expected_endg, expected_numg, gcell_per_task - integer :: iclump - - call setup() - ! Now setup a singple grid that's just the full test with every point a single baresoil patch - call setup_ncells_single_veg_patch( ncells=ni*nj, pft_type=noveg ) - clump_pproc = 1 ! Ensure we are just doing this for one clump per proc for now - expected_numg = ni*nj - if ( expected_numg < npes )then - call endrun( msg="npes is too large for this test", file=sourcefile, line=__LINE__ ) - end if - if ( modulo( expected_numg, npes ) /= 0 )then - call endrun( msg="npes does not evenly divide into numg so this test will not work", file=sourcefile, line=__LINE__ ) - end if - gcell_per_task = expected_numg / npes - expected_begg = gcell_per_task * iam + 1 - expected_endg = expected_begg + gcell_per_task - amask(:) = 1 ! Set all to land - call decompInit_lnd( ni, nj, amask ) - call decompInit_clumps( ni, nj, glc_behavior ) - iclump = 1 ! Clump is just 1 since there's only one clump per task - call get_clump_bounds(iclump, bounds) - call assert_equal( bounds%begg, expected_begg, msg='begg is not as expected' ) - call assert_equal( bounds%endg, expected_endg, msg='endg is not as expected' ) - ! 
Other subgrtid level information will be the same -- since there's only one landunit, column, and patch per gridcell - call assert_equal( bounds%begl, expected_begg, msg='begl is not as expected' ) - call assert_equal( bounds%endl, expected_endg, msg='endl is not as expected' ) - call assert_equal( bounds%begc, expected_begg, msg='begc is not as expected' ) - call assert_equal( bounds%endc, expected_endg, msg='endc is not as expected' ) - call assert_equal( bounds%begp, expected_begg, msg='begp is not as expected' ) - call assert_equal( bounds%endp, expected_endg, msg='endp is not as expected' ) - call unittest_subgrid_teardown( ) - call clean() - end subroutine test_decompMod_get_clump_bounds_correct - - !----------------------------------------------------------------------- - subroutine test_decompInit_clump_gcell_info_correct() - ! Some testing for get_clump_bounds - use decompMod, only : clumps - use decompMod, only : get_proc_bounds - type(bounds_type) :: bounds - integer :: expected_gcells, iclump, g, beg_global_index, gcell_per_task - integer :: expected_begg, expected_endg, lc - - call setup() - expected_gcells = ni*nj - if ( expected_gcells < npes )then - call endrun( msg="npes is too large for this test", file=sourcefile, line=__LINE__ ) - end if - if ( modulo( expected_gcells, npes ) /= 0 )then - call endrun( msg="npes does not evenly divide into gcell so this test will not work", file=sourcefile, line=__LINE__ ) - end if - gcell_per_task = expected_gcells / npes - expected_begg = gcell_per_task * iam + 1 - expected_endg = expected_begg + gcell_per_task - amask(:) = 1 ! Set all to land - call decompInit_lnd( ni, nj, amask ) - ! 
When clump_pproc is one clumps will be the same as PE - if ( clump_pproc == 1 ) then - call assert_equal( nclumps, npes, msg='nclumps should match number of processors when clump_pproc is 1' ) - else - call assert_equal( nclumps/clump_pproc, npes, msg='nclumps divided by clump_pproc should match number of processors when clump_pproc > 1' ) - end if - ! Just test over the local clumps - do lc = 1, clump_pproc - iclump = procinfo%cid(lc) - call assert_equal( clumps(iclump)%owner, iam, msg='clumps owner is not correct' ) - call assert_equal( clumps(iclump)%ncells, gcell_per_task, msg='clumps ncells is not correct' ) - end do - call clean() - end subroutine test_decompInit_clump_gcell_info_correct - - !----------------------------------------------------------------------- - subroutine write_to_log(msg) - ! - ! !DESCRIPTION: - ! Write a message to the log file, just from the masterproc - ! - use shr_sys_mod, only : shr_sys_flush - ! !ARGUMENTS: - character(len=*), intent(in) :: msg - ! - ! !LOCAL VARIABLES: - - character(len=*), parameter :: subname = 'write_to_log' - !----------------------------------------------------------------------- - - if (masterproc) then - write(iulog,'(a)') msg - call shr_sys_flush(iulog) ! Flush the I/O buffers always - end if - - end subroutine write_to_log - - !----------------------------------------------------------------------- - subroutine clean - ! - ! !DESCRIPTION: - ! Do end-of-testing cleanup after each test - ! - ! !ARGUMENTS: - ! - ! 
!LOCAL VARIABLES: - !----------------------------------------------------------------------- - call decompmod_clean() - - end subroutine clean - - -end module TestDecompInit From bf947fbd778df6d8ed3325ad840bc9c687bd4b6d Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 2 Oct 2025 00:58:32 -0600 Subject: [PATCH 139/141] Remove the update to Assertions and bring it in, in another PR --- src/self_tests/Assertions.F90.in | 81 -------------------------------- 1 file changed, 81 deletions(-) diff --git a/src/self_tests/Assertions.F90.in b/src/self_tests/Assertions.F90.in index 4a86929a8a..2a4c8cccc6 100644 --- a/src/self_tests/Assertions.F90.in +++ b/src/self_tests/Assertions.F90.in @@ -17,12 +17,6 @@ module Assertions public :: assert_equal interface assert_equal - !TYPE double,int,logical - module procedure assert_equal_0d_{TYPE} - - !TYPE text - module procedure assert_equal_0d_{TYPE} - !TYPE double,int,logical module procedure assert_equal_1d_{TYPE} @@ -36,8 +30,6 @@ module Assertions interface vals_are_equal !TYPE double,int,logical module procedure vals_are_equal_{TYPE} - !TYPE text - module procedure vals_are_equal_{TYPE} end interface vals_are_equal contains @@ -83,60 +75,6 @@ contains end subroutine assert_equal_1d_{TYPE} - !----------------------------------------------------------------------- - !TYPE double,int,logical - subroutine assert_equal_0d_{TYPE}(expected, actual, msg, abs_tol) - ! - ! !DESCRIPTION: - ! Assert scalar values are equal - ! - ! !ARGUMENTS: - {VTYPE}, intent(in) :: expected - {VTYPE}, intent(in) :: actual - character(len=*), intent(in) :: msg - - ! absolute tolerance; if not specified, require exact equality; ignored for logicals - real(r8), intent(in), optional :: abs_tol - ! - ! !LOCAL VARIABLES: - integer :: i - - character(len=*), parameter :: subname = 'assert_equal_0d_{TYPE}' - !----------------------------------------------------------------------- - - if (.not. 
vals_are_equal(actual, expected, abs_tol)) then - write(iulog,*) 'ERROR in assert_equal: ', msg - write(iulog,*) 'Actual : ', actual - write(iulog,*) 'Expected: ', expected - call endrun('ERROR in assert_equal') - end if - - end subroutine assert_equal_0d_{TYPE} - - !----------------------------------------------------------------------- - !TYPE text - subroutine assert_equal_0d_{TYPE}(expected, actual, msg) - ! - ! !DESCRIPTION: - ! Assert scalar values are equal - ! - ! !ARGUMENTS: - {VTYPE}, intent(in) :: expected - {VTYPE}, intent(in) :: actual - character(len=*), intent(in) :: msg - ! - ! !LOCAL VARIABLES: - !----------------------------------------------------------------------- - - if (.not. vals_are_equal(actual, expected)) then - write(iulog,*) 'ERROR in assert_equal: ', msg - write(iulog,*) 'Actual : ', actual - write(iulog,*) 'Expected: ', expected - call endrun('ERROR in assert_equal') - end if - - end subroutine assert_equal_0d_{TYPE} - !----------------------------------------------------------------------- !TYPE double,int,logical subroutine assert_equal_2d_{TYPE}(expected, actual, msg, abs_tol) @@ -260,23 +198,4 @@ contains end function vals_are_equal_{TYPE} - !----------------------------------------------------------------------- - !TYPE text - function vals_are_equal_{TYPE}(actual, expected) result(vals_equal) - ! - ! !DESCRIPTION: - ! Returns true if actual is the same as expected, false otherwise - ! - ! !ARGUMENTS: - logical :: vals_equal ! function result - {VTYPE}, intent(in) :: actual - {VTYPE}, intent(in) :: expected - ! - ! 
!LOCAL VARIABLES: - !----------------------------------------------------------------------- - - vals_equal = actual == expected - - end function vals_are_equal_{TYPE} - end module Assertions From 80192dca65e14b6d78d9561aba4c32ce9b0b45ee Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Thu, 2 Oct 2025 01:21:19 -0600 Subject: [PATCH 140/141] Remove update in DecompInitMod for now --- src/main/decompInitMod.F90 | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index cf84a4a7c6..aa575bd787 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -574,8 +574,6 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) enddo do n = 1,nclumps - ! Only do the error checking over the local processor - if (clumps(n)%owner == iam) then if (clumps(n)%ncells /= allvecg(n,1) .or. & clumps(n)%nlunits /= allvecg(n,2) .or. & clumps(n)%ncols /= allvecg(n,3) .or. & @@ -590,7 +588,6 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) call endrun(msg=errMsg(sourcefile, __LINE__)) endif - endif enddo deallocate(allvecg,allvecl) From 77126f09839b0556dbd1cd807f14e56e6cdb9523 Mon Sep 17 00:00:00 2001 From: Erik Kluzek Date: Mon, 3 Nov 2025 14:43:12 -0700 Subject: [PATCH 141/141] Remove the for_testing_bypass_init namelist option as the exit after self-tests is what really needs to be done --- bld/namelist_files/namelist_definition_ctsm.xml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/bld/namelist_files/namelist_definition_ctsm.xml b/bld/namelist_files/namelist_definition_ctsm.xml index 2b5b4f2e99..7f893ff8c2 100644 --- a/bld/namelist_files/namelist_definition_ctsm.xml +++ b/bld/namelist_files/namelist_definition_ctsm.xml @@ -1264,11 +1264,6 @@ Whether snow on the vegetation canopy affects the radiation/albedo calculations - -For testing whether to bypass the rest of the initialization after the self test driver is run - - For testing whether to bypass most of the run phase other than the clock advance