diff --git a/.gitmodules b/.gitmodules index 2abc02aa48..e1383dcc45 100644 --- a/.gitmodules +++ b/.gitmodules @@ -99,8 +99,11 @@ fxDONOTUSEurl = https://github.com/ESCOMP/CDEPS.git [submodule "share"] path = share -url = https://github.com/ESCOMP/CESM_share -fxtag = share1.1.9 +#url = https://github.com/ESCOMP/CESM_share +url = https://github.com/ekluzek/CESM_share +#fxtag = share1.1.9 +#fxtag = add_jdennis_procstatus_module +fxtag = 1a871cad0a90f8a361196f045313cca1919c7cbc fxrequired = ToplevelRequired # Standard Fork to compare to with "git fleximod test" to ensure personal forks aren't committed fxDONOTUSEurl = https://github.com/ESCOMP/CESM_share diff --git a/bld/CLMBuildNamelist.pm b/bld/CLMBuildNamelist.pm index 8cc5b25187..c0057883a6 100755 --- a/bld/CLMBuildNamelist.pm +++ b/bld/CLMBuildNamelist.pm @@ -5319,6 +5319,7 @@ sub write_output_files { push @groups, "clm_canopy_inparm"; push @groups, "prigentroughness"; push @groups, "zendersoilerod"; + push @groups, "for_testing_options"; if (remove_leading_and_trailing_quotes($nl->get_value('snow_cover_fraction_method')) eq 'SwensonLawrence2012') { push @groups, "scf_swenson_lawrence_2012_inparm"; } diff --git a/bld/namelist_files/namelist_definition_ctsm.xml b/bld/namelist_files/namelist_definition_ctsm.xml index 69a243bd27..7f893ff8c2 100644 --- a/bld/namelist_files/namelist_definition_ctsm.xml +++ b/bld/namelist_files/namelist_definition_ctsm.xml @@ -1259,12 +1259,33 @@ Whether to use subgrid fluxes for snow Whether snow on the vegetation canopy affects the radiation/albedo calculations + + + + + + +For testing whether to bypass most of the run phase other than the clock advance + + + +Whether to exit early after the initialization self tests are run. This is typically only used in automated tests. + + + group="for_testing_options" > Whether to run some tests of ncdio_pio as part of the model run. This is typically only used in automated tests. 
+ +Whether to run some tests of decompInit (to get the gridcell to MPI task decomposition) as part of the model run. This is +typically only used in automated tests. + + If true, allocate memory for and use a second crop grain pool. This is diff --git a/cime_config/buildlib b/cime_config/buildlib index a4b853924e..3ce5080dc4 100755 --- a/cime_config/buildlib +++ b/cime_config/buildlib @@ -135,6 +135,7 @@ def _main_func(): os.path.join(lnd_root, "src", "dyn_subgrid"), os.path.join(lnd_root, "src", "init_interp"), os.path.join(lnd_root, "src", "self_tests"), + os.path.join(lnd_root, "src", "unit_test_shr"), os.path.join(lnd_root, "src", "fates"), os.path.join(lnd_root, "src", "fates", "main"), os.path.join(lnd_root, "src", "fates", "biogeophys"), diff --git a/cime_config/testdefs/ExpectedTestFails.xml b/cime_config/testdefs/ExpectedTestFails.xml index 1acc725738..0abbe99972 100644 --- a/cime_config/testdefs/ExpectedTestFails.xml +++ b/cime_config/testdefs/ExpectedTestFails.xml @@ -390,13 +390,6 @@ - - - FAIL - #3316 - - - diff --git a/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/user_nl_clm index c2a2d14793..573df5c02e 100644 --- a/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/for_testing_fastsetup_bypassrun/user_nl_clm @@ -1,3 +1,6 @@ +! Exit early and bypass the run phase +for_testing_exit_after_self_tests = .true. + ! Turn off history, restarts, and output hist_empty_htapes = .true. use_noio = .true. 
diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands new file mode 100755 index 0000000000..9383f70de0 --- /dev/null +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/shell_commands @@ -0,0 +1,9 @@ +#!/bin/bash +./xmlchange CLM_FORCE_COLDSTART="on" + +# We use this testmod in a _Ln1 test; this requires forcing the ROF coupling frequency to every time step +./xmlchange ROF_NCPL=48 + +# Restarts aren't allowed for these tests, and turn off CPL history +./xmlchange REST_OPTION="never" +./xmlchange HIST_OPTION="never" diff --git a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm index 9e8e0fcd04..c1ac6a7174 100644 --- a/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm +++ b/cime_config/testdefs/testmods_dirs/clm/run_self_tests/user_nl_clm @@ -1,5 +1,11 @@ +! Bypass as much of the init phase as can be done +! Bypassing the run phase already was inherited from the for_testing_fastsetup_bypassrun testmod +for_testing_bypass_init = .true. + +! Turn on some of the self tests for_testing_run_ncdiopio_tests = .true. +for_testing_run_decomp_init_tests = .true. ! Turn off history, restarts, and output hist_empty_htapes = .true. use_noio = .true. 
diff --git a/share b/share index 14338bef3f..1a871cad0a 160000 --- a/share +++ b/share @@ -1 +1 @@ -Subproject commit 14338bef3fa604d49160e376257264db1d3313e5 +Subproject commit 1a871cad0a90f8a361196f045313cca1919c7cbc diff --git a/src/cpl/nuopc/lnd_comp_nuopc.F90 b/src/cpl/nuopc/lnd_comp_nuopc.F90 index 3db987f2fa..1ba75e7845 100644 --- a/src/cpl/nuopc/lnd_comp_nuopc.F90 +++ b/src/cpl/nuopc/lnd_comp_nuopc.F90 @@ -39,6 +39,7 @@ module lnd_comp_nuopc use clm_varctl , only : single_column, clm_varctl_set, iulog use clm_varctl , only : nsrStartup, nsrContinue, nsrBranch use clm_varctl , only : FL => fname_len + use clm_varctl , only : for_testing_exit_after_self_tests use clm_time_manager , only : set_timemgr_init, advance_timestep use clm_time_manager , only : update_rad_dtime use clm_time_manager , only : get_nstep, get_step_size @@ -49,6 +50,7 @@ module lnd_comp_nuopc use lnd_import_export , only : advertise_fields, realize_fields, import_fields, export_fields use lnd_comp_shr , only : mesh, model_meshfile, model_clock use perf_mod , only : t_startf, t_stopf, t_barrierf + use SelfTestDriver , only : for_testing_exit_after_self_tests implicit none private ! except @@ -80,6 +82,7 @@ module lnd_comp_nuopc logical :: glc_present logical :: rof_prognostic + logical :: atm_present logical :: atm_prognostic integer, parameter :: dbug = 0 character(*),parameter :: modName = "(lnd_comp_nuopc)" @@ -284,6 +287,11 @@ subroutine InitializeAdvertise(gcomp, importState, exportState, clock, rc) else atm_prognostic = .true. end if + if (trim(atm_model) == 'satm') then + atm_present = .false. + else + atm_present = .true. 
+ end if call NUOPC_CompAttributeGet(gcomp, name='GLC_model', value=glc_model, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return if (trim(glc_model) == 'sglc') then @@ -310,6 +318,9 @@ subroutine InitializeAdvertise(gcomp, importState, exportState, clock, rc) write(iulog,'(a )')' rof component = '//trim(rof_model) write(iulog,'(a )')' glc component = '//trim(glc_model) write(iulog,'(a,L2)')' atm_prognostic = ',atm_prognostic + if (.not. atm_present) then + write(iulog,'(a,L2)')' atm_present = ',atm_present + end if write(iulog,'(a,L2)')' rof_prognostic = ',rof_prognostic write(iulog,'(a,L2)')' glc_present = ',glc_present if (glc_present) then @@ -328,7 +339,8 @@ subroutine InitializeAdvertise(gcomp, importState, exportState, clock, rc) call control_setNL("lnd_in"//trim(inst_suffix)) - call advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, atm_prognostic, rc) + call advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, & + atm_prognostic, atm_present, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return !---------------------------------------------------------------------------- @@ -351,6 +363,8 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) use lnd_set_decomp_and_domain , only : lnd_set_decomp_and_domain_from_readmesh use lnd_set_decomp_and_domain , only : lnd_set_mesh_for_single_column use lnd_set_decomp_and_domain , only : lnd_set_decomp_and_domain_for_single_column + use SelfTestDriver , only : for_testing_bypass_init_after_self_tests, & + for_testing_exit_after_self_tests ! input/output variables type(ESMF_GridComp) :: gcomp @@ -500,6 +514,12 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) else single_column = .false. end if + !if ( for_testing_exit_after_self_tests) then + ! ******************* + ! *** RETURN HERE *** + ! ******************* + !RETURN + !end if !---------------------------------------------------------------------------- ! 
Reset shr logging to my log file @@ -676,14 +696,19 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) call t_startf('clm_init2') call initialize2(ni, nj, currtime) call t_stopf('clm_init2') + if (for_testing_exit_after_self_tests) then + RETURN + end if !-------------------------------- ! Create land export state !-------------------------------- + if ( .not. for_testing_bypass_init_after_self_tests() ) then call get_proc_bounds(bounds) call export_fields(gcomp, bounds, glc_present, rof_prognostic, & water_inst%waterlnd2atmbulk_inst, lnd2atm_inst, lnd2glc_inst, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return + end if ! Set scalars in export state call State_SetScalar(dble(ldomain%ni), flds_scalar_index_nx, exportState, & @@ -731,6 +756,7 @@ subroutine ModelAdvance(gcomp, rc) use clm_instMod , only : water_inst, atm2lnd_inst, glc2lnd_inst, lnd2atm_inst, lnd2glc_inst use decompMod , only : bounds_type, get_proc_bounds use clm_driver , only : clm_drv + use SelfTestDriver, only : for_testing_bypass_init_after_self_tests ! input/output variables type(ESMF_GridComp) :: gcomp @@ -786,6 +812,9 @@ subroutine ModelAdvance(gcomp, rc) if (single_column .and. .not. scol_valid) then RETURN end if + !if (for_testing_exit_after_self_tests) then + ! RETURN + !end if !$ call omp_set_num_threads(nthrds) @@ -818,16 +847,20 @@ subroutine ModelAdvance(gcomp, rc) flds_scalar_index_nextsw_cday, nextsw_cday, & flds_scalar_name, flds_scalar_num, rc) - ! Get proc bounds - call get_proc_bounds(bounds) - !-------------------------------- ! Unpack import state !-------------------------------- + if ( .not. for_testing_bypass_init_after_self_tests() ) then + ! 
Get proc bounds for both import and export + call get_proc_bounds(bounds) + + call t_startf ('lc_lnd_import') call import_fields( gcomp, bounds, glc_present, rof_prognostic, & atm2lnd_inst, glc2lnd_inst, water_inst%wateratm2lndbulk_inst, rc ) if (ChkErr(rc,__LINE__,u_FILE_u)) return + call t_stopf ('lc_lnd_import') + end if !-------------------------------- ! Run model @@ -917,9 +950,13 @@ subroutine ModelAdvance(gcomp, rc) ! Pack export state !-------------------------------- + if ( .not. for_testing_bypass_init_after_self_tests() ) then + call t_startf ('lc_lnd_export') call export_fields(gcomp, bounds, glc_present, rof_prognostic, & water_inst%waterlnd2atmbulk_inst, lnd2atm_inst, lnd2glc_inst, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return + call t_stopf ('lc_lnd_export') + end if !-------------------------------- ! Advance ctsm time step @@ -1009,6 +1046,7 @@ subroutine ModelSetRunClock(gcomp, rc) rc = ESMF_SUCCESS call ESMF_LogWrite(subname//' called', ESMF_LOGMSG_INFO) if (.not. scol_valid) return + !if (for_testing_exit_after_self_tests) return ! query the Component for its clocks call NUOPC_ModelGet(gcomp, driverClock=dclock, modelClock=mclock, rc=rc) @@ -1292,6 +1330,7 @@ subroutine clm_orbital_update(clock, logunit, mastertask, eccen, obliqr, lambm0 end subroutine clm_orbital_update subroutine CheckImport(gcomp, rc) + use clm_varctl, only : for_testing_exit_after_self_tests type(ESMF_GridComp) :: gcomp integer, intent(out) :: rc character(len=*) , parameter :: subname = "("//__FILE__//":CheckImport)" @@ -1320,6 +1359,9 @@ subroutine CheckImport(gcomp, rc) if (single_column .and. .not. scol_valid) then RETURN end if + !if (for_testing_exit_after_self_tests) then + !RETURN + !end if ! The remander of this should be equivalent to the NUOPC internal routine ! 
from NUOPC_ModeBase.F90 diff --git a/src/cpl/nuopc/lnd_import_export.F90 b/src/cpl/nuopc/lnd_import_export.F90 index 624590b9a6..b1a41d0d66 100644 --- a/src/cpl/nuopc/lnd_import_export.F90 +++ b/src/cpl/nuopc/lnd_import_export.F90 @@ -156,7 +156,8 @@ module lnd_import_export contains !=============================================================================== - subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, atm_prognostic, rc) + subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, rof_prognostic, & + atm_prognostic, atm_present, rc) use shr_carma_mod , only : shr_carma_readnl use shr_ndep_mod , only : shr_ndep_readnl @@ -173,6 +174,7 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r logical , intent(in) :: cism_evolve logical , intent(in) :: rof_prognostic logical , intent(in) :: atm_prognostic + logical , intent(in) :: atm_present integer , intent(out) :: rc ! local variables @@ -210,7 +212,9 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r ! Need to determine if there is no land for single column before the advertise call is done - if (atm_prognostic .or. force_send_to_atm) then + if (.not. atm_present)then + send_to_atm = .false. + else if (atm_prognostic .or. force_send_to_atm) then send_to_atm = .true. else send_to_atm = .false. @@ -339,6 +343,9 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r call fldlist_add(fldsToLnd_num, fldsToLnd, trim(flds_scalar_name)) + !!!!!!!!!!!!!!!!!!!!!!!!!!! new if section !!!!!!!!!!!!!!!!!!!!!!!!!! + if ( atm_present ) then + ! from atm call fldlist_add(fldsToLnd_num, fldsToLnd, Sa_z ) call fldlist_add(fldsToLnd_num, fldsToLnd, Sa_topo ) @@ -389,6 +396,9 @@ subroutine advertise_fields(gcomp, flds_scalar_name, glc_present, cism_evolve, r call fldlist_add(fldsToLnd_num, fldsToLnd, Sa_co2diag) end if + end if ! atm_present + !!!!!!!!!!!!!!!!!!!!!!!!!!! 
new if section !!!!!!!!!!!!!!!!!!!!!!!!!! + if (rof_prognostic) then ! from river call fldlist_add(fldsToLnd_num, fldsToLnd, Flrr_flood ) diff --git a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 index 5a2536f63b..aa4dcc1aaa 100644 --- a/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 +++ b/src/cpl/share_esmf/lnd_set_decomp_and_domain.F90 @@ -21,6 +21,7 @@ module lnd_set_decomp_and_domain use clm_varctl , only : iulog, inst_suffix, FL => fname_len use abortutils , only : endrun use perf_mod , only : t_startf, t_stopf + use ctsm_memcheck, only : memcheck implicit none private ! except @@ -69,7 +70,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes ! local variables type(ESMF_Mesh) :: mesh_maskinput type(ESMF_Mesh) :: mesh_lndinput - type(ESMF_DistGrid) :: distgrid_ctsm + type(ESMF_DistGrid) :: distgrid_ctsm ! This appears to be local but is used later in lnd_import_export type(ESMF_Field) :: field_lnd type(ESMF_Field) :: field_ctsm type(ESMF_RouteHandle) :: rhandle_lnd2ctsm @@ -86,6 +87,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes real(r8) , pointer :: lndfrac_glob(:) real(r8) , pointer :: lndfrac_loc_input(:) real(r8) , pointer :: dataptr1d(:) + real(r8) :: msize, mrss !------------------------------------------------------------------------------- call t_startf('lnd_set_decomp_and_domain_from_readmesh: setup') @@ -104,6 +106,8 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes write(iulog,*) end if + call memcheck('lnd_set_decomp_and_domain_from_readmesh: before allocate') + ! Determine global 2d sizes from read of dimensions of surface dataset and allocate global memory call lnd_get_global_dims(ni, nj, gsize, isgrid2d) @@ -121,8 +125,10 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes ! 
This will get added to the ESMF PET files if DEBUG=TRUE and CREATE_ESMF_PET_FILES=TRUE call ESMF_VMLogMemInfo("clm: Before lnd mesh create in ") #endif + call t_startf('lnd_set_decomp_and_domain_from_readmesh: ESMF_MeshCreate') mesh_maskinput = ESMF_MeshCreate(filename=trim(meshfile_mask), fileformat=ESMF_FILEFORMAT_ESMFMESH, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return + call t_stopf('lnd_set_decomp_and_domain_from_readmesh: ESMF_MeshCreate') #ifdef DEBUG ! This will get added to the ESMF PET files if DEBUG=TRUE and CREATE_ESMF_PET_FILES=TRUE call ESMF_VMLogMemInfo("clm: After lnd mesh create in ") @@ -163,7 +169,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes ! Get JUST gridcell processor bounds ! Remaining bounds (landunits, columns, patches) will be set after calling decompInit_glcp ! so get_proc_bounds is called twice and the gridcell information is just filled in twice - call get_proc_bounds(bounds) + call get_proc_bounds(bounds, allow_errors=.true.) begg = bounds%begg endg = bounds%endg @@ -197,8 +203,11 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes gindex_ctsm(n) = gindex_ocn(n-nlnd) end if end do + call t_stopf ('lnd_set_decomp_and_domain_from_readmesh: decomp_init') ! Generate a new mesh on the gindex decomposition + ! NOTE: The distgrid_ctsm will be used later in lnd_import_export, even though it appears to just be local + call t_startf('lnd_set_decomp_and_domain_from_readmesh: ESMF mesh on new decomposition') distGrid_ctsm = ESMF_DistGridCreate(arbSeqIndexList=gindex_ctsm, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return mesh_ctsm = ESMF_MeshCreate(mesh_lndinput, elementDistGrid=distgrid_ctsm, rc=rc) @@ -207,6 +216,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes ! 
Set ldomain%lonc, ldomain%latc and ldomain%area call lnd_set_ldomain_gridinfo_from_mesh(mesh_ctsm, vm, gindex_ctsm, begg, endg, isgrid2d, ni, nj, ldomain, rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return + call t_stopf('lnd_set_decomp_and_domain_from_readmesh: ESMF mesh on new decomposition') ! Set ldomain%lfrac ! Create fields on the input decomp and ctsm decomp @@ -215,6 +225,7 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes ! Redistribute field_lnd to field_ctsm ! Determine ldomain%frac using ctsm decomposition + call t_startf('lnd_set_decomp_and_domain_from_readmesh: land frac') if (trim(driver) == 'cmeps') then if (trim(meshfile_mask) /= trim(meshfile_lnd)) then @@ -254,14 +265,56 @@ subroutine lnd_set_decomp_and_domain_from_readmesh(driver, vm, meshfile_lnd, mes deallocate(lndfrac_glob) end if + call t_stopf('lnd_set_decomp_and_domain_from_readmesh: land frac') + call memcheck('lnd_set_decomp_and_domain_from_readmesh: just before deallocate') - ! Deallocate local pointer memory - deallocate(gindex_lnd) - deallocate(gindex_ocn) - deallocate(gindex_ctsm) + ! Deallocate local pointer memory including ESMF objects + call from_readmesh_dealloc( rc ) + if (chkerr(rc,__LINE__,u_FILE_u)) return + call memcheck('lnd_set_decomp_and_domain_from_readmesh: after deallocate') call t_stopf('lnd_set_decomp_and_domain_from_readmesh: final') + !=============================================================================== + ! Internal subroutines for this subroutine + contains + !=============================================================================== + + subroutine from_readmesh_dealloc( rc ) + use ESMF, only : ESMF_FieldRedistRelease, ESMF_DistGridDestroy, ESMF_FieldDestroy, ESMF_MeshDestroy + integer, intent(out) :: rc ! ESMF return code to indicate deallocate was successful + + logical :: no_esmf_garbage = .true. ! If .true. 
release all ESMF data (which can be problematic if referenced again) + + rc = ESMF_SUCCESS + + deallocate(lndfrac_loc_input) + deallocate(gindex_lnd) + deallocate(gindex_ocn) + deallocate(gindex_ctsm) + ! Destroy or release all of the ESMF objects + call ESMF_FieldRedistRelease( rhandle_lnd2ctsm, noGarbage=no_esmf_garbage, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + !-------------------------------------------------------------------------- + ! NOTE: We can't destroy the distgrid -- because it will be used later + ! As such we don't do the following... EBK 08/01/2025 + !call ESMF_DistGridDestroy( distgrid_ctsm, rc=rc) + !if (chkerr(rc,__LINE__,u_FILE_u)) return + !-------------------------------------------------------------------------- + call ESMF_FieldDestroy( field_lnd, noGarbage=no_esmf_garbage, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + call ESMF_FieldDestroy( field_ctsm, noGarbage=no_esmf_garbage, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + call ESMF_MeshDestroy( mesh_maskinput, noGarbage=no_esmf_garbage, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + call ESMF_MeshDestroy( mesh_lndinput, noGarbage=no_esmf_garbage, rc=rc) + if (chkerr(rc,__LINE__,u_FILE_u)) return + + end subroutine from_readmesh_dealloc + + !------------------------------------------------------------------------------- + + end subroutine lnd_set_decomp_and_domain_from_readmesh !=============================================================================== @@ -331,7 +384,7 @@ subroutine lnd_set_decomp_and_domain_for_single_column(scol_lon, scol_lat, scol_ call t_stopf ('decompInit_lnd') ! Initialize processor bounds - call get_proc_bounds(bounds) + call get_proc_bounds(bounds, allow_errors=.true.) ! allow errors since decomp not fully initialized ! 
Initialize domain data structure call domain_init(domain=ldomain, isgrid2d=.false., ni=1, nj=1, nbeg=1, nend=1) @@ -469,6 +522,7 @@ subroutine lnd_set_lndmask_from_maskmesh(mesh_lnd, mesh_mask, vm, gsize, lndmask character(len=CL) :: flandfrac_status !------------------------------------------------------------------------------- + call t_startf('lnd_set_lndmask_from_maskmesh') rc = ESMF_SUCCESS flandfrac = './init_generated_files/ctsm_landfrac'//trim(inst_suffix)//'.nc' @@ -507,12 +561,14 @@ subroutine lnd_set_lndmask_from_maskmesh(mesh_lnd, mesh_mask, vm, gsize, lndmask if (ChkErr(rc,__LINE__,u_FILE_u)) return ! create route handle to map ocean mask from mask mesh to land mesh + call t_startf('lnd_set_lndmask_from_maskmesh::ESMF_FieldRegridStore') call ESMF_FieldRegridStore(field_mask, field_lnd, routehandle=rhandle_mask2lnd, & srcMaskValues=(/srcMaskValue/), dstMaskValues=(/dstMaskValue/), & regridmethod=ESMF_REGRIDMETHOD_CONSERVE, normType=ESMF_NORMTYPE_DSTAREA, & srcTermProcessing=srcTermProcessing_Value, & ignoreDegenerate=.true., unmappedaction=ESMF_UNMAPPEDACTION_IGNORE, rc=rc) if (chkerr(rc,__LINE__,u_FILE_u)) return + call t_stopf('lnd_set_lndmask_from_maskmesh::ESMF_FieldRegridStore') ! fill in values for field_mask with mask on mask mesh call ESMF_MeshGet(mesh_mask, elementdistGrid=distgrid_mask, rc=rc) @@ -528,9 +584,11 @@ subroutine lnd_set_lndmask_from_maskmesh(mesh_lnd, mesh_mask, vm, gsize, lndmask dataptr1d(:) = maskmask_loc(:) ! 
map mask mask to land mesh + call t_startf('lnd_set_lndmask_from_maskmesh::ESMF_FieldRegrid') call ESMF_FieldRegrid(field_mask, field_lnd, routehandle=rhandle_mask2lnd, & termorderflag=ESMF_TERMORDER_SRCSEQ, checkflag=checkflag, zeroregion=ESMF_REGION_TOTAL, rc=rc) if (chkerr(rc,__LINE__,u_FILE_u)) return + call t_stopf('lnd_set_lndmask_from_maskmesh::ESMF_FieldRegrid') call ESMF_MeshGet(mesh_lnd, spatialDim=spatialDim, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return @@ -558,17 +616,20 @@ subroutine lnd_set_lndmask_from_maskmesh(mesh_lnd, mesh_mask, vm, gsize, lndmask do n = 1,lsize_lnd lndmask_glob(gindex_input(n)) = lndmask_loc(n) end do + call t_startf('lnd_set_lndmask_from_maskmesh::ESMF_VMAllReduce') allocate(itemp_glob(gsize)) call ESMF_VMAllReduce(vm, sendData=lndmask_glob, recvData=itemp_glob, count=gsize, & reduceflag=ESMF_REDUCE_SUM, rc=rc) lndmask_glob(:) = int(itemp_glob(:)) deallocate(itemp_glob) + call t_stopf('lnd_set_lndmask_from_maskmesh::ESMF_VMAllReduce') ! deallocate memory deallocate(maskmask_loc) deallocate(lndmask_loc) end if + call t_stopf('lnd_set_lndmask_from_maskmesh') end subroutine lnd_set_lndmask_from_maskmesh diff --git a/src/main/abortutils.F90 b/src/main/abortutils.F90 index 8afa4ef195..745baaafce 100644 --- a/src/main/abortutils.F90 +++ b/src/main/abortutils.F90 @@ -10,24 +10,88 @@ module abortutils ! in conjunction with aborting the model, or at least issuing a warning. !----------------------------------------------------------------------- - use shr_log_mod , only : errMsg => shr_log_errMsg - + use shr_kind_mod, only: CX => shr_kind_cx + use shr_log_mod, only: errMsg => shr_log_errMsg + use shr_sys_mod , only : shr_sys_flush + use clm_varctl, only: iulog implicit none private public :: endrun ! Abort the model for abnormal termination public :: write_point_context ! Write context for the given index, including global index information and more + ! Some interfaces for self-test work + public :: endrun_init ! 
Set up how endrun will behave (used for self-tests) + public :: get_last_endrun_msg ! Return the last endrun message interface endrun module procedure endrun_vanilla module procedure endrun_write_point_context end interface + ! These two are to enable self tests to have endrun calls that do not abort +#ifdef DEBUG + logical :: abort_on_endrun = .true. ! Whether to abort the model on endrun; set to .false. for self-tests + character(len=CX) :: save_msg = 'none' ! string to save from last endrun call +#endif + character(len=*), parameter, private :: sourcefile = & __FILE__ contains + !----------------------------------------------------------------------- + subroutine endrun_init( for_testing_do_not_abort ) + logical , intent(in) :: for_testing_do_not_abort +#ifdef DEBUG + if (save_msg /= 'none') then + abort_on_endrun = .true. + call endrun( msg='An endrun call happened, but was not handled' ) + end if + if ( for_testing_do_not_abort )then + write(iulog,*)'Preparing a test that will call endrun' + save_msg = 'none' ! Reset the saved message + abort_on_endrun = .false. + else + abort_on_endrun = .true. + end if +#else + call endrun( msg='endrun_init called without DEBUG mode, which is not allowed', & + file=__FILE__, line=__LINE__ ) +#endif + end subroutine endrun_init + + !----------------------------------------------------------------------- + function get_last_endrun_msg() + ! + ! !DESCRIPTION: + ! Gives the last message saved from an endrun call that didn't + ! abort due to being in the context of self-tests + ! + ! !USES: + ! + ! !ARGUMENTS: + character(len=:), allocatable :: get_last_endrun_msg ! 
function result + !----------------------------------------------------------------------- + +#ifdef DEBUG + if (abort_on_endrun) then + call endrun( msg='Do not call get_last_endrun_msg when abort_on_endrun is true', & + file=sourcefile, line=__LINE__ ) + end if + if (save_msg == 'none') then + write(iulog,*) 'An endrun call was expected, but has not been made yet', & + errMsg( sourcefile, __LINE__ ) + end if + get_last_endrun_msg = trim(save_msg) + ! Reset endrun_msg to indicate the last error message was handled + save_msg = 'none' +#else + call endrun( msg='get_last_endrun_msg called without DEBUG mode, which is not allowed', & + file=sourcefile, line=__LINE__ ) +#endif + + end function get_last_endrun_msg + !----------------------------------------------------------------------- subroutine endrun_vanilla(msg, additional_msg, line, file) @@ -36,7 +100,6 @@ subroutine endrun_vanilla(msg, additional_msg, line, file) ! Abort the model for abnormal termination ! use shr_abort_mod , only: shr_abort_abort - use clm_varctl , only: iulog ! ! !ARGUMENTS: ! Generally you want to at least provide msg. The main reason to separate msg from @@ -49,18 +112,42 @@ subroutine endrun_vanilla(msg, additional_msg, line, file) integer , intent(in), optional :: line ! Line number for the endrun call character(len=*), intent(in), optional :: file ! file for the endrun call !----------------------------------------------------------------------- + character(len=CX) :: abort_msg + call shr_sys_flush(iulog) ! Flush the I/O buffers always if (present (additional_msg)) then - write(iulog,*)'ENDRUN: ', trim(additional_msg) + write(iulog,*)'ENDRUN: '// trim(additional_msg) else - write(iulog,*)'ENDRUN:' + write(iulog,*)'ENDRUN: ' end if - ! Don't pass file and line to shr_abort_abort since the PFUNIT test version doesn't have those options - if ( present(file) .and. present(line) ) then - write(iulog,*) errMsg(file, line) - end if - call shr_abort_abort(string=msg) +#ifdef DEBUG + if (.not. 
abort_on_endrun) then + if (save_msg /= 'none') then + abort_msg = 'a previous error was already logged and now a second one is being, done so fully aborting now' + abort_msg = trim(abort_msg) // ' (Call end_run_init after endrun calls to reset this)' + call shr_sys_flush(iulog) ! Flush the I/O buffers always + call shr_abort_abort(abort_msg) + end if + ! Just save msg and return + ! Don't finalize ESMF or exit since the self tests need to evaluate that + save_msg = trim(msg) + if (present (additional_msg)) then + save_msg = trim(msg)//trim(additional_msg) + write(iulog,*) 'ENDRUN: '// trim(additional_msg) + call shr_sys_flush(iulog) ! Flush the I/O buffers always + end if + else +#endif + ! Don't pass file and line to shr_abort_abort since the PFUNIT test version doesn't have those options + if ( present(file) .and. present(line) ) then + write(iulog,*) errMsg(file, line) + end if + call shr_sys_flush(iulog) ! Flush the I/O buffers always + call shr_abort_abort(string=msg) +#ifdef DEBUG + end if +#endif end subroutine endrun_vanilla @@ -73,7 +160,6 @@ subroutine endrun_write_point_context(subgrid_index, subgrid_level, msg, additio ! ! This version also prints additional information about the point causing the error. ! - use clm_varctl , only: iulog use decompMod , only: subgrid_level_unspecified ! ! Arguments: @@ -106,7 +192,6 @@ subroutine write_point_context(subgrid_index, subgrid_level) ! ! NOTE: DO NOT CALL AN ABORT FROM HERE AS THAT WOULD SHORT CIRUIT THE ERROR REPORTING!! ! 
- use clm_varctl , only : iulog use decompMod , only : subgrid_level_gridcell, subgrid_level_landunit, subgrid_level_column, subgrid_level_patch use decompMod , only : get_global_index use GridcellType , only : grc @@ -246,4 +331,6 @@ subroutine write_point_context(subgrid_index, subgrid_level) end subroutine write_point_context + !----------------------------------------------------------------------- + end module abortutils diff --git a/src/main/clm_driver.F90 b/src/main/clm_driver.F90 index 3a47a7eed3..279154f52c 100644 --- a/src/main/clm_driver.F90 +++ b/src/main/clm_driver.F90 @@ -85,6 +85,7 @@ module clm_driver use clm_instMod use SoilMoistureStreamMod , only : PrescribedSoilMoistureInterp, PrescribedSoilMoistureAdvance use SoilBiogeochemDecompCascadeConType , only : no_soil_decomp, decomp_method + use SelfTestDriver , only : for_testing_bypass_run_except_clock_advance ! ! !PUBLIC TYPES: implicit none @@ -165,6 +166,7 @@ subroutine clm_drv(doalb, nextsw_cday, declinp1, declin, rstwr, nlend, rdate, ro ! CalcIrrigationNeeded. Simply declaring this variable makes the ICE go away. real(r8), allocatable :: dummy1_to_make_pgi_happy(:) !----------------------------------------------------------------------- + if ( for_testing_bypass_run_except_clock_advance() ) return ! Determine processor bounds and clumps for this processor @@ -1576,6 +1578,8 @@ subroutine clm_drv_init(bounds, & integer :: fp, fc ! filter indices !----------------------------------------------------------------------- + if ( for_testing_bypass_run_except_clock_advance() ) return + associate( & snl => col%snl , & ! Input: [integer (:) ] number of snow layers @@ -1657,6 +1661,7 @@ subroutine clm_drv_patch2col (bounds, & ! !LOCAL VARIABLES: integer :: c,fc ! indices ! ----------------------------------------------------------------- + if ( for_testing_bypass_run_except_clock_advance() ) return ! Note: lake points are excluded from many of the following ! averages. 
For some fields, this is because the field doesn't @@ -1752,6 +1757,8 @@ subroutine write_diagnostic (bounds, nstep, lnd2atm_inst) integer :: status(MPI_STATUS_SIZE) ! mpi status !------------------------------------------------------------------------ + if ( for_testing_bypass_run_except_clock_advance() ) return + call get_proc_global(ng=numg) if (masterproc) then diff --git a/src/main/clm_initializeMod.F90 b/src/main/clm_initializeMod.F90 index 387f4cd4ca..d75cbb2871 100644 --- a/src/main/clm_initializeMod.F90 +++ b/src/main/clm_initializeMod.F90 @@ -30,7 +30,7 @@ module clm_initializeMod use CLMFatesInterfaceMod , only : CLMFatesGlobals1,CLMFatesGlobals2 use CLMFatesInterfaceMod , only : CLMFatesTimesteps use dynSubgridControlMod , only : dynSubgridControl_init, get_reset_dynbal_baselines - use SelfTestDriver , only : self_test_driver + use SelfTestDriver , only : self_test_driver, for_testing_bypass_init_after_self_tests use SoilMoistureStreamMod , only : PrescribedSoilMoistureInit use clm_instMod ! @@ -67,6 +67,7 @@ subroutine initialize1(dtime) use SoilBiogeochemDecompCascadeConType , only : decomp_cascade_par_init use CropReprPoolsMod , only: crop_repr_pools_init use HillslopeHydrologyMod, only: hillslope_properties_init + use SelfTestDriver , only: self_test_readnml ! ! !ARGUMENTS integer, intent(in) :: dtime ! model time step (seconds) @@ -102,6 +103,8 @@ subroutine initialize1(dtime) call surfrd_get_num_patches(fsurdat, actual_maxsoil_patches, actual_numpft, actual_numcft) call surfrd_get_nlevurb(fsurdat, actual_nlevurb) + call self_test_readnml( NLFilename ) + ! If fates is on, we override actual_maxsoil_patches. FATES dictates the ! number of patches per column. We still use numcft from the surface ! file though... 
@@ -182,6 +185,7 @@ subroutine initialize2(ni,nj, currtime) use FATESFireFactoryMod , only : scalar_lightning use dynFATESLandUseChangeMod , only : dynFatesLandUseInit use HillslopeHydrologyMod , only : InitHillslope + use SelfTestDriver , only : for_testing_bypass_init_after_self_tests ! ! !ARGUMENTS integer, intent(in) :: ni, nj ! global grid sizes @@ -221,8 +225,8 @@ subroutine initialize2(ni,nj, currtime) !----------------------------------------------------------------------- call t_startf('clm_init2_part1') - ! Get processor bounds for gridcells - call get_proc_bounds(bounds_proc) + ! Get processor bounds for gridcells, just for gridcells + call get_proc_bounds(bounds_proc, allow_errors=.true.) ! Just get proc bounds for gridcells, other variables won't be set until adter decompInit_clumps begg = bounds_proc%begg; endg = bounds_proc%endg ! Initialize glc behavior @@ -281,8 +285,9 @@ subroutine initialize2(ni,nj, currtime) call decompInit_clumps(ni, nj, glc_behavior) call t_stopf('clm_decompInit_clumps') + call t_startf('clm_init2_subgrid') ! *** Get ALL processor bounds - for gridcells, landunit, columns and patches *** - call get_proc_bounds(bounds_proc) + call get_proc_bounds(bounds_proc) ! This has to be done after decompInit_clumps is called ! Allocate memory for subgrid data structures ! This is needed here BEFORE the following call to initGridcells @@ -302,6 +307,7 @@ subroutine initialize2(ni,nj, currtime) call initGridCells(bounds_clump, glc_behavior) end do !$OMP END PARALLEL DO + call t_stopf('clm_init2_subgrid') ! Set global seg maps for gridcells, landlunits, columns and patches call t_startf('clm_decompInit_glcp') @@ -333,6 +339,7 @@ subroutine initialize2(ni,nj, currtime) ! Run any requested self-tests call self_test_driver(bounds_proc) + if ( .not. for_testing_bypass_init_after_self_tests() )then ! Deallocate surface grid dynamic memory for variables that aren't needed elsewhere. ! 
Some things are kept until the end of initialize2; urban_valid is kept through the ! end of the run for error checking, pct_urban_max is kept through the end of the run @@ -349,8 +356,9 @@ subroutine initialize2(ni,nj, currtime) allocate(nutrient_competition_method, & source=create_nutrient_competition_method(bounds_proc)) call readParameters(photosyns_inst) - + end if ! End of bypass + ! Self test skipping should still do the time manager initialization ! Initialize time manager if (nsrest == nsrStartup) then call timemgr_init() @@ -376,6 +384,7 @@ subroutine initialize2(ni,nj, currtime) call t_stopf('clm_init2_part2') call t_startf('clm_init2_part3') + if ( .not. for_testing_bypass_init_after_self_tests() )then ! Initialize Balance checking (after time-manager) call BalanceCheckInit() @@ -423,7 +432,9 @@ subroutine initialize2(ni,nj, currtime) call SnowAge_init( ) ! SNICAR aging parameters: ! Print history field info to standard out - call hist_printflds() + if ( .not. use_noio )then + call hist_printflds() + end if ! Initializate dynamic subgrid weights (for prescribed transient Patches, CNDV ! and/or dynamic landunits); note that these will be overwritten in a restart run @@ -508,6 +519,7 @@ subroutine initialize2(ni,nj, currtime) if (nsrest == nsrContinue ) then call htapes_fieldlist() end if + end if ! End of bypass ! Read restart/initial info is_cold_start = .false. @@ -601,6 +613,8 @@ subroutine initialize2(ni,nj, currtime) call t_stopf('clm_init2_init_interp') end if + if ( .not. for_testing_bypass_init_after_self_tests() )then + ! If requested, reset dynbal baselines ! This needs to happen after reading the restart file (including after reading the ! interpolated restart file, if applicable). @@ -759,6 +773,7 @@ subroutine initialize2(ni,nj, currtime) water_inst%waterdiagnosticbulk_inst, canopystate_inst, & soilstate_inst, soilbiogeochem_carbonflux_inst) end if + end if ! end of bypass ! 
topo_glc_mec was allocated in initialize1, but needed to be kept around through ! initialize2 because it is used to initialize other variables; now it can be deallocated @@ -781,12 +796,14 @@ subroutine initialize2(ni,nj, currtime) endif if (water_inst%DoConsistencyCheck()) then + call t_startf('tracer_consistency_check') !$OMP PARALLEL DO PRIVATE (nc, bounds_clump) do nc = 1,nclumps call get_clump_bounds(nc, bounds_clump) call water_inst%TracerConsistencyCheck(bounds_clump, 'end of initialization') end do !$OMP END PARALLEL DO + call t_stopf('tracer_consistency_check') end if call t_stopf('clm_init2_part3') diff --git a/src/main/clm_instMod.F90 b/src/main/clm_instMod.F90 index 7d9a0f6ad2..bc86749d06 100644 --- a/src/main/clm_instMod.F90 +++ b/src/main/clm_instMod.F90 @@ -229,6 +229,7 @@ subroutine clm_instInit(bounds) integer :: dummy_to_make_pgi_happy !---------------------------------------------------------------------- + call t_startf('clm_instInit_part1') ! Note: h2osno_col and snow_depth_col are initialized as local variables ! since they are needed to initialize vertical data structures @@ -286,6 +287,9 @@ subroutine clm_instInit(bounds) call setSoilLayerClass(bounds) endif + call t_stopf('clm_instInit_part1') + + call t_startf('clm_instInit_part2') !----------------------------------------------- ! Set cold-start values for snow levels, snow layers and snow interfaces !----------------------------------------------- @@ -338,6 +342,10 @@ subroutine clm_instInit(bounds) call glacier_smb_inst%Init(bounds) + call t_stopf('clm_instInit_part2') + + call t_startf('clm_instInit_part3') + ! COMPILER_BUG(wjs, 2014-11-29, pgi 14.7) Without the following assignment, the ! assertion in energyflux_inst%Init fails with pgi 14.7 on yellowstone, presumably due ! to a compiler bug. @@ -473,6 +481,7 @@ subroutine clm_instInit(bounds) deallocate (h2osno_col) deallocate (snow_depth_col) deallocate (exice_init_conc_col) + call t_stopf('clm_instInit_part3') ! 
------------------------------------------------------------------------ ! Initialize accumulated fields diff --git a/src/main/clm_varctl.F90 b/src/main/clm_varctl.F90 index 83133acf2b..c807c47fdf 100644 --- a/src/main/clm_varctl.F90 +++ b/src/main/clm_varctl.F90 @@ -52,6 +52,12 @@ module clm_varctl ! true => run tests of ncdio_pio logical, public :: for_testing_run_ncdiopio_tests = .false. + ! true => run tests of decompInit + logical, public :: for_testing_run_decomp_init_tests = .false. + + ! true => exit after the self-tests run + logical, public :: for_testing_exit_after_self_tests = .false. + ! true => allocate memory for and use a second grain pool. This is meant only for ! software testing of infrastructure to support the AgSys crop model integration. This ! option can be dropped once AgSys is integrated and we have tests of it. diff --git a/src/main/controlMod.F90 b/src/main/controlMod.F90 index 089503dc8b..4a956e33b2 100644 --- a/src/main/controlMod.F90 +++ b/src/main/controlMod.F90 @@ -210,7 +210,7 @@ subroutine control_init(dtime) snow_thermal_cond_method, snow_thermal_cond_glc_method, & snow_thermal_cond_lake_method, snow_cover_fraction_method, & irrigate, run_zero_weight_urban, all_active, & - crop_fsat_equals_zero, for_testing_run_ncdiopio_tests, & + crop_fsat_equals_zero, & for_testing_use_second_grain_pool, for_testing_use_repr_structure_pool, & for_testing_no_crop_seed_replenishment, & z0param_method, use_z0m_snowmelt @@ -766,9 +766,6 @@ subroutine control_spmd() ! Crop saturated excess runoff call mpi_bcast(crop_fsat_equals_zero, 1, MPI_LOGICAL, 0, mpicom, ier) - ! Whether to run tests of ncdio_pio - call mpi_bcast(for_testing_run_ncdiopio_tests, 1, MPI_LOGICAL, 0, mpicom, ier) - ! 
Various flags used for testing infrastructure for having multiple crop reproductive pools call mpi_bcast(for_testing_use_second_grain_pool, 1, MPI_LOGICAL, 0, mpicom, ier) call mpi_bcast(for_testing_use_repr_structure_pool, 1, MPI_LOGICAL, 0, mpicom, ier) diff --git a/src/main/decompInitMod.F90 b/src/main/decompInitMod.F90 index aa575bd787..04680e1354 100644 --- a/src/main/decompInitMod.F90 +++ b/src/main/decompInitMod.F90 @@ -12,12 +12,14 @@ module decompInitMod use spmdMod , only : masterproc, iam, npes, mpicom use abortutils , only : endrun use clm_varctl , only : iulog + use ctsm_memcheck, only : memcheck + use perf_mod , only : t_startf, t_stopf ! implicit none private ! ! !PUBLIC TYPES: - ! + !b ! !PUBLIC MEMBER FUNCTIONS: public :: decompInit_lnd ! initializes lnd grid decomposition into clumps and processors public :: decompInit_clumps ! initializes atm grid decomposition into clumps @@ -29,11 +31,12 @@ module decompInitMod integer, public :: clump_pproc ! number of clumps per MPI process ! ! !PRIVATE TYPES: - integer, pointer :: lcid(:) ! temporary for setting decomposition + integer, pointer :: lcid(:) ! temporary for setting decomposition, allocated set and used in decompInit_lnd, and used and deallocated in decompInit_clumps (Can make it allocatable) integer :: nglob_x, nglob_y ! global sizes integer, parameter :: dbug=0 ! 0 = min, 1=normal, 2=much, 3=max character(len=*), parameter :: sourcefile = & __FILE__ + real(r8) :: msize, mrss ! memory usage variables #include ! mpi library include file !------------------------------------------------------------------------------ @@ -52,6 +55,11 @@ subroutine decompInit_lnd(lni, lnj, amask) use clm_varctl , only : nsegspc use decompMod , only : gindex_global, nclumps, clumps use decompMod , only : bounds_type, get_proc_bounds, procinfo + ! 
Temporary testing stuff + use Assertions, only : assert_equal + use decompMod , only : processor_type, get_global_index, subgrid_level_gridcell + use decompMod , only : clump_type + ! end temporary testing stuff ! ! !ARGUMENTS: integer , intent(in) :: amask(:) @@ -69,10 +77,18 @@ subroutine decompInit_lnd(lni, lnj, amask) integer :: n,m,ng ! indices integer :: ier ! error code integer :: begg, endg ! beg and end gridcells - integer, pointer :: clumpcnt(:) ! clump index counter - integer, allocatable :: gdc2glo(:)! used to create gindex_global + !--------------------------------------------------------------------- type(bounds_type) :: bounds ! contains subgrid bounds data + !--------------------------------------------------------------------- + ! Temporary testing stuff + real(r8) :: msize, mrss + ! end temporary testing stuff + !--------------------------------------------------------------------- + integer :: i, j, g, lc, cid_previous ! Indices + integer :: cell_id_offset ! The offset for the starting gridcell number for this processor + integer :: begcid, endcid ! Beginning and ending cid's for this processor !------------------------------------------------------------------------------ + call memcheck('decompInit_lnd: before allocate') ! Set some global scalars: nclumps, numg and lns call decompInit_lnd_set_nclumps_numg_lns( ) @@ -83,6 +99,8 @@ subroutine decompInit_lnd(lni, lnj, amask) call decompInit_lnd_allocate( ier ) if (ier /= 0) return + call memcheck('decompInit_lnd: after allocate') + ! Initialize procinfo and clumps ! beg and end indices initialized for simple addition of cells later @@ -126,15 +144,18 @@ subroutine decompInit_lnd(lni, lnj, amask) do n = 1,nclumps pid = mod(n-1,npes) if (pid < 0 .or. 
pid > npes-1) then - write(iulog,*) 'decompInit_lnd(): round robin pid error ',n,pid,npes - call endrun(msg=errMsg(sourcefile, __LINE__)) + write(iulog,*) 'Round robin pid error: n, pid, npes = ',n,pid,npes + call endrun(msg="Round robin pid error", file=sourcefile, line=__LINE__) + return endif - clumps(n)%owner = pid + !clumps(n)%owner = pid ! This line should be able to be removed when clumps is only for the local task if (iam == pid) then + clumps(n)%owner = pid cid = cid + 1 if (cid < 1 .or. cid > clump_pproc) then - write(iulog,*) 'decompInit_lnd(): round robin pid error ',n,pid,npes - call endrun(msg=errMsg(sourcefile, __LINE__)) + write(iulog,*) 'round robin pid error ',n,pid,npes + call endrun(msg="round robin pid error", file=sourcefile, line=__LINE__) + return endif procinfo%cid(cid) = n endif @@ -171,80 +192,115 @@ subroutine decompInit_lnd(lni, lnj, amask) endif lcid(ln) = cid - !--- give gridcell cell to pe that owns cid --- - !--- this needs to be done to subsequently use function - !--- get_proc_bounds(begg,endg) + ! Get the total number of gridcells for the local processor if (iam == clumps(cid)%owner) then procinfo%ncells = procinfo%ncells + 1 endif - if (iam > clumps(cid)%owner) then - procinfo%begg = procinfo%begg + 1 - endif - if (iam >= clumps(cid)%owner) then - procinfo%endg = procinfo%endg + 1 - endif - !--- give gridcell to cid --- - !--- increment the beg and end indices --- - clumps(cid)%ncells = clumps(cid)%ncells + 1 - do m = 1,nclumps - if ((clumps(m)%owner > clumps(cid)%owner) .or. & - (clumps(m)%owner == clumps(cid)%owner .and. m > cid)) then - clumps(m)%begg = clumps(m)%begg + 1 - endif - - if ((clumps(m)%owner > clumps(cid)%owner) .or. & - (clumps(m)%owner == clumps(cid)%owner .and. m >= cid)) then - clumps(m)%endg = clumps(m)%endg + 1 - endif - enddo + !--- give gridcell to cid for local processor --- + if (iam == clumps(cid)%owner) then + clumps(cid)%ncells = clumps(cid)%ncells + 1 + end if end if enddo - - ! 
clumpcnt is the ending gdc index of each clump - - ag = 0 - clumpcnt = 0 - ag = 1 - do pid = 0,npes-1 - do cid = 1,nclumps - if (clumps(cid)%owner == pid) then - clumpcnt(cid) = ag - ag = ag + clumps(cid)%ncells - endif - enddo - enddo - - ! now go through gridcells one at a time and increment clumpcnt - ! in order to set gdc2glo - - do aj = 1,lnj - do ai = 1,lni - an = (aj-1)*lni + ai - cid = lcid(an) - if (cid > 0) then - ag = clumpcnt(cid) - gdc2glo(ag) = an - clumpcnt(cid) = clumpcnt(cid) + 1 + !--------------------------------------------------------------------- + ! + ! Do an MPI_SCAN to get the starting index for each processor ---- + ! [Doing this both simplifies the code, reduces non-scalaable memory + ! and reduces execution time for loops that run over all gridcells + ! for each processor.] + ! (Doing the following few lines of code removed about 50 lines of complex code + ! as well as loops of size: ni*nj*nclumps, npes*nclumps, and ni*nj + ! that was being done on each processor) + !--------------------------------------------------------------------- + call MPI_SCAN(procinfo%ncells, cell_id_offset, 1, MPI_INTEGER, & + MPI_SUM, mpicom, ier) + if ( ier /= 0 )then + call endrun(msg='Error from MPI_SCAN', file=sourcefile, line=__LINE__) + end if + cell_id_offset = cell_id_offset + 1 + procinfo%begg = cell_id_offset - procinfo%ncells + procinfo%endg = cell_id_offset - 1 + ! ---- Set begg and endg each clump on this processor ---- + do lc = 1, clump_pproc + cid = procinfo%cid(lc) + clumps(cid)%ncells = clumps(cid)%ncells ! This line will be removed + if ( lc == 1 )then + clumps(cid)%begg = procinfo%begg + else + cid_previous = procinfo%cid(lc-1) + clumps(cid)%begg = clumps(cid_previous)%endg + 1 end if - end do + clumps(cid)%endg = clumps(cid)%begg + clumps(cid)%ncells - 1 + cid_previous = cid end do ! Initialize global gindex (non-compressed, includes ocean points) ! Note that gindex_global goes from (1:endg) - call get_proc_bounds(bounds) ! 
This has to be done after procinfo is finalized - call decompInit_lnd_gindex_global_allocate( bounds, ier ) ! This HAS to be done after prcoinfo is finalized + call get_proc_bounds(bounds, allow_errors=.true.) ! This has to be done after procinfo is finalized + call decompInit_lnd_gindex_global_allocate( bounds, ier ) ! This HAS to be done after procinfo is finalized if (ier /= 0) return nglob_x = lni ! decompMod module variables nglob_y = lnj ! decompMod module variables + + !--------------------------------------------------------------------- + + ! Get the global vector index on the full grid for each local processors gridcell + g = procinfo%begg + do lc = 1, clump_pproc + do ln = 1,lns + if (amask(ln) == 1) then + cid = lcid(ln) + if ( cid > 0 )then + if (clumps(cid)%owner == iam) then + if ( procinfo%cid(lc) == cid ) then + if ( (g < procinfo%begg) .or. (g > procinfo%endg) )then + write(iulog,*) ' iam, g = ', iam, g + call endrun(msg='g out of bounds for MPI_SCAN test', file=sourcefile, line=__LINE__) + end if + procinfo%ggidx(g) = ln + g = g + 1 + end if + end if + end if + end if + end do + end do + + ! ---- Get the global index for each gridcell and save the i,j incices for ach gridcell on this processor do n = procinfo%begg,procinfo%endg - gindex_global(n-procinfo%begg+1) = gdc2glo(n) - enddo + gindex_global(n-procinfo%begg+1) = procinfo%ggidx(n) ! Change this to gindex_global when ready + call procinfo%calc_globalxy_indices( n, lni, lnj, i, j ) + procinfo%gi(n) = i + procinfo%gj(n) = j + end do + + !--------------------------------------------------------------------- + ! 
General error checking that the decomposition data is setup correctly + !--------------------------------------------------------------------- + begcid = procinfo%cid(1) + endcid = procinfo%cid(clump_pproc) + call assert_equal(clumps(begcid)%begg, procinfo%begg, & + msg='decompInit_lnd(): clumps(begcid) begg does not match procinfo begg') + call assert_equal(clumps(endcid)%endg, procinfo%endg, & + msg='decompInit_lnd(): clumps(endcid) endg does not match procinfo endg') + call assert_equal(sum(clumps(procinfo%cid)%ncells), procinfo%ncells, & + msg='decompInit_lnd(): sum of clumps ncells does not match procinfo ncells') + + do lc = 1, clump_pproc + cid = procinfo%cid(lc) + call assert_equal( (clumps(cid)%endg-clumps(cid)%begg+1), clumps(cid)%ncells, & + msg='decompInit_lnd(): clumps(cid) endg-begg+1 does not match clumps ncells') + end do + call assert_equal( (procinfo%endg-procinfo%begg+1), procinfo%ncells, & + msg='decompInit_lnd(): procinfo endg-begg+1 does not match procinfo ncells') call decompInit_lnd_clean() + call memcheck('decompInit_lnd: after deallocate') + ! Diagnostic output if (masterproc) then write(iulog,*)' Surface Grid Characteristics' @@ -266,6 +322,7 @@ subroutine decompInit_lnd(lni, lnj, amask) subroutine decompInit_lnd_allocate( ier ) ! Allocate the temporary and long term variables set and used in decompInit_lnd integer, intent(out) :: ier ! error code + !------------------------------------------------------------------------------ ! ! Long-term allocation: ! Arrays from decompMod are allocated here @@ -277,8 +334,13 @@ subroutine decompInit_lnd_allocate( ier ) ! ! NOTE: nclumps, numg, and lns must be set before calling this routine! ! So decompInit_lnd_set_nclumps_numg_lns must be called first + !------------------------------------------------------------------------------ + !------------------------------------------------------------- ! 
Allocate the longer term decompMod data + !------------------------------------------------------------- + + ! allocate procinfo allocate(procinfo%cid(clump_pproc), stat=ier) if (ier /= 0) then call endrun(msg='allocation error for procinfo%cid', file=sourcefile, line=__LINE__) @@ -289,6 +351,7 @@ subroutine decompInit_lnd_allocate( ier ) call endrun(msg="nclumps is NOT set before allocation", file=sourcefile, line=__LINE__) return end if + ! TODO: This will be moved to the other allocate and for a smaller size ---- allocate(clumps(nclumps), stat=ier) if (ier /= 0) then write(iulog,*) 'allocation error for clumps: nclumps, ier=', nclumps, ier @@ -296,17 +359,13 @@ subroutine decompInit_lnd_allocate( ier ) return end if + !------------------------------------------------------------- + ! Temporary arrays that are just used in decompInit_lnd + !------------------------------------------------------------- if ( numg < 1 )then call endrun(msg="numg is NOT set before allocation", file=sourcefile, line=__LINE__) return end if - allocate(gdc2glo(numg), stat=ier) - if (ier /= 0) then - call endrun(msg="allocation error for gdc2glo", file=sourcefile, line=__LINE__) - return - end if - - ! Temporary arrays that are just used in decompInit_lnd if ( lns < 1 )then call endrun(msg="lns is NOT set before allocation", file=sourcefile, line=__LINE__) return @@ -316,11 +375,6 @@ subroutine decompInit_lnd_allocate( ier ) call endrun(msg="allocation error for lcid", file=sourcefile, line=__LINE__) return end if - allocate(clumpcnt(nclumps),stat=ier) - if (ier /= 0) then - call endrun(msg="allocation error for clumpcnt", file=sourcefile, line=__LINE__) - return - end if end subroutine decompInit_lnd_allocate @@ -342,15 +396,35 @@ subroutine decompInit_lnd_gindex_global_allocate( bounds, ier ) call endrun(msg="allocation error for gindex_global", file=sourcefile, line=__LINE__) return end if + ! 
TODO: Remove the data, and only use the subroutine to calculate when needed + allocate(procinfo%ggidx(procinfo%begg:procinfo%endg), stat=ier) + if (ier /= 0) then + call endrun(msg='allocation error for procinfo%ggidx', file=sourcefile, line=__LINE__) + return + endif + procinfo%ggidx(:) = -1 + allocate(procinfo%gi(procinfo%begg:procinfo%endg), stat=ier) + if (ier /= 0) then + call endrun(msg='allocation error for procinfo%gi', file=sourcefile, line=__LINE__) + return + endif + procinfo%gi(:) = -1 + allocate(procinfo%gj(procinfo%begg:procinfo%endg), stat=ier) + if (ier /= 0) then + call endrun(msg='allocation error for procinfo%gj', file=sourcefile, line=__LINE__) + return + endif + procinfo%gj(:) = -1 end subroutine decompInit_lnd_gindex_global_allocate !------------------------------------------------------------------------------ subroutine decompInit_lnd_clean() ! Deallocate the temporary variables used in decompInit_lnd - deallocate(clumpcnt) - deallocate(gdc2glo) - !deallocate(lcid) + !deallocate(clumpcnt) + !deallocate(gdc2glo) + !--- NOTE: Can only deallocate lcid after decompInit_clumps ---- + ! TODO: Move the deallocate for lcid to here, after decompInit_clumps only calculates the local taskj end subroutine decompInit_lnd_clean !------------------------------------------------------------------------------ @@ -468,8 +542,10 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) character(len=32), parameter :: subname = 'decompInit_clumps' !------------------------------------------------------------------------------ + call t_startf('decompInit_clumps') + call memcheck('decompInit_clumps: before alloc') !--- assign gridcells to clumps (and thus pes) --- - call get_proc_bounds(bounds) + call get_proc_bounds(bounds, allow_errors=.true.) begg = bounds%begg; endg = bounds%endg allocate(allvecl(nclumps,5)) ! local clumps [gcells,lunit,cols,patches,coh] @@ -574,25 +650,57 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) enddo do n = 1,nclumps + ! 
Only do the error checking over the local processor + if (clumps(n)%owner == iam) then if (clumps(n)%ncells /= allvecg(n,1) .or. & clumps(n)%nlunits /= allvecg(n,2) .or. & clumps(n)%ncols /= allvecg(n,3) .or. & clumps(n)%npatches /= allvecg(n,4) .or. & clumps(n)%nCohorts /= allvecg(n,5)) then - write(iulog ,*) 'decompInit_glcp(): allvecg error ncells ',iam,n,clumps(n)%ncells ,allvecg(n,1) - write(iulog ,*) 'decompInit_glcp(): allvecg error lunits ',iam,n,clumps(n)%nlunits ,allvecg(n,2) - write(iulog ,*) 'decompInit_glcp(): allvecg error ncols ',iam,n,clumps(n)%ncols ,allvecg(n,3) - write(iulog ,*) 'decompInit_glcp(): allvecg error patches',iam,n,clumps(n)%npatches ,allvecg(n,4) - write(iulog ,*) 'decompInit_glcp(): allvecg error cohorts',iam,n,clumps(n)%nCohorts ,allvecg(n,5) + write(iulog ,*) 'allvecg error: iam,n ',iam,n + write(iulog ,*) 'allvecg error ncells,allvecg ',iam,n,clumps(n)%ncells ,allvecg(n,1) + write(iulog ,*) 'allvecg error lunits,allvecg ',iam,n,clumps(n)%nlunits ,allvecg(n,2) + write(iulog ,*) 'allvecg error ncols,allvecg ',iam,n,clumps(n)%ncols ,allvecg(n,3) + write(iulog ,*) 'allvecg error patches,allvecg',iam,n,clumps(n)%npatches ,allvecg(n,4) + write(iulog ,*) 'allvecg error cohorts,allvecg',iam,n,clumps(n)%nCohorts ,allvecg(n,5) - call endrun(msg=errMsg(sourcefile, __LINE__)) + call endrun(msg="allvecg error cohorts", file=sourcefile, line=__LINE__) + return + endif endif enddo + call memcheck('decompInit_clumps: before deallocate') + deallocate(allvecg,allvecl) deallocate(lcid) + call memcheck('decompInit_clumps: after deallocate') + + + ! 
------ Reset the clump type array for all non-local cid's to -1 to show it can be made smaller + do cid = 1, nclumps + if (clumps(cid)%owner /= iam) then + clumps(cid)%owner = -1 + clumps(cid)%ncells = -1 + clumps(cid)%nlunits = -1 + clumps(cid)%ncols = -1 + clumps(cid)%npatches = -1 + clumps(cid)%nCohorts = -1 + clumps(cid)%begg = -1 + clumps(cid)%begl = -1 + clumps(cid)%begc = -1 + clumps(cid)%begp = -1 + clumps(cid)%begCohort = -1 + clumps(cid)%endg = -1 + clumps(cid)%endl = -1 + clumps(cid)%endc = -1 + clumps(cid)%endp = -1 + clumps(cid)%endCohort = -1 + end if + end do + ! Diagnostic output call get_proc_global(ng=numg, nl=numl, nc=numc, np=nump, nCohorts=numCohort) @@ -685,6 +793,7 @@ subroutine decompInit_clumps(lni,lnj,glc_behavior) call shr_sys_flush(iulog) call mpi_barrier(mpicom,ier) end do + call t_stopf('decompInit_clumps') end subroutine decompInit_clumps @@ -969,6 +1078,8 @@ subroutine decompInit_glcp(lni,lnj,glc_behavior) deallocate(start_global) if (allocated(index_lndgridcells)) deallocate(index_lndgridcells) + call t_stopf('decompInit_glcp') + end subroutine decompInit_glcp end module decompInitMod diff --git a/src/main/decompMod.F90 b/src/main/decompMod.F90 index adf85fa5b7..3897d61ee7 100644 --- a/src/main/decompMod.F90 +++ b/src/main/decompMod.F90 @@ -9,6 +9,7 @@ module decompMod use shr_kind_mod, only : r8 => shr_kind_r8 use shr_sys_mod , only : shr_sys_abort ! use shr_sys_abort instead of endrun here to avoid circular dependency + use shr_abort_mod , only : shr_abort_abort ! as above use clm_varctl , only : iulog ! ! !PUBLIC TYPES: @@ -67,7 +68,10 @@ module decompMod !---global information on each pe type processor_type integer :: nclumps ! number of clumps for processor_type iam - integer,pointer :: cid(:) ! clump indices + integer,pointer :: cid(:) => null() ! clump indices + integer,pointer :: ggidx(:) => null() ! global vector index on the full 2D grid + integer,pointer :: gi(:) => null() ! 
global index on the full 2D grid in "x" (longitude for structured) + integer,pointer :: gj(:) => null() ! global index on the full 2D grid in "y" (latitudef or structured, 1 for unstructured) integer :: ncells ! number of gridcells in proc integer :: nlunits ! number of landunits in proc integer :: ncols ! number of columns in proc @@ -78,6 +82,9 @@ module decompMod integer :: begc, endc ! beginning and ending column index integer :: begp, endp ! beginning and ending patch index integer :: begCohort, endCohort ! beginning and ending cohort indices + contains + procedure, public :: calc_global_index_fromij ! Get the global index for the input grid i/j index on this processor + procedure, public :: calc_globalxy_indices end type processor_type public processor_type type(processor_type),public :: procinfo @@ -121,6 +128,104 @@ module decompMod contains + + function calc_global_index_fromij( this, g, ni, nj ) result(global_index) + ! Returns the full grid global vector index from the gridcell on this processor + ! !ARGUMENTS: + class(processor_type), intent(in) :: this + integer, intent(in) :: g ! gridcell index on this processor + integer, intent(in) :: ni, nj ! Global 2D size of full grid + integer :: global_index ! function result, full vector index on the full global grid + + if ( .not. associated(this%gi) )then + call shr_sys_abort( 'gi is not allocated yet', file=sourcefile, line=__LINE__) + return + end if + if ( .not. associated(this%gj) )then + call shr_sys_abort( 'gj is not allocated yet', file=sourcefile, line=__LINE__) + return + end if + if ( (g < this%begg) .or. (g > this%endg) ) then + call shr_sys_abort( 'Input index g is out of bounds of this processor', file=sourcefile, line=__LINE__) + return + end if + if ( (ni < 1) .or. (nj < 1) ) then + call shr_sys_abort( 'Global gridsize ni/nj is not set', file=sourcefile, line=__LINE__) + return + end if + if ( (this%gi(g) < 1) .or. 
(this%gi(g) > ni) ) then + write(iulog,*) 'this%gi(g) = ', this%gi(g) + call shr_sys_abort( 'Global gi index is out of bounds', file=sourcefile, line=__LINE__) + return + end if + if ( (this%gj(g) < 1) .or. (this%gj(g) > ni) ) then + write(iulog,*) 'this%gj(g) = ', this%gj(g) + call shr_sys_abort( 'Global gj index is out of bounds', file=sourcefile, line=__LINE__) + return + end if + global_index = (this%gj(g)-1)*ni + this%gi(g) + if ( (global_index < 1) .or. (global_index > ni*nj) ) then + call shr_sys_abort( 'global_index is out of bounds for this processor', file=sourcefile, line=__LINE__) + return + end if + + end function calc_global_index_fromij + + subroutine calc_ijindices_from_full_global_index( g, ni, nj, i, j ) + ! Local private subroutine to calculate the full 2D grid i,j indices from the 1D global vector index + integer, intent(in) :: g ! Input processor global full 2D vector index + integer, intent(in) :: ni, nj ! Size of the full 2D grid + integer, intent(out) :: i, j ! 2D indices in x and y on the full global 2D grid (j will be 1 for an unstructured grid) + + if ( (g < 1) .or. (g > ni*nj) ) then + write(iulog,*) 'g, ni, nj = ', g, ni, nj + call shr_sys_abort( 'Input index g is out of bounds', file=sourcefile, line=__LINE__) + return + end if + if ( (ni < 1) .or. (nj < 1) ) then + call shr_sys_abort( 'Global gridsize ni/nj is not set', file=sourcefile, line=__LINE__) + return + end if + j = floor( real(g, r8) / real(ni, r8) ) + 1 + if ( mod(g,ni) == 0 ) j = j - 1 + i = g - (j-1)*ni + if ( (i < 1) .or. (i > ni) ) then + call shr_sys_abort( 'Computed global i value out of range', file=sourcefile, line=__LINE__) + return + end if + if ( (j < 1) .or. (j > nj) ) then + call shr_sys_abort( 'Computed global j value out of range', file=sourcefile, line=__LINE__) + return + end if + end subroutine calc_ijindices_from_full_global_index + + + subroutine calc_globalxy_indices( this, g, ni, nj, i, j ) + ! 
!ARGUMENTS: + class(processor_type), intent(in) :: this + integer, intent(in) :: g ! gridcell index on this processor + integer, intent(in) :: ni, nj ! Global 2D size of full grid + integer, intent(out) :: i, j ! 2D indices in x and y on the full global 2D grid (j will be 1 for an unstructured grid) + + integer :: global_index + + if ( .not. associated(this%ggidx) )then + call shr_sys_abort( 'ggidx is not allocated yet', file=sourcefile, line=__LINE__) + return + end if + if ( (g < this%begg) .or. (g > this%endg) ) then + call shr_sys_abort( 'Input index g is out of bounds of this processor', file=sourcefile, line=__LINE__) + return + end if + if ( (ni < 1) .or. (nj < 1) ) then + call shr_sys_abort( 'Global gridsize ni/nj is not set', file=sourcefile, line=__LINE__) + return + end if + global_index = this%ggidx(g) + call calc_ijindices_from_full_global_index( global_index, ni, nj, i, j ) + + end subroutine calc_globalxy_indices + !----------------------------------------------------------------------- pure function get_beg(bounds, subgrid_level) result(beg_index) ! @@ -224,8 +329,18 @@ subroutine get_clump_bounds (n, bounds) #ifdef _OPENMP if ( OMP_GET_NUM_THREADS() == 1 .and. OMP_GET_MAX_THREADS() > 1 )then call shr_sys_abort( trim(subname)//' ERROR: Calling from inside a non-threaded region)') + return end if #endif + if ( .not. associated(procinfo%cid) )then + call shr_sys_abort( 'procinfo%cid) is NOT allocated yet', file=sourcefile, line=__LINE__) + return + end if + if ( n < 1 .or. 
n > procinfo%nclumps )then + write(iulog,*) 'Input clump index out of bounds: n = ', n + call shr_sys_abort( 'Input clump is out of bounds', file=sourcefile, line=__LINE__) + return + end if cid = procinfo%cid(n) bounds%begp = clumps(cid)%begp - procinfo%begp + 1 @@ -239,19 +354,44 @@ subroutine get_clump_bounds (n, bounds) bounds%begCohort = clumps(cid)%begCohort - procinfo%begCohort + 1 bounds%endCohort = clumps(cid)%endCohort - procinfo%begCohort + 1 + + if ( bounds%endp <= 0 )then + call shr_sys_abort( 'bounds%endp is not valid', file=sourcefile, line=__LINE__) + return + end if + if ( bounds%endc <= 0 )then + call shr_sys_abort( 'bounds%endc is not valid', file=sourcefile, line=__LINE__) + return + end if + if ( bounds%endl <= 0 )then + call shr_sys_abort( 'bounds%endl is not valid', file=sourcefile, line=__LINE__) + return + end if + if ( bounds%endg <= 0 )then + call shr_sys_abort( 'bounds%endg is not valid', file=sourcefile, line=__LINE__) + return + end if + ! End Cohort isn't necessarily valid, so don't do this error check + !if ( bounds%endCohort <= 0 )then + ! write(iulog,*) 'endCohort = ', bounds%endCohort + ! call shr_sys_abort( 'bounds%endCohort is not valid', file=sourcefile, line=__LINE__) + ! return + !end if + bounds%level = bounds_level_clump bounds%clump_index = n end subroutine get_clump_bounds !------------------------------------------------------------------------------ - subroutine get_proc_bounds (bounds, allow_call_from_threaded_region) + subroutine get_proc_bounds (bounds, allow_call_from_threaded_region, allow_errors) ! ! !DESCRIPTION: ! Retrieve processor bounds ! ! !ARGUMENTS: type(bounds_type), intent(out) :: bounds ! processor bounds bounds + logical, intent(in), optional :: allow_errors ! Don't do the normal error checking ! Normally this routine will abort if it is called from within a threaded region, ! because in most cases you should be calling get_clump_bounds in that situation. 
If @@ -275,6 +415,7 @@ subroutine get_proc_bounds (bounds, allow_call_from_threaded_region) #ifdef _OPENMP if ( OMP_GET_NUM_THREADS() > 1 .and. .not. l_allow_call_from_threaded_region )then call shr_sys_abort( trim(subname)//' ERROR: Calling from inside a threaded region') + return end if #endif @@ -292,6 +433,35 @@ subroutine get_proc_bounds (bounds, allow_call_from_threaded_region) bounds%level = bounds_level_proc bounds%clump_index = -1 ! irrelevant for proc, so assigned a bogus value + ! Some final error checking + ! Always check that gridcells are set + if ( bounds%endg <= 0 )then + call shr_sys_abort( 'bounds%endg is not valid', file=sourcefile, line=__LINE__) + return + end if + + ! If the caller asked to allow errors, skip the remaining (stricter) checks + if ( present(allow_errors) ) then + if ( allow_errors ) return + end if + if ( bounds%endp <= 0 )then + call shr_sys_abort( 'bounds%endp is not valid', file=sourcefile, line=__LINE__) + return + end if + if ( bounds%endc <= 0 )then + call shr_sys_abort( 'bounds%endc is not valid', file=sourcefile, line=__LINE__) + return + end if + if ( bounds%endl <= 0 )then + call shr_sys_abort( 'bounds%endl is not valid', file=sourcefile, line=__LINE__) + return + end if + ! End Cohort isn't necessarily valid, so don't do this error check + !if ( bounds%endCohort <= 0 )then + !call shr_sys_abort( 'bounds%endCohort is not valid', file=sourcefile, line=__LINE__) + !return + !end if + end subroutine get_proc_bounds !------------------------------------------------------------------------------ @@ -381,7 +551,7 @@ integer function get_global_index(subgrid_index, subgrid_level, donot_abort_on_b integer :: beg_index ! beginning proc index for subgrid_level integer :: end_index ! ending proc index for subgrid_level integer :: index ! index of the point to get - integer, pointer :: gindex(:) + integer, pointer :: gindex(:) => null() logical :: abort_on_badindex = .true. 
!---------------------------------------------------------------- @@ -445,7 +615,7 @@ function get_global_index_array(subgrid_index, bounds1, bounds2, subgrid_level) type(bounds_type) :: bounds_proc ! processor bounds integer :: beg_index ! beginning proc index for subgrid_level integer :: i - integer , pointer :: gindex(:) + integer , pointer :: gindex(:) => null() !---------------------------------------------------------------- SHR_ASSERT_ALL_FL((ubound(subgrid_index) == (/bounds2/)), sourcefile, __LINE__) @@ -547,6 +717,7 @@ subroutine get_subgrid_level_gindex (subgrid_level, gindex) integer , pointer :: gindex(:) !---------------------------------------------------------------------- + gindex => null() ! Make sure gindex is initiatled to null select case (subgrid_level) case(subgrid_level_lndgrid) gindex => gindex_global @@ -583,6 +754,18 @@ subroutine decompmod_clean() if ( allocated(clumps) )then deallocate(clumps) end if + if ( associated(procinfo%ggidx) )then + deallocate(procinfo%ggidx) + procinfo%ggidx => null() + end if + if ( associated(procinfo%gi) )then + deallocate(procinfo%gi) + procinfo%gi => null() + end if + if ( associated(procinfo%gj) )then + deallocate(procinfo%gj) + procinfo%gj => null() + end if if ( associated(procinfo%cid) )then deallocate(procinfo%cid) procinfo%cid => null() diff --git a/src/main/initVerticalMod.F90 b/src/main/initVerticalMod.F90 index 64383e7a7c..4a1177666e 100644 --- a/src/main/initVerticalMod.F90 +++ b/src/main/initVerticalMod.F90 @@ -29,6 +29,7 @@ module initVerticalMod use ColumnType , only : col use glcBehaviorMod , only : glc_behavior_type use abortUtils , only : endrun + use perf_mod , only : t_startf, t_stopf use ncdio_pio ! ! 
!PUBLIC TYPES: @@ -189,6 +190,7 @@ subroutine initVertical(bounds, glc_behavior, thick_wall, thick_roof) integer :: jmin_bedrock character(len=*), parameter :: subname = 'initVertical' !------------------------------------------------------------------------ + call t_startf('initVertical') begc = bounds%begc; endc= bounds%endc begl = bounds%begl; endl= bounds%endl @@ -669,6 +671,8 @@ subroutine initVertical(bounds, glc_behavior, thick_wall, thick_roof) call ncd_pio_closefile(ncid) + call t_stopf('initVertical') + end subroutine initVertical !----------------------------------------------------------------------- diff --git a/src/self_tests/Assertions.F90.in b/src/self_tests/Assertions.F90.in index 2a4c8cccc6..4a86929a8a 100644 --- a/src/self_tests/Assertions.F90.in +++ b/src/self_tests/Assertions.F90.in @@ -17,6 +17,12 @@ module Assertions public :: assert_equal interface assert_equal + !TYPE double,int,logical + module procedure assert_equal_0d_{TYPE} + + !TYPE text + module procedure assert_equal_0d_{TYPE} + !TYPE double,int,logical module procedure assert_equal_1d_{TYPE} @@ -30,6 +36,8 @@ module Assertions interface vals_are_equal !TYPE double,int,logical module procedure vals_are_equal_{TYPE} + !TYPE text + module procedure vals_are_equal_{TYPE} end interface vals_are_equal contains @@ -75,6 +83,60 @@ contains end subroutine assert_equal_1d_{TYPE} + !----------------------------------------------------------------------- + !TYPE double,int,logical + subroutine assert_equal_0d_{TYPE}(expected, actual, msg, abs_tol) + ! + ! !DESCRIPTION: + ! Assert scalar values are equal + ! + ! !ARGUMENTS: + {VTYPE}, intent(in) :: expected + {VTYPE}, intent(in) :: actual + character(len=*), intent(in) :: msg + + ! absolute tolerance; if not specified, require exact equality; ignored for logicals + real(r8), intent(in), optional :: abs_tol + ! + ! 
!LOCAL VARIABLES: + integer :: i + + character(len=*), parameter :: subname = 'assert_equal_0d_{TYPE}' + !----------------------------------------------------------------------- + + if (.not. vals_are_equal(actual, expected, abs_tol)) then + write(iulog,*) 'ERROR in assert_equal: ', msg + write(iulog,*) 'Actual : ', actual + write(iulog,*) 'Expected: ', expected + call endrun('ERROR in assert_equal') + end if + + end subroutine assert_equal_0d_{TYPE} + + !----------------------------------------------------------------------- + !TYPE text + subroutine assert_equal_0d_{TYPE}(expected, actual, msg) + ! + ! !DESCRIPTION: + ! Assert scalar values are equal + ! + ! !ARGUMENTS: + {VTYPE}, intent(in) :: expected + {VTYPE}, intent(in) :: actual + character(len=*), intent(in) :: msg + ! + ! !LOCAL VARIABLES: + !----------------------------------------------------------------------- + + if (.not. vals_are_equal(actual, expected)) then + write(iulog,*) 'ERROR in assert_equal: ', msg + write(iulog,*) 'Actual : ', actual + write(iulog,*) 'Expected: ', expected + call endrun('ERROR in assert_equal') + end if + + end subroutine assert_equal_0d_{TYPE} + !----------------------------------------------------------------------- !TYPE double,int,logical subroutine assert_equal_2d_{TYPE}(expected, actual, msg, abs_tol) @@ -198,4 +260,23 @@ contains end function vals_are_equal_{TYPE} + !----------------------------------------------------------------------- + !TYPE text + function vals_are_equal_{TYPE}(actual, expected) result(vals_equal) + ! + ! !DESCRIPTION: + ! Returns true if actual is the same as expected, false otherwise + ! + ! !ARGUMENTS: + logical :: vals_equal ! function result + {VTYPE}, intent(in) :: actual + {VTYPE}, intent(in) :: expected + ! + ! 
!LOCAL VARIABLES: + !----------------------------------------------------------------------- + + vals_equal = actual == expected + + end function vals_are_equal_{TYPE} + end module Assertions diff --git a/src/self_tests/SelfTestDriver.F90 b/src/self_tests/SelfTestDriver.F90 index d109a27827..a081deb18e 100644 --- a/src/self_tests/SelfTestDriver.F90 +++ b/src/self_tests/SelfTestDriver.F90 @@ -6,9 +6,10 @@ module SelfTestDriver ! ! See the README file in this directory for a high-level overview of these self-tests. - use clm_varctl, only : for_testing_run_ncdiopio_tests use decompMod, only : bounds_type use TestNcdioPio, only : test_ncdio_pio + use abortutils, only : endrun + use clm_varctl, only : iulog implicit none private @@ -16,7 +17,17 @@ module SelfTestDriver ! Public routines - public :: self_test_driver + public :: self_test_driver ! Run the self-tests asked for + public :: self_test_readnml ! Read in the general self testing options for overall code flow + public :: for_testing_bypass_init_after_self_tests ! For testing bypass the rest of the initialization after the self test driver was run + public :: for_testing_bypass_run_except_clock_advance ! For testing bypass most of the run phase other than the clock advance + + ! Private module data (default-initialized so options absent from the namelist are well-defined) + logical :: for_testing_bypass_init = .false. ! For testing bypass the initialization phase after the self-test driver + logical :: for_testing_bypass_run = .false. ! For testing bypass most of the run phase except the time advance + logical :: for_testing_run_ncdiopio_tests = .false. ! true => run tests of ncdio_pio + logical :: for_testing_run_decomp_init_tests = .false. ! true => run tests of decompInit + logical, public :: for_testing_exit_after_self_tests = .false. ! true => exit after running self tests character(len=*), parameter, private :: sourcefile = & __FILE__ @@ -32,18 +43,139 @@ subroutine self_test_driver(bounds) ! This subroutine should be called all the time, but each set of self tests is only ! run if the appropriate flag is set. + ! 
!USES: + use decompMod, only : bounds_type + use TestNcdioPio, only : test_ncdio_pio + use TestDecompInit, only : test_decomp_init ! needed for the decompInit self tests invoked below + use shr_sys_mod, only : shr_sys_flush + use spmdMod, only : masterproc ! !ARGUMENTS: type(bounds_type), intent(in) :: bounds ! ! !LOCAL VARIABLES: character(len=*), parameter :: subname = 'self_test_driver' + integer :: ntests ! number of self tests run (set below; an initializer here would imply SAVE) !----------------------------------------------------------------------- + ntests = 0 ; if ( masterproc ) then + write(iulog,*) '-------------------------------' + write(iulog,*) '----- Starting self tests -----' + write(iulog,*) '-------------------------------' + call shr_sys_flush(iulog) + end if if (for_testing_run_ncdiopio_tests) then + ntests = ntests + 1 call test_ncdio_pio(bounds) end if + if (for_testing_run_decomp_init_tests) then + ntests = ntests + 1 + call test_decomp_init() + end if + if ( masterproc ) then + write(iulog,*) '-------------------------------' + write(iulog,*) '----- Ending self tests -------' + write(iulog,*) '-------------------------------' + call shr_sys_flush(iulog) + end if + if (for_testing_exit_after_self_tests) then + ! Print out some messaging if we are exiting after self tests. + if ( masterproc ) then + if ( ntests == 0 )then + write(iulog,*) 'WARNING: You are exiting after self tests were run -- but no self tests were run.' + else + write(iulog,*) 'Exiting after running ', ntests, ' self tests.' + end if + call shr_sys_flush(iulog) + end if + end if end subroutine self_test_driver + !----------------------------------------------------------------------- + subroutine self_test_readnml(NLFileName) + ! + ! !DESCRIPTION: + ! Namelist read for the self-test driver. This includes bypass options + ! that will be used in other parts of the code to bypass bits of the code + ! for testing purposes. + ! + ! 
!USES: + use shr_nl_mod , only : shr_nl_find_group_name + use spmdMod, only : masterproc, mpicom + use shr_mpi_mod, only : shr_mpi_bcast + use clm_varctl, only : iulog + ! + ! !ARGUMENTS: + character(len=*), intent(in) :: NLFilename ! Namelist filename + ! + ! !LOCAL VARIABLES: + integer :: ierr ! error code + integer :: unitn ! unit for namelist file + + ! Namelist name: this has to be matched with the name in the read statement + character(len=*), parameter :: nmlname = 'for_testing_options' + !----------------------------------------------------------------------- + + namelist /for_testing_options/ for_testing_bypass_init, for_testing_bypass_run, & + for_testing_run_ncdiopio_tests, for_testing_run_decomp_init_tests, & + for_testing_exit_after_self_tests + + ! Initialize options to default values, in case they are not specified in + ! the namelist + + if (masterproc) then + write(iulog,*) 'Read in '//nmlname//' namelist' + open(newunit=unitn, status='old', file=NLFilename) + call shr_nl_find_group_name(unitn, nmlname, status=ierr) + if (ierr == 0) then + read(unit=unitn, nml=for_testing_options, iostat=ierr) + if (ierr /= 0) then + call endrun(msg="ERROR reading "//nmlname//"namelist", file=sourcefile, line=__LINE__) + end if + else + call endrun(msg="ERROR finding "//nmlname//"namelist", file=sourcefile, line=__LINE__) + end if + close(unitn) + end if + + call shr_mpi_bcast (for_testing_bypass_init, mpicom) + call shr_mpi_bcast (for_testing_bypass_run, mpicom) + call shr_mpi_bcast(for_testing_run_ncdiopio_tests, mpicom) + call shr_mpi_bcast(for_testing_run_decomp_init_tests, mpicom) + call shr_mpi_bcast(for_testing_exit_after_self_tests, mpicom) + + if (masterproc) then + write(iulog,*) ' ' + write(iulog,*) nmlname//' settings:' + write(iulog,nml=for_testing_options) + write(iulog,*) ' ' + end if + + end subroutine self_test_readnml + + !----------------------------------------------------------------------- + + logical function 
for_testing_bypass_init_after_self_tests() + ! Determine if should exit initialization early after having run the self tests + if ( for_testing_bypass_init ) then + for_testing_bypass_init_after_self_tests = .true. + else + for_testing_bypass_init_after_self_tests = .false. + end if + end function for_testing_bypass_init_after_self_tests + + !----------------------------------------------------------------------- + + logical function for_testing_bypass_run_except_clock_advance() + ! Determine if should skip most of the run phase other than the clock advance + if ( for_testing_bypass_run ) then + for_testing_bypass_run_except_clock_advance = .true. + else + for_testing_bypass_run_except_clock_advance = .false. + end if + end function for_testing_bypass_run_except_clock_advance + + !----------------------------------------------------------------------- + end module SelfTestDriver diff --git a/src/self_tests/TestDecompInit.F90 b/src/self_tests/TestDecompInit.F90 new file mode 100644 index 0000000000..b88b62ce85 --- /dev/null +++ b/src/self_tests/TestDecompInit.F90 @@ -0,0 +1,363 @@ +module TestDecompInit + + ! ------------------------------------------------------------------------ + ! !DESCRIPTION: + ! This module contains tests of decomp_init + +#include "shr_assert.h" + use shr_kind_mod, only : r8 => shr_kind_r8, CX => shr_kind_cx + use Assertions, only : assert_equal + use clm_varctl, only : iulog + use abortutils, only : endrun, endrun_init, get_last_endrun_msg + use spmdMod, only : masterproc, npes, iam + use decompInitMod, only : decompInit_lnd, clump_pproc, decompInit_clumps + use clm_InstMod, only : glc_behavior + use decompMod + + implicit none + private + save + + ! Public routines + + public :: test_decomp_init + + ! Module data used in various tests + + ! Make the size of the test grid 384 so that it can be divided by 128 or 48 + ! for the number of tasks per node on Derecho or Izumi. 
+ integer, parameter :: ni = 16, nj = 24 + integer :: amask(ni*nj) + + integer :: default_npes + integer :: default_clump_pproc + + character(len=*), parameter, private :: sourcefile = & + __FILE__ + +contains + + !----------------------------------------------------------------------- + subroutine test_decomp_init() + ! + ! !DESCRIPTION: + ! Drive tests of decomp_init + ! + ! NOTE(wjs, 2020-10-15) Currently, endrun is called when any test assertion fails. I + ! thought about changing this so that, instead, a counter is incremented for each + ! failure, then at the end of the testing (in the higher-level self-test driver), + ! endrun is called if this counter is greater than 0. The benefit of this is that we'd + ! see all test failures, not just the first failure. To do that, we'd need to change + ! the assertions here to increment a counter rather than aborting. However, I'm not + ! spending the time to make this change for now because (1) I'm not sure how much + ! value we'd get from it; (2) even if we made that change, it's still very possible + ! for test code to abort for reasons other than assertions, if something goes wrong + ! inside decomp_init or pio; and (3) some tests here are dependent on earlier tests (for + ! example, the reads depend on the writes having worked), so a failure in an early + ! phase could really muck things up for later testing phases. Migrating to a + ! pFUnit-based unit test would solve this problem, since each pFUnit test is + ! independent, though would prevent us from being able to have dependent tests the + ! way we do here (where reads depend on earlier writes), for better or for worse. + ! + ! !USERS: + use decompInitMod, only : decompInit_clumps, decompInit_glcp + use domainMod, only : ldomain + ! !ARGUMENTS: + ! + ! 
!LOCAL VARIABLES: + integer, allocatable :: model_amask(:) + !----------------------------------------------------------------------- + + default_npes = npes + default_clump_pproc = clump_pproc + call write_to_log('start_test_decomp_init') + + call write_to_log('test_check_nclumps') + call test_check_nclumps() + call write_to_log('test_decompInit_lnd_abort_on_bad_clump_pproc') + call test_decompInit_lnd_abort_on_bad_clump_pproc() + call write_to_log('test_decompInit_lnd_abort_on_too_big_clump_pproc') + call test_decompInit_lnd_abort_on_too_big_clump_pproc() + call write_to_log('test_decompInit_lnd_abort_when_npes_too_large') + call test_decompInit_lnd_abort_when_npes_too_large() + call write_to_log('test_decompInit_lnd_abort_on_too_small_nsegspc') + call test_decompInit_lnd_abort_on_too_small_nsegspc() + call write_to_log('test_decompInit_lnd_check_sizes') + call test_decompInit_lnd_check_sizes() + call write_to_log('test_decompInit_clump_gcell_info_correct') + call test_decompInit_clump_gcell_info_correct() + ! Comment out for now -- needs some work + !call write_to_log('test_decompMod_get_clump_bounds_correct') + !call test_decompMod_get_clump_bounds_correct() + + ! + ! Call the decompInit initialization series a last time so that decompMod data can still be used + ! + !allocate( model_amask(ldomain%ni*ldomain%nj) ) + !model_amask(:) = 1 + !call decompInit_lnd( ldomain%ni, ldomain%nj, model_amask ) + !call decompInit_clumps(ldomain%ni, ldomain%nj, glc_behavior) + !call decompInit_glcp(ldomain%ni, ldomain%nj, glc_behavior) + !deallocate( model_amask ) + + end subroutine test_decomp_init + + !----------------------------------------------------------------------- + subroutine setup() + use clm_varctl, only : nsegspc + + clump_pproc = default_clump_pproc + nsegspc = 20 + npes = default_npes + amask(:) = 1 ! 
Set all to land + + end subroutine setup + + !----------------------------------------------------------------------- + subroutine test_decompInit_lnd_abort_on_bad_clump_pproc() + character(len=CX) :: expected_msg, actual_msg + + if ( npes > 1 ) return ! error checking testing only works serially + call setup() + call endrun_init( .true. ) ! Do not abort on endrun for self-tests + clump_pproc = 0 + call write_to_log('decompInit_lnd with clump_pproc=0 should abort') + call decompInit_lnd( ni, nj, amask ) + call write_to_log('check expected abort message') + expected_msg = 'clump_pproc must be greater than 0' + actual_msg = get_last_endrun_msg() + call endrun_init( .false. ) ! Turn back on to abort on the assert + call write_to_log('call assert_equal to check the abort message') + call assert_equal( & + expected=expected_msg, actual=actual_msg, & + msg='decompInit_lnd did not abort with clump_pproc=0' ) + call clean() + end subroutine test_decompInit_lnd_abort_on_bad_clump_pproc + + !----------------------------------------------------------------------- + subroutine test_decompInit_lnd_abort_on_too_big_clump_pproc() + character(len=CX) :: expected_msg, actual_msg + + if ( npes > 1 ) return ! error checking testing only works serially + call setup() + call endrun_init( .true. ) ! Do not abort on endrun for self-tests + amask(:) = 1 ! Set all to land + clump_pproc = (ni * nj + 1) / npes + call write_to_log('decompInit_lnd with clump_pproc too large should abort') + call decompInit_lnd( ni, nj, amask ) + call write_to_log('check expected abort message') + expected_msg = 'Number of clumps exceeds number of land grid cells' + actual_msg = get_last_endrun_msg() + call endrun_init( .false. ) ! 
Turn back on to abort on the assert + call write_to_log('call assert_equal to check the abort message') + call assert_equal( & + expected=expected_msg, actual=actual_msg, & + msg='decompInit_lnd did not abort with clump_pproc too large' ) + call clean() + end subroutine test_decompInit_lnd_abort_on_too_big_clump_pproc + + !----------------------------------------------------------------------- + subroutine test_decompInit_lnd_check_sizes() + use decompMod, only : get_proc_bounds + type(bounds_type) :: bounds + + integer :: expected_endg, expected_numg + + call setup() + expected_numg = ni*nj + if ( expected_numg < npes )then + call endrun( msg="npes is too large for this test", file=sourcefile, line=__LINE__ ) + end if + if ( modulo( expected_numg, npes ) /= 0 )then + call endrun( msg="npes does not evenly divide into numg so this test will not work", file=sourcefile, line=__LINE__ ) + end if + expected_endg = ni*nj / npes + amask(:) = 1 ! Set all to land + call decompInit_lnd( ni, nj, amask ) + call get_proc_bounds(bounds, allow_errors=.true.) + call assert_equal( bounds%begg, 1, msg='begg is not as expected' ) + call assert_equal( bounds%endg, expected_endg, msg='endg is not as expected' ) + call clean() + end subroutine test_decompInit_lnd_check_sizes + + !----------------------------------------------------------------------- + subroutine test_decompInit_lnd_abort_when_npes_too_large() + character(len=CX) :: expected_msg, actual_msg + + if ( npes > 1 ) return ! error checking testing only works serially + call setup() + ! NOTE: This is arbitrarily modifying the NPES value -- so it MUST be reset at the END! + npes = ni*nj + 1 + + call endrun_init( .true. ) ! Do not abort on endrun for self-tests + amask(:) = 1 ! 
Set all to land + call write_to_log('decompInit_lnd with npes too large should abort') + call decompInit_lnd( ni, nj, amask ) + call write_to_log('check expected abort message') + expected_msg = 'Number of processes exceeds number of land grid cells' + actual_msg = get_last_endrun_msg() + call endrun_init( .false. ) ! Turn back on to abort on the assert + call write_to_log('call assert_equal to check the abort message') + call assert_equal( & + expected=expected_msg, actual=actual_msg, & + msg='decompInit_lnd did not abort with npes too large' ) + + ! NOTE: Return npes to its original value + npes = default_npes + call clean() + end subroutine test_decompInit_lnd_abort_when_npes_too_large + + !----------------------------------------------------------------------- + subroutine test_decompInit_lnd_abort_on_too_small_nsegspc() + use clm_varctl, only : nsegspc + character(len=CX) :: expected_msg, actual_msg + + if ( npes > 1 ) return ! error checking testing only works serially + call setup() + call endrun_init( .true. ) ! Do not abort on endrun for self-tests + amask(:) = 1 ! Set all to land + nsegspc = 0 + call write_to_log('decompInit_lnd with nsegspc too small should abort') + call decompInit_lnd( ni, nj, amask ) + call write_to_log('check expected abort message') + expected_msg = 'Number of segments per clump (nsegspc) is less than 1 and can NOT be' + actual_msg = get_last_endrun_msg() + call endrun_init( .false. ) ! Turn back on to abort on the assert + call write_to_log('call assert_equal to check the abort message') + call assert_equal( & + expected=expected_msg, actual=actual_msg, & + msg='decompInit_lnd did not abort with nsegspc too small' ) + call clean() + end subroutine test_decompInit_lnd_abort_on_too_small_nsegspc + + !----------------------------------------------------------------------- + subroutine test_check_nclumps() + integer :: expected_nclumps + + call setup() + call endrun_init( .true. ) ! 
Do not abort on endrun for self-tests + expected_nclumps = npes * clump_pproc ! each task owns clump_pproc clumps (consistent with the check in test_decompInit_clump_gcell_info_correct) + call assert_equal(expected=expected_nclumps, actual=nclumps, & + msg='nclumps are not as expected') + call endrun_init( .false. ) + call clean() + end subroutine test_check_nclumps + +!----------------------------------------------------------------------- + subroutine test_decompMod_get_clump_bounds_correct() + ! Some testing for get_clump_bounds + use decompMod, only : get_clump_bounds, bounds_type + use unittestSimpleSubgridSetupsMod, only : setup_ncells_single_veg_patch + use unittestSubgridMod, only : unittest_subgrid_teardown + use pftconMod, only : noveg + type(bounds_type) :: bounds + integer :: expected_begg, expected_endg, expected_numg, gcell_per_task + integer :: iclump + + call setup() + ! Now setup a simple grid that's just the full test with every point a single baresoil patch + call setup_ncells_single_veg_patch( ncells=ni*nj, pft_type=noveg ) + clump_pproc = 1 ! Ensure we are just doing this for one clump per proc for now + expected_numg = ni*nj + if ( expected_numg < npes )then + call endrun( msg="npes is too large for this test", file=sourcefile, line=__LINE__ ) + end if + if ( modulo( expected_numg, npes ) /= 0 )then + call endrun( msg="npes does not evenly divide into numg so this test will not work", file=sourcefile, line=__LINE__ ) + end if + gcell_per_task = expected_numg / npes + expected_begg = gcell_per_task * iam + 1 + expected_endg = expected_begg + gcell_per_task - 1 ! inclusive upper bound + amask(:) = 1 ! Set all to land + call decompInit_lnd( ni, nj, amask ) + call decompInit_clumps( ni, nj, glc_behavior ) + iclump = 1 ! Clump is just 1 since there's only one clump per task + call get_clump_bounds(iclump, bounds) + call assert_equal( bounds%begg, expected_begg, msg='begg is not as expected' ) + call assert_equal( bounds%endg, expected_endg, msg='endg is not as expected' ) + ! 
Other subgrid level information will be the same -- since there's only one landunit, column, and patch per gridcell + call assert_equal( bounds%begl, expected_begg, msg='begl is not as expected' ) + call assert_equal( bounds%endl, expected_endg, msg='endl is not as expected' ) + call assert_equal( bounds%begc, expected_begg, msg='begc is not as expected' ) + call assert_equal( bounds%endc, expected_endg, msg='endc is not as expected' ) + call assert_equal( bounds%begp, expected_begg, msg='begp is not as expected' ) + call assert_equal( bounds%endp, expected_endg, msg='endp is not as expected' ) + call unittest_subgrid_teardown( ) + call clean() + end subroutine test_decompMod_get_clump_bounds_correct + + !----------------------------------------------------------------------- + subroutine test_decompInit_clump_gcell_info_correct() + ! Some testing of the per-clump gridcell info set up by decompInit_lnd + use decompMod, only : clumps + use decompMod, only : get_proc_bounds + type(bounds_type) :: bounds + integer :: expected_gcells, iclump, g, beg_global_index, gcell_per_task + integer :: expected_begg, expected_endg, lc + + call setup() + expected_gcells = ni*nj + if ( expected_gcells < npes )then + call endrun( msg="npes is too large for this test", file=sourcefile, line=__LINE__ ) + end if + if ( modulo( expected_gcells, npes ) /= 0 )then + call endrun( msg="npes does not evenly divide into gcell so this test will not work", file=sourcefile, line=__LINE__ ) + end if + gcell_per_task = expected_gcells / npes + expected_begg = gcell_per_task * iam + 1 + expected_endg = expected_begg + gcell_per_task - 1 ! inclusive upper bound + amask(:) = 1 ! Set all to land + call decompInit_lnd( ni, nj, amask ) + ! 
When clump_pproc is one clumps will be the same as PE + if ( clump_pproc == 1 ) then + call assert_equal( nclumps, npes, msg='nclumps should match number of processors when clump_pproc is 1' ) + else + call assert_equal( nclumps/clump_pproc, npes, msg='nclumps divided by clump_pproc should match number of processors when clump_pproc > 1' ) + end if + ! Just test over the local clumps + do lc = 1, clump_pproc + iclump = procinfo%cid(lc) + call assert_equal( clumps(iclump)%owner, iam, msg='clumps owner is not correct' ) + call assert_equal( clumps(iclump)%ncells, gcell_per_task, msg='clumps ncells is not correct' ) + end do + call clean() + end subroutine test_decompInit_clump_gcell_info_correct + + !----------------------------------------------------------------------- + subroutine write_to_log(msg) + ! + ! !DESCRIPTION: + ! Write a message to the log file, just from the masterproc + ! + use shr_sys_mod, only : shr_sys_flush + ! !ARGUMENTS: + character(len=*), intent(in) :: msg + ! + ! !LOCAL VARIABLES: + + character(len=*), parameter :: subname = 'write_to_log' + !----------------------------------------------------------------------- + + if (masterproc) then + write(iulog,'(a)') msg + call shr_sys_flush(iulog) ! Flush the I/O buffers always + end if + + end subroutine write_to_log + + !----------------------------------------------------------------------- + subroutine clean + ! + ! !DESCRIPTION: + ! Do end-of-testing cleanup after each test + ! + ! !ARGUMENTS: + ! + ! 
!LOCAL VARIABLES: + !----------------------------------------------------------------------- + call decompmod_clean() + + end subroutine clean + + +end module TestDecompInit diff --git a/src/utils/ctsm_memcheck.F90 b/src/utils/ctsm_memcheck.F90 new file mode 100644 index 0000000000..e477c89678 --- /dev/null +++ b/src/utils/ctsm_memcheck.F90 @@ -0,0 +1,32 @@ +module ctsm_memcheck + + use shr_kind_mod, only: r8 => shr_kind_r8 + use clm_varctl, only : iulog + use spmdMod, only : masterproc + implicit none + private + + public :: memcheck +contains + + subroutine memcheck(msg) + use proc_status_vm, only : prt_vm_status, shr_malloc_trim + use shr_mem_mod, only : shr_mem_getusage + use shr_sys_mod, only : shr_sys_flush + character(len=*), intent(in) :: msg + + real(r8) :: msize, mrss ! Memory size and resident set size + + call shr_malloc_trim() ! Make sure the OS trims the memory in response to deallocates + + ! Only output memory on main task as memory usage should be similar between tasks + if (masterproc) then + call prt_vm_status('CTSM(Memory check): ' // trim(msg)) + call shr_mem_getusage( msize, mrss, prt=.true.) + write(iulog,*) ' msize, mrss = ',msize, mrss + call shr_sys_flush(iulog) + end if + + end subroutine memcheck + +end module ctsm_memcheck \ No newline at end of file