2525 * Copyright (c) 2020-2021 Google, LLC. All rights reserved.
2626 * Copyright (c) 2019-2021 Triad National Security, LLC. All rights
2727 * reserved.
28+ * Copyright (c) 2025 Stony Brook University. All rights reserved.
2829 * $COPYRIGHT$
2930 *
3031 * Additional copyrights may follow
6970#include "ompi/mca/bml/base/base.h"
7071#include "ompi/mca/mtl/base/base.h"
7172
/* Forward declaration of the file-local shared-query handler; it is installed
 * as .osc_win_shared_query in ompi_osc_rdma_module_rdma_template below. */
73+ static int ompi_osc_rdma_shared_query (struct ompi_win_t * win , int rank , size_t * size ,
74+ ptrdiff_t * disp_unit , void * baseptr );
7275static int ompi_osc_rdma_component_register (void );
7376static int ompi_osc_rdma_component_init (bool enable_progress_threads , bool enable_mpi_threads );
7477static int ompi_osc_rdma_component_finalize (void );
@@ -113,6 +116,7 @@ ompi_osc_rdma_component_t mca_osc_rdma_component = {
113116MCA_BASE_COMPONENT_INIT (ompi , osc , rdma )
114117
115118ompi_osc_base_module_t ompi_osc_rdma_module_rdma_template = {
119+ .osc_win_shared_query = ompi_osc_rdma_shared_query ,
116120 .osc_win_attach = ompi_osc_rdma_attach ,
117121 .osc_win_detach = ompi_osc_rdma_detach ,
118122 .osc_free = ompi_osc_rdma_free ,
@@ -527,6 +531,7 @@ static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, s
527531 module -> my_peer = my_peer ;
528532 module -> free_after = module -> rank_array ;
529533 my_peer -> flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE ;
534+ my_peer -> flags |= OMPI_OSC_RDMA_PEER_CPU_ATOMICS ;
530535 my_peer -> state = (uint64_t ) (uintptr_t ) module -> state ;
531536
532537 if (use_cpu_atomics ) {
@@ -636,7 +641,6 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
636641 /* ensure proper alignment */
637642 if (MPI_WIN_FLAVOR_ALLOCATE == module -> flavor ) {
638643 data_base += OPAL_ALIGN_PAD_AMOUNT (data_base , memory_alignment );
639- size += OPAL_ALIGN_PAD_AMOUNT (size , memory_alignment );
640644 }
641645
642646 do {
@@ -836,6 +840,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
836840 if (MPI_WIN_FLAVOR_DYNAMIC == module -> flavor ) {
837841 if (use_cpu_atomics && peer_rank == my_rank ) {
838842 peer -> flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE ;
843+ peer -> flags |= OMPI_OSC_RDMA_PEER_CPU_ATOMICS ;
839844 }
840845 /* nothing more to do */
841846 continue ;
@@ -850,7 +855,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
850855 ex_peer -> size = temp [i ].size ;
851856 }
852857
853- if (use_cpu_atomics && ( MPI_WIN_FLAVOR_ALLOCATE == module -> flavor || peer_rank == my_rank ) ) {
858+ if (MPI_WIN_FLAVOR_ALLOCATE == module -> flavor || peer_rank == my_rank ) {
854859 /* base is local; the cpu atomics flag is set below only when use_cpu_atomics is true */
855860 if (MPI_WIN_FLAVOR_ALLOCATE == module -> flavor ) {
856861 ex_peer -> super .base = (uintptr_t ) module -> segment_base + offset ;
@@ -859,7 +864,11 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s
859864 }
860865
861866 peer -> flags |= OMPI_OSC_RDMA_PEER_LOCAL_BASE ;
867+ if (use_cpu_atomics ) {
868+ peer -> flags |= OMPI_OSC_RDMA_PEER_CPU_ATOMICS ;
869+ }
862870 offset += temp [i ].size ;
871+ offset += OPAL_ALIGN_PAD_AMOUNT (offset , memory_alignment );
863872 } else {
864873 ex_peer -> super .base = peer_region -> base ;
865874
@@ -898,7 +907,7 @@ static void ompi_osc_rdma_ensure_local_add_procs (void)
898907 /* this will cause add_proc to get called if it has not already been called */
899908 (void ) mca_bml_base_get_endpoint (proc );
900909 }
901- }
910+ }
902911
903912 free (procs );
904913}
@@ -1632,3 +1641,60 @@ ompi_osc_rdma_set_no_lock_info(opal_infosubscriber_t *obj, const char *key, cons
16321641 */
16331642 return module -> no_locks ? "true" : "false" ;
16341643}
1644+
1645+ int ompi_osc_rdma_shared_query (
1646+ struct ompi_win_t * win , int rank , size_t * size ,
1647+ ptrdiff_t * disp_unit , void * baseptr )
1648+ {
1649+ int rc = OMPI_ERR_NOT_SUPPORTED ;
1650+ ompi_osc_rdma_peer_t * peer ;
1651+ int actual_rank = rank ;
1652+ ompi_osc_rdma_module_t * module = GET_MODULE (win );
1653+
1654+ peer = ompi_osc_module_get_peer (module , actual_rank );
1655+ if (NULL == peer ) {
1656+ return OMPI_ERR_NOT_SUPPORTED ;
1657+ }
1658+
1659+ /* currently only supported for allocated windows */
1660+ if (MPI_WIN_FLAVOR_ALLOCATE != module -> flavor ) {
1661+ return OMPI_ERR_NOT_SUPPORTED ;
1662+ }
1663+
1664+ if (!ompi_osc_rdma_peer_local_base (peer )) {
1665+ return OMPI_ERR_NOT_SUPPORTED ;
1666+ }
1667+
1668+ if (MPI_PROC_NULL == rank ) {
1669+ /* iterate until we find a rank that has a non-zero size */
1670+ for (int i = 0 ; i < ompi_comm_size (module -> comm ) ; ++ i ) {
1671+ peer = ompi_osc_module_get_peer (module , i );
1672+ ompi_osc_rdma_peer_extended_t * ex_peer = (ompi_osc_rdma_peer_extended_t * ) peer ;
1673+ if (!ompi_osc_rdma_peer_local_base (peer )) {
1674+ continue ;
1675+ } else if (module -> same_size && ex_peer -> super .base ) {
1676+ break ;
1677+ } else if (ex_peer -> size > 0 ) {
1678+ break ;
1679+ }
1680+ }
1681+ }
1682+
1683+ if (module -> same_size && module -> same_disp_unit ) {
1684+ * size = module -> size ;
1685+ * disp_unit = module -> disp_unit ;
1686+ ompi_osc_rdma_peer_basic_t * ex_peer = (ompi_osc_rdma_peer_basic_t * ) peer ;
1687+ * ((void * * ) baseptr ) = (void * ) (intptr_t )ex_peer -> base ;
1688+ rc = OMPI_SUCCESS ;
1689+ } else {
1690+ ompi_osc_rdma_peer_extended_t * ex_peer = (ompi_osc_rdma_peer_extended_t * ) peer ;
1691+ if (ex_peer -> super .base != 0 ) {
1692+ /* we know the base of the peer */
1693+ * ((void * * ) baseptr ) = (void * ) (intptr_t )ex_peer -> super .base ;
1694+ * size = ex_peer -> size ;
1695+ * disp_unit = ex_peer -> disp_unit ;
1696+ rc = OMPI_SUCCESS ;
1697+ }
1698+ }
1699+ return rc ;
1700+ }
0 commit comments