Skip to content

Commit f876258

Browse files
Region transfer all optimization, update vpicio_mts and bdcats_mts, enable IDIOMS in cmake, reduce printfs (#295)
* Improve performance for transfer all, reduce the log messages
* Committing clang-format changes
* Reduce the log warning messages
* update
* Committing clang-format changes
* Update vpicio and bdcats mts version
* Committing clang-format changes
* Update cmakelist and test codes
* Committing clang-format changes
* Update cmakelist
* Add bdcats_mts
* Committing clang-format changes
* remove unnecessary file
* Conditional test with IDIOMS
* Update vpic and bdcats _mts
* Committing clang-format changes
* Minor updates
* Committing clang-format changes
* Fix IDIOMS cmake

---------

Co-authored-by: github-actions <github-actions[bot]@users.noreply.github.com>
1 parent 1da9e1a commit f876258

File tree

9 files changed

+454
-226
lines changed

9 files changed

+454
-226
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
name: Ubuntu (cache,IDIOMS)
2+
3+
on:
4+
pull_request:
5+
branches: [ stable, develop ]
6+
7+
push:
8+
branches: [ stable, develop ]
9+
10+
workflow_dispatch:
11+
12+
jobs:
13+
PDC:
14+
runs-on: ubuntu-latest
15+
timeout-minutes: 60
16+
17+
steps:
18+
- uses: actions/checkout@v3
19+
20+
- name: Dependencies
21+
run: .github/workflows/dependencies-linux.sh
22+
23+
- name: Build PDC with IDIOMS
24+
run: |
25+
mkdir build && cd build
26+
cmake ../ -DBUILD_MPI_TESTING=ON -DBUILD_SHARED_LIBS=ON -DPDC_ENABLE_IDIOMS=ON -DPDC_SERVER_CACHE=ON -DBUILD_TESTING=ON -DPDC_ENABLE_MPI=ON -DCMAKE_C_COMPILER=mpicc -DCMAKE_POLICY_VERSION_MINIMUM=3.5
27+
make -j2
28+
29+
- name: Test PDC
30+
working-directory: build
31+
run: ctest -L serial --output-on-failure

CMakeLists.txt

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -328,12 +328,18 @@ if(PDC_USE_CRAY_DRC)
328328
endif()
329329

330330
#-----------------------------------------------------------------------------
331-
# DART Suffix Tree mode
331+
# IDIOMS Index
332332
#-----------------------------------------------------------------------------
333-
option(PDC_DART_SUFFIX_TREE_MODE "Enable DART Suffix Tree mode." ON)
334-
if(PDC_DART_SUFFIX_TREE_MODE)
335-
set(PDC_DART_SFX_TREE 1)
336-
# add_compile_definitions(PDC_DART_SFX_TREE=${PDC_DART_SFX_TREE})
333+
option(PDC_ENABLE_IDIOMS "Enable IDIOMS metadata." OFF)
334+
if(PDC_ENABLE_IDIOMS)
335+
set(PDC_ENABLE_IDIOMS 1)
336+
option(PDC_DART_SUFFIX_TREE_MODE "Enable DART Suffix Tree mode." ON)
337+
if(PDC_DART_SUFFIX_TREE_MODE)
338+
set(PDC_DART_SFX_TREE 1)
339+
# add_compile_definitions(PDC_DART_SFX_TREE=${PDC_DART_SFX_TREE})
340+
endif()
341+
342+
337343
endif()
338344

339345
#-----------------------------------------------------------------------------

src/api/pdc_client_connect.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1255,17 +1255,19 @@ PDC_Client_mercury_init(hg_class_t **hg_class, hg_context_t **hg_context, int po
12551255
#endif
12561256

12571257
if ((hg_transport = getenv("HG_TRANSPORT")) == NULL) {
1258-
LOG_INFO("Environment variable HG_TRANSPORT was NOT set\n");
12591258
hg_transport = default_hg_transport;
1259+
if (pdc_client_mpi_rank_g == 0)
1260+
LOG_INFO("Environment variable HG_TRANSPORT was NOT set, default to %s\n", default_hg_transport);
12601261
}
12611262
else
12621263
LOG_INFO("Environment variable HG_TRANSPORT was set\n");
12631264
if ((hostname = getenv("HG_HOST")) == NULL) {
1264-
LOG_INFO("Environment variable HG_HOST was NOT set\n");
12651265
hostname = PDC_malloc(HOSTNAME_LEN);
12661266
memset(hostname, 0, HOSTNAME_LEN);
12671267
gethostname(hostname, HOSTNAME_LEN - 1);
12681268
free_hostname = true;
1269+
if (pdc_client_mpi_rank_g == 0)
1270+
LOG_INFO("Environment variable HG_HOST was NOT set, default to %s\n", hostname);
12691271
}
12701272
else
12711273
LOG_INFO("Environment variable HG_HOST was set\n");

src/api/pdc_region/pdc_region_transfer.c

Lines changed: 40 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -855,51 +855,49 @@ register_metadata(pdc_transfer_request_start_all_pkg **transfer_request_input, i
855855
remain_size = input_size - size;
856856
output_size = 0;
857857

858-
index = 0;
859858
qsort(transfer_requests, size, sizeof(pdc_transfer_request_start_all_pkg *),
860859
sort_by_metadata_server_start_all);
861-
for (i = 1; i < size; ++i) {
862-
if (transfer_requests[i]->transfer_request->metadata_server_id !=
863-
transfer_requests[i - 1]->transfer_request->metadata_server_id) {
864-
n_objs = i - index;
865-
pack_region_metadata_query(transfer_requests + index, n_objs, &buf, &total_buf_size);
866-
PDC_Client_transfer_request_metadata_query(
867-
&bulk_handle, buf, total_buf_size, n_objs,
868-
transfer_requests[index]->transfer_request->metadata_server_id, is_write, &output_buf_size,
869-
&query_id);
870-
PDCregion_transfer_add_bulk_handle(transfer_requests[index]->transfer_request, bulk_handle);
871-
buf = (char *)PDC_free(buf);
872-
if (query_id) {
873-
output_buf = (char *)PDC_malloc(output_buf_size);
874-
PDC_Client_transfer_request_metadata_query2(
875-
&bulk_handle, output_buf, output_buf_size, query_id,
876-
transfer_requests[index]->transfer_request->metadata_server_id);
877-
PDCregion_transfer_add_bulk_handle(transfer_requests[index]->transfer_request, bulk_handle);
878-
unpack_region_metadata_query(output_buf, transfer_requests + index, &transfer_request_head,
879-
&transfer_request_end, &output_size);
880-
output_buf = (char *)PDC_free(output_buf);
881-
if (transfer_request_front_head) {
882-
previous->next = transfer_request_head;
883-
}
884-
else {
885-
transfer_request_front_head = transfer_request_head;
886-
}
887-
previous = transfer_request_end;
888-
}
889-
index = i;
860+
861+
// Each iteration finds the first transfer that has a target meta server different from the previous one
862+
// index is the first transfer index
863+
int current_unique_idx = 0;
864+
int *unique_server_xfer_idx = NULL;
865+
int *unique_server_nboj = NULL;
866+
if (size > 0) {
867+
unique_server_xfer_idx = (int *)PDC_calloc(size, sizeof(int));
868+
unique_server_nboj = (int *)PDC_calloc(size, sizeof(int));
869+
}
870+
871+
// Iterate through the input array
872+
for (i = 0; i < size; ++i) {
873+
if (i == 0 || transfer_requests[i]->transfer_request->metadata_server_id !=
874+
transfer_requests[i - 1]->transfer_request->metadata_server_id) {
875+
// Check if the current element is different from the previous one
876+
// or if it's the first element
877+
unique_server_xfer_idx[current_unique_idx] = i;
878+
unique_server_nboj[current_unique_idx] = 1;
879+
880+
current_unique_idx++;
881+
}
882+
else {
883+
unique_server_nboj[current_unique_idx - 1]++;
890884
}
891885
}
886+
int num_unique_server_ids = current_unique_idx;
887+
888+
// Now we will try to distribute the metadata requests to different servers across clients
889+
for (i = 0; i < num_unique_server_ids; i++) {
890+
int current_index = (pdc_client_mpi_rank_g + i) % num_unique_server_ids;
891+
index = unique_server_xfer_idx[current_index];
892+
n_objs = unique_server_nboj[current_index];
892893

893-
if (size) {
894-
n_objs = size - index;
895894
pack_region_metadata_query(transfer_requests + index, n_objs, &buf, &total_buf_size);
896895
PDC_Client_transfer_request_metadata_query(
897896
&bulk_handle, buf, total_buf_size, n_objs,
898897
transfer_requests[index]->transfer_request->metadata_server_id, is_write, &output_buf_size,
899898
&query_id);
900899
PDCregion_transfer_add_bulk_handle(transfer_requests[index]->transfer_request, bulk_handle);
901900
buf = (char *)PDC_free(buf);
902-
// If it is a valid query ID, then it means regions are overlapping.
903901
if (query_id) {
904902
output_buf = (char *)PDC_malloc(output_buf_size);
905903
PDC_Client_transfer_request_metadata_query2(
@@ -909,16 +907,21 @@ register_metadata(pdc_transfer_request_start_all_pkg **transfer_request_input, i
909907
unpack_region_metadata_query(output_buf, transfer_requests + index, &transfer_request_head,
910908
&transfer_request_end, &output_size);
911909
output_buf = (char *)PDC_free(output_buf);
912-
if (transfer_request_front_head) {
910+
911+
if (transfer_request_front_head)
913912
previous->next = transfer_request_head;
914-
}
915-
else {
913+
else
916914
transfer_request_front_head = transfer_request_head;
917-
}
915+
918916
previous = transfer_request_end;
919917
}
920918
}
921919

920+
if (unique_server_xfer_idx)
921+
free(unique_server_xfer_idx);
922+
if (unique_server_nboj)
923+
free(unique_server_nboj);
924+
922925
if (output_size) {
923926
transfer_request_output = (pdc_transfer_request_start_all_pkg **)PDC_malloc(
924927
sizeof(pdc_transfer_request_start_all_pkg *) * (output_size + remain_size));

src/server/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ if(PDC_ENABLE_ZFP)
2323
# find_path(ZFP_INCLUDE_DIR include/zfp.h)
2424
endif()
2525

26+
if(PDC_ENABLE_IDIOMS)
27+
add_definitions(-DPDC_ENABLE_IDIOMS=1)
28+
endif()
2629

2730
include_directories(
2831
${PDC_COMMON_INCLUDE_DIRS}

src/server/pdc_server.c

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -774,17 +774,19 @@ PDC_Server_init(int port, hg_class_t **hg_class, hg_context_t **hg_context)
774774
total_mem_usage_g += (sizeof(char) + sizeof(char *));
775775

776776
if ((hg_transport = getenv("HG_TRANSPORT")) == NULL) {
777-
LOG_INFO("Environment variable HG_TRANSPORT was NOT set\n");
778777
hg_transport = default_hg_transport;
778+
if (pdc_server_rank_g == 0)
779+
LOG_INFO("Environment variable HG_TRANSPORT was NOT set, default to %s\n", hg_transport);
779780
}
780781
else
781782
LOG_INFO("Environment variable HG_TRANSPORT was set\n");
782783
if ((hostname = getenv("HG_HOST")) == NULL) {
783-
LOG_INFO("Environment variable HG_HOST was NOT set\n");
784784
hostname = PDC_malloc(HOSTNAME_LEN);
785785
memset(hostname, 0, HOSTNAME_LEN);
786786
gethostname(hostname, HOSTNAME_LEN - 1);
787787
free_hostname = true;
788+
if (pdc_server_rank_g == 0)
789+
LOG_INFO("Environment variable HG_HOST was NOT set, default to %s\n", hostname);
788790
}
789791
else
790792
LOG_INFO("Environment variable HG_HOST was set\n");
@@ -922,8 +924,12 @@ PDC_Server_init(int port, hg_class_t **hg_class, hg_context_t **hg_context)
922924
LOG_INFO("Read cache enabled\n");
923925
#endif
924926

927+
#ifdef PDC_ENABLE_IDIOMS
925928
// Initialize IDIOMS
929+
if (pdc_server_rank_g == 0)
930+
LOG_INFO("IDIOMS index enabled\n");
926931
PDC_Server_metadata_index_init(pdc_server_size_g, pdc_server_rank_g);
932+
#endif
927933

928934
// TODO: support restart with different number of servers than previous run
929935
char checkpoint_file[ADDR_MAX + sizeof(int) + 1];
@@ -934,7 +940,9 @@ PDC_Server_init(int port, hg_class_t **hg_class, hg_context_t **hg_context)
934940
ret_value = PDC_Server_restart(checkpoint_file);
935941
if (ret_value != SUCCEED)
936942
PGOTO_ERROR(FAIL, "Error with PDC_Server_restart");
943+
#ifdef PDC_ENABLE_IDIOMS
937944
metadata_index_recover(pdc_server_tmp_dir_g, pdc_server_size_g, pdc_server_rank_g);
945+
#endif
938946
}
939947
else {
940948
// We are starting a brand new server
@@ -1364,7 +1372,9 @@ PDC_Server_checkpoint()
13641372
LOG_INFO("Checkpointed %10d objects, with %10d regions \n", all_metadata_size, all_region_count);
13651373
}
13661374

1375+
#ifdef PDC_ENABLE_IDIOMS
13671376
metadata_index_dump(pdc_server_tmp_dir_g, pdc_server_rank_g);
1377+
#endif
13681378

13691379
done:
13701380
FUNC_LEAVE(ret_value);
@@ -1417,11 +1427,6 @@ PDC_Server_restart(char *filename)
14171427
if (file == NULL)
14181428
PGOTO_ERROR(FAIL, "Error with fopen, filename: [%s]", filename);
14191429

1420-
char *slurm_jobid = getenv("SLURM_JOB_ID");
1421-
if (slurm_jobid == NULL) {
1422-
LOG_ERROR("Error getting slurm job id from SLURM_JOB_ID\n");
1423-
}
1424-
14251430
if (fread(&n_cont, sizeof(int), 1, file) != 1) {
14261431
LOG_ERROR("Read failed for n_count\n");
14271432
}
@@ -1971,8 +1976,10 @@ PDC_Server_mercury_register()
19711976
PDC_region_analysis_release_register(hg_class_g);
19721977

19731978
// DART Index
1979+
#ifdef PDC_ENABLE_IDIOMS
19741980
PDC_dart_get_server_info_register(hg_class_g);
19751981
PDC_dart_perform_one_server_register(hg_class_g);
1982+
#endif
19761983

19771984
// Server to client RPC
19781985
server_lookup_client_register_id_g = PDC_server_lookup_client_register(hg_class_g);

src/tests/CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ set(PROGRAMS
8080
misc/vpicio
8181
misc/vpicio_mts
8282
misc/bdcats
83+
misc/bdcats_mts
8384
deprecated/vpicio_old
8485
deprecated/bdcats_old
8586
tags/kvtag_add_get
@@ -139,9 +140,14 @@ if(BUILD_MPI_TESTING)
139140
tags/kvtag_affix_query_scale
140141
tags/kvtag_add_get_benchmark
141142
tags/kvtag_add_get_scale
143+
)
144+
145+
if(PDC_ENABLE_IDIOMS)
146+
list(APPEND MPI_PROGRAMS
142147
misc/llsm_idioms_bench
143148
misc/idioms_ci_test
144149
)
150+
endif()
145151

146152
foreach(program ${MPI_PROGRAMS})
147153
get_filename_component(program_name ${program} NAME)
@@ -261,7 +267,9 @@ add_test(NAME obj_buf WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTO
261267
add_test(NAME obj_tags WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} COMMAND run_test.sh ./obj_tags )
262268
add_test(NAME kvtag_add_get WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} COMMAND run_test.sh ./kvtag_add_get)
263269
add_test(NAME kvtag_query WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} COMMAND run_test.sh ./kvtag_query 100 1 10 0)
270+
if(PDC_ENABLE_IDIOMS)
264271
add_test(NAME idioms_ci_test WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} COMMAND run_test.sh ./idioms_ci_test)
272+
endif()
265273
add_test(NAME obj_info WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} COMMAND run_test.sh ./obj_info )
266274
add_test(NAME obj_put_data WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} COMMAND run_test.sh ./obj_put_data )
267275
add_test(NAME obj_get_data WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY} COMMAND run_test.sh ./obj_get_data )
@@ -332,7 +340,9 @@ set_tests_properties(obj_buf PROPERTIES LABELS serial)
332340
set_tests_properties(obj_tags PROPERTIES LABELS serial)
333341
set_tests_properties(kvtag_add_get PROPERTIES LABELS serial)
334342
set_tests_properties(kvtag_query PROPERTIES LABELS serial)
343+
if(PDC_ENABLE_IDIOMS)
335344
set_tests_properties(idioms_ci_test PROPERTIES LABELS serial)
345+
endif()
336346
set_tests_properties(obj_info PROPERTIES LABELS serial)
337347
set_tests_properties(obj_put_data PROPERTIES LABELS serial)
338348
set_tests_properties(obj_get_data PROPERTIES LABELS serial)

0 commit comments

Comments
 (0)