From 8d614200747bfbc398bb0efb8c86e784e12162ef Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Wed, 10 May 2017 08:13:53 +0200 Subject: [PATCH 1/4] Intermediate commit, likely broken --- dash/include/dash/algorithm/Transform.h | 66 +++++++++++++++++++++---- dash/test/algorithm/TransformTest.cc | 27 ++++++++++ 2 files changed, 83 insertions(+), 10 deletions(-) diff --git a/dash/include/dash/algorithm/Transform.h b/dash/include/dash/algorithm/Transform.h index 4090082e2..edf52d30d 100644 --- a/dash/include/dash/algorithm/Transform.h +++ b/dash/include/dash/algorithm/Transform.h @@ -48,8 +48,33 @@ inline dart_ret_t transform_blocking_impl( return result; } +/** + * Wrapper of the non-blocking DART accumulate operation with local completion. + * Allows re-use of \c values pointer after the call returns. + */ +template< typename ValueType > +dart_ret_t transform_local_blocking_impl( + dart_gptr_t dest, + ValueType * values, + size_t nvalues, + dart_operation_t op) +{ + static_assert(dash::dart_datatype::value != DART_TYPE_UNDEFINED, + "Cannot accumulate unknown type!"); + + dart_ret_t result = dart_accumulate( + dest, + reinterpret_cast(values), + nvalues, + dash::dart_datatype::value, + op); + dart_flush_local(dest); + return result; +} + /** * Wrapper of the non-blocking DART accumulate operation. + * The pointer \c values should not be re-used before the operation completed. */ template< typename ValueType > dart_ret_t transform_impl( @@ -67,7 +92,6 @@ dart_ret_t transform_impl( nvalues, dash::dart_datatype::value, op); - dart_flush_local(dest); return result; } @@ -272,8 +296,8 @@ GlobOutputIt transform( { DASH_LOG_DEBUG("dash::transform(af, al, bf, outf, binop)"); // Outut range different from rhs input range is not supported yet - auto in_first = in_a_first; - auto in_last = in_a_last; + ValueType* in_first = &(*in_a_first); + ValueType* in_last = &(*in_a_last); std::vector in_range; if (in_b_first == out_first) { // Output range is rhs input range: C += A @@ -281,9 +305,11 @@ GlobOutputIt transform( } else { // Output range different from rhs input range: C = A+B // Input is (in_a_first, in_a_last) + (in_b_first, in_b_last): + auto lpos = in_b_first.lpos(); std::transform( in_a_first, in_a_last, - in_b_first, + in_b_first.globmem().lbegin() + + (lpos.unit == dash::myid()) ? lpos.index : 0, std::back_inserter(in_range), binary_op); in_first = in_range.data(); @@ -292,19 +318,39 @@ GlobOutputIt transform( dash::util::Trace trace("transform"); - // Resolve local range from global range: // Number of elements in local range: size_t num_local_elements = std::distance(in_first, in_last); // Global iterator to dart_gptr_t: dart_gptr_t dest_gptr = out_first.dart_gptr(); // Send accumulate message: - trace.enter_state("transform_blocking"); - dash::internal::transform_blocking_impl( + auto &pattern = out_first.pattern(); + auto &team = pattern.team(); + size_t towrite = num_local_elements; + while (towrite > 0) { + auto lpos = out_first.lpos(); + size_t lsize = pattern.local_size(lpos.unit); + size_t num_values = std::min(lsize - lpos.index, towrite); + std::cout << dash::myid() << ": lpos={" << lpos.index << ", " << lpos.unit << "}; num_values=" << num_values << " lsize=" << lsize << std::endl; + dart_gptr_t dest_gptr = out_first.dart_gptr(); + dash::internal::transform_impl( dest_gptr, in_first, - num_local_elements, + num_values, binary_op.dart_operation()); - trace.exit_state("transform_blocking"); + out_first += num_values; + towrite -= num_values; + } + + dart_flush_all(out_first.dart_gptr()); + + +// trace.enter_state("transform_blocking"); +// dash::internal::transform_blocking_impl( +// dest_gptr, +// in_first, +// num_local_elements, +// binary_op.dart_operation()); +// trace.exit_state("transform_blocking"); // The position past the last element transformed in global element space // cannot be resolved from the size of the local range if the local range // spans over more than one block. Otherwise, the difference of two global @@ -320,7 +366,7 @@ GlobOutputIt transform( // For ranges over block borders, we would have to resolve the global // position past the last element transformed from the iterator's pattern // (see dash::PatternIterator). - return out_first + num_local_elements; + return out_first; } /** diff --git a/dash/test/algorithm/TransformTest.cc b/dash/test/algorithm/TransformTest.cc index 4a9d6e4c9..5cefd721a 100644 --- a/dash/test/algorithm/TransformTest.cc +++ b/dash/test/algorithm/TransformTest.cc @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include @@ -221,3 +223,28 @@ TEST_F(TransformTest, MatrixGlobalPlusGlobalBlocking) EXPECT_EQ_U(first_l_block_a_begin, first_l_block_a_offsets); } + + +TEST_F(TransformTest, LocalIteratorInput) +{ + using value_t = int; + std::vector local_v(100); + std::fill(local_v.begin(), local_v.end(), (value_t)dash::myid()); + dash::Array global_v(local_v.size() + 1); + dash::fill(global_v.begin(), global_v.end(), 0.0); + // start from the second element + auto it = dash::transform( + local_v.begin(), + local_v.end(), + global_v.begin() + 1, + global_v.begin() + 1, + dash::max() + ); + + ASSERT_EQ_U(it, global_v.end() - 1); + + dash::for_each(global_v.begin() + 1, global_v.end(), + [](value_t val){ ASSERT_EQ_U(val, dash::size() - 1); }); + + global_v.barrier(); +} From 1b431e0c2b923b626d6a976d4800dbe8da5acaac Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Wed, 10 May 2017 14:44:23 +0200 Subject: [PATCH 2/4] Implement global-to-global dash::copy --- dash/include/dash/algorithm/Copy.h | 22 +++++++++++++---- dash/test/algorithm/CopyTest.cc | 38 ++++++++++++++++++++++++++++++ dash/test/container/MatrixTest.cc | 7 +++--- 3 files changed, 59 insertions(+), 8 deletions(-) diff --git a/dash/include/dash/algorithm/Copy.h b/dash/include/dash/algorithm/Copy.h index 368d58ed9..132a1cf5c 100644 --- a/dash/include/dash/algorithm/Copy.h +++ b/dash/include/dash/algorithm/Copy.h @@ -1186,7 +1186,6 @@ copy_async( * \ingroup DashAlgorithms */ template < - typename ValueType, class GlobInputIt, class GlobOutputIt > GlobOutputIt copy( @@ -1196,11 +1195,24 @@ GlobOutputIt copy( { DASH_LOG_TRACE("dash::copy()", "blocking, global to global"); - // TODO: - // - Implement adapter for local-to-global dash::copy here - // - Return if global input range has no local sub-range + auto num_elements = dash::distance(in_first, in_last); + + auto li_range_in = local_index_range(in_first, in_last); + auto num_local_elem = li_range_in.end - li_range_in.begin; + auto pattern = in_first.pattern(); + + auto local_in_first = in_first + pattern.global(li_range_in.begin); + auto local_in_last = in_first + pattern.global(li_range_in.end - 1); + + // copy our local portion into the global output range + if (num_elements > 0) { + dash::copy( + local_in_first.local(), + local_in_last.local() + 1, + out_first + pattern.global(li_range_in.begin)); + } - return GlobOutputIt(); + return (out_first + num_elements); } #endif // DOXYGEN diff --git a/dash/test/algorithm/CopyTest.cc b/dash/test/algorithm/CopyTest.cc index c27b60f70..c5cb5c2ed 100644 --- a/dash/test/algorithm/CopyTest.cc +++ b/dash/test/algorithm/CopyTest.cc @@ -5,6 +5,9 @@ #include #include +#include +#include +#include #include #include #include @@ -803,6 +806,41 @@ TEST_F(CopyTest, AsyncGlobalToLocalBlock) } } + +TEST_F(CopyTest, GlobalToGlobal) +{ + using value_t = int; + constexpr int elem_per_unit = 100; + dash::Array source(dash::size() * elem_per_unit); + dash::Array target(dash::size() * elem_per_unit); + + dash::fill(target.begin(), target.end(), 0); + dash::generate_with_index(source.begin(), source.end(), + [](size_t idx) { + return dash::myid() * 1000 + idx; + } + ); + + source.barrier(); + + + // copy the first local range with an offset + dash::copy(source.begin(), source.end() + 1, target.begin()); + + dash::for_each_with_index(source.begin(), source.end(), + [](value_t val, size_t idx) { + ASSERT_EQ_U(val, dash::myid() * 1000 + idx); + } + ); + + dash::for_each_with_index(target.begin() + 1, target.end(), + [](value_t val, size_t idx) { + ASSERT_EQ_U(val, dash::myid() * 1000 + idx); + } + ); + +} + #if 0 // TODO TEST_F(CopyTest, AsyncAllToLocalVector) diff --git a/dash/test/container/MatrixTest.cc b/dash/test/container/MatrixTest.cc index 10f609986..043e5c32c 100644 --- a/dash/test/container/MatrixTest.cc +++ b/dash/test/container/MatrixTest.cc @@ -736,10 +736,11 @@ TEST_F(MatrixTest, BlockCopy) LOG_MESSAGE("Team barrier passed"); // Copy block 1 of matrix_a to block 0 of matrix_b: - dash::copy(matrix_a.block(1).begin(), - matrix_a.block(1).end(), - matrix_b.block(0).begin()); + dash::copy(matrix_a.block(1).begin(), + matrix_a.block(1).end(), + matrix_b.block(0).begin()); + // TODO: Add checks here! LOG_MESSAGE("Wait for team barrier ..."); dash::barrier(); LOG_MESSAGE("Team barrier passed"); From b0b07b3d6a60e700d9311bf7053900ab8807b642 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Wed, 10 May 2017 14:46:07 +0200 Subject: [PATCH 3/4] Honor distribution in dash::transform --- dash/include/dash/algorithm/Transform.h | 44 ++++++++++++++----------- dash/test/algorithm/TransformTest.cc | 19 +++++++++-- 2 files changed, 40 insertions(+), 23 deletions(-) diff --git a/dash/include/dash/algorithm/Transform.h b/dash/include/dash/algorithm/Transform.h index edf52d30d..f5364557c 100644 --- a/dash/include/dash/algorithm/Transform.h +++ b/dash/include/dash/algorithm/Transform.h @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -295,52 +296,55 @@ GlobOutputIt transform( BinaryOperation binary_op) { DASH_LOG_DEBUG("dash::transform(af, al, bf, outf, binop)"); + auto &pattern = out_first.pattern(); // Outut range different from rhs input range is not supported yet ValueType* in_first = &(*in_a_first); ValueType* in_last = &(*in_a_last); - std::vector in_range; + // Number of elements in local range: + size_t num_local_elements = std::distance(in_first, in_last); + auto out_last = out_first + num_local_elements; + if (out_last.gpos() > pattern.size()) { + DASH_THROW(dash::exception::OutOfRange, + "Too many input elements in dash::transform"); + } if (in_b_first == out_first) { // Output range is rhs input range: C += A // Input is (in_a_first, in_a_last). } else { // Output range different from rhs input range: C = A+B // Input is (in_a_first, in_a_last) + (in_b_first, in_b_last): - auto lpos = in_b_first.lpos(); - std::transform( - in_a_first, in_a_last, - in_b_first.globmem().lbegin() - + (lpos.unit == dash::myid()) ? lpos.index : 0, - std::back_inserter(in_range), - binary_op); - in_first = in_range.data(); - in_last = in_first + in_range.size(); + dash::copy( + in_b_first, + in_b_first + std::distance(in_a_first, in_a_last), + out_first); } dash::util::Trace trace("transform"); - // Number of elements in local range: - size_t num_local_elements = std::distance(in_first, in_last); // Global iterator to dart_gptr_t: dart_gptr_t dest_gptr = out_first.dart_gptr(); // Send accumulate message: - auto &pattern = out_first.pattern(); auto &team = pattern.team(); size_t towrite = num_local_elements; + auto out_it = out_first; + auto in_it = in_first; while (towrite > 0) { - auto lpos = out_first.lpos(); + auto lpos = out_it.lpos(); size_t lsize = pattern.local_size(lpos.unit); size_t num_values = std::min(lsize - lpos.index, towrite); - std::cout << dash::myid() << ": lpos={" << lpos.index << ", " << lpos.unit << "}; num_values=" << num_values << " lsize=" << lsize << std::endl; - dart_gptr_t dest_gptr = out_first.dart_gptr(); + dart_gptr_t dest_gptr = out_it.dart_gptr(); + // use non-blocking transform and wait for all at the end dash::internal::transform_impl( dest_gptr, - in_first, + in_it, num_values, binary_op.dart_operation()); - out_first += num_values; - towrite -= num_values; + out_it += num_values; + in_it += num_values; + towrite -= num_values; } +// out_first.team().barrier(); dart_flush_all(out_first.dart_gptr()); @@ -366,7 +370,7 @@ GlobOutputIt transform( // For ranges over block borders, we would have to resolve the global // position past the last element transformed from the iterator's pattern // (see dash::PatternIterator). - return out_first; + return out_it; } /** diff --git a/dash/test/algorithm/TransformTest.cc b/dash/test/algorithm/TransformTest.cc index 5cefd721a..85370ec0c 100644 --- a/dash/test/algorithm/TransformTest.cc +++ b/dash/test/algorithm/TransformTest.cc @@ -229,9 +229,15 @@ TEST_F(TransformTest, LocalIteratorInput) { using value_t = int; std::vector local_v(100); + size_t idx = 0; std::fill(local_v.begin(), local_v.end(), (value_t)dash::myid()); + for (auto& elem : local_v) { + elem = dash::myid() * 1000 + idx; + idx++; + } dash::Array global_v(local_v.size() + 1); dash::fill(global_v.begin(), global_v.end(), 0.0); + global_v.barrier(); // start from the second element auto it = dash::transform( local_v.begin(), @@ -241,10 +247,17 @@ TEST_F(TransformTest, LocalIteratorInput) dash::max() ); - ASSERT_EQ_U(it, global_v.end() - 1); + global_v.barrier(); + + ASSERT_EQ_U(it, global_v.end()); + +// size_t idx = 0; - dash::for_each(global_v.begin() + 1, global_v.end(), - [](value_t val){ ASSERT_EQ_U(val, dash::size() - 1); }); + dash::for_each_with_index(global_v.begin() + 1, global_v.end(), + [](value_t val, size_t idx){ + ASSERT_EQ_U(val, (dash::size() - 1) * 1000 + (idx - 1)); + ++idx; + }); global_v.barrier(); } From 7fa405e0d65f6a79e87215cdd12b4a3cd19df505 Mon Sep 17 00:00:00 2001 From: Joseph Schuchart Date: Wed, 10 May 2017 16:58:54 +0200 Subject: [PATCH 4/4] Make g2g dash::copy work with matrix blocks and adapt test --- dash/include/dash/algorithm/Copy.h | 34 +++++++++++++++---- dash/include/dash/iterator/GlobViewIter.h | 4 +-- dash/test/algorithm/CopyTest.cc | 25 ++++++++++---- dash/test/container/MatrixTest.cc | 40 ++++++++++++++--------- 4 files changed, 74 insertions(+), 29 deletions(-) diff --git a/dash/include/dash/algorithm/Copy.h b/dash/include/dash/algorithm/Copy.h index 132a1cf5c..87e5f4732 100644 --- a/dash/include/dash/algorithm/Copy.h +++ b/dash/include/dash/algorithm/Copy.h @@ -1195,22 +1195,44 @@ GlobOutputIt copy( { DASH_LOG_TRACE("dash::copy()", "blocking, global to global"); - auto num_elements = dash::distance(in_first, in_last); + DASH_LOG_TRACE_VAR("dash::copy()", in_first); + DASH_LOG_TRACE_VAR("dash::copy()", in_last); + DASH_LOG_TRACE_VAR("dash::copy()", out_first); + auto num_elements = dash::distance(in_first, in_last); auto li_range_in = local_index_range(in_first, in_last); auto num_local_elem = li_range_in.end - li_range_in.begin; - auto pattern = in_first.pattern(); + DASH_LOG_TRACE_VAR("dash::copy()", num_elements); + DASH_LOG_TRACE_VAR("dash::copy()", num_local_elem); - auto local_in_first = in_first + pattern.global(li_range_in.begin); - auto local_in_last = in_first + pattern.global(li_range_in.end - 1); // copy our local portion into the global output range - if (num_elements > 0) { + if (num_local_elem > 0) { + auto pattern = in_first.pattern(); + // the distance from the first local element to the in_first iterator + auto in_offset = pattern.global(li_range_in.begin) + - in_first.global().gpos(); + + // the first local element + auto local_in_first = in_first + in_offset; + // the last local element + auto local_in_last = in_first + (num_local_elem + in_offset - 1); + auto local_out_first = out_first + in_offset; + + DASH_LOG_TRACE("Copying from range \n [", + pattern.global(li_range_in.begin), ", ", + pattern.global(li_range_in.end - 1), "] \n [", local_in_first, + "] to \n ", local_out_first, " (global offset ", in_offset, ") "); + dash::copy( local_in_first.local(), + // pointer one past the last element local_in_last.local() + 1, - out_first + pattern.global(li_range_in.begin)); + local_out_first); } + // TODO: + // - Implement adapter for local-to-global dash::copy here + // - Return if global input range has no local sub-range return (out_first + num_elements); } diff --git a/dash/include/dash/iterator/GlobViewIter.h b/dash/include/dash/iterator/GlobViewIter.h index e568719aa..8cd19022a 100644 --- a/dash/include/dash/iterator/GlobViewIter.h +++ b/dash/include/dash/iterator/GlobViewIter.h @@ -1091,10 +1091,10 @@ std::ostream & operator<<( ElementType, Pattern, GlobStaticMem, Pointer, Reference> & it) { std::ostringstream ss; - dash::GlobPtr ptr(it); +// dash::GlobPtr ptr(it); ss << "dash::GlobViewIter<" << typeid(ElementType).name() << ">(" << "idx:" << it._idx << ", " - << "gptr:" << ptr << ")"; + << "gptr:" << it.global().dart_gptr() << ")"; return operator<<(os, ss.str()); } diff --git a/dash/test/algorithm/CopyTest.cc b/dash/test/algorithm/CopyTest.cc index c5cb5c2ed..3b00107eb 100644 --- a/dash/test/algorithm/CopyTest.cc +++ b/dash/test/algorithm/CopyTest.cc @@ -823,19 +823,32 @@ TEST_F(CopyTest, GlobalToGlobal) source.barrier(); + // copy the full range + dash::copy(source.begin(), source.end(), target.begin()); + source.barrier(); - // copy the first local range with an offset - dash::copy(source.begin(), source.end() + 1, target.begin()); - - dash::for_each_with_index(source.begin(), source.end(), + dash::for_each_with_index(target.begin(), target.end(), [](value_t val, size_t idx) { ASSERT_EQ_U(val, dash::myid() * 1000 + idx); } ); - dash::for_each_with_index(target.begin() + 1, target.end(), + // copy the range with an offset (effectively moving the input + // range to the left by 1) + dash::copy(source.begin() + 1, source.end(), target.begin()); + source.barrier(); + + dash::for_each_with_index(target.begin(), target.end() - 1, [](value_t val, size_t idx) { - ASSERT_EQ_U(val, dash::myid() * 1000 + idx); + std::cout << idx << ": " << val << std::endl; + // the array has shifted so the last element is different + if ((idx % elem_per_unit) == (elem_per_unit - 1)) { + // the last element comes from the next unit + // this element has not been copied on the last unit + ASSERT_EQ_U(val, (dash::myid() + 1) * 1000 + idx + 1); + } else { + ASSERT_EQ_U(val, dash::myid() * 1000 + idx + 1); + } } ); diff --git a/dash/test/container/MatrixTest.cc b/dash/test/container/MatrixTest.cc index 043e5c32c..f99603ecc 100644 --- a/dash/test/container/MatrixTest.cc +++ b/dash/test/container/MatrixTest.cc @@ -721,29 +721,39 @@ TEST_F(MatrixTest, BlockCopy) dash::Team::All(), team_spec); // Fill matrix + auto block_a = matrix_a.block(1); + auto block_b = matrix_b.block(0); if (myid == 0) { LOG_MESSAGE("Assigning matrix values"); - for(size_t col = 0; col < matrix_a.extent(0); ++col) { - for(size_t row = 0; row < matrix_a.extent(1); ++row) { - auto value = (row * matrix_a.extent(0)) + col; - matrix_a[col][row] = value; - matrix_b[col][row] = value; + for(size_t row = 0; row < matrix_a.extent(0); ++row) { + for(size_t col = 0; col < matrix_a.extent(1); ++col) { + auto value = (row * 1000) + col; + matrix_a[row][col] = value; + matrix_b[row][col] = value; } } } - LOG_MESSAGE("Wait for team barrier ..."); - dash::barrier(); - LOG_MESSAGE("Team barrier passed"); + + matrix_b.barrier(); + + LOG_MESSAGE("Copying block"); // Copy block 1 of matrix_a to block 0 of matrix_b: - dash::copy(matrix_a.block(1).begin(), - matrix_a.block(1).end(), - matrix_b.block(0).begin()); + dash::copy(block_a.begin(), + block_a.end(), + block_b.begin()); + matrix_b.barrier(); - // TODO: Add checks here! - LOG_MESSAGE("Wait for team barrier ..."); - dash::barrier(); - LOG_MESSAGE("Team barrier passed"); + LOG_MESSAGE("Checking copy result"); + if (myid == 0) { + LOG_MESSAGE("Checking copied matrix block values"); + for(size_t col = 0; col < block_a.extent(0); ++col) { + for(size_t row = 0; row < block_a.extent(1); ++row) { + ASSERT_EQ_U(static_cast(block_b[col][row]), + static_cast(block_a[col][row])); + } + } + } } TEST_F(MatrixTest, StorageOrder)