From f73e7a1784b80a0d1584d5141e10f525497dd99c Mon Sep 17 00:00:00 2001 From: Dylan Lim Date: Mon, 24 Feb 2025 19:37:03 -0800 Subject: [PATCH 1/2] accessor, array_shape, copy_tensor_accessor, datatype_dispatch, allocator, and perf_metrics tests --- .envrc | 3 + .proj.toml | 1 + .vimrc | 8 ++ lib/kernels/include/kernels/accessor.h | 9 +- .../include/kernels/copy_tensor_accessor.h | 11 ++ lib/kernels/src/accessor.cc | 73 +++++----- lib/kernels/src/array_shape.cc | 38 ++++- lib/kernels/src/copy_tensor_accessor.cc | 59 ++++++++ lib/kernels/src/legion_dim.cc | 1 - lib/kernels/src/perf_metrics.cc | 5 +- lib/kernels/test/src/test_accessor.cc | 136 ++++++++++++++++++ lib/kernels/test/src/test_array_shape.cc | 105 ++++++++++++++ lib/kernels/test/src/test_attention_kernel.cc | 10 +- .../test/src/test_batch_matmul_kernel.cc | 6 +- .../test/src/test_batch_norm_kernel.cc | 8 +- lib/kernels/test/src/test_cast_kernel.cc | 10 +- lib/kernels/test/src/test_combine_kernel.cc | 10 +- lib/kernels/test/src/test_concat_kernel.cc | 12 +- .../test/src/test_copy_tensor_accessor.cc | 76 ++++++++++ .../test/src/test_datatype_dispatch.cc | 65 +++++++++ lib/kernels/test/src/test_dropout.cc | 2 +- lib/kernels/test/src/test_flat_kernel.cc | 2 +- lib/kernels/test/src/test_gather_kernels.cc | 18 +-- .../test/src/test_layer_norm_kernels.cc | 4 +- lib/kernels/test/src/test_legion_dim.cc | 29 ++++ .../test/src/test_local_cpu_allocator.cc | 19 +++ .../test/src/test_local_cuda_allocator.cc | 19 +++ .../test/src/test_managed_ff_stream.cc | 18 +-- lib/kernels/test/src/test_partition_kernel.cc | 2 +- lib/kernels/test/src/test_perf_metrics.cc | 127 ++++++++++++++++ lib/kernels/test/src/test_pool_2d_kernels.cc | 4 +- lib/kernels/test/src/test_reduction_kernel.cc | 4 +- lib/kernels/test/src/test_replicate_kernel.cc | 12 +- lib/kernels/test/src/test_reshape_kernel.cc | 2 +- lib/kernels/test/src/test_reverse_kernels.cc | 8 +- lib/kernels/test/src/test_softmax_kernel.cc | 2 +- lib/kernels/test/src/test_split_kernel.cc | 4 +- lib/kernels/test/src/test_transpose_kernel.cc | 2 +- lib/kernels/test/src/test_utils.cc | 63 +------- lib/kernels/test/src/test_utils.h | 16 +-- 40 files changed, 815 insertions(+), 188 deletions(-) create mode 100644 .envrc create mode 100644 .vimrc create mode 100644 lib/kernels/test/src/test_accessor.cc create mode 100644 lib/kernels/test/src/test_array_shape.cc create mode 100644 lib/kernels/test/src/test_copy_tensor_accessor.cc create mode 100644 lib/kernels/test/src/test_datatype_dispatch.cc create mode 100644 lib/kernels/test/src/test_legion_dim.cc create mode 100644 lib/kernels/test/src/test_local_cpu_allocator.cc create mode 100644 lib/kernels/test/src/test_local_cuda_allocator.cc create mode 100644 lib/kernels/test/src/test_perf_metrics.cc diff --git a/.envrc b/.envrc new file mode 100644 index 0000000000..2797f0f929 --- /dev/null +++ b/.envrc @@ -0,0 +1,3 @@ +source_up_if_exists + +use flake diff --git a/.proj.toml b/.proj.toml index 10307a6efa..b3b90bbada 100644 --- a/.proj.toml +++ b/.proj.toml @@ -15,6 +15,7 @@ build_targets = [ "models", "export-model-arch", "substitution-to-dot", + "kernels-tests", ] test_targets = [ diff --git a/.vimrc b/.vimrc new file mode 100644 index 0000000000..4c8a8a8279 --- /dev/null +++ b/.vimrc @@ -0,0 +1,8 @@ +" example search path configuration +set path=lib/runtime/**,lib/** + +" set build target +" let g:target = "pcg" + +" set test target +" let g:test_target = "utils-test" diff --git a/lib/kernels/include/kernels/accessor.h b/lib/kernels/include/kernels/accessor.h 
index 52ca62e217..8bbcf3ef95 100644 --- a/lib/kernels/include/kernels/accessor.h +++ b/lib/kernels/include/kernels/accessor.h @@ -267,6 +267,12 @@ std::vector<real_type_t<DT> const *> return out; } +bool accessor_data_is_equal(GenericTensorAccessorR const &accessor_a, + GenericTensorAccessorR const &accessor_b); + +bool accessors_are_equal(GenericTensorAccessorR const &accessor_a, + GenericTensorAccessorR const &accessor_b); + GenericTensorAccessorR read_only_accessor_from_write_accessor( GenericTensorAccessorW const &write_accessor); @@ -280,9 +286,6 @@ bool shape_and_dtype_matches(GenericTensorAccessorR const &accessor, std::pair<ArrayShape, DataType> get_shape_and_datatype(GenericTensorAccessorR const &accessor); -void copy_accessor_data_to_l_from_r(GenericTensorAccessorW &dst_accessor, - GenericTensorAccessorR const &src_accessor); - } // namespace FlexFlow namespace FlexFlow {
diff --git a/lib/kernels/include/kernels/copy_tensor_accessor.h b/lib/kernels/include/kernels/copy_tensor_accessor.h index da8af71e4f..97b6254750 100644 --- a/lib/kernels/include/kernels/copy_tensor_accessor.h +++ b/lib/kernels/include/kernels/copy_tensor_accessor.h @@ -6,6 +6,9 @@ namespace FlexFlow { +void copy_accessor_data_to_l_from_r(GenericTensorAccessorW &dst_accessor, + GenericTensorAccessorR const &src_accessor); + GenericTensorAccessorR copy_tensor_accessor_r(GenericTensorAccessorR const &src_accessor, Allocator &allocator); @@ -14,6 +17,14 @@ GenericTensorAccessorW copy_tensor_accessor_w(GenericTensorAccessorW const &src_accessor, Allocator &allocator); +GenericTensorAccessorW + copy_accessor_w_to_cpu_if_necessary(GenericTensorAccessorW const &accessor, + Allocator &allocator); + +GenericTensorAccessorR + copy_accessor_r_to_cpu_if_necessary(GenericTensorAccessorR const &accessor, + Allocator &allocator); + } // namespace FlexFlow #endif
diff --git a/lib/kernels/src/accessor.cc b/lib/kernels/src/accessor.cc index 1a0abec1c5..43f57717f8 100644 --- a/lib/kernels/src/accessor.cc +++ b/lib/kernels/src/accessor.cc @@ -1,38 +1,45 @@ #include "kernels/accessor.h" -#include "kernels/allocation.h" +#include "kernels/copy_tensor_accessor.h" #include "kernels/datatype_dispatch.h" +#include "kernels/local_cpu_allocator.h" +#include <algorithm> +#include <cstring> namespace FlexFlow { -void copy_accessor_data_to_l_from_r( - GenericTensorAccessorW &dst_accessor, - GenericTensorAccessorR const &src_accessor) { - size_t num_bytes = - dst_accessor.shape.get_volume().unwrap_nonnegative() * - size_of_datatype(dst_accessor.data_type).unwrap_nonnegative(); - - DeviceType dst_device_type = dst_accessor.device_type; - DeviceType src_device_type = src_accessor.device_type; - - if (src_device_type == DeviceType::CPU && - dst_device_type == DeviceType::CPU) { - memcpy(dst_accessor.ptr, src_accessor.ptr, num_bytes); - } else if (src_device_type == DeviceType::CPU && - dst_device_type == DeviceType::GPU) { - checkCUDA(cudaMemcpy( - dst_accessor.ptr, src_accessor.ptr, num_bytes, cudaMemcpyHostToDevice)); - } else if (src_device_type == DeviceType::GPU && - dst_device_type == DeviceType::CPU) { - checkCUDA(cudaMemcpy( - dst_accessor.ptr, src_accessor.ptr, num_bytes, cudaMemcpyDeviceToHost)); - } else { - assert(src_device_type == DeviceType::GPU); - assert(dst_device_type == DeviceType::GPU); - checkCUDA(cudaMemcpy(dst_accessor.ptr, - src_accessor.ptr, - num_bytes, - cudaMemcpyDeviceToDevice)); +template <DataType DT> +struct AccessorDataIsEqual { + bool operator()(GenericTensorAccessorR const &a, + GenericTensorAccessorR const &b) { + int const num_elements = a.shape.num_elements().unwrap_nonnegative();
+ if (num_elements != b.shape.num_elements().unwrap_nonnegative()) { + return false; + } + + Allocator cpu_allocator = create_local_cpu_memory_allocator(); + auto cpu_a = copy_accessor_r_to_cpu_if_necessary(a, cpu_allocator); + auto cpu_b = copy_accessor_r_to_cpu_if_necessary(b, cpu_allocator); + + using T = real_type_t<DT>; + T const *a_ptr = cpu_a.get<DT>(); + T const *b_ptr = cpu_b.get<DT>(); + + return std::equal(a_ptr, a_ptr + num_elements, b_ptr); } +}; + +bool accessor_data_is_equal(GenericTensorAccessorR const &accessor_a, + GenericTensorAccessorR const &accessor_b) { + return DataTypeDispatch1<AccessorDataIsEqual>{}( + accessor_a.data_type, accessor_a, accessor_b); +} + +bool accessors_are_equal(GenericTensorAccessorR const &accessor_a, + GenericTensorAccessorR const &accessor_b) { + return accessor_a.data_type == accessor_b.data_type && + accessor_a.device_type == accessor_b.device_type && + accessor_a.shape == accessor_b.shape && + accessor_data_is_equal(accessor_a, accessor_b); } GenericTensorAccessorW::operator GenericTensorAccessorR() const { @@ -56,12 +63,12 @@ std::tuple bool GenericTensorAccessorW::operator==( GenericTensorAccessorW const &other) const { - return this->tie() == other.tie(); + return accessors_are_equal(*this, other); } bool GenericTensorAccessorW::operator!=( GenericTensorAccessorW const &other) const { - return this->tie() != other.tie(); + return !(accessors_are_equal(*this, other)); } int32_t *GenericTensorAccessorW::get_int32_ptr() const { @@ -112,12 +119,12 @@ std::tuple bool GenericTensorAccessorR::operator==( GenericTensorAccessorR const &other) const { - return this->tie() == other.tie(); + return accessors_are_equal(*this, other); } bool GenericTensorAccessorR::operator!=( GenericTensorAccessorR const &other) const { - return this->tie() != other.tie(); + return !(accessors_are_equal(*this, other)); } int32_t const *GenericTensorAccessorR::get_int32_ptr() const {
diff --git a/lib/kernels/src/array_shape.cc b/lib/kernels/src/array_shape.cc index 30db65cc03..499aebad86 100644 --- a/lib/kernels/src/array_shape.cc +++ b/lib/kernels/src/array_shape.cc @@ -51,18 +51,40 @@ nonnegative_int ArrayShape::at(ff_dim_t idx) const { return dims.at(legion_dim_from_ff_dim(idx, this->num_dims())); } +legion_dim_t ArrayShape::last_idx() const { + if (this->dims.size() == 0) { + throw mk_runtime_error("Cannot get last index of an empty shape"); + } + return legion_dim_t(nonnegative_int{this->dims.size() - 1}); +} + +legion_dim_t ArrayShape::neg_idx(int idx) const { + if (std::abs(idx) > this->dims.size()) { + throw mk_runtime_error( + fmt::format("Invalid negative index: {} (shape has {} dimensions)", + idx, + this->dims.size())); + } + + if (idx >= 0) { + throw mk_runtime_error(fmt::format( + "Idx should be negative for negative indexing, got {}", idx)); + } + + return legion_dim_t(nonnegative_int{this->dims.size() + idx}); +} + bool ArrayShape::operator==(ArrayShape const &other) const { - return this->tie() == other.tie(); + return this->dims == other.dims; } bool ArrayShape::operator!=(ArrayShape const &other) const { - return this->tie() != other.tie(); + return !(this->dims == other.dims); } ArrayShape ArrayShape::sub_shape( std::optional<std::variant<ff_dim_t, legion_dim_t>> start, std::optional<std::variant<ff_dim_t, legion_dim_t>> end) const { - nonnegative_int num_dims = this->num_dims(); auto to_legion_index = [num_dims](auto arg) -> nonnegative_int { @@ -85,7 +107,9 @@ ArrayShape ArrayShape::sub_shape( "Invalid sub_shape range: start={}, end={}", start_idx, end_idx)); } - return ArrayShape(&this->dims[legion_dim_t{start_idx}], end_idx - start_idx); + return ArrayShape(std::vector<nonnegative_int>( + this->dims.begin() + start_idx.unwrap_nonnegative(), + this->dims.begin() + end_idx.unwrap_nonnegative())); } std::optional<nonnegative_int> ArrayShape::at_maybe(legion_dim_t index) const { @@ -97,7 +121,11 @@ std::optional<nonnegative_int> ArrayShape::at_maybe(legion_dim_t index) const { return std::nullopt; } std::optional<nonnegative_int> ArrayShape::at_maybe(ff_dim_t index) const { - return this->at_maybe(legion_dim_from_ff_dim(index, this->num_dims())); + if (index.value < this->num_dims()) { + return this->at_maybe(legion_dim_from_ff_dim(index, this->num_dims())); + } else { + return std::nullopt; + } } std::tuple<LegionOrdered<nonnegative_int> const &> ArrayShape::tie() const {
diff --git a/lib/kernels/src/copy_tensor_accessor.cc b/lib/kernels/src/copy_tensor_accessor.cc index 6a3ad8033a..cc033223f8 100644 --- a/lib/kernels/src/copy_tensor_accessor.cc +++ b/lib/kernels/src/copy_tensor_accessor.cc @@ -3,6 +3,37 @@ namespace FlexFlow { +void copy_accessor_data_to_l_from_r( + GenericTensorAccessorW &dst_accessor, + GenericTensorAccessorR const &src_accessor) { + size_t num_bytes = + dst_accessor.shape.get_volume().unwrap_nonnegative() * + size_of_datatype(dst_accessor.data_type).unwrap_nonnegative(); + + DeviceType dst_device_type = dst_accessor.device_type; + DeviceType src_device_type = src_accessor.device_type; + + if (src_device_type == DeviceType::CPU && + dst_device_type == DeviceType::CPU) { + memcpy(dst_accessor.ptr, src_accessor.ptr, num_bytes); + } else if (src_device_type == DeviceType::CPU && + dst_device_type == DeviceType::GPU) { + checkCUDA(cudaMemcpy( + dst_accessor.ptr, src_accessor.ptr, num_bytes, cudaMemcpyHostToDevice)); + } else if (src_device_type == DeviceType::GPU && + dst_device_type == DeviceType::CPU) { + checkCUDA(cudaMemcpy( + dst_accessor.ptr, src_accessor.ptr, num_bytes, cudaMemcpyDeviceToHost)); + } else { + assert(src_device_type == DeviceType::GPU); + assert(dst_device_type == DeviceType::GPU); + checkCUDA(cudaMemcpy(dst_accessor.ptr, + src_accessor.ptr, + num_bytes, + cudaMemcpyDeviceToDevice)); + } +} + template <DataType DT> struct CopyTensorAccessorW { GenericTensorAccessorW operator()(GenericTensorAccessorW const &src_accessor, @@ -45,4 +76,32 @@ GenericTensorAccessorR src_accessor.data_type, src_accessor, allocator); } +GenericTensorAccessorR + copy_accessor_r_to_cpu_if_necessary(GenericTensorAccessorR const &accessor, + Allocator &cpu_allocator) { + if (cpu_allocator.get_allocation_device_type() == DeviceType::GPU) { + throw mk_runtime_error("Allocator must be a CPU allocator"); + } + + GenericTensorAccessorR cpu_accessor = accessor; + if (accessor.device_type == DeviceType::GPU) { + cpu_accessor = copy_tensor_accessor_r(accessor, cpu_allocator); + } + return cpu_accessor; +} + +GenericTensorAccessorW + copy_accessor_w_to_cpu_if_necessary(GenericTensorAccessorW const &accessor, + Allocator &cpu_allocator) { + if (cpu_allocator.get_allocation_device_type() == DeviceType::GPU) { + throw mk_runtime_error("Allocator must be a CPU allocator"); + } + + GenericTensorAccessorW cpu_accessor = accessor; + if (accessor.device_type == DeviceType::GPU) { + cpu_accessor = copy_tensor_accessor_w(accessor, cpu_allocator); + } + return cpu_accessor; +} + } // namespace FlexFlow
diff --git a/lib/kernels/src/legion_dim.cc b/lib/kernels/src/legion_dim.cc index 4e7fc56848..14016a6202 100644 --- a/lib/kernels/src/legion_dim.cc +++ b/lib/kernels/src/legion_dim.cc @@ -10,7 +10,6 @@ legion_dim_t add_to_legion_dim(legion_dim_t legion_dim, int value) { legion_dim_t legion_dim_from_ff_dim(ff_dim_t ff_dim, nonnegative_int num_dimensions) { return legion_dim_t{num_dimensions - ff_dim.value - 1_n}; - ; } } // namespace FlexFlow
diff --git a/lib/kernels/src/perf_metrics.cc b/lib/kernels/src/perf_metrics.cc index 2036ddd35a..ab0e113a26 100644 --- a/lib/kernels/src/perf_metrics.cc +++ b/lib/kernels/src/perf_metrics.cc @@ -15,8 +15,9 @@ PerfMetrics::PerfMetrics(int _train_all, double _start_time_micro, double _current_time_micro) : train_all(_train_all), train_correct(_train_correct), cce_loss(_cce_loss), - mse_loss(_mse_loss), rmse_loss(_rmse_loss), mae_loss(_mae_loss), - start_time(_start_time_micro), current_time(_current_time_micro) {} +
sparse_cce_loss(_sparse_cce_loss), mse_loss(_mse_loss), + rmse_loss(_rmse_loss), mae_loss(_mae_loss), start_time(_start_time_micro), + current_time(_current_time_micro) {} float get_throughput(PerfMetrics const &m) { return m.train_all / (m.current_time - m.start_time);
diff --git a/lib/kernels/test/src/test_accessor.cc b/lib/kernels/test/src/test_accessor.cc new file mode 100644 index 0000000000..e9611a928c --- /dev/null +++ b/lib/kernels/test/src/test_accessor.cc @@ -0,0 +1,136 @@ +#include "doctest/doctest.h" +#include "kernels/accessor.h" +#include "op-attrs/datatype_value.h" +#include "test_utils.h" + +using namespace ::FlexFlow; + +template <DataType DT> +void check_accessor_get(GenericTensorAccessorR const &accessor, + real_type_t<DT> expected) { + CHECK(*accessor.get<DT>() == expected); + + if constexpr (DT == DataType::INT32) { + CHECK(*accessor.get_int32_ptr() == expected); + } else if constexpr (DT == DataType::INT64) { + CHECK(*accessor.get_int64_ptr() == expected); + } else if constexpr (DT == DataType::FLOAT) { + CHECK(*accessor.get_float_ptr() == doctest::Approx(expected)); + } else if constexpr (DT == DataType::DOUBLE) { + CHECK(*accessor.get_double_ptr() == doctest::Approx(expected)); + } else if constexpr (DT == DataType::HALF) { + CHECK(*accessor.get_half_ptr() == doctest::Approx(expected)); + } +} + +template <DataType DT> +void run_accessor_w_test(DataTypeValue value, + real_type_t<DT> expected, + Allocator allocator) { + TensorShape shape = make_tensor_shape_from_ff_ordered({1_n}, DT); + GenericTensorAccessorW accessor = + create_filled_accessor_w(shape, allocator, value); + check_accessor_get<DT>(read_only_accessor_from_write_accessor(accessor), + expected); +} + +template <DataType DT> +void run_accessor_r_test(DataTypeValue value, + real_type_t<DT> expected, + Allocator allocator) { + TensorShape shape = make_tensor_shape_from_ff_ordered({1_n}, DT); + GenericTensorAccessorR accessor = + create_filled_accessor_r(shape, allocator, value); + check_accessor_get<DT>(accessor, expected); +} + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("Test GenericTensorAccessors") { + Allocator cpu_allocator = create_local_cpu_memory_allocator(); + + SUBCASE("Test GenericTensorAccessorW") { + SUBCASE("Test get methods for GenericTensorAccessorW") { + run_accessor_w_test<DataType::INT32>( + make_int32_data_type_value(12345), 12345, cpu_allocator); + run_accessor_w_test<DataType::INT64>( + make_int64_data_type_value(12345LL), 12345LL, cpu_allocator); + run_accessor_w_test<DataType::FLOAT>( + make_float_data_type_value(1.23f), 1.23f, cpu_allocator); + run_accessor_w_test<DataType::DOUBLE>( + make_double_data_type_value(1.23), 1.23, cpu_allocator); + } + + SUBCASE("Test operator== and operator!= for GenericTensorAccessorW") { + TensorShape shape = + make_tensor_shape_from_ff_ordered({1_n}, DataType::INT32); + + GenericTensorAccessorW accessor1 = create_filled_accessor_w( + shape, cpu_allocator, make_int32_data_type_value(12345)); + GenericTensorAccessorW accessor2 = create_filled_accessor_w( + shape, cpu_allocator, make_int32_data_type_value(12345)); + GenericTensorAccessorW accessor3 = create_filled_accessor_w( + shape, cpu_allocator, make_int32_data_type_value(54321)); + + CHECK(accessor1 == accessor2); + CHECK(accessor1 != accessor3); + } + + SUBCASE("Test at() method for GenericTensorAccessorW") { + DataType const DT = DataType::INT32; + TensorShape shape = make_tensor_shape_from_ff_ordered({3_n, 3_n}, DT); + + GenericTensorAccessorW accessor_1 = + create_random_filled_accessor_w(shape, cpu_allocator); + GenericTensorAccessorW accessor_2 = + copy_tensor_accessor_w(accessor_1, cpu_allocator); + + CHECK(accessor_1.at<DT>
({0, 0}) == accessor_2.at<DT>({0, 0})); + CHECK(accessor_1.at<DT>({1, 0}) == accessor_2.at<DT>({1, 0})); + CHECK(accessor_1.at<DT>({2, 2}) == accessor_2.at<DT>({2, 2})); + } + } + + SUBCASE("Test GenericTensorAccessorR") { + + SUBCASE("Test get methods for GenericTensorAccessorR") { + run_accessor_r_test<DataType::INT32>( + make_int32_data_type_value(12345), 12345, cpu_allocator); + run_accessor_r_test<DataType::INT64>( + make_int64_data_type_value(12345LL), 12345LL, cpu_allocator); + run_accessor_r_test<DataType::FLOAT>( + make_float_data_type_value(1.23f), 1.23f, cpu_allocator); + run_accessor_r_test<DataType::DOUBLE>( + make_double_data_type_value(1.23), 1.23, cpu_allocator); + } + + SUBCASE("Test operator== and operator!= for GenericTensorAccessorR") { + TensorShape shape = + make_tensor_shape_from_ff_ordered({1_n}, DataType::INT32); + + GenericTensorAccessorR accessor1 = create_filled_accessor_r( + shape, cpu_allocator, make_int32_data_type_value(12345)); + GenericTensorAccessorR accessor2 = create_filled_accessor_r( + shape, cpu_allocator, make_int32_data_type_value(12345)); + GenericTensorAccessorR accessor3 = create_filled_accessor_r( + shape, cpu_allocator, make_int32_data_type_value(54321)); + + CHECK(accessor1 == accessor2); + CHECK(accessor1 != accessor3); + } + + SUBCASE("Test at() method for GenericTensorAccessorR") { + DataType const DT = DataType::INT32; + TensorShape shape = make_tensor_shape_from_ff_ordered({3_n, 3_n}, DT); + + GenericTensorAccessorR accessor_1 = + create_random_filled_accessor_r(shape, cpu_allocator); + GenericTensorAccessorR accessor_2 = + copy_tensor_accessor_r(accessor_1, cpu_allocator); + + CHECK(accessor_1.at<DT>
({0, 0}) == accessor_2.at<DT>({0, 0})); + CHECK(accessor_1.at<DT>({1, 0}) == accessor_2.at<DT>({1, 0})); + CHECK(accessor_1.at<DT>({2, 2}) == accessor_2.at<DT>({2, 2})); + } + } + } +}
diff --git a/lib/kernels/test/src/test_array_shape.cc b/lib/kernels/test/src/test_array_shape.cc new file mode 100644 index 0000000000..7ede1791ef --- /dev/null +++ b/lib/kernels/test/src/test_array_shape.cc @@ -0,0 +1,105 @@ +#include "doctest/doctest.h" +#include "kernels/array_shape.h" +#include "test_utils.h" + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("Test ArrayShape") { + ArrayShape shape({1_n, 2_n, 3_n, 4_n}); + + SUBCASE("Test get_volume() and num_elements()") { + CHECK(shape.get_volume() == 1 * 2 * 3 * 4); + CHECK(shape.num_elements() == 1 * 2 * 3 * 4); + } + + SUBCASE("Test num_dims() and get_dim()") { + CHECK(shape.num_dims() == 4); + CHECK(shape.get_dim() == 4); + } + + SUBCASE("Test operator[] and at()") { + CHECK(shape[legion_dim_t{0_n}] == 1); + CHECK(shape[legion_dim_t{1_n}] == 2); + CHECK(shape[legion_dim_t{2_n}] == 3); + CHECK(shape[legion_dim_t{3_n}] == 4); + + CHECK(shape.at(legion_dim_t{0_n}) == 1); + CHECK(shape.at(legion_dim_t{1_n}) == 2); + CHECK(shape.at(legion_dim_t{2_n}) == 3); + CHECK(shape.at(legion_dim_t{3_n}) == 4); + + CHECK(shape.at(ff_dim_t{0_n}) == 4); + CHECK(shape.at(ff_dim_t{1_n}) == 3); + CHECK(shape.at(ff_dim_t{2_n}) == 2); + CHECK(shape.at(ff_dim_t{3_n}) == 1); + } + + SUBCASE("Test operator== and operator!=") { + ArrayShape shape2({1_n, 2_n, 3_n, 4_n}); + ArrayShape shape3({1_n, 2_n, 3_n, 5_n}); + + CHECK(shape == shape2); + CHECK(shape != shape3); + } + + SUBCASE("Test last_idx()") { + CHECK(shape.last_idx() == legion_dim_t{3_n}); + + ArrayShape empty_shape(std::vector<nonnegative_int>{}); + CHECK_THROWS(empty_shape.last_idx()); + } + + SUBCASE("Test neg_idx()") { + CHECK(shape.neg_idx(-1) == legion_dim_t{3_n}); + CHECK(shape.neg_idx(-2) == legion_dim_t{2_n}); + CHECK(shape.neg_idx(-3) == legion_dim_t{1_n}); + CHECK(shape.neg_idx(-4) == legion_dim_t{0_n}); + + CHECK_THROWS(shape.neg_idx(-5)); + } + + SUBCASE("Test at_maybe()") { + CHECK(shape.at_maybe(legion_dim_t{0_n}).value() == 1); + CHECK(shape.at_maybe(legion_dim_t{1_n}).value() == 2); + CHECK(shape.at_maybe(legion_dim_t{2_n}).value() == 3); + CHECK(shape.at_maybe(legion_dim_t{3_n}).value() == 4); + CHECK(!shape.at_maybe(legion_dim_t{4_n}).has_value()); + + CHECK(shape.at_maybe(ff_dim_t{0_n}).value() == 4); + CHECK(shape.at_maybe(ff_dim_t{1_n}).value() == 3); + CHECK(shape.at_maybe(ff_dim_t{2_n}).value() == 2); + CHECK(shape.at_maybe(ff_dim_t{3_n}).value() == 1); + CHECK(!shape.at_maybe(ff_dim_t{4_n}).has_value()); + } + + SUBCASE("Test subshape()") { + SUBCASE("Test basic subshape") { + ArrayShape ref_shape({2_n, 3_n}); + ArrayShape subshape = + shape.sub_shape(legion_dim_t{1_n}, legion_dim_t{3_n}); + + CHECK(ref_shape == subshape); + } + + SUBCASE("Test empty subshape") { + ArrayShape ref_shape(std::vector<nonnegative_int>{}); + ArrayShape subshape = + shape.sub_shape(legion_dim_t{0_n}, legion_dim_t{0_n}); + CHECK(ref_shape == subshape); + } + + SUBCASE("Test subshape with no start") { + ArrayShape ref_shape({1_n, 2_n, 3_n}); + ArrayShape subshape = shape.sub_shape(std::nullopt, legion_dim_t{3_n}); + CHECK(ref_shape == subshape); + } + + SUBCASE("Test subshape with no end") { + ArrayShape ref_shape({2_n, 3_n, 4_n}); + ArrayShape subshape = shape.sub_shape(legion_dim_t{1_n}, std::nullopt); + CHECK(ref_shape == subshape); + } + } + } +}
diff --git a/lib/kernels/test/src/test_attention_kernel.cc b/lib/kernels/test/src/test_attention_kernel.cc index bd0167a677..6b54554a9b 100644 --- a/lib/kernels/test/src/test_attention_kernel.cc +++
b/lib/kernels/test/src/test_attention_kernel.cc @@ -41,15 +41,15 @@ TEST_SUITE(FF_TEST_SUITE) { /*kvSeqLength=*/kvSeqLength.unwrap_nonnegative(), /*add_bias_kv=*/false); - TensorShape query_shape = make_tensor_shape_from_legion_dims( + TensorShape query_shape = make_tensor_shape_from_ff_ordered( {qoSeqLength, num_samples, qSize}, DataType::FLOAT); - TensorShape key_shape = make_tensor_shape_from_legion_dims( + TensorShape key_shape = make_tensor_shape_from_ff_ordered( {kvSeqLength, num_samples, kSize}, DataType::FLOAT); - TensorShape value_shape = make_tensor_shape_from_legion_dims( + TensorShape value_shape = make_tensor_shape_from_ff_ordered( {kvSeqLength, num_samples, vSize}, DataType::FLOAT); - TensorShape output_shape = make_tensor_shape_from_legion_dims( + TensorShape output_shape = make_tensor_shape_from_ff_ordered( {qoSeqLength, num_samples, oProjSize}, DataType::FLOAT); - TensorShape weight_shape = make_tensor_shape_from_legion_dims( + TensorShape weight_shape = make_tensor_shape_from_ff_ordered( {nonnegative_int{state.weightSize}}, DataType::FLOAT); GenericTensorAccessorW query_accessor = diff --git a/lib/kernels/test/src/test_batch_matmul_kernel.cc b/lib/kernels/test/src/test_batch_matmul_kernel.cc index d78d5daee5..ba9b3ac0e2 100644 --- a/lib/kernels/test/src/test_batch_matmul_kernel.cc +++ b/lib/kernels/test/src/test_batch_matmul_kernel.cc @@ -22,11 +22,11 @@ TEST_SUITE(FF_TEST_SUITE) { Allocator allocator = create_local_cuda_memory_allocator(); TensorShape input_shape_a = - make_tensor_shape_from_legion_dims({m, k, batch}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({m, k, batch}, DataType::FLOAT); TensorShape input_shape_b = - make_tensor_shape_from_legion_dims({k, n, batch}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({k, n, batch}, DataType::FLOAT); TensorShape output_shape = - make_tensor_shape_from_legion_dims({m, n, batch}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({m, n, batch}, DataType::FLOAT); GenericTensorAccessorW a_accessor = create_random_filled_accessor_w(input_shape_a, allocator); diff --git a/lib/kernels/test/src/test_batch_norm_kernel.cc b/lib/kernels/test/src/test_batch_norm_kernel.cc index d0ec2559ba..698a320a69 100644 --- a/lib/kernels/test/src/test_batch_norm_kernel.cc +++ b/lib/kernels/test/src/test_batch_norm_kernel.cc @@ -29,13 +29,13 @@ TEST_SUITE(FF_TEST_SUITE) { /*output_w=*/output_w.unwrap_nonnegative(), /*relu=*/true); - TensorShape input_shape = make_tensor_shape_from_legion_dims( + TensorShape input_shape = make_tensor_shape_from_ff_ordered( {output_n, output_c, output_h, output_w}, DataType::FLOAT); - TensorShape output_shape = make_tensor_shape_from_legion_dims( + TensorShape output_shape = make_tensor_shape_from_ff_ordered( {output_n, output_c, output_h, output_w}, DataType::FLOAT); - TensorShape scale_shape = make_tensor_shape_from_legion_dims( + TensorShape scale_shape = make_tensor_shape_from_ff_ordered( {output_n, output_c, output_h, output_w}, DataType::FLOAT); - TensorShape bias_shape = make_tensor_shape_from_legion_dims( + TensorShape bias_shape = make_tensor_shape_from_ff_ordered( {output_n, output_c, output_h, output_w}, DataType::FLOAT); GenericTensorAccessorW input_accessor = diff --git a/lib/kernels/test/src/test_cast_kernel.cc b/lib/kernels/test/src/test_cast_kernel.cc index c59d8eae3f..d314a6bcc2 100644 --- a/lib/kernels/test/src/test_cast_kernel.cc +++ b/lib/kernels/test/src/test_cast_kernel.cc @@ -11,9 +11,9 @@ TEST_SUITE(FF_TEST_SUITE) { Allocator allocator = 
create_local_cuda_memory_allocator(); TensorShape input_shape = - make_tensor_shape_from_legion_dims({100_n, 100_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({100_n, 100_n}, DataType::FLOAT); TensorShape output_shape = - make_tensor_shape_from_legion_dims({100_n, 100_n}, DataType::DOUBLE); + make_tensor_shape_from_ff_ordered({100_n, 100_n}, DataType::DOUBLE); SUBCASE("forward_kernel") { GenericTensorAccessorR input_accessor = @@ -48,9 +48,9 @@ TEST_SUITE(FF_TEST_SUITE) { Allocator cpu_allocator = create_local_cpu_memory_allocator(); TensorShape input_shape = - make_tensor_shape_from_legion_dims({10_n, 2_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({10_n, 2_n}, DataType::FLOAT); TensorShape output_shape = - make_tensor_shape_from_legion_dims({10_n, 2_n}, DataType::DOUBLE); + make_tensor_shape_from_ff_ordered({10_n, 2_n}, DataType::DOUBLE); // Only calling forward kernel as backward kernel is exactly the same SUBCASE("forward_kernel") { @@ -72,7 +72,7 @@ TEST_SUITE(FF_TEST_SUITE) { Kernels::Cast::cpu_forward_kernel(input_accessor_cpu, output_accessor_cpu); - CHECK(accessors_are_equal(output_accessor_gpu, output_accessor_cpu)); + CHECK(accessor_data_is_equal(output_accessor_gpu, output_accessor_cpu)); } } }
diff --git a/lib/kernels/test/src/test_combine_kernel.cc b/lib/kernels/test/src/test_combine_kernel.cc index 97fa81920b..b30d1ab7f4 100644 --- a/lib/kernels/test/src/test_combine_kernel.cc +++ b/lib/kernels/test/src/test_combine_kernel.cc @@ -14,7 +14,7 @@ TEST_SUITE(FF_TEST_SUITE) { Allocator allocator = create_local_cuda_memory_allocator(); TensorShape input_shape = - make_tensor_shape_from_legion_dims({100_n, 100_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({100_n, 100_n}, DataType::FLOAT); TensorShape output_shape = input_shape; SUBCASE("forward_kernel") { @@ -50,7 +50,7 @@ TEST_SUITE(FF_TEST_SUITE) { Allocator cpu_allocator = create_local_cpu_memory_allocator(); TensorShape input_shape = - make_tensor_shape_from_legion_dims({5_n, 5_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({5_n, 5_n}, DataType::FLOAT); TensorShape output_shape = input_shape; SUBCASE("forward_kernel") { @@ -72,7 +72,7 @@ TEST_SUITE(FF_TEST_SUITE) { Kernels::Combine::cpu_forward_kernel(input_accessor_cpu, output_accessor_cpu); - CHECK(accessors_are_equal(output_accessor_gpu, output_accessor_cpu)); + CHECK(accessor_data_is_equal(output_accessor_gpu, output_accessor_cpu)); } SUBCASE("backward_kernel") { @@ -95,8 +95,8 @@ TEST_SUITE(FF_TEST_SUITE) { Kernels::Combine::cpu_backward_kernel(output_grad_accessor_cpu, input_grad_accessor_cpu); - CHECK(accessors_are_equal(input_grad_accessor_gpu, - input_grad_accessor_cpu)); + CHECK(accessor_data_is_equal(input_grad_accessor_gpu, + input_grad_accessor_cpu)); } } }
diff --git a/lib/kernels/test/src/test_concat_kernel.cc b/lib/kernels/test/src/test_concat_kernel.cc index 22da72912a..f8bc31c3d5 100644 --- a/lib/kernels/test/src/test_concat_kernel.cc +++ b/lib/kernels/test/src/test_concat_kernel.cc @@ -19,7 +19,7 @@ TEST_SUITE(FF_TEST_SUITE) { nonnegative_int input_cols, TensorShape output_shape, ff_dim_t concat_axis) { - TensorShape input_shape = make_tensor_shape_from_legion_dims( + TensorShape input_shape = make_tensor_shape_from_ff_ordered( {input_rows, input_cols}, DataType::FLOAT); std::vector<GenericTensorAccessorR> input_accessors = @@ -41,7 +41,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("test forward concat, axis = 0") { nonnegative_int input_rows = 2_n; nonnegative_int input_cols = 4_n; - TensorShape output_shape =
make_tensor_shape_from_legion_dims( + TensorShape output_shape = make_tensor_shape_from_ff_ordered( {num_inputs * input_rows, input_cols}, DataType::FLOAT); run_forward_test(input_rows, input_cols, output_shape, ff_dim_t{0_n}); } @@ -49,7 +49,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("test forward concat, axis = 1") { nonnegative_int input_rows = 4_n; nonnegative_int input_cols = 2_n; - TensorShape output_shape = make_tensor_shape_from_legion_dims( + TensorShape output_shape = make_tensor_shape_from_ff_ordered( {input_rows, num_inputs * input_cols}, DataType::FLOAT); run_forward_test(input_rows, input_cols, output_shape, ff_dim_t{1_n}); } @@ -60,7 +60,7 @@ TEST_SUITE(FF_TEST_SUITE) { nonnegative_int input_cols, TensorShape output_shape, ff_dim_t concat_axis) { - TensorShape input_shape = make_tensor_shape_from_legion_dims( + TensorShape input_shape = make_tensor_shape_from_ff_ordered( {input_rows, input_cols}, DataType::FLOAT); GenericTensorAccessorR output_grad_accessor = @@ -84,7 +84,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("test backward concat, axis = 0") { nonnegative_int input_rows = 2_n; nonnegative_int input_cols = 4_n; - TensorShape output_shape = make_tensor_shape_from_legion_dims( + TensorShape output_shape = make_tensor_shape_from_ff_ordered( {num_inputs * input_rows, input_cols}, DataType::FLOAT); run_backward_test(input_rows, input_cols, output_shape, ff_dim_t{0_n}); } @@ -92,7 +92,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("test backward concat, axis = 1") { nonnegative_int input_rows = 4_n; nonnegative_int input_cols = 2_n; - TensorShape output_shape = make_tensor_shape_from_legion_dims( + TensorShape output_shape = make_tensor_shape_from_ff_ordered( {input_rows, num_inputs * input_cols}, DataType::FLOAT); run_backward_test(input_rows, input_cols, output_shape, ff_dim_t{1_n}); } diff --git a/lib/kernels/test/src/test_copy_tensor_accessor.cc b/lib/kernels/test/src/test_copy_tensor_accessor.cc new file mode 100644 index 0000000000..a6a4cfde53 --- /dev/null +++ b/lib/kernels/test/src/test_copy_tensor_accessor.cc @@ -0,0 +1,76 @@ +#include "doctest/doctest.h" +#include "kernels/accessor.h" +#include "op-attrs/datatype_value.h" +#include "test_utils.h" + +using namespace ::FlexFlow; +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("Test copy_tensor_accessor") { + Allocator cpu_allocator = create_local_cpu_memory_allocator(); + Allocator gpu_allocator = create_local_cuda_memory_allocator(); + TensorShape shape = + make_tensor_shape_from_ff_ordered({5_n, 5_n}, DataType::FLOAT); + + SUBCASE("Test copy_tensor_accessor_r") { + GenericTensorAccessorR src_accessor = + create_random_filled_accessor_r(shape, cpu_allocator); + GenericTensorAccessorR dst_accessor = + copy_tensor_accessor_r(src_accessor, cpu_allocator); + + CHECK(accessor_data_is_equal(src_accessor, dst_accessor)); + } + + SUBCASE("Test copy_tensor_accessor_w") { + GenericTensorAccessorW src_accessor = + create_random_filled_accessor_w(shape, cpu_allocator); + GenericTensorAccessorW dst_accessor = + copy_tensor_accessor_w(src_accessor, cpu_allocator); + + CHECK(accessor_data_is_equal(src_accessor, dst_accessor)); + } + + SUBCASE("Test copy_accessor_r_to_cpu_if_necessary") { + SUBCASE("Test necessary") { + GenericTensorAccessorR src_accessor = + create_random_filled_accessor_r(shape, gpu_allocator); + GenericTensorAccessorR dst_accessor = + copy_accessor_r_to_cpu_if_necessary(src_accessor, cpu_allocator); + + CHECK(accessor_data_is_equal(src_accessor, dst_accessor)); + CHECK(dst_accessor.device_type == DeviceType::CPU); + } + + 
SUBCASE("Test not necessary") { + GenericTensorAccessorR src_accessor = + create_random_filled_accessor_r(shape, cpu_allocator); + GenericTensorAccessorR dst_accessor = + copy_accessor_r_to_cpu_if_necessary(src_accessor, cpu_allocator); + + CHECK(accessor_data_is_equal(src_accessor, dst_accessor)); + CHECK(dst_accessor.device_type == DeviceType::CPU); + } + } + + SUBCASE("Test copy_accessor_w_to_cpu_if_necessary") { + SUBCASE("Test necessary") { + GenericTensorAccessorW src_accessor = + create_random_filled_accessor_w(shape, gpu_allocator); + GenericTensorAccessorW dst_accessor = + copy_accessor_w_to_cpu_if_necessary(src_accessor, cpu_allocator); + + CHECK(accessor_data_is_equal(src_accessor, dst_accessor)); + CHECK(dst_accessor.device_type == DeviceType::CPU); + } + + SUBCASE("Test not necessary") { + GenericTensorAccessorW src_accessor = + create_random_filled_accessor_w(shape, cpu_allocator); + GenericTensorAccessorW dst_accessor = + copy_accessor_w_to_cpu_if_necessary(src_accessor, cpu_allocator); + + CHECK(accessor_data_is_equal(src_accessor, dst_accessor)); + CHECK(dst_accessor.device_type == DeviceType::CPU); + } + } + } +} diff --git a/lib/kernels/test/src/test_datatype_dispatch.cc b/lib/kernels/test/src/test_datatype_dispatch.cc new file mode 100644 index 0000000000..41737d715a --- /dev/null +++ b/lib/kernels/test/src/test_datatype_dispatch.cc @@ -0,0 +1,65 @@ +#include "doctest/doctest.h" +#include "kernels/datatype_dispatch.h" + +using namespace ::FlexFlow; + +template +struct TestDatatypeDispatch1 { + int operator()(int value) { + if (DT == DataType::FLOAT) { + return value + 1; + } else if (DT == DataType::INT32) { + return value + 2; + } else { + return value + 3; + } + } +}; + +template +struct TestDatatypeDispatch2 { + void operator()(int &value) { + if (IDT == DataType::INT32 && ODT == DataType::FLOAT) { + value *= 2; + } else if (IDT == DataType::FLOAT && ODT == DataType::INT32) { + value *= 3; + } else { + value *= 4; + } + } +}; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("Test DataTypeDispatch") { + SUBCASE("Test DataTypeDispatch1") { + CHECK(DataTypeDispatch1{}(DataType::FLOAT, 1) == + 2); + CHECK(DataTypeDispatch1{}(DataType::INT32, 1) == + 3); + CHECK(DataTypeDispatch1{}(DataType::DOUBLE, 1) == + 4); + } + + SUBCASE("Test DataTypeDispatch2") { + int value = 1; + + SUBCASE("Case One") { + DataTypeDispatch2{}( + DataType::INT32, DataType::FLOAT, value); + CHECK(value == 2); + } + + SUBCASE("Case Two") { + DataTypeDispatch2{}( + DataType::FLOAT, DataType::INT32, value); + CHECK(value == 3); + } + + SUBCASE("Test Three") { + DataTypeDispatch2{}( + DataType::DOUBLE, DataType::DOUBLE, value); + CHECK(value == 4); + } + } + } +} diff --git a/lib/kernels/test/src/test_dropout.cc b/lib/kernels/test/src/test_dropout.cc index 1a34c59be6..e5eba341f3 100644 --- a/lib/kernels/test/src/test_dropout.cc +++ b/lib/kernels/test/src/test_dropout.cc @@ -14,7 +14,7 @@ TEST_SUITE(FF_TEST_SUITE) { }; TensorShape input_shape = - make_tensor_shape_from_legion_dims({10_n, 10_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({10_n, 10_n}, DataType::FLOAT); TensorShape output_shape = input_shape; ManagedFFStream managed_stream{}; diff --git a/lib/kernels/test/src/test_flat_kernel.cc b/lib/kernels/test/src/test_flat_kernel.cc index 238c4ac361..ee4554d00a 100644 --- a/lib/kernels/test/src/test_flat_kernel.cc +++ b/lib/kernels/test/src/test_flat_kernel.cc @@ -14,7 +14,7 @@ TEST_SUITE(FF_TEST_SUITE) { ManagedFFStream managed_stream{}; TensorShape input_shape = - 
make_tensor_shape_from_legion_dims({100_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({100_n}, DataType::FLOAT); TensorShape output_shape = input_shape; GenericTensorAccessorR input_accessor = diff --git a/lib/kernels/test/src/test_gather_kernels.cc b/lib/kernels/test/src/test_gather_kernels.cc index 043617c790..64cc824b9b 100644 --- a/lib/kernels/test/src/test_gather_kernels.cc +++ b/lib/kernels/test/src/test_gather_kernels.cc @@ -37,21 +37,21 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("test gather forward, 2D") { TensorShape input_shape = - make_tensor_shape_from_legion_dims({2_n, 100_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({2_n, 100_n}, DataType::FLOAT); TensorShape index_shape = - make_tensor_shape_from_legion_dims({2_n, 20_n}, DataType::INT32); + make_tensor_shape_from_ff_ordered({2_n, 20_n}, DataType::INT32); TensorShape output_shape = - make_tensor_shape_from_legion_dims({2_n, 20_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({2_n, 20_n}, DataType::FLOAT); run_forward_test(input_shape, index_shape, output_shape); } SUBCASE("test gather forward, 1D") { TensorShape input_shape = - make_tensor_shape_from_legion_dims({100_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({100_n}, DataType::FLOAT); TensorShape index_shape = - make_tensor_shape_from_legion_dims({10_n}, DataType::INT32); + make_tensor_shape_from_ff_ordered({10_n}, DataType::INT32); TensorShape output_shape = - make_tensor_shape_from_legion_dims({10_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({10_n}, DataType::FLOAT); run_forward_test(input_shape, index_shape, output_shape); } } @@ -77,11 +77,11 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("test gather backward, 2D") { TensorShape input_shape = - make_tensor_shape_from_legion_dims({2_n, 100_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({2_n, 100_n}, DataType::FLOAT); TensorShape index_shape = - make_tensor_shape_from_legion_dims({2_n, 25_n}, DataType::INT32); + make_tensor_shape_from_ff_ordered({2_n, 25_n}, DataType::INT32); TensorShape output_shape = - make_tensor_shape_from_legion_dims({2_n, 25_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({2_n, 25_n}, DataType::FLOAT); run_backward_test(input_shape, index_shape, output_shape); } } diff --git a/lib/kernels/test/src/test_layer_norm_kernels.cc b/lib/kernels/test/src/test_layer_norm_kernels.cc index 8368fe4efd..4d5802936e 100644 --- a/lib/kernels/test/src/test_layer_norm_kernels.cc +++ b/lib/kernels/test/src/test_layer_norm_kernels.cc @@ -12,11 +12,11 @@ TEST_SUITE(FF_TEST_SUITE) { float epsilon = 1e-5f; bool elementwise_affine = true; - TensorShape input_shape = make_tensor_shape_from_legion_dims( + TensorShape input_shape = make_tensor_shape_from_ff_ordered( {batch_size, feature_size}, DataType::FLOAT); TensorShape output_shape = input_shape; TensorShape feature_shape = - make_tensor_shape_from_legion_dims({feature_size}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({feature_size}, DataType::FLOAT); ManagedPerDeviceFFHandle managed_handle{ /*workSpaceSize=*/1024 * 1024, diff --git a/lib/kernels/test/src/test_legion_dim.cc b/lib/kernels/test/src/test_legion_dim.cc new file mode 100644 index 0000000000..c06b779ad8 --- /dev/null +++ b/lib/kernels/test/src/test_legion_dim.cc @@ -0,0 +1,29 @@ +#include "doctest/doctest.h" +#include "kernels/legion_dim.h" + +using namespace FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("Test LegionDim") { + SUBCASE("Test add_to_legion_dim") { + legion_dim_t dim{1_n}; + CHECK(add_to_legion_dim(dim, 2) == 
legion_dim_t{3_n}); + } + + SUBCASE("Test legion_dim_from_ff_dim") { + CHECK(legion_dim_from_ff_dim(ff_dim_t{0_n}, 4_n) == legion_dim_t{3_n}); + CHECK(legion_dim_from_ff_dim(ff_dim_t{1_n}, 4_n) == legion_dim_t{2_n}); + CHECK(legion_dim_from_ff_dim(ff_dim_t{2_n}, 4_n) == legion_dim_t{1_n}); + CHECK(legion_dim_from_ff_dim(ff_dim_t{3_n}, 4_n) == legion_dim_t{0_n}); + } + + SUBCASE("Test LegionOrdered") { + LegionOrdered<int> legion_ordered{1, 2, 3, 4}; + + SUBCASE("Test ff_ordered_from_legion_ordered") { + CHECK(ff_ordered_from_legion_ordered(legion_ordered) == + FFOrdered<int>{4, 3, 2, 1}); + } + } + } +}
diff --git a/lib/kernels/test/src/test_local_cpu_allocator.cc b/lib/kernels/test/src/test_local_cpu_allocator.cc new file mode 100644 index 0000000000..fa6bce36db --- /dev/null +++ b/lib/kernels/test/src/test_local_cpu_allocator.cc @@ -0,0 +1,19 @@ +#include "kernels/local_cpu_allocator.h" +#include "doctest/doctest.h" + +using namespace ::FlexFlow; +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("Test LocalCPUAllocator") { + Allocator cpu_allocator = create_local_cpu_memory_allocator(); + + SUBCASE("Test allocate and deallocate") { + void *ptr = cpu_allocator.allocate(100); + CHECK(ptr != nullptr); + cpu_allocator.deallocate(ptr); + } + + SUBCASE("Test get_allocation_device_type") { + CHECK(cpu_allocator.get_allocation_device_type() == DeviceType::CPU); + } + } +}
diff --git a/lib/kernels/test/src/test_local_cuda_allocator.cc b/lib/kernels/test/src/test_local_cuda_allocator.cc new file mode 100644 index 0000000000..c091576bd3 --- /dev/null +++ b/lib/kernels/test/src/test_local_cuda_allocator.cc @@ -0,0 +1,19 @@ +#include "kernels/local_cuda_allocator.h" +#include "doctest/doctest.h" + +using namespace ::FlexFlow; +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("Test LocalCUDAAllocator") { + Allocator gpu_allocator = create_local_cuda_memory_allocator(); + + SUBCASE("Test allocate and deallocate") { + void *ptr = gpu_allocator.allocate(100); + CHECK(ptr != nullptr); + gpu_allocator.deallocate(ptr); + } + + SUBCASE("Test get_allocation_device_type") { + CHECK(gpu_allocator.get_allocation_device_type() == DeviceType::GPU); + } + } +}
diff --git a/lib/kernels/test/src/test_managed_ff_stream.cc b/lib/kernels/test/src/test_managed_ff_stream.cc index 87b564d284..841c9a82ab 100644 --- a/lib/kernels/test/src/test_managed_ff_stream.cc +++ b/lib/kernels/test/src/test_managed_ff_stream.cc @@ -37,21 +37,21 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("test gather forward, 2D") { TensorShape input_shape = - make_tensor_shape_from_legion_dims({2_n, 100_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({2_n, 100_n}, DataType::FLOAT); TensorShape index_shape = - make_tensor_shape_from_legion_dims({2_n, 20_n}, DataType::INT32); + make_tensor_shape_from_ff_ordered({2_n, 20_n}, DataType::INT32); TensorShape output_shape = - make_tensor_shape_from_legion_dims({2_n, 20_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({2_n, 20_n}, DataType::FLOAT); run_forward_test(input_shape, index_shape, output_shape); } SUBCASE("test gather forward, 1D") { TensorShape input_shape = - make_tensor_shape_from_legion_dims({100_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({100_n}, DataType::FLOAT); TensorShape index_shape = - make_tensor_shape_from_legion_dims({10_n}, DataType::INT32); + make_tensor_shape_from_ff_ordered({10_n}, DataType::INT32); TensorShape output_shape = - make_tensor_shape_from_legion_dims({10_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({10_n}, DataType::FLOAT); run_forward_test(input_shape,
index_shape, output_shape); } } @@ -77,11 +77,11 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("test gather backward, 2D") { TensorShape input_shape = - make_tensor_shape_from_legion_dims({2_n, 100_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({2_n, 100_n}, DataType::FLOAT); TensorShape index_shape = - make_tensor_shape_from_legion_dims({2_n, 25_n}, DataType::INT32); + make_tensor_shape_from_ff_ordered({2_n, 25_n}, DataType::INT32); TensorShape output_shape = - make_tensor_shape_from_legion_dims({2_n, 25_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({2_n, 25_n}, DataType::FLOAT); run_backward_test(input_shape, index_shape, output_shape); } } diff --git a/lib/kernels/test/src/test_partition_kernel.cc b/lib/kernels/test/src/test_partition_kernel.cc index c1be78bd16..e9fab697bb 100644 --- a/lib/kernels/test/src/test_partition_kernel.cc +++ b/lib/kernels/test/src/test_partition_kernel.cc @@ -18,7 +18,7 @@ TEST_SUITE(FF_TEST_SUITE) { managed_handle.raw_handle(), DataType::FLOAT); TensorShape input_shape = - make_tensor_shape_from_legion_dims({10_n, 10_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({10_n, 10_n}, DataType::FLOAT); TensorShape output_shape = input_shape; SUBCASE("forward_kernel") { diff --git a/lib/kernels/test/src/test_perf_metrics.cc b/lib/kernels/test/src/test_perf_metrics.cc new file mode 100644 index 0000000000..e958a808b7 --- /dev/null +++ b/lib/kernels/test/src/test_perf_metrics.cc @@ -0,0 +1,127 @@ +#include "kernels/perf_metrics.h" +#include "doctest/doctest.h" + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("Test PerfMetrics Constructors and Metric Functions") { + SUBCASE("Test constructor with start_time only") { + double start = 100.0; + PerfMetrics pm(start); + + CHECK(pm.start_time == start); + CHECK(pm.current_time == start); + + CHECK(pm.train_all == 0); + if (pm.train_correct.has_value()) { + CHECK(pm.train_correct.value() == 0); + } + + CHECK(!pm.cce_loss.has_value()); + + if (pm.sparse_cce_loss.has_value()) { + CHECK(pm.sparse_cce_loss.value() == doctest::Approx(0.0f)); + } + if (pm.mse_loss.has_value()) { + CHECK(pm.mse_loss.value() == doctest::Approx(0.0f)); + } + if (pm.rmse_loss.has_value()) { + CHECK(pm.rmse_loss.value() == doctest::Approx(0.0f)); + } + if (pm.mae_loss.has_value()) { + CHECK(pm.mae_loss.value() == doctest::Approx(0.0f)); + } + } + + SUBCASE("Test full constructor and throughput/accuracy") { + int train_all = 200; + int train_correct = 150; + float cce = 1.2f; + float sparse_cce = 1.0f; + float mse = 0.5f; + float rmse = 0.7f; + float mae = 0.3f; + double start = 100.0; + double curr = 110.0; + PerfMetrics pm(train_all, + train_correct, + cce, + sparse_cce, + mse, + rmse, + mae, + start, + curr); + + CHECK(pm.train_all == train_all); + CHECK(pm.train_correct.has_value()); + CHECK(pm.train_correct.value() == train_correct); + CHECK(pm.cce_loss.has_value()); + CHECK(pm.cce_loss.value() == doctest::Approx(cce)); + CHECK(pm.sparse_cce_loss.has_value()); + CHECK(pm.sparse_cce_loss.value() == doctest::Approx(sparse_cce)); + CHECK(pm.mse_loss.has_value()); + CHECK(pm.mse_loss.value() == doctest::Approx(mse)); + CHECK(pm.rmse_loss.has_value()); + CHECK(pm.rmse_loss.value() == doctest::Approx(rmse)); + CHECK(pm.mae_loss.has_value()); + CHECK(pm.mae_loss.value() == doctest::Approx(mae)); + CHECK(pm.start_time == start); + CHECK(pm.current_time == curr); + + float expected_throughput = train_all / (curr - start); + CHECK(get_throughput(pm) == doctest::Approx(expected_throughput)); + + float 
expected_accuracy = static_cast<float>(train_correct) / train_all; + CHECK(get_accuracy(pm) == doctest::Approx(expected_accuracy)); + } + + SUBCASE("Test update function") { + PerfMetrics pm1(100, 50, 1.0f, 0.5f, 0.3f, 0.2f, 0.1f, 0.0, 1.0); + PerfMetrics pm2(50, 30, 0.5f, 0.3f, 0.2f, 0.1f, 0.05f, 0.0, 1.5); + + PerfMetrics updated = update(pm1, pm2); + + CHECK(updated.train_all == (100 + 50)); + if (updated.train_correct.has_value()) { + CHECK(updated.train_correct.value() == (50 + 30)); + } + + CHECK(updated.cce_loss.has_value()); + CHECK(updated.cce_loss.value() == doctest::Approx(1.0f + 0.5f)); + CHECK(updated.sparse_cce_loss.has_value()); + CHECK(updated.sparse_cce_loss.value() == doctest::Approx(0.5f + 0.3f)); + CHECK(updated.mse_loss.has_value()); + CHECK(updated.mse_loss.value() == doctest::Approx(0.3f + 0.2f)); + CHECK(updated.rmse_loss.has_value()); + CHECK(updated.rmse_loss.value() == doctest::Approx(0.2f + 0.1f)); + CHECK(updated.mae_loss.has_value()); + CHECK(updated.mae_loss.value() == doctest::Approx(0.1f + 0.05f)); + CHECK(updated.current_time == pm2.current_time); + } + + SUBCASE("Test apply_scale function") { + PerfMetrics pm(100, 50, 2.0f, 1.0f, 0.8f, 0.6f, 0.4f, 0.0, 2.0); + float scale = 0.5f; + PerfMetrics scaled = apply_scale(pm, scale); + + CHECK(scaled.cce_loss.has_value()); + CHECK(scaled.cce_loss.value() == doctest::Approx(2.0f * scale)); + CHECK(scaled.sparse_cce_loss.has_value()); + CHECK(scaled.sparse_cce_loss.value() == doctest::Approx(1.0f * scale)); + CHECK(scaled.mse_loss.has_value()); + CHECK(scaled.mse_loss.value() == doctest::Approx(0.8f * scale)); + CHECK(scaled.rmse_loss.has_value()); + CHECK(scaled.rmse_loss.value() == doctest::Approx(0.6f * scale)); + CHECK(scaled.mae_loss.has_value()); + CHECK(scaled.mae_loss.value() == doctest::Approx(0.4f * scale)); + + CHECK(scaled.train_all == pm.train_all); + if (scaled.train_correct.has_value()) { + CHECK(scaled.train_correct.value() == pm.train_correct.value()); + } + CHECK(scaled.start_time == pm.start_time); + CHECK(scaled.current_time == pm.current_time); + } + } +}
diff --git a/lib/kernels/test/src/test_pool_2d_kernels.cc b/lib/kernels/test/src/test_pool_2d_kernels.cc index ff74f6fb28..06db1989eb 100644 --- a/lib/kernels/test/src/test_pool_2d_kernels.cc +++ b/lib/kernels/test/src/test_pool_2d_kernels.cc @@ -49,9 +49,9 @@ TEST_SUITE(FF_TEST_SUITE) { /*stride_w=*/stride_w.unwrap_nonnegative(), /*pool_type=*/pool_type); - TensorShape input_shape = make_tensor_shape_from_legion_dims( + TensorShape input_shape = make_tensor_shape_from_ff_ordered( {input_w, input_h, input_c, input_n}, DataType::FLOAT); - TensorShape output_shape = make_tensor_shape_from_legion_dims( + TensorShape output_shape = make_tensor_shape_from_ff_ordered( {output_w, output_h, output_c, output_n}, DataType::FLOAT); GenericTensorAccessorW input_accessor =
diff --git a/lib/kernels/test/src/test_reduction_kernel.cc b/lib/kernels/test/src/test_reduction_kernel.cc index f91c4959cc..921a5ff08c 100644 --- a/lib/kernels/test/src/test_reduction_kernel.cc +++ b/lib/kernels/test/src/test_reduction_kernel.cc @@ -8,7 +8,7 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("Test Reduction Forward and Backward Kernel") { std::size_t num_replicas = 5; - TensorShape input_shape = make_tensor_shape_from_legion_dims( + TensorShape input_shape = make_tensor_shape_from_ff_ordered( {10_n, 10_n, 10_n, 10_n, 10_n}, DataType::FLOAT); ManagedPerDeviceFFHandle managed_handle{ /*workSpaceSize=*/1024 * 1024, @@ -20,7 +20,7 @@ TEST_SUITE(FF_TEST_SUITE) { SUBCASE("forward_kernel") { TensorShape output_shape = -
make_tensor_shape_from_legion_dims({10_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({10_n}, DataType::FLOAT); GenericTensorAccessorR input_accessor = create_random_filled_accessor_r(input_shape, allocator); diff --git a/lib/kernels/test/src/test_replicate_kernel.cc b/lib/kernels/test/src/test_replicate_kernel.cc index 87834d83ac..6009b3c501 100644 --- a/lib/kernels/test/src/test_replicate_kernel.cc +++ b/lib/kernels/test/src/test_replicate_kernel.cc @@ -9,9 +9,9 @@ TEST_SUITE(FF_TEST_SUITE) { nonnegative_int num_replicas = 10_n; TensorShape input_shape = - make_tensor_shape_from_legion_dims({100_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({100_n}, DataType::FLOAT); TensorShape output_shape = - make_tensor_shape_from_legion_dims({100_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({100_n}, DataType::FLOAT); ManagedPerDeviceFFHandle managed_handle{ /*workSpaceSize=*/1024 * 1024, @@ -51,8 +51,8 @@ TEST_SUITE(FF_TEST_SUITE) { nonnegative_int num_replicas = 2_n; TensorShape input_shape = - make_tensor_shape_from_legion_dims({5_n}, DataType::FLOAT); - TensorShape output_shape = make_tensor_shape_from_legion_dims( + make_tensor_shape_from_ff_ordered({5_n}, DataType::FLOAT); + TensorShape output_shape = make_tensor_shape_from_ff_ordered( {num_replicas, 5_n}, DataType::FLOAT); ManagedPerDeviceFFHandle managed_handle{ @@ -82,7 +82,7 @@ TEST_SUITE(FF_TEST_SUITE) { Kernels::Replicate::cpu_forward_kernel(input_accessor_cpu, output_accessor_cpu); - CHECK(accessors_are_equal(output_accessor_gpu, output_accessor_cpu)); + CHECK(accessor_data_is_equal(output_accessor_gpu, output_accessor_cpu)); } SUBCASE("backward_kernel") { @@ -108,7 +108,7 @@ TEST_SUITE(FF_TEST_SUITE) { input_grad_accessor_cpu, num_replicas.unwrap_nonnegative()); - CHECK(accessors_are_equal(input_grad_accessor_gpu, + CHECK(accessor_data_is_equal(input_grad_accessor_gpu, input_grad_accessor_cpu)); } } diff --git a/lib/kernels/test/src/test_reshape_kernel.cc b/lib/kernels/test/src/test_reshape_kernel.cc index ee7530c017..fa67953947 100644 --- a/lib/kernels/test/src/test_reshape_kernel.cc +++ b/lib/kernels/test/src/test_reshape_kernel.cc @@ -13,7 +13,7 @@ TEST_SUITE(FF_TEST_SUITE) { Allocator allocator = create_local_cuda_memory_allocator(); TensorShape input_shape = - make_tensor_shape_from_legion_dims({100_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({100_n}, DataType::FLOAT); TensorShape output_shape = input_shape; ReshapePerDeviceState state = diff --git a/lib/kernels/test/src/test_reverse_kernels.cc b/lib/kernels/test/src/test_reverse_kernels.cc index 481958fdfc..78ee803da6 100644 --- a/lib/kernels/test/src/test_reverse_kernels.cc +++ b/lib/kernels/test/src/test_reverse_kernels.cc @@ -11,7 +11,7 @@ TEST_SUITE(FF_TEST_SUITE) { nonnegative_int reverse_dim_size = 10_n; nonnegative_int in_blk_size = 10_n; - TensorShape input_shape = make_tensor_shape_from_legion_dims( + TensorShape input_shape = make_tensor_shape_from_ff_ordered( {num_out_blks, reverse_dim_size, in_blk_size}, DataType::FLOAT); TensorShape output_shape = input_shape; @@ -65,7 +65,7 @@ TEST_SUITE(FF_TEST_SUITE) { nonnegative_int reverse_dim_size = 4_n; nonnegative_int in_blk_size = 3_n; - TensorShape input_shape = make_tensor_shape_from_legion_dims( + TensorShape input_shape = make_tensor_shape_from_ff_ordered( {num_out_blks, reverse_dim_size, in_blk_size}, DataType::FLOAT); TensorShape output_shape = input_shape; @@ -106,7 +106,7 @@ TEST_SUITE(FF_TEST_SUITE) { reverse_dim_size.unwrap_nonnegative(), 
in_blk_size.unwrap_nonnegative()); - CHECK(accessors_are_equal(output_accessor_cpu, output_accessor_cpu)); + CHECK(accessor_data_is_equal(output_accessor_gpu, output_accessor_cpu)); } SUBCASE("backward_kernel") { @@ -139,7 +139,7 @@ TEST_SUITE(FF_TEST_SUITE) { reverse_dim_size.unwrap_nonnegative(), in_blk_size.unwrap_nonnegative()); - CHECK(accessors_are_equal(input_grad_accessor_gpu, + CHECK(accessor_data_is_equal(input_grad_accessor_gpu, input_grad_accessor_cpu)); } }
diff --git a/lib/kernels/test/src/test_softmax_kernel.cc b/lib/kernels/test/src/test_softmax_kernel.cc index d4fb496f7b..ecb996227f 100644 --- a/lib/kernels/test/src/test_softmax_kernel.cc +++ b/lib/kernels/test/src/test_softmax_kernel.cc @@ -20,7 +20,7 @@ TEST_SUITE(FF_TEST_SUITE) { Allocator allocator = create_local_cuda_memory_allocator(); TensorShape input_shape = - make_tensor_shape_from_legion_dims({100_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({100_n}, DataType::FLOAT); TensorShape output_shape = input_shape; SoftmaxPerDeviceState state =
diff --git a/lib/kernels/test/src/test_split_kernel.cc b/lib/kernels/test/src/test_split_kernel.cc index d98f88a30e..20a6898896 100644 --- a/lib/kernels/test/src/test_split_kernel.cc +++ b/lib/kernels/test/src/test_split_kernel.cc @@ -21,9 +21,9 @@ TEST_SUITE(FF_TEST_SUITE) { Allocator allocator = create_local_cuda_memory_allocator(); TensorShape input_shape = - make_tensor_shape_from_legion_dims({100_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({100_n}, DataType::FLOAT); TensorShape output_shape = - make_tensor_shape_from_legion_dims({50_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({50_n}, DataType::FLOAT); SUBCASE("forward_kernel") { GenericTensorAccessorW input_accessor =
diff --git a/lib/kernels/test/src/test_transpose_kernel.cc b/lib/kernels/test/src/test_transpose_kernel.cc index cac43c6ff3..ac8876ac98 100644 --- a/lib/kernels/test/src/test_transpose_kernel.cc +++ b/lib/kernels/test/src/test_transpose_kernel.cc @@ -20,7 +20,7 @@ TEST_SUITE(FF_TEST_SUITE) { Allocator allocator = create_local_cuda_memory_allocator(); TensorShape input_shape = - make_tensor_shape_from_legion_dims({10_n, 10_n}, DataType::FLOAT); + make_tensor_shape_from_ff_ordered({10_n, 10_n}, DataType::FLOAT); TensorShape output_shape = input_shape; SUBCASE("forward_kernel") {
diff --git a/lib/kernels/test/src/test_utils.cc b/lib/kernels/test/src/test_utils.cc index bc5f48654a..e335e5b449 100644 --- a/lib/kernels/test/src/test_utils.cc +++ b/lib/kernels/test/src/test_utils.cc @@ -1,12 +1,13 @@ #include "test_utils.h" +#include "kernels/datatype_dispatch.h" #include "op-attrs/tensor_shape.h" #include "utils/join_strings.h" #include <random> namespace FlexFlow { -TensorShape make_tensor_shape_from_legion_dims(FFOrdered<nonnegative_int> dims, - DataType DT) { +TensorShape make_tensor_shape_from_ff_ordered(FFOrdered<nonnegative_int> dims, + DataType DT) { return TensorShape{ TensorDims{ dims, @@ -128,26 +129,6 @@ bool contains_non_zero(GenericTensorAccessorR const &accessor) { cpu_accessor.data_type, cpu_accessor); } -GenericTensorAccessorR - copy_accessor_r_to_cpu_if_necessary(GenericTensorAccessorR const &accessor, - Allocator &cpu_allocator) { - GenericTensorAccessorR cpu_accessor = accessor; - if (accessor.device_type == DeviceType::GPU) { - cpu_accessor = copy_tensor_accessor_r(accessor, cpu_allocator); - } - return cpu_accessor; -} - -GenericTensorAccessorW - copy_accessor_w_to_cpu_if_necessary(GenericTensorAccessorW const &accessor, - Allocator &cpu_allocator) { - GenericTensorAccessorW cpu_accessor =
accessor; - if (accessor.device_type == DeviceType::GPU) { - cpu_accessor = copy_tensor_accessor_w(accessor, cpu_allocator); - } - return cpu_accessor; -} - template <DataType DT> struct Print2DCPUAccessorR { void operator()(GenericTensorAccessorR const &accessor, @@ -179,44 +160,6 @@ void print_2d_tensor_accessor_contents(GenericTensorAccessorR const &accessor, accessor.data_type, cpu_accessor, stream); } -template <DataType DT> -struct AccessorsAreEqual { - bool operator()(GenericTensorAccessorR const &accessor_a, - GenericTensorAccessorR const &accessor_b) { - Allocator cpu_allocator = create_local_cpu_memory_allocator(); - GenericTensorAccessorR cpu_accessor_a = - copy_accessor_r_to_cpu_if_necessary(accessor_a, cpu_allocator); - GenericTensorAccessorR cpu_accessor_b = - copy_accessor_r_to_cpu_if_necessary(accessor_b, cpu_allocator); - - using T = real_type_t<DT>; - T const *a_data_ptr = cpu_accessor_a.get<DT>(); - T const *b_data_ptr = cpu_accessor_b.get<DT>(); - - int volume = accessor_a.shape.num_elements().unwrap_nonnegative(); - for (size_t i = 0; i < volume; i++) { - if (a_data_ptr[i] != b_data_ptr[i]) { - return false; - } - } - - return true; - } -}; - -bool accessors_are_equal(GenericTensorAccessorR const &accessor_a, - GenericTensorAccessorR const &accessor_b) { - if (accessor_a.shape != accessor_b.shape) { - throw mk_runtime_error( - fmt::format("accessors_are_equal expected accessors to have the same " - "shape, but received: {} != {}", - accessor_a.shape, - accessor_b.shape)); - } - return DataTypeDispatch1<AccessorsAreEqual>{}( - accessor_a.data_type, accessor_a, accessor_b); -} - template <DataType DT> struct CreateFilledAccessorW { GenericTensorAccessorW operator()(TensorShape const &shape,
diff --git a/lib/kernels/test/src/test_utils.h b/lib/kernels/test/src/test_utils.h index 093a9a4a97..2e7294ed1d 100644 --- a/lib/kernels/test/src/test_utils.h +++ b/lib/kernels/test/src/test_utils.h @@ -2,7 +2,6 @@ #define _FLEXFLOW_KERNELS_TEST_UTILS #include "kernels/copy_tensor_accessor.h" -#include "kernels/datatype_dispatch.h" #include "kernels/device.h" #include "kernels/local_cpu_allocator.h" #include "kernels/local_cuda_allocator.h" @@ -29,27 +28,16 @@ GenericTensorAccessorW create_zero_filled_accessor_w(TensorShape const &shape, GenericTensorAccessorR create_zero_filled_accessor_r(TensorShape const &shape, Allocator &allocator); -TensorShape make_tensor_shape_from_legion_dims(FFOrdered<nonnegative_int> dims, - DataType DT); +TensorShape make_tensor_shape_from_ff_ordered(FFOrdered<nonnegative_int> dims, + DataType DT); bool contains_non_zero(GenericTensorAccessorR const &accessor); void fill_with_zeros(GenericTensorAccessorW const &accessor); -GenericTensorAccessorW - copy_accessor_w_to_cpu_if_necessary(GenericTensorAccessorW const &accessor, - Allocator &allocator); - -GenericTensorAccessorR - copy_accessor_r_to_cpu_if_necessary(GenericTensorAccessorR const &accessor, - Allocator &allocator); - void print_2d_tensor_accessor_contents(GenericTensorAccessorR const &accessor, std::ostream &stream); -bool accessors_are_equal(GenericTensorAccessorR const &accessor_a, - GenericTensorAccessorR const &accessor_b); - GenericTensorAccessorW create_filled_accessor_w(TensorShape const &shape, Allocator &allocator, DataTypeValue val);
From 4fc04751c7b5550f19da89ac50a15ae8ad8ca1ee Mon Sep 17 00:00:00 2001 From: Dylan Lim Date: Mon, 24 Feb 2025 19:40:45 -0800 Subject: [PATCH 2/2] remove . files --- .envrc | 3 --- .proj.toml | 1 - .vimrc | 8 -------- 3 files changed, 12 deletions(-) delete mode 100644 .envrc delete mode 100644 .vimrc
diff --git a/.envrc b/.envrc deleted file mode 100644 index 2797f0f929..0000000000 --- a/.envrc +++ /dev/null @@ -1,3 +0,0 @@ -source_up_if_exists - -use flake
diff --git a/.proj.toml b/.proj.toml index b3b90bbada..10307a6efa 100644 --- a/.proj.toml +++ b/.proj.toml @@ -15,7 +15,6 @@ build_targets = [ "models", "export-model-arch", "substitution-to-dot", - "kernels-tests", ] test_targets = [
diff --git a/.vimrc b/.vimrc deleted file mode 100644 index 4c8a8a8279..0000000000 --- a/.vimrc +++ /dev/null @@ -1,8 +0,0 @@ -" example search path configuration -set path=lib/runtime/**,lib/** - -" set build target -" let g:target = "pcg" - -" set test target -" let g:test_target = "utils-test"