diff --git a/CMakeExt/Cuda.cmake b/CMakeExt/Cuda.cmake
new file mode 100644
index 000000000..d1ffd5efe
--- /dev/null
+++ b/CMakeExt/Cuda.cmake
@@ -0,0 +1,14 @@
+# Optional CUDA support. The toolkit is only probed when ENABLE_CUDA is
+# set, so default configurations neither depend on nor report a CUDA
+# installation. All users of the result check `ENABLE_CUDA AND CUDA_FOUND`.
+option(ENABLE_CUDA "Enable CUDA support" OFF)
+
+if (ENABLE_CUDA)
+  find_package(CUDA)
+  if (CUDA_FOUND)
+    message(STATUS "CUDA found, version ${CUDA_VERSION_STRING}")
+  else()
+    message(WARNING "ENABLE_CUDA is set but CUDA was not found; "
+                    "building without CUDA support")
+  endif()
+endif()
diff --git a/CMakeExt/GenerateDASHCXX.cmake b/CMakeExt/GenerateDASHCXX.cmake
index 2b99088db..1c9172256 100644
--- a/CMakeExt/GenerateDASHCXX.cmake
+++ b/CMakeExt/GenerateDASHCXX.cmake
@@ -94,3 +94,23 @@ if (";${DART_IMPLEMENTATIONS_LIST};" MATCHES ";shmem;")
     )"
   )
 endif()
+
+if (ENABLE_CUDA AND CUDA_FOUND)
+  # Configure dashcxx.in into a dash-nvcc script: DASH_USE_CUDA is
+  # prepended to CXXFLAGS_WRAP and DASHCC is set to nvcc with the
+  # configured C++ compiler as host compiler (-ccbin).
+  # The nvcc located by FindCUDA is used rather than assuming it lives in
+  # ${CUDA_TOOLKIT_ROOT_DIR}/bin.
+  set(CXXFLAGS_WRAP "-DDASH_USE_CUDA ${CXXFLAGS_WRAP}")
+  set(DASHCC "${CUDA_NVCC_EXECUTABLE} -ccbin ${CMAKE_CXX_COMPILER}")
+  configure_file(
+    ${CMAKE_SOURCE_DIR}/dash/scripts/dashcc/dashcxx.in
+    ${CMAKE_BINARY_DIR}/bin/dash-nvcc
+    @ONLY)
+  install(
+    FILES ${CMAKE_BINARY_DIR}/bin/dash-nvcc
+    DESTINATION ${CMAKE_INSTALL_PREFIX}/bin
+    PERMISSIONS OWNER_WRITE OWNER_READ GROUP_READ WORLD_READ
+                OWNER_EXECUTE GROUP_EXECUTE WORLD_EXECUTE)
+endif()
+
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a21f6debe..c5b28cb91 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -123,6 +123,7 @@ include(${CMAKE_SOURCE_DIR}/CMakeExt/NUMA.cmake)
 include(${CMAKE_SOURCE_DIR}/CMakeExt/IPM.cmake)
 include(${CMAKE_SOURCE_DIR}/CMakeExt/PLASMA.cmake)
 include(${CMAKE_SOURCE_DIR}/CMakeExt/HDF5.cmake)
+include(${CMAKE_SOURCE_DIR}/CMakeExt/Cuda.cmake)
 
 if (ENABLE_MKL)
   include(${CMAKE_SOURCE_DIR}/CMakeExt/MKL.cmake)
diff --git a/dash/CMakeLists.txt b/dash/CMakeLists.txt
index 5d0fe2719..08ad6cd7c 100644
--- a/dash/CMakeLists.txt
+++ b/dash/CMakeLists.txt
@@ -212,6 +212,14 @@ if (ENABLE_SCALAPACK)
   endif()
 endif()
 
+if (ENABLE_CUDA AND CUDA_FOUND)
+  # Add the include directories and libraries reported by find_package(CUDA)
+  set (ADDITIONAL_INCLUDES ${ADDITIONAL_INCLUDES}
+       ${CUDA_INCLUDE_DIRS})
+  set (ADDITIONAL_LIBRARIES ${ADDITIONAL_LIBRARIES}
+       ${CUDA_LIBRARIES})
+endif()
+
 if (HAVE_STD_TRIVIALLY_COPYABLE)
   set (ADDITIONAL_COMPILE_FLAGS ${ADDITIONAL_COMPILE_FLAGS}
        -DDASH_HAVE_STD_TRIVIALLY_COPYABLE)
diff --git a/dash/include/dash/algorithm/Sort.h b/dash/include/dash/algorithm/Sort.h
index c0d9f56cd..0b198ad53 100644
--- a/dash/include/dash/algorithm/Sort.h
+++ b/dash/include/dash/algorithm/Sort.h
@@ -18,6 +18,13 @@
 #include
 #include
 
+#ifdef DASH_USE_CUDA
+#include <cuda_runtime.h>
+#include <thrust/copy.h>
+#include <thrust/device_vector.h>
+#include <thrust/sort.h>
+#endif
+
 namespace dash {
 
 #ifdef DOXYGEN
@@ -114,7 +121,18 @@ void sort(GlobRandomIt begin, GlobRandomIt end, SortableHash sortable_hash)
   if (pattern.team().size() == 1) {
     DASH_LOG_TRACE("dash::sort", "Sorting on a team with only 1 unit");
     trace.enter_state("final_local_sort");
+#ifdef DASH_USE_CUDA
+    // Stage the local range in device memory, sort it with Thrust and
+    // copy the result back in place.
+    // NOTE(review): thrust::sort orders by operator<, not by sort_comp --
+    // confirm this is intended.
+    thrust::device_vector<typename GlobRandomIt::value_type> d_vec(
+        begin.local(), end.local());
+    thrust::sort(d_vec.begin(), d_vec.end());
+    thrust::copy(d_vec.begin(), d_vec.end(), begin.local());
+#else
     std::sort(begin.local(), end.local(), sort_comp);
+#endif
     trace.exit_state("final_local_sort");
     return;
   }
@@ -145,7 +163,25 @@ void sort(GlobRandomIt begin, GlobRandomIt end, SortableHash sortable_hash)
 
   // initial local_sort
   trace.enter_state("1:initial_local_sort");
+#ifdef DASH_USE_CUDA
+  {
+    // Select the device by unit id modulo the number of visible GPUs
+    // rather than assuming exactly two devices.
+    int n_devices = 0;
+    if (cudaGetDeviceCount(&n_devices) == cudaSuccess && n_devices > 0) {
+      cudaSetDevice(dash::myid() % n_devices);
+    }
+    // NOTE(review): thrust::sort orders by operator<, not by sort_comp --
+    // confirm this is intended.
+    // d_vec is scoped so its device memory is freed right after the sort.
+    thrust::device_vector<typename GlobRandomIt::value_type> d_vec(
+        lbegin, lend);
+    thrust::sort(d_vec.begin(), d_vec.end());
+    thrust::copy(d_vec.begin(), d_vec.end(), lbegin);
+  }
+#else
   std::sort(lbegin, lend, sort_comp);
+#endif
   trace.exit_state("1:initial_local_sort");
 
   trace.enter_state("2:init_temporary_global_data");
diff --git a/dash/scripts/dashcc/dashcxx.in b/dash/scripts/dashcc/dashcxx.in
index 83290a404..6232f3592 100644
--- a/dash/scripts/dashcc/dashcxx.in
+++ b/dash/scripts/dashcc/dashcxx.in
@@ -21,6 +21,7 @@ COMPILE_ONLY=false
 LINK_ONLY=true
 DASH_NOCPPFLAGS=false
 COMPILER_ARGS=""
+IS_CUDA=false
 
 function INVOKE_COMPILER()
 {
@@ -50,9 +51,14 @@ for arg in $@ ; do
     LINK_ONLY=false
   fi
   # check for source files
   if [[ $arg == *\.cc || $arg == *\.cpp ]] ; then
     LINK_ONLY=false
   fi
+  # CUDA sources also count as source files and flag the invocation as CUDA
+  if [[ $arg == *\.cu ]] ; then
+    LINK_ONLY=false
+    IS_CUDA=true
+  fi
   if [ "$arg" == "--dash:verbose" -o "$arg" == "-dash:verbose" ] ; then
     DASH_VERBOSE=true
   elif [ "$arg" == "--dash:nocppflags" -o "$arg" == "-dash:nocppflags" ] ; then