diff --git a/CMakeLists.txt b/CMakeLists.txt index f3cfc209d..a05ab5d7b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,14 +1,15 @@ -cmake_minimum_required (VERSION 3.0) +cmake_minimum_required (VERSION 3.12) if (NOT DEFINED CMAKE_BUILD_TYPE) set (CMAKE_BUILD_TYPE Release CACHE STRING "Build type") endif () -project (fftw) +project (FFTW3 + VERSION 3.3.8 + HOMEPAGE_URL http://www.fftw.org/) -if (POLICY CMP0042) - cmake_policy (SET CMP0042 NEW) -endif () +cmake_policy (SET CMP0042 NEW) +cmake_policy (SET CMP0076 NEW) option (BUILD_SHARED_LIBS "Build shared libraries" ON) option (BUILD_TESTS "Build tests" ON) @@ -28,8 +29,23 @@ option (ENABLE_AVX2 "Compile with AVX2 instruction set support" OFF) option (DISABLE_FORTRAN "Disable Fortran wrapper routines" OFF) -include(GNUInstallDirs) +set(ENABLED_PRECISIONS 0) +foreach(SELECTED_PRECISION ENABLE_FLOAT ENABLE_LONG_DOUBLE ENABLE_QUAD_PRECISION) + if(${SELECTED_PRECISION}) + math(EXPR ENABLED_PRECISIONS "${ENABLED_PRECISIONS} + 1") + endif() +endforeach(SELECTED_PRECISION) +if(ENABLED_PRECISIONS GREATER 1) + message(FATAL_ERROR "Only one or no (defaults to double precision) precision can be selected") +endif() +if((ENABLE_LONG_DOUBLE OR ENABLE_QUAD_PRECISION) AND (ENABLE_SSE2 OR ENABLE_AVX OR ENABLE_AVX2)) + message(FATAL_ERROR "ENABLE_SSE2, ENABLE_AVX and ENABLE_AVX2 not supported when ENABLE_LONG_DOUBLE or ENABLE_QUAD_PRECISION was selected") +endif() + +include(GNUInstallDirs) +include(GenerateExportHeader) +include(CMakePackageConfigHelpers) include (CheckIncludeFile) check_include_file (alloca.h HAVE_ALLOCA_H) @@ -113,19 +129,17 @@ endif () if (ENABLE_THREADS) - find_package (Threads) + find_package (Threads REQUIRED) endif () if (Threads_FOUND) if(CMAKE_USE_PTHREADS_INIT) - set (USING_POSIX_THREADS 1) + set(USING_POSIX_THREADS TRUE) endif () set (HAVE_THREADS TRUE) endif () if (ENABLE_OPENMP) - find_package (OpenMP) -endif () -if (OPENMP_FOUND) + find_package (OpenMP REQUIRED) set (HAVE_OPENMP TRUE) endif () 
@@ -190,91 +204,6 @@ endif () if (HAVE_SSE2 OR HAVE_AVX) set (HAVE_SIMD TRUE) endif () -file(GLOB fftw_api_SOURCE api/*.c api/*.h) -file(GLOB fftw_dft_SOURCE dft/*.c dft/*.h) -file(GLOB fftw_dft_scalar_SOURCE dft/scalar/*.c dft/scalar/*.h) -file(GLOB fftw_dft_scalar_codelets_SOURCE dft/scalar/codelets/*.c dft/scalar/codelets/*.h) -file(GLOB fftw_dft_simd_SOURCE dft/simd/*.c dft/simd/*.h) - -file(GLOB fftw_dft_simd_sse2_SOURCE dft/simd/sse2/*.c dft/simd/sse2/*.h) -file(GLOB fftw_dft_simd_avx_SOURCE dft/simd/avx/*.c dft/simd/avx/*.h) -file(GLOB fftw_dft_simd_avx2_SOURCE dft/simd/avx2/*.c dft/simd/avx2/*.h dft/simd/avx2-128/*.c dft/simd/avx2-128/*.h) -file(GLOB fftw_kernel_SOURCE kernel/*.c kernel/*.h) -file(GLOB fftw_rdft_SOURCE rdft/*.c rdft/*.h) -file(GLOB fftw_rdft_scalar_SOURCE rdft/scalar/*.c rdft/scalar/*.h) - -file(GLOB fftw_rdft_scalar_r2cb_SOURCE rdft/scalar/r2cb/*.c - rdft/scalar/r2cb/*.h) -file(GLOB fftw_rdft_scalar_r2cf_SOURCE rdft/scalar/r2cf/*.c - rdft/scalar/r2cf/*.h) -file(GLOB fftw_rdft_scalar_r2r_SOURCE rdft/scalar/r2r/*.c - rdft/scalar/r2r/*.h) - -file(GLOB fftw_rdft_simd_SOURCE rdft/simd/*.c rdft/simd/*.h) -file(GLOB fftw_rdft_simd_sse2_SOURCE rdft/simd/sse2/*.c rdft/simd/sse2/*.h) -file(GLOB fftw_rdft_simd_avx_SOURCE rdft/simd/avx/*.c rdft/simd/avx/*.h) -file(GLOB fftw_rdft_simd_avx2_SOURCE rdft/simd/avx2/*.c rdft/simd/avx2/*.h rdft/simd/avx2-128/*.c rdft/simd/avx2-128/*.h) - -file(GLOB fftw_reodft_SOURCE reodft/*.c reodft/*.h) -file(GLOB fftw_simd_support_SOURCE simd-support/*.c simd-support/*.h) -file(GLOB fftw_libbench2_SOURCE libbench2/*.c libbench2/*.h) -list (REMOVE_ITEM fftw_libbench2_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/libbench2/useropt.c) - -set(SOURCEFILES - ${fftw_api_SOURCE} - ${fftw_dft_SOURCE} - ${fftw_dft_scalar_SOURCE} - ${fftw_dft_scalar_codelets_SOURCE} - ${fftw_dft_simd_SOURCE} - ${fftw_kernel_SOURCE} - ${fftw_rdft_SOURCE} - ${fftw_rdft_scalar_SOURCE} - - ${fftw_rdft_scalar_r2cb_SOURCE} - ${fftw_rdft_scalar_r2cf_SOURCE} - 
${fftw_rdft_scalar_r2r_SOURCE} - - ${fftw_rdft_simd_SOURCE} - ${fftw_reodft_SOURCE} - ${fftw_simd_support_SOURCE} - ${fftw_threads_SOURCE} -) - -set(fftw_par_SOURCE - threads/api.c - threads/conf.c - threads/ct.c - threads/dft-vrank-geq1.c - threads/f77api.c - threads/hc2hc.c - threads/rdft-vrank-geq1.c - threads/vrank-geq1-rdft2.c) - -set (fftw_threads_SOURCE ${fftw_par_SOURCE} threads/threads.c) -set (fftw_omp_SOURCE ${fftw_par_SOURCE} threads/openmp.c) - - -include_directories (.) - - -if (WITH_COMBINED_THREADS) - list (APPEND SOURCEFILES ${fftw_threads_SOURCE}) -endif () - - -if (HAVE_SSE2) - list (APPEND SOURCEFILES ${fftw_dft_simd_sse2_SOURCE} ${fftw_rdft_simd_sse2_SOURCE}) -endif () - -if (HAVE_AVX) - list (APPEND SOURCEFILES ${fftw_dft_simd_avx_SOURCE} ${fftw_rdft_simd_avx_SOURCE}) -endif () - -if (HAVE_AVX2) - list (APPEND SOURCEFILES ${fftw_dft_simd_avx2_SOURCE} ${fftw_rdft_simd_avx2_SOURCE}) -endif () - -set (FFTW_VERSION 3.3.7) set (PREC_SUFFIX) if (ENABLE_FLOAT) @@ -299,12 +228,27 @@ set (fftw3_lib fftw3${PREC_SUFFIX}) configure_file (cmake.config.h.in config.h @ONLY) include_directories (${CMAKE_CURRENT_BINARY_DIR}) -if (BUILD_SHARED_LIBS) - add_definitions (-DFFTW_DLL) -endif () +set(PUBLIC_HEADER_FILES + "${CMAKE_CURRENT_LIST_DIR}/api/fftw3.h" + "${CMAKE_CURRENT_BINARY_DIR}/fftw3_export.h") + +add_library (${fftw3_lib}) +add_library (${PROJECT_NAME}::${fftw3_lib} ALIAS ${fftw3_lib}) + +target_include_directories (${fftw3_lib} + INTERFACE + $ + $ + $) + +set_target_properties(${fftw3_lib} PROPERTIES + PUBLIC_HEADER "${PUBLIC_HEADER_FILES}" + DEFINE_SYMBOL "fftw3_EXPORTS") -add_library (${fftw3_lib} ${SOURCEFILES}) -target_include_directories (${fftw3_lib} INTERFACE $) +generate_export_header(${fftw3_lib} + BASE_NAME "fftw3") + +set (fftw3_libs ${fftw3_lib}) if (MSVC AND NOT (CMAKE_C_COMPILER_ID STREQUAL "Intel")) target_compile_definitions (${fftw3_lib} PRIVATE /bigobj) endif () @@ -330,59 +274,123 @@ endif () set (subtargets ${fftw3_lib}) if 
(Threads_FOUND) + target_compile_definitions (${fftw3_lib} + PUBLIC + ENABLE_THREADS) if (WITH_COMBINED_THREADS) - target_link_libraries (${fftw3_lib} ${CMAKE_THREAD_LIBS_INIT}) + target_link_libraries (${fftw3_lib} Threads::Threads) else () - add_library (${fftw3_lib}_threads ${fftw_threads_SOURCE}) - target_include_directories (${fftw3_lib}_threads INTERFACE $) - target_link_libraries (${fftw3_lib}_threads ${fftw3_lib}) - target_link_libraries (${fftw3_lib}_threads ${CMAKE_THREAD_LIBS_INIT}) + add_library (${fftw3_lib}_threads) + add_library (${PROJECT_NAME}::${fftw3_lib}_threads ALIAS ${fftw3_lib}_threads) + set_target_properties(${fftw3_lib}_threads PROPERTIES + DEFINE_SYMBOL "fftw3_threads_EXPORTS") + generate_export_header(${fftw3_lib}_threads + BASE_NAME "fftw3_threads") + target_compile_definitions (${fftw3_lib} + PRIVATE + ENABLE_EXPORT_ADDITIONAL_FUNCTIONS) + target_compile_definitions (${fftw3_lib}_threads + PRIVATE + ENABLE_EXPORT_ADDITIONAL_FUNCTIONS + BUILD_THREADS_EXTERNAL) + target_link_libraries (${fftw3_lib}_threads + PUBLIC + ${PROJECT_NAME}::${fftw3_lib} + PRIVATE + Threads::Threads) list (APPEND subtargets ${fftw3_lib}_threads) endif () endif () -if (OPENMP_FOUND) - add_library (${fftw3_lib}_omp ${fftw_omp_SOURCE}) - target_include_directories (${fftw3_lib}_omp INTERFACE $) - target_link_libraries (${fftw3_lib}_omp ${fftw3_lib}) - target_link_libraries (${fftw3_lib}_omp ${CMAKE_THREAD_LIBS_INIT}) +if (OpenMP_FOUND) + add_library (${fftw3_lib}_omp) + add_library (${PROJECT_NAME}::${fftw3_lib}_omp ALIAS ${fftw3_lib}_omp) + set_target_properties(${fftw3_lib}_omp PROPERTIES + DEFINE_SYMBOL "fftw3_threads_EXPORTS") + generate_export_header(${fftw3_lib}_omp + BASE_NAME "fftw3_threads") + target_compile_definitions (${fftw3_lib} + PUBLIC + ENABLE_THREADS + PRIVATE + ENABLE_EXPORT_ADDITIONAL_FUNCTIONS) + target_compile_definitions (${fftw3_lib}_omp + PRIVATE + ENABLE_EXPORT_ADDITIONAL_FUNCTIONS + BUILD_THREADS_EXTERNAL) + target_link_libraries 
(${fftw3_lib}_omp + PUBLIC + ${PROJECT_NAME}::${fftw3_lib} + OpenMP::OpenMP_C) list (APPEND subtargets ${fftw3_lib}_omp) - target_compile_options (${fftw3_lib}_omp PRIVATE ${OpenMP_C_FLAGS}) endif () +add_subdirectory(api) +add_subdirectory(dft) +add_subdirectory(kernel) +add_subdirectory(rdft) +add_subdirectory(reodft) +add_subdirectory(simd-support) +add_subdirectory(threads) + foreach(subtarget ${subtargets}) - set_target_properties (${subtarget} PROPERTIES SOVERSION 3.5.7 VERSION 3) - install (TARGETS ${subtarget} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) -endforeach () -install(TARGETS ${fftw3_lib} - EXPORT FFTW3LibraryDepends - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) - -install (FILES api/fftw3.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + set_target_properties (${subtarget} PROPERTIES + LANGUAGES C + SOVERSION 3.5.7 + C_STANDARD 11 + C_VISIBILITY_PRESET hidden + VISIBILITY_INLINES_HIDDEN 1) + target_include_directories (${subtarget} + PRIVATE + $ + INTERFACE + $ + $ + $) +endforeach() + +install (TARGETS ${subtargets} + EXPORT ${PROJECT_NAME}Targets + ARCHIVE COMPONENT FFTW3_LIBRARY + LIBRARY COMPONENT FFTW3_LIBRARY + RUNTIME COMPONENT FFTW3_LIBRARY # This is for Windows + PUBLIC_HEADER COMPONENT FFTW3_DEVELOPMENT) + if (EXISTS ${CMAKE_SOURCE_DIR}/api/fftw3.f) - install (FILES api/fftw3.f api/fftw3l.f03 api/fftw3q.f03 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + install (FILES api/fftw3.f api/fftw3l.f03 api/fftw3q.f03 + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + COMPONENT FFTW3_DEVELOPMENT) endif () if (EXISTS ${CMAKE_SOURCE_DIR}/api/fftw3.f03.in) file (READ api/fftw3.f03.in FFTW3_F03_IN OFFSET 42) file (WRITE ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 "! Generated automatically. 
DO NOT EDIT!\n\n") file (APPEND ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 " integer, parameter :: C_FFTW_R2R_KIND = ${C_FFTW_R2R_KIND}\n\n") file (APPEND ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 "${FFTW3_F03_IN}") - install (FILES ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + install (FILES ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} + COMPONENT FFTW3_DEVELOPMENT) endif () if (BUILD_TESTS) - add_executable (bench ${fftw_libbench2_SOURCE} tests/bench.c tests/hook.c tests/fftw-bench.c) + add_executable (bench) + add_subdirectory(libbench2) + add_subdirectory(tests) - if (ENABLE_THREADS AND NOT WITH_COMBINED_THREADS) - target_link_libraries (bench ${fftw3_lib}_threads) + target_include_directories (bench + PRIVATE + $) + + if (Threads_FOUND) + if(WITH_COMBINED_THREADS) + target_link_libraries (bench ${PROJECT_NAME}::${fftw3_lib}) + else() + target_link_libraries (bench ${PROJECT_NAME}::${fftw3_lib}_threads) + endif() + elseif (OpenMP_FOUND) + target_link_libraries (bench ${PROJECT_NAME}::${fftw3_lib}_omp) else () - target_link_libraries (bench ${fftw3_lib}) + target_link_libraries (bench ${PROJECT_NAME}::${fftw3_lib}) endif () @@ -405,25 +413,50 @@ set (prefix ${CMAKE_INSTALL_PREFIX}) set (exec_prefix ${CMAKE_INSTALL_PREFIX}) set (libdir ${CMAKE_INSTALL_FULL_LIBDIR}) set (includedir ${CMAKE_INSTALL_FULL_INCLUDEDIR}) -set (VERSION ${FFTW_VERSION}) +set (VERSION ${PROJECT_VERSION}) configure_file (fftw.pc.in fftw${PREC_SUFFIX}.pc @ONLY) install (FILES ${CMAKE_CURRENT_BINARY_DIR}/fftw${PREC_SUFFIX}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig - COMPONENT Development) + COMPONENT FFTW3_DEVELOPMENT) # cmake file -set (FFTW3_LIBRARIES "FFTW3::${fftw3_lib}") -configure_file (FFTW3Config.cmake.in FFTW3${PREC_SUFFIX}Config.cmake @ONLY) -configure_file (FFTW3ConfigVersion.cmake.in FFTW3${PREC_SUFFIX}ConfigVersion.cmake @ONLY) +set (ConfigPackageLocation "${CMAKE_INSTALL_LIBDIR}/cmake/fftw3${PREC_SUFFIX}") + 
+configure_file (FFTW3Config.cmake.in ${PROJECT_NAME}${PREC_SUFFIX}Config.cmake @ONLY) +write_basic_package_version_file(${PROJECT_NAME}${PREC_SUFFIX}ConfigVersion.cmake + COMPATIBILITY SameMajorVersion) + install (FILES - ${CMAKE_CURRENT_BINARY_DIR}/FFTW3${PREC_SUFFIX}Config.cmake - ${CMAKE_CURRENT_BINARY_DIR}/FFTW3${PREC_SUFFIX}ConfigVersion.cmake - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/fftw3${PREC_SUFFIX} - COMPONENT Development) - -export (TARGETS ${fftw3_lib} NAMESPACE FFTW3:: FILE ${PROJECT_BINARY_DIR}/FFTW3LibraryDepends.cmake) -install(EXPORT FFTW3LibraryDepends - NAMESPACE FFTW3:: - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/fftw3${PREC_SUFFIX} - COMPONENT Development) + ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}${PREC_SUFFIX}Config.cmake + ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}${PREC_SUFFIX}ConfigVersion.cmake + DESTINATION ${ConfigPackageLocation} + COMPONENT FFTW3_DEVELOPMENT) + +install(EXPORT ${PROJECT_NAME}Targets FILE ${PROJECT_NAME}${PREC_SUFFIX}Targets.cmake + NAMESPACE ${PROJECT_NAME}${PREC_SUFFIX}:: + DESTINATION ${ConfigPackageLocation} + COMPONENT FFTW3_DEVELOPMENT) + + +# CPack settings +set(CPACK_PACKAGE_NAME "libfftw3") +set(CPACK_PACKAGE_DESCRIPTION "Library for computing Fast Fourier Transforms") +set(CPACK_PACKAGE_CONTACT "Martin Stolpe martin.stolpe@iaf.fraunhofer.de") +set(CPACK_PACKAGE_VENDOR "Fraunhofer-Institut für Angewandte Festkörperphysik IAF") +set(CPACK_DEBIAN_PACKAGE_DEPENDS "") +set(CPACK_DEB_COMPONENT_INSTALL ON) +set(CPACK_DEBIAN_FFTW3_LIBRARY_PACKAGE_NAME "libfftw3") +set(CPACK_DEBIAN_FFTW3_DEVELOPMENT_PACKAGE_NAME "libfftw3-dev") + +include(CPack) + +cpack_add_component(FFTW3_LIBRARY + DISPLAY_NAME "libfftw3" + DESCRIPTION "Library for computing Fast Fourier Transforms" +) +cpack_add_component(FFTW3_DEVELOPMENT + DISPLAY_NAME "libfftw3-dev" + DESCRIPTION "Library for computing Fast Fourier Transforms - development" + DEPENDS FFTW3_LIBRARY +) diff --git a/FFTW3Config.cmake.in b/FFTW3Config.cmake.in index 
6b1fbc2e1..38da81d75 100644 --- a/FFTW3Config.cmake.in +++ b/FFTW3Config.cmake.in @@ -1,17 +1,17 @@ -# defined since 2.8.3 -if (CMAKE_VERSION VERSION_LESS 2.8.3) - get_filename_component (CMAKE_CURRENT_LIST_DIR ${CMAKE_CURRENT_LIST_FILE} PATH) -endif () - -# Allows loading FFTW3 settings from another project -set (FFTW3_CONFIG_FILE "${CMAKE_CURRENT_LIST_FILE}") - -set (FFTW3@PREC_SUFFIX@_LIBRARIES fftw3@PREC_SUFFIX@) -set (FFTW3@PREC_SUFFIX@_LIBRARY_DIRS @CMAKE_INSTALL_FULL_LIBDIR@) -set (FFTW3@PREC_SUFFIX@_INCLUDE_DIRS @CMAKE_INSTALL_FULL_INCLUDEDIR@) - -include ("${CMAKE_CURRENT_LIST_DIR}/FFTW3LibraryDepends.cmake") - -if (CMAKE_VERSION VERSION_LESS 2.8.3) - set (CMAKE_CURRENT_LIST_DIR) -endif () +get_filename_component(@PROJECT_NAME@@PREC_SUFFIX@_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +include(CMakeFindDependencyMacro) + +set(ENABLE_OPENMP @ENABLE_OPENMP@) +set(ENABLE_THREADS @ENABLE_THREADS@) + +if(${ENABLE_OPENMP}) + find_dependency(OpenMP REQUIRED) +endif() +if(${ENABLE_THREADS}) + find_dependency(Threads REQUIRED) +endif() + +if(NOT TARGET @PROJECT_NAME@@PREC_SUFFIX@::fftw3@PREC_SUFFIX@) + include("${@PROJECT_NAME@@PREC_SUFFIX@_CMAKE_DIR}/@PROJECT_NAME@@PREC_SUFFIX@Targets.cmake") +endif() + diff --git a/FFTW3ConfigVersion.cmake.in b/FFTW3ConfigVersion.cmake.in deleted file mode 100644 index cb906a406..000000000 --- a/FFTW3ConfigVersion.cmake.in +++ /dev/null @@ -1,12 +0,0 @@ - -set (PACKAGE_VERSION "@FFTW_VERSION@") - -# Check whether the requested PACKAGE_FIND_VERSION is compatible -if ("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}") - set (PACKAGE_VERSION_COMPATIBLE FALSE) -else () - set (PACKAGE_VERSION_COMPATIBLE TRUE) - if ("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}") - set (PACKAGE_VERSION_EXACT TRUE) - endif () -endif () diff --git a/api/CMakeLists.txt b/api/CMakeLists.txt new file mode 100644 index 000000000..802b7686c --- /dev/null +++ b/api/CMakeLists.txt @@ -0,0 +1,83 @@ +target_sources(${fftw3_lib} + PUBLIC + 
$ + PRIVATE + api.h + f77funcs.h + guru.h + guru64.h + mktensor-iodims.h + plan-guru-dft-c2r.h + plan-guru-dft-r2c.h + plan-guru-dft.h + plan-guru-r2r.h + plan-guru-split-dft-c2r.h + plan-guru-split-dft-r2c.h + plan-guru-split-dft.h + x77.h + apiplan.c + configure.c + execute-dft-c2r.c + execute-dft-r2c.c + execute-dft.c + execute-r2r.c + execute-split-dft-c2r.c + execute-split-dft-r2c.c + execute-split-dft.c + execute.c + export-wisdom-to-file.c + export-wisdom-to-string.c + export-wisdom.c + f77api.c + flops.c + forget-wisdom.c + import-system-wisdom.c + import-wisdom-from-file.c + import-wisdom-from-string.c + import-wisdom.c + malloc.c + map-r2r-kind.c + mapflags.c + mkprinter-file.c + mkprinter-str.c + mktensor-iodims.c + mktensor-iodims64.c + mktensor-rowmajor.c + plan-dft-1d.c + plan-dft-2d.c + plan-dft-3d.c + plan-dft-c2r-1d.c + plan-dft-c2r-2d.c + plan-dft-c2r-3d.c + plan-dft-c2r.c + plan-dft-r2c-1d.c + plan-dft-r2c-2d.c + plan-dft-r2c-3d.c + plan-dft-r2c.c + plan-dft.c + plan-guru-dft-c2r.c + plan-guru-dft-r2c.c + plan-guru-dft.c + plan-guru-r2r.c + plan-guru-split-dft-c2r.c + plan-guru-split-dft-r2c.c + plan-guru-split-dft.c + plan-guru64-dft-c2r.c + plan-guru64-dft-r2c.c + plan-guru64-dft.c + plan-guru64-r2r.c + plan-guru64-split-dft-c2r.c + plan-guru64-split-dft-r2c.c + plan-guru64-split-dft.c + plan-many-dft-c2r.c + plan-many-dft-r2c.c + plan-many-dft.c + plan-many-r2r.c + plan-r2r-1d.c + plan-r2r-2d.c + plan-r2r-3d.c + plan-r2r.c + print-plan.c + rdft2-pad.c + the-planner.c + version.c) diff --git a/api/api.h b/api/api.h index e3e94ddd1..e8b642cb9 100644 --- a/api/api.h +++ b/api/api.h @@ -22,28 +22,6 @@ #ifndef __API_H__ #define __API_H__ -#ifndef CALLING_FFTW /* defined in hook.c, when calling internal functions */ -# define COMPILING_FFTW /* used for DLL symbol exporting in fftw3.h */ -#endif - -/* When compiling with GNU libtool on Windows, DLL_EXPORT is #defined - for compiling the shared-library code. 
In this case, we'll #define - FFTW_DLL to add dllexport attributes to the specified functions in - fftw3.h. - - If we don't specify dllexport explicitly, then libtool - automatically exports all symbols. However, if we specify - dllexport explicitly for any functions, then libtool apparently - doesn't do any automatic exporting. (Not documented, grrr, but - this is the observed behavior with libtool 1.5.8.) Thus, using - this forces us to correctly dllexport every exported symbol, or - linking bench.exe will fail. This has the advantage of forcing - us to mark things correctly, which is necessary for other compilers - (such as MS VC++). */ -#ifdef DLL_EXPORT -# define FFTW_DLL -#endif - /* just in case: force not to use C99 complex numbers (we need this for IBM xlc because _Complex_I is treated specially and is defined even if is not included) */ @@ -90,14 +68,14 @@ int X(guru_kosherp)(int rank, const X(iodim) *dims, int X(guru64_kosherp)(int rank, const X(iodim64) *dims, int howmany_rank, const X(iodim64) *howmany_dims); -/* Note: FFTW_EXTERN is used for "internal" functions used in tests/hook.c */ +/* Note: extern is used for "internal" functions used in tests/hook.c */ -FFTW_EXTERN printer *X(mkprinter_file)(FILE *f); +FFTW3_EXPORT printer *X(mkprinter_file)(FILE *f); printer *X(mkprinter_cnt)(size_t *cnt); printer *X(mkprinter_str)(char *s); -FFTW_EXTERN planner *X(the_planner)(void); +FFTW3_EXPORT planner *X(the_planner)(void); void X(configure_planner)(planner *plnr); void X(mapflags)(planner *, unsigned); @@ -108,7 +86,7 @@ rdft_kind *X(map_r2r_kind)(int rank, const X(r2r_kind) * kind); typedef void (*planner_hook_t)(void); -void X(set_planner_hooks)(planner_hook_t before, planner_hook_t after); +void EXPORT_ADDITIONAL_FUNCTIONS X(set_planner_hooks)(planner_hook_t before, planner_hook_t after); #ifdef __cplusplus } /* extern "C" */ diff --git a/api/f77funcs.h b/api/f77funcs.h index 1e557dc1e..8c690d54f 100644 --- a/api/f77funcs.h +++ b/api/f77funcs.h @@ 
-23,28 +23,28 @@ f77api.c, possibly multiple times in order to support multiple compiler manglings (via redefinition of F77). */ -FFTW_VOIDFUNC F77(execute, EXECUTE)(X(plan) * const p) +void F77(execute, EXECUTE)(X(plan) * const p) { plan *pln = (*p)->pln; pln->adt->solve(pln, (*p)->prb); } -FFTW_VOIDFUNC F77(destroy_plan, DESTROY_PLAN)(X(plan) *p) +void F77(destroy_plan, DESTROY_PLAN)(X(plan) *p) { X(destroy_plan)(*p); } -FFTW_VOIDFUNC F77(cleanup, CLEANUP)(void) +void F77(cleanup, CLEANUP)(void) { X(cleanup)(); } -FFTW_VOIDFUNC F77(forget_wisdom, FORGET_WISDOM)(void) +void F77(forget_wisdom, FORGET_WISDOM)(void) { X(forget_wisdom)(); } -FFTW_VOIDFUNC F77(export_wisdom, EXPORT_WISDOM)(void (*f77_write_char)(char *, void *), +void F77(export_wisdom, EXPORT_WISDOM)(void (*f77_write_char)(char *, void *), void *data) { write_char_data ad; @@ -53,7 +53,7 @@ FFTW_VOIDFUNC F77(export_wisdom, EXPORT_WISDOM)(void (*f77_write_char)(char *, v X(export_wisdom)(write_char, (void *) &ad); } -FFTW_VOIDFUNC F77(import_wisdom, IMPORT_WISDOM)(int *isuccess, +void F77(import_wisdom, IMPORT_WISDOM)(int *isuccess, void (*f77_read_char)(int *, void *), void *data) { @@ -63,40 +63,40 @@ FFTW_VOIDFUNC F77(import_wisdom, IMPORT_WISDOM)(int *isuccess, *isuccess = X(import_wisdom)(read_char, (void *) &ed); } -FFTW_VOIDFUNC F77(import_system_wisdom, IMPORT_SYSTEM_WISDOM)(int *isuccess) +void F77(import_system_wisdom, IMPORT_SYSTEM_WISDOM)(int *isuccess) { *isuccess = X(import_system_wisdom)(); } -FFTW_VOIDFUNC F77(print_plan, PRINT_PLAN)(X(plan) * const p) +void F77(print_plan, PRINT_PLAN)(X(plan) * const p) { X(print_plan)(*p); fflush(stdout); } -FFTW_VOIDFUNC F77(flops,FLOPS)(X(plan) *p, double *add, double *mul, double *fma) +void F77(flops,FLOPS)(X(plan) *p, double *add, double *mul, double *fma) { X(flops)(*p, add, mul, fma); } -FFTW_VOIDFUNC F77(estimate_cost,ESTIMATE_COST)(double *cost, X(plan) * const p) +void F77(estimate_cost,ESTIMATE_COST)(double *cost, X(plan) * const p) { *cost 
= X(estimate_cost)(*p); } -FFTW_VOIDFUNC F77(cost,COST)(double *cost, X(plan) * const p) +void F77(cost,COST)(double *cost, X(plan) * const p) { *cost = X(cost)(*p); } -FFTW_VOIDFUNC F77(set_timelimit,SET_TIMELIMIT)(double *t) +void F77(set_timelimit,SET_TIMELIMIT)(double *t) { X(set_timelimit)(*t); } /******************************** DFT ***********************************/ -FFTW_VOIDFUNC F77(plan_dft, PLAN_DFT)(X(plan) *p, int *rank, const int *n, +void F77(plan_dft, PLAN_DFT)(X(plan) *p, int *rank, const int *n, C *in, C *out, int *sign, int *flags) { int *nrev = reverse_n(*rank, n); @@ -104,26 +104,26 @@ FFTW_VOIDFUNC F77(plan_dft, PLAN_DFT)(X(plan) *p, int *rank, const int *n, X(ifree0)(nrev); } -FFTW_VOIDFUNC F77(plan_dft_1d, PLAN_DFT_1D)(X(plan) *p, int *n, C *in, C *out, +void F77(plan_dft_1d, PLAN_DFT_1D)(X(plan) *p, int *n, C *in, C *out, int *sign, int *flags) { *p = X(plan_dft_1d)(*n, in, out, *sign, *flags); } -FFTW_VOIDFUNC F77(plan_dft_2d, PLAN_DFT_2D)(X(plan) *p, int *nx, int *ny, +void F77(plan_dft_2d, PLAN_DFT_2D)(X(plan) *p, int *nx, int *ny, C *in, C *out, int *sign, int *flags) { *p = X(plan_dft_2d)(*ny, *nx, in, out, *sign, *flags); } -FFTW_VOIDFUNC F77(plan_dft_3d, PLAN_DFT_3D)(X(plan) *p, int *nx, int *ny, int *nz, +void F77(plan_dft_3d, PLAN_DFT_3D)(X(plan) *p, int *nx, int *ny, int *nz, C *in, C *out, int *sign, int *flags) { *p = X(plan_dft_3d)(*nz, *ny, *nx, in, out, *sign, *flags); } -FFTW_VOIDFUNC F77(plan_many_dft, PLAN_MANY_DFT)(X(plan) *p, int *rank, const int *n, +void F77(plan_many_dft, PLAN_MANY_DFT)(X(plan) *p, int *rank, const int *n, int *howmany, C *in, const int *inembed, int *istride, int *idist, @@ -143,7 +143,7 @@ FFTW_VOIDFUNC F77(plan_many_dft, PLAN_MANY_DFT)(X(plan) *p, int *rank, const int X(ifree0)(nrev); } -FFTW_VOIDFUNC F77(plan_guru_dft, PLAN_GURU_DFT)(X(plan) *p, int *rank, const int *n, +void F77(plan_guru_dft, PLAN_GURU_DFT)(X(plan) *p, int *rank, const int *n, const int *is, const int *os, int *howmany_rank, 
const int *h_n, const int *h_is, const int *h_os, @@ -157,7 +157,7 @@ FFTW_VOIDFUNC F77(plan_guru_dft, PLAN_GURU_DFT)(X(plan) *p, int *rank, const int X(ifree0)(dims); } -FFTW_VOIDFUNC F77(plan_guru_split_dft, PLAN_GURU_SPLIT_DFT)(X(plan) *p, int *rank, const int *n, +void F77(plan_guru_split_dft, PLAN_GURU_SPLIT_DFT)(X(plan) *p, int *rank, const int *n, const int *is, const int *os, int *howmany_rank, const int *h_n, const int *h_is, const int *h_os, @@ -171,7 +171,7 @@ FFTW_VOIDFUNC F77(plan_guru_split_dft, PLAN_GURU_SPLIT_DFT)(X(plan) *p, int *ran X(ifree0)(dims); } -FFTW_VOIDFUNC F77(execute_dft, EXECUTE_DFT)(X(plan) * const p, C *in, C *out) +void F77(execute_dft, EXECUTE_DFT)(X(plan) * const p, C *in, C *out) { plan_dft *pln = (plan_dft *) (*p)->pln; if ((*p)->sign == FFT_SIGN) @@ -180,7 +180,7 @@ FFTW_VOIDFUNC F77(execute_dft, EXECUTE_DFT)(X(plan) * const p, C *in, C *out) pln->apply((plan *) pln, in[0]+1, in[0], out[0]+1, out[0]); } -FFTW_VOIDFUNC F77(execute_split_dft, EXECUTE_SPLIT_DFT)(X(plan) * const p, +void F77(execute_split_dft, EXECUTE_SPLIT_DFT)(X(plan) * const p, R *ri, R *ii, R *ro, R *io) { plan_dft *pln = (plan_dft *) (*p)->pln; @@ -189,7 +189,7 @@ FFTW_VOIDFUNC F77(execute_split_dft, EXECUTE_SPLIT_DFT)(X(plan) * const p, /****************************** DFT r2c *********************************/ -FFTW_VOIDFUNC F77(plan_dft_r2c, PLAN_DFT_R2C)(X(plan) *p, int *rank, const int *n, +void F77(plan_dft_r2c, PLAN_DFT_R2C)(X(plan) *p, int *rank, const int *n, R *in, C *out, int *flags) { int *nrev = reverse_n(*rank, n); @@ -197,19 +197,19 @@ FFTW_VOIDFUNC F77(plan_dft_r2c, PLAN_DFT_R2C)(X(plan) *p, int *rank, const int * X(ifree0)(nrev); } -FFTW_VOIDFUNC F77(plan_dft_r2c_1d, PLAN_DFT_R2C_1D)(X(plan) *p, int *n, R *in, C *out, +void F77(plan_dft_r2c_1d, PLAN_DFT_R2C_1D)(X(plan) *p, int *n, R *in, C *out, int *flags) { *p = X(plan_dft_r2c_1d)(*n, in, out, *flags); } -FFTW_VOIDFUNC F77(plan_dft_r2c_2d, PLAN_DFT_R2C_2D)(X(plan) *p, int *nx, int *ny, +void 
F77(plan_dft_r2c_2d, PLAN_DFT_R2C_2D)(X(plan) *p, int *nx, int *ny, R *in, C *out, int *flags) { *p = X(plan_dft_r2c_2d)(*ny, *nx, in, out, *flags); } -FFTW_VOIDFUNC F77(plan_dft_r2c_3d, PLAN_DFT_R2C_3D)(X(plan) *p, +void F77(plan_dft_r2c_3d, PLAN_DFT_R2C_3D)(X(plan) *p, int *nx, int *ny, int *nz, R *in, C *out, int *flags) @@ -217,7 +217,7 @@ FFTW_VOIDFUNC F77(plan_dft_r2c_3d, PLAN_DFT_R2C_3D)(X(plan) *p, *p = X(plan_dft_r2c_3d)(*nz, *ny, *nx, in, out, *flags); } -FFTW_VOIDFUNC F77(plan_many_dft_r2c, PLAN_MANY_DFT_R2C)( +void F77(plan_many_dft_r2c, PLAN_MANY_DFT_R2C)( X(plan) *p, int *rank, const int *n, int *howmany, R *in, const int *inembed, int *istride, int *idist, @@ -236,7 +236,7 @@ FFTW_VOIDFUNC F77(plan_many_dft_r2c, PLAN_MANY_DFT_R2C)( X(ifree0)(nrev); } -FFTW_VOIDFUNC F77(plan_guru_dft_r2c, PLAN_GURU_DFT_R2C)( +void F77(plan_guru_dft_r2c, PLAN_GURU_DFT_R2C)( X(plan) *p, int *rank, const int *n, const int *is, const int *os, int *howmany_rank, const int *h_n, @@ -251,7 +251,7 @@ FFTW_VOIDFUNC F77(plan_guru_dft_r2c, PLAN_GURU_DFT_R2C)( X(ifree0)(dims); } -FFTW_VOIDFUNC F77(plan_guru_split_dft_r2c, PLAN_GURU_SPLIT_DFT_R2C)( +void F77(plan_guru_split_dft_r2c, PLAN_GURU_SPLIT_DFT_R2C)( X(plan) *p, int *rank, const int *n, const int *is, const int *os, int *howmany_rank, const int *h_n, @@ -266,14 +266,14 @@ FFTW_VOIDFUNC F77(plan_guru_split_dft_r2c, PLAN_GURU_SPLIT_DFT_R2C)( X(ifree0)(dims); } -FFTW_VOIDFUNC F77(execute_dft_r2c, EXECUTE_DFT_R2C)(X(plan) * const p, R *in, C *out) +void F77(execute_dft_r2c, EXECUTE_DFT_R2C)(X(plan) * const p, R *in, C *out) { plan_rdft2 *pln = (plan_rdft2 *) (*p)->pln; problem_rdft2 *prb = (problem_rdft2 *) (*p)->prb; pln->apply((plan *) pln, in, in + (prb->r1 - prb->r0), out[0], out[0]+1); } -FFTW_VOIDFUNC F77(execute_split_dft_r2c, EXECUTE_SPLIT_DFT_R2C)(X(plan) * const p, +void F77(execute_split_dft_r2c, EXECUTE_SPLIT_DFT_R2C)(X(plan) * const p, R *in, R *ro, R *io) { plan_rdft2 *pln = (plan_rdft2 *) (*p)->pln; @@ -283,7 
+283,7 @@ FFTW_VOIDFUNC F77(execute_split_dft_r2c, EXECUTE_SPLIT_DFT_R2C)(X(plan) * const /****************************** DFT c2r *********************************/ -FFTW_VOIDFUNC F77(plan_dft_c2r, PLAN_DFT_C2R)(X(plan) *p, int *rank, const int *n, +void F77(plan_dft_c2r, PLAN_DFT_C2R)(X(plan) *p, int *rank, const int *n, C *in, R *out, int *flags) { int *nrev = reverse_n(*rank, n); @@ -291,19 +291,19 @@ FFTW_VOIDFUNC F77(plan_dft_c2r, PLAN_DFT_C2R)(X(plan) *p, int *rank, const int * X(ifree0)(nrev); } -FFTW_VOIDFUNC F77(plan_dft_c2r_1d, PLAN_DFT_C2R_1D)(X(plan) *p, int *n, C *in, R *out, +void F77(plan_dft_c2r_1d, PLAN_DFT_C2R_1D)(X(plan) *p, int *n, C *in, R *out, int *flags) { *p = X(plan_dft_c2r_1d)(*n, in, out, *flags); } -FFTW_VOIDFUNC F77(plan_dft_c2r_2d, PLAN_DFT_C2R_2D)(X(plan) *p, int *nx, int *ny, +void F77(plan_dft_c2r_2d, PLAN_DFT_C2R_2D)(X(plan) *p, int *nx, int *ny, C *in, R *out, int *flags) { *p = X(plan_dft_c2r_2d)(*ny, *nx, in, out, *flags); } -FFTW_VOIDFUNC F77(plan_dft_c2r_3d, PLAN_DFT_C2R_3D)(X(plan) *p, +void F77(plan_dft_c2r_3d, PLAN_DFT_C2R_3D)(X(plan) *p, int *nx, int *ny, int *nz, C *in, R *out, int *flags) @@ -311,7 +311,7 @@ FFTW_VOIDFUNC F77(plan_dft_c2r_3d, PLAN_DFT_C2R_3D)(X(plan) *p, *p = X(plan_dft_c2r_3d)(*nz, *ny, *nx, in, out, *flags); } -FFTW_VOIDFUNC F77(plan_many_dft_c2r, PLAN_MANY_DFT_C2R)( +void F77(plan_many_dft_c2r, PLAN_MANY_DFT_C2R)( X(plan) *p, int *rank, const int *n, int *howmany, C *in, const int *inembed, int *istride, int *idist, @@ -330,7 +330,7 @@ FFTW_VOIDFUNC F77(plan_many_dft_c2r, PLAN_MANY_DFT_C2R)( X(ifree0)(nrev); } -FFTW_VOIDFUNC F77(plan_guru_dft_c2r, PLAN_GURU_DFT_C2R)( +void F77(plan_guru_dft_c2r, PLAN_GURU_DFT_C2R)( X(plan) *p, int *rank, const int *n, const int *is, const int *os, int *howmany_rank, const int *h_n, @@ -345,7 +345,7 @@ FFTW_VOIDFUNC F77(plan_guru_dft_c2r, PLAN_GURU_DFT_C2R)( X(ifree0)(dims); } -FFTW_VOIDFUNC F77(plan_guru_split_dft_c2r, PLAN_GURU_SPLIT_DFT_C2R)( +void 
F77(plan_guru_split_dft_c2r, PLAN_GURU_SPLIT_DFT_C2R)( X(plan) *p, int *rank, const int *n, const int *is, const int *os, int *howmany_rank, const int *h_n, @@ -360,14 +360,14 @@ FFTW_VOIDFUNC F77(plan_guru_split_dft_c2r, PLAN_GURU_SPLIT_DFT_C2R)( X(ifree0)(dims); } -FFTW_VOIDFUNC F77(execute_dft_c2r, EXECUTE_DFT_C2R)(X(plan) * const p, C *in, R *out) +void F77(execute_dft_c2r, EXECUTE_DFT_C2R)(X(plan) * const p, C *in, R *out) { plan_rdft2 *pln = (plan_rdft2 *) (*p)->pln; problem_rdft2 *prb = (problem_rdft2 *) (*p)->prb; pln->apply((plan *) pln, out, out + (prb->r1 - prb->r0), in[0], in[0]+1); } -FFTW_VOIDFUNC F77(execute_split_dft_c2r, EXECUTE_SPLIT_DFT_C2R)(X(plan) * const p, +void F77(execute_split_dft_c2r, EXECUTE_SPLIT_DFT_C2R)(X(plan) * const p, R *ri, R *ii, R *out) { plan_rdft2 *pln = (plan_rdft2 *) (*p)->pln; @@ -377,7 +377,7 @@ FFTW_VOIDFUNC F77(execute_split_dft_c2r, EXECUTE_SPLIT_DFT_C2R)(X(plan) * const /****************************** r2r *********************************/ -FFTW_VOIDFUNC F77(plan_r2r, PLAN_R2R)(X(plan) *p, int *rank, const int *n, +void F77(plan_r2r, PLAN_R2R)(X(plan) *p, int *rank, const int *n, R *in, R *out, int *kind, int *flags) { @@ -388,13 +388,13 @@ FFTW_VOIDFUNC F77(plan_r2r, PLAN_R2R)(X(plan) *p, int *rank, const int *n, X(ifree0)(nrev); } -FFTW_VOIDFUNC F77(plan_r2r_1d, PLAN_R2R_1D)(X(plan) *p, int *n, R *in, R *out, +void F77(plan_r2r_1d, PLAN_R2R_1D)(X(plan) *p, int *n, R *in, R *out, int *kind, int *flags) { *p = X(plan_r2r_1d)(*n, in, out, (X(r2r_kind)) *kind, *flags); } -FFTW_VOIDFUNC F77(plan_r2r_2d, PLAN_R2R_2D)(X(plan) *p, int *nx, int *ny, +void F77(plan_r2r_2d, PLAN_R2R_2D)(X(plan) *p, int *nx, int *ny, R *in, R *out, int *kindx, int *kindy, int *flags) { @@ -402,7 +402,7 @@ FFTW_VOIDFUNC F77(plan_r2r_2d, PLAN_R2R_2D)(X(plan) *p, int *nx, int *ny, (X(r2r_kind)) *kindy, (X(r2r_kind)) *kindx, *flags); } -FFTW_VOIDFUNC F77(plan_r2r_3d, PLAN_R2R_3D)(X(plan) *p, +void F77(plan_r2r_3d, PLAN_R2R_3D)(X(plan) *p, int *nx, 
int *ny, int *nz, R *in, R *out, int *kindx, int *kindy, int *kindz, @@ -413,7 +413,7 @@ FFTW_VOIDFUNC F77(plan_r2r_3d, PLAN_R2R_3D)(X(plan) *p, (X(r2r_kind)) *kindx, *flags); } -FFTW_VOIDFUNC F77(plan_many_r2r, PLAN_MANY_R2R)( +void F77(plan_many_r2r, PLAN_MANY_R2R)( X(plan) *p, int *rank, const int *n, int *howmany, R *in, const int *inembed, int *istride, int *idist, @@ -434,7 +434,7 @@ FFTW_VOIDFUNC F77(plan_many_r2r, PLAN_MANY_R2R)( X(ifree0)(nrev); } -FFTW_VOIDFUNC F77(plan_guru_r2r, PLAN_GURU_R2R)( +void F77(plan_guru_r2r, PLAN_GURU_R2R)( X(plan) *p, int *rank, const int *n, const int *is, const int *os, int *howmany_rank, const int *h_n, @@ -451,7 +451,7 @@ FFTW_VOIDFUNC F77(plan_guru_r2r, PLAN_GURU_R2R)( X(ifree0)(dims); } -FFTW_VOIDFUNC F77(execute_r2r, EXECUTE_R2R)(X(plan) * const p, R *in, R *out) +void F77(execute_r2r, EXECUTE_R2R)(X(plan) * const p, R *in, R *out) { plan_rdft *pln = (plan_rdft *) (*p)->pln; pln->apply((plan *) pln, in, out); diff --git a/api/fftw3.h b/api/fftw3.h index 7bd4c6e55..f4d33b499 100644 --- a/api/fftw3.h +++ b/api/fftw3.h @@ -47,6 +47,17 @@ #ifndef FFTW3_H #define FFTW3_H +#include +#ifdef ENABLE_THREADS + #ifdef BUILD_THREADS_EXTERNAL + #include + #else + #define FFTW3_THREADS_EXPORT FFTW3_EXPORT + #endif +#else + #define FFTW3_THREADS_EXPORT +#endif /* ENABLE_THREADS */ + #include #ifdef __cplusplus @@ -68,24 +79,6 @@ extern "C" #define FFTW_MANGLE_LONG_DOUBLE(name) FFTW_CONCAT(fftwl_, name) #define FFTW_MANGLE_QUAD(name) FFTW_CONCAT(fftwq_, name) -/* IMPORTANT: for Windows compilers, you should add a line - #define FFTW_DLL - here and in kernel/ifftw.h if you are compiling/using FFTW as a - DLL, in order to do the proper importing/exporting, or - alternatively compile with -DFFTW_DLL or the equivalent - command-line flag. This is not necessary under MinGW/Cygwin, where - libtool does the imports/exports automatically. 
*/ -#if defined(FFTW_DLL) && (defined(_WIN32) || defined(__WIN32__)) - /* annoying Windows syntax for shared-library declarations */ -# if defined(COMPILING_FFTW) /* defined in api.h when compiling FFTW */ -# define FFTW_EXTERN extern __declspec(dllexport) -# else /* user is calling FFTW; import symbol */ -# define FFTW_EXTERN extern __declspec(dllimport) -# endif -#else -# define FFTW_EXTERN extern -#endif - /* specify calling convention (Windows only) */ #if defined(_WIN32) || defined(__WIN32__) # define FFTW_CDECL __cdecl @@ -138,24 +131,24 @@ typedef enum fftw_r2r_kind_do_not_use_me X(r2r_kind); \ typedef fftw_write_char_func_do_not_use_me X(write_char_func); \ typedef fftw_read_char_func_do_not_use_me X(read_char_func); \ \ -FFTW_EXTERN void \ +FFTW3_EXPORT void \ FFTW_CDECL X(execute)(const X(plan) p); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_dft)(int rank, const int *n, \ C *in, C *out, int sign, unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_dft_1d)(int n, C *in, C *out, int sign, \ unsigned flags); \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_dft_2d)(int n0, int n1, \ C *in, C *out, int sign, unsigned flags); \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_dft_3d)(int n0, int n1, int n2, \ C *in, C *out, int sign, unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_many_dft)(int rank, const int *n, \ int howmany, \ C *in, const int *inembed, \ @@ -164,27 +157,27 @@ FFTW_CDECL X(plan_many_dft)(int rank, const int *n, \ int ostride, int odist, \ int sign, unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_guru_dft)(int rank, const X(iodim) *dims, \ int howmany_rank, \ const X(iodim) *howmany_dims, \ C *in, C *out, \ int sign, unsigned flags); \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_guru_split_dft)(int rank, const X(iodim) *dims, \ int howmany_rank, \ const 
X(iodim) *howmany_dims, \ R *ri, R *ii, R *ro, R *io, \ unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_guru64_dft)(int rank, \ const X(iodim64) *dims, \ int howmany_rank, \ const X(iodim64) *howmany_dims, \ C *in, C *out, \ int sign, unsigned flags); \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_guru64_split_dft)(int rank, \ const X(iodim64) *dims, \ int howmany_rank, \ @@ -192,14 +185,14 @@ FFTW_CDECL X(plan_guru64_split_dft)(int rank, \ R *ri, R *ii, R *ro, R *io, \ unsigned flags); \ \ -FFTW_EXTERN void \ +FFTW3_EXPORT void \ FFTW_CDECL X(execute_dft)(const X(plan) p, C *in, C *out); \ \ -FFTW_EXTERN void \ +FFTW3_EXPORT void \ FFTW_CDECL X(execute_split_dft)(const X(plan) p, R *ri, R *ii, \ R *ro, R *io); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_many_dft_r2c)(int rank, const int *n, \ int howmany, \ R *in, const int *inembed, \ @@ -208,23 +201,23 @@ FFTW_CDECL X(plan_many_dft_r2c)(int rank, const int *n, \ int ostride, int odist, \ unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_dft_r2c)(int rank, const int *n, \ R *in, C *out, unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_dft_r2c_1d)(int n,R *in,C *out,unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_dft_r2c_2d)(int n0, int n1, \ R *in, C *out, unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_dft_r2c_3d)(int n0, int n1, \ int n2, \ R *in, C *out, unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_many_dft_c2r)(int rank, const int *n, \ int howmany, \ C *in, const int *inembed, \ @@ -233,51 +226,51 @@ FFTW_CDECL X(plan_many_dft_c2r)(int rank, const int *n, \ int ostride, int odist, \ unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_dft_c2r)(int rank, const int *n, \ C *in, R *out, unsigned flags); \ \ 
-FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_dft_c2r_1d)(int n,C *in,R *out,unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_dft_c2r_2d)(int n0, int n1, \ C *in, R *out, unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_dft_c2r_3d)(int n0, int n1, \ int n2, \ C *in, R *out, unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_guru_dft_r2c)(int rank, const X(iodim) *dims, \ int howmany_rank, \ const X(iodim) *howmany_dims, \ R *in, C *out, \ unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_guru_dft_c2r)(int rank, const X(iodim) *dims, \ int howmany_rank, \ const X(iodim) *howmany_dims, \ C *in, R *out, \ unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_guru_split_dft_r2c)(int rank, const X(iodim) *dims, \ int howmany_rank, \ const X(iodim) *howmany_dims, \ R *in, R *ro, R *io, \ unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_guru_split_dft_c2r)(int rank, const X(iodim) *dims, \ int howmany_rank, \ const X(iodim) *howmany_dims, \ R *ri, R *ii, R *out, \ unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_guru64_dft_r2c)(int rank, \ const X(iodim64) *dims, \ int howmany_rank, \ @@ -285,7 +278,7 @@ FFTW_CDECL X(plan_guru64_dft_r2c)(int rank, \ R *in, C *out, \ unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_guru64_dft_c2r)(int rank, \ const X(iodim64) *dims, \ int howmany_rank, \ @@ -293,34 +286,34 @@ FFTW_CDECL X(plan_guru64_dft_c2r)(int rank, \ C *in, R *out, \ unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_guru64_split_dft_r2c)(int rank, const X(iodim64) *dims, \ int howmany_rank, \ const X(iodim64) *howmany_dims, \ R *in, R *ro, R *io, \ unsigned flags); \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL 
X(plan_guru64_split_dft_c2r)(int rank, const X(iodim64) *dims, \ int howmany_rank, \ const X(iodim64) *howmany_dims, \ R *ri, R *ii, R *out, \ unsigned flags); \ \ -FFTW_EXTERN void \ +FFTW3_EXPORT void \ FFTW_CDECL X(execute_dft_r2c)(const X(plan) p, R *in, C *out); \ \ -FFTW_EXTERN void \ +FFTW3_EXPORT void \ FFTW_CDECL X(execute_dft_c2r)(const X(plan) p, C *in, R *out); \ \ -FFTW_EXTERN void \ +FFTW3_EXPORT void \ FFTW_CDECL X(execute_split_dft_r2c)(const X(plan) p, \ R *in, R *ro, R *io); \ \ -FFTW_EXTERN void \ +FFTW3_EXPORT void \ FFTW_CDECL X(execute_split_dft_c2r)(const X(plan) p, \ R *ri, R *ii, R *out); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_many_r2r)(int rank, const int *n, \ int howmany, \ R *in, const int *inembed, \ @@ -329,127 +322,115 @@ FFTW_CDECL X(plan_many_r2r)(int rank, const int *n, \ int ostride, int odist, \ const X(r2r_kind) *kind, unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_r2r)(int rank, const int *n, R *in, R *out, \ const X(r2r_kind) *kind, unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_r2r_1d)(int n, R *in, R *out, \ X(r2r_kind) kind, unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_r2r_2d)(int n0, int n1, R *in, R *out, \ X(r2r_kind) kind0, X(r2r_kind) kind1, \ unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_r2r_3d)(int n0, int n1, int n2, \ R *in, R *out, X(r2r_kind) kind0, \ X(r2r_kind) kind1, X(r2r_kind) kind2, \ unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_guru_r2r)(int rank, const X(iodim) *dims, \ int howmany_rank, \ const X(iodim) *howmany_dims, \ R *in, R *out, \ const X(r2r_kind) *kind, unsigned flags); \ \ -FFTW_EXTERN X(plan) \ +FFTW3_EXPORT X(plan) \ FFTW_CDECL X(plan_guru64_r2r)(int rank, const X(iodim64) *dims, \ int howmany_rank, \ const X(iodim64) *howmany_dims, \ R *in, R *out, \ const 
X(r2r_kind) *kind, unsigned flags); \ \ -FFTW_EXTERN void \ +FFTW3_EXPORT void \ FFTW_CDECL X(execute_r2r)(const X(plan) p, R *in, R *out); \ \ -FFTW_EXTERN void \ +FFTW3_EXPORT void \ FFTW_CDECL X(destroy_plan)(X(plan) p); \ \ -FFTW_EXTERN void \ +FFTW3_EXPORT void \ FFTW_CDECL X(forget_wisdom)(void); \ -FFTW_EXTERN void \ +FFTW3_EXPORT void \ FFTW_CDECL X(cleanup)(void); \ \ -FFTW_EXTERN void \ +FFTW3_EXPORT void \ FFTW_CDECL X(set_timelimit)(double t); \ \ -FFTW_EXTERN void \ -FFTW_CDECL X(plan_with_nthreads)(int nthreads); \ - \ -FFTW_EXTERN int \ -FFTW_CDECL X(init_threads)(void); \ - \ -FFTW_EXTERN void \ -FFTW_CDECL X(cleanup_threads)(void); \ - \ -FFTW_EXTERN void \ -FFTW_CDECL X(make_planner_thread_safe)(void); \ - \ -FFTW_EXTERN int \ +FFTW3_EXPORT int \ FFTW_CDECL X(export_wisdom_to_filename)(const char *filename); \ \ -FFTW_EXTERN void \ +FFTW3_EXPORT void \ FFTW_CDECL X(export_wisdom_to_file)(FILE *output_file); \ \ -FFTW_EXTERN char * \ +FFTW3_EXPORT char * \ FFTW_CDECL X(export_wisdom_to_string)(void); \ \ -FFTW_EXTERN void \ +FFTW3_EXPORT void \ FFTW_CDECL X(export_wisdom)(X(write_char_func) write_char, \ void *data); \ -FFTW_EXTERN int \ +FFTW3_EXPORT int \ FFTW_CDECL X(import_system_wisdom)(void); \ \ -FFTW_EXTERN int \ +FFTW3_EXPORT int \ FFTW_CDECL X(import_wisdom_from_filename)(const char *filename); \ \ -FFTW_EXTERN int \ +FFTW3_EXPORT int \ FFTW_CDECL X(import_wisdom_from_file)(FILE *input_file); \ \ -FFTW_EXTERN int \ +FFTW3_EXPORT int \ FFTW_CDECL X(import_wisdom_from_string)(const char *input_string); \ \ -FFTW_EXTERN int \ +FFTW3_EXPORT int \ FFTW_CDECL X(import_wisdom)(X(read_char_func) read_char, void *data); \ \ -FFTW_EXTERN void \ +FFTW3_EXPORT void \ FFTW_CDECL X(fprint_plan)(const X(plan) p, FILE *output_file); \ \ -FFTW_EXTERN void \ +FFTW3_EXPORT void \ FFTW_CDECL X(print_plan)(const X(plan) p); \ \ -FFTW_EXTERN char * \ +FFTW3_EXPORT char * \ FFTW_CDECL X(sprint_plan)(const X(plan) p); \ \ -FFTW_EXTERN void * \ +FFTW3_EXPORT void 
* \ FFTW_CDECL X(malloc)(size_t n); \ \ -FFTW_EXTERN R * \ +FFTW3_EXPORT R * \ FFTW_CDECL X(alloc_real)(size_t n); \ -FFTW_EXTERN C * \ +FFTW3_EXPORT C * \ FFTW_CDECL X(alloc_complex)(size_t n); \ \ -FFTW_EXTERN void \ +FFTW3_EXPORT void \ FFTW_CDECL X(free)(void *p); \ \ -FFTW_EXTERN void \ +FFTW3_EXPORT void \ FFTW_CDECL X(flops)(const X(plan) p, \ double *add, double *mul, double *fmas); \ -FFTW_EXTERN double \ +FFTW3_EXPORT double \ FFTW_CDECL X(estimate_cost)(const X(plan) p); \ \ -FFTW_EXTERN double \ +FFTW3_EXPORT double \ FFTW_CDECL X(cost)(const X(plan) p); \ \ -FFTW_EXTERN int \ +FFTW3_EXPORT int \ FFTW_CDECL X(alignment_of)(R *p); \ \ -FFTW_EXTERN const char X(version)[]; \ -FFTW_EXTERN const char X(cc)[]; \ -FFTW_EXTERN const char X(codelet_optim)[]; +extern FFTW3_EXPORT const char X(version)[]; \ +extern FFTW3_EXPORT const char X(cc)[]; \ +extern FFTW3_EXPORT const char X(codelet_optim)[]; /* end of FFTW_DEFINE_API macro */ @@ -458,6 +439,26 @@ FFTW_DEFINE_API(FFTW_MANGLE_DOUBLE, double, fftw_complex) FFTW_DEFINE_API(FFTW_MANGLE_FLOAT, float, fftwf_complex) FFTW_DEFINE_API(FFTW_MANGLE_LONG_DOUBLE, long double, fftwl_complex) +#if defined(ENABLE_THREADS) || defined(WITH_COMBINED_THREADS) +#define FFTW_DEFINE_THREADS_API(X, R, C) \ +FFTW3_THREADS_EXPORT void \ +FFTW_CDECL X(plan_with_nthreads)(int nthreads); \ + \ +FFTW3_THREADS_EXPORT int \ +FFTW_CDECL X(init_threads)(void); \ + \ +FFTW3_THREADS_EXPORT void \ +FFTW_CDECL X(cleanup_threads)(void); \ + \ +FFTW3_THREADS_EXPORT void \ +FFTW_CDECL X(make_planner_thread_safe)(void); + +FFTW_DEFINE_THREADS_API(FFTW_MANGLE_DOUBLE, double, fftw_complex) +FFTW_DEFINE_THREADS_API(FFTW_MANGLE_FLOAT, float, fftwf_complex) +FFTW_DEFINE_THREADS_API(FFTW_MANGLE_LONG_DOUBLE, long double, fftwl_complex) +#endif /* ENABLE_THREADS */ + + /* __float128 (quad precision) is a gcc extension on i386, x86_64, and ia64 for gcc >= 4.6 (compiled in FFTW with --enable-quad-precision) */ #if (__GNUC__ > 4 || (__GNUC__ == 4 && 
__GNUC_MINOR__ >= 6)) \ @@ -508,7 +509,7 @@ FFTW_DEFINE_API(FFTW_MANGLE_QUAD, __float128, fftwq_complex) #define FFTW_ALLOW_PRUNING (1U << 20) #ifdef __cplusplus -} /* extern "C" */ +} /* extern "C" */ #endif /* __cplusplus */ #endif /* FFTW3_H */ diff --git a/api/x77.h b/api/x77.h index 9a1ab8340..49be21095 100644 --- a/api/x77.h +++ b/api/x77.h @@ -60,10 +60,3 @@ # define F77_FUNC_(a, A) a ## __ # undef F77_FUNC_EQUIV #endif - -/* annoying Windows syntax for shared-library declarations */ -#if defined(FFTW_DLL) && (defined(_WIN32) || defined(__WIN32__)) -# define FFTW_VOIDFUNC __declspec(dllexport) void -#else -# define FFTW_VOIDFUNC void -#endif diff --git a/conanfile.py b/conanfile.py new file mode 100644 index 000000000..3f9e39adb --- /dev/null +++ b/conanfile.py @@ -0,0 +1,65 @@ +from conans import ConanFile, CMake + +class fftw3(ConanFile): + name = "fftw3" + version = "3.3.8" + homepage = "http://www.fftw.org/" + license = "GPL-2.0" + url = "https://vgit.iaf.fhg.de/A3/SY/MMW_1_0/fftw" + description = "FFTW3 library" + settings = "os", "compiler", "build_type", "arch" + exports_sources = "*" + generators = "cmake_paths" + options = { + "shared": [True, False], + "pic": [True, False], + "openmp": [True, False], + "pthread": [True, False], + "combined_threads": [True, False], + "precision": ["float", "double", "longdouble", "quad"], + "sse": [True, False], + "sse2": [True, False], + "avx": [True, False], + "avx2": [True, False], + "fortran": [True, False] + } + default_options = { + "shared": True, + "pic": True, + "openmp": False, + "pthread": False, + "combined_threads": False, + "precision": "double", + "sse": False, + "sse2": False, + "avx": False, + "avx2": False, + "fortran": False + } + + def set_compile_options(self, cmake): + cmake.definitions["CMAKE_POSITION_INDEPENDENT_CODE"] = self.options.pic + cmake.definitions["BUILD_TESTS"] = False + cmake.definitions["ENABLE_OPENMP"] = self.options.openmp + cmake.definitions["ENABLE_THREADS"] = 
self.options.pthread + cmake.definitions["WITH_COMBINED_THREADS"] = self.options.combined_threads + cmake.definitions["ENABLE_FLOAT"] = self.options.precision == "float" + cmake.definitions["ENABLE_LONG_DOUBLE"] = self.options.precision == "longdouble" + cmake.definitions["ENABLE_QUAD_PRECISION"] = self.options.precision == "quad" + cmake.definitions["ENABLE_SSE"] = self.options.sse + cmake.definitions["ENABLE_SSE2"] = self.options.sse2 + cmake.definitions["ENABLE_AVX"] = self.options.avx + cmake.definitions["ENABLE_AVX2"] = self.options.avx2 + cmake.definitions["DISABLE_FORTRAN"] = not self.options.fortran  # the "fortran" option enables the wrappers; the CMake switch disables them + return cmake + + def build(self): + cmake = CMake(self) + cmake = self.set_compile_options(cmake) + cmake.configure() + cmake.build() + + def package(self): + self.copy("COPYING", dst="licenses") + cmake = CMake(self) + cmake.install() diff --git a/dft/CMakeLists.txt b/dft/CMakeLists.txt new file mode 100644 index 000000000..541b10482 --- /dev/null +++ b/dft/CMakeLists.txt @@ -0,0 +1,32 @@ +target_sources(${fftw3_lib} + PRIVATE + codelet-dft.h + ct.h + dft.h + bluestein.c + buffered.c + conf.c + ct.c + dftw-direct.c + dftw-directsq.c + dftw-generic.c + dftw-genericbuf.c + direct.c + generic.c + indirect-transpose.c + indirect.c + kdft-dif.c + kdft-difsq.c + kdft-dit.c + kdft.c + nop.c + plan.c + problem.c + rader.c + rank-geq2.c + solve.c + vrank-geq1.c + zero.c) + +add_subdirectory(scalar) +add_subdirectory(simd) \ No newline at end of file diff --git a/dft/ct.c b/dft/ct.c index 1a6fa9352..f04e3c47d 100644 --- a/dft/ct.c +++ b/dft/ct.c @@ -21,8 +21,8 @@ #include "dft/ct.h" -ct_solver *(*X(mksolver_ct_hook))(size_t, INT, int, - ct_mkinferior, ct_force_vrecursion) = 0; +ct_solver *(*X(mksolver_ct_hook))(size_t, INT, int, + ct_mkinferior, ct_force_vrecursion) = 0; typedef struct { plan_dft super; diff --git a/dft/ct.h b/dft/ct.h index 022e29b23..c021b525a 100644 --- a/dft/ct.h +++ b/dft/ct.h @@ -18,6 +18,9 @@ * */ +#ifndef __DFT_CT_H__ +#define __DFT_CT_H__ + 
#include "dft/dft.h" typedef void (*dftwapply)(const plan *ego, R *rio, R *iio); @@ -36,7 +39,7 @@ typedef struct { dftwapply apply; } plan_dftw; -extern plan *X(mkplan_dftw)(size_t size, const plan_adt *adt, dftwapply apply); +EXPORT_ADDITIONAL_FUNCTIONS plan *X(mkplan_dftw)(size_t size, const plan_adt *adt, dftwapply apply); #define MKPLAN_DFTW(type, adt, apply) \ (type *)X(mkplan_dftw)(sizeof(type), adt, apply) @@ -52,12 +55,12 @@ struct ct_solver_s { ct_force_vrecursion force_vrecursionp; }; -int X(ct_applicable)(const ct_solver *, const problem *, planner *); +int EXPORT_ADDITIONAL_FUNCTIONS X(ct_applicable)(const ct_solver *, const problem *, planner *); ct_solver *X(mksolver_ct)(size_t size, INT r, int dec, ct_mkinferior mkcldw, - ct_force_vrecursion force_vrecursionp); -extern ct_solver *(*X(mksolver_ct_hook))(size_t, INT, int, - ct_mkinferior, ct_force_vrecursion); + ct_force_vrecursion force_vrecursionp); +extern EXPORT_ADDITIONAL_FUNCTIONS ct_solver *(*X(mksolver_ct_hook))(size_t, INT, int, + ct_mkinferior, ct_force_vrecursion); void X(regsolver_ct_directw)(planner *plnr, kdftw codelet, const ct_desc *desc, int dec); @@ -66,3 +69,5 @@ void X(regsolver_ct_directwbuf)(planner *plnr, solver *X(mksolver_ctsq)(kdftwsq codelet, const ct_desc *desc, int dec); void X(regsolver_ct_directwsq)(planner *plnr, kdftwsq codelet, const ct_desc *desc, int dec); + +#endif /* __DFT_CT_H__ */ diff --git a/dft/dft.h b/dft/dft.h index 740260ebc..867c376cf 100644 --- a/dft/dft.h +++ b/dft/dft.h @@ -38,13 +38,13 @@ typedef struct { } problem_dft; void X(dft_zerotens)(tensor *sz, R *ri, R *ii); -problem *X(mkproblem_dft)(const tensor *sz, const tensor *vecsz, +problem EXPORT_ADDITIONAL_FUNCTIONS *X(mkproblem_dft)(const tensor *sz, const tensor *vecsz, R *ri, R *ii, R *ro, R *io); -problem *X(mkproblem_dft_d)(tensor *sz, tensor *vecsz, +problem 
EXPORT_ADDITIONAL_FUNCTIONS *X(mkproblem_dft_d)(tensor *sz, tensor *vecsz, R *ri, R *ii, R *ro, R *io); /* solve.c: */ -void X(dft_solve)(const plan *ego_, const problem *p_); +void EXPORT_ADDITIONAL_FUNCTIONS X(dft_solve)(const plan *ego_, const problem *p_); /* plan.c: */ typedef void (*dftapply) (const plan *ego, R *ri, R *ii, R *ro, R *io); @@ -54,7 +54,7 @@ typedef struct { dftapply apply; } plan_dft; -plan *X(mkplan_dft)(size_t size, const plan_adt *adt, dftapply apply); +plan EXPORT_ADDITIONAL_FUNCTIONS *X(mkplan_dft)(size_t size, const plan_adt *adt, dftapply apply); #define MKPLAN_DFT(type, adt, apply) \ (type *)X(mkplan_dft)(sizeof(type), adt, apply) diff --git a/dft/scalar/CMakeLists.txt b/dft/scalar/CMakeLists.txt new file mode 100644 index 000000000..90cbdded4 --- /dev/null +++ b/dft/scalar/CMakeLists.txt @@ -0,0 +1,10 @@ +target_sources(${fftw3_lib} + PRIVATE + f.h + n.h + q.h + t.h + n.c + t.c) + +add_subdirectory(codelets) \ No newline at end of file diff --git a/dft/scalar/codelets/CMakeLists.txt b/dft/scalar/codelets/CMakeLists.txt new file mode 100644 index 000000000..d88ac451f --- /dev/null +++ b/dft/scalar/codelets/CMakeLists.txt @@ -0,0 +1,53 @@ +target_sources(${fftw3_lib} + PRIVATE + codlist.c + n1_10.c + n1_11.c + n1_12.c + n1_13.c + n1_14.c + n1_15.c + n1_16.c + n1_2.c + n1_20.c + n1_25.c + n1_3.c + n1_32.c + n1_4.c + n1_5.c + n1_6.c + n1_64.c + n1_7.c + n1_8.c + n1_9.c + q1_2.c + q1_3.c + q1_4.c + q1_5.c + q1_6.c + q1_8.c + t1_10.c + t1_12.c + t1_15.c + t1_16.c + t1_2.c + t1_20.c + t1_25.c + t1_3.c + t1_32.c + t1_4.c + t1_5.c + t1_6.c + t1_64.c + t1_7.c + t1_8.c + t1_9.c + t2_10.c + t2_16.c + t2_20.c + t2_25.c + t2_32.c + t2_4.c + t2_5.c + t2_64.c + t2_8.c) \ No newline at end of file diff --git a/dft/simd/CMakeLists.txt b/dft/simd/CMakeLists.txt new file mode 100644 index 000000000..a7c006342 --- /dev/null +++ b/dft/simd/CMakeLists.txt @@ -0,0 +1,29 @@ +target_sources(${fftw3_lib} + PRIVATE + n1b.h + n1f.h + n2b.h + n2f.h + n2s.h + q1b.h 
+ q1f.h + t1b.h + t1bu.h + t1f.h + t1fu.h + t2b.h + t2f.h + t3b.h + t3f.h + ts.h) + +if (HAVE_SSE2) + add_subdirectory(sse2) +endif() +if (HAVE_AVX) + add_subdirectory(avx) +endif () +if (HAVE_AVX2) + add_subdirectory(avx2) + add_subdirectory(avx2-128) +endif () \ No newline at end of file diff --git a/dft/simd/avx/CMakeLists.txt b/dft/simd/avx/CMakeLists.txt new file mode 100644 index 000000000..60f9ca9be --- /dev/null +++ b/dft/simd/avx/CMakeLists.txt @@ -0,0 +1,174 @@ +target_sources(${fftw3_lib} + PRIVATE + codlist.c + genus.c + n1bv_10.c + n1bv_11.c + n1bv_12.c + n1bv_128.c + n1bv_13.c + n1bv_14.c + n1bv_15.c + n1bv_16.c + n1bv_2.c + n1bv_20.c + n1bv_25.c + n1bv_3.c + n1bv_32.c + n1bv_4.c + n1bv_5.c + n1bv_6.c + n1bv_64.c + n1bv_7.c + n1bv_8.c + n1bv_9.c + n1fv_10.c + n1fv_11.c + n1fv_12.c + n1fv_128.c + n1fv_13.c + n1fv_14.c + n1fv_15.c + n1fv_16.c + n1fv_2.c + n1fv_20.c + n1fv_25.c + n1fv_3.c + n1fv_32.c + n1fv_4.c + n1fv_5.c + n1fv_6.c + n1fv_64.c + n1fv_7.c + n1fv_8.c + n1fv_9.c + n2bv_10.c + n2bv_12.c + n2bv_14.c + n2bv_16.c + n2bv_2.c + n2bv_20.c + n2bv_32.c + n2bv_4.c + n2bv_6.c + n2bv_64.c + n2bv_8.c + n2fv_10.c + n2fv_12.c + n2fv_14.c + n2fv_16.c + n2fv_2.c + n2fv_20.c + n2fv_32.c + n2fv_4.c + n2fv_6.c + n2fv_64.c + n2fv_8.c + n2sv_16.c + n2sv_32.c + n2sv_4.c + n2sv_64.c + n2sv_8.c + q1bv_2.c + q1bv_4.c + q1bv_5.c + q1bv_8.c + q1fv_2.c + q1fv_4.c + q1fv_5.c + q1fv_8.c + t1buv_10.c + t1buv_2.c + t1buv_3.c + t1buv_4.c + t1buv_5.c + t1buv_6.c + t1buv_7.c + t1buv_8.c + t1buv_9.c + t1bv_10.c + t1bv_12.c + t1bv_15.c + t1bv_16.c + t1bv_2.c + t1bv_20.c + t1bv_25.c + t1bv_3.c + t1bv_32.c + t1bv_4.c + t1bv_5.c + t1bv_6.c + t1bv_64.c + t1bv_7.c + t1bv_8.c + t1bv_9.c + t1fuv_10.c + t1fuv_2.c + t1fuv_3.c + t1fuv_4.c + t1fuv_5.c + t1fuv_6.c + t1fuv_7.c + t1fuv_8.c + t1fuv_9.c + t1fv_10.c + t1fv_12.c + t1fv_15.c + t1fv_16.c + t1fv_2.c + t1fv_20.c + t1fv_25.c + t1fv_3.c + t1fv_32.c + t1fv_4.c + t1fv_5.c + t1fv_6.c + t1fv_64.c + t1fv_7.c + t1fv_8.c + t1fv_9.c + 
t1sv_16.c + t1sv_2.c + t1sv_32.c + t1sv_4.c + t1sv_8.c + t2bv_10.c + t2bv_16.c + t2bv_2.c + t2bv_20.c + t2bv_25.c + t2bv_32.c + t2bv_4.c + t2bv_5.c + t2bv_64.c + t2bv_8.c + t2fv_10.c + t2fv_16.c + t2fv_2.c + t2fv_20.c + t2fv_25.c + t2fv_32.c + t2fv_4.c + t2fv_5.c + t2fv_64.c + t2fv_8.c + t2sv_16.c + t2sv_32.c + t2sv_4.c + t2sv_8.c + t3bv_10.c + t3bv_16.c + t3bv_20.c + t3bv_25.c + t3bv_32.c + t3bv_4.c + t3bv_5.c + t3bv_8.c + t3fv_10.c + t3fv_16.c + t3fv_20.c + t3fv_25.c + t3fv_32.c + t3fv_4.c + t3fv_5.c + t3fv_8.c) \ No newline at end of file diff --git a/dft/simd/avx2-128/CMakeLists.txt b/dft/simd/avx2-128/CMakeLists.txt new file mode 100644 index 000000000..60f9ca9be --- /dev/null +++ b/dft/simd/avx2-128/CMakeLists.txt @@ -0,0 +1,174 @@ +target_sources(${fftw3_lib} + PRIVATE + codlist.c + genus.c + n1bv_10.c + n1bv_11.c + n1bv_12.c + n1bv_128.c + n1bv_13.c + n1bv_14.c + n1bv_15.c + n1bv_16.c + n1bv_2.c + n1bv_20.c + n1bv_25.c + n1bv_3.c + n1bv_32.c + n1bv_4.c + n1bv_5.c + n1bv_6.c + n1bv_64.c + n1bv_7.c + n1bv_8.c + n1bv_9.c + n1fv_10.c + n1fv_11.c + n1fv_12.c + n1fv_128.c + n1fv_13.c + n1fv_14.c + n1fv_15.c + n1fv_16.c + n1fv_2.c + n1fv_20.c + n1fv_25.c + n1fv_3.c + n1fv_32.c + n1fv_4.c + n1fv_5.c + n1fv_6.c + n1fv_64.c + n1fv_7.c + n1fv_8.c + n1fv_9.c + n2bv_10.c + n2bv_12.c + n2bv_14.c + n2bv_16.c + n2bv_2.c + n2bv_20.c + n2bv_32.c + n2bv_4.c + n2bv_6.c + n2bv_64.c + n2bv_8.c + n2fv_10.c + n2fv_12.c + n2fv_14.c + n2fv_16.c + n2fv_2.c + n2fv_20.c + n2fv_32.c + n2fv_4.c + n2fv_6.c + n2fv_64.c + n2fv_8.c + n2sv_16.c + n2sv_32.c + n2sv_4.c + n2sv_64.c + n2sv_8.c + q1bv_2.c + q1bv_4.c + q1bv_5.c + q1bv_8.c + q1fv_2.c + q1fv_4.c + q1fv_5.c + q1fv_8.c + t1buv_10.c + t1buv_2.c + t1buv_3.c + t1buv_4.c + t1buv_5.c + t1buv_6.c + t1buv_7.c + t1buv_8.c + t1buv_9.c + t1bv_10.c + t1bv_12.c + t1bv_15.c + t1bv_16.c + t1bv_2.c + t1bv_20.c + t1bv_25.c + t1bv_3.c + t1bv_32.c + t1bv_4.c + t1bv_5.c + t1bv_6.c + t1bv_64.c + t1bv_7.c + t1bv_8.c + t1bv_9.c + t1fuv_10.c + t1fuv_2.c + 
t1fuv_3.c + t1fuv_4.c + t1fuv_5.c + t1fuv_6.c + t1fuv_7.c + t1fuv_8.c + t1fuv_9.c + t1fv_10.c + t1fv_12.c + t1fv_15.c + t1fv_16.c + t1fv_2.c + t1fv_20.c + t1fv_25.c + t1fv_3.c + t1fv_32.c + t1fv_4.c + t1fv_5.c + t1fv_6.c + t1fv_64.c + t1fv_7.c + t1fv_8.c + t1fv_9.c + t1sv_16.c + t1sv_2.c + t1sv_32.c + t1sv_4.c + t1sv_8.c + t2bv_10.c + t2bv_16.c + t2bv_2.c + t2bv_20.c + t2bv_25.c + t2bv_32.c + t2bv_4.c + t2bv_5.c + t2bv_64.c + t2bv_8.c + t2fv_10.c + t2fv_16.c + t2fv_2.c + t2fv_20.c + t2fv_25.c + t2fv_32.c + t2fv_4.c + t2fv_5.c + t2fv_64.c + t2fv_8.c + t2sv_16.c + t2sv_32.c + t2sv_4.c + t2sv_8.c + t3bv_10.c + t3bv_16.c + t3bv_20.c + t3bv_25.c + t3bv_32.c + t3bv_4.c + t3bv_5.c + t3bv_8.c + t3fv_10.c + t3fv_16.c + t3fv_20.c + t3fv_25.c + t3fv_32.c + t3fv_4.c + t3fv_5.c + t3fv_8.c) \ No newline at end of file diff --git a/dft/simd/avx2/CMakeLists.txt b/dft/simd/avx2/CMakeLists.txt new file mode 100644 index 000000000..60f9ca9be --- /dev/null +++ b/dft/simd/avx2/CMakeLists.txt @@ -0,0 +1,174 @@ +target_sources(${fftw3_lib} + PRIVATE + codlist.c + genus.c + n1bv_10.c + n1bv_11.c + n1bv_12.c + n1bv_128.c + n1bv_13.c + n1bv_14.c + n1bv_15.c + n1bv_16.c + n1bv_2.c + n1bv_20.c + n1bv_25.c + n1bv_3.c + n1bv_32.c + n1bv_4.c + n1bv_5.c + n1bv_6.c + n1bv_64.c + n1bv_7.c + n1bv_8.c + n1bv_9.c + n1fv_10.c + n1fv_11.c + n1fv_12.c + n1fv_128.c + n1fv_13.c + n1fv_14.c + n1fv_15.c + n1fv_16.c + n1fv_2.c + n1fv_20.c + n1fv_25.c + n1fv_3.c + n1fv_32.c + n1fv_4.c + n1fv_5.c + n1fv_6.c + n1fv_64.c + n1fv_7.c + n1fv_8.c + n1fv_9.c + n2bv_10.c + n2bv_12.c + n2bv_14.c + n2bv_16.c + n2bv_2.c + n2bv_20.c + n2bv_32.c + n2bv_4.c + n2bv_6.c + n2bv_64.c + n2bv_8.c + n2fv_10.c + n2fv_12.c + n2fv_14.c + n2fv_16.c + n2fv_2.c + n2fv_20.c + n2fv_32.c + n2fv_4.c + n2fv_6.c + n2fv_64.c + n2fv_8.c + n2sv_16.c + n2sv_32.c + n2sv_4.c + n2sv_64.c + n2sv_8.c + q1bv_2.c + q1bv_4.c + q1bv_5.c + q1bv_8.c + q1fv_2.c + q1fv_4.c + q1fv_5.c + q1fv_8.c + t1buv_10.c + t1buv_2.c + t1buv_3.c + t1buv_4.c + t1buv_5.c + 
t1buv_6.c + t1buv_7.c + t1buv_8.c + t1buv_9.c + t1bv_10.c + t1bv_12.c + t1bv_15.c + t1bv_16.c + t1bv_2.c + t1bv_20.c + t1bv_25.c + t1bv_3.c + t1bv_32.c + t1bv_4.c + t1bv_5.c + t1bv_6.c + t1bv_64.c + t1bv_7.c + t1bv_8.c + t1bv_9.c + t1fuv_10.c + t1fuv_2.c + t1fuv_3.c + t1fuv_4.c + t1fuv_5.c + t1fuv_6.c + t1fuv_7.c + t1fuv_8.c + t1fuv_9.c + t1fv_10.c + t1fv_12.c + t1fv_15.c + t1fv_16.c + t1fv_2.c + t1fv_20.c + t1fv_25.c + t1fv_3.c + t1fv_32.c + t1fv_4.c + t1fv_5.c + t1fv_6.c + t1fv_64.c + t1fv_7.c + t1fv_8.c + t1fv_9.c + t1sv_16.c + t1sv_2.c + t1sv_32.c + t1sv_4.c + t1sv_8.c + t2bv_10.c + t2bv_16.c + t2bv_2.c + t2bv_20.c + t2bv_25.c + t2bv_32.c + t2bv_4.c + t2bv_5.c + t2bv_64.c + t2bv_8.c + t2fv_10.c + t2fv_16.c + t2fv_2.c + t2fv_20.c + t2fv_25.c + t2fv_32.c + t2fv_4.c + t2fv_5.c + t2fv_64.c + t2fv_8.c + t2sv_16.c + t2sv_32.c + t2sv_4.c + t2sv_8.c + t3bv_10.c + t3bv_16.c + t3bv_20.c + t3bv_25.c + t3bv_32.c + t3bv_4.c + t3bv_5.c + t3bv_8.c + t3fv_10.c + t3fv_16.c + t3fv_20.c + t3fv_25.c + t3fv_32.c + t3fv_4.c + t3fv_5.c + t3fv_8.c) \ No newline at end of file diff --git a/dft/simd/sse2/CMakeLists.txt b/dft/simd/sse2/CMakeLists.txt new file mode 100644 index 000000000..60f9ca9be --- /dev/null +++ b/dft/simd/sse2/CMakeLists.txt @@ -0,0 +1,174 @@ +target_sources(${fftw3_lib} + PRIVATE + codlist.c + genus.c + n1bv_10.c + n1bv_11.c + n1bv_12.c + n1bv_128.c + n1bv_13.c + n1bv_14.c + n1bv_15.c + n1bv_16.c + n1bv_2.c + n1bv_20.c + n1bv_25.c + n1bv_3.c + n1bv_32.c + n1bv_4.c + n1bv_5.c + n1bv_6.c + n1bv_64.c + n1bv_7.c + n1bv_8.c + n1bv_9.c + n1fv_10.c + n1fv_11.c + n1fv_12.c + n1fv_128.c + n1fv_13.c + n1fv_14.c + n1fv_15.c + n1fv_16.c + n1fv_2.c + n1fv_20.c + n1fv_25.c + n1fv_3.c + n1fv_32.c + n1fv_4.c + n1fv_5.c + n1fv_6.c + n1fv_64.c + n1fv_7.c + n1fv_8.c + n1fv_9.c + n2bv_10.c + n2bv_12.c + n2bv_14.c + n2bv_16.c + n2bv_2.c + n2bv_20.c + n2bv_32.c + n2bv_4.c + n2bv_6.c + n2bv_64.c + n2bv_8.c + n2fv_10.c + n2fv_12.c + n2fv_14.c + n2fv_16.c + n2fv_2.c + n2fv_20.c + n2fv_32.c 
+ n2fv_4.c + n2fv_6.c + n2fv_64.c + n2fv_8.c + n2sv_16.c + n2sv_32.c + n2sv_4.c + n2sv_64.c + n2sv_8.c + q1bv_2.c + q1bv_4.c + q1bv_5.c + q1bv_8.c + q1fv_2.c + q1fv_4.c + q1fv_5.c + q1fv_8.c + t1buv_10.c + t1buv_2.c + t1buv_3.c + t1buv_4.c + t1buv_5.c + t1buv_6.c + t1buv_7.c + t1buv_8.c + t1buv_9.c + t1bv_10.c + t1bv_12.c + t1bv_15.c + t1bv_16.c + t1bv_2.c + t1bv_20.c + t1bv_25.c + t1bv_3.c + t1bv_32.c + t1bv_4.c + t1bv_5.c + t1bv_6.c + t1bv_64.c + t1bv_7.c + t1bv_8.c + t1bv_9.c + t1fuv_10.c + t1fuv_2.c + t1fuv_3.c + t1fuv_4.c + t1fuv_5.c + t1fuv_6.c + t1fuv_7.c + t1fuv_8.c + t1fuv_9.c + t1fv_10.c + t1fv_12.c + t1fv_15.c + t1fv_16.c + t1fv_2.c + t1fv_20.c + t1fv_25.c + t1fv_3.c + t1fv_32.c + t1fv_4.c + t1fv_5.c + t1fv_6.c + t1fv_64.c + t1fv_7.c + t1fv_8.c + t1fv_9.c + t1sv_16.c + t1sv_2.c + t1sv_32.c + t1sv_4.c + t1sv_8.c + t2bv_10.c + t2bv_16.c + t2bv_2.c + t2bv_20.c + t2bv_25.c + t2bv_32.c + t2bv_4.c + t2bv_5.c + t2bv_64.c + t2bv_8.c + t2fv_10.c + t2fv_16.c + t2fv_2.c + t2fv_20.c + t2fv_25.c + t2fv_32.c + t2fv_4.c + t2fv_5.c + t2fv_64.c + t2fv_8.c + t2sv_16.c + t2sv_32.c + t2sv_4.c + t2sv_8.c + t3bv_10.c + t3bv_16.c + t3bv_20.c + t3bv_25.c + t3bv_32.c + t3bv_4.c + t3bv_5.c + t3bv_8.c + t3fv_10.c + t3fv_16.c + t3fv_20.c + t3fv_25.c + t3fv_32.c + t3fv_4.c + t3fv_5.c + t3fv_8.c) \ No newline at end of file diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt new file mode 100644 index 000000000..e94400bf5 --- /dev/null +++ b/kernel/CMakeLists.txt @@ -0,0 +1,47 @@ +target_sources(${fftw3_lib} + PRIVATE + cycle.h + ifftw.h + align.c + alloc.c + assert.c + awake.c + buffered.c + cpy1d.c + cpy2d-pair.c + cpy2d.c + ct.c + debug.c + extract-reim.c + hash.c + iabs.c + kalloc.c + md5-1.c + md5.c + minmax.c + ops.c + pickdim.c + plan.c + planner.c + primes.c + print.c + problem.c + rader.c + scan.c + solver.c + solvtab.c + stride.c + tensor.c + tensor1.c + tensor2.c + tensor3.c + tensor4.c + tensor5.c + tensor7.c + tensor8.c + tensor9.c + tile2d.c + timer.c + 
transpose.c + trig.c + twiddle.c) \ No newline at end of file diff --git a/kernel/ifftw.h b/kernel/ifftw.h index 0733e7566..567027552 100644 --- a/kernel/ifftw.h +++ b/kernel/ifftw.h @@ -29,6 +29,7 @@ #include /* va_list */ #include /* ptrdiff_t */ #include /* INT_MAX */ +#include #if HAVE_SYS_TYPES_H # include @@ -42,23 +43,17 @@ # include /* uintptr_t, maybe */ #endif +#ifdef ENABLE_EXPORT_ADDITIONAL_FUNCTIONS + #define EXPORT_ADDITIONAL_FUNCTIONS FFTW3_EXPORT +#else + #define EXPORT_ADDITIONAL_FUNCTIONS +#endif /* ENABLE_EXPORT_ADDITIONAL_FUNCTIONS */ + #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ -/* Windows annoyances -- since tests/hook.c uses some internal - FFTW functions, we need to given them the dllexport attribute - under Windows when compiling as a DLL (see api/fftw3.h). */ -#if defined(FFTW_EXTERN) -# define IFFTW_EXTERN FFTW_EXTERN -#elif (defined(FFTW_DLL) || defined(DLL_EXPORT)) \ - && (defined(_WIN32) || defined(__WIN32__)) -# define IFFTW_EXTERN extern __declspec(dllexport) -#else -# define IFFTW_EXTERN extern -#endif - /* determine precision and name-mangling scheme */ #define CONCAT(prefix, name) prefix ## name #if defined(FFTW_SINGLE) @@ -89,7 +84,7 @@ typedef ptrdiff_t INT; #define NELEM(array) ((sizeof(array) / sizeof((array)[0]))) #define FFT_SIGN (-1) /* sign convention for forward transforms */ -extern void X(extract_reim)(int sign, R *c, R **r, R **i); +void X(extract_reim)(int sign, R *c, R **r, R **i); #define REGISTER_SOLVER(p, s) X(solver_register)(p, s) @@ -110,15 +105,15 @@ extern void X(extract_reim)(int sign, R *c, R **r, R **i); #define HAVE_SIMD 0 #endif -extern int X(have_simd_sse2)(void); -extern int X(have_simd_avx)(void); -extern int X(have_simd_avx_128_fma)(void); -extern int X(have_simd_avx2)(void); -extern int X(have_simd_avx2_128)(void); -extern int X(have_simd_avx512)(void); -extern int X(have_simd_altivec)(void); -extern int X(have_simd_vsx)(void); -extern int X(have_simd_neon)(void); +int 
X(have_simd_sse2)(void); +int X(have_simd_avx)(void); +int X(have_simd_avx_128_fma)(void); +int X(have_simd_avx2)(void); +int X(have_simd_avx2_128)(void); +int X(have_simd_avx512)(void); +int X(have_simd_altivec)(void); +int X(have_simd_vsx)(void); +int X(have_simd_neon)(void); /* forward declarations */ typedef struct problem_s problem; @@ -242,7 +237,7 @@ void *alloca(size_t); /*-----------------------------------------------------------------------*/ /* assert.c: */ -IFFTW_EXTERN void X(assertion_failed)(const char *s, +FFTW3_EXPORT void X(assertion_failed)(const char *s, int line, const char *file); /* always check */ @@ -257,13 +252,13 @@ IFFTW_EXTERN void X(assertion_failed)(const char *s, #define A(ex) /* nothing */ #endif -extern void X(debug)(const char *format, ...); +void X(debug)(const char *format, ...); #define D X(debug) /*-----------------------------------------------------------------------*/ /* kalloc.c: */ -extern void *X(kernel_malloc)(size_t n); -extern void X(kernel_free)(void *p); +void *X(kernel_malloc)(size_t n); +void X(kernel_free)(void *p); /*-----------------------------------------------------------------------*/ /* alloc.c: */ @@ -285,10 +280,10 @@ enum malloc_tag { MALLOC_WHAT_LAST /* must be last */ }; -IFFTW_EXTERN void X(ifree)(void *ptr); -extern void X(ifree0)(void *ptr); +EXPORT_ADDITIONAL_FUNCTIONS void X(ifree)(void *ptr); +void X(ifree0)(void *ptr); -IFFTW_EXTERN void *X(malloc_plain)(size_t sz); +EXPORT_ADDITIONAL_FUNCTIONS void *X(malloc_plain)(size_t sz); #define MALLOC(n, what) X(malloc_plain)(n) /*-----------------------------------------------------------------------*/ @@ -340,12 +335,12 @@ typedef struct { double other; } opcnt; -void X(ops_zero)(opcnt *dst); +void EXPORT_ADDITIONAL_FUNCTIONS X(ops_zero)(opcnt *dst); void X(ops_other)(INT o, opcnt *dst); void X(ops_cpy)(const opcnt *src, opcnt *dst); void X(ops_add)(const opcnt *a, const opcnt *b, opcnt *dst); -void X(ops_add2)(const opcnt *a, opcnt *dst); +void 
EXPORT_ADDITIONAL_FUNCTIONS X(ops_add2)(const opcnt *a, opcnt *dst); /* dst = m * a + b */ void X(ops_madd)(INT m, const opcnt *a, const opcnt *b, opcnt *dst); @@ -356,7 +351,7 @@ void X(ops_madd2)(INT m, const opcnt *a, opcnt *dst); /*-----------------------------------------------------------------------*/ /* minmax.c: */ -INT X(imax)(INT a, INT b); +INT EXPORT_ADDITIONAL_FUNCTIONS X(imax)(INT a, INT b); INT X(imin)(INT a, INT b); /*-----------------------------------------------------------------------*/ @@ -431,8 +426,8 @@ typedef enum { INPLACE_IS, INPLACE_OS } inplace_kind; tensor *X(mktensor)(int rnk); tensor *X(mktensor_0d)(void); -tensor *X(mktensor_1d)(INT n, INT is, INT os); -tensor *X(mktensor_2d)(INT n0, INT is0, INT os0, +tensor EXPORT_ADDITIONAL_FUNCTIONS *X(mktensor_1d)(INT n, INT is, INT os); +tensor EXPORT_ADDITIONAL_FUNCTIONS *X(mktensor_2d)(INT n0, INT is0, INT os0, INT n1, INT is1, INT os1); tensor *X(mktensor_3d)(INT n0, INT is0, INT os0, INT n1, INT is1, INT os1, @@ -456,7 +451,7 @@ int X(tensor_inplace_strides)(const tensor *sz); int X(tensor_inplace_strides2)(const tensor *a, const tensor *b); int X(tensor_strides_decrease)(const tensor *sz, const tensor *vecsz, inplace_kind k); -tensor *X(tensor_copy)(const tensor *sz); +tensor EXPORT_ADDITIONAL_FUNCTIONS *X(tensor_copy)(const tensor *sz); int X(tensor_kosherp)(const tensor *x); tensor *X(tensor_copy_inplace)(const tensor *sz, inplace_kind k); @@ -466,8 +461,8 @@ tensor *X(tensor_compress)(const tensor *sz); tensor *X(tensor_compress_contiguous)(const tensor *sz); tensor *X(tensor_append)(const tensor *a, const tensor *b); void X(tensor_split)(const tensor *sz, tensor **a, int a_rnk, tensor **b); -int X(tensor_tornk1)(const tensor *t, INT *n, INT *is, INT *os); -void X(tensor_destroy)(tensor *sz); +int EXPORT_ADDITIONAL_FUNCTIONS X(tensor_tornk1)(const tensor *t, INT *n, INT *is, INT *os); +void EXPORT_ADDITIONAL_FUNCTIONS X(tensor_destroy)(tensor *sz); void X(tensor_destroy2)(tensor *a, 
tensor *b); void X(tensor_destroy4)(tensor *a, tensor *b, tensor *c, tensor *d); void X(tensor_print)(const tensor *sz, printer *p); @@ -524,7 +519,7 @@ struct printer_s { printer *X(mkprinter)(size_t size, void (*putchr)(printer *p, char c), void (*cleanup)(printer *p)); -IFFTW_EXTERN void X(printer_destroy)(printer *p); +void FFTW3_EXPORT X(printer_destroy)(printer *p); /*-----------------------------------------------------------------------*/ /* scan.c */ @@ -564,8 +559,8 @@ struct plan_s { }; plan *X(mkplan)(size_t size, const plan_adt *adt); -void X(plan_destroy_internal)(plan *ego); -IFFTW_EXTERN void X(plan_awake)(plan *ego, enum wakefulness wakefulness); +void EXPORT_ADDITIONAL_FUNCTIONS X(plan_destroy_internal)(plan *ego); +void FFTW3_EXPORT X(plan_awake)(plan *ego, enum wakefulness wakefulness); void X(plan_null_destroy)(plan *ego); /*-----------------------------------------------------------------------*/ @@ -581,10 +576,10 @@ struct solver_s { int refcnt; }; -solver *X(mksolver)(size_t size, const solver_adt *adt); +solver EXPORT_ADDITIONAL_FUNCTIONS *X(mksolver)(size_t size, const solver_adt *adt); void X(solver_use)(solver *ego); void X(solver_destroy)(solver *ego); -void X(solver_register)(planner *plnr, solver *s); +void EXPORT_ADDITIONAL_FUNCTIONS X(solver_register)(planner *plnr, solver *s); /* shorthand */ #define MKSOLVER(type, adt) (type *)X(mksolver)(sizeof(type), adt) @@ -798,7 +793,7 @@ void X(planner_destroy)(planner *ego); /* make plan, destroy problem */ -plan *X(mkplan_d)(planner *ego, problem *p); +plan EXPORT_ADDITIONAL_FUNCTIONS *X(mkplan_d)(planner *ego, problem *p); plan *X(mkplan_f_d)(planner *ego, problem *p, unsigned l_set, unsigned u_set, unsigned u_reset); @@ -815,7 +810,7 @@ extern const INT X(an_INT_guaranteed_to_be_zero); #ifdef PRECOMPUTE_ARRAY_INDICES typedef INT *stride; #define WS(stride, i) (stride[i]) -extern stride X(mkstride)(INT n, INT s); +stride X(mkstride)(INT n, INT s); void X(stride_destroy)(stride p); /* 
hackery to prevent the compiler from copying the strides array onto the stack */ @@ -859,13 +854,13 @@ typedef INT stride; struct solvtab_s { void (*reg)(planner *); const char *reg_nam; }; typedef struct solvtab_s solvtab[]; -void X(solvtab_exec)(const solvtab tbl, planner *p); +void EXPORT_ADDITIONAL_FUNCTIONS X(solvtab_exec)(const solvtab tbl, planner *p); #define SOLVTAB(s) { s, STRINGIZE(s) } #define SOLVTAB_END { 0, 0 } /*-----------------------------------------------------------------------*/ /* pickdim.c */ -int X(pickdim)(int which_dim, const int *buddies, size_t nbuddies, +int EXPORT_ADDITIONAL_FUNCTIONS X(pickdim)(int which_dim, const int *buddies, size_t nbuddies, const tensor *sz, int oop, int *dp); /*-----------------------------------------------------------------------*/ @@ -934,7 +929,7 @@ int X(is_prime)(INT n); INT X(next_prime)(INT n); int X(factors_into)(INT n, const INT *primes); int X(factors_into_small_primes)(INT n); -INT X(choose_radix)(INT r, INT n); +INT EXPORT_ADDITIONAL_FUNCTIONS X(choose_radix)(INT r, INT n); INT X(isqrt)(INT n); INT X(modulo)(INT a, INT n); @@ -1018,7 +1013,7 @@ extern unsigned X(random_estimate_seed); double X(measure_execution_time)(const planner *plnr, plan *pln, const problem *p); -IFFTW_EXTERN int X(ialignment_of)(R *p); +int X(ialignment_of)(R *p); unsigned X(hash)(const char *s); INT X(nbuf)(INT n, INT vl, INT maxnbuf); int X(nbuf_redundant)(INT n, INT vl, size_t which, diff --git a/kernel/kalloc.c b/kernel/kalloc.c index e42f6f34d..660dfffd1 100644 --- a/kernel/kalloc.c +++ b/kernel/kalloc.c @@ -125,6 +125,11 @@ void *X(kernel_malloc)(size_t n) # undef real_free # define real_free MPFree +# elif defined(__MINGW32__) + p = _aligned_malloc(n, MIN_ALIGNMENT); +# undef real_free +# define real_free _aligned_free + # else /* Add your machine here and send a patch to fftw@fftw.org or (e.g. 
for Windows) configure --with-our-malloc */ diff --git a/libbench2/CMakeLists.txt b/libbench2/CMakeLists.txt new file mode 100644 index 000000000..d13793721 --- /dev/null +++ b/libbench2/CMakeLists.txt @@ -0,0 +1,39 @@ +target_sources(bench + PRIVATE + bench-user.h + bench.h + my-getopt.h + verify.h + after-ccopy-from.c + after-ccopy-to.c + after-hccopy-from.c + after-hccopy-to.c + after-rcopy-from.c + after-rcopy-to.c + allocate.c + aset.c + bench-cost-postprocess.c + bench-exit.c + bench-main.c + can-do.c + caset.c + dotens2.c + info.c + main.c + mflops.c + mp.c + my-getopt.c + ovtpvt.c + pow2.c + problem.c + report.c + speed.c + tensor.c + timer.c + util.c + verify-dft.c + verify-lib.c + verify-r2r.c + verify-rdft2.c + verify.c + zero.c) \ No newline at end of file diff --git a/libbench2/bench.h b/libbench2/bench.h index 4aeb3f7dc..dbe9f9510 100644 --- a/libbench2/bench.h +++ b/libbench2/bench.h @@ -25,7 +25,7 @@ extern double time_min; extern int time_repeat; -extern void timer_init(double tmin, int repeat); +void timer_init(double tmin, int repeat); /* report functions */ extern void (*report)(const bench_problem *p, double *t, int st); @@ -39,25 +39,25 @@ void report_can_do(const char *param); void report_info(const char *param); void report_info_all(void); -extern int aligned_main(int argc, char *argv[]); -extern int bench_main(int argc, char *argv[]); +int aligned_main(int argc, char *argv[]); +int bench_main(int argc, char *argv[]); -extern void speed(const char *param, int setup_only); -extern void accuracy(const char *param, int rounds, int impulse_rounds); +void speed(const char *param, int setup_only); +void accuracy(const char *param, int rounds, int impulse_rounds); -extern double mflops(const bench_problem *p, double t); +double mflops(const bench_problem *p, double t); -extern double bench_drand(void); -extern void bench_srand(int seed); +double bench_drand(void); +void bench_srand(int seed); -extern bench_problem *problem_parse(const char *desc); 
+bench_problem *problem_parse(const char *desc); -extern void ovtpvt(const char *format, ...); -extern void ovtpvt_err(const char *format, ...); +void ovtpvt(const char *format, ...); +void ovtpvt_err(const char *format, ...); -extern void fftaccuracy(int n, bench_complex *a, bench_complex *ffta, +void fftaccuracy(int n, bench_complex *a, bench_complex *ffta, int sign, double err[6]); -extern void fftaccuracy_done(void); +void fftaccuracy_done(void); -extern void caset(bench_complex *A, int n, bench_complex x); -extern void aset(bench_real *A, int n, bench_real x); +void caset(bench_complex *A, int n, bench_complex x); +void aset(bench_real *A, int n, bench_real x); diff --git a/mpi/f03-wrap.sh b/mpi/f03-wrap.sh index 530a0d08b..7cc8dff8d 100755 --- a/mpi/f03-wrap.sh +++ b/mpi/f03-wrap.sh @@ -14,9 +14,6 @@ echo "#include \"fftw3-mpi.h\"" echo "#include \"ifftw-mpi.h\"" echo -# Declare prototypes using FFTW_EXTERN, important for Windows DLLs -grep -v 'mpi.h' fftw3-mpi.h | gcc -E -I../api - |grep "fftw_mpi_init" |tr ';' '\n' | grep "MPI_Comm" | perl genf03-wrap.pl | grep "MPI_Fint" | sed 's/^/FFTW_EXTERN /;s/$/;/' - grep -v 'mpi.h' fftw3-mpi.h | gcc -E -I../api - |grep "fftw_mpi_init" |tr ';' '\n' | grep "MPI_Comm" | perl genf03-wrap.pl diff --git a/mpi/fftw3-mpi.h b/mpi/fftw3-mpi.h index 6e6dd2b62..5325fb2b1 100644 --- a/mpi/fftw3-mpi.h +++ b/mpi/fftw3-mpi.h @@ -75,124 +75,124 @@ struct fftw_mpi_ddim_do_not_use_me { \ typedef struct fftw_mpi_ddim_do_not_use_me XM(ddim); \ \ -FFTW_EXTERN void XM(init)(void); \ -FFTW_EXTERN void XM(cleanup)(void); \ +extern void XM(init)(void); \ +extern void XM(cleanup)(void); \ \ -FFTW_EXTERN ptrdiff_t XM(local_size_many_transposed) \ +extern ptrdiff_t XM(local_size_many_transposed) \ (int rnk, const ptrdiff_t *n, ptrdiff_t howmany, \ ptrdiff_t block0, ptrdiff_t block1, MPI_Comm comm, \ ptrdiff_t *local_n0, ptrdiff_t *local_0_start, \ ptrdiff_t *local_n1, ptrdiff_t *local_1_start); \ -FFTW_EXTERN ptrdiff_t XM(local_size_many) \ 
+extern ptrdiff_t XM(local_size_many) \ (int rnk, const ptrdiff_t *n, ptrdiff_t howmany, \ ptrdiff_t block0, MPI_Comm comm, \ ptrdiff_t *local_n0, ptrdiff_t *local_0_start); \ -FFTW_EXTERN ptrdiff_t XM(local_size_transposed) \ +extern ptrdiff_t XM(local_size_transposed) \ (int rnk, const ptrdiff_t *n, MPI_Comm comm, \ ptrdiff_t *local_n0, ptrdiff_t *local_0_start, \ ptrdiff_t *local_n1, ptrdiff_t *local_1_start); \ -FFTW_EXTERN ptrdiff_t XM(local_size) \ +extern ptrdiff_t XM(local_size) \ (int rnk, const ptrdiff_t *n, MPI_Comm comm, \ ptrdiff_t *local_n0, ptrdiff_t *local_0_start); \ -FFTW_EXTERN ptrdiff_t XM(local_size_many_1d)( \ +extern ptrdiff_t XM(local_size_many_1d)( \ ptrdiff_t n0, ptrdiff_t howmany, \ MPI_Comm comm, int sign, unsigned flags, \ ptrdiff_t *local_ni, ptrdiff_t *local_i_start, \ ptrdiff_t *local_no, ptrdiff_t *local_o_start); \ -FFTW_EXTERN ptrdiff_t XM(local_size_1d)( \ +extern ptrdiff_t XM(local_size_1d)( \ ptrdiff_t n0, MPI_Comm comm, int sign, unsigned flags, \ ptrdiff_t *local_ni, ptrdiff_t *local_i_start, \ ptrdiff_t *local_no, ptrdiff_t *local_o_start); \ -FFTW_EXTERN ptrdiff_t XM(local_size_2d)( \ +extern ptrdiff_t XM(local_size_2d)( \ ptrdiff_t n0, ptrdiff_t n1, MPI_Comm comm, \ ptrdiff_t *local_n0, ptrdiff_t *local_0_start); \ -FFTW_EXTERN ptrdiff_t XM(local_size_2d_transposed)( \ +extern ptrdiff_t XM(local_size_2d_transposed)( \ ptrdiff_t n0, ptrdiff_t n1, MPI_Comm comm, \ ptrdiff_t *local_n0, ptrdiff_t *local_0_start, \ ptrdiff_t *local_n1, ptrdiff_t *local_1_start); \ -FFTW_EXTERN ptrdiff_t XM(local_size_3d)( \ +extern ptrdiff_t XM(local_size_3d)( \ ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, MPI_Comm comm, \ ptrdiff_t *local_n0, ptrdiff_t *local_0_start); \ -FFTW_EXTERN ptrdiff_t XM(local_size_3d_transposed)( \ +extern ptrdiff_t XM(local_size_3d_transposed)( \ ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, MPI_Comm comm, \ ptrdiff_t *local_n0, ptrdiff_t *local_0_start, \ ptrdiff_t *local_n1, ptrdiff_t *local_1_start); \ \ -FFTW_EXTERN 
X(plan) XM(plan_many_transpose) \ +extern X(plan) XM(plan_many_transpose) \ (ptrdiff_t n0, ptrdiff_t n1, \ ptrdiff_t howmany, ptrdiff_t block0, ptrdiff_t block1, \ R *in, R *out, MPI_Comm comm, unsigned flags); \ -FFTW_EXTERN X(plan) XM(plan_transpose) \ +extern X(plan) XM(plan_transpose) \ (ptrdiff_t n0, ptrdiff_t n1, \ R *in, R *out, MPI_Comm comm, unsigned flags); \ \ -FFTW_EXTERN X(plan) XM(plan_many_dft) \ +extern X(plan) XM(plan_many_dft) \ (int rnk, const ptrdiff_t *n, ptrdiff_t howmany, \ ptrdiff_t block, ptrdiff_t tblock, C *in, C *out, \ MPI_Comm comm, int sign, unsigned flags); \ -FFTW_EXTERN X(plan) XM(plan_dft) \ +extern X(plan) XM(plan_dft) \ (int rnk, const ptrdiff_t *n, C *in, C *out, \ MPI_Comm comm, int sign, unsigned flags); \ -FFTW_EXTERN X(plan) XM(plan_dft_1d) \ +extern X(plan) XM(plan_dft_1d) \ (ptrdiff_t n0, C *in, C *out, \ MPI_Comm comm, int sign, unsigned flags); \ -FFTW_EXTERN X(plan) XM(plan_dft_2d) \ +extern X(plan) XM(plan_dft_2d) \ (ptrdiff_t n0, ptrdiff_t n1, C *in, C *out, \ MPI_Comm comm, int sign, unsigned flags); \ -FFTW_EXTERN X(plan) XM(plan_dft_3d) \ +extern X(plan) XM(plan_dft_3d) \ (ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, C *in, C *out, \ MPI_Comm comm, int sign, unsigned flags); \ \ -FFTW_EXTERN X(plan) XM(plan_many_r2r) \ +extern X(plan) XM(plan_many_r2r) \ (int rnk, const ptrdiff_t *n, ptrdiff_t howmany, \ ptrdiff_t iblock, ptrdiff_t oblock, R *in, R *out, \ MPI_Comm comm, const X(r2r_kind) *kind, unsigned flags); \ -FFTW_EXTERN X(plan) XM(plan_r2r) \ +extern X(plan) XM(plan_r2r) \ (int rnk, const ptrdiff_t *n, R *in, R *out, \ MPI_Comm comm, const X(r2r_kind) *kind, unsigned flags); \ -FFTW_EXTERN X(plan) XM(plan_r2r_2d) \ +extern X(plan) XM(plan_r2r_2d) \ (ptrdiff_t n0, ptrdiff_t n1, R *in, R *out, MPI_Comm comm, \ X(r2r_kind) kind0, X(r2r_kind) kind1, unsigned flags); \ -FFTW_EXTERN X(plan) XM(plan_r2r_3d) \ +extern X(plan) XM(plan_r2r_3d) \ (ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, \ R *in, R *out, MPI_Comm comm, 
X(r2r_kind) kind0, \ X(r2r_kind) kind1, X(r2r_kind) kind2, unsigned flags); \ \ -FFTW_EXTERN X(plan) XM(plan_many_dft_r2c) \ +extern X(plan) XM(plan_many_dft_r2c) \ (int rnk, const ptrdiff_t *n, ptrdiff_t howmany, \ ptrdiff_t iblock, ptrdiff_t oblock, R *in, C *out, \ MPI_Comm comm, unsigned flags); \ -FFTW_EXTERN X(plan) XM(plan_dft_r2c) \ +extern X(plan) XM(plan_dft_r2c) \ (int rnk, const ptrdiff_t *n, R *in, C *out, \ MPI_Comm comm, unsigned flags); \ -FFTW_EXTERN X(plan) XM(plan_dft_r2c_2d) \ +extern X(plan) XM(plan_dft_r2c_2d) \ (ptrdiff_t n0, ptrdiff_t n1, R *in, C *out, \ MPI_Comm comm, unsigned flags); \ -FFTW_EXTERN X(plan) XM(plan_dft_r2c_3d) \ +extern X(plan) XM(plan_dft_r2c_3d) \ (ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, R *in, C *out, \ MPI_Comm comm, unsigned flags); \ \ -FFTW_EXTERN X(plan) XM(plan_many_dft_c2r) \ +extern X(plan) XM(plan_many_dft_c2r) \ (int rnk, const ptrdiff_t *n, ptrdiff_t howmany, \ ptrdiff_t iblock, ptrdiff_t oblock, C *in, R *out, \ MPI_Comm comm, unsigned flags); \ -FFTW_EXTERN X(plan) XM(plan_dft_c2r) \ +extern X(plan) XM(plan_dft_c2r) \ (int rnk, const ptrdiff_t *n, C *in, R *out, \ MPI_Comm comm, unsigned flags); \ -FFTW_EXTERN X(plan) XM(plan_dft_c2r_2d) \ +extern X(plan) XM(plan_dft_c2r_2d) \ (ptrdiff_t n0, ptrdiff_t n1, C *in, R *out, \ MPI_Comm comm, unsigned flags); \ -FFTW_EXTERN X(plan) XM(plan_dft_c2r_3d) \ +extern X(plan) XM(plan_dft_c2r_3d) \ (ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, C *in, R *out, \ MPI_Comm comm, unsigned flags); \ \ -FFTW_EXTERN void XM(gather_wisdom)(MPI_Comm comm_); \ -FFTW_EXTERN void XM(broadcast_wisdom)(MPI_Comm comm_); \ +extern void XM(gather_wisdom)(MPI_Comm comm_); \ +extern void XM(broadcast_wisdom)(MPI_Comm comm_); \ \ -FFTW_EXTERN void XM(execute_dft)(X(plan) p, C *in, C *out); \ -FFTW_EXTERN void XM(execute_dft_r2c)(X(plan) p, R *in, C *out); \ -FFTW_EXTERN void XM(execute_dft_c2r)(X(plan) p, C *in, R *out); \ -FFTW_EXTERN void XM(execute_r2r)(X(plan) p, R *in, R *out); +extern 
void XM(execute_dft)(X(plan) p, C *in, C *out); \ +extern void XM(execute_dft_r2c)(X(plan) p, R *in, C *out); \ +extern void XM(execute_dft_c2r)(X(plan) p, C *in, R *out); \ +extern void XM(execute_r2r)(X(plan) p, R *in, R *out); diff --git a/rdft/CMakeLists.txt b/rdft/CMakeLists.txt new file mode 100644 index 000000000..3240c9742 --- /dev/null +++ b/rdft/CMakeLists.txt @@ -0,0 +1,49 @@ +target_sources(${fftw3_lib} + PRIVATE + codelet-rdft.h + ct-hc2c.h + hc2hc.h + rdft.h + buffered.c + buffered2.c + conf.c + ct-hc2c-direct.c + ct-hc2c.c + dft-r2hc.c + dht-r2hc.c + dht-rader.c + direct-r2c.c + direct-r2r.c + direct2.c + generic.c + hc2hc-direct.c + hc2hc-generic.c + hc2hc.c + indirect.c + khc2c.c + khc2hc.c + kr2c.c + kr2r.c + nop.c + nop2.c + plan.c + plan2.c + problem.c + problem2.c + rank-geq2-rdft2.c + rank-geq2.c + rank0-rdft2.c + rank0.c + rdft-dht.c + rdft2-inplace-strides.c + rdft2-rdft.c + rdft2-strides.c + rdft2-tensor-max-index.c + solve.c + solve2.c + vrank-geq1-rdft2.c + vrank-geq1.c + vrank3-transpose.c) + +add_subdirectory(scalar) +add_subdirectory(simd) \ No newline at end of file diff --git a/rdft/ct-hc2c.h b/rdft/ct-hc2c.h index 41bf4d887..cd4efc2a4 100644 --- a/rdft/ct-hc2c.h +++ b/rdft/ct-hc2c.h @@ -34,7 +34,7 @@ typedef struct { hc2capply apply; } plan_hc2c; -extern plan *X(mkplan_hc2c)(size_t size, const plan_adt *adt, +plan *X(mkplan_hc2c)(size_t size, const plan_adt *adt, hc2capply apply); #define MKPLAN_HC2C(type, adt, apply) \ diff --git a/rdft/hc2hc.h b/rdft/hc2hc.h index cf003fc68..ebf975a0e 100644 --- a/rdft/hc2hc.h +++ b/rdft/hc2hc.h @@ -18,6 +18,9 @@ * */ +#ifndef __RDFT_HC2HC_H__ +#define __RDFT_HC2HC_H__ + #include "rdft/rdft.h" typedef void (*hc2hcapply) (const plan *ego, R *IO); @@ -32,7 +35,7 @@ typedef struct { hc2hcapply apply; } plan_hc2hc; -extern plan *X(mkplan_hc2hc)(size_t size, const plan_adt *adt, +plan *X(mkplan_hc2hc)(size_t size, const plan_adt *adt, hc2hcapply apply); #define MKPLAN_HC2HC(type, adt, apply) \ @@ -46,9 
+49,11 @@ struct hc2hc_solver_s { }; hc2hc_solver *X(mksolver_hc2hc)(size_t size, INT r, hc2hc_mkinferior mkcldw); -extern hc2hc_solver *(*X(mksolver_hc2hc_hook))(size_t, INT, hc2hc_mkinferior); +extern EXPORT_ADDITIONAL_FUNCTIONS hc2hc_solver *(*X(mksolver_hc2hc_hook))(size_t, INT, hc2hc_mkinferior); void X(regsolver_hc2hc_direct)(planner *plnr, khc2hc codelet, const hc2hc_desc *desc); -int X(hc2hc_applicable)(const hc2hc_solver *, const problem *, planner *); +int EXPORT_ADDITIONAL_FUNCTIONS X(hc2hc_applicable)(const hc2hc_solver *, const problem *, planner *); + +#endif /* __RDFT_HC2HC_H__ */ diff --git a/rdft/rdft.h b/rdft/rdft.h index 4dff775db..e33e9e166 100644 --- a/rdft/rdft.h +++ b/rdft/rdft.h @@ -44,9 +44,9 @@ typedef struct { } problem_rdft; void X(rdft_zerotens)(tensor *sz, R *I); -problem *X(mkproblem_rdft)(const tensor *sz, const tensor *vecsz, +problem EXPORT_ADDITIONAL_FUNCTIONS *X(mkproblem_rdft)(const tensor *sz, const tensor *vecsz, R *I, R *O, const rdft_kind *kind); -problem *X(mkproblem_rdft_d)(tensor *sz, tensor *vecsz, +problem EXPORT_ADDITIONAL_FUNCTIONS *X(mkproblem_rdft_d)(tensor *sz, tensor *vecsz, R *I, R *O, const rdft_kind *kind); problem *X(mkproblem_rdft_0_d)(tensor *vecsz, R *I, R *O); problem *X(mkproblem_rdft_1)(const tensor *sz, const tensor *vecsz, @@ -57,7 +57,7 @@ problem *X(mkproblem_rdft_1_d)(tensor *sz, tensor *vecsz, const char *X(rdft_kind_str)(rdft_kind kind); /* solve.c: */ -void X(rdft_solve)(const plan *ego_, const problem *p_); +void EXPORT_ADDITIONAL_FUNCTIONS X(rdft_solve)(const plan *ego_, const problem *p_); /* plan.c: */ typedef void (*rdftapply) (const plan *ego, R *I, R *O); @@ -67,7 +67,7 @@ typedef struct { rdftapply apply; } plan_rdft; -plan *X(mkplan_rdft)(size_t size, const plan_adt *adt, rdftapply apply); +plan EXPORT_ADDITIONAL_FUNCTIONS *X(mkplan_rdft)(size_t size, const plan_adt *adt, rdftapply apply); #define MKPLAN_RDFT(type, adt, apply) \ (type *)X(mkplan_rdft)(sizeof(type), adt, apply) @@ -123,22 
+123,22 @@ typedef struct { rdft_kind kind; /* assert(kind < DHT) */ } problem_rdft2; -problem *X(mkproblem_rdft2)(const tensor *sz, const tensor *vecsz, +problem EXPORT_ADDITIONAL_FUNCTIONS *X(mkproblem_rdft2)(const tensor *sz, const tensor *vecsz, R *r0, R *r1, R *cr, R *ci, rdft_kind kind); problem *X(mkproblem_rdft2_d)(tensor *sz, tensor *vecsz, R *r0, R *r1, R *cr, R *ci, rdft_kind kind); problem *X(mkproblem_rdft2_d_3pointers)(tensor *sz, tensor *vecsz, R *r, R *cr, R *ci, rdft_kind kind); -int X(rdft2_inplace_strides)(const problem_rdft2 *p, int vdim); +int EXPORT_ADDITIONAL_FUNCTIONS X(rdft2_inplace_strides)(const problem_rdft2 *p, int vdim); INT X(rdft2_tensor_max_index)(const tensor *sz, rdft_kind k); -void X(rdft2_strides)(rdft_kind kind, const iodim *d, INT *rs, INT *cs); +void EXPORT_ADDITIONAL_FUNCTIONS X(rdft2_strides)(rdft_kind kind, const iodim *d, INT *rs, INT *cs); INT X(rdft2_complex_n)(INT real_n, rdft_kind kind); /* verify.c: */ void X(rdft2_verify)(plan *pln, const problem_rdft2 *p, int rounds); /* solve.c: */ -void X(rdft2_solve)(const plan *ego_, const problem *p_); +void EXPORT_ADDITIONAL_FUNCTIONS X(rdft2_solve)(const plan *ego_, const problem *p_); /* plan.c: */ typedef void (*rdft2apply) (const plan *ego, R *r0, R *r1, R *cr, R *ci); @@ -148,7 +148,7 @@ typedef struct { rdft2apply apply; } plan_rdft2; -plan *X(mkplan_rdft2)(size_t size, const plan_adt *adt, rdft2apply apply); +plan EXPORT_ADDITIONAL_FUNCTIONS *X(mkplan_rdft2)(size_t size, const plan_adt *adt, rdft2apply apply); #define MKPLAN_RDFT2(type, adt, apply) \ (type *)X(mkplan_rdft2)(sizeof(type), adt, apply) diff --git a/rdft/scalar/CMakeLists.txt b/rdft/scalar/CMakeLists.txt new file mode 100644 index 000000000..daba91838 --- /dev/null +++ b/rdft/scalar/CMakeLists.txt @@ -0,0 +1,19 @@ +target_sources(${fftw3_lib} + PRIVATE + hb.h + hc2cb.h + hc2cf.h + hf.h + r2cb.h + r2cbIII.h + r2cf.h + r2cfII.h + r2r.h + hc2c.c + hfb.c + r2c.c + r2r.c) + +add_subdirectory(r2cb) 
+add_subdirectory(r2cf) +add_subdirectory(r2r) \ No newline at end of file diff --git a/rdft/scalar/r2cb/CMakeLists.txt b/rdft/scalar/r2cb/CMakeLists.txt new file mode 100644 index 000000000..003383c66 --- /dev/null +++ b/rdft/scalar/r2cb/CMakeLists.txt @@ -0,0 +1,90 @@ +target_sources(${fftw3_lib} + PRIVATE + codlist.c + hb2_16.c + hb2_20.c + hb2_25.c + hb2_32.c + hb2_4.c + hb2_5.c + hb2_8.c + hb_10.c + hb_12.c + hb_15.c + hb_16.c + hb_2.c + hb_20.c + hb_25.c + hb_3.c + hb_32.c + hb_4.c + hb_5.c + hb_6.c + hb_64.c + hb_7.c + hb_8.c + hb_9.c + hc2cb2_16.c + hc2cb2_20.c + hc2cb2_32.c + hc2cb2_4.c + hc2cb2_8.c + hc2cbdft2_16.c + hc2cbdft2_20.c + hc2cbdft2_32.c + hc2cbdft2_4.c + hc2cbdft2_8.c + hc2cbdft_10.c + hc2cbdft_12.c + hc2cbdft_16.c + hc2cbdft_2.c + hc2cbdft_20.c + hc2cbdft_32.c + hc2cbdft_4.c + hc2cbdft_6.c + hc2cbdft_8.c + hc2cb_10.c + hc2cb_12.c + hc2cb_16.c + hc2cb_2.c + hc2cb_20.c + hc2cb_32.c + hc2cb_4.c + hc2cb_6.c + hc2cb_8.c + r2cbIII_10.c + r2cbIII_12.c + r2cbIII_15.c + r2cbIII_16.c + r2cbIII_2.c + r2cbIII_20.c + r2cbIII_25.c + r2cbIII_3.c + r2cbIII_32.c + r2cbIII_4.c + r2cbIII_5.c + r2cbIII_6.c + r2cbIII_64.c + r2cbIII_7.c + r2cbIII_8.c + r2cbIII_9.c + r2cb_10.c + r2cb_11.c + r2cb_12.c + r2cb_128.c + r2cb_13.c + r2cb_14.c + r2cb_15.c + r2cb_16.c + r2cb_2.c + r2cb_20.c + r2cb_25.c + r2cb_3.c + r2cb_32.c + r2cb_4.c + r2cb_5.c + r2cb_6.c + r2cb_64.c + r2cb_7.c + r2cb_8.c + r2cb_9.c) \ No newline at end of file diff --git a/rdft/scalar/r2cf/CMakeLists.txt b/rdft/scalar/r2cf/CMakeLists.txt new file mode 100644 index 000000000..17e7c7c15 --- /dev/null +++ b/rdft/scalar/r2cf/CMakeLists.txt @@ -0,0 +1,90 @@ +target_sources(${fftw3_lib} + PRIVATE + codlist.c + hc2cf2_16.c + hc2cf2_20.c + hc2cf2_32.c + hc2cf2_4.c + hc2cf2_8.c + hc2cfdft2_16.c + hc2cfdft2_20.c + hc2cfdft2_32.c + hc2cfdft2_4.c + hc2cfdft2_8.c + hc2cfdft_10.c + hc2cfdft_12.c + hc2cfdft_16.c + hc2cfdft_2.c + hc2cfdft_20.c + hc2cfdft_32.c + hc2cfdft_4.c + hc2cfdft_6.c + hc2cfdft_8.c + hc2cf_10.c + 
hc2cf_12.c + hc2cf_16.c + hc2cf_2.c + hc2cf_20.c + hc2cf_32.c + hc2cf_4.c + hc2cf_6.c + hc2cf_8.c + hf2_16.c + hf2_20.c + hf2_25.c + hf2_32.c + hf2_4.c + hf2_5.c + hf2_8.c + hf_10.c + hf_12.c + hf_15.c + hf_16.c + hf_2.c + hf_20.c + hf_25.c + hf_3.c + hf_32.c + hf_4.c + hf_5.c + hf_6.c + hf_64.c + hf_7.c + hf_8.c + hf_9.c + r2cfII_10.c + r2cfII_12.c + r2cfII_15.c + r2cfII_16.c + r2cfII_2.c + r2cfII_20.c + r2cfII_25.c + r2cfII_3.c + r2cfII_32.c + r2cfII_4.c + r2cfII_5.c + r2cfII_6.c + r2cfII_64.c + r2cfII_7.c + r2cfII_8.c + r2cfII_9.c + r2cf_10.c + r2cf_11.c + r2cf_12.c + r2cf_128.c + r2cf_13.c + r2cf_14.c + r2cf_15.c + r2cf_16.c + r2cf_2.c + r2cf_20.c + r2cf_25.c + r2cf_3.c + r2cf_32.c + r2cf_4.c + r2cf_5.c + r2cf_6.c + r2cf_64.c + r2cf_7.c + r2cf_8.c + r2cf_9.c) \ No newline at end of file diff --git a/rdft/scalar/r2r/CMakeLists.txt b/rdft/scalar/r2r/CMakeLists.txt new file mode 100644 index 000000000..41a8779e8 --- /dev/null +++ b/rdft/scalar/r2r/CMakeLists.txt @@ -0,0 +1,5 @@ +target_sources(${fftw3_lib} + PRIVATE + codlist.c + e01_8.c + e10_8.c) \ No newline at end of file diff --git a/rdft/simd/CMakeLists.txt b/rdft/simd/CMakeLists.txt new file mode 100644 index 000000000..b173dd3b1 --- /dev/null +++ b/rdft/simd/CMakeLists.txt @@ -0,0 +1,15 @@ +target_sources(${fftw3_lib} + PRIVATE + hc2cbv.h + hc2cfv.h) + +if (HAVE_SSE2) + add_subdirectory(sse2) +endif() +if (HAVE_AVX) + add_subdirectory(avx) +endif () +if (HAVE_AVX2) + add_subdirectory(avx2) + add_subdirectory(avx2-128) +endif () \ No newline at end of file diff --git a/rdft/simd/avx/CMakeLists.txt b/rdft/simd/avx/CMakeLists.txt new file mode 100644 index 000000000..efe36f118 --- /dev/null +++ b/rdft/simd/avx/CMakeLists.txt @@ -0,0 +1,22 @@ +target_sources(${fftw3_lib} + PRIVATE + codlist.c + genus.c + hc2cbdftv_10.c + hc2cbdftv_12.c + hc2cbdftv_16.c + hc2cbdftv_2.c + hc2cbdftv_20.c + hc2cbdftv_32.c + hc2cbdftv_4.c + hc2cbdftv_6.c + hc2cbdftv_8.c + hc2cfdftv_10.c + hc2cfdftv_12.c + hc2cfdftv_16.c + 
hc2cfdftv_2.c + hc2cfdftv_20.c + hc2cfdftv_32.c + hc2cfdftv_4.c + hc2cfdftv_6.c + hc2cfdftv_8.c) \ No newline at end of file diff --git a/rdft/simd/avx2-128/CMakeLists.txt b/rdft/simd/avx2-128/CMakeLists.txt new file mode 100644 index 000000000..efe36f118 --- /dev/null +++ b/rdft/simd/avx2-128/CMakeLists.txt @@ -0,0 +1,22 @@ +target_sources(${fftw3_lib} + PRIVATE + codlist.c + genus.c + hc2cbdftv_10.c + hc2cbdftv_12.c + hc2cbdftv_16.c + hc2cbdftv_2.c + hc2cbdftv_20.c + hc2cbdftv_32.c + hc2cbdftv_4.c + hc2cbdftv_6.c + hc2cbdftv_8.c + hc2cfdftv_10.c + hc2cfdftv_12.c + hc2cfdftv_16.c + hc2cfdftv_2.c + hc2cfdftv_20.c + hc2cfdftv_32.c + hc2cfdftv_4.c + hc2cfdftv_6.c + hc2cfdftv_8.c) \ No newline at end of file diff --git a/rdft/simd/avx2/CMakeLists.txt b/rdft/simd/avx2/CMakeLists.txt new file mode 100644 index 000000000..efe36f118 --- /dev/null +++ b/rdft/simd/avx2/CMakeLists.txt @@ -0,0 +1,22 @@ +target_sources(${fftw3_lib} + PRIVATE + codlist.c + genus.c + hc2cbdftv_10.c + hc2cbdftv_12.c + hc2cbdftv_16.c + hc2cbdftv_2.c + hc2cbdftv_20.c + hc2cbdftv_32.c + hc2cbdftv_4.c + hc2cbdftv_6.c + hc2cbdftv_8.c + hc2cfdftv_10.c + hc2cfdftv_12.c + hc2cfdftv_16.c + hc2cfdftv_2.c + hc2cfdftv_20.c + hc2cfdftv_32.c + hc2cfdftv_4.c + hc2cfdftv_6.c + hc2cfdftv_8.c) \ No newline at end of file diff --git a/rdft/simd/sse2/CMakeLists.txt b/rdft/simd/sse2/CMakeLists.txt new file mode 100644 index 000000000..efe36f118 --- /dev/null +++ b/rdft/simd/sse2/CMakeLists.txt @@ -0,0 +1,22 @@ +target_sources(${fftw3_lib} + PRIVATE + codlist.c + genus.c + hc2cbdftv_10.c + hc2cbdftv_12.c + hc2cbdftv_16.c + hc2cbdftv_2.c + hc2cbdftv_20.c + hc2cbdftv_32.c + hc2cbdftv_4.c + hc2cbdftv_6.c + 
hc2cbdftv_8.c + hc2cfdftv_10.c + hc2cfdftv_12.c + hc2cfdftv_16.c + hc2cfdftv_2.c + hc2cfdftv_20.c + hc2cfdftv_32.c + hc2cfdftv_4.c + hc2cfdftv_6.c + hc2cfdftv_8.c) \ No newline at end of file diff --git a/reodft/CMakeLists.txt b/reodft/CMakeLists.txt new file mode 100644 index 000000000..e50007aa1 --- /dev/null +++ b/reodft/CMakeLists.txt @@ -0,0 +1,13 @@ +target_sources(${fftw3_lib} + PRIVATE + reodft.h + conf.c + redft00e-r2hc-pad.c + redft00e-r2hc.c + reodft00e-splitradix.c + reodft010e-r2hc.c + reodft11e-r2hc-odd.c + reodft11e-r2hc.c + reodft11e-radix2.c + rodft00e-r2hc-pad.c + rodft00e-r2hc.c) \ No newline at end of file diff --git a/simd-support/CMakeLists.txt b/simd-support/CMakeLists.txt new file mode 100644 index 000000000..df0b4854d --- /dev/null +++ b/simd-support/CMakeLists.txt @@ -0,0 +1,27 @@ +target_sources(${fftw3_lib} + PRIVATE + amd64-cpuid.h + simd-altivec.h + simd-avx-128-fma.h + simd-avx.h + simd-avx2-128.h + simd-avx2.h + simd-avx512.h + simd-common.h + simd-generic128.h + simd-generic256.h + simd-kcvi.h + simd-neon.h + simd-sse2.h + simd-vsx.h + x86-cpuid.h + altivec.c + avx-128-fma.c + avx.c + avx2.c + avx512.c + kcvi.c + neon.c + sse2.c + taint.c + vsx.c) \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 000000000..ee3039436 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,6 @@ +target_sources(bench + PRIVATE + fftw-bench.h + bench.c + fftw-bench.c + hook.c) \ No newline at end of file diff --git a/tests/fftw-bench.c b/tests/fftw-bench.c index 6595e2f31..f7f27e263 100644 --- a/tests/fftw-bench.c +++ b/tests/fftw-bench.c @@ -108,10 +108,11 @@ void rdwisdom(void) #ifdef HAVE_SMP if (threads_ok) { +#if defined(HAVE_THREADS) BENCH_ASSERT(FFTW(init_threads)()); FFTW(plan_with_nthreads)(nthreads); FFTW(make_planner_thread_safe)(); -#ifdef _OPENMP +#elif defined(HAVE_OPENMP) omp_set_num_threads(nthreads); #endif } @@ -269,7 +270,7 @@ void cleanup(void) initial_cleanup(); wrwisdom(); 
-#ifdef HAVE_SMP +#ifdef HAVE_THREADS FFTW(cleanup_threads)(); #else FFTW(cleanup)(); @@ -278,7 +279,7 @@ void cleanup(void) # ifdef FFTW_DEBUG_MALLOC { /* undocumented memory checker */ - FFTW_EXTERN void FFTW(malloc_print_minfo)(int v); + extern void FFTW(malloc_print_minfo)(int v); FFTW(malloc_print_minfo)(verbose); } # endif diff --git a/tests/hook.c b/tests/hook.c index ddeb2d0b8..00cc8ab26 100644 --- a/tests/hook.c +++ b/tests/hook.c @@ -14,7 +14,6 @@ #include #include "libbench2/bench-user.h" -#define CALLING_FFTW /* hack for Windows DLL nonsense */ #include "api/api.h" #include "dft/dft.h" #include "rdft/rdft.h" @@ -212,7 +211,7 @@ static void hook(planner *plnr, plan *pln, const problem *p_, int optimalp) if (bp) { X(plan) the_plan_save = the_plan; - the_plan = (apiplan *) MALLOC(sizeof(apiplan), PLANS); + the_plan = (apiplan *) X(malloc)(sizeof(apiplan)); the_plan->pln = pln; the_plan->prb = (problem *) p_; @@ -220,7 +219,7 @@ static void hook(planner *plnr, plan *pln, const problem *p_, int optimalp) verify_problem(bp, rounds, tol); X(plan_awake)(pln, SLEEPY); - X(ifree)(the_plan); + X(free)(the_plan); the_plan = the_plan_save; problem_destroy(bp); diff --git a/threads/CMakeLists.txt b/threads/CMakeLists.txt new file mode 100644 index 000000000..531fd84b7 --- /dev/null +++ b/threads/CMakeLists.txt @@ -0,0 +1,32 @@ +set(THREAD_SOURCES_COMMON + f77funcs.h + threads.h + api.c + conf.c + ct.c + dft-vrank-geq1.c + f77api.c + hc2hc.c + rdft-vrank-geq1.c + vrank-geq1-rdft2.c) + +if (ENABLE_THREADS) + if (WITH_COMBINED_THREADS) + target_sources(${fftw3_lib} + PRIVATE + ${THREAD_SOURCES_COMMON} + threads.c) + else () + target_sources(${fftw3_lib}_threads + PRIVATE + ${THREAD_SOURCES_COMMON} + threads.c) + endif () +endif () + +if (ENABLE_OPENMP) + target_sources(${fftw3_lib}_omp + PRIVATE + ${THREAD_SOURCES_COMMON} + openmp.c) +endif () \ No newline at end of file diff --git a/threads/api.c b/threads/api.c index eae2cd4b7..47c081e2e 100644 --- a/threads/api.c +++ 
b/threads/api.c @@ -25,14 +25,14 @@ static int threads_inited = 0; static void threads_register_hooks(void) { - X(mksolver_ct_hook) = X(mksolver_ct_threads); - X(mksolver_hc2hc_hook) = X(mksolver_hc2hc_threads); + X(mksolver_ct_hook) = X(mksolver_ct_threads); + X(mksolver_hc2hc_hook) = X(mksolver_hc2hc_threads); } static void threads_unregister_hooks(void) { - X(mksolver_ct_hook) = 0; - X(mksolver_hc2hc_hook) = 0; + X(mksolver_ct_hook) = 0; + X(mksolver_hc2hc_hook) = 0; } /* should be called before all other FFTW functions! */ diff --git a/threads/f77funcs.h b/threads/f77funcs.h index 8c477b5ed..ee8c70493 100644 --- a/threads/f77funcs.h +++ b/threads/f77funcs.h @@ -23,17 +23,17 @@ f77api.c, possibly multiple times in order to support multiple compiler manglings (via redefinition of F77). */ -FFTW_VOIDFUNC F77(plan_with_nthreads, PLAN_WITH_NTHREADS)(int *nthreads) +void F77(plan_with_nthreads, PLAN_WITH_NTHREADS)(int *nthreads) { X(plan_with_nthreads)(*nthreads); } -FFTW_VOIDFUNC F77(init_threads, INIT_THREADS)(int *okay) +void F77(init_threads, INIT_THREADS)(int *okay) { *okay = X(init_threads)(); } -FFTW_VOIDFUNC F77(cleanup_threads, CLEANUP_THREADS)(void) +void F77(cleanup_threads, CLEANUP_THREADS)(void) { X(cleanup_threads)(); }