diff --git a/CMakeLists.txt b/CMakeLists.txt deleted file mode 100644 index 4d0496b19..000000000 --- a/CMakeLists.txt +++ /dev/null @@ -1,448 +0,0 @@ -cmake_minimum_required (VERSION 3.0) - -if (NOT DEFINED CMAKE_BUILD_TYPE) - set (CMAKE_BUILD_TYPE Release CACHE STRING "Build type") -endif () - -project (fftw) - -if (POLICY CMP0042) - cmake_policy (SET CMP0042 NEW) -endif () - -option (BUILD_SHARED_LIBS "Build shared libraries" ON) -option (BUILD_TESTS "Build tests" ON) - -option (ENABLE_OPENMP "Use OpenMP for multithreading" OFF) -option (ENABLE_THREADS "Use pthread for multithreading" OFF) -option (WITH_COMBINED_THREADS "Merge thread library" OFF) - -option (ENABLE_FLOAT "single-precision" OFF) -option (ENABLE_LONG_DOUBLE "long-double precision" OFF) -option (ENABLE_QUAD_PRECISION "quadruple-precision" OFF) - -option (ENABLE_SSE "Compile with SSE instruction set support" OFF) -option (ENABLE_SSE2 "Compile with SSE2 instruction set support" OFF) -option (ENABLE_AVX "Compile with AVX instruction set support" OFF) -option (ENABLE_AVX2 "Compile with AVX2 instruction set support" OFF) - -option (DISABLE_FORTRAN "Disable Fortran wrapper routines" OFF) - -include(GNUInstallDirs) - - -include (CheckIncludeFile) -check_include_file (alloca.h HAVE_ALLOCA_H) -check_include_file (altivec.h HAVE_ALTIVEC_H) -check_include_file (c_asm.h HAVE_C_ASM_H) -check_include_file (dlfcn.h HAVE_DLFCN_H) -check_include_file (intrinsics.h HAVE_INTRINSICS_H) -check_include_file (inttypes.h HAVE_INTTYPES_H) -check_include_file (libintl.h HAVE_LIBINTL_H) -check_include_file (limits.h HAVE_LIMITS_H) -check_include_file (mach/mach_time.h HAVE_MACH_MACH_TIME_H) -check_include_file (malloc.h HAVE_MALLOC_H) -check_include_file (memory.h HAVE_MEMORY_H) -check_include_file (stddef.h HAVE_STDDEF_H) -check_include_file (stdint.h HAVE_STDINT_H) -check_include_file (stdlib.h HAVE_STDLIB_H) -check_include_file (string.h HAVE_STRING_H) -check_include_file (strings.h HAVE_STRINGS_H) -check_include_file 
(sys/types.h HAVE_SYS_TYPES_H) -check_include_file (sys/time.h HAVE_SYS_TIME_H) -check_include_file (sys/stat.h HAVE_SYS_STAT_H) -check_include_file (sys/sysctl.h HAVE_SYS_SYSCTL_H) -check_include_file (time.h HAVE_TIME_H) -check_include_file (uintptr.h HAVE_UINTPTR_H) -check_include_file (unistd.h HAVE_UNISTD_H) -if (HAVE_TIME_H AND HAVE_SYS_TIME_H) - set (TIME_WITH_SYS_TIME TRUE) -endif () - -include (CheckPrototypeDefinition) -check_prototype_definition (drand48 "double drand48 (void)" "0" stdlib.h HAVE_DECL_DRAND48) -check_prototype_definition (srand48 "void srand48(long int seedval)" "0" stdlib.h HAVE_DECL_SRAND48) -check_prototype_definition (cosl "long double cosl( long double arg )" "0" math.h HAVE_DECL_COSL) -check_prototype_definition (sinl "long double sinl( long double arg )" "0" math.h HAVE_DECL_SINL) -check_prototype_definition (memalign "void *memalign(size_t alignment, size_t size)" "0" malloc.h HAVE_DECL_MEMALIGN) -check_prototype_definition (posix_memalign "int posix_memalign(void **memptr, size_t alignment, size_t size)" "0" stdlib.h HAVE_DECL_POSIX_MEMALIGN) - -include (CheckSymbolExists) -check_symbol_exists (clock_gettime time.h HAVE_CLOCK_GETTIME) -check_symbol_exists (gettimeofday sys/time.h HAVE_GETTIMEOFDAY) -check_symbol_exists (getpagesize unistd.h HAVE_GETPAGESIZE) -check_symbol_exists (drand48 stdlib.h HAVE_DRAND48) -check_symbol_exists (srand48 stdlib.h HAVE_SRAND48) -check_symbol_exists (memalign malloc.h HAVE_MEMALIGN) -check_symbol_exists (posix_memalign stdlib.h HAVE_POSIX_MEMALIGN) -check_symbol_exists (mach_absolute_time mach/mach_time.h HAVE_MACH_ABSOLUTE_TIME) -check_symbol_exists (alloca alloca.h HAVE_ALLOCA) -if (NOT HAVE_ALLOCA) - unset (HAVE_ALLOCA CACHE) - check_symbol_exists (alloca malloc.h HAVE_ALLOCA) -endif () -check_symbol_exists (isnan math.h HAVE_ISNAN) -check_symbol_exists (snprintf stdio.h HAVE_SNPRINTF) -check_symbol_exists (strchr string.h HAVE_STRCHR) -check_symbol_exists (sysctl unistd.h HAVE_SYSCTL) - -if 
(UNIX) - set (CMAKE_REQUIRED_LIBRARIES m) -endif () -check_symbol_exists (cosl math.h HAVE_COSL) -check_symbol_exists (sinl math.h HAVE_SINL) - -include (CheckTypeSize) -check_type_size ("float" SIZEOF_FLOAT) -check_type_size ("double" SIZEOF_DOUBLE) -check_type_size ("int" SIZEOF_INT) -check_type_size ("long" SIZEOF_LONG) -check_type_size ("long long" SIZEOF_LONG_LONG) -check_type_size ("unsigned int" SIZEOF_UNSIGNED_INT) -check_type_size ("unsigned long" SIZEOF_UNSIGNED_LONG) -check_type_size ("unsigned long long" SIZEOF_UNSIGNED_LONG_LONG) -check_type_size ("size_t" SIZEOF_SIZE_T) -check_type_size ("ptrdiff_t" SIZEOF_PTRDIFF_T) -math (EXPR SIZEOF_INT_BITS "8 * ${SIZEOF_INT}") -set (C_FFTW_R2R_KIND "C_INT${SIZEOF_INT_BITS}_T") - -find_library (LIBM_LIBRARY NAMES m) -if (LIBM_LIBRARY) - set (HAVE_LIBM TRUE) -endif () - - -if (ENABLE_THREADS) - find_package (Threads) -endif () -if (Threads_FOUND) - if(CMAKE_USE_PTHREADS_INIT) - set (USING_POSIX_THREADS 1) - endif () - set (HAVE_THREADS TRUE) -endif () - -if (ENABLE_OPENMP) - find_package (OpenMP) -endif () -if (OPENMP_FOUND) - set (HAVE_OPENMP TRUE) -endif () - -include (CheckCCompilerFlag) - -if (ENABLE_SSE) - foreach (FLAG "-msse" "/arch:SSE") - unset (HAVE_SSE CACHE) - unset (HAVE_SSE) - check_c_compiler_flag (${FLAG} HAVE_SSE) - if (HAVE_SSE) - set (SSE_FLAG ${FLAG}) - break() - endif () - endforeach () -endif () - -if (ENABLE_SSE2) - foreach (FLAG "-msse2" "/arch:SSE2") - unset (HAVE_SSE2 CACHE) - unset (HAVE_SSE2) - check_c_compiler_flag (${FLAG} HAVE_SSE2) - if (HAVE_SSE2) - set (SSE2_FLAG ${FLAG}) - break() - endif () - endforeach () -endif () - -if (ENABLE_AVX) - foreach (FLAG "-mavx" "/arch:AVX") - unset (HAVE_AVX CACHE) - unset (HAVE_AVX) - check_c_compiler_flag (${FLAG} HAVE_AVX) - if (HAVE_AVX) - set (AVX_FLAG ${FLAG}) - break() - endif () - endforeach () -endif () - -if (ENABLE_AVX2) - foreach (FLAG "-mavx2" "/arch:AVX2") - unset (HAVE_AVX2 CACHE) - unset (HAVE_AVX2) - check_c_compiler_flag (${FLAG} 
HAVE_AVX2) - if (HAVE_AVX2) - set (AVX2_FLAG ${FLAG}) - break() - endif () - endforeach () -endif () - -# AVX2 codelets require FMA support as well -if (ENABLE_AVX2) - foreach (FLAG "-mfma" "/arch:FMA") - unset (HAVE_FMA CACHE) - unset (HAVE_FMA) - check_c_compiler_flag (${FLAG} HAVE_FMA) - if (HAVE_FMA) - set (FMA_FLAG ${FLAG}) - break() - endif () - endforeach () -endif () - -if (HAVE_SSE2 OR HAVE_AVX) - set (HAVE_SIMD TRUE) -endif () -file(GLOB fftw_api_SOURCE api/*.c api/*.h) -file(GLOB fftw_dft_SOURCE dft/*.c dft/*.h) -file(GLOB fftw_dft_scalar_SOURCE dft/scalar/*.c dft/scalar/*.h) -file(GLOB fftw_dft_scalar_codelets_SOURCE dft/scalar/codelets/*.c dft/scalar/codelets/*.h) -file(GLOB fftw_dft_simd_SOURCE dft/simd/*.c dft/simd/*.h) - -file(GLOB fftw_dft_simd_sse2_SOURCE dft/simd/sse2/*.c dft/simd/sse2/*.h) -file(GLOB fftw_dft_simd_avx_SOURCE dft/simd/avx/*.c dft/simd/avx/*.h) -file(GLOB fftw_dft_simd_avx2_SOURCE dft/simd/avx2/*.c dft/simd/avx2/*.h dft/simd/avx2-128/*.c dft/simd/avx2-128/*.h) -file(GLOB fftw_kernel_SOURCE kernel/*.c kernel/*.h) -file(GLOB fftw_rdft_SOURCE rdft/*.c rdft/*.h) -file(GLOB fftw_rdft_scalar_SOURCE rdft/scalar/*.c rdft/scalar/*.h) - -file(GLOB fftw_rdft_scalar_r2cb_SOURCE rdft/scalar/r2cb/*.c - rdft/scalar/r2cb/*.h) -file(GLOB fftw_rdft_scalar_r2cf_SOURCE rdft/scalar/r2cf/*.c - rdft/scalar/r2cf/*.h) -file(GLOB fftw_rdft_scalar_r2r_SOURCE rdft/scalar/r2r/*.c - rdft/scalar/r2r/*.h) - -file(GLOB fftw_rdft_simd_SOURCE rdft/simd/*.c rdft/simd/*.h) -file(GLOB fftw_rdft_simd_sse2_SOURCE rdft/simd/sse2/*.c rdft/simd/sse2/*.h) -file(GLOB fftw_rdft_simd_avx_SOURCE rdft/simd/avx/*.c rdft/simd/avx/*.h) -file(GLOB fftw_rdft_simd_avx2_SOURCE rdft/simd/avx2/*.c rdft/simd/avx2/*.h rdft/simd/avx2-128/*.c rdft/simd/avx2-128/*.h) - -file(GLOB fftw_reodft_SOURCE reodft/*.c reodft/*.h) -file(GLOB fftw_simd_support_SOURCE simd-support/*.c simd-support/*.h) -file(GLOB fftw_libbench2_SOURCE libbench2/*.c libbench2/*.h) -list (REMOVE_ITEM fftw_libbench2_SOURCE 
${CMAKE_CURRENT_SOURCE_DIR}/libbench2/useropt.c) - -set(SOURCEFILES - ${fftw_api_SOURCE} - ${fftw_dft_SOURCE} - ${fftw_dft_scalar_SOURCE} - ${fftw_dft_scalar_codelets_SOURCE} - ${fftw_dft_simd_SOURCE} - ${fftw_kernel_SOURCE} - ${fftw_rdft_SOURCE} - ${fftw_rdft_scalar_SOURCE} - - ${fftw_rdft_scalar_r2cb_SOURCE} - ${fftw_rdft_scalar_r2cf_SOURCE} - ${fftw_rdft_scalar_r2r_SOURCE} - - ${fftw_rdft_simd_SOURCE} - ${fftw_reodft_SOURCE} - ${fftw_simd_support_SOURCE} - ${fftw_threads_SOURCE} -) - -set(fftw_par_SOURCE - threads/api.c - threads/conf.c - threads/ct.c - threads/dft-vrank-geq1.c - threads/f77api.c - threads/hc2hc.c - threads/rdft-vrank-geq1.c - threads/vrank-geq1-rdft2.c) - -set (fftw_threads_SOURCE ${fftw_par_SOURCE} threads/threads.c) -set (fftw_omp_SOURCE ${fftw_par_SOURCE} threads/openmp.c) - - -include_directories (.) - - -if (WITH_COMBINED_THREADS) - list (APPEND SOURCEFILES ${fftw_threads_SOURCE}) -endif () - - -if (HAVE_SSE2) - list (APPEND SOURCEFILES ${fftw_dft_simd_sse2_SOURCE} ${fftw_rdft_simd_sse2_SOURCE}) -endif () - -if (HAVE_AVX) - list (APPEND SOURCEFILES ${fftw_dft_simd_avx_SOURCE} ${fftw_rdft_simd_avx_SOURCE}) -endif () - -if (HAVE_AVX2) - list (APPEND SOURCEFILES ${fftw_dft_simd_avx2_SOURCE} ${fftw_rdft_simd_avx2_SOURCE}) -endif () - -set (FFTW_VERSION 3.3.10) - -set (PREC_SUFFIX) -if (ENABLE_FLOAT) - set (FFTW_SINGLE TRUE) - set (BENCHFFT_SINGLE TRUE) - set (PREC_SUFFIX f) -endif () - -if (ENABLE_LONG_DOUBLE) - set (FFTW_LDOUBLE TRUE) - set (BENCHFFT_LDOUBLE TRUE) - set (PREC_SUFFIX l) -endif () - -if (ENABLE_QUAD_PRECISION) - set (FFTW_QUAD TRUE) - set (BENCHFFT_QUAD TRUE) - set (PREC_SUFFIX q) -endif () -set (fftw3_lib fftw3${PREC_SUFFIX}) - -configure_file (cmake.config.h.in config.h @ONLY) -include_directories (${CMAKE_CURRENT_BINARY_DIR}) - -if (BUILD_SHARED_LIBS) - add_definitions (-DFFTW_DLL) -endif () - -add_library (${fftw3_lib} ${SOURCEFILES}) -target_include_directories (${fftw3_lib} INTERFACE $) -if (MSVC) - if 
(CMAKE_C_COMPILER_ID STREQUAL "Clang") - add_compile_options(-bigobj) - elseif (CMAKE_C_COMPILER_ID STREQUAL "MSVC") - add_compile_options(/bigobj) - endif () -endif () -if (HAVE_SSE) - set_source_files_properties (${fftw_dft_simd_sse2_SOURCE} - ${fftw_rdft_simd_sse2_SOURCE} - PROPERTIES COMPILE_FLAGS "${SSE_FLAG}") -endif () -if (HAVE_SSE2) - set_source_files_properties (${fftw_dft_simd_sse2_SOURCE} - ${fftw_rdft_simd_sse2_SOURCE} - PROPERTIES COMPILE_FLAGS "${SSE2_FLAG}") -endif () -if (HAVE_AVX) - set_source_files_properties (${fftw_dft_simd_avx_SOURCE} - ${fftw_rdft_simd_avx_SOURCE} - PROPERTIES COMPILE_FLAGS "${AVX_FLAG}") -endif () -if (HAVE_AVX2) - set_source_files_properties (${fftw_dft_simd_avx2_SOURCE} - ${fftw_rdft_simd_avx2_SOURCE} - PROPERTIES COMPILE_FLAGS "${AVX2_FLAG}") -endif () -if (HAVE_FMA) - set_source_files_properties (${fftw_dft_simd_avx2_SOURCE} - ${fftw_rdft_simd_avx2_SOURCE} - PROPERTIES COMPILE_FLAGS "${FMA_FLAG}") -endif () -if (HAVE_LIBM) - target_link_libraries (${fftw3_lib} m) -endif () - -set (subtargets ${fftw3_lib}) - -if (Threads_FOUND) - if (WITH_COMBINED_THREADS) - target_link_libraries (${fftw3_lib} ${CMAKE_THREAD_LIBS_INIT}) - else () - add_library (${fftw3_lib}_threads ${fftw_threads_SOURCE}) - target_include_directories (${fftw3_lib}_threads INTERFACE $) - target_link_libraries (${fftw3_lib}_threads ${fftw3_lib}) - target_link_libraries (${fftw3_lib}_threads ${CMAKE_THREAD_LIBS_INIT}) - list (APPEND subtargets ${fftw3_lib}_threads) - endif () -endif () - -if (OPENMP_FOUND) - add_library (${fftw3_lib}_omp ${fftw_omp_SOURCE}) - target_include_directories (${fftw3_lib}_omp INTERFACE $) - target_link_libraries (${fftw3_lib}_omp ${fftw3_lib}) - target_link_libraries (${fftw3_lib}_omp ${CMAKE_THREAD_LIBS_INIT}) - list (APPEND subtargets ${fftw3_lib}_omp) - target_compile_options (${fftw3_lib}_omp PRIVATE ${OpenMP_C_FLAGS}) -endif () - -foreach(subtarget ${subtargets}) - set_target_properties (${subtarget} PROPERTIES SOVERSION 
3.6.9 VERSION 3) - install (TARGETS ${subtarget} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) -endforeach () -install(TARGETS ${fftw3_lib} - EXPORT FFTW3LibraryDepends - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) - -install (FILES api/fftw3.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) -if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/api/fftw3.f) - install (FILES api/fftw3.f api/fftw3l.f03 api/fftw3q.f03 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) -endif () -if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/api/fftw3.f03.in) - file (READ api/fftw3.f03.in FFTW3_F03_IN OFFSET 42) - file (WRITE ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 "! Generated automatically. DO NOT EDIT!\n\n") - file (APPEND ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 " integer, parameter :: C_FFTW_R2R_KIND = ${C_FFTW_R2R_KIND}\n\n") - file (APPEND ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 "${FFTW3_F03_IN}") - install (FILES ${CMAKE_CURRENT_BINARY_DIR}/fftw3.f03 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) -endif () - -if (BUILD_TESTS) - - add_executable (bench ${fftw_libbench2_SOURCE} tests/bench.c tests/hook.c tests/fftw-bench.c) - - if (ENABLE_THREADS AND NOT WITH_COMBINED_THREADS) - target_link_libraries (bench ${fftw3_lib}_threads) - else () - target_link_libraries (bench ${fftw3_lib}) - endif () - - - enable_testing () - - if (Threads_FOUND) - - macro (fftw_add_test problem) - add_test (NAME ${problem} COMMAND bench -s ${problem}) - endmacro () - - fftw_add_test (32x64) - fftw_add_test (ib256) - - endif () -endif () - -# pkgconfig file -set (prefix ${CMAKE_INSTALL_PREFIX}) -set (exec_prefix ${CMAKE_INSTALL_PREFIX}) -set (libdir ${CMAKE_INSTALL_FULL_LIBDIR}) -set (includedir ${CMAKE_INSTALL_FULL_INCLUDEDIR}) -set (VERSION ${FFTW_VERSION}) -configure_file (fftw.pc.in fftw3${PREC_SUFFIX}.pc @ONLY) -install (FILES - 
${CMAKE_CURRENT_BINARY_DIR}/fftw3${PREC_SUFFIX}.pc - DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig - COMPONENT Development) - -# cmake file -set (FFTW3_LIBRARIES "FFTW3::${fftw3_lib}") -configure_file (FFTW3Config.cmake.in FFTW3${PREC_SUFFIX}Config.cmake @ONLY) -configure_file (FFTW3ConfigVersion.cmake.in FFTW3${PREC_SUFFIX}ConfigVersion.cmake @ONLY) -install (FILES - ${CMAKE_CURRENT_BINARY_DIR}/FFTW3${PREC_SUFFIX}Config.cmake - ${CMAKE_CURRENT_BINARY_DIR}/FFTW3${PREC_SUFFIX}ConfigVersion.cmake - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/fftw3${PREC_SUFFIX} - COMPONENT Development) - -export (TARGETS ${fftw3_lib} NAMESPACE FFTW3:: FILE ${PROJECT_BINARY_DIR}/FFTW3LibraryDepends.cmake) -install(EXPORT FFTW3LibraryDepends - NAMESPACE FFTW3:: - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/fftw3${PREC_SUFFIX} - COMPONENT Development) diff --git a/FFTW3Config.cmake.in b/FFTW3Config.cmake.in deleted file mode 100644 index 6e073f5c8..000000000 --- a/FFTW3Config.cmake.in +++ /dev/null @@ -1,17 +0,0 @@ -# defined since 2.8.3 -if (CMAKE_VERSION VERSION_LESS 2.8.3) - get_filename_component (CMAKE_CURRENT_LIST_DIR ${CMAKE_CURRENT_LIST_FILE} PATH) -endif () - -# Allows loading FFTW3 settings from another project -set (FFTW3_CONFIG_FILE "${CMAKE_CURRENT_LIST_FILE}") - -set (FFTW3@PREC_SUFFIX@_LIBRARIES fftw3@PREC_SUFFIX@) -set (FFTW3@PREC_SUFFIX@_LIBRARY_DIRS @CMAKE_INSTALL_FULL_LIBDIR@) -set (FFTW3@PREC_SUFFIX@_INCLUDE_DIRS @CMAKE_INSTALL_FULL_INCLUDEDIR@) - -include ("${CMAKE_CURRENT_LIST_DIR}/FFTW3LibraryDepends.cmake" OPTIONAL) - -if (CMAKE_VERSION VERSION_LESS 2.8.3) - set (CMAKE_CURRENT_LIST_DIR) -endif () diff --git a/FFTW3ConfigVersion.cmake.in b/FFTW3ConfigVersion.cmake.in deleted file mode 100644 index cb906a406..000000000 --- a/FFTW3ConfigVersion.cmake.in +++ /dev/null @@ -1,12 +0,0 @@ - -set (PACKAGE_VERSION "@FFTW_VERSION@") - -# Check whether the requested PACKAGE_FIND_VERSION is compatible -if ("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}") - set 
(PACKAGE_VERSION_COMPATIBLE FALSE) -else () - set (PACKAGE_VERSION_COMPATIBLE TRUE) - if ("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}") - set (PACKAGE_VERSION_EXACT TRUE) - endif () -endif () diff --git a/Makefile.am b/Makefile.am index 402300080..483b362b1 100644 --- a/Makefile.am +++ b/Makefile.am @@ -42,8 +42,7 @@ endif SUBDIRS=support $(GENFFT) kernel simd-support dft rdft reodft api \ libbench2 $(CHICKEN_EGG) tests mpi $(DOCDIR) tools m4 EXTRA_DIST=COPYRIGHT bootstrap.sh CONVENTIONS fftw.pc.in \ -CMakeLists.txt cmake.config.h.in FFTW3Config.cmake.in \ -FFTW3ConfigVersion.cmake.in README-perfcnt.md +README-perfcnt.md SIMD_LIBS = simd-support/libsimd_support.la @@ -168,19 +167,6 @@ fftw3@PREC_SUFFIX@.pc: fftw.pc pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = fftw3@PREC_SUFFIX@.pc -FFTW3@PREC_SUFFIX@Config.cmake: $(top_srcdir)/FFTW3Config.cmake.in - $(SED) \ - -e 's|[@]PREC_SUFFIX@|@PREC_SUFFIX@|g' \ - -e 's|[@]CMAKE_INSTALL_FULL_LIBDIR@|$(libdir)|g' \ - -e 's|[@]CMAKE_INSTALL_FULL_INCLUDEDIR@|$(includedir)|g' \ - $(top_srcdir)/FFTW3Config.cmake.in > $@ -FFTW3@PREC_SUFFIX@ConfigVersion.cmake: $(top_srcdir)/FFTW3ConfigVersion.cmake.in - $(SED) \ - -e 's|[@]FFTW_VERSION@|@PACKAGE_VERSION@|g' \ - $(top_srcdir)/FFTW3ConfigVersion.cmake.in > $@ -cmakedir = $(libdir)/cmake/fftw3 -cmake_DATA = FFTW3@PREC_SUFFIX@Config.cmake FFTW3@PREC_SUFFIX@ConfigVersion.cmake - WISDOM_DIR = /etc/fftw WISDOM = wisdom@PREC_SUFFIX@ diff --git a/README-meson.md b/README-meson.md new file mode 100644 index 000000000..5cd384c31 --- /dev/null +++ b/README-meson.md @@ -0,0 +1,108 @@ +# Meson Build System + +A Meson-based build system is included in order to provide a modern alternative +to Autotools for those who need it (such as for better integration with IDEs +and easier inclusion as subproject/dependency of other software). + +Note that it is **EXPERIMENTAL**. 
It should mostly just work, but if you run +into any problems, please report them on our issue tracker and consider using +the Autotools system in the meantime. + +It might one day replace Autotools for maintainer tasks, but for the sake of +compatibility, there are currently no plans to remove Autotools: FFTW’s build +tasks are relatively complex, it runs on a wide variety of platforms, and some +(paying) users are not at liberty to drastically alter their build environment. + +For that reason, changes to the Meson system should only touch files directly +belonging to it until a decision has been made to promote it to the primary +build system. This means some parts of it will not be as clean and efficient +as they could be. + +## Completeness + +The Meson build system should have feature parity with Autotools, except for +the following: + +* Fortran wrappers (needs replacements for Autoconf-provided macros) +* ARM v7a/v8 performance counters +* Commercialized tarballs + +## Requirements + +### General + +A recent enough version of [Meson](https://mesonbuild.com/) or a compatible +implementation of it (such as [muon](https://sr.ht/~lattis/muon/)) is required. +The exact minimum version is specified at the start of the `meson.build` file. + +FFTW itself needs a C compiler. GCC and Clang are well-tested. + +ICL and MSVC should also work, but currently only static libraries can be built +successfully with Microsoft’s toolchains. For now, we strongly recommend using +a MinGW-w64-based toolchain to build FFTW for Windows. [MXE](https://mxe.cc/) +can be used to build one if necessary. + +Running the test suite currently requires Perl. + +### Git only + +Additionally, if you have obtained this copy of FFTW from the Git repository, +you will need the [Dune](https://dune.build/) build system for OCaml. Much of +FFTW’s source code is *generated*, and this is required in order to build the +generators. This has only been tested on x86 Linux with glibc. 
+GNU indent is required as well, since the generated sources in FFTW releases +should be formatted. +Release tarballs already contain the generated sources. + +To build the full documentation (already included in release tarballs), +you need: + +* Perl +* `makeinfo` +* `fig2dev` (part of transfig) +* `texi2pdf` (part of texinfo). + +To generate the Fortran wrappers (included in release tarballs), you need Perl. + +## Usage + +See the [Meson documentation](https://mesonbuild.com/Manual.html) for +instructions, including usage with IDEs and +[cross compilers](https://mesonbuild.com/Cross-compilation.html). + +The short version: + +```sh +meson setup builddir +cd builddir +meson compile +meson test --suite small +meson install +``` + +Some IDEs, like KDevelop, already support Meson. VSCode has an extension +you can install from the marketplace. Once that is set up, you can simply +open the FFTW source directory and everything will be configured automatically. + +### Generating Release Tarballs + +Assuming you have all of the above and the current Git commit is tagged, you +can simply run `meson dist` in a configured build directory. This will do the +following by default: + +* Collect the sources in the current Git `HEAD` commit + NOTE: NOT whatever is currently checked out, and thus NOT including untracked + or modified files! Meson will warn about this and require a command line flag + to proceed if it detects uncommitted changes. +* Cement the tarball’s version (so it won’t depend on Git) +* Generate `ChangeLog` from Git +* Generate all codelets and copy them to the tarball tree +* Patch `configure.ac` to match Meson’s version info +* Configure the resulting sources and run the full test suite +* Upon success, generate a compressed tarball and accompanying checksum file + +If the commit is not tagged, the build system still packages a tarball, but it +will print a warning and refuse to include the Autotools build system. 
+ +Most of the above happens in a shell script generated from +`support/meson_dist.sh.in`. diff --git a/README.md b/README.md index 7931f139e..b0fcf0a33 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ generated automatically. This repository contains the *generator* and it does not contain the *generated code*. *YOU WILL BE UNABLE TO COMPILE CODE FROM THIS REPOSITORY* unless you have special tools and know what you are doing. In particular, do not expect things to -work by simply executing `configure; make` or `cmake`. +work by simply executing `configure; make` or `meson setup <builddir>`. Most users should ignore this repository, and should instead download official tarballs from http://fftw.org/, which contain the generated diff --git a/api/meson.build b/api/meson.build new file mode 100644 index 000000000..f01cca9be --- /dev/null +++ b/api/meson.build @@ -0,0 +1,76 @@ +# TODO: Generate Fortran wrappers +summary('Fortran wrappers', '(not supported with Meson yet)', section: 'Build') + +fftw_api_inc = include_directories('.') +fftw_srcset.add( + files( + 'apiplan.c', + 'configure.c', + 'execute-dft-c2r.c', + 'execute-dft-r2c.c', + 'execute-dft.c', + 'execute-r2r.c', + 'execute-split-dft-c2r.c', + 'execute-split-dft-r2c.c', + 'execute-split-dft.c', + 'execute.c', + 'export-wisdom-to-file.c', + 'export-wisdom-to-string.c', + 'export-wisdom.c', + 'f77api.c', + 'flops.c', + 'forget-wisdom.c', + 'import-system-wisdom.c', + 'import-wisdom-from-file.c', + 'import-wisdom-from-string.c', + 'import-wisdom.c', + 'malloc.c', + 'map-r2r-kind.c', + 'mapflags.c', + 'mkprinter-file.c', + 'mkprinter-str.c', + 'mktensor-iodims.c', + 'mktensor-iodims64.c', + 'mktensor-rowmajor.c', + 'plan-dft-1d.c', + 'plan-dft-2d.c', + 'plan-dft-3d.c', + 'plan-dft-c2r-1d.c', + 'plan-dft-c2r-2d.c', + 'plan-dft-c2r-3d.c', + 'plan-dft-c2r.c', + 'plan-dft-r2c-1d.c', + 'plan-dft-r2c-2d.c', + 'plan-dft-r2c-3d.c', + 'plan-dft-r2c.c', + 'plan-dft.c', + 'plan-guru-dft-c2r.c', + 'plan-guru-dft-r2c.c', + 
'plan-guru-dft.c', + 'plan-guru-r2r.c', + 'plan-guru-split-dft-c2r.c', + 'plan-guru-split-dft-r2c.c', + 'plan-guru-split-dft.c', + 'plan-guru64-dft-c2r.c', + 'plan-guru64-dft-r2c.c', + 'plan-guru64-dft.c', + 'plan-guru64-r2r.c', + 'plan-guru64-split-dft-c2r.c', + 'plan-guru64-split-dft-r2c.c', + 'plan-guru64-split-dft.c', + 'plan-many-dft-c2r.c', + 'plan-many-dft-r2c.c', + 'plan-many-dft.c', + 'plan-many-r2r.c', + 'plan-r2r-1d.c', + 'plan-r2r-2d.c', + 'plan-r2r-3d.c', + 'plan-r2r.c', + 'print-plan.c', + 'rdft2-pad.c', + 'the-planner.c', + 'version.c', + ), +) + +install_headers(files('fftw3.h')) diff --git a/cmake.config.h.in b/cmake.config.h.in deleted file mode 100644 index 1f4c50559..000000000 --- a/cmake.config.h.in +++ /dev/null @@ -1,395 +0,0 @@ - -/* Define to compile in long-double precision. */ -#cmakedefine BENCHFFT_LDOUBLE 1 - -/* Define to compile in quad precision. */ -#cmakedefine BENCHFFT_QUAD 1 - -/* Define to compile in single precision. */ -#cmakedefine BENCHFFT_SINGLE 1 - -/* Define to 1 if using `alloca.c'. */ -/* #undef C_ALLOCA */ - -/* Define to disable Fortran wrappers. */ -#cmakedefine DISABLE_FORTRAN 1 - -/* Define to dummy `main' function (if any) required to link to the Fortran - libraries. */ -/* #undef F77_DUMMY_MAIN */ - -/* Define to a macro mangling the given C identifier (in lower and upper - case), which must not contain underscores, for linking with Fortran. */ -#define F77_FUNC(name,NAME) name ## _ - -/* As F77_FUNC, but for C identifiers containing underscores. */ -#define F77_FUNC_(name,NAME) name ## _ - -/* Define if F77_FUNC and F77_FUNC_ are equivalent. */ -#define F77_FUNC_EQUIV 1 - -/* Define if F77 and FC dummy `main' functions are identical. */ -/* #undef FC_DUMMY_MAIN_EQ_F77 */ - -/* C compiler name and flags */ -#define FFTW_CC "@CMAKE_C_COMPILER@" - -/* Define to enable extra FFTW debugging code. */ -/* #undef FFTW_DEBUG */ - -/* Define to enable the use of alloca(). 
*/ -#define FFTW_ENABLE_ALLOCA 1 - -/* Define to compile in long-double precision. */ -#cmakedefine FFTW_LDOUBLE 1 - -/* Define to compile in quad precision. */ -#cmakedefine FFTW_QUAD 1 - -/* Define to enable pseudorandom estimate planning for debugging. */ -/* #undef FFTW_RANDOM_ESTIMATOR */ - -/* Define to compile in single precision. */ -#cmakedefine FFTW_SINGLE 1 - -/* Define to 1 if you have the `abort' function. */ -#define HAVE_ABORT 1 - -/* Define to 1 if you have `alloca', as a function or macro. */ -#cmakedefine HAVE_ALLOCA 1 - -/* Define to 1 if you have and it should be used (not on Ultrix). - */ -#cmakedefine HAVE_ALLOCA_H 1 - -/* Define to enable Altivec optimizations. */ -/* #undef HAVE_ALTIVEC */ - -/* Define to 1 if you have the header file. */ -#cmakedefine HAVE_ALTIVEC_H 1 - -/* Define if you have enabled the cycle counter on ARMv8 */ -/* #undef HAVE_ARMV8CC */ - -/* Define to enable AVX optimizations. */ -#cmakedefine HAVE_AVX 1 - -/* Define to enable AVX2 optimizations. */ -#cmakedefine HAVE_AVX2 1 - -/* Define to enable AVX512 optimizations. */ -/* #undef HAVE_AVX512 */ - -/* Define to enable 128-bit FMA AVX optimization */ -/* #undef HAVE_AVX_128_FMA */ - -/* Define to 1 if you have the `BSDgettimeofday' function. */ -/* #undef HAVE_BSDGETTIMEOFDAY */ - -/* Define to 1 if you have the `clock_gettime' function. */ -#cmakedefine01 HAVE_CLOCK_GETTIME - -/* Define to 1 if you have the `cosl' function. */ -#cmakedefine HAVE_COSL 1 - -/* Define to 1 if you have the declaration of `cosl', and to 0 if you don't. - */ -#cmakedefine01 HAVE_DECL_COSL - -/* Define to 1 if you have the declaration of `cosq', and to 0 if you don't. - */ -#cmakedefine01 HAVE_DECL_COSQ - -/* Define to 1 if you have the declaration of `drand48', and to 0 if you - don't. */ -#cmakedefine01 HAVE_DECL_DRAND48 - -/* Define to 1 if you have the declaration of `memalign', and to 0 if you - don't. 
*/ -#cmakedefine01 HAVE_DECL_MEMALIGN - -/* Define to 1 if you have the declaration of `posix_memalign', and to 0 if - you don't. */ -#cmakedefine01 HAVE_DECL_POSIX_MEMALIGN - -/* Define to 1 if you have the declaration of `sinl', and to 0 if you don't. - */ -#cmakedefine01 HAVE_DECL_SINL - -/* Define to 1 if you have the declaration of `sinq', and to 0 if you don't. - */ -#cmakedefine01 HAVE_DECL_SINQ - -/* Define to 1 if you have the declaration of `srand48', and to 0 if you - don't. */ -#cmakedefine01 HAVE_DECL_SRAND48 - -/* Define to 1 if you have the header file. */ -#cmakedefine HAVE_DLFCN_H 1 - -/* Define to 1 if you don't have `vprintf' but do have `_doprnt.' */ -/* #undef HAVE_DOPRNT */ - -/* Define to 1 if you have the `drand48' function. */ -#cmakedefine HAVE_DRAND48 1 - -/* Define if you have a machine with fused multiply-add */ -/* #undef HAVE_FMA */ - -/* Define to enable generic (gcc) 128-bit SIMD optimizations. */ -/* #undef HAVE_GENERIC_SIMD128 */ - -/* Define to enable generic (gcc) 256-bit SIMD optimizations. */ -/* #undef HAVE_GENERIC_SIMD256 */ - -/* Define to 1 if you have the `gethrtime' function. */ -/* #undef HAVE_GETHRTIME */ - -/* Define to 1 if you have the `getpagesize' function. */ -#cmakedefine HAVE_GETPAGESIZE 1 - -/* Define to 1 if you have the `gettimeofday' function. */ -#cmakedefine HAVE_GETTIMEOFDAY 1 - -/* Define to 1 if hrtime_t is defined in */ -/* #undef HAVE_HRTIME_T */ - -/* Define to 1 if you have the header file. */ -#cmakedefine HAVE_INTTYPES_H 1 - -/* Define if the isnan() function/macro is available. */ -#cmakedefine HAVE_ISNAN 1 - -/* Define to enable KCVI optimizations. */ -/* #undef HAVE_KCVI */ - -/* Define to 1 if you have the `m' library (-lm). */ -#cmakedefine HAVE_LIBM 1 - -/* Define to 1 if you have the `quadmath' library (-lquadmath). */ -/* #undef HAVE_LIBQUADMATH */ - -/* Define to 1 if you have the header file. 
*/ -#cmakedefine HAVE_LIMITS_H 1 - -/* Define to 1 if the compiler supports `long double' */ -#define HAVE_LONG_DOUBLE 1 - -/* Define to 1 if you have the `mach_absolute_time' function. */ -#cmakedefine HAVE_MACH_ABSOLUTE_TIME 1 - -/* Define to 1 if you have the header file. */ -#cmakedefine HAVE_MALLOC_H 1 - -/* Define to 1 if you have the `memalign' function. */ -#cmakedefine HAVE_MEMALIGN 1 - -/* Define to 1 if you have the `memmove' function. */ -#cmakedefine HAVE_MEMMOVE 1 - -/* Define to 1 if you have the header file. */ -#cmakedefine HAVE_MEMORY_H 1 - -/* Define to 1 if you have the `memset' function. */ -#define HAVE_MEMSET 1 - -/* Define to enable use of MIPS ZBus cycle-counter. */ -/* #undef HAVE_MIPS_ZBUS_TIMER */ - -/* Define if you have the MPI library. */ -/* #undef HAVE_MPI */ - -/* Define to enable ARM NEON optimizations. */ -/* #undef HAVE_NEON */ - -/* Define if OpenMP is enabled */ -#cmakedefine HAVE_OPENMP - -/* Define to 1 if you have the `posix_memalign' function. */ -#cmakedefine HAVE_POSIX_MEMALIGN 1 - -/* Define if you have POSIX threads libraries and header files. */ -/* #undef HAVE_PTHREAD */ - -/* Define to 1 if you have the `read_real_time' function. */ -/* #undef HAVE_READ_REAL_TIME */ - -/* Define to 1 if you have the `sinl' function. */ -#cmakedefine HAVE_SINL 1 - -/* Define to 1 if you have the `snprintf' function. */ -#cmakedefine HAVE_SNPRINTF 1 - -/* Define to 1 if you have the `sqrt' function. */ -#define HAVE_SQRT 1 - -/* Define to enable SSE/SSE2 optimizations. */ -#cmakedefine HAVE_SSE2 1 - -/* Define to 1 if you have the header file. */ -#cmakedefine HAVE_STDDEF_H 1 - -/* Define to 1 if you have the header file. */ -#cmakedefine HAVE_STDINT_H 1 - -/* Define to 1 if you have the header file. */ -#cmakedefine HAVE_STDLIB_H 1 - -/* Define to 1 if you have the `strchr' function. */ -#define HAVE_STRCHR 1 - -/* Define to 1 if you have the header file. 
*/ -#cmakedefine HAVE_STRINGS_H 1 - -/* Define to 1 if you have the header file. */ -#cmakedefine HAVE_STRING_H 1 - -/* Define to 1 if you have the `sysctl' function. */ -#cmakedefine HAVE_SYSCTL 1 - -/* Define to 1 if you have the header file. */ -#cmakedefine HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the header file. */ -#cmakedefine HAVE_SYS_TIME_H 1 - -/* Define to 1 if you have the header file. */ -#cmakedefine HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have the `tanl' function. */ -/* #undef HAVE_TANL */ - -/* Define if we have a threads library. */ -#cmakedefine HAVE_THREADS 1 - -/* Define to 1 if you have the `time_base_to_time' function. */ -/* #undef HAVE_TIME_BASE_TO_TIME */ - -/* Define to 1 if the system has the type `uintptr_t'. */ -#define HAVE_UINTPTR_T 1 - -/* Define to 1 if you have the header file. */ -#cmakedefine HAVE_UNISTD_H 1 - -/* Define to 1 if you have the `vprintf' function. */ -#define HAVE_VPRINTF 1 - -/* Define to enable IBM VSX optimizations. */ -/* #undef HAVE_VSX */ - -/* Define if you have the UNICOS _rtc() intrinsic. */ -/* #undef HAVE__RTC */ - -/* Define to the sub-directory in which libtool stores uninstalled libraries. - */ -#define LT_OBJDIR ".libs/" - -/* Name of package */ -#define PACKAGE "fftw" - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "fftw@fftw.org" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "fftw" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "fftw @FFTW_VERSION@" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "fftw" - -/* Define to the home page for this package. */ -#define PACKAGE_URL "" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "@FFTW_VERSION@" - -/* Define to necessary symbol if this constant uses a non-standard name on - your system. 
*/ -/* #undef PTHREAD_CREATE_JOINABLE */ - -/* The size of `double', as computed by sizeof. */ -#define SIZEOF_DOUBLE @SIZEOF_DOUBLE@ - -/* The size of `fftw_r2r_kind', as computed by sizeof. */ -#define SIZEOF_FFTW_R2R_KIND 4 - -/* The size of `float', as computed by sizeof. */ -#define SIZEOF_FLOAT @SIZEOF_FLOAT@ - -/* The size of `int', as computed by sizeof. */ -#define SIZEOF_INT @SIZEOF_INT@ - -/* The size of `long', as computed by sizeof. */ -#define SIZEOF_LONG @SIZEOF_LONG@ - -/* The size of `long long', as computed by sizeof. */ -#define SIZEOF_LONG_LONG @SIZEOF_LONG_LONG@ - -/* The size of `MPI_Fint', as computed by sizeof. */ -/* #undef SIZEOF_MPI_FINT */ - -/* The size of `ptrdiff_t', as computed by sizeof. */ -#define SIZEOF_PTRDIFF_T @SIZEOF_PTRDIFF_T@ - -/* The size of `size_t', as computed by sizeof. */ -#define SIZEOF_SIZE_T @SIZEOF_SIZE_T@ - -/* The size of `unsigned int', as computed by sizeof. */ -#define SIZEOF_UNSIGNED_INT @SIZEOF_UNSIGNED_INT@ - -/* The size of `unsigned long', as computed by sizeof. */ -#define SIZEOF_UNSIGNED_LONG @SIZEOF_UNSIGNED_LONG@ - -/* The size of `unsigned long long', as computed by sizeof. */ -#define SIZEOF_UNSIGNED_LONG_LONG @SIZEOF_UNSIGNED_LONG_LONG@ - -/* The size of `void *', as computed by sizeof. */ -#define SIZEOF_VOID_P @CMAKE_SIZEOF_VOID_P@ - -/* If using the C implementation of alloca, define if you know the - direction of stack growth for your system; otherwise it will be - automatically deduced at runtime. - STACK_DIRECTION > 0 => grows toward higher addresses - STACK_DIRECTION < 0 => grows toward lower addresses - STACK_DIRECTION = 0 => direction of growth unknown */ -/* #undef STACK_DIRECTION */ - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Define to 1 if you can safely include both and . */ -#cmakedefine TIME_WITH_SYS_TIME 1 - -/* Define if we have and are using POSIX threads. 
*/ -#cmakedefine USING_POSIX_THREADS 1 - -/* Version number of package */ -#define VERSION "@FFTW_VERSION@" - -/* Use common Windows Fortran mangling styles for the Fortran interfaces. */ -/* #undef WINDOWS_F77_MANGLING */ - -/* Include g77-compatible wrappers in addition to any other Fortran wrappers. - */ -#cmakedefine WITH_G77_WRAPPERS 1 - -/* Use our own aligned malloc routine; mainly helpful for Windows systems - lacking aligned allocation system-library routines. */ -/* #undef WITH_OUR_MALLOC */ - -/* Use low-precision timers, making planner very slow */ -/* #undef WITH_SLOW_TIMER */ - -/* Define to empty if `const' does not conform to ANSI C. */ -/* #undef const */ - -/* Define to `__inline__' or `__inline' if that's what the C compiler - calls it, or to nothing if 'inline' is not supported under any name. */ -#ifndef __cplusplus -/* #undef inline */ -#endif - -/* Define to `unsigned int' if does not define. */ -/* #undef size_t */ diff --git a/dft/meson.build b/dft/meson.build new file mode 100644 index 000000000..215d64389 --- /dev/null +++ b/dft/meson.build @@ -0,0 +1,32 @@ +fftw_srcset.add( + files( + 'bluestein.c', + 'buffered.c', + 'conf.c', + 'ct.c', + 'dftw-direct.c', + 'dftw-directsq.c', + 'dftw-generic.c', + 'dftw-genericbuf.c', + 'direct.c', + 'generic.c', + 'indirect-transpose.c', + 'indirect.c', + 'kdft-dif.c', + 'kdft-difsq.c', + 'kdft-dit.c', + 'kdft.c', + 'nop.c', + 'plan.c', + 'problem.c', + 'rader.c', + 'rank-geq2.c', + 'solve.c', + 'vrank-geq1.c', + 'zero.c', + ), +) + +codelet_prelude = support_dir / 'codelet_prelude.dft' +subdir('scalar') +subdir('simd') diff --git a/dft/scalar/codelets/meson.build b/dft/scalar/codelets/meson.build new file mode 100644 index 000000000..d77e49b77 --- /dev/null +++ b/dft/scalar/codelets/meson.build @@ -0,0 +1,166 @@ +genfft_params = { + ########################################################################### + # n1_ is a hard-coded FFT of size (base cases of FFT recursion) + 'n1': { + 'generator': 
gen_notw, + 'include': 'n.h', + 'extra_args': [], + 'ns': [ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 32, 64, 20, 25, + # 30, 40, 50, + ], + }, + ########################################################################### + # t1_ is a "twiddle" FFT of size , implementing a radix-r DIT step + 't1': { + 'generator': gen_twiddle, + 'include': 't.h', + 'extra_args': [], + 'ns': [ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 15, 16, 32, 64, 20, 25, + # 30, 40, 50 + ], + }, + # t2_ is also a twiddle FFT, but instead of using a complete lookup table + # of trig. functions, it partially generates the trig. values on the fly + # (this is faster for large sizes). + 't2': { + 'generator': gen_twiddle, + 'include': 't.h', + 'extra_args': [ + '-twiddle-log3', + '-precompute-twiddles' + ], + 'ns': [ + 4, 8, 16, 32, 64, + 5, 10, 20, 25 + ], + }, + ########################################################################### + # The F (DIF) codelets are used for a kind of in-place transform algorithm, + # but the planner seems to never (or hardly ever) use them on the machines + # we have access to, preferring the Q codelets and the use of buffers + # for sub-transforms. So, we comment them out, at least for now. + + # f1_ is a "twiddle" FFT of size , implementing a radix-r DIF step + 'f1': { + 'generator': gen_twiddle, + 'include': 'f.h', + 'extra_args': [ + '-dif' + ], + 'ns': [ + # 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 15, 16, 32, 64, + ], + }, + # like f1, but partially generates its trig. table on the fly + 'f2': { + 'generator': gen_twiddle, + 'include': 'f.h', + 'extra_args': [ + '-dif', + '-twiddle-log3', + '-precompute-twiddles' + ], + 'ns': [ + # 4, 8, 16, 32, 64, + ], + }, + ########################################################################### + # q1_ is twiddle FFTs of size (DIF step), where the output is + # transposed. This is used for in-place transposes in sizes that are + # divisible by ^2. 
These codelets have size ~ ^2, so you should + # probably not use bigger than 8 or so. + 'q1': { + 'generator': gen_twidsq, + 'include': 'q.h', + 'extra_args': [ + '-dif', + '-reload-twiddle' + ], + 'ns': [ + 2, 4, 8, + 3, 5, 6 + ], + }, + 'q2': { + 'generator': gen_twidsq, + 'include': 'q.h', + 'extra_args': [ + '-dif', + '-twiddle-log3', + '-precompute-twiddles' + ], + 'ns': [], + }, +} + +flags_common = [ + '-compact', + '-variables', '4', + '-pipeline-latency', '4' +] + +incdir = fs.relative_to( + meson.current_source_dir() / '..', + meson.project_source_root(), +) + +codelets = [] +foreach type, params : genfft_params + foreach n : params['ns'] + codelet = f'@type@_@n@' + codelets += codelet + outname = codelet + '.c' + + if generate_codelets + tgt = custom_target( + outname, + output: outname, + capture: true, + command: [ + gen_codelet, + codelet_prelude, + params['generator'], + flags_common, + params['extra_args'], + '-include', incdir / params['include'], + '-n', f'@n@', + '-name', codelet, + ], + ) + fftw_srcset.add(tgt) + fftw_extra_srcset.add(tgt) + else + fftw_srcset.add(files(outname)) + endif + endforeach +endforeach + +if generate_codelets + codlist = configuration_data( + {'solvtab_name': 'X(solvtab_dft_standard)', 'extra_includes': ''}, + ) + + decls = [] + foreach codelet : codelets + decls += f'extern void X(codelet_@codelet@)(planner *);' + endforeach + codlist.set('decls', '\n'.join(decls)) + + solvtab_entries = [] + foreach codelet : codelets + solvtab_entries += f' SOLVTAB(X(codelet_@codelet@)),' + endforeach + codlist.set('solvtab_entries', '\n'.join(solvtab_entries)) + + cfgf = configure_file( + configuration: codlist, + input: support_dir / 'codlist.c.in', + output: 'codlist.c', + ) + fftw_srcset.add(cfgf) + fftw_extra_srcset.add(cfgf) +else + fftw_srcset.add(files('codlist.c')) +endif diff --git a/dft/scalar/meson.build b/dft/scalar/meson.build new file mode 100644 index 000000000..e48320763 --- /dev/null +++ b/dft/scalar/meson.build @@ 
-0,0 +1,3 @@ +fftw_srcset.add(files('n.c', 't.c')) + +subdir('codelets') diff --git a/dft/simd/altivec/meson.build b/dft/simd/altivec/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/altivec/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/dft/simd/avx-128-fma/meson.build b/dft/simd/avx-128-fma/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/avx-128-fma/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/dft/simd/avx/meson.build b/dft/simd/avx/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/avx/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/dft/simd/avx2-128/meson.build b/dft/simd/avx2-128/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/avx2-128/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. 
+ fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/dft/simd/avx2/meson.build b/dft/simd/avx2/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/avx2/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/dft/simd/avx512/meson.build b/dft/simd/avx512/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/avx512/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/dft/simd/common/meson.build b/dft/simd/common/meson.build new file mode 100644 index 000000000..eff3fbe44 --- /dev/null +++ b/dft/simd/common/meson.build @@ -0,0 +1,76 @@ +flags_common = [ + '-compact', + '-variables', '4', + '-pipeline-latency', '8', + '-simd', +] + +incdir = fs.relative_to( + meson.current_source_dir() / '..', + meson.project_source_root(), +) + +simd_common_codelets = [] +codelets = [] +foreach type, params : genfft_params_simd + foreach n : params['ns'] + codelet = f'@type@_@n@' + codelets += codelet + outname = codelet + '.c' + + if generate_codelets + tgt = custom_target( + outname, + output: outname, + capture: true, + command: [ + gen_codelet, + codelet_prelude, + params['generator'], + flags_common, + params['extra_args'], + '-include', incdir / params['include'], + '-n', f'@n@', + '-name', codelet, + ], + ) + simd_common_codelets += tgt + else + simd_common_codelets += files(outname) + endif + endforeach +endforeach + +if 
generate_codelets + codlist = configuration_data( + { + 'solvtab_name': 'XSIMD(solvtab_dft)', + 'extra_includes': '#include SIMD_HEADER', + }, + ) + + decls = [] + foreach codelet : codelets + decls += f'extern void XSIMD(codelet_@codelet@)(planner *);' + endforeach + codlist.set('decls', '\n'.join(decls)) + + solvtab_entries = [] + foreach codelet : codelets + solvtab_entries += f' SOLVTAB(XSIMD(codelet_@codelet@)),' + endforeach + codlist.set('solvtab_entries', '\n'.join(solvtab_entries)) + + simd_common_codelets += \ + configure_file( + configuration: codlist, + input: support_dir / 'codlist.c.in', + output: 'codlist.c', + ) + simd_common_codelets += \ + configure_file(input: 'genus.c', output: 'genus.c', copy: true) +else + simd_common_codelets += files('codlist.c', 'genus.c') +endif + +fftw_extra_srcset.add(simd_common_codelets) diff --git a/dft/simd/generic-simd128/meson.build b/dft/simd/generic-simd128/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/generic-simd128/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/dft/simd/generic-simd256/meson.build b/dft/simd/generic-simd256/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/generic-simd256/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. 
+ fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/dft/simd/kcvi/meson.build b/dft/simd/kcvi/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/kcvi/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/dft/simd/lasx/meson.build b/dft/simd/lasx/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/lasx/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/dft/simd/lsx/meson.build b/dft/simd/lsx/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/lsx/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/dft/simd/meson.build b/dft/simd/meson.build new file mode 100644 index 000000000..2a006ebf5 --- /dev/null +++ b/dft/simd/meson.build @@ -0,0 +1,299 @@ +# This file contains a standard list of DFT SIMD codelets. It is +# used by common/meson.build to generate the C files with the actual +# codelets in them. It is used by {sse,sse2,...}/meson.build to +# generate and compile stub files that include common/*.c + +# You can customize FFTW for special needs, e.g. 
to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +genfft_params_simd = { + ########################################################################### + # n1fv_ is a hard-coded FFTW_FORWARD FFT of size , using SIMD + 'n1fv': { + 'generator': gen_notw_c, + 'include': 'n1f.h', + 'extra_args': [], + 'ns': [ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 32, 64, 128, + 20, 25, + ], + }, + # as above, with restricted input vector stride + 'n2fv': { + 'generator': gen_notw_c, + 'include': 'n2f.h', + 'extra_args': [ + '-with-ostride', '2', + '-store-multiple', '2' + ], + 'ns': [2, 4, 6, 8, 10, 12, 14, 16, 32, 64, 20], + }, + # as above, but FFTW_BACKWARD + 'n1bv': { + 'generator': gen_notw_c, + 'include': 'n1b.h', + 'extra_args': [ + '-sign', '1' + ], + 'ns': [ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 32, 64, 128, + 20, 25, + ], + }, + 'n2bv': { + 'generator': gen_notw_c, + 'include': 'n2b.h', + 'extra_args': [ + '-sign', '1', + '-with-ostride', '2', + '-store-multiple', '2', + ], + 'ns': [ + 2, 4, 6, 8, 10, 12, 14, 16, 32, 64, 20 + ], + }, + # split-complex codelets + 'n2sv': { + 'generator': gen_notw, + 'include': 'n2s.h', + 'extra_args': [ + '-with-ostride', '1', + '-store-multiple', '4' + ], + 'ns': [ + 4, 8, 16, 32, 64 + ], + }, + ########################################################################### + # t1fv_ is a "twiddle" FFT of size , implementing a radix-r DIT step + # for an FFTW_FORWARD transform, using SIMD + 't1fv': { + 'generator': gen_twiddle_c, + 'include': 't1f.h', + 'extra_args': [], + 'ns': [ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 15, 16, 32, 64, + 20, 25 + ], + }, + # same as t1fv_*, but with different twiddle storage scheme + 't2fv': { + 'generator': gen_twiddle_c, + 'include': 
't2f.h', + 'extra_args': [], + 'ns': [ + 2, 4, 8, 16, 32, 64, 5, 10, + 20, 25 + ], + }, + 't3fv': { + 'generator': gen_twiddle_c, + 'include': 't3f.h', + 'extra_args': [ + '-twiddle-log3', + '-precompute-twiddles', + '-no-generate-bytw', + ], + 'ns': [ + 4, 8, 16, 32, 5, 10, 20, 25 + ], + }, + 't1fuv': { + 'generator': gen_twiddle_c, + 'include': 't1fu.h', + 'extra_args': [], + 'ns': [ + 2, 3, 4, 5, 6, 7, 8, 9, 10 + ], + }, + # as above, but FFTW_BACKWARD + 't1bv': { + 'generator': gen_twiddle_c, + 'include': 't1b.h', + 'extra_args': [ + '-sign', '1' + ], + 'ns': [ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 15, 16, 32, 64, + 20, 25 + ], + }, + # same as t1bv_*, but with different twiddle storage scheme + 't2bv': { + 'generator': gen_twiddle_c, + 'include': 't2b.h', + 'extra_args': [ + '-sign', '1' + ], + 'ns': [ + 2, 4, 8, 16, 32, 64, + 5, 10, 20, 25 + ], + }, + 't3bv': { + 'generator': gen_twiddle_c, + 'include': 't3b.h', + 'extra_args': [ + '-twiddle-log3', + '-precompute-twiddles', + '-no-generate-bytw', + '-sign', '1', + ], + 'ns': [ + 4, 8, 16, 32, + 5, 10, 20, 25 + ], + }, + 't1buv': { + 'generator': gen_twiddle_c, + 'include': 't1bu.h', + 'extra_args': [ + '-sign', '1' + ], + 'ns': [ + 2, 3, 4, 5, 6, 7, 8, 9, 10 + ], + }, + # split-complex codelets + 't1sv': { + 'generator': gen_twiddle, + 'include': 'ts.h', + 'extra_args': [], + 'ns': [ + 2, 4, 8, 16, 32 + ], + }, + 't2sv': { + 'generator': gen_twiddle, + 'include': 'ts.h', + 'extra_args': [ + '-twiddle-log3', + '-precompute-twiddles' + ], + 'ns': [ + 4, 8, 16, 32 + ], + }, + ########################################################################### + # q1fv_ is twiddle FFTW_FORWARD FFTs of size (DIF step), + # where the output is transposed, using SIMD. This is used for + # in-place transposes in sizes that are divisible by ^2. These + # codelets have size ~ ^2, so you should probably not use + # bigger than 8 or so. 
+ 'q1fv': { + 'generator': gen_twidsq_c, + 'include': 'q1f.h', + 'extra_args': [ + '-dif' + ], + 'ns': [ + 2, 4, 5, 8 + ], + }, + 'q1bv': { + 'generator': gen_twidsq_c, + 'include': 'q1b.h', + 'extra_args': [ + '-dif', + '-sign', '1' + ], + 'ns': [ + 2, 4, 5, 8 + ], + }, +} + +subdir('common') + +subdirs = { + 'HAVE_ALTIVEC': { + 'dirs': {'altivec' : 'simd-support/simd-altivec.h'}, + 'flags': simd_flags_altivec, + }, + 'HAVE_SSE2': { + 'dirs': {'sse2' : 'simd-support/simd-sse2.h'}, + 'flags': simd_flags_sse2, + }, + 'HAVE_AVX': { + 'dirs': {'avx' : 'simd-support/simd-avx.h'}, + 'flags': simd_flags_avx, + }, + 'HAVE_AVX_128_FMA': { + 'dirs': {'avx-128-fma' : 'simd-support/simd-avx-128-fma.h'}, + 'flags': simd_flags_avx_128_fma, + }, + 'HAVE_AVX2': { + 'dirs': { + 'avx2' : 'simd-support/simd-avx2.h', + 'avx2-128' : 'simd-support/simd-avx2-128.h', + }, + 'flags': simd_flags_avx2, + }, + 'HAVE_AVX512': { + 'dirs': {'avx512' : 'simd-support/simd-avx512.h'}, + 'flags': simd_flags_avx512, + }, + 'HAVE_KCVI': { + 'dirs': {'kcvi' : 'simd-support/simd-kcvi.h'}, + 'flags': simd_flags_kcvi, + }, + 'HAVE_GENERIC_SIMD128': { + 'dirs': {'generic-simd128' : 'simd-support/simd-generic128.h'}, + 'flags': simd_flags_generic_simd128, + }, + 'HAVE_GENERIC_SIMD256': { + 'dirs': {'generic-simd256' : 'simd-support/simd-generic256.h'}, + 'flags': simd_flags_generic_simd256, + }, + 'HAVE_LASX': { + 'dirs': {'lasx' : 'simd-support/simd-lasx.h'}, + 'flags': simd_flags_lasx, + }, + 'HAVE_LSX': { + 'dirs': {'lsx' : 'simd-support/simd-lsx.h'}, + 'flags': simd_flags_lsx, + }, + 'HAVE_NEON': { + 'dirs': {'neon' : 'simd-support/simd-neon.h'}, + 'flags': simd_flags_neon, + }, + 'HAVE_SVE': { + 'dirs': { + 'sve128' : 'simd-support/simd-maskedsve128.h', + 'sve256' : 'simd-support/simd-maskedsve256.h', + 'sve512' : 'simd-support/simd-maskedsve512.h', + 'sve1024' : 'simd-support/simd-maskedsve1024.h', + 'sve2048' : 'simd-support/simd-maskedsve2048.h', + }, + 'flags': simd_flags_sve, + }, + 'HAVE_VSX': { 
+ 'dirs': {'vsx' : 'simd-support/simd-vsx.h'}, + 'flags': simd_flags_vsx, + }, +} + +foreach cond, simd : subdirs + simd_flags = simd['flags'] + foreach dir, header : simd['dirs'] + simd_header = header + simd_name = dir + subdir(dir) + + if conf.has(cond) + lib = static_library( + simd_name, + build_by_default: false, + c_args: [simd_flags, f'@cc_def@SIMD_HEADER="@simd_header@"'], + include_directories: fftw_root_inc, + sources: simd_common_codelets, + ) + + fftw_srcset.add(declare_dependency(link_with: lib)) + endif + endforeach +endforeach diff --git a/dft/simd/neon/meson.build b/dft/simd/neon/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/neon/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/dft/simd/sse2/meson.build b/dft/simd/sse2/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/sse2/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/dft/simd/sve1024/meson.build b/dft/simd/sve1024/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/sve1024/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. 
+ fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/dft/simd/sve128/meson.build b/dft/simd/sve128/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/sve128/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/dft/simd/sve2048/meson.build b/dft/simd/sve2048/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/sve2048/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/dft/simd/sve256/meson.build b/dft/simd/sve256/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/sve256/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/dft/simd/sve512/meson.build b/dft/simd/sve512/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/sve512/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. 
+ fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/dft/simd/vsx/meson.build b/dft/simd/vsx/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/dft/simd/vsx/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # We really only need these for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/doc/meson.build b/doc/meson.build new file mode 100644 index 000000000..b974aeda8 --- /dev/null +++ b/doc/meson.build @@ -0,0 +1,144 @@ +docs_opt = get_option('docs') +makeinfo = find_program('makeinfo', required: docs_opt) +perl = find_program('perl', required: docs_opt) +fig2dev = find_program('fig2dev', required: docs_opt) +texi2pdf = find_program('texi2pdf', required: docs_opt) + +build_docs = makeinfo.found() and perl.found() and fig2dev.found() and texi2pdf.found() + +summary('Rebuild documentation', build_docs, bool_yn: true, section: 'Build') + +if not build_docs + subdir_done() +endif + +version_texi = custom_target( + input: 'fftw3.texi', + output: 'version.texi', + capture: true, + command: [ + perl, + files('mkversion.pl'), + '@INPUT@', + meson.project_version(), + ], +) + +fftw_extra_srcset.add(version_texi) + +rfftwnd_eps = custom_target( + input: 'rfftwnd.fig', + output: '@BASENAME@.eps', + command: [ + fig2dev, + '-L', 'eps', + '-m', '.7', + '@INPUT@', + '@OUTPUT@' + ], +) + +rfftwnd_for_html_png = custom_target( + input: 'rfftwnd.fig', + output: '@BASENAME@-for-html.png', + command: [ + fig2dev, + '-L', 'png', + '-m', '1', + '-S', '4', + '@INPUT@', + '@OUTPUT@', + ], +) + +rfftwnd_pdf = custom_target( + input: 'rfftwnd.fig', + output: '@BASENAME@.pdf', + command: [ + fig2dev, + '-L', 'pdf', + '-m', '.7', + '@INPUT@', + '@OUTPUT@' + ], +) + +fftw_extra_srcset.add(rfftwnd_eps) +fftw_extra_srcset.add(rfftwnd_for_html_png) 
+fftw_extra_srcset.add(rfftwnd_pdf) + +texifiles = files( + 'acknowledgements.texi', + 'cindex.texi', + 'fftw3.texi', + 'findex.texi', + 'install.texi', + 'intro.texi', + 'legacy-fortran.texi', + 'license.texi', + 'modern-fortran.texi', + 'mpi.texi', + 'other.texi', + 'reference.texi', + 'threads.texi', + 'tutorial.texi', + 'upgrading.texi', +) +eqn_images = files( + 'equation-dft.png', + 'equation-dht.png', + 'equation-idft.png', + 'equation-redft00.png', + 'equation-redft01.png', + 'equation-redft10.png', + 'equation-redft11.png', + 'equation-rodft00.png', + 'equation-rodft01.png', + 'equation-rodft10.png', + 'equation-rodft11.png', +) + +info = custom_target( + input: [texifiles, version_texi], + output: 'fftw3.info', + command: [ + makeinfo, + '-I', meson.current_source_dir(), + '-I', meson.current_build_dir(), + '--number-sections', + '--no-split', + '-o', '@OUTPUT@', + files('fftw3.texi'), + ], +) +fftw_extra_srcset.add(info) + +html = custom_target( + input: [texifiles, version_texi, rfftwnd_for_html_png, eqn_images], + output: 'html', + command: [ + makeinfo, + '-I', meson.current_source_dir(), + '-I', meson.current_build_dir(), + '--html', + '--number-sections', + '-o', '@OUTPUT@', + files('fftw3.texi'), + ], +) +fftw_extra_srcset.add(html) + +pdf = custom_target( + input: [texifiles, version_texi, rfftwnd_pdf, eqn_images], + output: 'fftw3.pdf', + command: [ + texi2pdf, + '-q', + '-I', meson.current_source_dir(), + '-I', meson.current_build_dir(), + '--pdf', + '-o', '@OUTPUT@', + files('fftw3.texi'), + ], +) +fftw_extra_srcset.add(pdf) diff --git a/doc/mkversion.pl b/doc/mkversion.pl new file mode 100755 index 000000000..9af9cff19 --- /dev/null +++ b/doc/mkversion.pl @@ -0,0 +1,15 @@ +#!/usr/bin/env perl +use strict; +use locale; +use POSIX; + +setlocale("LC_ALL", "C"); + +my $fmt = "\@set UPDATED %d %B %Y +\@set UPDATED-MONTH %B %Y +\@set EDITION @ARGV[1] +\@set VERSION @ARGV[1] +"; + +my $stat = (stat(@ARGV[0]))[9]; +print (strftime($fmt, 
localtime($stat))) diff --git a/genfft/dune b/genfft/dune new file mode 100644 index 000000000..f9f1e710c --- /dev/null +++ b/genfft/dune @@ -0,0 +1,17 @@ +(executables + (names + gen_hc2cdft_c + gen_hc2cdft + gen_hc2c + gen_hc2hc + gen_mdct + gen_notw_c + gen_notw + gen_r2cb + gen_r2cf + gen_r2r + gen_twiddle_c + gen_twiddle + gen_twidsq_c + gen_twidsq) + (libraries num)) diff --git a/genfft/dune-project b/genfft/dune-project new file mode 100644 index 000000000..a6d5a48db --- /dev/null +++ b/genfft/dune-project @@ -0,0 +1,7 @@ +(lang dune 3.0) +(package + (name genfft) + (allow_empty) + (depends + (ocaml (< 5)) + num)) diff --git a/genfft/dune-workspace b/genfft/dune-workspace new file mode 100644 index 000000000..500b8da9f --- /dev/null +++ b/genfft/dune-workspace @@ -0,0 +1,6 @@ +(lang dune 3.0) + +(context + (default + (name genfft) + (profile release))) diff --git a/genfft/dune.lock/lock.dune b/genfft/dune.lock/lock.dune new file mode 100644 index 000000000..5b26fa958 --- /dev/null +++ b/genfft/dune.lock/lock.dune @@ -0,0 +1,29 @@ +(lang package 0.1) + +(dependency_hash e962c9dd00628da55c08da1ea317fcaa) + +(ocaml ocaml-base-compiler) + +(repositories + (complete true) + (used + ((source + https://github.com/ocaml-dune/opam-overlays.git#2a9543286ff0e0656058fee5c0da7abc16b8717d)) + ((source + https://github.com/ocaml/opam-repository.git#2fd4164ca1e27b8c6027454c4844c1a1f6dca0bc)))) + +(expanded_solver_variable_bindings + (variable_values + (with-dev-setup false) + (sys-ocaml-version 4.14.2) + (post true) + (os-distribution opensuse-tumbleweed) + (os linux) + (opam-version 2.2.0~alpha-vendored) + (arch x86_64)) + (unset_variables + sys-ocaml-libc + sys-ocaml-cc + sys-ocaml-arch + enable-ocaml-beta-repository + build)) diff --git a/genfft/dune.lock/num.pkg b/genfft/dune.lock/num.pkg new file mode 100644 index 000000000..5748f76a2 --- /dev/null +++ b/genfft/dune.lock/num.pkg @@ -0,0 +1,28 @@ +(version 1.6) + +(build + (run + %{make} + PROFILE=release + (when + 
(catch_undefined_var + (and_absorb_undefined_var + (not %{pkg:ocaml:preinstalled}) + (< %{pkg:ocaml:version} 5.0.0~~)) + false) + opam-legacy) + (when + (catch_undefined_var + (or_absorb_undefined_var + %{pkg:ocaml:preinstalled} + (>= %{pkg:ocaml:version} 5.0.0~~)) + false) + opam-modern))) + +(depends ocaml) + +(source + (fetch + (url https://github.com/ocaml/num/archive/refs/tags/v1.6.tar.gz) + (checksum + sha256=b5cce325449aac746d5ca963d84688a627cca5b38d41e636cf71c68b60495b3e))) diff --git a/genfft/dune.lock/ocaml-base-compiler.pkg b/genfft/dune.lock/ocaml-base-compiler.pkg new file mode 100644 index 000000000..61e3681a7 --- /dev/null +++ b/genfft/dune.lock/ocaml-base-compiler.pkg @@ -0,0 +1,48 @@ +(version 4.14.2) + +(install + (withenv + ((= MSYS2_ARG_CONV_EXCL *)) + (run %{make} install))) + +(build + (withenv + ((= MSYS2_ARG_CONV_EXCL *)) + (progn + (run + ./configure + (when + (catch_undefined_var + (and_absorb_undefined_var + %{pkg:system-msvc:installed} + %{pkg:arch-x86_64:installed}) + false) + --host=x86_64-pc-windows) + (when + (catch_undefined_var + (and_absorb_undefined_var + %{pkg:system-msvc:installed} + %{pkg:arch-x86_32:installed}) + false) + --host=i686-pc-windows) + --prefix=%{prefix} + --docdir=%{doc}/ocaml + -C) + (run %{make} -j%{jobs})))) + +(source + (fetch + (url https://github.com/ocaml/ocaml/archive/4.14.2.tar.gz) + (checksum + sha256=c2d706432f93ba85bd3383fa451d74543c32a4e84a1afaf3e8ace18f7f097b43))) + +(exported_env + (= CAML_LD_LIBRARY_PATH "\%{lib}%/stublibs")) + +(extra_sources + (ocaml-base-compiler.install + (fetch + (url + https://raw.githubusercontent.com/ocaml/opam-source-archives/main/patches/ocaml-base-compiler/ocaml-base-compiler.install) + (checksum + sha256=79f2a1a5044a91350a0eb6ce12e261a72a2855c094c425cddf3860e58c486678)))) diff --git a/genfft/dune.lock/ocaml-config.pkg b/genfft/dune.lock/ocaml-config.pkg new file mode 100644 index 000000000..408e7cd10 --- /dev/null +++ b/genfft/dune.lock/ocaml-config.pkg @@ -0,0 +1,20 
@@ +(version 2) + +(build + (substitute gen_ocaml_config.ml.in gen_ocaml_config.ml)) + +(depends ocaml-base-compiler) + +(extra_sources + (gen_ocaml_config.ml.in + (fetch + (url + https://raw.githubusercontent.com/ocaml/opam-source-archives/main/patches/ocaml-config/gen_ocaml_config.ml.in.2) + (checksum + sha256=22eb7c0211fc426028e444b272b97eac1e8287a49a512aebaa33c608652cfd29))) + (ocaml-config.install + (fetch + (url + https://raw.githubusercontent.com/ocaml/opam-source-archives/main/patches/ocaml-config/ocaml-config.install) + (checksum + sha256=6e4fd93f4cce6bad0ed3c08afd0248dbe7d7817109281de6294e5b5ef5597051)))) diff --git a/genfft/dune.lock/ocaml.pkg b/genfft/dune.lock/ocaml.pkg new file mode 100644 index 000000000..ef0e0eb38 --- /dev/null +++ b/genfft/dune.lock/ocaml.pkg @@ -0,0 +1,18 @@ +(version 4.14.2) + +(build + (withenv + ((= CAML_LD_LIBRARY_PATH "")) + (run + ocaml + %{pkg:ocaml-config:share}/gen_ocaml_config.ml + %{pkg-self:version} + %{pkg-self:name}))) + +(depends ocaml-config ocaml-base-compiler) + +(exported_env + (+= OCAMLTOP_INCLUDE_PATH "\%{toplevel}%") + (= CAML_LD_LIBRARY_PATH "\%{_:stubsdir}%") + (+= CAML_LD_LIBRARY_PATH "\%{lib}%/stublibs") + (= OCAML_TOPLEVEL_PATH "\%{toplevel}%")) diff --git a/genfft/meson.build b/genfft/meson.build new file mode 100644 index 000000000..5aabc56d2 --- /dev/null +++ b/genfft/meson.build @@ -0,0 +1,82 @@ +gen_opt = get_option('generate_codelets') +gen_opt = gen_opt.enable_auto_if(is_git) +gen_opt = gen_opt.disable_auto_if(not is_git) + +dune = find_program('dune', required: gen_opt) +indent = find_program('indent', required: gen_opt) +generate_codelets = dune.found() and indent.found() + +summary( + 'Generate codelets', + generate_codelets, + bool_yn: true, + section: 'Build' +) + +if not dune.found() + dune = disabler() +endif + +genfft_progs = [ + 'gen_notw', + 'gen_notw_c', + 'gen_twiddle', + 'gen_twiddle_c', + 'gen_twidsq', + 'gen_twidsq_c', + 'gen_r2r', + 'gen_r2cf', + 'gen_r2cb', + 'gen_hc2c', + 
'gen_hc2cdft', + 'gen_hc2cdft_c', + 'gen_hc2hc', + 'gen_mdct', +] + +genfft_progs_exe = [] +foreach prog : genfft_progs + genfft_progs_exe += prog + '.exe' +endforeach + +dune_args = [ + '--root=' + meson.current_source_dir(), + '--build-dir=' + meson.current_build_dir() / '..', + '--no-print-directory', +] + +dune_lock = custom_target( + output: 'dune.lock', + command: [dune, 'pkg', 'lock', dune_args], + build_by_default: false, +) + +run_target( + 'dune-pkg', + command: [dune, 'build', dune_args, '@pkg-install'], + depends: dune_lock, +) + +# Need to build all at once instead of dependency-driven to avoid Dune +# deadlocking itself +genfft_exe = custom_target( + 'genfft', + output: genfft_progs_exe, + command: [dune, 'build', dune_args], + # Dune will manage its dependencies + build_always_stale: true, +) + +foreach i : range(genfft_progs.length()) + set_variable(genfft_progs[i], genfft_exe[i]) +endforeach + +if generate_codelets + gen_codelet = [ + support_dir / 'gen_codelet.sh', + meson.project_source_root() / 'COPYRIGHT', + support_dir / 'twovers.sh', + indent.full_path(), + ] + gen_codelet_simd = support_dir / 'gen_codelet_simd.sh' +endif diff --git a/kernel/meson.build b/kernel/meson.build new file mode 100644 index 000000000..218f70ba0 --- /dev/null +++ b/kernel/meson.build @@ -0,0 +1,47 @@ +fftw_srcset.add( + files( + 'align.c', + 'alloc.c', + 'assert.c', + 'awake.c', + 'buffered.c', + 'cpy1d.c', + 'cpy2d-pair.c', + 'cpy2d.c', + 'ct.c', + 'debug.c', + 'extract-reim.c', + 'hash.c', + 'iabs.c', + 'kalloc.c', + 'md5-1.c', + 'md5.c', + 'minmax.c', + 'ops.c', + 'pickdim.c', + 'plan.c', + 'planner.c', + 'primes.c', + 'print.c', + 'problem.c', + 'rader.c', + 'scan.c', + 'solver.c', + 'solvtab.c', + 'stride.c', + 'tensor.c', + 'tensor1.c', + 'tensor2.c', + 'tensor3.c', + 'tensor4.c', + 'tensor5.c', + 'tensor7.c', + 'tensor8.c', + 'tensor9.c', + 'tile2d.c', + 'timer.c', + 'transpose.c', + 'trig.c', + 'twiddle.c', + ), +) diff --git a/libbench2/meson.build 
b/libbench2/meson.build new file mode 100644 index 000000000..ff94e73e2 --- /dev/null +++ b/libbench2/meson.build @@ -0,0 +1,42 @@ +fftw_libbench2_src = files( + 'after-ccopy-from.c', + 'after-ccopy-to.c', + 'after-hccopy-from.c', + 'after-hccopy-to.c', + 'after-rcopy-from.c', + 'after-rcopy-to.c', + 'allocate.c', + 'aset.c', + 'bench-cost-postprocess.c', + 'bench-exit.c', + 'bench-main.c', + 'can-do.c', + 'caset.c', + 'dotens2.c', + 'info.c', + 'main.c', + 'mflops.c', + 'mp.c', + 'my-getopt.c', + 'ovtpvt.c', + 'pow2.c', + 'problem.c', + 'report.c', + 'speed.c', + 'tensor.c', + 'timer.c', + 'util.c', + 'verify-dft.c', + 'verify-lib.c', + 'verify-r2r.c', + 'verify-rdft2.c', + 'verify.c', + 'zero.c', +) + +libbench2 = static_library( + 'libbench2', + fftw_libbench2_src, + include_directories: '..', + build_by_default: false, +) diff --git a/meson.build b/meson.build new file mode 100644 index 000000000..b9fdf4701 --- /dev/null +++ b/meson.build @@ -0,0 +1,544 @@ +project( + 'fftw', + 'c', + meson_version: '>=1.3.0', + # version will be cemented automatically for dist tarballs + version: run_command( + 'git', + '-C', meson.project_source_root(), + 'describe', '--tags', '--always', '--dirty', + check: true, + ).stdout().strip().replace('fftw-', '',), + license: 'GPL-2.0', + license_files: 'COPYING', + default_options: {'buildtype': 'release'}, +) + +fs = import('fs') + +is_git = fs.exists('.git') +version_clean = meson.project_version().split('-')[0].split('.') + +# match libtool versioning scheme +so_current = 9 # increment for all API changes +so_revision = version_clean[2].to_int() # always increment +so_age = 6 # increment only for backwards-compatible API changes +shlib_version = '@0@.@1@.@2@'.format(so_current - so_age, so_age, so_revision) + +cc = meson.get_compiler('c') +cc_syntax = cc.get_argument_syntax() +cc_id = cc.get_id() +cc_str = ' '.join(cc.cmd_array() + get_option('c_args')) + +# We’re not exporting some internal symbols that are needed to link extra 
libs +# and tools to the main library, so MSVC-style linkers only work for static libs +if ['link', 'lld-link', 'xilink'].contains(cc.get_linker_id()) + if get_option('default_library') != 'static' + error('Only static libraries are supported with this linker. Change with: --default-library static') + endif +endif + +fftw_root_inc = include_directories('.') +support_dir = meson.project_source_root() / 'support' + +add_global_arguments(cc.get_supported_arguments('-Wno-unused'), language: 'c') + +if cc_syntax == 'gcc' + cc_def = '-D' +elif cc_syntax == 'msvc' + cc_def = '/D' +endif + +shlib_args = cc_def + 'FFTW_DLL' + +conf = configuration_data( + {'C_COMPILER': cc_str, 'DISABLE_FORTRAN': not get_option('fortran')}, +) +conf.set_quoted('PACKAGE', meson.project_name()) +conf.set_quoted('PACKAGE_VERSION', meson.project_version()) +conf.set_quoted('VERSION', meson.project_version()) +conf.set_quoted('FFTW_CC', cc_str) + +check_headers = [ + 'alloca.h', + 'altivec.h', + 'c_asm.h', + 'dlfcn.h', + 'intrinsics.h', + 'inttypes.h', + 'libintl.h', + 'limits.h', + 'mach/mach_time.h', + 'malloc.h', + 'memory.h', + 'stddef.h', + 'stdint.h', + 'stdlib.h', + 'string.h', + 'strings.h', + 'sys/types.h', + 'sys/time.h', + 'sys/stat.h', + 'sys/sysctl.h', + 'time.h', + 'uintptr.h', + 'unistd.h', + 'xmmintrin.h', +] + +foreach h : check_headers + if cc.has_header(h) + conf.set10('HAVE_' + h.underscorify().to_upper(), true) + endif +endforeach + +conf.set10( + 'TIME_WITH_SYS_TIME', + conf.has('HAVE_TIME_H') and conf.has('HAVE_SYS_TIME_H'), +) + +check_decls = { + 'stdlib.h': ['drand48', 'srand48', 'posix_memalign'], + 'math.h': ['cosl', 'sinl'], + 'malloc.h': ['memalign'], +} + +foreach h, decls : check_decls + foreach d : decls + conf.set10( + 'HAVE_DECL_' + d.underscorify().to_upper(), + cc.has_header_symbol(h, d), + ) + endforeach +endforeach + +check_funcs = [ + 'BSDgettimeofday', + 'gettimeofday', + 'hrtime_t', + 'getpagesize', + 'sysctl', + 'drand48', + 'srand48', + 
'posix_memalign', + 'memalign', + 'mach_absolute_time', + 'isnan', + 'cosl', + 'sinl', + 'snprintf', + 'strchr', +] + +foreach f : check_funcs + if cc.has_function(f) + conf.set('HAVE_' + f.underscorify().to_upper(), 1) + endif +endforeach + +conf.set10('HAVE_CLOCK_GETTIME', cc.has_header_symbol('time.h', 'clock_gettime')) +conf.set( + 'HAVE_ALLOCA', + cc.has_header_symbol('alloca.h', 'alloca') or cc.has_header_symbol( + 'malloc.h', + 'alloca', + ), +) + +ssmod = import('sourceset') +fftw_srcset = ssmod.source_set() +fftw_threads_srcset = ssmod.source_set() +fftw_omp_srcset = ssmod.source_set() +fftw_mpi_srcset = ssmod.source_set() +fftw_extra_srcset = ssmod.source_set() + +m_dep = cc.find_library('m', required: false) +conf.set('HAVE_LIBM', m_dep.found()) +fftw_srcset.add(m_dep) + +sizes = [ + 'float', + 'double', + 'int', + 'long', + 'long long', + 'unsigned int', + 'unsigned long', + 'unsigned long long', + 'size_t', + 'ptrdiff_t', +] + +conf.set('FFTW_RANDOM_ESTIMATOR', get_option('random_estimator')) +conf.set('FFTW_ENABLE_ALLOCA', get_option('alloca')) +conf.set('WITH_SLOW_TIMER', get_option('slow_timer')) +conf.set( + 'HAVE_MIPS_ZBUS_TIMER', + get_option('mips_zbus_timer').require( + host_machine.cpu_family() in ['mips', 'mips64'], + error_message: 'Only makes sense when targeting MIPS', + ).allowed(), +) + +have_memalign = ( + conf.has('HAVE_MEMALIGN') or + conf.has('HAVE_POSIX_MEMALIGN') +) +opt_our_malloc = get_option('our_malloc') +opt_our_malloc = opt_our_malloc.enable_auto_if(not have_memalign) +conf.set('WITH_OUR_MALLOC', opt_our_malloc.enabled()) # enabled(), not allowed(): allowed() is also true for a leftover 'auto', which would turn our malloc on even when memalign/posix_memalign exists + +prefer_fma = get_option('prefer_fma') +prefer_fma = prefer_fma.disable_auto_if( + host_machine.cpu_family() not in ['ppc', 'ppc64', 'ia64', 'mips64'], +) +conf.set('ARCH_PREFERS_FMA', prefer_fma.allowed()) + +incoming_stack_boundary = get_option('incoming_stack_boundary') +if incoming_stack_boundary > 0 + isb_arg = f'-mincoming-stack-boundary=@incoming_stack_boundary@' + if cc_syntax == 'gcc' and
cc.has_argument(isb_arg) + add_global_arguments(isb_arg, language: 'c') + else + error('incoming_stack_boundary: not available with this compiler') + endif +endif + +foreach t : sizes + conf.set('SIZEOF_' + t.underscorify().to_upper(), cc.sizeof(t)) +endforeach +conf.set('SIZEOF_VOID_P', cc.sizeof('void*')) +conf.set( + 'C_FFTW_R2R_KIND', + 'C_INT@0@'.format( + cc.sizeof( + 'fftw_r2r_kind', + prefix: '#include "api/fftw3.h"', + include_directories: fftw_root_inc, + ) * 8, + ), +) + +precision = get_option('precision') +if precision == 'single' + prec_suffix = 'f' + conf.set10('FFTW_SINGLE', true) + conf.set10('BENCHFFT_SINGLE', true) +elif precision == 'long-double' + prec_suffix = 'l' + conf.set10('FFTW_LDOUBLE', true) + conf.set10('BENCHFFT_LDOUBLE', true) +elif precision == 'quad' + prec_suffix = 'q' + conf.set10('FFTW_QUAD', true) + conf.set10('BENCHFFT_QUAD', true) +else + prec_suffix = '' +endif + +fftw_lib_name = 'fftw3' + prec_suffix + +quadmath_dep = cc.find_library('quadmath', required: precision == 'quad') +conf.set10('HAVE_LIBQUADMATH', quadmath_dep.found()) +fftw_srcset.add(quadmath_dep) + +simd_checks = { + # flags: extra compiler flags for compilers using a specific syntax. + # meson supports gcc-like and msvc-like compilers. + 'sse2': { + 'flags': { + 'gcc': precision == 'single' ? ['-msse'] : ['-msse2'], + 'msvc': precision == 'single' ? 
['/arch:SSE'] : ['/arch:SSE2'], + }, + 'flags_global': {'gcc': ['-mfpmath=sse']}, + 'precision': ['single', 'double'], + }, + 'avx': { + 'flags': {'gcc': ['-mavx'], 'msvc': ['/arch:AVX']}, + 'precision': ['single', 'double'], + }, + 'avx2': { + 'flags': { + 'gcc': ['-mavx2', '-mfma'], + 'msvc': ['/arch:AVX2'], + 'ignore_unsupported': {'intel': ['-mfma']}, + }, + 'precision': ['single', 'double'], + }, + 'avx512': { + 'condition': host_machine.cpu_family() == 'x86_64', + 'flags': {'gcc': ['-mavx512f'], 'msvc': ['/arch:AVX512']}, + 'precision': ['single', 'double'], + }, + 'avx-128-fma': { + 'flags': {'gcc': ['-mfma4']}, + 'precision': ['single', 'double'], + }, + 'altivec': { + 'flags': {'gcc': ['-maltivec', '-mabi=altivec']}, + 'precision': ['single'], + }, + 'neon': { + 'flags': { + 'gcc': host_machine.cpu_family() == 'aarch64' ? [] : ['-mfpu=neon'], + 'msvc': [], + }, + 'precision': ( + host_machine.cpu_family() == 'aarch64' ? + [ 'single', 'double', ] : [ 'single', ] + ), + }, + 'sve': { + 'flags': {'gcc': []}, + 'precision': ['single', 'double'], + 'condition': cc.has_header_symbol('arm_sve.h', '__ARM_FEATURE_SVE'), + }, + 'kcvi': {'flags': {'gcc': ['-mmic']}, 'precision': ['single', 'double']}, + 'vsx': {'flags': {'gcc': ['-mvsx']}, 'precision': ['single', 'double']}, + 'lsx': {'flags': {'gcc': ['-mlsx']}, 'precision': ['single', 'double']}, + 'lasx': {'flags': {'gcc': ['-mlasx']}, 'precision': ['single', 'double']}, + 'generic-simd128': {'flags': {'gcc': []}, 'precision': ['single', 'double']}, + 'generic-simd256': {'flags': {'gcc': []}, 'precision': ['single', 'double']}, +} + +foreach option, checks : simd_checks + flags = [] + opt = get_option(option) + opt = opt.require( + cc_syntax in checks['flags'], + error_message: 'not supported with this compiler', + ) + + if 'precision' in checks + prec_print = ' or '.join(checks['precision']) + opt = opt.require( + checks['precision'].contains(precision), + error_message: f'requires @prec_print@ precision', + ) + 
endif + + if 'condition' in checks + opt = opt.require( + checks['condition'], + error_message: 'not supported with this compiler', + ) + endif + + if opt.allowed() + foreach flag : checks['flags'][cc_syntax] + if cc.has_argument(flag) + flags += flag + elif not ('ignore_unsupported' in checks['flags'] + and cc_id in checks['flags']['ignore_unsupported'] + and checks['flags']['ignore_unsupported'][cc_id].contains(flag)) + + opt = opt.require( + false, + error_message: f'compiler does not support required flag "@flag@"', + ) + endif + endforeach + + if opt.allowed() + conf.set10('HAVE_' + option.underscorify().to_upper(), true) + + if 'flags_global' in checks and cc_syntax in checks['flags_global'] + add_global_arguments( + cc.get_supported_arguments(checks['flags_global'][cc_syntax]), + language: 'c', + ) + endif + + endif + endif + + summary(option, opt.allowed(), bool_yn: true, section: 'SIMD Optimization') + + set_variable('simd_flags_' + option.underscorify(), flags) +endforeach + +subdir('doc') +subdir('api') +subdir('genfft') + +# NOTE: Lots of duplicated code there. +# Suggest restructuring if maintainer stuff becomes Meson-exclusive. 
+subdir('dft') +subdir('rdft') + +subdir('reodft') +subdir('kernel') +subdir('simd-support') +subdir('threads') +subdir('libbench2') + +configure_file(output: 'config.h', configuration: conf) + +pkg = import('pkgconfig') +pcname = 'FFTW' +pcdesc = 'fast Fourier transform library' +pcurl = 'http://fftw.org' +inc = include_directories('api') + +fftw_src = fftw_srcset.apply(conf, strict: false) +fftw_lib = library( + fftw_lib_name, + [fftw_src.sources()], + c_shared_args: shlib_args, + dependencies: fftw_src.dependencies(), + version: shlib_version, + install: true, +) +fftw3_dep = declare_dependency(link_with: fftw_lib, include_directories: inc) + +pkg.generate( + fftw_lib, + filebase: fftw_lib.name(), + name: pcname, + description: pcdesc, + url: pcurl, +) + +if fftw_lib_name != 'fftw3' + pkg.generate( + fftw_lib, + filebase: 'fftw3', + name: pcname, + description: pcdesc, + url: pcurl, + ) +endif + +if threads_dep.found() and not conf.get('COMBINED_THREADS') + fftw_threads_src = fftw_threads_srcset.apply(conf) + fftw_threads_lib = library( + fftw_lib_name + '_threads', + fftw_threads_src.sources(), + c_shared_args: shlib_args, + link_with: fftw_lib, + dependencies: fftw_threads_src.dependencies(), + version: shlib_version, + install: true, + ) + fftw3_threads_dep = declare_dependency( + link_with: fftw_threads_lib, + include_directories: inc, + ) + + pkg.generate( + fftw_threads_lib, + filebase: fftw_threads_lib.name(), + name: pcname, + description: pcdesc, + url: pcurl, + ) +endif + +if openmp_dep.found() + fftw_omp_src = fftw_omp_srcset.apply(conf) + fftw_omp_lib = library( + fftw_lib_name + '_omp', + fftw_omp_src.sources(), + c_shared_args: shlib_args, + link_with: fftw_lib, + dependencies: fftw_omp_src.dependencies(), + version: shlib_version, + install: true, + ) + + fftw3_omp_dep = declare_dependency( + link_with: fftw_omp_lib, + include_directories: inc, + ) + pkg.generate( + fftw_omp_lib, + filebase: fftw_omp_lib.name(), + name: pcname, + description: 
pcdesc, + url: pcurl, + ) +endif + +subdir('mpi') +subdir('tests') +subdir('tools') + +have_cycle_counter = cc.has_header_symbol( + 'kernel/cycle.h', + 'HAVE_TICK_COUNTER', + prefix: '#include "config.h"', + include_directories: fftw_root_inc, +) + +summary( + 'cycle counter', + have_cycle_counter ? + 'found' : 'not found - using ESTIMATE mode for all plans (see manual)', + section: 'Library' +) + +# NOTE: Only takes care of the main library. +have_cmake = find_program('cmake', required: false).found() +summary('CMake support files', have_cmake, bool_yn: true, section: 'Build') +if have_cmake + cmake = import('cmake') + + cmake.write_basic_package_version_file( + name: 'FFTW3', + version: shlib_version, + compatibility: 'SameMajorVersion' + ) + cmake.configure_package_config_file( + name: 'FFTW3', + input: 'support' / 'FFTW3Config.cmake.in', + configuration: { + 'version': shlib_version, + 'prec_suffix': prec_suffix, + 'lib_name': fftw_lib_name, + 'libdir': get_option('prefix') / get_option('libdir'), + 'includedir': get_option('prefix') / get_option('includedir'), + } + ) +endif + +if is_git and generate_codelets and build_docs + extra_src = fftw_extra_srcset.all_sources() + + # HACK: This target is called by the dist script so that the SIMD codelet + # files are generated and collected. This is only needed for Autotools, and + # can probably be avoided with a small change to make it mirror what we do + # with Meson: Pass the required SIMD header on the compiler command line. + # Care should be taken to avoid breaking Makefile dependency tracking, as it + # is not as robust as Meson’s. 
+ custom_target('dist_src', input: extra_src, command: 'true', output: '.dist') + bdir = meson.project_build_root() + + extra_files = [] + foreach file : extra_src + f = fs.relative_to(file, bdir) + extra_files += f'"@f@"' + endforeach + + dist_cfg = configuration_data({'files': ' '.join(extra_files)}) + dist_script = configure_file( + configuration: dist_cfg, + input: 'support' / 'meson_dist.sh.in', + output: 'meson_dist.sh', + ) + + meson.add_dist_script( + dist_script, + '@0@.@1@'.format(version_clean[0], version_clean[1]), + version_clean[2], + so_current.to_string(), + so_revision.to_string(), + so_age.to_string(), + ) +elif is_git + warning('Git copy but not generating codelets and docs - dist target unavailable') +endif + +summary({ + 'precision': precision, + 'ABI version': shlib_version +}, section: 'Library') diff --git a/meson_options.txt b/meson_options.txt new file mode 100644 index 000000000..41c0c8689 --- /dev/null +++ b/meson_options.txt @@ -0,0 +1,29 @@ +option('openmp', type: 'feature', description: 'Build OpenMP support library') +option('threads', type: 'feature', description: 'Build thread support library') +option('combined_threads', type: 'feature', value: 'disabled', description: 'Merge thread library and main library') +option('mpi', type: 'feature', description: 'Build MPI support library') +option('sse2', type: 'feature', description: 'SSE/SSE2 optimization') +option('avx', type: 'feature', description: 'AVX optimization') +option('avx2', type: 'feature', description: 'AVX2 optimization') +option('avx512', type: 'feature', description: 'AVX512 optimization') +option('avx-128-fma', type: 'feature', description: '128-bit FMA AVX optimization') +option('prefer_fma', type: 'feature', description: 'Prefer fused-multiply-add instructions (preferred on some architectures)') +option('kcvi', type: 'feature', description: 'Knights Corner vector instructions optimization') +option('vsx', type: 'feature', description: 'IBM VSX optimization') 
+option('lsx', type: 'feature', description: 'LoongArch LSX optimization') +option('lasx', type: 'feature', description: 'LoongArch LASX optimization') +option('sve', type: 'feature', description: 'ARM SVE optimization') +option('neon', type: 'feature', description: 'ARM NEON optimization') +option('altivec', type: 'feature', description: 'Altivec optimization') +option('generic-simd128', type: 'feature', value: 'disabled', description: 'Generic (compiler-specific) 128-bit SIMD optimization') +option('generic-simd256', type: 'feature', value: 'disabled', description: 'Generic (compiler-specific) 256-bit SIMD optimization') +option('precision', type: 'combo', choices: ['single', 'double', 'long-double', 'quad'], value: 'double', description: 'floating-point precision') +option('generate_codelets', type: 'feature', description: 'Generate codelets from scratch (not recommended for users)') +option('docs', type: 'feature', description: 'Rebuild documentation') +option('random_estimator', type: 'boolean', value: false, description: 'Pseudo-random estimate planning (for debugging)') +option('fortran', type: 'boolean', description: 'Fortran-callable wrappers') +option('slow_timer', type: 'boolean', value: false, description: 'Use low-precision timers (SLOW)') +option('mips_zbus_timer', type: 'feature', value: 'disabled', description: 'Use MIPS ZBus cycle counter (check hardware support!)') +option('alloca', type: 'boolean') +option('our_malloc', type: 'feature', description: 'Use our aligned malloc (helpful for Windows)') +option('incoming_stack_boundary', type: 'integer', min: 0, value: 0, description: 'Assume the stack is aligned to (1< is a hard-coded complex-to-real FFT of size (base cases + # of real-output FFT recursion) + 'r2cb': { + 'generator': gen_r2cb, + 'include': 'r2cb.h', + 'extra_args': [], + 'ns': [ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 32, 64, 128, + 20, 25, + # 30, 40, 50, + ], + }, + 
########################################################################### + # hb_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIF + # step for a real-output FFT. Every hb codelet must have a + # corresponding r2cbIII codelet (see below)! + 'hb': { + 'generator': gen_hc2hc, + 'include': 'hb.h', + 'extra_args': [ + '-dif' + ], + 'ns': [ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 15, 16, 32, 64, + 20, 25, + # 30, 40, 50, + ], + }, + # like hb, but generates part of its trig table on the fly (good for large n) + 'hb2': { + 'generator': gen_hc2hc, + 'include': 'hb.h', + 'extra_args': [ + '-dif', + '-twiddle-log3', + '-precompute-twiddles' + ], + 'ns': [ + 4, 8, 16, 32, + 5, 20, 25 + ], + }, + # an r2cb transform where the output is shifted by half a sample (input + # is multiplied by a phase). This is needed as part of the DIF recursion; + # every hb_<r> or hb2_<r> codelet should have a corresponding r2cbIII_<r> + 'r2cbIII': { + 'generator': gen_r2cb, + 'include': 'r2cbIII.h', + 'extra_args': [ + '-dft-III' + ], + 'ns': [ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 15, 16, 32, 64, + 20, 25, + # 30, 40, 50, + ], + }, + ########################################################################### + # hc2cb_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIF + # step for a real-input FFT with rdft2-style output. <r> must be even.
+ 'hc2cb': { + 'generator': gen_hc2c, + 'include': 'hc2cb.h', + 'extra_args': [ + '-dif' + ], + 'ns': [ + 2, 4, 6, 8, 10, 12, 16, 32, + 20, + # 30, + ], + }, + 'hc2cbdft': { + 'generator': gen_hc2cdft, + 'include': 'hc2cb.h', + 'extra_args': [ + '-dif' + ], + 'ns': [ + 2, 4, 6, 8, 10, 12, 16, 32, + 20, + # 30, + ], + }, + # like hc2cb, but generates part of its trig table on the fly (good + # for large n) + 'hc2cb2': { + 'generator': gen_hc2c, + 'include': 'hc2cb.h', + 'extra_args': [ + '-dif', + '-twiddle-log3', + '-precompute-twiddles' + ], + 'ns': [ + 4, 8, 16, 32, + 20, + # 30, + ], + }, + 'hc2cbdft2': { + 'generator': gen_hc2cdft, + 'include': 'hc2cb.h', + 'extra_args': [ + '-dif' + ], + 'ns': [ + 4, 8, 16, 32, + 20, + # 30, + ], + }, +} + +flags_common = [ + '-compact', + '-variables', '4', + '-pipeline-latency', '4', + '-sign', '1', +] +solvtab_name = 'X(solvtab_rdft_r2cb)' + +incdir = fs.relative_to( + meson.current_source_dir() / '..', + meson.project_source_root(), +) + +codelets = [] +foreach type, params : genfft_params + foreach n : params['ns'] + codelet = f'@type@_@n@' + codelets += codelet + outname = codelet + '.c' + + if generate_codelets + tgt = custom_target( + outname, + output: outname, + capture: true, + command: [ + gen_codelet, + codelet_prelude, + params['generator'], + flags_common, + params['extra_args'], + '-include', incdir / params['include'], + '-n', f'@n@', + '-name', codelet, + ], + ) + fftw_srcset.add(tgt) + fftw_extra_srcset.add(tgt) + else + fftw_srcset.add(files(outname)) + endif + endforeach +endforeach + +if generate_codelets + codlist = configuration_data( + {'solvtab_name': solvtab_name, 'extra_includes': ''}, + ) + + decls = [] + foreach codelet : codelets + decls += f'extern void X(codelet_@codelet@)(planner *);' + endforeach + codlist.set('decls', '\n'.join(decls)) + + solvtab_entries = [] + foreach codelet : codelets + solvtab_entries += f' SOLVTAB(X(codelet_@codelet@)),' + endforeach + codlist.set('solvtab_entries', 
'\n'.join(solvtab_entries)) + + cfgf = configure_file( + configuration: codlist, + input: support_dir / 'codlist.c.in', + output: 'codlist.c', + ) + fftw_srcset.add(cfgf) + fftw_extra_srcset.add(cfgf) +else + fftw_srcset.add(files('codlist.c')) +endif diff --git a/rdft/scalar/r2cf/meson.build b/rdft/scalar/r2cf/meson.build new file mode 100644 index 000000000..4d94cc8e6 --- /dev/null +++ b/rdft/scalar/r2cf/meson.build @@ -0,0 +1,200 @@ +# This file specifies a set of codelets, efficient transforms +# of small sizes, that are used as building blocks (kernels) by FFTW +# to build up large transforms, as well as the options for generating +# and compiling them. + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +genfft_params = { + ########################################################################### + # r2cf_<n> is a hard-coded real-to-complex FFT of size <n> (base cases + # of real-input FFT recursion) + 'r2cf': { + 'generator': gen_r2cf, + 'include': 'r2cf.h', + 'extra_args': [], + 'ns': [ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 32, 64, 128, + 20, 25, + # 30, 40, 50, + ], + }, + ########################################################################### + # hf_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIT + # step for a real-input FFT. Every hf codelet must have a + # corresponding r2cfII codelet (see below)!
+ 'hf': { + 'generator': gen_hc2hc, + 'include': 'hf.h', + 'extra_args': [ + '-dit' + ], + 'ns': [ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 15, 16, 32, 64, + 20, 25, + # 30, 40, 50, + ], + }, + # like hf, but generates part of its trig table on the fly (good for large n) + 'hf2': { + 'generator': gen_hc2hc, + 'include': 'hf.h', + 'extra_args': [ + '-dit', + '-twiddle-log3', + '-precompute-twiddles' + ], + 'ns': [ + 4, 8, 16, 32, + 5, 20, 25 + ], + }, + # an r2cf transform where the input is shifted by half a sample (output + # is multiplied by a phase). This is needed as part of the DIT recursion; + # every hf_<r> or hf2_<r> codelet should have a corresponding r2cfII_<r> + 'r2cfII': { + 'generator': gen_r2cf, + 'include': 'r2cfII.h', + 'extra_args': [ + '-dft-II' + ], + 'ns': [ + 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 15, 16, 32, 64, + 20, 25, + # 30, 40, 50, + ], + }, + ########################################################################### + # hc2cf_<r> is a "twiddle" FFT of size <r>, implementing a radix-r DIT + # step for a real-input FFT with rdft2-style output. <r> must be even.
+ 'hc2cf': { + 'generator': gen_hc2c, + 'include': 'hc2cf.h', + 'extra_args': [ + '-dit' + ], + 'ns': [ + 2, 4, 6, 8, 10, 12, 16, 32, + 20, + # 30, + ], + }, + 'hc2cfdft': { + 'generator': gen_hc2cdft, + 'include': 'hc2cf.h', + 'extra_args': [ + '-dit' + ], + 'ns': [ + 2, 4, 6, 8, 10, 12, 16, 32, + 20, + # 30, + ], + }, + # like hc2cf, but generates part of its trig table on the fly (good + # for large n) + 'hc2cf2': { + 'generator': gen_hc2c, + 'include': 'hc2cf.h', + 'extra_args': [ + '-dit', + '-twiddle-log3', + '-precompute-twiddles' + ], + 'ns': [ + 4, 8, 16, 32, + 20, + # 30, + ], + }, + 'hc2cfdft2': { + 'generator': gen_hc2cdft, + 'include': 'hc2cf.h', + 'extra_args': [ + '-dit', + '-twiddle-log3', + '-precompute-twiddles' + ], + 'ns': [ + 4, 8, 16, 32, + 20, + # 30, + ], + }, +} + +flags_common = [ + '-compact', + '-variables', '4', + '-pipeline-latency', '4' +] +solvtab_name = 'X(solvtab_rdft_r2cf)' + +incdir = fs.relative_to( + meson.current_source_dir() / '..', + meson.project_source_root(), +) + +codelets = [] +foreach type, params : genfft_params + foreach n : params['ns'] + codelet = f'@type@_@n@' + codelets += codelet + outname = codelet + '.c' + + if generate_codelets + tgt = custom_target( + outname, + output: outname, + capture: true, + command: [ + gen_codelet, + codelet_prelude, + params['generator'], + flags_common, + params['extra_args'], + '-include', incdir / params['include'], + '-n', f'@n@', + '-name', codelet, + ], + ) + fftw_srcset.add(tgt) + fftw_extra_srcset.add(tgt) + else + fftw_srcset.add(files(outname)) + endif + endforeach +endforeach + +if generate_codelets + codlist = configuration_data( + {'solvtab_name': solvtab_name, 'extra_includes': ''}, + ) + + decls = [] + foreach codelet : codelets + decls += f'extern void X(codelet_@codelet@)(planner *);' + endforeach + codlist.set('decls', '\n'.join(decls)) + + solvtab_entries = [] + foreach codelet : codelets + solvtab_entries += f' SOLVTAB(X(codelet_@codelet@)),' + endforeach + 
codlist.set('solvtab_entries', '\n'.join(solvtab_entries)) + + cfgf = configure_file( + configuration: codlist, + input: support_dir / 'codlist.c.in', + output: 'codlist.c', + ) + fftw_srcset.add(cfgf) + fftw_extra_srcset.add(cfgf) +else + fftw_srcset.add(files('codlist.c')) +endif diff --git a/rdft/scalar/r2r/meson.build b/rdft/scalar/r2r/meson.build new file mode 100644 index 000000000..93f8edeec --- /dev/null +++ b/rdft/scalar/r2r/meson.build @@ -0,0 +1,185 @@ +# This file specifies a set of codelets, efficient transforms +# of small sizes, that are used as building blocks (kernels) by FFTW +# to build up large transforms, as well as the options for generating +# and compiling them. + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual). + +genfft_params = { + ########################################################################### + # The following lines specify the REDFT/RODFT/DHT sizes for which to generate + # specialized codelets. Currently, only REDFT01/10 of size 8 (used in JPEG). 
+ + # e<a><b>_<n> is a hard-coded REDFT<a><b> FFT (DCT) of size <n> + 'e00': { + 'generator': gen_r2r, + 'include': 'r2r.h', + 'extra_args': [ + '-redft00' + ], + 'ns': [ + # 2, 3, 4, 5, 6, 7, 8, + ], + }, + 'e01': { + 'generator': gen_r2r, + 'include': 'r2r.h', + 'extra_args': [ + '-redft01' + ], + 'ns': [ + 8, + # 2, 3, 4, 5, 6, 7, + ], + }, + 'e10': { + 'generator': gen_r2r, + 'include': 'r2r.h', + 'extra_args': [ + '-redft10' + ], + 'ns': [ + 8, + # 2, 3, 4, 5, 6, 7, + ], + }, + 'e11': { + 'generator': gen_r2r, + 'include': 'r2r.h', + 'extra_args': [ + '-redft11' + ], + 'ns': [ + # 2, 3, 4, 5, 6, 7, 8, + ], + }, + # o<a><b>_<n> is a hard-coded RODFT<a><b> FFT (DST) of size <n> + 'o00': { + 'generator': gen_r2r, + 'include': 'r2r.h', + 'extra_args': [ + '-rodft00' + ], + 'ns': [ + # 2, 3, 4, 5, 6, 7, 8, + ], + }, + 'o01': { + 'generator': gen_r2r, + 'include': 'r2r.h', + 'extra_args': [ + '-rodft01' + ], + 'ns': [ + # 2, 3, 4, 5, 6, 7, 8, + ], + }, + 'o10': { + 'generator': gen_r2r, + 'include': 'r2r.h', + 'extra_args': [ + '-rodft10' + ], + 'ns': [ + # 2, 3, 4, 5, 6, 7, 8, + ], + }, + 'o11': { + 'generator': gen_r2r, + 'include': 'r2r.h', + 'extra_args': [ + '-rodft11' + ], + 'ns': [ + # 2, 3, 4, 5, 6, 7, 8, + ], + }, + # dht_<n> is a hard-coded DHT of size <n> + 'dht': { + 'generator': gen_r2r, + 'include': 'r2r.h', + 'extra_args': [ + '-dht', + '-sign', '1' + ], + 'ns': [ + # 2, 3, 4, 5, 6, 7, 8, + ], + }, +} + +flags_common = [ + '-compact', + '-variables', '4', + '-pipeline-latency', '4' +] +solvtab_name = 'X(solvtab_rdft_r2r)' + +incdir = fs.relative_to( + meson.current_source_dir() / '..', + meson.project_source_root(), +) + +codelets = [] +foreach type, params : genfft_params + foreach n : params['ns'] + codelet = f'@type@_@n@' + codelets += codelet + outname = codelet + '.c' + + if generate_codelets + tgt = custom_target( + outname, + output: outname, + capture: true, + command: [ + gen_codelet, + codelet_prelude, + params['generator'], + flags_common, + params['extra_args'], + '-include', incdir
/ params['include'], + '-n', f'@n@', + '-name', codelet, + ], + ) + fftw_srcset.add(tgt) + fftw_extra_srcset.add(tgt) + else + fftw_srcset.add(files(outname)) # not generating: use the codelet source already in this directory + endif + endforeach +endforeach + +if generate_codelets # assemble codlist.c, the solvtab listing every codelet + codlist = configuration_data( + {'solvtab_name': solvtab_name, 'extra_includes': ''}, + ) + + decls = [] + foreach codelet : codelets + decls += f'extern void X(codelet_@codelet@)(planner *);' + endforeach + codlist.set('decls', '\n'.join(decls)) # one extern declaration per line + + solvtab_entries = [] + foreach codelet : codelets + solvtab_entries += f' SOLVTAB(X(codelet_@codelet@)),' + endforeach + codlist.set('solvtab_entries', '\n'.join(solvtab_entries)) + + cfgf = configure_file( + configuration: codlist, + input: support_dir / 'codlist.c.in', + output: 'codlist.c', + ) + fftw_srcset.add(cfgf) + fftw_extra_srcset.add(cfgf) +else + fftw_srcset.add(files('codlist.c')) +endif diff --git a/rdft/simd/altivec/meson.build b/rdft/simd/altivec/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/altivec/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/rdft/simd/avx-128-fma/meson.build b/rdft/simd/avx-128-fma/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/avx-128-fma/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs.
+ fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/rdft/simd/avx/meson.build b/rdft/simd/avx/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/avx/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/rdft/simd/avx2-128/meson.build b/rdft/simd/avx2-128/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/avx2-128/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/rdft/simd/avx2/meson.build b/rdft/simd/avx2/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/avx2/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/rdft/simd/avx512/meson.build b/rdft/simd/avx512/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/avx512/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs.
+ fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/rdft/simd/common/meson.build b/rdft/simd/common/meson.build new file mode 100644 index 000000000..f90ee6c34 --- /dev/null +++ b/rdft/simd/common/meson.build @@ -0,0 +1,78 @@ +flags_common = [ # generator options passed for every SIMD codelet + '-compact', + '-variables', '32', + '-pipeline-latency', '8', + '-trivial-stores', + '-no-generate-bytw', + '-simd', +] + +incdir = fs.relative_to( + meson.current_source_dir() / '..', + meson.project_source_root(), +) + +simd_common_codelets = [] # sources shared by every instruction-set directory +codelets = [] # codelet names of the form TYPE_N +foreach type, params : genfft_params_simd + foreach n : params['ns'] + codelet = f'@type@_@n@' + codelets += codelet + outname = codelet + '.c' + + if generate_codelets + tgt = custom_target( + outname, + output: outname, + capture: true, # the command's stdout becomes the output file + command: [ + gen_codelet, + codelet_prelude, + params['generator'], + flags_common, + params['extra_args'], + '-include', incdir / params['include'], + '-n', f'@n@', + '-name', codelet, + ], + ) + simd_common_codelets += tgt + else + simd_common_codelets += files(outname) # not generating: use the source already in this directory + endif + endforeach +endforeach + +if generate_codelets # assemble codlist.c and copy genus.c into the build directory + codlist = configuration_data( + { + 'solvtab_name': 'XSIMD(solvtab_rdft)', + 'extra_includes': '#include SIMD_HEADER', + }, + ) + + decls = [] + foreach codelet : codelets + decls += f'extern void XSIMD(codelet_@codelet@)(planner *);' + endforeach + codlist.set('decls', '\n'.join(decls)) + + solvtab_entries = [] + foreach codelet : codelets + solvtab_entries += f' SOLVTAB(XSIMD(codelet_@codelet@)),' + endforeach + codlist.set('solvtab_entries', '\n'.join(solvtab_entries)) + + simd_common_codelets += \ + configure_file( + configuration: codlist, + input: support_dir / 'codlist.c.in', + output: 'codlist.c', + ) + simd_common_codelets += \ + configure_file(input: 'genus.c', output: 'genus.c', copy: true) +else + simd_common_codelets += files('codlist.c', 'genus.c') +endif + +fftw_extra_srcset.add(simd_common_codelets) diff --git a/rdft/simd/generic-simd128/meson.build
b/rdft/simd/generic-simd128/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/generic-simd128/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/rdft/simd/generic-simd256/meson.build b/rdft/simd/generic-simd256/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/generic-simd256/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/rdft/simd/kcvi/meson.build b/rdft/simd/kcvi/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/kcvi/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/rdft/simd/lasx/meson.build b/rdft/simd/lasx/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/lasx/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs.
+ fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/rdft/simd/lsx/meson.build b/rdft/simd/lsx/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/lsx/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/rdft/simd/meson.build b/rdft/simd/meson.build new file mode 100644 index 000000000..ff08bbab1 --- /dev/null +++ b/rdft/simd/meson.build @@ -0,0 +1,128 @@ +# This file contains a standard list of RDFT SIMD codelets. It is +# used by common/meson.build to generate the C files with the actual +# codelets in them. It is used by {sse,sse2,...}/meson.build to +# generate and compile stub files that include common/*.c + +# You can customize FFTW for special needs, e.g. to handle certain +# sizes more efficiently, by adding new codelets to the lists of those +# included by default. If you change the list of codelets, any new +# ones you added will be automatically generated when you run the +# bootstrap script (see "Generating your own code" in the FFTW +# manual).
+ +genfft_params_simd = { # codelet type -> generator, header, extra generator args and sizes + 'hc2cfdftv': { + 'generator': gen_hc2cdft_c, + 'include': 'hc2cfv.h', + 'extra_args': [ + '-dit' + ], + 'ns': [ + 2, 4, 6, 8, 10, 12, 16, 32, + 20 + ], + }, + 'hc2cbdftv': { + 'generator': gen_hc2cdft_c, + 'include': 'hc2cbv.h', + 'extra_args': [ + '-dif', + '-sign', '1' + ], + 'ns': [ + 2, 4, 6, 8, 10, 12, 16, 32, + 20 + ], + }, +} + +subdir('common') # defines simd_common_codelets from genfft_params_simd + +subdirs = { # config key -> directories (each with its SIMD header) and compiler flags + 'HAVE_ALTIVEC': { + 'dirs': {'altivec' : 'simd-support/simd-altivec.h'}, + 'flags': simd_flags_altivec, + }, + 'HAVE_SSE2': { + 'dirs': {'sse2' : 'simd-support/simd-sse2.h'}, + 'flags': simd_flags_sse2, + }, + 'HAVE_AVX': { + 'dirs': {'avx' : 'simd-support/simd-avx.h'}, + 'flags': simd_flags_avx, + }, + 'HAVE_AVX_128_FMA': { + 'dirs': {'avx-128-fma' : 'simd-support/simd-avx-128-fma.h'}, + 'flags': simd_flags_avx_128_fma, + }, + 'HAVE_AVX2': { + 'dirs': { + 'avx2' : 'simd-support/simd-avx2.h', + 'avx2-128' : 'simd-support/simd-avx2-128.h', + }, + 'flags': simd_flags_avx2, + }, + 'HAVE_AVX512': { + 'dirs': {'avx512' : 'simd-support/simd-avx512.h'}, + 'flags': simd_flags_avx512, + }, + 'HAVE_KCVI': { + 'dirs': {'kcvi' : 'simd-support/simd-kcvi.h'}, + 'flags': simd_flags_kcvi, + }, + 'HAVE_GENERIC_SIMD128': { + 'dirs': {'generic-simd128' : 'simd-support/simd-generic128.h'}, + 'flags': simd_flags_generic_simd128, + }, + 'HAVE_GENERIC_SIMD256': { + 'dirs': {'generic-simd256' : 'simd-support/simd-generic256.h'}, + 'flags': simd_flags_generic_simd256, + }, + 'HAVE_LASX': { + 'dirs': {'lasx' : 'simd-support/simd-lasx.h'}, + 'flags': simd_flags_lasx, + }, + 'HAVE_LSX': { + 'dirs': {'lsx' : 'simd-support/simd-lsx.h'}, + 'flags': simd_flags_lsx, + }, + 'HAVE_NEON': { + 'dirs': {'neon' : 'simd-support/simd-neon.h'}, + 'flags': simd_flags_neon, + }, + 'HAVE_SVE': { + 'dirs': { + 'sve128' : 'simd-support/simd-maskedsve128.h', + 'sve256' : 'simd-support/simd-maskedsve256.h', + 'sve512' : 'simd-support/simd-maskedsve512.h', + 'sve1024' : 'simd-support/simd-maskedsve1024.h', + 'sve2048' :
'simd-support/simd-maskedsve2048.h', + }, + 'flags': simd_flags_sve, + }, + 'HAVE_VSX': { + 'dirs': {'vsx' : 'simd-support/simd-vsx.h'}, + 'flags': simd_flags_vsx, + }, +} + +foreach cond, simd : subdirs + simd_flags = simd['flags'] + foreach dir, header : simd['dirs'] + simd_header = header # read by the meson.build of the subdirectory entered below + simd_name = dir + subdir(dir) # entered for every directory, whether or not cond is configured + + if conf.has(cond) # NOTE(review): has() is true for any key that was set, even to false - confirm keys are only set when enabled + lib = static_library( + simd_name, + build_by_default: false, + c_args: [simd_flags, f'@cc_def@SIMD_HEADER="@simd_header@"'], + include_directories: fftw_root_inc, + sources: simd_common_codelets, # the shared sources are compiled once per directory with its own flags and header + ) + + fftw_srcset.add(declare_dependency(link_with: lib)) + endif + endforeach +endforeach diff --git a/rdft/simd/neon/meson.build b/rdft/simd/neon/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/neon/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/rdft/simd/sse2/meson.build b/rdft/simd/sse2/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/sse2/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs.
+ fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/rdft/simd/sve1024/meson.build b/rdft/simd/sve1024/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/sve1024/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/rdft/simd/sve128/meson.build b/rdft/simd/sve128/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/sve128/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/rdft/simd/sve2048/meson.build b/rdft/simd/sve2048/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/sve2048/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/rdft/simd/sve256/meson.build b/rdft/simd/sve256/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/sve256/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs.
+ fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/rdft/simd/sve512/meson.build b/rdft/simd/sve512/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/sve512/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/rdft/simd/vsx/meson.build b/rdft/simd/vsx/meson.build new file mode 100644 index 000000000..bd1a37d04 --- /dev/null +++ b/rdft/simd/vsx/meson.build @@ -0,0 +1,10 @@ +if generate_codelets + foreach ccodelet: simd_common_codelets + tgt = custom_target(input: ccodelet, output: '@PLAINNAME@', + capture: true, + command: [gen_codelet_simd, simd_header, '@PLAINNAME@']) + + # The stubs are only collected in fftw_extra_srcset; we really only need them for dist tarballs. + fftw_extra_srcset.add(tgt) + endforeach +endif diff --git a/reodft/meson.build b/reodft/meson.build new file mode 100644 index 000000000..9cabff5c7 --- /dev/null +++ b/reodft/meson.build @@ -0,0 +1,14 @@ +fftw_srcset.add( # added unconditionally to fftw_srcset + files( + 'conf.c', + 'redft00e-r2hc-pad.c', + 'redft00e-r2hc.c', + 'reodft00e-splitradix.c', + 'reodft010e-r2hc.c', + 'reodft11e-r2hc-odd.c', + 'reodft11e-r2hc.c', + 'reodft11e-radix2.c', + 'rodft00e-r2hc-pad.c', + 'rodft00e-r2hc.c', + ), +) diff --git a/simd-support/meson.build b/simd-support/meson.build new file mode 100644 index 000000000..56727c24b --- /dev/null +++ b/simd-support/meson.build @@ -0,0 +1,16 @@ +fftw_srcset.add( # every listed file is added regardless of which SIMD keys are configured + files( + 'altivec.c', + 'avx-128-fma.c', + 'avx.c', + 'avx2.c', + 'avx512.c', + 'kcvi.c', + 'lasx.c', + 'lsx.c', + 'neon.c', + 'sse2.c', + 'taint.c', + 'vsx.c', + ), +) diff --git a/support/FFTW3Config.cmake.in b/support/FFTW3Config.cmake.in new file mode 100644 index 000000000..57106fedd --- /dev/null +++ b/support/FFTW3Config.cmake.in @@ -0,0 +1,7 @@
+set(FFTW3_VERSION @version@) + +@PACKAGE_INIT@ + +set_and_check(FFTW3@prec_suffix@_LIBRARIES @lib_name@) +set_and_check(FFTW3@prec_suffix@_LIBRARY_DIRS @libdir@) +set_and_check(FFTW3@prec_suffix@_INCLUDE_DIRS @includedir@) diff --git a/support/codlist.c.in b/support/codlist.c.in new file mode 100644 index 000000000..e2986d7d3 --- /dev/null +++ b/support/codlist.c.in @@ -0,0 +1,11 @@ +#include "kernel/ifftw.h" +@extra_includes@ + +@decls@ + + +extern const solvtab @solvtab_name@; +const solvtab @solvtab_name@ = { +@solvtab_entries@ + SOLVTAB_END +}; diff --git a/support/gen_codelet.sh b/support/gen_codelet.sh new file mode 100755 index 000000000..bc1b0c461 --- /dev/null +++ b/support/gen_codelet.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# Usage: gen_codelet.sh COPYRIGHT TWOVERS INDENT PRELUDE GENERATOR-ARGS... +set -e + +copyright_f="$1" +shift +twovers_f="$1" +shift +indent_f="$1" +shift +prelude_f="$1" +shift + +# Emit copyright + prelude + generator output, stamp the date, then indent the result. +# "$@" (not $*) forwards every remaining argument unsplit and unglobbed. +(cat "$copyright_f" "$prelude_f"; $twovers_f "$@") | sed -e s/@DATE@/"`date`"/ | $indent_f -kr -cs -i5 -l800 -fca -nfc1 -sc -sob -cli4 -TR -Tplanner -TV diff --git a/support/gen_codelet_simd.sh b/support/gen_codelet_simd.sh new file mode 100755 index 000000000..991e723e0 --- /dev/null +++ b/support/gen_codelet_simd.sh @@ -0,0 +1,12 @@ +#!/bin/sh +set -e + +header="$1" +shift +codelet="$1" + +cat < "$MESON_PROJECT_DIST_ROOT/ChangeLog" + +# Build the dist_src target, then copy the listed build outputs into the dist tree. +meson compile dist_src +for f in @files@; do + cp -rp "$MESON_PROJECT_BUILD_ROOT/$f" "$MESON_PROJECT_DIST_ROOT/$f" +done + +cd "$MESON_PROJECT_DIST_ROOT" +# On an exactly tagged commit, stamp the version numbers into configure.ac and run bootstrap.sh. +if git -C "$MESON_PROJECT_SOURCE_ROOT" describe --tags --exact-match; then + pat_major='(FFTW_MAJOR_VERSION\s*,\s*)[^\)]*' + pat_minor='(FFTW_MINOR_VERSION\s*,\s*)[^\)]*' + sed -i -E "s/$pat_major/\1$version_major/;s/$pat_minor/\1$version_minor/" configure.ac + + pat_shared='(SHARED_VERSION_INFO=")([^:]*):([^:]*):([^"]*)(")' + sed -i -E "s/$pat_shared/\1$version_current:\3:$version_age\5/" configure.ac + + sh bootstrap.sh + make distclean + # find needs an explicit start directory, and -delete cannot remove a non-empty .deps, so prune and rm -rf. + find . -name .deps -type d -prune -exec rm -rf {} \; +else + echo "***************************************************" + echo " WARNING:
Current commit does not match any tag." + echo " Refusing to include Autotools build system files." + echo "***************************************************" + + # Quote the pattern so the shell cannot expand it against the dist root before find runs. + find . -name 'Makefile.*' -type f -delete + rm configure.ac + rm fftw.pc.in + rm bootstrap.sh + rm mkdist.sh + rm commercialize.sh +fi diff --git a/tests/meson.build b/tests/meson.build new file mode 100644 index 000000000..2acfcffd9 --- /dev/null +++ b/tests/meson.build @@ -0,0 +1,182 @@ +bench_src = files('bench.c', 'fftw-bench.c', 'hook.c') +bench_libs = [fftw_lib, libbench2] +bench_deps = [fftw_src.dependencies()] +bench_defs = [] + +if threads_dep.found() + if not conf.get('COMBINED_THREADS') + bench_libs += fftw_threads_lib + bench_deps += threads_dep + endif +elif openmp_dep.found() + bench_libs += fftw_omp_lib + bench_deps += openmp_dep +endif + +bench_exe = executable( + 'bench', + bench_src, + build_by_default: false, + c_args: bench_defs, + link_with: bench_libs, + include_directories: fftw_root_inc, + dependencies: bench_deps, +) + +perl = find_program('perl', required: false) +summary('Tests', perl.found(), bool_yn: true, section: 'Build') + +if not perl.found() # check.pl is run through perl, so no test is registered without it + subdir_done() +endif +check_pl = ['-w', meson.current_source_dir() / 'check.pl', '-v', bench_exe] # argument prefix shared by every test() in this file + +test( + 'random, count=30', + perl, + suite: ['basic'], + timeout: 0, # a non-positive timeout disables the time limit + is_parallel: true, + args: [check_pl, '-r', '-c=30'], +) +test( + 'random, count=30, nthreads=2', + perl, + suite: ['basic', 'threaded'], + timeout: 0, + is_parallel: false, + args: [check_pl, '-r', '-c=30', '--nthreads=2'], +) +test( + 'random, count=5, nthreads=2, threads_callback', + perl, + suite: ['basic', 'threaded'], + timeout: 0, + is_parallel: false, + args: [check_pl, '-r', '-c=5', '--nthreads=2', '--threads_callback'], +) + +test( + 'all, validate wisdom', + perl, + suite: ['big'], + timeout: 0, + is_parallel: true, + args: [check_pl, '-a', '--validate-wisdom'], +) +test( + 'all, validate wisdom, nthreads=2', + perl, + suite: ['big', 'threaded'], +
timeout: 0, + is_parallel: false, + args: [check_pl, '-a', '--validate-wisdom', '--nthreads=2'], +) +test( + 'all, validate wisdom, nthreads=3', + perl, + suite: ['big', 'threaded'], + timeout: 0, + is_parallel: false, + args: [check_pl, '-a', '--validate-wisdom', '--nthreads=3'], +) +test( + 'all, validate wisdom, nthreads=10', + perl, + suite: ['big', 'threaded'], + timeout: 0, + is_parallel: false, + args: [check_pl, '-a', '--validate-wisdom', '--nthreads=10'], +) +test( + 'all, validate wisdom, nthreads=2, threads_callback', + perl, + suite: ['big', 'threaded'], + timeout: 0, + is_parallel: false, + args: [ + check_pl, + '-a', + '--validate-wisdom', + '--nthreads=2', + '--threads_callback', + ], +) + +test( + 'random, count=1', + perl, + suite: ['small'], + timeout: 0, + is_parallel: true, + args: [check_pl, '-r', '-c=1'], +) +test( + 'random, count=5, estimate', + perl, + suite: ['small'], + timeout: 0, + is_parallel: true, + args: [check_pl, '-r', '-c=5', '--estimate'], +) +test( + 'random, count=2, estimate, nthreads=2', + perl, + suite: ['small', 'threaded'], + timeout: 0, + is_parallel: false, + args: [check_pl, '-r', '-c=2', '--estimate', '--nthreads=2'], +) + +test( + 'all, patient, paranoid', + perl, + suite: ['paranoid'], + timeout: 0, + is_parallel: true, + args: [check_pl, '-a', '--patient', '--paranoid'], +) +foreach n : [10, 7, 3, 2] # one serialized test per thread count + test( + f'all, patient, paranoid, nthreads=@n@', + perl, + suite: ['paranoid', 'threaded'], + timeout: 0, + is_parallel: false, + args: [check_pl, '-a', '--patient', '--paranoid', f'--nthreads=@n@'], # must be an f-string, otherwise the literal text @n@ is passed + ) +endforeach + +test( + 'all, exhaustive, paranoid', + perl, + suite: ['exhaustive', 'paranoid'], + timeout: 0, + is_parallel: true, + args: [check_pl, '-a', '--exhaustive', '--paranoid'], +) +foreach n : [10, 7, 3, 2] + test( + f'all, exhaustive, paranoid, nthreads=@n@', + perl, + suite: ['exhaustive', 'paranoid', 'threaded'], + timeout: 0, + is_parallel: false, + args: [check_pl, '-a', '--exhaustive', '--paranoid',
f'--nthreads=@n@'], # must be an f-string, otherwise the literal text @n@ is passed + ) +endforeach +test( + 'all, exhaustive, paranoid, nthreads=2, threads_callback', + perl, + suite: ['exhaustive', 'paranoid', 'threaded'], + timeout: 0, + is_parallel: false, + args: [ + check_pl, + '-a', + '--exhaustive', + '--paranoid', + '--nthreads=2', + '--threads_callback', + ], +) diff --git a/threads/meson.build b/threads/meson.build new file mode 100644 index 000000000..94a541bba --- /dev/null +++ b/threads/meson.build @@ -0,0 +1,40 @@ +src_par = files( # sources added to both the threads and the OpenMP source sets + 'api.c', + 'conf.c', + 'ct.c', + 'dft-vrank-geq1.c', + 'f77api.c', + 'hc2hc.c', + 'rdft-vrank-geq1.c', + 'vrank-geq1-rdft2.c', +) + +threads_dep = dependency('threads', required: get_option('threads')) +conf.set('HAVE_THREADS', threads_dep.found()) +conf.set( + 'USING_POSIX_THREADS', + threads_dep.found() and cc.has_header('pthread.h'), +) + +openmp_dep = dependency('openmp', required: get_option('openmp')) +conf.set('HAVE_OPENMP', openmp_dep.found()) + +combined_threads = get_option('combined_threads').require( # disabled, or an error if explicitly enabled, when threads are missing + threads_dep.found(), + error_message: 'combined_threads: requires threads', +) +conf.set('COMBINED_THREADS', combined_threads.allowed()) + +fftw_threads_srcset.add([threads_dep, src_par, files('threads.c')]) +fftw_omp_srcset.add([openmp_dep, src_par, files('openmp.c')]) + +fftw_srcset.add_all(when: ['COMBINED_THREADS'], if_true: fftw_threads_srcset) # fold the threads sources into fftw_srcset for combined builds + +summary( + { + 'threads': threads_dep.found(), + 'OpenMP': openmp_dep.found(), + }, + bool_yn: true, + section: 'Library' +) diff --git a/tools/meson.build b/tools/meson.build new file mode 100644 index 000000000..b3d30c3d6 --- /dev/null +++ b/tools/meson.build @@ -0,0 +1,48 @@ +wisdom_libs = [fftw_lib, libbench2] +wisdom_deps = [fftw_src.dependencies()] +wisdom_src = files('fftw-wisdom.c') + +if threads_dep.found() + if not conf.get('COMBINED_THREADS') + wisdom_libs += fftw_threads_lib + wisdom_deps += threads_dep + endif +elif openmp_dep.found() + wisdom_libs += fftw_omp_lib + wisdom_deps += openmp_dep +endif + +
+executable( + f'fftw@prec_suffix@-wisdom', + wisdom_src, + link_with: wisdom_libs, + include_directories: '..', + objects: bench_exe.extract_objects('bench.c', 'fftw-bench.c'), # reuse objects already compiled for the bench executable + dependencies: wisdom_deps, + install: true, +) + +cfgv = { # substitutions shared by the script and man page templates below + 'PACKAGE': meson.project_name(), + 'VERSION': meson.project_version(), + 'PREC_SUFFIX': prec_suffix, +} + +configure_file( + configuration: cfgv, + input: 'fftw-wisdom-to-conf.in', + output: '@BASENAME@', + install: true, + install_dir: get_option('bindir') +) + +configure_file( + configuration: cfgv, + input: 'fftw_wisdom.1.in', + output: f'fftw@prec_suffix@-wisdom.1', # name the page after the installed program instead of fftw_wisdom.1 + install: true, + install_dir: get_option('mandir') / 'man1' +) + +install_man('fftw-wisdom-to-conf.1')