From b7b9267be7fcf4b6e790f6dbdee8e16915695ea9 Mon Sep 17 00:00:00 2001 From: mhucka Date: Sun, 29 Jun 2025 22:52:17 +0000 Subject: [PATCH 01/32] Remove incorrect comment --- pybind_interface/GetCUDAARCHS.cmake | 3 --- 1 file changed, 3 deletions(-) diff --git a/pybind_interface/GetCUDAARCHS.cmake b/pybind_interface/GetCUDAARCHS.cmake index aafa0ad33..4e53f7689 100644 --- a/pybind_interface/GetCUDAARCHS.cmake +++ b/pybind_interface/GetCUDAARCHS.cmake @@ -12,9 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - -# Check whether the user has provided info about the GPU(s) installed -# on their system. If not, try to determine what it is automaticaly. if(CMAKE_CUDA_ARCHITECTURES) # CMake 3.18+ sets this variable from $CUDAARCHS automatically. message(STATUS "qsim: using CUDA architectures " From 4658564fc6d950fd8ea3b1aa5b68ffba0e12252b Mon Sep 17 00:00:00 2001 From: mhucka Date: Sun, 29 Jun 2025 22:58:49 +0000 Subject: [PATCH 02/32] Detect AVX & SSE and only build corresponding parts Previously, the CMake configuration was such that it would always build the AVX2, AVX512 & SSE2 Pybind11 modules without testing whether the current system supported those options. The changes here use CMake features to detect whether the architectural features are in fact available, and only attempt to build the appropriate modules. In addition, there is a minor bit of refactoring to group some of the code more logically, and to add some more informational message printouts. 
--- CMakeLists.txt | 56 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e9bf3fef4..cfc8572f0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,13 +15,19 @@ cmake_minimum_required(VERSION 3.31) project(qsim LANGUAGES CXX) -include(CheckLanguage) -check_language(CUDA) - # This text is prepended to messages printed by this config file so it's # easier to figure out what came from where in the logs. set(MSG_PREFIX "[qsim cmake configuration]") + +# ~~~~~ Analyze the host's hardware & software features ~~~~~ + +include(CheckLanguage) +include(CheckCXXCompilerFlag) +include(CheckCXXSourceRuns) + +check_language(CUDA) + # CMake normally sets CMAKE_APPLE_SILICON_PROCESSOR on Apple Silicon; however, # it doesn't happen when running builds using cibuildwheel, even on Apple # Silicon. We have had better luck checking and seting it ourselves. @@ -54,6 +60,23 @@ endif() find_package(OpenMP REQUIRED) +cmake_host_system_information(RESULT HAVE_SSE2 QUERY HAS_SSE2) + +if(WIN32) + check_cxx_compiler_flag("/arch:AVX2" HAVE_AVX512) + check_cxx_compiler_flag("/arch:AVX512" HAVE_AVX512) +else() + check_cxx_compiler_flag("-mavx2" HAVE_AVX2) + check_cxx_compiler_flag("-mavx512f" HAVE_AVX512) +endif() + + +# ~~~~~ Configure the build ~~~~~ + +# The following settings mirror what is in our hand-written Makefiles. +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + # Always build the basic part. 
add_subdirectory(pybind_interface/basic) add_subdirectory(pybind_interface/decide) @@ -69,16 +92,25 @@ if(NOT CMAKE_APPLE_SILICON_PROCESSOR) add_subdirectory(pybind_interface/hip) endif() - add_subdirectory(pybind_interface/sse) - add_subdirectory(pybind_interface/avx512) - add_subdirectory(pybind_interface/avx2) + if(HAVE_SSE2) + add_subdirectory(pybind_interface/sse) + endif() + + if(HAVE_AVX2) + add_subdirectory(pybind_interface/avx2) + endif() + + if(HAVE_AVX512) + add_subdirectory(pybind_interface/avx512) + endif() endif() -# Additional miscellanous settings. -# The following settings mirror what is in our hand-written Makefiles. -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_POSITION_INDEPENDENT_CODE ON) -# Print additional useful info. -message(STATUS "${MSG_PREFIX} OpenMP found = ${OPENMP_FOUND}") +# ~~~~~ Print misc. info ~~~~~ + +message(STATUS "${MSG_PREFIX} host has SSE2 = ${HAVE_SSE2}") +message(STATUS "${MSG_PREFIX} host has AVX2 = ${HAVE_AVX2}") +message(STATUS "${MSG_PREFIX} host has AVX512 = ${HAVE_AVX512}") + message(STATUS "${MSG_PREFIX} shell $PATH = $ENV{PATH}") +message(STATUS "${MSG_PREFIX} shell $CUQUANTUM_ROOT = $ENV{CUQUANTUM_ROOT}") From e2c35ec2e293a1ec902317c210613794d87d9396 Mon Sep 17 00:00:00 2001 From: mhucka Date: Sun, 29 Jun 2025 23:01:03 +0000 Subject: [PATCH 03/32] Consolidate common flags into the top-level CMakeLists.txt Each of 8 or so `pybind_interface/` subdirectories had `CMakeLists.txt` files that contained the same text for setting certain compilation flags. This commit removes the duplication in favor of putting the settings into the top-level CMake file. 
--- CMakeLists.txt | 22 ++++++++++++++++++++++ pybind_interface/avx2/CMakeLists.txt | 19 ++----------------- pybind_interface/avx512/CMakeLists.txt | 19 ++----------------- pybind_interface/basic/CMakeLists.txt | 21 --------------------- pybind_interface/cuda/CMakeLists.txt | 21 --------------------- pybind_interface/custatevec/CMakeLists.txt | 21 --------------------- pybind_interface/decide/CMakeLists.txt | 21 --------------------- pybind_interface/hip/CMakeLists.txt | 6 ------ pybind_interface/sse/CMakeLists.txt | 21 ++------------------- 9 files changed, 28 insertions(+), 143 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cfc8572f0..d3a827213 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,6 +77,13 @@ endif() set(CMAKE_CXX_STANDARD 17) set(CMAKE_POSITION_INDEPENDENT_CODE ON) +# Options propagated to all sub-cmakefiles. +if(WIN32) + add_compile_options(/O2 /std:c++17 /openmp) +else() + add_compile_options(-O3 -std=c++17 -D_GLIBCXX_USE_CXX11_ABI=1 -flto=auto) +endif() + # Always build the basic part. add_subdirectory(pybind_interface/basic) add_subdirectory(pybind_interface/decide) @@ -105,6 +112,21 @@ if(NOT CMAKE_APPLE_SILICON_PROCESSOR) endif() endif() +if(APPLE) + include_directories( + "/usr/local/include" + "/usr/local/opt/llvm/include" + "/opt/homebrew/include" + "/opt/homebrew/opt/llvm@19/include" + ) + link_directories( + "/usr/local/lib" + "/usr/local/opt/llvm/lib" + "/opt/homebrew/lib" + "/opt/homebrew/opt/llvm@19/lib" + ) +endif() + # ~~~~~ Print misc. 
info ~~~~~ diff --git a/pybind_interface/avx2/CMakeLists.txt b/pybind_interface/avx2/CMakeLists.txt index 50bee22c6..4e59b3064 100644 --- a/pybind_interface/avx2/CMakeLists.txt +++ b/pybind_interface/avx2/CMakeLists.txt @@ -16,26 +16,11 @@ cmake_minimum_required(VERSION 3.31) project(qsim) IF (WIN32) - set(CMAKE_CXX_FLAGS "/arch:AVX2 /O2 /openmp") + set(CMAKE_CXX_FLAGS "/arch:AVX2") ELSE() - set(CMAKE_CXX_FLAGS "-mavx2 -mfma -O3") + set(CMAKE_CXX_FLAGS "-mavx2 -mfma") ENDIF() -if(APPLE) - include_directories( - "/usr/local/include" - "/usr/local/opt/llvm/include" - "/opt/homebrew/include" - "/opt/homebrew/opt/llvm@19/include" - ) - link_directories( - "/usr/local/lib" - "/usr/local/opt/llvm/lib" - "/opt/homebrew/lib" - "/opt/homebrew/opt/llvm@19/lib" - ) -endif() - INCLUDE(../GetPybind11.cmake) pybind11_add_module(qsim_avx2 pybind_main_avx2.cpp) diff --git a/pybind_interface/avx512/CMakeLists.txt b/pybind_interface/avx512/CMakeLists.txt index 7fa0bf18f..8cd04676c 100644 --- a/pybind_interface/avx512/CMakeLists.txt +++ b/pybind_interface/avx512/CMakeLists.txt @@ -16,26 +16,11 @@ cmake_minimum_required(VERSION 3.31) project(qsim) IF (WIN32) - set(CMAKE_CXX_FLAGS "/arch:AVX512 /O2 /openmp") + set(CMAKE_CXX_FLAGS "/arch:AVX512") ELSE() - set(CMAKE_CXX_FLAGS "-mavx512f -mbmi2 -O3") + set(CMAKE_CXX_FLAGS "-mavx512f -mbmi2") ENDIF() -if(APPLE) - include_directories( - "/usr/local/include" - "/usr/local/opt/llvm/include" - "/opt/homebrew/include" - "/opt/homebrew/opt/llvm@19/include" - ) - link_directories( - "/usr/local/lib" - "/usr/local/opt/llvm/lib" - "/opt/homebrew/lib" - "/opt/homebrew/opt/llvm@19/lib" - ) -endif() - INCLUDE(../GetPybind11.cmake) pybind11_add_module(qsim_avx512 pybind_main_avx512.cpp) diff --git a/pybind_interface/basic/CMakeLists.txt b/pybind_interface/basic/CMakeLists.txt index ace30190d..ef8b3a75c 100644 --- a/pybind_interface/basic/CMakeLists.txt +++ b/pybind_interface/basic/CMakeLists.txt @@ -15,27 +15,6 @@ cmake_minimum_required(VERSION 3.31) 
project(qsim) -if(WIN32) - set(CMAKE_CXX_FLAGS "/O2 /openmp") -else() - set(CMAKE_CXX_FLAGS "-O3") -endif() - -if(APPLE) - include_directories( - "/usr/local/include" - "/usr/local/opt/llvm/include" - "/opt/homebrew/include" - "/opt/homebrew/opt/llvm@19/include" - ) - link_directories( - "/usr/local/lib" - "/usr/local/opt/llvm/lib" - "/opt/homebrew/lib" - "/opt/homebrew/opt/llvm@19/lib" - ) -endif() - INCLUDE(../GetPybind11.cmake) pybind11_add_module(qsim_basic pybind_main_basic.cpp) diff --git a/pybind_interface/cuda/CMakeLists.txt b/pybind_interface/cuda/CMakeLists.txt index a695ab5b4..286807af8 100644 --- a/pybind_interface/cuda/CMakeLists.txt +++ b/pybind_interface/cuda/CMakeLists.txt @@ -15,27 +15,6 @@ cmake_minimum_required(VERSION 3.31) project(qsim LANGUAGES CXX CUDA) -if(WIN32) - set(CMAKE_CXX_FLAGS "/O2 /openmp") -else() - set(CMAKE_CXX_FLAGS "-O3") -endif() - -if(APPLE) - include_directories( - "/usr/local/include" - "/usr/local/opt/llvm/include" - "/opt/homebrew/include" - "/opt/homebrew/opt/llvm@19/include" - ) - link_directories( - "/usr/local/lib" - "/usr/local/opt/llvm/lib" - "/opt/homebrew/lib" - "/opt/homebrew/opt/llvm@19/lib" - ) -endif() - include(../GetPybind11.cmake) include(../GetCUDAARCHS.cmake) diff --git a/pybind_interface/custatevec/CMakeLists.txt b/pybind_interface/custatevec/CMakeLists.txt index c749b935a..88fdf7b50 100644 --- a/pybind_interface/custatevec/CMakeLists.txt +++ b/pybind_interface/custatevec/CMakeLists.txt @@ -15,27 +15,6 @@ cmake_minimum_required(VERSION 3.31) project(qsim LANGUAGES CXX CUDA) -if(WIN32) - set(CMAKE_CXX_FLAGS "/O2 /openmp") -else() - set(CMAKE_CXX_FLAGS "-O3") -endif() - -if(APPLE) - include_directories( - "/usr/local/include" - "/usr/local/opt/llvm/include" - "/opt/homebrew/include" - "/opt/homebrew/opt/llvm@19/include" - ) - link_directories( - "/usr/local/lib" - "/usr/local/opt/llvm/lib" - "/opt/homebrew/lib" - "/opt/homebrew/opt/llvm@19/lib" - ) -endif() - INCLUDE(../GetPybind11.cmake) 
find_package(Python3 3.10 REQUIRED) diff --git a/pybind_interface/decide/CMakeLists.txt b/pybind_interface/decide/CMakeLists.txt index 47b39ccc0..73b36fb4e 100644 --- a/pybind_interface/decide/CMakeLists.txt +++ b/pybind_interface/decide/CMakeLists.txt @@ -18,27 +18,6 @@ project(qsim LANGUAGES CXX) include(CheckLanguage) check_language(CUDA) -if(WIN32) - set(CMAKE_CXX_FLAGS "/O2 /openmp") -else() - set(CMAKE_CXX_FLAGS "-O3") -endif() - -if(APPLE) - include_directories( - "/usr/local/include" - "/usr/local/opt/llvm/include" - "/opt/homebrew/include" - "/opt/homebrew/opt/llvm@19/include" - ) - link_directories( - "/usr/local/lib" - "/usr/local/opt/llvm/lib" - "/opt/homebrew/lib" - "/opt/homebrew/opt/llvm@19/lib" - ) -endif() - include(../GetPybind11.cmake) # Configure based on the detected platform diff --git a/pybind_interface/hip/CMakeLists.txt b/pybind_interface/hip/CMakeLists.txt index d92d8537d..6ada8b857 100644 --- a/pybind_interface/hip/CMakeLists.txt +++ b/pybind_interface/hip/CMakeLists.txt @@ -15,12 +15,6 @@ cmake_minimum_required(VERSION 3.31) project(qsim LANGUAGES CXX HIP) -if(WIN32) - set(CMAKE_CXX_FLAGS "/O2 /openmp") -else() - set(CMAKE_CXX_FLAGS "-O3") -endif() - INCLUDE(../GetPybind11.cmake) find_package(PythonLibs 3.10 REQUIRED) diff --git a/pybind_interface/sse/CMakeLists.txt b/pybind_interface/sse/CMakeLists.txt index 2ea5037be..39d4fc09d 100644 --- a/pybind_interface/sse/CMakeLists.txt +++ b/pybind_interface/sse/CMakeLists.txt @@ -15,25 +15,8 @@ cmake_minimum_required(VERSION 3.31) project(qsim) -IF (WIN32) - set(CMAKE_CXX_FLAGS "/O2 /openmp") -ELSE() - set(CMAKE_CXX_FLAGS "-msse4.1 -O3") -ENDIF() - -if(APPLE) - include_directories( - "/usr/local/include" - "/usr/local/opt/llvm/include" - "/opt/homebrew/include" - "/opt/homebrew/opt/llvm@19/include" - ) - link_directories( - "/usr/local/lib" - "/usr/local/opt/llvm/lib" - "/opt/homebrew/lib" - "/opt/homebrew/opt/llvm@19/lib" - ) +if(NOT WIN32) + add_compile_options("-msse4.1") endif() 
INCLUDE(../GetPybind11.cmake) From 1785a09cf63090a4c032447f2de081d939be757a Mon Sep 17 00:00:00 2001 From: mhucka Date: Sun, 29 Jun 2025 20:51:53 +0000 Subject: [PATCH 04/32] Silence warning about "using serial compilation" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Ubuntu, one sees warnings like this: ``` lto-wrapper: warning: using serial compilation of 13 LTRANS jobs lto-wrapper: note: see the ‘-flto’ option documentation for more information lto-wrapper: warning: using serial compilation of 15 LTRANS jobs lto-wrapper: note: see the ‘-flto’ option documentation for more information lto-wrapper: warning: using serial compilation of 16 LTRANS jobs lto-wrapper: note: see the ‘-flto’ option documentation for more information lto-wrapper: warning: using serial compilation of 16 LTRANS jobs lto-wrapper: note: see the ‘-flto’ option documentation for more information ``` This seems to be the default behavior if the option `-flto` is not given a value (cf. https://stackoverflow.com/a/72222512/28972686). Giving it a value of "auto" makes the warning go away and lets the compilation toolchain decide how much parallelism it can use. 
--- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 9eb3f854b..ec8adddb0 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,7 @@ CXX ?= g++ NVCC ?= nvcc HIPCC ?= hipcc -CXXFLAGS ?= -O3 -std=c++17 -fopenmp +CXXFLAGS ?= -O3 -std=c++17 -fopenmp -flto=auto NVCCFLAGS ?= -O3 --std c++17 -Wno-deprecated-gpu-targets HIPCCFLAGS ?= -O3 From d90c642be0c94a26253a3c79387aa24b94c57db7 Mon Sep 17 00:00:00 2001 From: mhucka Date: Sun, 29 Jun 2025 23:14:02 +0000 Subject: [PATCH 05/32] Use add_compile_options() instead of set() --- pybind_interface/avx2/CMakeLists.txt | 4 ++-- pybind_interface/avx512/CMakeLists.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pybind_interface/avx2/CMakeLists.txt b/pybind_interface/avx2/CMakeLists.txt index 4e59b3064..80f8bad0b 100644 --- a/pybind_interface/avx2/CMakeLists.txt +++ b/pybind_interface/avx2/CMakeLists.txt @@ -16,9 +16,9 @@ cmake_minimum_required(VERSION 3.31) project(qsim) IF (WIN32) - set(CMAKE_CXX_FLAGS "/arch:AVX2") + add_compile_options(CMAKE_CXX_FLAGS "/arch:AVX2") ELSE() - set(CMAKE_CXX_FLAGS "-mavx2 -mfma") + add_compile_options(CMAKE_CXX_FLAGS "-mavx2 -mfma") ENDIF() INCLUDE(../GetPybind11.cmake) diff --git a/pybind_interface/avx512/CMakeLists.txt b/pybind_interface/avx512/CMakeLists.txt index 8cd04676c..182960a9f 100644 --- a/pybind_interface/avx512/CMakeLists.txt +++ b/pybind_interface/avx512/CMakeLists.txt @@ -16,9 +16,9 @@ cmake_minimum_required(VERSION 3.31) project(qsim) IF (WIN32) - set(CMAKE_CXX_FLAGS "/arch:AVX512") + add_compile_options(CMAKE_CXX_FLAGS "/arch:AVX512") ELSE() - set(CMAKE_CXX_FLAGS "-mavx512f -mbmi2") + add_compile_options(CMAKE_CXX_FLAGS "-mavx512f -mbmi2") ENDIF() INCLUDE(../GetPybind11.cmake) From 7fbf19aeda96ebac3a4280b110b298a661b988e8 Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 30 Jun 2025 01:12:14 +0000 Subject: [PATCH 06/32] Fix cmake syntax --- pybind_interface/avx2/CMakeLists.txt | 4 ++-- 
pybind_interface/avx512/CMakeLists.txt | 4 ++-- pybind_interface/sse/CMakeLists.txt | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pybind_interface/avx2/CMakeLists.txt b/pybind_interface/avx2/CMakeLists.txt index 80f8bad0b..75fec8833 100644 --- a/pybind_interface/avx2/CMakeLists.txt +++ b/pybind_interface/avx2/CMakeLists.txt @@ -16,9 +16,9 @@ cmake_minimum_required(VERSION 3.31) project(qsim) IF (WIN32) - add_compile_options(CMAKE_CXX_FLAGS "/arch:AVX2") + add_compile_options(/arch:AVX2) ELSE() - add_compile_options(CMAKE_CXX_FLAGS "-mavx2 -mfma") + add_compile_options(-mavx2 -mfma) ENDIF() INCLUDE(../GetPybind11.cmake) diff --git a/pybind_interface/avx512/CMakeLists.txt b/pybind_interface/avx512/CMakeLists.txt index 182960a9f..e34053670 100644 --- a/pybind_interface/avx512/CMakeLists.txt +++ b/pybind_interface/avx512/CMakeLists.txt @@ -16,9 +16,9 @@ cmake_minimum_required(VERSION 3.31) project(qsim) IF (WIN32) - add_compile_options(CMAKE_CXX_FLAGS "/arch:AVX512") + add_compile_options(/arch:AVX512) ELSE() - add_compile_options(CMAKE_CXX_FLAGS "-mavx512f -mbmi2") + add_compile_options(-mavx512f -mbmi2) ENDIF() INCLUDE(../GetPybind11.cmake) diff --git a/pybind_interface/sse/CMakeLists.txt b/pybind_interface/sse/CMakeLists.txt index 39d4fc09d..a50f37e22 100644 --- a/pybind_interface/sse/CMakeLists.txt +++ b/pybind_interface/sse/CMakeLists.txt @@ -16,7 +16,7 @@ cmake_minimum_required(VERSION 3.31) project(qsim) if(NOT WIN32) - add_compile_options("-msse4.1") + add_compile_options(-msse4.1) endif() INCLUDE(../GetPybind11.cmake) From 86c2ac8d691cc42b48fd9a05f496609dc1869d0b Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 30 Jun 2025 03:04:20 +0000 Subject: [PATCH 07/32] Fix typo --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d3a827213..1a015c11b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,7 +63,7 @@ find_package(OpenMP REQUIRED) 
cmake_host_system_information(RESULT HAVE_SSE2 QUERY HAS_SSE2) if(WIN32) - check_cxx_compiler_flag("/arch:AVX2" HAVE_AVX512) + check_cxx_compiler_flag("/arch:AVX2" HAVE_AVX2) check_cxx_compiler_flag("/arch:AVX512" HAVE_AVX512) else() check_cxx_compiler_flag("-mavx2" HAVE_AVX2) From f518df6b6b93f4230fb3adbb1435d85d9842a942 Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 30 Jun 2025 03:47:20 +0000 Subject: [PATCH 08/32] Add -march=native for the basic version This can enable additional optimizations without requiring specific avx/sse/etc. instructions. --- pybind_interface/basic/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pybind_interface/basic/CMakeLists.txt b/pybind_interface/basic/CMakeLists.txt index ef8b3a75c..080a1196f 100644 --- a/pybind_interface/basic/CMakeLists.txt +++ b/pybind_interface/basic/CMakeLists.txt @@ -15,6 +15,10 @@ cmake_minimum_required(VERSION 3.31) project(qsim) +if(NOT WIN32) + add_compile_options(-march=native) +endif() + INCLUDE(../GetPybind11.cmake) pybind11_add_module(qsim_basic pybind_main_basic.cpp) From 3941a3fd59fd9c12d3ee6ad08a17ebe7ec703c58 Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 30 Jun 2025 03:50:47 +0000 Subject: [PATCH 09/32] Use march=native with clang on MacOS --- pybind_interface/avx2/CMakeLists.txt | 4 ++++ pybind_interface/avx512/CMakeLists.txt | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/pybind_interface/avx2/CMakeLists.txt b/pybind_interface/avx2/CMakeLists.txt index 75fec8833..222c41cfd 100644 --- a/pybind_interface/avx2/CMakeLists.txt +++ b/pybind_interface/avx2/CMakeLists.txt @@ -17,6 +17,10 @@ project(qsim) IF (WIN32) add_compile_options(/arch:AVX2) +ELSEIF(APPLE AND NOT CMAKE_APPLE_SILICON_PROCESSOR) + # On Intel MacOS, CMake detects AVX* but clang doesn't recognize -mavx2. + # Using arch=native should turn on the features if they're available. 
+ add_compile_options(-march=native) ELSE() add_compile_options(-mavx2 -mfma) ENDIF() diff --git a/pybind_interface/avx512/CMakeLists.txt b/pybind_interface/avx512/CMakeLists.txt index e34053670..7431ae02e 100644 --- a/pybind_interface/avx512/CMakeLists.txt +++ b/pybind_interface/avx512/CMakeLists.txt @@ -17,6 +17,11 @@ project(qsim) IF (WIN32) add_compile_options(/arch:AVX512) +ELSEIF(APPLE AND NOT CMAKE_APPLE_SILICON_PROCESSOR + AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") + # On Intel Macs CMake detects AVX but clang doesn't recognize -mavx512. + # Using arch=native should turn on the features if they're available. + add_compile_options(-march=native) ELSE() add_compile_options(-mavx512f -mbmi2) ENDIF() From 20c9dd89924618a4820c943513fd9ef6b1cf9e9d Mon Sep 17 00:00:00 2001 From: mhucka Date: Tue, 1 Jul 2025 03:36:03 +0000 Subject: [PATCH 10/32] Attempt to fix AVX compilation issues on MacOS The common wisdom is wrong, apparently: `-march=native` does not work for AVX on MacOS. --- pybind_interface/avx2/CMakeLists.txt | 6 +----- pybind_interface/avx512/CMakeLists.txt | 10 ++++++---- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/pybind_interface/avx2/CMakeLists.txt b/pybind_interface/avx2/CMakeLists.txt index 222c41cfd..a836209ee 100644 --- a/pybind_interface/avx2/CMakeLists.txt +++ b/pybind_interface/avx2/CMakeLists.txt @@ -17,11 +17,7 @@ project(qsim) IF (WIN32) add_compile_options(/arch:AVX2) -ELSEIF(APPLE AND NOT CMAKE_APPLE_SILICON_PROCESSOR) - # On Intel MacOS, CMake detects AVX* but clang doesn't recognize -mavx2. - # Using arch=native should turn on the features if they're available. 
- add_compile_options(-march=native) -ELSE() +ELSEIF(NOT CMAKE_APPLE_SILICON_PROCESSOR) add_compile_options(-mavx2 -mfma) ENDIF() diff --git a/pybind_interface/avx512/CMakeLists.txt b/pybind_interface/avx512/CMakeLists.txt index 7431ae02e..743852ca1 100644 --- a/pybind_interface/avx512/CMakeLists.txt +++ b/pybind_interface/avx512/CMakeLists.txt @@ -17,11 +17,13 @@ project(qsim) IF (WIN32) add_compile_options(/arch:AVX512) -ELSEIF(APPLE AND NOT CMAKE_APPLE_SILICON_PROCESSOR +ELSEIF(APPLE + AND NOT CMAKE_APPLE_SILICON_PROCESSOR AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") - # On Intel Macs CMake detects AVX but clang doesn't recognize -mavx512. - # Using arch=native should turn on the features if they're available. - add_compile_options(-march=native) + # Note: some sources recommend against using -mavx512f, and say to use + # -march=native. The doesn't work on Mac Intel systems, at least in MacOS + # 13 (and possibly higher); the compiler does not enable AVX512 features. + add_compile_options(-mavx512f) ELSE() add_compile_options(-mavx512f -mbmi2) ENDIF() From ba036d799de9875d27f14cb3375f4c9440f49724 Mon Sep 17 00:00:00 2001 From: mhucka Date: Tue, 1 Jul 2025 03:44:21 +0000 Subject: [PATCH 11/32] It seems -mbmi2 flag exists on Macos Intel after all --- pybind_interface/avx512/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybind_interface/avx512/CMakeLists.txt b/pybind_interface/avx512/CMakeLists.txt index 743852ca1..95c197ce9 100644 --- a/pybind_interface/avx512/CMakeLists.txt +++ b/pybind_interface/avx512/CMakeLists.txt @@ -23,7 +23,7 @@ ELSEIF(APPLE # Note: some sources recommend against using -mavx512f, and say to use # -march=native. The doesn't work on Mac Intel systems, at least in MacOS # 13 (and possibly higher); the compiler does not enable AVX512 features. 
- add_compile_options(-mavx512f) + add_compile_options(-mavx512f -mbmi2) ELSE() add_compile_options(-mavx512f -mbmi2) ENDIF() From 2490847865c9b86760c558963fa6b86b13f635ef Mon Sep 17 00:00:00 2001 From: mhucka Date: Tue, 1 Jul 2025 03:53:22 +0000 Subject: [PATCH 12/32] Simplify logic for setting flags --- pybind_interface/avx512/CMakeLists.txt | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pybind_interface/avx512/CMakeLists.txt b/pybind_interface/avx512/CMakeLists.txt index 95c197ce9..fe2c8322a 100644 --- a/pybind_interface/avx512/CMakeLists.txt +++ b/pybind_interface/avx512/CMakeLists.txt @@ -17,14 +17,7 @@ project(qsim) IF (WIN32) add_compile_options(/arch:AVX512) -ELSEIF(APPLE - AND NOT CMAKE_APPLE_SILICON_PROCESSOR - AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") - # Note: some sources recommend against using -mavx512f, and say to use - # -march=native. The doesn't work on Mac Intel systems, at least in MacOS - # 13 (and possibly higher); the compiler does not enable AVX512 features. - add_compile_options(-mavx512f -mbmi2) -ELSE() +ELSEIF(NOT CMAKE_APPLE_SILICON_PROCESSOR) add_compile_options(-mavx512f -mbmi2) ENDIF() From f00af254799569abd2eff75b4823c8ecc4c40e66 Mon Sep 17 00:00:00 2001 From: mhucka Date: Tue, 1 Jul 2025 21:24:18 +0000 Subject: [PATCH 13/32] Be more careful about SSE flags on Windows Getting correct info at run-time about SSE support on Windows has been a bit difficult. To help get the best performance, if we know SSE is supposed to be supported, we can try to use multiple known architecture flags starting with those introduced most recently. 
--- pybind_interface/sse/CMakeLists.txt | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/pybind_interface/sse/CMakeLists.txt b/pybind_interface/sse/CMakeLists.txt index a50f37e22..32ff2ec5f 100644 --- a/pybind_interface/sse/CMakeLists.txt +++ b/pybind_interface/sse/CMakeLists.txt @@ -15,7 +15,24 @@ cmake_minimum_required(VERSION 3.31) project(qsim) -if(NOT WIN32) +if(WIN32) + # Visual Studio 17.11.5 added /arch:SSE4.2 in Oct. 2024. Try to use + # that if it's available. + check_cxx_compiler_flag("/arch:SSE4.2" WIN32_SSE4_AVAILABLE) + if(WIN32_SSE4_AVAILABLE) + add_compile_options(/arch:SSE4.2) + else() + # VS 2022 docs say that using /arch:SSE2 will make the auto-vectorizer + # emit 4.2 instructions when available. So, resort to this if we can. + check_cxx_compiler_flag("/arch:SSE2" WIN32_SSE2_AVAILABLE) + if(WIN32_SSE2_AVAILABLE) + add_compile_options(/arch:SSE2) + else() + # Some sources say that to get SSE4.1, you typically use /arch:AVX. + add_compile_options(/arch:AVX) + endif() + endif() +else() add_compile_options(-msse4.1) endif() From 9d1ca83a3a5e593e19210694eb84db207f413d97 Mon Sep 17 00:00:00 2001 From: mhucka Date: Tue, 1 Jul 2025 21:26:34 +0000 Subject: [PATCH 14/32] Fix determining AVX & SSE features of the host CPU The previous code wrongly assumed that testing for compiler support would imply the underlying hardware supported the features. This is wrong (and rather obviously wrong -- I don't know what I was thinking before). The new code tries in various ways to get info about the architectural features of the host's CPU. 
--- CMakeLists.txt | 73 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a015c11b..c5003166b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,11 +17,13 @@ project(qsim LANGUAGES CXX) # This text is prepended to messages printed by this config file so it's # easier to figure out what came from where in the logs. -set(MSG_PREFIX "[qsim cmake configuration]") +set(MSG_PREFIX "[qsim cmake]") # ~~~~~ Analyze the host's hardware & software features ~~~~~ +find_package(OpenMP REQUIRED) + include(CheckLanguage) include(CheckCXXCompilerFlag) include(CheckCXXSourceRuns) @@ -46,7 +48,7 @@ if(CMAKE_CUDA_COMPILER) "${CMAKE_CUDA_COMPILER} ${CMAKE_CUDA_COMPILER_VERSION}") else() message(STATUS "${MSG_PREFIX} did not find CUDA compiler") - # Did not find the CUDA framewwork, so check for the HIP as an alternative. + # Did not find the CUDA framewwork, so check for HIP as an alternative. execute_process(COMMAND which hipcc OUTPUT_VARIABLE has_hipcc OUTPUT_STRIP_TRAILING_WHITESPACE) @@ -58,16 +60,57 @@ else() endif() endif() -find_package(OpenMP REQUIRED) +macro(check_cpu_supports FEATURE HW_FLAG) + set(HAVE_${FEATURE} FALSE) -cmake_host_system_information(RESULT HAVE_SSE2 QUERY HAS_SSE2) + message(STATUS "${MSG_PREFIX} testing hardware for ${FEATURE} …") + if(WIN32) + + elseif(LINUX) + execute_process( + COMMAND bash --noprofile -c "grep -q ${HW_FLAG} /proc/cpuinfo" + RESULT_VARIABLE _CHECK_HW_FLAG_EXIT_CODE + ) + if(_CHECK_HW_FLAG_EXIT_CODE EQUAL 0) + set(HAVE_${FEATURE} TRUE) + endif() + elseif(APPLE AND NOT CMAKE_APPLE_SILICON_PROCESSOR) + execute_process( + COMMAND bash --noprofile -c "sysctl -n hw.optional.${HW_FLAG}" + RESULT_VARIABLE _CHECK_HW_FLAG_EXIT_CODE + OUTPUT_VARIABLE _CHECK_HW_FLAG_VALUE + ) + if(_CHECK_HW_FLAG_EXIT_CODE EQUAL 0) + if(_CHECK_HW_FLAG_VALUE EQUAL "1") + set(HAVE_${FEATURE} TRUE) + endif() + endif() + endif() + message(STATUS "${MSG_PREFIX} 
testing hardware for ${FEATURE} … Done.") + message(STATUS "${MSG_PREFIX} HAVE_${FEATURE} = ${HAVE_${FEATURE}}") +endmacro() + +set(HAVE_AVX2 FALSE) +set(HAVE_AVX512 FALSE) +set(HAVE_SSE4 FALSE) if(WIN32) - check_cxx_compiler_flag("/arch:AVX2" HAVE_AVX2) - check_cxx_compiler_flag("/arch:AVX512" HAVE_AVX512) -else() - check_cxx_compiler_flag("-mavx2" HAVE_AVX2) - check_cxx_compiler_flag("-mavx512f" HAVE_AVX512) + check_cpu_supports(AVX2 "avx2") + if(HAVE_AVX2) + # There seems to be no direct way to test for SSE4 on Windows. We'd + # need to compile & run a test program. However, AVX2 implies SSE4.1 + # support because it's a later instruction set built upon SSE4.1. + set(HAVE_SSE4 TRUE) + endif() + check_cpu_supports(AVX512 "avx512f") +elseif(LINUX) + check_cpu_supports(AVX2 "avx2") + check_cpu_supports(AVX512 "avx512f") + check_cpu_supports(SSE4 "sse4") +elseif(APPLE NOT CMAKE_APPLE_SILICON_PROCESSOR) + check_cpu_supports(AVX2 "avx2_0") + check_cpu_supports(AVX512 "avx512f") + check_cpu_supports(SSE4 "sse4_1") endif() @@ -99,10 +142,6 @@ if(NOT CMAKE_APPLE_SILICON_PROCESSOR) add_subdirectory(pybind_interface/hip) endif() - if(HAVE_SSE2) - add_subdirectory(pybind_interface/sse) - endif() - if(HAVE_AVX2) add_subdirectory(pybind_interface/avx2) endif() @@ -110,6 +149,10 @@ if(NOT CMAKE_APPLE_SILICON_PROCESSOR) if(HAVE_AVX512) add_subdirectory(pybind_interface/avx512) endif() + + if(HAVE_SSE4) + add_subdirectory(pybind_interface/sse) + endif() endif() if(APPLE) @@ -130,9 +173,5 @@ endif() # ~~~~~ Print misc. 
info ~~~~~ -message(STATUS "${MSG_PREFIX} host has SSE2 = ${HAVE_SSE2}") -message(STATUS "${MSG_PREFIX} host has AVX2 = ${HAVE_AVX2}") -message(STATUS "${MSG_PREFIX} host has AVX512 = ${HAVE_AVX512}") - message(STATUS "${MSG_PREFIX} shell $PATH = $ENV{PATH}") message(STATUS "${MSG_PREFIX} shell $CUQUANTUM_ROOT = $ENV{CUQUANTUM_ROOT}") From edbadb18e9654f0000884208983eb51233106e4b Mon Sep 17 00:00:00 2001 From: mhucka Date: Wed, 2 Jul 2025 23:49:11 +0000 Subject: [PATCH 15/32] Fix missing NOT --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c5003166b..0245444aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,7 +107,7 @@ elseif(LINUX) check_cpu_supports(AVX2 "avx2") check_cpu_supports(AVX512 "avx512f") check_cpu_supports(SSE4 "sse4") -elseif(APPLE NOT CMAKE_APPLE_SILICON_PROCESSOR) +elseif(APPLE AND NOT CMAKE_APPLE_SILICON_PROCESSOR) check_cpu_supports(AVX2 "avx2_0") check_cpu_supports(AVX512 "avx512f") check_cpu_supports(SSE4 "sse4_1") endif() From c94360ebf0a0274ce258f703a331826d516faf7a Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 7 Jul 2025 03:17:21 +0000 Subject: [PATCH 16/32] Move GetPybind11.cmake file to dev_tools/cmake We have at least two custom CMake files now, and may have more in the future. The files only need to be included from the top-level `CMakeLists.txt` file; they don't actually need to be included from the `CMakeLists.txt` files in the `pybind_interface/*` subdirectories because those subdirectories are added to the top-level `CMakeLists.txt` file. For better organization and maintenance, I think it makes sense to move `GetPybind11.cmake` and other *.cmake files into a dedicated subdirectory, and a location inside `dev_tools/` seems to make the most sense. 
--- pybind_interface/GetPybind11.cmake | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 pybind_interface/GetPybind11.cmake diff --git a/pybind_interface/GetPybind11.cmake b/pybind_interface/GetPybind11.cmake deleted file mode 100644 index b2182b45d..000000000 --- a/pybind_interface/GetPybind11.cmake +++ /dev/null @@ -1,18 +0,0 @@ -include(FetchContent) - -set(MIN_PYBIND_VERSION "2.13.6") - -# Suppress warning "Compatibility with CMake < 3.10 will be removed ..." coming -# from Pybind11. Not ideal, but avoids wasting time trying to find the cause. -# TODO(mhucka): remove the settings when pybind11 updates its CMake files -set(CMAKE_WARN_DEPRECATED OFF CACHE BOOL "Disable CMake deprecation warnings" FORCE) - -FetchContent_Declare( - pybind11 - GIT_REPOSITORY https://github.com/pybind/pybind11 - GIT_TAG "v${MIN_PYBIND_VERSION}" - OVERRIDE_FIND_PACKAGE -) -FetchContent_MakeAvailable(pybind11) - -set(CMAKE_WARN_DEPRECATED ON CACHE BOOL "Reenable CMake deprecation warnings" FORCE) From 7f35ee2fcc164094b389fac7a84469f0c179b893 Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 7 Jul 2025 03:23:15 +0000 Subject: [PATCH 17/32] Move and overhaul `GetPybind11.cmake` This updates the CMake code and rewrites the logic in several ways: * Test if pybind11 has already been found, and skip the work if so. * Use `find_package(pybind11)` to look for the Pybind11 package first, with a hint about where it might be located in the Python installation directories. * If the previous approach didn't find pybind11, only then try to get it from the GitHub repository. * Add the Pybind11 header files directory to the Python include files directories list, so that the CMake configuration files for the qsim modules can find them without having to add them explicitly in the module's CMakeLists.txt files. 
--- dev_tools/cmake/GetPybind11.cmake | 40 +++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 dev_tools/cmake/GetPybind11.cmake diff --git a/dev_tools/cmake/GetPybind11.cmake b/dev_tools/cmake/GetPybind11.cmake new file mode 100644 index 000000000..ec9386c15 --- /dev/null +++ b/dev_tools/cmake/GetPybind11.cmake @@ -0,0 +1,40 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if(NOT pybind11_FOUND) + set(MIN_PYBIND_VERSION "2.13.6") + + find_package( + pybind11 + CONFIG + HINTS "${Python3_SITELIB}" + NO_POLICY_SCOPE) + + # qsim's requirements.txt and setup.py both include a requirement for + # "pybind11[global]", so the Pybind11 CMake plugin should be found no matter + # whether the user is doing a "pip install qsim" or a local build. Still, we + # want to be sure, and also want to make sure to get the min version we need. 
+ if(NOT pybind11_FOUND OR ${pybind11_VERSION} VERSION_LESS ${MIN_PYBIND_VERSION}) + include(FetchContent) + FetchContent_Declare( + pybind11 + GIT_REPOSITORY https://github.com/pybind/pybind11 + GIT_TAG "v${MIN_PYBIND_VERSION}" + OVERRIDE_FIND_PACKAGE + ) + FetchContent_MakeAvailable(pybind11) + endif() + + include_directories(${PYTHON_INCLUDE_DIRS} ${pybind11_INCLUDE_DIR}) +endif() From 640b572512363af0302f831f4777d87f6980ef74 Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 7 Jul 2025 03:25:28 +0000 Subject: [PATCH 18/32] Add new CMake macro for checking CPU vector instruction sets Depending on the host platform, this uses different methods to try to determine whether the CPU supports the SIMD and vector instruction sets used by qsim (different flavors of AVX and SSE). --- dev_tools/cmake/CheckCPU.cmake | 101 +++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 dev_tools/cmake/CheckCPU.cmake diff --git a/dev_tools/cmake/CheckCPU.cmake b/dev_tools/cmake/CheckCPU.cmake new file mode 100644 index 000000000..eda71316f --- /dev/null +++ b/dev_tools/cmake/CheckCPU.cmake @@ -0,0 +1,101 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +include(CheckCXXCompilerFlag) +include(CheckCXXSourceRuns) + +macro(check_cpu_support _FEATURE_STRING _FEATURE_FLAG) + set(${_FEATURE_FLAG} FALSE) + + message(STATUS "Testing platform support for ${_FEATURE_STRING} …") + if(WIN32) + # On Windows, there's no built-in method to learn the CPU flags. Third- + # party tools exist, but downloading & running them is a security risk. + # We resort instead to compiling and running our own small program. + set(_CHECKER_FILE_PATH "${CMAKE_BINARY_DIR}/checker.cpp") + file(WRITE ${_CHECKER_FILE_PATH} "${_WIN32_CHECKER_SRC}") + try_run( + _CHECKER_RETURN_VALUE + _CHECKER_COMPILED + "${CMAKE_BINARY_DIR}" + "${_CHECKER_FILE_PATH}" + RUN_OUTPUT_VARIABLE _CPU_FEATURES + ) + if(_CHECKER_COMPILED AND _CHECKER_RETURN_VALUE EQUAL 0) + string(FIND "${_CPU_FEATURES}" ${_FEATURE_STRING} _FOUND) + if(NOT _FOUND EQUAL -1) + set(${_FEATURE_FLAG} TRUE) + endif() + else() + message(STATUS "Unable to autodetect vector instruction sets") + if(NOT _CHECKER_COMPILED) + message(STATUS " (failed to compile CPU checker utility)") + else() + message(STATUS " (got an error trying to run our CPU checker)") + endif() + endif() + + elseif(LINUX) + execute_process( + COMMAND bash --noprofile -c "grep -q ${_FEATURE_STRING} /proc/cpuinfo" + RESULT_VARIABLE _EXIT_CODE + ) + if(_EXIT_CODE EQUAL 0) + set(${_FEATURE_FLAG} TRUE) + endif() + + elseif(APPLE AND NOT CMAKE_APPLE_SILICON_PROCESSOR) + execute_process( + COMMAND bash --noprofile -c "sysctl -n hw.optional.${_FEATURE_STRING}" + RESULT_VARIABLE _EXIT_CODE + OUTPUT_VARIABLE _FLAG_VALUE + ) + if(_EXIT_CODE EQUAL 0 AND _FLAG_VALUE EQUAL "1") + set(${_FEATURE_FLAG} TRUE) + endif() + endif() + + message(STATUS "Testing hardware for ${_FEATURE_STRING} … Done.") + message(STATUS "${_FEATURE_FLAG} = ${${_FEATURE_FLAG}}") +endmacro() + +# Small Windows C++ program to test bits in certain Intel CPU registers. 
+# Info about the registers in Intel CPUs: https://en.wikipedia.org/wiki/CPUID +# +# EAX ECX Bit Name +# 1 0 19 sse4.1 +# 1 0 20 sse4.2 +# 1 0 28 avx +# 7 0 5 avx2 +# 7 0 16 avx512f +# +# Note: CMake caches the output of try_run() by default; therefore, this program +# will not be executed each time try_run() is called. + +set(_WIN32_CHECKER_SRC " +#include +#include +#include + +int main() { + int cpuInfo[4]; + __cpuidex(cpuInfo, 1, 0); + std::cout << ((cpuInfo[2] & (1 << 19)) ? \"sse4.1\\n\" : \"\"); + std::cout << ((cpuInfo[2] & (1 << 20)) ? \"sse4.2\\n\" : \"\"); + __cpuidex(cpuInfo, 7, 0); + std::cout << ((cpuInfo[1] & (1 << 5)) ? \"avx2\\n\" : \"\") + << ((cpuInfo[1] & (1 << 16)) ? \"avx512f\\n\" : \"\"); + return 0; +} +") From b083ab77880aa1b16bafb11da783a61861279bbf Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 7 Jul 2025 03:26:22 +0000 Subject: [PATCH 19/32] Remove no-longer-needed pybind_interface/GetCUDAARCHS.cmake This file turned out to do so little that it's not worth maintaining a separate file for it. --- pybind_interface/GetCUDAARCHS.cmake | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 pybind_interface/GetCUDAARCHS.cmake diff --git a/pybind_interface/GetCUDAARCHS.cmake b/pybind_interface/GetCUDAARCHS.cmake deleted file mode 100644 index 4e53f7689..000000000 --- a/pybind_interface/GetCUDAARCHS.cmake +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright 2025 Google LLC. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -if(CMAKE_CUDA_ARCHITECTURES) - # CMake 3.18+ sets this variable from $CUDAARCHS automatically. - message(STATUS "qsim: using CUDA architectures " - "${CMAKE_CUDA_ARCHITECTURES}") -else() - # Compile for all supported major and minor real architectures, and the - # highest major virtual architecture. - set(CMAKE_CUDA_ARCHITECTURES native) -endif() From 28f52d9dff2b51679a4f2a445aec0cffabea34af Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 7 Jul 2025 03:29:47 +0000 Subject: [PATCH 20/32] Remove unnecessary CMake instructions Due to the fact that the top-level `CMakeLists.txt` file includes these files using `add_subdirectory`, it's not necessary for them to do things like include `GetPybind11.cmake` or call `project()`. Those directives are already done by the top-level file. We can simplify these sub-`CMakeLists.txt` files. --- pybind_interface/avx2/CMakeLists.txt | 6 +----- pybind_interface/avx512/CMakeLists.txt | 6 +----- pybind_interface/basic/CMakeLists.txt | 4 ---- pybind_interface/custatevec/CMakeLists.txt | 10 +--------- pybind_interface/hip/CMakeLists.txt | 10 +--------- 5 files changed, 4 insertions(+), 32 deletions(-) diff --git a/pybind_interface/avx2/CMakeLists.txt b/pybind_interface/avx2/CMakeLists.txt index a836209ee..c427c0e13 100644 --- a/pybind_interface/avx2/CMakeLists.txt +++ b/pybind_interface/avx2/CMakeLists.txt @@ -12,16 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-cmake_minimum_required(VERSION 3.31) -project(qsim) - IF (WIN32) add_compile_options(/arch:AVX2) ELSEIF(NOT CMAKE_APPLE_SILICON_PROCESSOR) add_compile_options(-mavx2 -mfma) ENDIF() -INCLUDE(../GetPybind11.cmake) pybind11_add_module(qsim_avx2 pybind_main_avx2.cpp) -target_link_libraries(qsim_avx2 PUBLIC OpenMP::OpenMP_CXX) +target_link_libraries(qsim_avx2 PUBLIC pybind11::headers OpenMP::OpenMP_CXX) diff --git a/pybind_interface/avx512/CMakeLists.txt b/pybind_interface/avx512/CMakeLists.txt index fe2c8322a..514c5ebde 100644 --- a/pybind_interface/avx512/CMakeLists.txt +++ b/pybind_interface/avx512/CMakeLists.txt @@ -12,16 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.31) -project(qsim) - IF (WIN32) add_compile_options(/arch:AVX512) ELSEIF(NOT CMAKE_APPLE_SILICON_PROCESSOR) add_compile_options(-mavx512f -mbmi2) ENDIF() -INCLUDE(../GetPybind11.cmake) pybind11_add_module(qsim_avx512 pybind_main_avx512.cpp) -target_link_libraries(qsim_avx512 PUBLIC OpenMP::OpenMP_CXX) +target_link_libraries(qsim_avx512 PUBLIC pybind11::headers OpenMP::OpenMP_CXX) diff --git a/pybind_interface/basic/CMakeLists.txt b/pybind_interface/basic/CMakeLists.txt index 080a1196f..26747dcc3 100644 --- a/pybind_interface/basic/CMakeLists.txt +++ b/pybind_interface/basic/CMakeLists.txt @@ -12,14 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-cmake_minimum_required(VERSION 3.31) -project(qsim) - if(NOT WIN32) add_compile_options(-march=native) endif() -INCLUDE(../GetPybind11.cmake) pybind11_add_module(qsim_basic pybind_main_basic.cpp) target_link_libraries(qsim_basic PUBLIC OpenMP::OpenMP_CXX) diff --git a/pybind_interface/custatevec/CMakeLists.txt b/pybind_interface/custatevec/CMakeLists.txt index 88fdf7b50..438479847 100644 --- a/pybind_interface/custatevec/CMakeLists.txt +++ b/pybind_interface/custatevec/CMakeLists.txt @@ -12,14 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.31) -project(qsim LANGUAGES CXX CUDA) - -INCLUDE(../GetPybind11.cmake) -find_package(Python3 3.10 REQUIRED) - -include_directories(${pybind11_INCLUDE_DIRS}) - include_directories($ENV{CUQUANTUM_ROOT}/include) link_directories($ENV{CUQUANTUM_ROOT}/lib $ENV{CUQUANTUM_ROOT}/lib64) @@ -32,4 +24,4 @@ set_target_properties(qsim_custatevec PROPERTIES ) set_source_files_properties(pybind_main_custatevec.cpp PROPERTIES LANGUAGE CUDA) -target_link_libraries(qsim_custatevec OpenMP::OpenMP_CXX) +target_link_libraries(qsim_custatevec PUBLIC pybind11::headers OpenMP::OpenMP_CXX) diff --git a/pybind_interface/hip/CMakeLists.txt b/pybind_interface/hip/CMakeLists.txt index 6ada8b857..af91ff7ce 100644 --- a/pybind_interface/hip/CMakeLists.txt +++ b/pybind_interface/hip/CMakeLists.txt @@ -12,17 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-cmake_minimum_required(VERSION 3.31) -project(qsim LANGUAGES CXX HIP) - -INCLUDE(../GetPybind11.cmake) -find_package(PythonLibs 3.10 REQUIRED) - list(APPEND CMAKE_MODULE_PATH "/opt/rocm/lib/cmake/hip") find_package(HIP REQUIRED) -include_directories(${PYTHON_INCLUDE_DIRS} ${pybind11_SOURCE_DIR}/include) - hip_add_library(qsim_hip MODULE pybind_main_hip.cpp) set_target_properties(qsim_hip PROPERTIES @@ -31,4 +23,4 @@ set_target_properties(qsim_hip PROPERTIES ) set_source_files_properties(pybind_main_hip.cpp PROPERTIES LANGUAGE HIP) -target_link_libraries(qsim_hip PUBLIC OpenMP::OpenMP_CXX) +target_link_libraries(qsim_hip PUBLIC pybind11::headers OpenMP::OpenMP_CXX) From 13e839c641bce68154e747d0926610f300a5bfbb Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 7 Jul 2025 03:33:00 +0000 Subject: [PATCH 21/32] Remove unnecessary CMake code & do some deduplication Due to the fact that the top-level `CMakeLists.txt` file includes these files using `add_subdirectory`, it's not necessary for them to do things like include `GetPybind11.cmake` or call `project()`. Those directives are already done by the top-level file. We can simplify these sub-`CMakeLists.txt` files. In addition, in this file in particular, it's possible to pull out `target_link_libraries()` as a common element because it's done for all the cases tested. Finally, instead of including the `GetCUDAARCHS.cmake` I had created before, let's just include the 3 lines directly in here. It makes it more obvious what's happening and reduces maintenance burden. --- pybind_interface/decide/CMakeLists.txt | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/pybind_interface/decide/CMakeLists.txt b/pybind_interface/decide/CMakeLists.txt index 73b36fb4e..8c435a379 100644 --- a/pybind_interface/decide/CMakeLists.txt +++ b/pybind_interface/decide/CMakeLists.txt @@ -12,46 +12,34 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-cmake_minimum_required(VERSION 3.31) -project(qsim LANGUAGES CXX) - -include(CheckLanguage) -check_language(CUDA) - -include(../GetPybind11.cmake) - # Configure based on the detected platform if(CMAKE_CUDA_COMPILER) - include(../GetCUDAARCHS.cmake) + if(NOT CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES native) + endif() add_library(qsim_decide MODULE decide.cpp) if(DEFINED ENV{CUQUANTUM_ROOT}) target_compile_options(qsim_decide PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-D__CUSTATEVEC__> ) endif() - find_package(Python3 3.10 REQUIRED COMPONENTS Interpreter Development) - include_directories(${PYTHON_INCLUDE_DIRS} ${pybind11_SOURCE_DIR}/include) set_target_properties(qsim_decide PROPERTIES CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}" PREFIX "${PYTHON_MODULE_PREFIX}" SUFFIX "${PYTHON_MODULE_EXTENSION}" ) set_source_files_properties(decide.cpp PROPERTIES LANGUAGE CUDA) - target_link_libraries(qsim_decide OpenMP::OpenMP_CXX) elseif(has_hipcc) list(APPEND CMAKE_MODULE_PATH "/opt/rocm/lib/cmake/hip") find_package(HIP REQUIRED) hip_add_library(qsim_decide MODULE decide.cpp) set_source_files_properties(decide.cpp PROPERTIES LANGUAGE HIP) - find_package(Python3 3.10 REQUIRED COMPONENTS Interpreter Development) - include_directories(${PYTHON_INCLUDE_DIRS} ${pybind11_SOURCE_DIR}/include) set_target_properties(qsim_decide PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}" SUFFIX "${PYTHON_MODULE_EXTENSION}" ) - target_link_libraries(qsim_decide PUBLIC OpenMP::OpenMP_CXX) else() pybind11_add_module(qsim_decide decide.cpp) - target_link_libraries(qsim_decide PUBLIC OpenMP::OpenMP_CXX) endif() +target_link_libraries(qsim_decide PUBLIC OpenMP::OpenMP_CXX) From eb9369251bd1504dd5de85334a036cd5689976a5 Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 7 Jul 2025 03:35:05 +0000 Subject: [PATCH 22/32] Rewrite the logic and remove unnecessary CMake code Due to the fact that the top-level `CMakeLists.txt` file includes these files using `add_subdirectory`, it's not necessary for them to do things like include 
`GetPybind.cmake` or call `project()`. Those directives are already done by the top-level file. We can simplify these sub-`CMakeLists.txt` files. In addition, this commit rewrites the logic for determining which flag to pass to the compiler on Windows. The Windows case is complicated due to changes in the flags in MSVC over the years. The new logic tries to be robust in the face of different possibilities. --- pybind_interface/sse/CMakeLists.txt | 47 +++++++++++++++++++---------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/pybind_interface/sse/CMakeLists.txt b/pybind_interface/sse/CMakeLists.txt index 32ff2ec5f..aba5fa17c 100644 --- a/pybind_interface/sse/CMakeLists.txt +++ b/pybind_interface/sse/CMakeLists.txt @@ -12,31 +12,46 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.31) -project(qsim) - +# If the present module is included in the qsim build, it means that the +# top-level CMakeLists.txt found evidence that the hardware supports SSE4.1. +# However, the correct compiler flag still needs to be determined. if(WIN32) - # Visual Studio 17.11.5 added /arch:SSE4.2 in Oct. 2024. Try to use - # that if it's available. - check_cxx_compiler_flag("/arch:SSE4.2" WIN32_SSE4_AVAILABLE) + # Some comments in forums suggest that for some programs, using arch=sse4.1 + # may provide better performance than using arch=sse4.2. It's not available + # in newer VS but still try this flag first in case this compiler is older. + check_cxx_compiler_flag("/arch:SSE4.1" WIN32_SSE4_AVAILABLE) if(WIN32_SSE4_AVAILABLE) - add_compile_options(/arch:SSE4.2) + add_compile_options(/arch:SSE4.1) else() - # VS 2022 docs say that using /arch:SSE2 will make the auto-vectorizer - # emit 4.2 instructions when available. So, resort to this if we can. 
- check_cxx_compiler_flag("/arch:SSE2" WIN32_SSE2_AVAILABLE) - if(WIN32_SSE2_AVAILABLE) - add_compile_options(/arch:SSE2) + # Visual Studio 17.11.5 added /arch:SSE4.2 in Oct. 2024. Try to use that + # if it's available. + check_cxx_compiler_flag("/arch:SSE4.2" WIN32_SSE4_AVAILABLE) + if(WIN32_SSE4_AVAILABLE) + add_compile_options(/arch:SSE4.2) else() - # Some sources say that to get SSE4.1, you typically use /arch:AVX. - add_compile_options(/arch:AVX) + # VS 2022 docs say /arch:SSE2 will make the auto-vectorizer emit 4.2 + # instructions when available. Use this if we get this far. + check_cxx_compiler_flag("/arch:SSE2" WIN32_SSE2_AVAILABLE) + if(WIN32_SSE2_AVAILABLE) + add_compile_options(/arch:SSE2) + else() + # Although it's not guaranteed that AVX2 support guarantees + # SSE4.1 too, it seems to be true in practice. + if(CPU_SUPPORTS_AVX2) + # If we get here, it means elsewhere we'll have added avx2. + # Nothing more to do. + else() + # Other options failed. Final fallback: in practice, if a + # CPU supports AVX, it is likely to also support SSE4.1. + add_compile_options(/arch:AVX) + endif() + endif() endif() endif() else() add_compile_options(-msse4.1) endif() -INCLUDE(../GetPybind11.cmake) pybind11_add_module(qsim_sse pybind_main_sse.cpp) -target_link_libraries(qsim_sse PUBLIC OpenMP::OpenMP_CXX) +target_link_libraries(qsim_sse PUBLIC pybind11::headers OpenMP::OpenMP_CXX) From a9e49918eb5cc1ebb82bf8f3ac73149f34e97a20 Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 7 Jul 2025 03:38:13 +0000 Subject: [PATCH 23/32] Remove unnecessary CMake code and simplify setting the arch Due to the fact that the top-level `CMakeLists.txt` file includes these files using `add_subdirectory`, it's not necessary for them to do things like include `GetPybind11.cmake` or call `project()`. Those directives are already done by the top-level file. We can simplify these sub-`CMakeLists.txt` files. 
In addition, the logic for setting `CMAKE_CUDA_ARCHITECTURES` can be made very simple since we don't do complicated things to try to figure out which CUDA/GPU variants the host has. If the `CUDAARCHS` environment variable is not set and `CMAKE_CUDA_ARCHITECTURES` is not set either, we just default to using `native` and hope for the best. --- pybind_interface/cuda/CMakeLists.txt | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/pybind_interface/cuda/CMakeLists.txt b/pybind_interface/cuda/CMakeLists.txt index 286807af8..7c895dee7 100644 --- a/pybind_interface/cuda/CMakeLists.txt +++ b/pybind_interface/cuda/CMakeLists.txt @@ -12,19 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -cmake_minimum_required(VERSION 3.31) -project(qsim LANGUAGES CXX CUDA) - -include(../GetPybind11.cmake) -include(../GetCUDAARCHS.cmake) - -find_package(Python 3.10 REQUIRED) - -include_directories(${PYTHON_INCLUDE_DIRS}) -if(pybind11_FOUND) - include_directories(${pybind11_INCLUDE_DIRS}) -else() # means pybind11 has been fetched in GetPybind11.cmake - include_directories(${pybind11_SOURCE_DIR}/include) +if(CMAKE_CUDA_ARCHITECTURES) + # CMake 3.18+ sets this variable from $CUDAARCHS automatically. 
+ message(STATUS "using CUDA architectures ${CMAKE_CUDA_ARCHITECTURES}") +else() + message(STATUS "using 'native' for the CUDA architecture value") + set(CMAKE_CUDA_ARCHITECTURES native) endif() add_library(qsim_cuda MODULE pybind_main_cuda.cpp) @@ -35,4 +28,4 @@ set_target_properties(qsim_cuda PROPERTIES ) set_source_files_properties(pybind_main_cuda.cpp PROPERTIES LANGUAGE CUDA) -target_link_libraries(qsim_cuda OpenMP::OpenMP_CXX) +target_link_libraries(qsim_cuda PUBLIC OpenMP::OpenMP_CXX) From 344cfb0674fe82cf402ff3966e695497dbe9a629 Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 7 Jul 2025 03:40:57 +0000 Subject: [PATCH 24/32] Overhaul the top-level CMakeLists.txt file (again) This is another round of rewriting the top-level file. * Moves the `check_cpu_supports()` macro to a separate file. * Improves testing for CPU features for vectorizing. * Uses more modern ways of doing things like looking for `hipcc`. * Renames variables to make them a little bit more obvious. --- CMakeLists.txt | 136 +++++++++++++++++-------------------------------- 1 file changed, 48 insertions(+), 88 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0245444aa..dd54d92c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,118 +15,79 @@ cmake_minimum_required(VERSION 3.31) project(qsim LANGUAGES CXX) -# This text is prepended to messages printed by this config file so it's -# easier to figure out what came from where in the logs. -set(MSG_PREFIX "[qsim cmake]") +# ~~~~~ Set project-wide policies ~~~~~ +# The following settings mirror what is in our hand-written Makefiles. +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) -# ~~~~~ Analyze the host's hardware & software features ~~~~~ - -find_package(OpenMP REQUIRED) +# For deduplication of static libraries, pick the first occurrence. 
+cmake_policy(SET CMP0179 NEW) -include(CheckLanguage) -include(CheckCXXCompilerFlag) -include(CheckCXXSourceRuns) +# ~~~~~ Analyze the host's hardware & software features ~~~~~ -check_language(CUDA) +find_package(OpenMP REQUIRED COMPONENTS CXX NO_POLICY_SCOPE) +if(NOT OpenMP_CXX_FOUND) + message(FATAL_ERROR "OpenMP CXX support not found") +endif() # CMake normally sets CMAKE_APPLE_SILICON_PROCESSOR on Apple Silicon; however, # it doesn't happen when running builds using cibuildwheel, even on Apple -# Silicon. We have had better luck checking and seting it ourselves. +# Silicon. It's more reliable to check and set it ourselves. if(CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64") set(CMAKE_APPLE_SILICON_PROCESSOR TRUE) - message(STATUS "${MSG_PREFIX} detected Apple Silicon") + message(VERBOSE "Detected Apple Silicon") else() set(CMAKE_APPLE_SILICON_PROCESSOR FALSE) - message(STATUS "${MSG_PREFIX} did not detect Apple Silicon") endif() +include(CheckLanguage) +check_language(CUDA) if(CMAKE_CUDA_COMPILER) enable_language(CUDA) - message(STATUS "${MSG_PREFIX} found CUDA compiler " - "${CMAKE_CUDA_COMPILER} ${CMAKE_CUDA_COMPILER_VERSION}") + message(VERBOSE "Have ${CMAKE_CUDA_COMPILER} ${CMAKE_CUDA_COMPILER_VERSION}") else() - message(STATUS "${MSG_PREFIX} did not find CUDA compiler") - # Did not find the CUDA framewwork, so check for HIP as an alternative. - execute_process(COMMAND which hipcc - OUTPUT_VARIABLE has_hipcc - OUTPUT_STRIP_TRAILING_WHITESPACE) - if(has_hipcc) - message(STATUS "${MSG_PREFIX} found hipcc") - project(qsim LANGUAGES CXX HIP) + # If CUDA is not available, check for HIP as an alternative. + message(VERBOSE "Did not find CUDA compiler; cannot support CUDA.") + find_program(_HIP_COMPILER hipcc) + if(_HIP_COMPILER) + enable_language(HIP) + message(VERBOSE "Found hipcc. 
Enabling HIP support.") else() - message(STATUS "${MSG_PREFIX} did not find hipcc") + message(VERBOSE "Did not find hipcc; cannot support HIP.") endif() endif() -macro(check_cpu_supports FEATURE HW_FLAG) - set(HAVE_${FEATURE} FALSE) - - message(STATUS "${MSG_PREFIX} testing hardware for ${FEATURE} …") - if(WIN32) - - elseif(LINUX) - execute_process( - COMMAND bash --noprofile -c "grep -q ${HW_FLAG} /proc/cpuinfo" - RESULT_VARIABLE _CHECK_HW_FLAG_EXIT_CODE - ) - if(_CHECK_HW_FLAG_EXIT_CODE EQUAL 0) - set(HAVE_${FEATURE} TRUE) - endif() - elseif(APPLE AND NOT CMAKE_APPLE_SILICON_PROCESSOR) - execute_process( - COMMAND bash --noprofile -c "sysctl -n hw.optional.${HW_FLAG}" - RESULT_VARIABLE _CHECK_HW_FLAG_EXIT_CODE - OUTPUT_VARIABLE _CHECK_HW_FLAG_VALUE - ) - if(_CHECK_HW_FLAG_EXIT_CODE EQUAL 0) - if(_CHECK_HW_FLAG_VALUE EQUAL "1") - set(HAVE_${FEATURE} TRUE) - endif() - endif() - endif() - message(STATUS "${MSG_PREFIX} testing hardware for ${FEATURE} … Done.") - message(STATUS "${MSG_PREFIX} HAVE_${FEATURE} = ${HAVE_${FEATURE}}") -endmacro() - -set(HAVE_AVX2 FALSE) -set(HAVE_AVX512 FALSE) -set(HAVE_SSE4 FALSE) - +include(dev_tools/cmake/CheckCPU.cmake) +# Note: CMake uses "WIN32" for Windows targets, including Win64. if(WIN32) - check_cpu_supports(AVX2 "avx2") - if(HAVE_AVX2) - # There seems to be no direct way to test for SSE4 on Windows. We'd - # need to compile & run a test program. However, AVX2 implies SSE4.1 - # support because it's a later instruction set built upon SSE4.1. 
- set(HAVE_SSE4 TRUE) - endif() - check_cpu_supports(AVX512 "avx512f") + check_cpu_support("avx2" CPU_SUPPORTS_AVX2) + check_cpu_support("avx512f" CPU_SUPPORTS_AVX512) + check_cpu_support("sse4.1" CPU_SUPPORTS_SSE4) elseif(LINUX) - check_cpu_supports(AVX2 "avx2") - check_cpu_supports(AVX512 "avx512f") - check_cpu_supports(SSE4 "sse4") + check_cpu_support("avx2" CPU_SUPPORTS_AVX2) + check_cpu_support("avx512f" CPU_SUPPORTS_AVX512) + check_cpu_support("sse4" CPU_SUPPORTS_SSE4) elseif(APPLE AND NOT CMAKE_APPLE_SILICON_PROCESSOR) - check_cpu_supports(AVX2 "avx2_0") - check_cpu_supports(AVX512 "avx512f") - check_cpu_supports(SSE4 "sse4_1") + check_cpu_support("avx2_0" CPU_SUPPORTS_AVX2) + check_cpu_support("avx512f" CPU_SUPPORTS_AVX512) + check_cpu_support("sse4_1" CPU_SUPPORTS_SSE4) endif() - # ~~~~~ Configure the build ~~~~~ -# The following settings mirror what is in our hand-written Makefiles. -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_POSITION_INDEPENDENT_CODE ON) - -# Options propagated to all sub-cmakefiles. if(WIN32) - add_compile_options(/O2 /std:c++17 /openmp) + add_compile_options(/O2) else() - add_compile_options(-O3 -std=c++17 -D_GLIBCXX_USE_CXX11_ABI=1 -flto=auto) + add_compile_options(-O3 -D_GLIBCXX_USE_CXX11_ABI=1) endif() +find_package(Python3 COMPONENTS Interpreter Development) + +include(dev_tools/cmake/GetPybind11.cmake) + # Always build the basic part. 
add_subdirectory(pybind_interface/basic) add_subdirectory(pybind_interface/decide) @@ -138,19 +99,19 @@ if(NOT CMAKE_APPLE_SILICON_PROCESSOR) if(DEFINED ENV{CUQUANTUM_ROOT}) add_subdirectory(pybind_interface/custatevec) endif() - elseif(has_hipcc) + elseif(HAVE_HIPCC) add_subdirectory(pybind_interface/hip) endif() - if(HAVE_AVX2) + if(CPU_SUPPORTS_AVX2) add_subdirectory(pybind_interface/avx2) endif() - if(HAVE_AVX512) + if(CPU_SUPPORTS_AVX512) add_subdirectory(pybind_interface/avx512) endif() - if(HAVE_SSE4) + if(CPU_SUPPORTS_SSE4) add_subdirectory(pybind_interface/sse) endif() endif() @@ -170,8 +131,7 @@ if(APPLE) ) endif() +# ~~~~~ Print info for debugging ~~~~~ -# ~~~~~ Print misc. info ~~~~~ - -message(STATUS "${MSG_PREFIX} shell $PATH = $ENV{PATH}") -message(STATUS "${MSG_PREFIX} shell $CUQUANTUM_ROOT = $ENV{CUQUANTUM_ROOT}") +message(DEBUG "Shell $PATH = $ENV{PATH}") +message(DEBUG "Shell $CUQUANTUM_ROOT = $ENV{CUQUANTUM_ROOT}") From 9759a38159deebaefe2243215554b296442e1759 Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 7 Jul 2025 14:41:04 +0000 Subject: [PATCH 25/32] Don't bother setting number of threads for cibuildwheel It seems to figure out automatically how many threads to use for the builds. --- .github/workflows/reusable_build_wheels.yaml | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/.github/workflows/reusable_build_wheels.yaml b/.github/workflows/reusable_build_wheels.yaml index 4f3138121..8729e30ef 100644 --- a/.github/workflows/reusable_build_wheels.yaml +++ b/.github/workflows/reusable_build_wheels.yaml @@ -60,6 +60,9 @@ jobs: {os: macos-15, arch: arm64}, {os: windows-2025, arch: AMD64}, ] + env: + # SHELLOPTS is used by Bash. Add xtrace when debugging is turned on. 
+ SHELLOPTS: ${{inputs.debug && 'xtrace' || '' }} steps: - name: Check out a copy of the git repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 @@ -89,19 +92,6 @@ jobs: os=${{matrix.conf.os}} echo MACOSX_DEPLOYMENT_TARGET=${os: -2} >> "$GITHUB_ENV" - - if: startsWith(matrix.conf.os, 'ubuntu') - name: Determine the number of threads to use (Linux) - run: echo "num_threads=$(( $(nproc) - 1 ))" >> "$GITHUB_ENV" - - - if: startsWith(matrix.conf.os, 'macos') - name: Determine the number of threads to use (MacOS) - run: echo "num_threads=$(( $(sysctl -n hw.ncpu) - 1 ))" >> "$GITHUB_ENV" - - - if: startsWith(matrix.conf.os, 'win') - name: Determine the number of threads to use (Windows) - shell: bash - run: echo "num_threads=$(( NUMBER_OF_PROCESSORS - 1 ))" >> "$GITHUB_ENV" - - name: Build and test wheels env: # Note: additional cibuildwheel settings are in pyproject.toml. @@ -110,7 +100,6 @@ jobs: CIBW_BUILD_VERBOSITY: ${{inputs.debug && 1 || ''}} # Color codes make the raw logs hard to read. (CMake uses CLICOLOR.) CLICOLOR: ${{inputs.debug && 0 || ''}} - CMAKE_BUILD_PARALLEL_LEVEL: ${{env.num_threads}} run: | cibuildwheel --output-dir wheelhouse From 5bd427c822cfb16a45daacd2f2587857cc1c1462 Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 7 Jul 2025 14:47:34 +0000 Subject: [PATCH 26/32] Try without CMP0179 --- CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dd54d92c7..2c8d6efb6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,9 +22,6 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_POSITION_INDEPENDENT_CODE ON) -# For deduplication of static libraries, pick the first occurrence. 
-cmake_policy(SET CMP0179 NEW) - # ~~~~~ Analyze the host's hardware & software features ~~~~~ find_package(OpenMP REQUIRED COMPONENTS CXX NO_POLICY_SCOPE) From 4669af3af5c42f24eb49a4d169a169851cc901aa Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 7 Jul 2025 14:48:44 +0000 Subject: [PATCH 27/32] tmp --- .github/workflows/ci_build_wheels.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci_build_wheels.yaml b/.github/workflows/ci_build_wheels.yaml index 205d4be18..d4e0aa84a 100644 --- a/.github/workflows/ci_build_wheels.yaml +++ b/.github/workflows/ci_build_wheels.yaml @@ -20,6 +20,7 @@ on: branches: - master - main + - mh-consolidate-cmake-configs pull_request: types: [opened, synchronize] From 8d22fe860184d2bfc373408f8100737412320ff8 Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 7 Jul 2025 14:54:10 +0000 Subject: [PATCH 28/32] tmp --- .github/workflows/ci_build_wheels.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci_build_wheels.yaml b/.github/workflows/ci_build_wheels.yaml index d4e0aa84a..738137662 100644 --- a/.github/workflows/ci_build_wheels.yaml +++ b/.github/workflows/ci_build_wheels.yaml @@ -57,4 +57,5 @@ jobs: uses: ./.github/workflows/reusable_build_wheels.yaml secrets: inherit with: - debug: ${{inputs.debug == true}} + # debug: ${{inputs.debug == true}} + debug: true From c273d112354d7b79ed8d8fbd9bd3f7d5408aae65 Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 7 Jul 2025 22:44:15 +0000 Subject: [PATCH 29/32] OpenMP is not required -- adjust CMake files accordingly This makes the CMake build files test for OpenMP and use it if it's available, but not fail if it's not. 
--- CMakeLists.txt | 10 +++++----- pybind_interface/avx2/CMakeLists.txt | 5 ++++- pybind_interface/avx512/CMakeLists.txt | 5 ++++- pybind_interface/basic/CMakeLists.txt | 5 ++++- pybind_interface/cuda/CMakeLists.txt | 5 ++++- pybind_interface/custatevec/CMakeLists.txt | 5 ++++- pybind_interface/decide/CMakeLists.txt | 5 ++++- pybind_interface/hip/CMakeLists.txt | 5 ++++- pybind_interface/sse/CMakeLists.txt | 5 ++++- 9 files changed, 37 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2c8d6efb6..e8e5d4199 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,11 +24,6 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON) # ~~~~~ Analyze the host's hardware & software features ~~~~~ -find_package(OpenMP REQUIRED COMPONENTS CXX NO_POLICY_SCOPE) -if(NOT OpenMP_CXX_FOUND) - message(FATAL_ERROR "OpenMP CXX support not found") -endif() - # CMake normally sets CMAKE_APPLE_SILICON_PROCESSOR on Apple Silicon; however, # it doesn't happen when running builds using cibuildwheel, even on Apple # Silicon. It's more reliable to check and set it ourselves. 
@@ -40,6 +35,11 @@ else() set(CMAKE_APPLE_SILICON_PROCESSOR FALSE) endif() +find_package(OpenMP COMPONENTS CXX NO_POLICY_SCOPE) +if(NOT OpenMP_CXX_FOUND) + message(STATUS "(Without OpenMP, qsim cannot support thread parallelization)") +endif() + include(CheckLanguage) check_language(CUDA) if(CMAKE_CUDA_COMPILER) diff --git a/pybind_interface/avx2/CMakeLists.txt b/pybind_interface/avx2/CMakeLists.txt index c427c0e13..ae59edf83 100644 --- a/pybind_interface/avx2/CMakeLists.txt +++ b/pybind_interface/avx2/CMakeLists.txt @@ -20,4 +20,7 @@ ENDIF() pybind11_add_module(qsim_avx2 pybind_main_avx2.cpp) -target_link_libraries(qsim_avx2 PUBLIC pybind11::headers OpenMP::OpenMP_CXX) +if(OpenMP_CXX_FOUND) + target_link_libraries(qsim_avx2 PUBLIC pybind11::headers OpenMP::OpenMP_CXX) +endif() + diff --git a/pybind_interface/avx512/CMakeLists.txt b/pybind_interface/avx512/CMakeLists.txt index 514c5ebde..1929f32b9 100644 --- a/pybind_interface/avx512/CMakeLists.txt +++ b/pybind_interface/avx512/CMakeLists.txt @@ -20,4 +20,7 @@ ENDIF() pybind11_add_module(qsim_avx512 pybind_main_avx512.cpp) -target_link_libraries(qsim_avx512 PUBLIC pybind11::headers OpenMP::OpenMP_CXX) +if(OpenMP_CXX_FOUND) + target_link_libraries(qsim_avx512 PUBLIC pybind11::headers OpenMP::OpenMP_CXX) +endif() + diff --git a/pybind_interface/basic/CMakeLists.txt b/pybind_interface/basic/CMakeLists.txt index 26747dcc3..1a222052a 100644 --- a/pybind_interface/basic/CMakeLists.txt +++ b/pybind_interface/basic/CMakeLists.txt @@ -18,4 +18,7 @@ endif() pybind11_add_module(qsim_basic pybind_main_basic.cpp) -target_link_libraries(qsim_basic PUBLIC OpenMP::OpenMP_CXX) +if(OpenMP_CXX_FOUND) + target_link_libraries(qsim_basic PUBLIC OpenMP::OpenMP_CXX) +endif() + diff --git a/pybind_interface/cuda/CMakeLists.txt b/pybind_interface/cuda/CMakeLists.txt index 7c895dee7..f83b485c9 100644 --- a/pybind_interface/cuda/CMakeLists.txt +++ b/pybind_interface/cuda/CMakeLists.txt @@ -28,4 +28,7 @@ set_target_properties(qsim_cuda 
PROPERTIES ) set_source_files_properties(pybind_main_cuda.cpp PROPERTIES LANGUAGE CUDA) -target_link_libraries(qsim_cuda PUBLIC OpenMP::OpenMP_CXX) +if(OpenMP_CXX_FOUND) + target_link_libraries(qsim_cuda PUBLIC OpenMP::OpenMP_CXX) +endif() + diff --git a/pybind_interface/custatevec/CMakeLists.txt b/pybind_interface/custatevec/CMakeLists.txt index 438479847..ba2e2fd49 100644 --- a/pybind_interface/custatevec/CMakeLists.txt +++ b/pybind_interface/custatevec/CMakeLists.txt @@ -24,4 +24,7 @@ set_target_properties(qsim_custatevec PROPERTIES ) set_source_files_properties(pybind_main_custatevec.cpp PROPERTIES LANGUAGE CUDA) -target_link_libraries(qsim_custatevec PUBLIC pybind11::headers OpenMP::OpenMP_CXX) +if(OpenMP_CXX_FOUND) + target_link_libraries(qsim_custatevec PUBLIC pybind11::headers OpenMP::OpenMP_CXX) +endif() + diff --git a/pybind_interface/decide/CMakeLists.txt b/pybind_interface/decide/CMakeLists.txt index 8c435a379..60b1df8c8 100644 --- a/pybind_interface/decide/CMakeLists.txt +++ b/pybind_interface/decide/CMakeLists.txt @@ -42,4 +42,7 @@ else() pybind11_add_module(qsim_decide decide.cpp) endif() -target_link_libraries(qsim_decide PUBLIC OpenMP::OpenMP_CXX) +if(OpenMP_CXX_FOUND) + target_link_libraries(qsim_decide PUBLIC OpenMP::OpenMP_CXX) +endif() + diff --git a/pybind_interface/hip/CMakeLists.txt b/pybind_interface/hip/CMakeLists.txt index af91ff7ce..a7f82a9b0 100644 --- a/pybind_interface/hip/CMakeLists.txt +++ b/pybind_interface/hip/CMakeLists.txt @@ -23,4 +23,7 @@ set_target_properties(qsim_hip PROPERTIES ) set_source_files_properties(pybind_main_hip.cpp PROPERTIES LANGUAGE HIP) -target_link_libraries(qsim_hip PUBLIC pybind11::headers OpenMP::OpenMP_CXX) +if(OpenMP_CXX_FOUND) + target_link_libraries(qsim_hip PUBLIC pybind11::headers OpenMP::OpenMP_CXX) +endif() + diff --git a/pybind_interface/sse/CMakeLists.txt b/pybind_interface/sse/CMakeLists.txt index aba5fa17c..17f0661d8 100644 --- a/pybind_interface/sse/CMakeLists.txt +++ 
b/pybind_interface/sse/CMakeLists.txt @@ -54,4 +54,7 @@ endif() pybind11_add_module(qsim_sse pybind_main_sse.cpp) -target_link_libraries(qsim_sse PUBLIC pybind11::headers OpenMP::OpenMP_CXX) +if(OpenMP_CXX_FOUND) + target_link_libraries(qsim_sse PUBLIC pybind11::headers OpenMP::OpenMP_CXX) +endif() + From a08dbb34c771481ec7d114b04f4d5f27c2760bc7 Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 7 Jul 2025 23:02:35 +0000 Subject: [PATCH 30/32] Reduce & improve some of the excessive messages printed --- CMakeLists.txt | 13 ++++--------- dev_tools/cmake/CheckCPU.cmake | 9 ++++++--- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e8e5d4199..d48deba09 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,16 +44,16 @@ include(CheckLanguage) check_language(CUDA) if(CMAKE_CUDA_COMPILER) enable_language(CUDA) - message(VERBOSE "Have ${CMAKE_CUDA_COMPILER} ${CMAKE_CUDA_COMPILER_VERSION}") else() # If CUDA is not available, check for HIP as an alternative. - message(VERBOSE "Did not find CUDA compiler; cannot support CUDA.") + message(STATUS "CUDA not available; looking for a HIP compiler") find_program(_HIP_COMPILER hipcc) if(_HIP_COMPILER) enable_language(HIP) - message(VERBOSE "Found hipcc. 
Enabling HIP support.") + message(STATUS "CUDA not available; looking for a HIP compiler - found") else() - message(VERBOSE "Did not find hipcc; cannot support HIP.") + message(STATUS "CUDA not available; looking for a HIP compiler - not found") + message(STATUS "(Without CUDA or HIP, qsim cannot use GPUs for acceleration)") endif() endif() @@ -127,8 +127,3 @@ if(APPLE) "/opt/homebrew/opt/llvm@19/lib" ) endif() - -# ~~~~~ Print info for debugging ~~~~~ - -message(DEBUG "Shell $PATH = $ENV{PATH}") -message(DEBUG "Shell $CUQUANTUM_ROOT = $ENV{CUQUANTUM_ROOT}") diff --git a/dev_tools/cmake/CheckCPU.cmake b/dev_tools/cmake/CheckCPU.cmake index eda71316f..3cd0fdf2e 100644 --- a/dev_tools/cmake/CheckCPU.cmake +++ b/dev_tools/cmake/CheckCPU.cmake @@ -18,7 +18,7 @@ include(CheckCXXSourceRuns) macro(check_cpu_support _FEATURE_STRING _FEATURE_FLAG) set(${_FEATURE_FLAG} FALSE) - message(STATUS "Testing platform support for ${_FEATURE_STRING} …") + message(STATUS "Testing platform support for ${_FEATURE_STRING}") if(WIN32) # On Windows, there's no built-in method to learn the CPU flags. Third- # party tools exist, but downloading & running them is a security risk. @@ -66,8 +66,11 @@ macro(check_cpu_support _FEATURE_STRING _FEATURE_FLAG) endif() endif() - message(STATUS "Testing hardware for ${_FEATURE_STRING} … Done.") - message(STATUS "${_FEATURE_FLAG} = ${${_FEATURE_FLAG}}") + if(${_FEATURE_FLAG}) + message(STATUS "Testing platform support for ${_FEATURE_STRING} - found") + else() + message(STATUS "Testing platform support for ${_FEATURE_STRING} - not found") + endif() endmacro() # Small Windows C++ program to test bits in certain Intel CPU registers. 
From f8f1ab1e43d3f241a05db74db74bc81e97e47a64 Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 7 Jul 2025 23:29:30 +0000 Subject: [PATCH 31/32] Need to require Python3 Development.Module --- CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d48deba09..11e2fb550 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,9 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_POSITION_INDEPENDENT_CODE ON) +# Check we have Python libraries & header files necessary to build modules. +find_package(Python3 REQUIRED COMPONENTS Interpreter Development.Module) + # ~~~~~ Analyze the host's hardware & software features ~~~~~ # CMake normally sets CMAKE_APPLE_SILICON_PROCESSOR on Apple Silicon; however, @@ -81,8 +84,6 @@ else() add_compile_options(-O3 -D_GLIBCXX_USE_CXX11_ABI=1) endif() -find_package(Python3 COMPONENTS Interpreter Development) - include(dev_tools/cmake/GetPybind11.cmake) # Always build the basic part. From a469dddde4c16c9463209887674fbceabe0457a0 Mon Sep 17 00:00:00 2001 From: mhucka Date: Mon, 7 Jul 2025 23:36:58 +0000 Subject: [PATCH 32/32] Configure LTO in a portable way CMake has features to handle LTO for different compilers and platforms. Better to use this than to add our own lto options. --- CMakeLists.txt | 8 +++++ pybind_interface/avx512/CMakeLists.txt.avx10 | 31 ++++++++++++++++++++ pybind_interface/sse/CMakeLists.txt | 1 - 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 pybind_interface/avx512/CMakeLists.txt.avx10 diff --git a/CMakeLists.txt b/CMakeLists.txt index 11e2fb550..8ce6f3bd6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,6 +76,10 @@ elseif(APPLE AND NOT CMAKE_APPLE_SILICON_PROCESSOR) check_cpu_support("sse4_1" CPU_SUPPORTS_SSE4) endif() +# Configure LTO for compilers that support it. 
+include(CheckIPOSupported) +check_ipo_supported(RESULT HAVE_LTO) + # ~~~~~ Configure the build ~~~~~ if(WIN32) @@ -84,6 +88,10 @@ else() add_compile_options(-O3 -D_GLIBCXX_USE_CXX11_ABI=1) endif() +if(HAVE_LTO) + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) +endif() + include(dev_tools/cmake/GetPybind11.cmake) # Always build the basic part. diff --git a/pybind_interface/avx512/CMakeLists.txt.avx10 b/pybind_interface/avx512/CMakeLists.txt.avx10 new file mode 100644 index 000000000..1ce78a335 --- /dev/null +++ b/pybind_interface/avx512/CMakeLists.txt.avx10 @@ -0,0 +1,31 @@ +# Copyright 2019 Google LLC. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cmake_minimum_required(VERSION 3.31) +project(qsim) + +IF (WIN32) + add_compile_options(/arch:AVX512) +ELSEIF(HAVE_AVX512_10) + add_compile_options(-mavx10.1-512 -mbmi2) +ELSE() + add_compile_options(-mavx512f -mbmi2) +ENDIF() + +INCLUDE(../../dev_tools/cmake/GetPybind11.cmake) +pybind11_add_module(qsim_avx512 pybind_main_avx512.cpp) + +if(OpenMP_CXX_FOUND) + target_link_libraries(qsim_avx512 PUBLIC OpenMP::OpenMP_CXX) +endif() diff --git a/pybind_interface/sse/CMakeLists.txt b/pybind_interface/sse/CMakeLists.txt index 17f0661d8..9ea5e0890 100644 --- a/pybind_interface/sse/CMakeLists.txt +++ b/pybind_interface/sse/CMakeLists.txt @@ -57,4 +57,3 @@ pybind11_add_module(qsim_sse pybind_main_sse.cpp) if(OpenMP_CXX_FOUND) target_link_libraries(qsim_sse PUBLIC pybind11::headers OpenMP::OpenMP_CXX) endif() -