From 64c3575d7eb6c7aa86a4b5d27e036b1167b7632a Mon Sep 17 00:00:00 2001 From: "Martin R. Albrecht" Date: Mon, 13 Oct 2025 10:12:38 +0100 Subject: [PATCH 1/4] we never actually set L3 typo? See https://github.com/malb/m4ri/issues/35 --- m4/ax_cache_size.m4 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/m4/ax_cache_size.m4 b/m4/ax_cache_size.m4 index 72754bb..f3d77de 100644 --- a/m4/ax_cache_size.m4 +++ b/m4/ax_cache_size.m4 @@ -75,8 +75,8 @@ AC_DEFUN([AX_CACHE_SIZE], if test "x$cpu_exthi" \> "x80000005"; then AX_GCC_X86_CPUID(0x80000006) # For L3 cache - l2_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000006 | cut -d ":" -f 4` - ax_l2_size=$((0x$l2_hexval >> 18))*512 + l3_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000006 | cut -d ":" -f 4` + ax_l3_size=$((0x$l3_hexval >> 18))*512 fi fi From f4fb05ed4ba4da89797d512541368cfedb708585 Mon Sep 17 00:00:00 2001 From: "Martin R. Albrecht" Date: Mon, 13 Oct 2025 10:14:11 +0100 Subject: [PATCH 2/4] fix a compiler warning "warning: variable length array folded to constant array as an extension [-Wgnu-folding-constant]" See https://github.com/malb/m4ri/issues/35 --- m4ri/misc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m4ri/misc.h b/m4ri/misc.h index 99fd6bb..64c63bd 100644 --- a/m4ri/misc.h +++ b/m4ri/misc.h @@ -138,7 +138,7 @@ typedef uint64_t word; * \brief The number of bits in a word. */ -static int const m4ri_radix = 64; +#define m4ri_radix 64 /** * \brief The number one as a word. From b95a690216aaba831c00135b5ca38314e157325c Mon Sep 17 00:00:00 2001 From: "Martin R. 
Albrecht" Date: Mon, 20 Oct 2025 10:17:23 +0100 Subject: [PATCH 3/4] sysctl reads L3 cache size, too --- m4/ax_cache_size.m4 | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/m4/ax_cache_size.m4 b/m4/ax_cache_size.m4 index f3d77de..de08ee2 100644 --- a/m4/ax_cache_size.m4 +++ b/m4/ax_cache_size.m4 @@ -59,21 +59,21 @@ AC_DEFUN([AX_CACHE_SIZE], else if test "x$ax_cv_cpu_vendor" != "xUnknown"; then #Or use CPUID - AX_GCC_X86_CPUID(0x80000000) - cpu_exthigh=`echo $ax_cv_gcc_x86_cpuid_0x80000000 | cut -d ":" -f 1` - if test "x$cpu_exthi" \> "x80000004"; then + AX_GCC_X86_CPUID(0x80000000) + cpu_exthigh=`echo $ax_cv_gcc_x86_cpuid_0x80000000 | cut -d ":" -f 1` + if test "x$cpu_exthi" \> "x80000004"; then AX_GCC_X86_CPUID(0x80000005) # For L1 cache l1_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000005 | cut -d ":" -f 4` ax_l1_size=$((0x$l1_hexval >> 24)) fi - if test "x$cpu_exthi" \> "x80000005"; then + if test "x$cpu_exthi" \> "x80000005"; then AX_GCC_X86_CPUID(0x80000006) # For L2 cache l2_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000006 | cut -d ":" -f 3` ax_l2_size=$((0x$l2_hexval >> 16)) fi - if test "x$cpu_exthi" \> "x80000005"; then + if test "x$cpu_exthi" \> "x80000005"; then AX_GCC_X86_CPUID(0x80000006) # For L3 cache l3_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000006 | cut -d ":" -f 4` ax_l3_size=$((0x$l3_hexval >> 18))*512 @@ -84,25 +84,31 @@ AC_DEFUN([AX_CACHE_SIZE], #Or use sysctl sysctl_exe= if test -x /usr/sbin/sysctl ; then - sysctl_exe=/usr/sbin/sysctl + sysctl_exe=/usr/sbin/sysctl elif test -x /sbin/sysctl ; then - sysctl_exe=/sbin/sysctl + sysctl_exe=/sbin/sysctl fi + if test -n "$sysctl_exe"; then - if test -z "$ax_l2_size" -o "$ax_l2_size" = "0"; then + if test -z "$ax_l2_size" -o "$ax_l2_size" = "0"; then sysctl_out=`$sysctl_exe -n hw.l2cachesize 2>/dev/null`; if test ! 
-z "$sysctl_out"; then ax_l2_size=$(($sysctl_out / 1024)) fi; - - fi - if test -z "$ax_l1_size" -o "$ax_l1_size" = "0" ; then + fi + if test -z "$ax_l3_size" -o "$ax_l3_size" = "0"; then + sysctl_out=`$sysctl_exe -n hw.l3cachesize 2>/dev/null`; + if test ! -z "$sysctl_out"; then + ax_l3_size=$(($sysctl_out / 1024)) + fi; + fi + if test -z "$ax_l1_size" -o "$ax_l1_size" = "0" ; then sysctl_out=`$sysctl_exe -n hw.l1dcachesize 2>/dev/null`; if test ! -z "$sysctl_out"; then - ax_l1_size=$(($sysctl_out / 1024)) + ax_l1_size=$(($sysctl_out / 1024)) fi; - fi - if test -z "$ax_l1_size" -o "ax_l1_size" = "0" ; then + fi + if test -z "$ax_l1_size" -o "ax_l1_size" = "0" ; then sysctl_out=`$sysctl_exe -n hw.l1cachesize 2>/dev/null`; if test ! -z "$sysctl_out"; then ax_l1_size=$(($sysctl_out / 1024)) From e04ab74d83fce8e65a44db7d512897b096a5c724 Mon Sep 17 00:00:00 2001 From: "Martin R. Albrecht" Date: Sun, 9 Nov 2025 20:38:26 +0000 Subject: [PATCH 4/4] next attempt at detecting L3 (with the help of Qwen3-Max, because I'm guessing here!) 
--- m4/ax_cache_size.m4 | 118 ++++++++++++++++++++++++-------------------- 1 file changed, 64 insertions(+), 54 deletions(-) diff --git a/m4/ax_cache_size.m4 b/m4/ax_cache_size.m4 index de08ee2..a11344a 100644 --- a/m4/ax_cache_size.m4 +++ b/m4/ax_cache_size.m4 @@ -41,8 +41,9 @@ AC_DEFUN([AX_CACHE_SIZE], ax_l1_size= ax_l2_size= + ax_l3_size= - #Check if the variable is present + # Linux sysfs path if test -e /sys/devices/system/cpu/cpu0/cache/index0/size; then for idx in `seq 0 3`; do if test -e /sys/devices/system/cpu/cpu0/cache/index$idx/size ; then @@ -57,69 +58,78 @@ AC_DEFUN([AX_CACHE_SIZE], ax_l3_size=$CPU0_L3_CACHE else - if test "x$ax_cv_cpu_vendor" != "xUnknown"; then - #Or use CPUID - AX_GCC_X86_CPUID(0x80000000) - cpu_exthigh=`echo $ax_cv_gcc_x86_cpuid_0x80000000 | cut -d ":" -f 1` - if test "x$cpu_exthi" \> "x80000004"; then - AX_GCC_X86_CPUID(0x80000005) # For L1 cache - l1_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000005 | cut -d ":" -f 4` - ax_l1_size=$((0x$l1_hexval >> 24)) - fi + # Try CPUID (mostly for Linux without sysfs or during cross-compilation) + if test "x$ax_cv_cpu_vendor" != "xIntel"; then + AX_GCC_X86_CPUID(0x80000000) + cpu_exthigh=`echo $ax_cv_gcc_x86_cpuid_0x80000000 | cut -d ":" -f 1` + if test "x$cpu_exthigh" \> "x80000004"; then + AX_GCC_X86_CPUID(0x80000005) + l1_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000005 | cut -d ":" -f 4` + ax_l1_size=$((0x$l1_hexval >> 24)) + fi + + if test "x$cpu_exthigh" \> "x80000005"; then + AX_GCC_X86_CPUID(0x80000006) + l2_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000006 | cut -d ":" -f 3` + ax_l2_size=$((0x$l2_hexval >> 16)) - if test "x$cpu_exthi" \> "x80000005"; then - AX_GCC_X86_CPUID(0x80000006) # For L2 cache - l2_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000006 | cut -d ":" -f 3` - ax_l2_size=$((0x$l2_hexval >> 16)) + # L3 info in EDX (bits 31:18) → 512 KB units (AMD only) + l3_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000006 | cut -d ":" -f 4` + ax_l3_size=$(( (0x$l3_hexval >> 18) * 512 )) + fi fi - if test
"x$cpu_exthi" \> "x80000005"; then - AX_GCC_X86_CPUID(0x80000006) # For L3 cache - l3_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000006 | cut -d ":" -f 4` - ax_l3_size=$((0x$l3_hexval >> 18))*512 + # sysctl fallback + sysctl_exe= + if test -x /usr/sbin/sysctl; then + sysctl_exe=/usr/sbin/sysctl + elif test -x /sbin/sysctl; then + sysctl_exe=/sbin/sysctl fi - fi - - #Or use sysctl - sysctl_exe= - if test -x /usr/sbin/sysctl ; then - sysctl_exe=/usr/sbin/sysctl - elif test -x /sbin/sysctl ; then - sysctl_exe=/sbin/sysctl - fi - - if test -n "$sysctl_exe"; then - if test -z "$ax_l2_size" -o "$ax_l2_size" = "0"; then - sysctl_out=`$sysctl_exe -n hw.l2cachesize 2>/dev/null`; - if test ! -z "$sysctl_out"; then - ax_l2_size=$(($sysctl_out / 1024)) - fi; - fi - if test -z "$ax_l3_size" -o "$ax_l3_size" = "0"; then - sysctl_out=`$sysctl_exe -n hw.l3cachesize 2>/dev/null`; - if test ! -z "$sysctl_out"; then - ax_l3_size=$(($sysctl_out / 1024)) - fi; - fi - if test -z "$ax_l1_size" -o "$ax_l1_size" = "0" ; then - sysctl_out=`$sysctl_exe -n hw.l1dcachesize 2>/dev/null`; - if test ! -z "$sysctl_out"; then - ax_l1_size=$(($sysctl_out / 1024)) - fi; - fi - if test -z "$ax_l1_size" -o "ax_l1_size" = "0" ; then - sysctl_out=`$sysctl_exe -n hw.l1cachesize 2>/dev/null`; - if test ! 
-z "$sysctl_out"; then - ax_l1_size=$(($sysctl_out / 1024)) - fi; + if test -n "$sysctl_exe"; then + # L1 + if test -z "$ax_l1_size" -o "$ax_l1_size" = "0"; then + sysctl_out=`$sysctl_exe -n hw.l1dcachesize 2>/dev/null` + if test -n "$sysctl_out"; then + ax_l1_size=$(($sysctl_out / 1024)) + fi + fi + if test -z "$ax_l1_size" -o "$ax_l1_size" = "0"; then + sysctl_out=`$sysctl_exe -n hw.l1icachesize 2>/dev/null` + if test -n "$sysctl_out"; then + ax_l1_size=$(($sysctl_out / 1024)) + fi + fi + if test -z "$ax_l1_size" -o "$ax_l1_size" = "0"; then + sysctl_out=`$sysctl_exe -n hw.l1cachesize 2>/dev/null` + if test -n "$sysctl_out"; then + ax_l1_size=$(($sysctl_out / 1024)) + fi + fi + + # L2 + if test -z "$ax_l2_size" -o "$ax_l2_size" = "0"; then + sysctl_out=`$sysctl_exe -n hw.l2cachesize 2>/dev/null` + if test -n "$sysctl_out"; then + ax_l2_size=$(($sysctl_out / 1024)) + fi + fi + + # L3 + if test -z "$ax_l3_size" -o "$ax_l3_size" = "0"; then + sysctl_out=`$sysctl_exe -n hw.l3cachesize 2>/dev/null` + if test -n "$sysctl_out"; then + ax_l3_size=$(($sysctl_out / 1024)) + fi + fi fi - fi fi + # Final fallbacks test -z "$ax_l1_size" && ax_l1_size=0 test -z "$ax_l2_size" && ax_l2_size=0 - test -z "$ax_l3_size" && ax_l3_size=$ax_l2_size + test -z "$ax_l3_size" && ax_l3_size=0 # Keep only digits if there is a unit (ie 1024K -> 1024) and convert in Bytes AC_MSG_CHECKING(the L1 cache size)