diff --git a/m4/ax_cache_size.m4 b/m4/ax_cache_size.m4 index 72754bb..a11344a 100644 --- a/m4/ax_cache_size.m4 +++ b/m4/ax_cache_size.m4 @@ -41,8 +41,9 @@ AC_DEFUN([AX_CACHE_SIZE], ax_l1_size= ax_l2_size= + ax_l3_size= - #Check if the variable is present + # Linux sysfs path if test -e /sys/devices/system/cpu/cpu0/cache/index0/size; then for idx in `seq 0 3`; do if test -e /sys/devices/system/cpu/cpu0/cache/index$idx/size ; then @@ -57,63 +58,78 @@ AC_DEFUN([AX_CACHE_SIZE], ax_l3_size=$CPU0_L3_CACHE else - if test "x$ax_cv_cpu_vendor" != "xUnknown"; then - #Or use CPUID - AX_GCC_X86_CPUID(0x80000000) - cpu_exthigh=`echo $ax_cv_gcc_x86_cpuid_0x80000000 | cut -d ":" -f 1` - if test "x$cpu_exthi" \> "x80000004"; then - AX_GCC_X86_CPUID(0x80000005) # For L1 cache - l1_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000005 | cut -d ":" -f 4` - ax_l1_size=$((0x$l1_hexval >> 24)) - fi + # Try CPUID (mostly for Linux without sysfs or during cross-compilation) + if test "x$ax_cv_cpu_vendor" != "xIntel"; then + AX_GCC_X86_CPUID(0x80000000) + cpu_exthigh=`echo $ax_cv_gcc_x86_cpuid_0x80000000 | cut -d ":" -f 1` + if test "x$cpu_exthigh" \> "x80000004"; then + AX_GCC_X86_CPUID(0x80000005) + l1_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000005 | cut -d ":" -f 4` + ax_l1_size=$((0x$l1_hexval >> 24)) + fi + + if test "x$cpu_exthigh" \> "x80000005"; then + AX_GCC_X86_CPUID(0x80000006) + l2_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000006 | cut -d ":" -f 3` + ax_l2_size=$((0x$l2_hexval >> 16)) - if test "x$cpu_exthi" \> "x80000005"; then - AX_GCC_X86_CPUID(0x80000006) # For L2 cache - l2_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000006 | cut -d ":" -f 3` - ax_l2_size=$((0x$l2_hexval >> 16)) + # L3 size in EDX (bits 31:18), in 512 KB units (AMD only); result is in KB + l3_hexval=`echo $ax_cv_gcc_x86_cpuid_0x80000006 | cut -d ":" -f 4` + ax_l3_size=$(( (0x$l3_hexval >> 18) * 512 )) + fi fi - if test "x$cpu_exthi" \> "x80000005"; then - AX_GCC_X86_CPUID(0x80000006) # For L3 cache - l2_hexval=`echo 
$ax_cv_gcc_x86_cpuid_0x80000006 | cut -d ":" -f 4` - ax_l2_size=$((0x$l2_hexval >> 18))*512 + # sysctl fallback + sysctl_exe= + if test -x /usr/sbin/sysctl; then + sysctl_exe=/usr/sbin/sysctl + elif test -x /sbin/sysctl; then + sysctl_exe=/sbin/sysctl fi - fi - - #Or use sysctl - sysctl_exe= - if test -x /usr/sbin/sysctl ; then - sysctl_exe=/usr/sbin/sysctl - elif test -x /sbin/sysctl ; then - sysctl_exe=/sbin/sysctl - fi - if test -n "$sysctl_exe"; then - if test -z "$ax_l2_size" -o "$ax_l2_size" = "0"; then - sysctl_out=`$sysctl_exe -n hw.l2cachesize 2>/dev/null`; - if test ! -z "$sysctl_out"; then - ax_l2_size=$(($sysctl_out / 1024)) - fi; - - fi - if test -z "$ax_l1_size" -o "$ax_l1_size" = "0" ; then - sysctl_out=`$sysctl_exe -n hw.l1dcachesize 2>/dev/null`; - if test ! -z "$sysctl_out"; then - ax_l1_size=$(($sysctl_out / 1024)) - fi; - fi - if test -z "$ax_l1_size" -o "ax_l1_size" = "0" ; then - sysctl_out=`$sysctl_exe -n hw.l1cachesize 2>/dev/null`; - if test ! -z "$sysctl_out"; then - ax_l1_size=$(($sysctl_out / 1024)) - fi; + if test -n "$sysctl_exe"; then + # L1 + if test -z "$ax_l1_size" -o "$ax_l1_size" = "0"; then + sysctl_out=`$sysctl_exe -n hw.l1dcachesize 2>/dev/null` + if test -n "$sysctl_out"; then + ax_l1_size=$(($sysctl_out / 1024)) + fi + fi + if test -z "$ax_l1_size" -o "$ax_l1_size" = "0"; then + sysctl_out=`$sysctl_exe -n hw.l1icachesize 2>/dev/null` + if test -n "$sysctl_out"; then + ax_l1_size=$(($sysctl_out / 1024)) + fi + fi + if test -z "$ax_l1_size" -o "$ax_l1_size" = "0"; then + sysctl_out=`$sysctl_exe -n hw.l1cachesize 2>/dev/null` + if test -n "$sysctl_out"; then + ax_l1_size=$(($sysctl_out / 1024)) + fi + fi + + # L2 + if test -z "$ax_l2_size" -o "$ax_l2_size" = "0"; then + sysctl_out=`$sysctl_exe -n hw.l2cachesize 2>/dev/null` + if test -n "$sysctl_out"; then + ax_l2_size=$(($sysctl_out / 1024)) + fi + fi + + # L3 + if test -z "$ax_l3_size" -o "$ax_l3_size" = "0"; then + sysctl_out=`$sysctl_exe -n hw.l3cachesize 2>/dev/null` + if 
test -n "$sysctl_out"; then + ax_l3_size=$(($sysctl_out / 1024)) + fi + fi fi - fi fi + # Final fallbacks test -z "$ax_l1_size" && ax_l1_size=0 test -z "$ax_l2_size" && ax_l2_size=0 - test -z "$ax_l3_size" && ax_l3_size=$ax_l2_size + test -z "$ax_l3_size" && ax_l3_size=0 # Keep only digits if there is a unit (ie 1024K -> 1024) and convert in Bytes AC_MSG_CHECKING(the L1 cache size) diff --git a/m4ri/misc.h b/m4ri/misc.h index 99fd6bb..64c63bd 100644 --- a/m4ri/misc.h +++ b/m4ri/misc.h @@ -138,7 +138,7 @@ typedef uint64_t word; * \brief The number of bits in a word. */ -static int const m4ri_radix = 64; +#define m4ri_radix 64 /** * \brief The number one as a word.