From ded08aa0076402151ccc1ae7ced674b7005ec092 Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Sun, 30 Mar 2025 15:29:57 +0200 Subject: [PATCH 01/25] Add b_instrset_detect to namespace --- include/instrset_detect.cc | 7 +++++-- include/instrset_detect.hh | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/instrset_detect.cc b/include/instrset_detect.cc index a023773..49618c3 100644 --- a/include/instrset_detect.cc +++ b/include/instrset_detect.cc @@ -1,6 +1,9 @@ #include "instrset_detect.hh" +namespace bparser { + + int b_instrset_detect(void) { + return instrset_detect(); + } -int b_instrset_detect(void) { - return instrset_detect(); } \ No newline at end of file diff --git a/include/instrset_detect.hh b/include/instrset_detect.hh index 10430a6..b215c1e 100644 --- a/include/instrset_detect.hh +++ b/include/instrset_detect.hh @@ -15,7 +15,7 @@ #include "config.hh" #include "instrset.h" - -EXPORT int b_instrset_detect(void); - +namespace bparser { + EXPORT int b_instrset_detect(void); +} #endif \ No newline at end of file From d17d873ef977f4d5104f6f970f38fcec84d74e40 Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Sun, 30 Mar 2025 20:21:19 +0200 Subject: [PATCH 02/25] arena_resource --- CMakeLists.txt | 5 +- include/arena_alloc.hh | 45 ++++++--- include/arena_resource.hh | 181 +++++++++++++++++++++++++++++++++++++ include/config.hh | 8 +- include/instrset_detect.hh | 10 +- 5 files changed, 227 insertions(+), 22 deletions(-) create mode 100644 include/arena_resource.hh diff --git a/CMakeLists.txt b/CMakeLists.txt index 627b7e3..5eb10f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -160,7 +160,7 @@ if (NOT Boost_FOUND) if (NOT EXTERNAL_PROJECT_DIR) unset(BOOST_ROOT) endif() - find_package( Boost 1.58.0 REQUIRED) + find_package( Boost 1.58.0 REQUIRED) #since Boost 1.70.0 it should be find_package( Boost CONFIG REQUIRED) endif() message(STATUS "-------------------------------------------------------") @@ -198,6 +198,7 @@ add_library(bparser SHARED ${CMAKE_CURRENT_SOURCE_DIR}/include/processor_AVX512.cc ${CMAKE_CURRENT_SOURCE_DIR}/include/processor_double.cc ) +set_target_properties(bparser PROPERTIES COMPILE_FLAGS "${CMAKE_CXX_FLAGS} -DBPARSER_DLL") @@ -259,6 +260,6 @@ endmacro() define_test(test_parser bparser) define_test(test_array) define_test(test_grammar bparser) -define_test(test_processor) +define_test(test_processor bparser) define_test(test_speed bparser) define_test(test_simd) diff --git a/include/arena_alloc.hh b/include/arena_alloc.hh index 31196ab..bb2efc4 100644 --- a/include/arena_alloc.hh +++ b/include/arena_alloc.hh @@ -12,6 +12,7 @@ #include #include #include "aligned_alloc.hh" +#include "arena_resource.hh" namespace bparser { @@ -20,56 +21,72 @@ inline size_t align_size(size_t al, size_t size) { return (size + al -1) / al * al; } -struct ArenaAlloc { +struct ArenaAlloc : public PatchArena { + +#define SIZE (align_size(alignment,size)) + + ArenaAlloc(std::size_t alignment, std::size_t size) - : alignment_(alignment), - size_(0) + //: alignment_(alignment), + // size_(0) + : PatchArena(align_alloc(alignment, SIZE), SIZE, alignment) { - size_ = align_size(alignment_, size); + /*size_ = align_size(alignment_, size); base_ = (char*)align_alloc(alignment_, size_); BP_ASSERT(base_ != nullptr); ptr_ = base_; //std::cout << "arena begin: " << (void *)base_ << " end: " << end() << std::endl; + */ + size_ = buffer_size_; } +#undef SIZE ~ArenaAlloc() { destroy(); } void destroy() { - align_free(base_); + //align_free(base_); + align_free(PatchArena::buffer_); } - void *end() { + /*void* end() { return base_ + size_; - } + }*/ - void * allocate(std::size_t size) { + /*void* allocate(std::size_t size) { //defined in PatchArena, std::pmr::memory_resource + size = align_size(alignment_, size); void * ptr = ptr_; ptr_ += size; BP_ASSERT(ptr_ <= end()); //std::cout << "allocated: " << ptr << " end: " << (void *)ptr_ << " aend: " << end() << "\n"; return ptr; - } + + }*/ template - T * create(Args&&... args) { + T* create(Args&&... args) { + void * ptr = allocate(sizeof(T)); return new (ptr) T(std::forward(args)...); + } template - T * create_array(uint n_items) { + T* create_array(uint n_items) { + /* void * ptr = allocate(sizeof(T) * n_items); return new (ptr) T[n_items]; + */ + return PatchArena::allocate_simd(n_items); } - std::size_t alignment_; + //std::size_t alignment_; std::size_t size_; - char * base_; - char * ptr_; + //char * base_; + //char * ptr_; }; } // namespace bparser diff --git a/include/arena_resource.hh b/include/arena_resource.hh new file mode 100644 index 0000000..5fd054a --- /dev/null +++ b/include/arena_resource.hh @@ -0,0 +1,181 @@ +/*! + * + * Copyright (C) 2015 Technical University of Liberec. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 3 as published by the + * Free Software Foundation. (http://www.gnu.org/licenses/gpl-3.0.en.html) + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * + * + * @file arena_resource.hh + */ + +#ifndef ARENA_RESOURCE_HH_ +#define ARENA_RESOURCE_HH_ + +#include +#include +#include +#include +#include // !! Use Flow exception mechanism + +//#include "system/asserts.hh" +#include "assert.hh" + + +// Final proposal of Arena +// TODO shared_ptr out of class, pass pointer to data, describe how to use +template +class PatchArenaResource : public std::pmr::memory_resource { +protected: + /// Returns different upstream resource in debug / release mode + static inline std::pmr::memory_resource* upstream_resource() { +#ifdef DEBUG + return std::pmr::null_memory_resource(); +#else + return std::pmr::get_default_resource(); +#endif + } + +public: + //DECLARE_EXCEPTION( ExcArenaAllocation, + // << "Allocation of ArenaResource failed. Please check if correct type of upstream is used."); +#define EXC_ARENA_ALLOCATION "Allocation of ArenaResource failed. Please check if correct type of upstream is used." + + /// Same as previous but doesn't construct buffer implicitly. + PatchArenaResource(void *buffer, size_t buffer_size, size_t simd_alignment, std::pmr::memory_resource* upstream = PatchArenaResource::upstream_resource()) + : upstream_( upstream ), + buffer_(buffer), + buffer_size_(buffer_size), + resource_(buffer_, buffer_size, upstream_), + simd_alignment_(simd_alignment), + full_data_(false) + { + //ASSERT_PERMANENT_EQ( (buffer_size%simd_alignment), 0 ); + BP_ASSERT( (buffer_size % simd_alignment) == 0 ); + } + + + ~PatchArenaResource() = default; // virtual, call destructor buffer_ = default_resource, (resource_) + + /// Compute and print free space and used space of arena buffer. Development method + inline void print_space() { + void *p = this->raw_allocate(1, simd_alignment_); + size_t used_size = (char *)p - (char *)buffer_; + size_t free_space = buffer_size_ - used_size; + std::cout << "Allocated space of arena is " << used_size << " B, free space is " << free_space << " B." << std::endl; + } + + + /// Getter for resource + Resource &resource() { + return resource_; + } + + /// Allocate and return data pointer of n_item array of type T (alignment to length 8 bytes) + template + T* allocate_8(size_t n_items) { + size_t bytes = sizeof(T) * n_items; + return (T*)this->raw_allocate(bytes, 8); + } + + /// Allocate and return data pointer of n_item array of type T (alignment to length given by simd_alignment constructor argument) + template + T* allocate_simd(size_t n_items) { + size_t bytes = sizeof(T) * n_items; + return (T*)this->raw_allocate(bytes, simd_alignment_); + } + + // Reset allocated data + void reset() { + resource_.release(); + full_data_ = false; +#ifdef DEBUG + char *c_buffer = (char *)buffer_; + for (size_t i=0; ideallocate(p, bytes, alignment); + } + + /// Override do_is_equal for memory resource comparison + bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override { + return this == &other; + } + + std::pmr::memory_resource* upstream_; ///< Pointer to upstream + void* buffer_; ///< Pointer to buffer + size_t buffer_size_; ///< Size of buffer + Resource resource_; ///< Resource of arena + size_t simd_alignment_; ///< Size of SIMD alignment + bool full_data_; ///< Flag signs full data (child arena is created) +}; + + +template +class AssemblyArenaResource : public PatchArenaResource { +public: + /// Constructor. Creates assembly arena + AssemblyArenaResource(size_t buffer_size, size_t simd_alignment, std::pmr::memory_resource* upstream = PatchArenaResource::upstream_resource()) + : PatchArenaResource( std::pmr::get_default_resource()->allocate(buffer_size, simd_alignment), buffer_size, simd_alignment, upstream ) {} + + virtual ~AssemblyArenaResource() { + this->do_deallocate(this->buffer_, this->buffer_size_, this->simd_alignment_); + } + + /** + * Create and return child arena. + * + * Child arena is created in free space of actual arena. + * Actual arena is marked as full (flag full_data_) and cannot allocate new data. + */ + PatchArenaResource *get_child_arena() { + void *p = this->raw_allocate(1, this->simd_alignment_); + size_t used_size = (char *)p - (char *)this->buffer_; + size_t free_space = this->buffer_size_ - used_size; + this->full_data_ = true; + return new PatchArenaResource(p, free_space, this->simd_alignment_); + } + + +}; + + + +using AssemblyArena = AssemblyArenaResource; +using PatchArena = PatchArenaResource; + + +#endif /* ARENA_RESOURCE_HH_ */ diff --git a/include/config.hh b/include/config.hh index d1db252..91cf3fe 100644 --- a/include/config.hh +++ b/include/config.hh @@ -33,9 +33,13 @@ typedef unsigned int uint; #endif #if defined(_WIN32) -# define EXPORT __declspec(dllexport) +# if defined(BPARSER_DLL) +# define EXPORT __declspec(dllexport) +# else +# define EXPORT __declspec(dllimport) +# endif #else -#define EXPORT +# define EXPORT #endif #if defined(_WIN32) diff --git a/include/instrset_detect.hh b/include/instrset_detect.hh index b215c1e..5cd0b54 100644 --- a/include/instrset_detect.hh +++ b/include/instrset_detect.hh @@ -10,12 +10,14 @@ * Wraps the third party library function for DLL export reasons. */ -#ifndef INCLUDE_INSTRSET_DETECT_HH -#define INCLUDE_INSTRSET_DETECT_HH +#ifndef INCLUDE_INSTRSET_DETECT_HH_ +#define INCLUDE_INSTRSET_DETECT_HH_ #include "config.hh" #include "instrset.h" -namespace bparser { +namespace bparser{ + EXPORT int b_instrset_detect(void); + } -#endif \ No newline at end of file +#endif //!INCLUDE_INSTRSET_DETECT_HH_ \ No newline at end of file From 21d8a3964f6af23af5bd6bcf162ac147655475b5 Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Sun, 30 Mar 2025 20:30:24 +0200 Subject: [PATCH 03/25] note --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5eb10f7..00a9a14 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -260,6 +260,6 @@ endmacro() define_test(test_parser bparser) define_test(test_array) define_test(test_grammar bparser) -define_test(test_processor bparser) +define_test(test_processor bparser) #is it broken? -LV define_test(test_speed bparser) define_test(test_simd) From 975d254f289934c5cac97a086294bdefb9c27879 Mon Sep 17 00:00:00 2001 From: Jan Brezina Date: Mon, 31 Mar 2025 17:20:07 +0200 Subject: [PATCH 04/25] Install Eigen in github action --- .github/workflows/ccpp.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index fcb5beb..b8ade54 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -265,6 +265,10 @@ jobs: # Bparser dependency sudo apt-get install -y libboost-all-dev + # install eigen + wget https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz &&\ + tar -xzf eigen-3.4.0.tar.gz &&\ + mv eigen-3.4.0 /usr/local From e9ee1f4c44f477baaece70563e19178d9ce10161 Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Sun, 6 Apr 2025 15:30:09 +0200 Subject: [PATCH 05/25] ArenaAlloc is now a PatchArena wrapper --- include/arena_alloc.hh | 43 +++++++++++++++++++++++++------------ include/create_processor.hh | 2 +- include/grammar.impl.hh | 3 ++- include/parser.hh | 2 +- include/processor.hh | 14 +++++++++--- 5 files changed, 44 insertions(+), 20 deletions(-) diff --git a/include/arena_alloc.hh b/include/arena_alloc.hh index bb2efc4..5084f16 100644 --- a/include/arena_alloc.hh +++ b/include/arena_alloc.hh @@ -21,49 +21,57 @@ inline size_t align_size(size_t al, size_t size) { return (size + al -1) / al * al; } -struct ArenaAlloc : public PatchArena { +struct ArenaAlloc { -#define SIZE (align_size(alignment,size)) - + //Creates a wrapper of PatchArena for backwards compatibility with BParser + ArenaAlloc(PatchArena& existing_arena) : arena(&existing_arena),buffer(nullptr) { + ; + } + //Creates a wrapper with a new PatchArena with the specified memory alignment and size + //However AssemblyArena might be the correct class to create ArenaAlloc(std::size_t alignment, std::size_t size) //: alignment_(alignment), // size_(0) - : PatchArena(align_alloc(alignment, SIZE), SIZE, alignment) + : size_(align_size(alignment, size)) //We cannot access this->arena->buffer_size_. However it *should* not change and is currently only used by one assert. Maybe create getter? -LV { + buffer = align_alloc(alignment, size_); + arena = new PatchArena(buffer, size_, alignment); /*size_ = align_size(alignment_, size); base_ = (char*)align_alloc(alignment_, size_); BP_ASSERT(base_ != nullptr); ptr_ = base_; //std::cout << "arena begin: " << (void *)base_ << " end: " << end() << std::endl; */ - size_ = buffer_size_; } -#undef SIZE ~ArenaAlloc() { destroy(); } - void destroy() { + inline void destroy() { //align_free(base_); - align_free(PatchArena::buffer_); + if (buffer != nullptr) { + align_free(buffer); + } } /*void* end() { return base_ + size_; }*/ - /*void* allocate(std::size_t size) { //defined in PatchArena, std::pmr::memory_resource - + inline void* allocate(std::size_t size) { + /* size = align_size(alignment_, size); void * ptr = ptr_; ptr_ += size; BP_ASSERT(ptr_ <= end()); //std::cout << "allocated: " << ptr << " end: " << (void *)ptr_ << " aend: " << end() << "\n"; return ptr; + */ + return arena->allocate(size); - }*/ + } template T* create(Args&&... args) { @@ -79,14 +87,21 @@ struct ArenaAlloc : public PatchArena { void * ptr = allocate(sizeof(T) * n_items); return new (ptr) T[n_items]; */ - return PatchArena::allocate_simd(n_items); + return arena->allocate_simd(n_items); } - + inline std::size_t get_size() { + return size_; //arena->buffer_size would be more appropriate + } + //std::size_t alignment_; - std::size_t size_; + //std::size_t size_; //char * base_; //char * ptr_; +protected: + PatchArena* arena; + void* buffer; + std::size_t size_; }; } // namespace bparser diff --git a/include/create_processor.hh b/include/create_processor.hh index a4f10e4..fecb051 100644 --- a/include/create_processor.hh +++ b/include/create_processor.hh @@ -25,7 +25,7 @@ namespace bparser{ } } - ProcessorBase * ProcessorBase::create_processor(ExpressionDAG &se, uint vector_size, uint simd_size, ArenaAllocPtr arena) { + ProcessorBase * ProcessorBase::create_processor(ExpressionDAG &se, uint vector_size, uint simd_size, PatchArenaPtr arena) { if (simd_size == 0) { simd_size = get_simd_size(); } diff --git a/include/grammar.impl.hh b/include/grammar.impl.hh index 037c8ff..a356ccd 100644 --- a/include/grammar.impl.hh +++ b/include/grammar.impl.hh @@ -17,7 +17,8 @@ #include #include -#include +//#include +#include //#define BOOST_SPIRIT_NO_PREDEFINED_TERMINALS diff --git a/include/parser.hh b/include/parser.hh index e233927..ca8bde1 100644 --- a/include/parser.hh +++ b/include/parser.hh @@ -169,7 +169,7 @@ public: /// /// All variable names have to be set before this call. /// TODO: set result variable - void compile(std::shared_ptr arena = nullptr) { + void compile(std::shared_ptr arena = nullptr) { destroy_processor(); ParserResult res_array = boost::apply_visitor(ast::make_array(symbols_), ast); diff --git a/include/processor.hh b/include/processor.hh index 6f9452e..b16e018 100644 --- a/include/processor.hh +++ b/include/processor.hh @@ -127,6 +127,8 @@ using namespace details; typedef std::shared_ptr ArenaAllocPtr; +typedef std::shared_ptr PatchArenaPtr; + #define CODE(OP_NAME) \ @@ -158,7 +160,7 @@ struct ProcessorBase { return arena_; } - inline static ProcessorBase *create_processor(ExpressionDAG &se, uint vec_n_blocks, uint simd_size = 0, ArenaAllocPtr arena = nullptr); + inline static ProcessorBase *create_processor(ExpressionDAG &se, uint vec_n_blocks, uint simd_size = 0, PatchArenaPtr arena = nullptr); ArenaAllocPtr arena_; }; @@ -477,7 +479,13 @@ struct Processor : public ProcessorBase { Operation * program_; std::vector< std::shared_ptr > val_copy_nodes_; }; - +template +ProcessorBase* create_processor_(ExpressionDAG& se, uint vector_size, uint simd_size, PatchArenaPtr arena) { + if (arena == nullptr) { + return create_processor_(se, vector_size, simd_size, (ArenaAllocPtr)std::shared_ptr(nullptr)); //will create new ArenaAlloc in the other method + } + return create_processor_(se, vector_size, simd_size, std::make_shared(*arena)); +} template ProcessorBase * create_processor_(ExpressionDAG &se, uint vector_size, uint simd_size, ArenaAllocPtr arena) @@ -503,7 +511,7 @@ ProcessorBase * create_processor_(ExpressionDAG &se, uint vector_size, uint sim if (arena == nullptr) arena = std::make_shared(simd_bytes, est); else - BP_ASSERT(arena->size_ >= est); + BP_ASSERT(arena->get_size() >= est); return arena->create>>(arena, se, vec_n_blocks); } From 82f21d694d9ae4833bfe8b085b86c18163be9613 Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Sun, 6 Apr 2025 21:41:44 +0200 Subject: [PATCH 06/25] Eigen --- CMakeLists.txt | 18 ++++- include/arena_alloc.hh | 2 +- include/array.hh | 142 +++++++++++++++++++++++++++++++++++++- include/scalar_wrapper.hh | 65 +++++++++++++++++ 4 files changed, 223 insertions(+), 4 deletions(-) create mode 100644 include/scalar_wrapper.hh diff --git a/CMakeLists.txt b/CMakeLists.txt index 00a9a14..aa6181a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -160,7 +160,7 @@ if (NOT Boost_FOUND) if (NOT EXTERNAL_PROJECT_DIR) unset(BOOST_ROOT) endif() - find_package( Boost 1.58.0 REQUIRED) #since Boost 1.70.0 it should be find_package( Boost CONFIG REQUIRED) + find_package( Boost 1.70.0 REQUIRED) #since Boost 1.70.0 it should be find_package( Boost CONFIG REQUIRED) endif() message(STATUS "-------------------------------------------------------") @@ -169,11 +169,24 @@ message(STATUS "BOOST_ROOT = ${BOOST_ROOT}") message(STATUS "Boost_LIBRARIES = ${Boost_LIBRARIES}") message(STATUS "Boost_LIBRARY_DIRS = ${Boost_LIBRARY_DIRS}") message(STATUS "Boost_INCLUDE_DIR = ${Boost_INCLUDE_DIR}") +message(STATUS "=======================================================\n") + +#Eigen +message(STATUS "=======================================================") +message(STATUS "====== EIGEN ==========================================") +message(STATUS "=======================================================") + +find_package(Eigen3 CONFIG REQUIRED PATHS "/usr/local") #Remember to install Eigen for this to work. See eigen-3.x.x/INSTALL + +message(STATUS "-------------------------------------------------------") +message(STATUS "EIGEN_ROOT = ${EIGEN_ROOT}") +message(STATUS "Eigen3_DIR = ${Eigen3_DIR}") +message(STATUS "EIGEN3_INCLUDE_DIR = ${EIGEN3_INCLUDE_DIR}") message(STATUS "=======================================================\n\n") message(STATUS "VCL2_INCLUDE_DIR = ${CMAKE_CURRENT_SOURCE_DIR}/third_party/VCL_v2") -set(BPARSER_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/include ${Boost_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/third_party/VCL_v2) +set(BPARSER_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/include ${Boost_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/third_party/VCL_v2 ${EIGEN3_INCLUDE_DIR}) if(NOT PROJECT_IS_TOP_LEVEL) set(BPARSER_INCLUDES ${BPARSER_INCLUDES} PARENT_SCOPE) endif() @@ -198,6 +211,7 @@ add_library(bparser SHARED ${CMAKE_CURRENT_SOURCE_DIR}/include/processor_AVX512.cc ${CMAKE_CURRENT_SOURCE_DIR}/include/processor_double.cc ) +target_link_libraries(bparser Eigen3::Eigen) #Interface library, includes the header files set_target_properties(bparser PROPERTIES COMPILE_FLAGS "${CMAKE_CXX_FLAGS} -DBPARSER_DLL") diff --git a/include/arena_alloc.hh b/include/arena_alloc.hh index 5084f16..0ec025e 100644 --- a/include/arena_alloc.hh +++ b/include/arena_alloc.hh @@ -90,7 +90,7 @@ struct ArenaAlloc { return arena->allocate_simd(n_items); } - inline std::size_t get_size() { + inline std::size_t get_size() const { return size_; //arena->buffer_size would be more appropriate } diff --git a/include/array.hh b/include/array.hh index 490118f..6ac2598 100644 --- a/include/array.hh +++ b/include/array.hh @@ -16,6 +16,7 @@ #include #include "config.hh" #include "scalar_node.hh" +#include "scalar_wrapper.hh" //#include "test_tools.hh" namespace bparser { @@ -859,6 +860,133 @@ public: return result; } + /** + * Numpy.matmul: + * + * a has shape (..., i,j, k,l) + * b has shape (..., i,j, l,m) + * result has shape (..., i,j, k,m) + */ + static Array mat_mult_old(const Array& a, const Array& b) { + //std::cout << "mat mult: " << print_vector(a.shape()) << " @ " << print_vector(b.shape()) << "\n"; + + if (a.shape().size() == 0) + Throw() << "Matmult can not multiply by scalar a." << "\n"; + if (b.shape().size() == 0) + Throw() << "Matmult can not multiply by scalar b." << "\n"; + + auto a_broadcast = MultiIdxRange(a.shape()).full(); + + if (a.shape().size() == 1) { + a_broadcast.insert_axis(0, 0, 1); + } + auto b_broadcast = MultiIdxRange(b.shape()).full(); + if (b.shape().size() == 1) { + b_broadcast.insert_axis(1, 1, 1); + } + Shape a_shape = a_broadcast.source_shape_; + //uint a_only_dim = *(a_shape.end() - 2); + uint a_common_dim = *(a_shape.end() - 1); + a_shape.insert(a_shape.end(), 1); + // a_shape : (...,i,j,k,l,1) + + Shape b_shape = b_broadcast.source_shape_; + //uint b_only_dim = *(b_shape.end() - 1); + uint b_common_dim = *(b_shape.end() - 2); + b_shape.insert(b_shape.end() - 2, 1); + // b_shape : (...,i,j,1,l,m) + if (a_common_dim != b_common_dim) + Throw() << "Matmult summing dimension mismatch: " << a_common_dim << " != " << b_common_dim << "\n"; + Shape common_shape = MultiIdxRange::broadcast_common_shape(a_shape, b_shape); + // common_shape : (...,i,j,k,l,m) + Shape result_shape(common_shape); + *(result_shape.end() - 2) = 1; + // result_shape : (...,i,j,k,1,m) + + MultiIdxRange a_range = MultiIdxRange(a_shape).full().broadcast(common_shape); + a_range.target_transpose(-2, -1); + MultiIdxRange b_range = MultiIdxRange(b_shape).full().broadcast(common_shape); + b_range.target_transpose(-2, -1); + MultiIdxRange result_range = MultiIdxRange(result_shape).full(); + result_range.target_transpose(-2, -1); + // transpose a_range and b_range in order to have common_dim the last one: + // a_shape : (....i,j, k,l,1) + // b_shape : (...,i,j,1,m,l) + *(b_range.target_transpose_.end() - 1) = b_range.target_transpose_.size() - 2; + *(b_range.target_transpose_.end() - 2) = b_range.target_transpose_.size() - 1; + + + MultiIdx a_idx(a_range); + MultiIdx b_idx(b_range); // allocated + MultiIdx result_idx(result_range); + /* + std::cout << "a_idx, shp: " << print_vector(a_idx.range_.full_shape_) << "\n"; + std::cout << "b_idx, shp: " << print_vector(b_idx.range_.full_shape_) << "\n"; + std::cout << "r_idx, shp: " << print_vector(result_idx.range_.full_shape_) << "\n"; + */ + + ScalarNodePtr sum; + Array result(result_shape); + for (; result_idx.valid();) { + sum = nullptr; + a_idx.reset_indices(result_idx); + b_idx.reset_indices(result_idx); + for (; a_idx.valid();) { + /* + std::cout << "a_idx: " << print_vector(a_idx.indices()) << " didx: " + << a_idx.src_idx() << "\n"; + std::cout << "b_idx: " << print_vector(b_idx.indices()) << " didx: " + << b_idx.src_idx() << "\n"; + */ + ScalarNodePtr mult = details::ScalarNode::create( + a.elements_[a_idx.idx_src()], + b.elements_[b_idx.idx_src()]); + if (sum == nullptr) { + sum = mult; + } + else { + // TODO: how to use inplace operations correctly ?? + sum = details::ScalarNode::create(sum, mult); + } + //std::cout << "aidx "; + a_idx.inc_trg(-1, 1, false); + //std::cout << "bidx "; + b_idx.inc_trg(-1, 1, false); + BP_ASSERT(a_idx.valid() == b_idx.valid()); + } + /* + std::cout << "r_idx: " << print_vector(result_idx.indices()) << " didx: " + << result_idx.src_idx() << "\n"; + */ + + result.elements_[result_idx.idx_src()] = sum; + + result_idx.inc_trg(-2); + + } + + + auto final_range = MultiIdxRange(result.shape()).full(); + + //std::cout << " raw res: "<< print_vector(result_shape); + if (b.shape().size() == 1 && *(result_shape.end() - 1) == 1) { + // cut -1 axis + //std::cout << " b cut: "<< result_shape.size()-1 << "\n"; + final_range.remove_target_axis(result_shape.size() - 1); + } + BP_ASSERT(*(result_shape.end() - 2) == 1); + //std::cout << " r cut: "<< result_shape.size()-2 << "\n"; + final_range.remove_target_axis(result_shape.size() - 2); + // cut -2 axis always + if (a.shape().size() == 1 && *(result_shape.end() - 3) == 1) { + // cut -3 axis + // std::cout << " a cut: "<< result_shape.size()-3 << "\n"; + final_range.remove_target_axis(result_shape.size() - 3); + } + // std::cout << " final res: " << print_vector(final_range.sub_shape()) << "\n"; + return Array(result, final_range); + + } /** * Numpy.matmul: @@ -869,6 +997,18 @@ public: */ static Array mat_mult(const Array &a, const Array &b) { //std::cout << "mat mult: " << print_vector(a.shape()) << " @ " << print_vector(b.shape()) << "\n"; + Eigen::Matrix2d m1; + m1(0, 0) = 1; + m1(0, 1) = 2; + m1(1, 0) = 3; + m1(1, 1) = 4; + + Eigen::Vector2d v1; + v1(0) = 5; + v1(1) = 6; + + std::cout << (m1 * v1) << std::endl; + std::cout << (v1.transpose() * m1) << std::endl; if (a.shape().size() == 0) Throw() << "Matmult can not multiply by scalar a." << "\n"; @@ -945,7 +1085,7 @@ public: sum = mult; } else { // TODO: how to use inplace operations correctly ?? - sum = details::ScalarNode::create(sum, mult); + sum = (details::ScalarWrapper(sum) + details::ScalarWrapper(mult)).get(); } //std::cout << "aidx "; a_idx.inc_trg(-1,1, false); diff --git a/include/scalar_wrapper.hh b/include/scalar_wrapper.hh new file mode 100644 index 0000000..ef5c655 --- /dev/null +++ b/include/scalar_wrapper.hh @@ -0,0 +1,65 @@ +/* + * scalar_wrapper.hh + * + * Created on: Apr 6, 2025 + * Author: LV + */ + +//https://eigen.tuxfamily.org/dox/TopicCustomizing_CustomScalar.html + +#ifndef INCLUDE_SCALAR_WRAPPER_HH_ +#define INCLUDE_SCALAR_WRAPPER_HH_ + +#include "scalar_node.hh" +#include + +namespace bparser { + namespace details { + struct ScalarWrapper { + + ScalarWrapper(ScalarNodePtr existing_ptr) : node(existing_ptr) { ; } + + + inline ScalarWrapper operator+(const ScalarWrapper& b) const { + return ScalarWrapper(ScalarNode::create<_add_>(node, b.node)); + } + + inline ScalarNodePtr operator*() const { + return get(); + } + + inline ScalarNodePtr get() const { + return node; + } + + + + protected: + ScalarNodePtr node; + }; + + + } +} +//https://eigen.tuxfamily.org/dox/structEigen_1_1NumTraits.html +namespace Eigen { + template<> struct NumTraits + : NumTraits + { + typedef bparser::details::ScalarWrapper Real; + typedef bparser::details::ScalarWrapper NonInteger; + typedef bparser::details::ScalarWrapper Nested; + + enum { + IsComplex = 0, + IsInteger = 0, + IsSigned = 1, + RequireInitialization = 0, + ReadCost = HugeCost, + AddCost = HugeCost, + MulCost = HugeCost + }; + }; +} + +#endif //!INCLUDE_SCALAR_WRAPPER_HH_ \ No newline at end of file From 11fbb54cb4464f34829586114c8874eba89d5347 Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Sun, 13 Apr 2025 19:31:48 +0200 Subject: [PATCH 07/25] does not work, topic for consultation --- include/array.hh | 143 +++++++++++++++++--------------------- include/scalar_wrapper.hh | 83 ++++++++++++++++++++-- 2 files changed, 142 insertions(+), 84 deletions(-) diff --git a/include/array.hh b/include/array.hh index 6ac2598..f77bd7b 100644 --- a/include/array.hh +++ b/include/array.hh @@ -12,7 +12,6 @@ #include #include #include -#include #include #include "config.hh" #include "scalar_node.hh" @@ -860,6 +859,46 @@ public: return result; } + static Eigen::MatrixX wrap_array(const bparser::Array& a) { + BP_ASSERT(a.shape().size() <= 2); // "Eigen does not support more than 2 dimensions" + + using namespace details; + if (a.shape().size() == 0) { + return Eigen::MatrixX(); + } + if (a.shape().size() == 1) { + Eigen::VectorX v(a.shape()[0]); //Rows x cols, which is which + for (uint i = 0; i < a.shape()[0]; i++) { + v(i) = ScalarWrapper(a.elements()[i]); + } + return v; + } + else {// (a.shape().size() == 2) { + MultiIdx index(a.range()); + Eigen::MatrixX m(a.shape()[0], a.shape()[1]); + for (uint row = 0; row < a.shape()[0]; row++) { + for (uint col = 0; col < a.shape()[1]; col++) { + m(row, col) = ScalarWrapper(a[index]); + index.inc_src(); + } + } + return m; + } + } + + static bparser::Array unwrap_array(const Eigen::MatrixX& m) { + using namespace details; + Array a({ (uint)m.rows(), (uint)m.cols() }); + MultiIdx index(a.range()); + for (uint row = 0; row < a.shape()[0]; row++) { + for (uint col = 0; col < a.shape()[1]; col++) { + a.elements_[index.idx_src()] = m(row, col).get(); + index.inc_src(); + } + } + return a; + } + /** * Numpy.matmul: * @@ -997,18 +1036,23 @@ public: */ static Array mat_mult(const Array &a, const Array &b) { //std::cout << "mat mult: " << print_vector(a.shape()) << " @ " << print_vector(b.shape()) << "\n"; - Eigen::Matrix2d m1; - m1(0, 0) = 1; - m1(0, 1) = 2; - m1(1, 0) = 3; - m1(1, 1) = 4; - - Eigen::Vector2d v1; - v1(0) = 5; - v1(1) = 6; - - std::cout << (m1 * v1) << std::endl; - std::cout << (v1.transpose() * m1) << std::endl; + Eigen::Matrix m1; + m1(0, 0) = details::ScalarWrapper(details::ScalarNode::create_const(1)); + m1(0, 1) = details::ScalarWrapper(details::ScalarNode::create_const(2)); + m1(1, 0) = details::ScalarWrapper(details::ScalarNode::create_const(3)); + m1(1, 1) = details::ScalarWrapper(details::ScalarNode::create_const(4)); + + Eigen::Vector v1; + + v1(0) = details::ScalarWrapper(details::ScalarNode::create_const(5)); + v1(1) = details::ScalarWrapper(details::ScalarNode::create_const(6)); + //17,39 + auto r = (m1 * v1); + //std::cout << r << std::endl; + //std::cout << (v1.transpose() * m1) << std::endl; + std::cout << "Shape: ---------" << std::endl; + std::cout << print_shape(a.shape()) << std::endl; + std::cout << print_shape(b.shape()) << std::endl; if (a.shape().size() == 0) Throw() << "Matmult can not multiply by scalar a." << "\n"; @@ -1016,14 +1060,15 @@ public: Throw() << "Matmult can not multiply by scalar b." << "\n"; auto a_broadcast = MultiIdxRange(a.shape()).full(); - if (a.shape().size() == 1) { a_broadcast.insert_axis(0, 0, 1); } + auto b_broadcast = MultiIdxRange(b.shape()).full(); if (b.shape().size() == 1) { b_broadcast.insert_axis(1, 1, 1); } + Shape a_shape = a_broadcast.source_shape_; //uint a_only_dim = *(a_shape.end() - 2); uint a_common_dim = *(a_shape.end() - 1); @@ -1035,74 +1080,14 @@ public: uint b_common_dim = *(b_shape.end() - 2); b_shape.insert(b_shape.end() - 2, 1); // b_shape : (...,i,j,1,l,m) + if (a_common_dim != b_common_dim) Throw() << "Matmult summing dimension mismatch: " << a_common_dim << " != " << b_common_dim << "\n"; - Shape common_shape = MultiIdxRange::broadcast_common_shape(a_shape, b_shape); + //Shape common_shape = MultiIdxRange::broadcast_common_shape(a_shape, b_shape); // common_shape : (...,i,j,k,l,m) - Shape result_shape(common_shape); - *(result_shape.end() - 2) = 1; - // result_shape : (...,i,j,k,1,m) - - MultiIdxRange a_range = MultiIdxRange(a_shape).full().broadcast(common_shape); - a_range.target_transpose(-2, -1); - MultiIdxRange b_range = MultiIdxRange(b_shape).full().broadcast(common_shape); - b_range.target_transpose(-2, -1); - MultiIdxRange result_range = MultiIdxRange(result_shape).full(); - result_range.target_transpose(-2, -1); - // transpose a_range and b_range in order to have common_dim the last one: - // a_shape : (....i,j, k,l,1) - // b_shape : (...,i,j,1,m,l) - *(b_range.target_transpose_.end() - 1) = b_range.target_transpose_.size() - 2; - *(b_range.target_transpose_.end() - 2) = b_range.target_transpose_.size() - 1; - - - MultiIdx a_idx(a_range); - MultiIdx b_idx(b_range); // allocated - MultiIdx result_idx(result_range); -/* - std::cout << "a_idx, shp: " << print_vector(a_idx.range_.full_shape_) << "\n"; - std::cout << "b_idx, shp: " << print_vector(b_idx.range_.full_shape_) << "\n"; - std::cout << "r_idx, shp: " << print_vector(result_idx.range_.full_shape_) << "\n"; -*/ - - ScalarNodePtr sum; - Array result(result_shape); - for(;result_idx.valid();) { - sum = nullptr; - a_idx.reset_indices(result_idx); - b_idx.reset_indices(result_idx); - for(;a_idx.valid();) { -/* - std::cout << "a_idx: " << print_vector(a_idx.indices()) << " didx: " - << a_idx.src_idx() << "\n"; - std::cout << "b_idx: " << print_vector(b_idx.indices()) << " didx: " - << b_idx.src_idx() << "\n"; -*/ - ScalarNodePtr mult = details::ScalarNode::create( - a.elements_[a_idx.idx_src()], - b.elements_[b_idx.idx_src()]); - if (sum == nullptr) { - sum = mult; - } else { - // TODO: how to use inplace operations correctly ?? - sum = (details::ScalarWrapper(sum) + details::ScalarWrapper(mult)).get(); - } - //std::cout << "aidx "; - a_idx.inc_trg(-1,1, false); - //std::cout << "bidx "; - b_idx.inc_trg(-1,1, false); - BP_ASSERT(a_idx.valid() == b_idx.valid()); - } -/* - std::cout << "r_idx: " << print_vector(result_idx.indices()) << " didx: " - << result_idx.src_idx() << "\n"; -*/ - - result.elements_[result_idx.idx_src()] = sum; - - result_idx.inc_trg(-2); - - } + + Array result = unwrap_array(wrap_array(a) * wrap_array(b)); + Shape result_shape = result.shape(); auto final_range = MultiIdxRange(result.shape()).full(); diff --git a/include/scalar_wrapper.hh b/include/scalar_wrapper.hh index ef5c655..d4d997b 100644 --- a/include/scalar_wrapper.hh +++ b/include/scalar_wrapper.hh @@ -15,16 +15,46 @@ namespace bparser { namespace details { + // Eigen compatible wrapper for ScalarNode struct ScalarWrapper { + ScalarWrapper() : node(ScalarNode::create_zero()) { ; } + ScalarWrapper(int i) : node(ScalarNode::create_const(i)) { ; } + ScalarWrapper(double d) : node(ScalarNode::create_const(d)) { ; } ScalarWrapper(ScalarNodePtr existing_ptr) : node(existing_ptr) { ; } + inline ScalarWrapper operator-() const { + return un_op<_minus_>(*this); + } + + inline ScalarWrapper& operator+=(const ScalarWrapper& b) { + node = bin_op<_add_>(*this, b).get(); + return *this; + } inline ScalarWrapper operator+(const ScalarWrapper& b) const { - return ScalarWrapper(ScalarNode::create<_add_>(node, b.node)); + return bin_op<_add_>(*this, b); + } + + inline ScalarWrapper operator-(const ScalarWrapper& b) const { + return bin_op<_sub_>(*this, b); + } + + inline ScalarWrapper operator*(const ScalarWrapper& b) const { + return bin_op<_mul_>(*this, b); } - inline ScalarNodePtr operator*() const { + inline ScalarWrapper operator/(const ScalarWrapper& b) const { + return bin_op<_div_>(*this, b); + } + + inline bool operator==(const ScalarWrapper& b) const { + b.get(); + return false;// return bin_op<_eq_>(*this, b); //How?????????? + } + + + inline ScalarNodePtr operator*() const { //dereference return get(); } @@ -32,15 +62,58 @@ namespace bparser { return node; } + template + static ScalarWrapper bin_op(const ScalarWrapper& a, const ScalarWrapper& b) { + return ScalarWrapper(ScalarNode::create(a.get(), b.get())); + } + + template + static ScalarWrapper un_op(const ScalarWrapper& a) { + return ScalarWrapper(ScalarNode::create(a.get())); + } protected: ScalarNodePtr node; + + + }; //ScalarWrapper + + //inline std::ostream& operator<<(std::ostream& out, const ScalarWrapper& s) { + // + //} + +#define UN_OP(OP) \ + inline ScalarWrapper OP(const ScalarWrapper& s) { \ + return ScalarWrapper::un_op<_##OP##_>(s); \ + } \ + using std::OP; + + inline ScalarWrapper abs(const ScalarWrapper& s) { + return ScalarWrapper::un_op<_abs_>(s); + } + using std::abs; + + //https://eigen.tuxfamily.org/dox/namespaceEigen.html#a54cc34b64b4935307efc06d56cd531df + inline ScalarWrapper abs2(const ScalarWrapper& s) { + return s*s; }; + inline ScalarWrapper sqrt(const ScalarWrapper& s) { + return ScalarWrapper::un_op<_sqrt_>(s); + } + using std::sqrt; + + UN_OP(log) + UN_OP(log10) + + + + + + } //details +} //bparser - } -} //https://eigen.tuxfamily.org/dox/structEigen_1_1NumTraits.html namespace Eigen { template<> struct NumTraits @@ -54,7 +127,7 @@ namespace Eigen { IsComplex = 0, IsInteger = 0, IsSigned = 1, - RequireInitialization = 0, + RequireInitialization = 1, ReadCost = HugeCost, AddCost = HugeCost, MulCost = HugeCost From 8bf99072152b9595c7989e4ce49073decaa876ca Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Sun, 13 Apr 2025 20:24:24 +0200 Subject: [PATCH 08/25] Found the issue, but not fixed yet --- include/array.hh | 9 +++++---- test/test_parser.cc | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/array.hh b/include/array.hh index f77bd7b..5f68d64 100644 --- a/include/array.hh +++ b/include/array.hh @@ -867,7 +867,7 @@ public: return Eigen::MatrixX(); } if (a.shape().size() == 1) { - Eigen::VectorX v(a.shape()[0]); //Rows x cols, which is which + Eigen::VectorX v(a.shape()[0]); //mat_mult_old is altering the vector size. Undocumented convenience? for (uint i = 0; i < a.shape()[0]; i++) { v(i) = ScalarWrapper(a.elements()[i]); } @@ -1087,10 +1087,10 @@ public: // common_shape : (...,i,j,k,l,m) Array result = unwrap_array(wrap_array(a) * wrap_array(b)); - Shape result_shape = result.shape(); + //Shape result_shape = result.shape(); - - auto final_range = MultiIdxRange(result.shape()).full(); + return result; + /*auto final_range = MultiIdxRange(result.shape()).full(); //std::cout << " raw res: "<< print_vector(result_shape); if (b.shape().size() == 1 && *(result_shape.end() - 1) == 1 ) { @@ -1109,6 +1109,7 @@ public: } // std::cout << " final res: " << print_vector(final_range.sub_shape()) << "\n"; return Array(result, final_range); + */ } diff --git a/test/test_parser.cc b/test/test_parser.cc index 8f905df..efb16e7 100644 --- a/test/test_parser.cc +++ b/test/test_parser.cc @@ -227,6 +227,7 @@ void test_expression() { BP_ASSERT(test_expr("25 % cs3", {1})); BP_ASSERT(test_expr("25 % cv4", {1, 0, 1})); + BP_ASSERT(test_expr("[[1,2],[3,4]] @ [5,6]", { 17,39 }, { 2,1 })); //Eigen test BP_ASSERT(test_expr("[3, 4] @ [[1], [2]]", {11}, {1})); BP_ASSERT(test_expr("[3, 4, 1] @ [[1], [2], [3]]", {14}, {1})); ASSERT_THROW(test_expr("[[1], [2], [3]] @ [3, 4, 1]", {14}, {1}), "Matmult summing dimension mismatch"); From 8ccd10e11c01074bda828b4ab4f0458a989999b2 Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Sat, 26 Apr 2025 18:55:08 +0200 Subject: [PATCH 09/25] Added getter --- include/arena_alloc.hh | 5 ++--- include/arena_resource.hh | 3 +++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/arena_alloc.hh b/include/arena_alloc.hh index 0ec025e..77b1824 100644 --- a/include/arena_alloc.hh +++ b/include/arena_alloc.hh @@ -33,8 +33,8 @@ struct ArenaAlloc { ArenaAlloc(std::size_t alignment, std::size_t size) //: alignment_(alignment), // size_(0) - : size_(align_size(alignment, size)) //We cannot access this->arena->buffer_size_. However it *should* not change and is currently only used by one assert. Maybe create getter? -LV { + size_t size_ = align_size(alignment, size); buffer = align_alloc(alignment, size_); arena = new PatchArena(buffer, size_, alignment); /*size_ = align_size(alignment_, size); @@ -91,7 +91,7 @@ struct ArenaAlloc { } inline std::size_t get_size() const { - return size_; //arena->buffer_size would be more appropriate + return arena->get_size(); } //std::size_t alignment_; @@ -101,7 +101,6 @@ struct ArenaAlloc { protected: PatchArena* arena; void* buffer; - std::size_t size_; }; } // namespace bparser diff --git a/include/arena_resource.hh b/include/arena_resource.hh index 5fd054a..6baa1cf 100644 --- a/include/arena_resource.hh +++ b/include/arena_resource.hh @@ -101,6 +101,9 @@ public: #endif } + inline size_t get_size() { + return buffer_size_; + } protected: void* raw_allocate(size_t bytes, size_t alignment) { //ASSERT(!full_data_).error("Allocation of new data is not possible because child arena was created."); From c48a10f8c80bc40ad21868856efa93cf6ec9c4eb Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Sat, 26 Apr 2025 18:55:52 +0200 Subject: [PATCH 10/25] Expanded wrap_array wiht MultiIdx. Added Numpy diag func --- include/array.hh | 147 +++++++++++++++++++++----------------- include/grammar.impl.hh | 1 + include/scalar_wrapper.hh | 37 ++++++---- test/test_parser.cc | 6 +- 4 files changed, 112 insertions(+), 79 deletions(-) diff --git a/include/array.hh b/include/array.hh index 5f68d64..4a56471 100644 --- a/include/array.hh +++ b/include/array.hh @@ -859,44 +859,82 @@ public: return result; } + + + //Wraps the ScalarNodes of an Array into an Eigen Matrix of ScalarWrappers. + //Vectors will be column vectors. Eigen does not support vectors without orientation. + //Cannot wrap scalars. To wrap scalars, use the bparser::details::ScalarWrapper constructor static Eigen::MatrixX wrap_array(const bparser::Array& a) { - BP_ASSERT(a.shape().size() <= 2); // "Eigen does not support more than 2 dimensions" + MultiIdx idx(a.range()); + return wrap_array(a, idx); + } + + //Wraps the ScalarNodes of an Array accessed via MultiIdx.idx_trg() created from supplied MultiIdxRange into an Eigen Matrix of ScalarWrapper + //Vectors will be column vectors. Eigen does not support vectors without orientation. + //Cannot wrap scalars. To wrap scalars, use the bparser::details::ScalarWrapper constructor + static Eigen::MatrixX wrap_array(const bparser::Array& a, MultiIdxRange& range) { + MultiIdx idx (range); + return wrap_array(a, idx); + } + + //Wraps the ScalarNodes of an Array accessed via MultiIdx.idx_trg() into an Eigen Matrix of ScalarWrapper + //Vectors will be column vectors. Eigen does not support vectors without orientation. + //Cannot wrap scalars. To wrap scalars, use the bparser::details::ScalarWrapper constructor + static Eigen::MatrixX wrap_array(const bparser::Array& a, MultiIdx& index) { using namespace details; - if (a.shape().size() == 0) { - return Eigen::MatrixX(); + Shape trg_shape = index.range_.target_shape(); + if (trg_shape.size() == 0) { + Throw() << "Attempted to wrap scalar into Eigen Matrix"; } - if (a.shape().size() == 1) { - Eigen::VectorX v(a.shape()[0]); //mat_mult_old is altering the vector size. Undocumented convenience? - for (uint i = 0; i < a.shape()[0]; i++) { - v(i) = ScalarWrapper(a.elements()[i]); + if (trg_shape.size() == 1) { + uint len = trg_shape[0]; + Eigen::VectorX v(len); + for (uint i = 0; i < len && index.valid(); i++, index.inc_trg()) { + v(i) = ScalarWrapper(a.elements_[index.idx_trg()]); } return v; } - else {// (a.shape().size() == 2) { - MultiIdx index(a.range()); - Eigen::MatrixX m(a.shape()[0], a.shape()[1]); - for (uint row = 0; row < a.shape()[0]; row++) { - for (uint col = 0; col < a.shape()[1]; col++) { - m(row, col) = ScalarWrapper(a[index]); - index.inc_src(); + else {// (a.shape().size() > 2) { + + + + uint rows = *(trg_shape.end() - 2); + uint cols = *(trg_shape.end() - 1); + + Eigen::MatrixX m(rows, cols); + for (uint row = 0; row < rows; row++) { + for (uint col = 0; col < cols && index.valid(); col++, index.inc_trg()) { + m(row, col) = ScalarWrapper(a.elements_[index.idx_trg()]); } } return m; } } - static bparser::Array unwrap_array(const Eigen::MatrixX& m) { + //Creates an Array of ScalarNodes from an Eigen Matrix of ScalarWrappers + // make_vector - Will reduce the Array shape if Matrix is actually a Vector. Shape:(x,1) -> (x); (1,y) -> (y) + static bparser::Array unwrap_array(const Eigen::MatrixX& m, const bool make_vector = false) { using namespace details; - Array a({ (uint)m.rows(), (uint)m.cols() }); - MultiIdx index(a.range()); - for (uint row = 0; row < a.shape()[0]; row++) { - for (uint col = 0; col < a.shape()[1]; col++) { - a.elements_[index.idx_src()] = m(row, col).get(); - index.inc_src(); + + if (make_vector && (m.rows() == 1 || m.cols() == 1)) { + Array a({ (uint)std::max(m.rows(),m.cols()) }); + MultiIdx index(a.range()); + for (uint i = 0; i < a.shape()[0]; i++, index.inc_src()) { + a.elements_[index.idx_src()] = m(i).get(); } + return a; + } + else { + Array a({ (uint)m.rows(), (uint)m.cols() }); + MultiIdx index(a.range()); + for (uint row = 0; row < a.shape()[0]; row++) { + for (uint col = 0; col < a.shape()[1]; col++, index.inc_src()) { + a.elements_[index.idx_src()] = m(row, col).get(); + } + } + return a; } - return a; } /** @@ -1036,60 +1074,29 @@ public: */ static Array mat_mult(const Array &a, const Array &b) { //std::cout << "mat mult: " << print_vector(a.shape()) << " @ " << print_vector(b.shape()) << "\n"; - Eigen::Matrix m1; - m1(0, 0) = details::ScalarWrapper(details::ScalarNode::create_const(1)); - m1(0, 1) = details::ScalarWrapper(details::ScalarNode::create_const(2)); - m1(1, 0) = details::ScalarWrapper(details::ScalarNode::create_const(3)); - m1(1, 1) = details::ScalarWrapper(details::ScalarNode::create_const(4)); - - Eigen::Vector v1; - - v1(0) = details::ScalarWrapper(details::ScalarNode::create_const(5)); - v1(1) = details::ScalarWrapper(details::ScalarNode::create_const(6)); - //17,39 - auto r = (m1 * v1); - //std::cout << r << std::endl; - //std::cout << (v1.transpose() * m1) << std::endl; - std::cout << "Shape: ---------" << std::endl; - std::cout << print_shape(a.shape()) << std::endl; - std::cout << print_shape(b.shape()) << std::endl; + + //std::cout << "Shape: ---------" << std::endl; + //std::cout << print_shape(a.shape()) << std::endl; + //std::cout << print_shape(b.shape()) << std::endl; if (a.shape().size() == 0) Throw() << "Matmult can not multiply by scalar a." << "\n"; if (b.shape().size() == 0) Throw() << "Matmult can not multiply by scalar b." << "\n"; - auto a_broadcast = MultiIdxRange(a.shape()).full(); - if (a.shape().size() == 1) { - a_broadcast.insert_axis(0, 0, 1); - } + auto m_a = wrap_array(a); + auto m_b = wrap_array(b); - auto b_broadcast = MultiIdxRange(b.shape()).full(); - if (b.shape().size() == 1) { - b_broadcast.insert_axis(1, 1, 1); + if (a.shape().size() == 1) { //is vector + m_a = m_a.transpose(); //colvec -> rowvec } - Shape a_shape = a_broadcast.source_shape_; - //uint a_only_dim = *(a_shape.end() - 2); - uint a_common_dim = *(a_shape.end() - 1); - a_shape.insert(a_shape.end(), 1); - // a_shape : (...,i,j,k,l,1) - - Shape b_shape = b_broadcast.source_shape_; - //uint b_only_dim = *(b_shape.end() - 1); - uint b_common_dim = *(b_shape.end() - 2); - b_shape.insert(b_shape.end() - 2, 1); - // b_shape : (...,i,j,1,l,m) + if (m_a.cols() != m_b.rows()) + Throw() << "Matmult summing dimension mismatch: " << m_a.cols() << " != " << m_b.rows() << "\n"; - if (a_common_dim != b_common_dim) - Throw() << "Matmult summing dimension mismatch: " << a_common_dim << " != " << b_common_dim << "\n"; - //Shape common_shape = MultiIdxRange::broadcast_common_shape(a_shape, b_shape); - // common_shape : (...,i,j,k,l,m) - - Array result = unwrap_array(wrap_array(a) * wrap_array(b)); + return unwrap_array(m_a * m_b, (a.shape().size() == 1 || b.shape().size() == 1)); //Shape result_shape = result.shape(); - return result; /*auto final_range = MultiIdxRange(result.shape()).full(); //std::cout << " raw res: "<< print_vector(result_shape); @@ -1113,6 +1120,18 @@ public: } + static Array diag(const Array& a) { + if (a.shape().size() == 0) + return a; + + if (a.shape().size() == 1) { // diag -> matrix + return unwrap_array(wrap_array(a).asDiagonal()); + } + // matrix -> diag + return unwrap_array(wrap_array(a).diagonal(),true); + + } + static Array flatten(const Array &tensor) { uint n_elements = shape_size(tensor.shape()); Shape res_shape(1, n_elements); diff --git a/include/grammar.impl.hh b/include/grammar.impl.hh index a356ccd..e4ec1c7 100644 --- a/include/grammar.impl.hh +++ b/include/grammar.impl.hh @@ -179,6 +179,7 @@ struct grammar : qi::grammar { FN("power" , binary_array<_pow_>()) FN("minimum", binary_array<_min_>()) FN("maximum", binary_array<_max_>()) + FN("diag" , &Array::diag) ; unary_op.add diff --git a/include/scalar_wrapper.hh b/include/scalar_wrapper.hh index d4d997b..54dda38 100644 --- a/include/scalar_wrapper.hh +++ b/include/scalar_wrapper.hh @@ -49,8 +49,10 @@ namespace bparser { } inline bool operator==(const ScalarWrapper& b) const { - b.get(); - return false;// return bin_op<_eq_>(*this, b); //How?????????? + if (((***this).result_storage == constant && (**b).result_storage == constant ) || + ((***this).result_storage == constant_bool && (**b).result_storage == constant_bool) ) + return *(***this).values_ == *(**b).values_; + return false; } @@ -89,23 +91,30 @@ namespace bparser { } \ using std::OP; - inline ScalarWrapper abs(const ScalarWrapper& s) { - return ScalarWrapper::un_op<_abs_>(s); - } - using std::abs; + /* + UN_OP(abs) //https://eigen.tuxfamily.org/dox/namespaceEigen.html#a54cc34b64b4935307efc06d56cd531df inline ScalarWrapper abs2(const ScalarWrapper& s) { return s*s; }; - - inline ScalarWrapper sqrt(const ScalarWrapper& s) { - return ScalarWrapper::un_op<_sqrt_>(s); - } - using std::sqrt; - - UN_OP(log) - UN_OP(log10) + */ + + //UN_OP(sqrt) + //UN_OP(exp) + //UN_OP(log) + //UN_OP(log10) + //UN_OP(sin) + //UN_OP(sinh) + //UN_OP(asin) + //UN_OP(cos) + //UN_OP(cosh) + //UN_OP(acos) + //UN_OP(tan) + //UN_OP(tanh) + //UN_OP(atan) + //UN_OP(ceil) + //UN_OP(floor) diff --git a/test/test_parser.cc b/test/test_parser.cc index efb16e7..ab1fdd0 100644 --- a/test/test_parser.cc +++ b/test/test_parser.cc @@ -227,7 +227,7 @@ void test_expression() { BP_ASSERT(test_expr("25 % cs3", {1})); BP_ASSERT(test_expr("25 % cv4", {1, 0, 1})); - BP_ASSERT(test_expr("[[1,2],[3,4]] @ [5,6]", { 17,39 }, { 2,1 })); //Eigen test + BP_ASSERT(test_expr("[[1,2],[3,4]] @ [5,6]", { 17,39 }, { 2 })); BP_ASSERT(test_expr("[3, 4] @ [[1], [2]]", {11}, {1})); BP_ASSERT(test_expr("[3, 4, 1] @ [[1], [2], [3]]", {14}, {1})); ASSERT_THROW(test_expr("[[1], [2], [3]] @ [3, 4, 1]", {14}, {1}), "Matmult summing dimension mismatch"); @@ -237,6 +237,10 @@ void test_expression() { BP_ASSERT(test_expr("[[1],[2],[3]] @ [[1,2,3]]", {1, 2, 3, 2, 4, 6, 3, 6, 9}, {3,3})); BP_ASSERT(test_expr("a=[1,2,3]; a[:, None] @ a[None,:]", {1, 2, 3, 2, 4, 6, 3, 6, 9}, {3,3})); + BP_ASSERT(test_expr("diag([1,2,3])", {1, 0, 0, 0, 2, 0, 0, 0, 3}, {3,3})); + BP_ASSERT(test_expr("diag([[1,5],[9,2]])", { 1, 2 }, {2})); + BP_ASSERT(test_expr("diag(diag([1,2,3]))", { 1, 2, 3 }, {3})); + BP_ASSERT(test_expr("abs(-1)+abs(0)+abs(1)", {2})); BP_ASSERT(test_expr("floor(-3.5)", {-4}, {})); From d248826e8952c4fcc297e7255ba83558d2db6902 Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Sat, 26 Apr 2025 19:40:39 +0200 Subject: [PATCH 11/25] workflow change --- .github/workflows/ccpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index b8ade54..56deb1e 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -268,7 +268,7 @@ jobs: # install eigen wget https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz &&\ tar -xzf eigen-3.4.0.tar.gz &&\ - mv eigen-3.4.0 /usr/local + sudo mv eigen-3.4.0 /usr/local From 783cbdcd7901ec7213de87bee05415d88040cf01 Mon Sep 17 00:00:00 2001 From: Jan Brezina Date: Sun, 4 May 2025 20:14:00 +0200 Subject: [PATCH 12/25] Update eigen install in GH action --- .github/workflows/ccpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index b8ade54..56deb1e 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -268,7 +268,7 @@ jobs: # install eigen wget https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz &&\ tar -xzf eigen-3.4.0.tar.gz &&\ - mv eigen-3.4.0 /usr/local + sudo mv eigen-3.4.0 /usr/local From 740f6910184070753f5b26b64bc2788a35e151fb Mon Sep 17 00:00:00 2001 From: Jan Brezina Date: Sun, 4 May 2025 21:34:35 +0200 Subject: [PATCH 13/25] Add more complex matrix multiplication tests. --- test/test_parser.cc | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test/test_parser.cc b/test/test_parser.cc index 8f905df..eaa09a4 100644 --- a/test/test_parser.cc +++ b/test/test_parser.cc @@ -234,8 +234,26 @@ void test_expression() { BP_ASSERT(test_expr("[[1, 2], [2, 3], [3, 4]] @ [[1], [2]]", {5, 8, 11}, {3,1})); BP_ASSERT(test_expr("[[1, 2], [2, 3], [3, 4]] @ [1, 2]", {5, 8, 11}, {3})); BP_ASSERT(test_expr("[[1],[2],[3]] @ [[1,2,3]]", {1, 2, 3, 2, 4, 6, 3, 6, 9}, {3,3})); + BP_ASSERT(test_expr("[[1],[2],[3]] @ [[1,2,3]]", {1, 2, 3, 2, 4, 6, 3, 6, 9}, {3,3})); BP_ASSERT(test_expr("a=[1,2,3]; a[:, None] @ a[None,:]", {1, 2, 3, 2, 4, 6, 3, 6, 9}, {3,3})); + // 2×2 @ 2×2 → 2×2 + BP_ASSERT(test_expr( + "[[1, 2], [3, 4]] @ [[5, 6], [7, 8]]", + {19, 22, 43, 50}, // 1*5+2*7, 1*6+2*8, 3*5+4*7, 3*6+4*8 + {2, 2} + )); + + // 3×1×2 @ 2×3 → 3×1×3 (batched matmul) + BP_ASSERT(test_expr( + "[[[1,2]], [[3,4]], [[5,6]]] @ [[7,8,9], [10,11,12]]", + { + 27, 30, 33, // batch 0: [1,2]×[[7,8,9],[10,11,12]] + 61, 68, 75, // batch 1: [3,4]×... + 95,106,117 // batch 2: [5,6]×... + }, + {3, 1, 3} + )); BP_ASSERT(test_expr("abs(-1)+abs(0)+abs(1)", {2})); BP_ASSERT(test_expr("floor(-3.5)", {-4}, {})); From 7e162d6e4c670a0b6f42e1e678cd356f2cefcec1 Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Mon, 5 May 2025 23:37:46 +0200 Subject: [PATCH 14/25] const keyword --- include/arena_resource.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/arena_resource.hh b/include/arena_resource.hh index 6baa1cf..a01c847 100644 --- a/include/arena_resource.hh +++ b/include/arena_resource.hh @@ -101,7 +101,7 @@ public: #endif } - inline size_t get_size() { + inline size_t get_size() const{ return buffer_size_; } protected: From a384ac8402eedbcf71f53a98f4dfc46efb82be0a Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Mon, 5 May 2025 23:38:08 +0200 Subject: [PATCH 15/25] reworked mat_mult, doesn't work for tensors... --- include/array.hh | 62 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/include/array.hh b/include/array.hh index 4a56471..6b8d877 100644 --- a/include/array.hh +++ b/include/array.hh @@ -884,6 +884,7 @@ public: using namespace details; Shape trg_shape = index.range_.target_shape(); + std::cout << "Wrapping: " << print_shape(trg_shape) << std::endl; if (trg_shape.size() == 0) { Throw() << "Attempted to wrap scalar into Eigen Matrix"; } @@ -896,9 +897,6 @@ public: return v; } else {// (a.shape().size() > 2) { - - - uint rows = *(trg_shape.end() - 2); uint cols = *(trg_shape.end() - 1); @@ -1075,15 +1073,65 @@ public: static Array mat_mult(const Array &a, const Array &b) { //std::cout << "mat mult: " << print_vector(a.shape()) << " @ " << print_vector(b.shape()) << "\n"; - //std::cout << "Shape: ---------" << std::endl; - //std::cout << print_shape(a.shape()) << std::endl; - //std::cout << print_shape(b.shape()) << std::endl; + std::cout << "Shape: ---------" << std::endl; + std::cout << print_shape(a.shape()) << std::endl; + std::cout << print_shape(b.shape()) << std::endl; if (a.shape().size() == 0) Throw() << "Matmult can not multiply by scalar a." << "\n"; if (b.shape().size() == 0) Throw() << "Matmult can not multiply by scalar b." << "\n"; + Shape a_shape = a.shape(); + if (a_shape.size() == 1) { + a_shape.insert(a_shape.begin(), 1); + } + + + Shape b_shape = b.shape(); + if (b_shape.size() == 1) { + b_shape.push_back(1); + } + + + uint a_cols = *(a_shape.end() - 1), b_rows = *(b_shape.end() - 2); + + if (a_cols != b_rows) { + Throw() << "Matmult summing dimension mismatch: " << a_cols << " != " << b_rows << "\n"; + } + + a_shape.insert(a_shape.end(), 1); + // a_shape : (...,i,j,k,l,1) + b_shape.insert(b_shape.end() - 2, 1); + // b_shape : (...,i,j,1,l,m) + + + Shape result_shape(MultiIdxRange::broadcast_common_shape(a_shape, b_shape));// shape (..., i,j,k,l,m) + result_shape.erase(result_shape.end() - 2); + + Array result(result_shape); + //std::cout << print_shape(result_shape) << std::endl; + + bool should_transpose = a.shape().size() == 1; + + for (MultiIdx + result_idx(result.range()), + a_idx(a.range()), + b_idx(b.range()); result_idx.valid();) { + + Eigen::MatrixX m_a = wrap_array(a, a_idx); + Eigen::MatrixX m_b = wrap_array(b, b_idx); + if (should_transpose) m_a = m_a.transpose(); + + Array matmult = unwrap_array(m_a * m_b); + + for (MultiIdx mult_idx(matmult.range()); mult_idx.valid(); mult_idx.inc_src(), result_idx.inc_src()) { + result.elements_[result_idx.idx_src()] = matmult[mult_idx]; //TODO + //std::cout << result_idx.idx_src() << std::endl; + } + } + return result; + /* auto m_a = wrap_array(a); auto m_b = wrap_array(b); @@ -1094,7 +1142,7 @@ public: if (m_a.cols() != m_b.rows()) Throw() << "Matmult summing dimension mismatch: " << m_a.cols() << " != " << m_b.rows() << "\n"; - return unwrap_array(m_a * m_b, (a.shape().size() == 1 || b.shape().size() == 1)); + return unwrap_array(m_a * m_b, (a.shape().size() == 1 || b.shape().size() == 1));*/ //Shape result_shape = result.shape(); /*auto final_range = MultiIdxRange(result.shape()).full(); From f096b04f702f6b99476ed5f1aa9eb6baed26c6f5 Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Tue, 6 May 2025 20:27:15 +0200 Subject: [PATCH 16/25] Matmult for tensors+ Moved diag tests below matmult tests --- include/array.hh | 54 ++++++++++++++++++++++++++++++++------------- test/test_parser.cc | 12 +++++----- 2 files changed, 45 insertions(+), 21 deletions(-) diff --git a/include/array.hh b/include/array.hh index 6b8d877..ce120f6 100644 --- a/include/array.hh +++ b/include/array.hh @@ -892,7 +892,7 @@ public: uint len = trg_shape[0]; Eigen::VectorX v(len); for (uint i = 0; i < len && index.valid(); i++, index.inc_trg()) { - v(i) = ScalarWrapper(a.elements_[index.idx_trg()]); + v(i) = ScalarWrapper(a[index]); } return v; } @@ -903,7 +903,7 @@ public: Eigen::MatrixX m(rows, cols); for (uint row = 0; row < rows; row++) { for (uint col = 0; col < cols && index.valid(); col++, index.inc_trg()) { - m(row, col) = ScalarWrapper(a.elements_[index.idx_trg()]); + m(row, col) = ScalarWrapper(a[index]); } } return m; @@ -1073,9 +1073,9 @@ public: static Array mat_mult(const Array &a, const Array &b) { //std::cout << "mat mult: " << print_vector(a.shape()) << " @ " << print_vector(b.shape()) << "\n"; - std::cout << "Shape: ---------" << std::endl; - std::cout << print_shape(a.shape()) << std::endl; - std::cout << print_shape(b.shape()) << std::endl; + //std::cout << "Shape: ---------" << std::endl; + //std::cout << print_shape(a.shape()) << std::endl; + //std::cout << print_shape(b.shape()) << std::endl; if (a.shape().size() == 0) Throw() << "Matmult can not multiply by scalar a." << "\n"; @@ -1085,52 +1085,76 @@ public: Shape a_shape = a.shape(); if (a_shape.size() == 1) { a_shape.insert(a_shape.begin(), 1); + // shape (l) -> (1,l) } Shape b_shape = b.shape(); if (b_shape.size() == 1) { b_shape.push_back(1); + // shape (l) -> (l,1) } uint a_cols = *(a_shape.end() - 1), b_rows = *(b_shape.end() - 2); - if (a_cols != b_rows) { + if (a_cols != b_rows) { // l != l Throw() << "Matmult summing dimension mismatch: " << a_cols << " != " << b_rows << "\n"; } + //Add for common shape a_shape.insert(a_shape.end(), 1); // a_shape : (...,i,j,k,l,1) b_shape.insert(b_shape.end() - 2, 1); // b_shape : (...,i,j,1,l,m) - Shape result_shape(MultiIdxRange::broadcast_common_shape(a_shape, b_shape));// shape (..., i,j,k,l,m) - result_shape.erase(result_shape.end() - 2); + Shape result_shape(MultiIdxRange::broadcast_common_shape(a_shape, b_shape)); + // r_shape (..., i,j,k,l,m) + MultiIdxRange a_range(MultiIdxRange(a_shape).full().broadcast(result_shape)); + // a_shape (..., 1,1,k,l,1) -> (...,i,j,k,l,1) + MultiIdxRange b_range(MultiIdxRange(b_shape).full().broadcast(result_shape)); + // b_shape (..., 1,1,1,l,m) -> (...,i,j,1,l,m) + + //Remove for computation + a_range.target_transpose_.erase(a_range.target_transpose_.end() - 1); + // a_shape (..., i,j,k,l, ) + b_range.target_transpose_.erase(b_range.target_transpose_.end() - 3); + // b_shape (..., i,j, ,l,m) + result_shape.erase(result_shape.end() - 2); + // r_shape (..., i,j,k, ,m) - Array result(result_shape); //std::cout << print_shape(result_shape) << std::endl; + Array result(result_shape); bool should_transpose = a.shape().size() == 1; for (MultiIdx result_idx(result.range()), - a_idx(a.range()), - b_idx(b.range()); result_idx.valid();) { + a_idx(a_range), + b_idx(b_range); result_idx.valid(); ) { Eigen::MatrixX m_a = wrap_array(a, a_idx); Eigen::MatrixX m_b = wrap_array(b, b_idx); - if (should_transpose) m_a = m_a.transpose(); Array matmult = unwrap_array(m_a * m_b); for (MultiIdx mult_idx(matmult.range()); mult_idx.valid(); mult_idx.inc_src(), result_idx.inc_src()) { - result.elements_[result_idx.idx_src()] = matmult[mult_idx]; //TODO - //std::cout << result_idx.idx_src() << std::endl; + result.elements_[result_idx.idx_src()] = matmult[mult_idx]; } } - return result; + + MultiIdxRange final_range(result.range()); + if (b.shape().size() == 1) { + final_range.remove_target_axis(absolute_idx(-1, result_shape.size())); + // shape (..., i,j,k,1) -> ...,j,k) + } + if (a.shape().size() == 1) { + final_range.remove_target_axis(absolute_idx(-2, result_shape.size())); + // shape (..., i,j,1,m) -> ...,j,m) + } + + return Array(result,final_range); /* auto m_a = wrap_array(a); auto m_b = wrap_array(b); diff --git a/test/test_parser.cc b/test/test_parser.cc index 49b3b66..4d20bf1 100644 --- a/test/test_parser.cc +++ b/test/test_parser.cc @@ -226,7 +226,7 @@ void test_expression() { BP_ASSERT(test_expr("25 % cs3", {1})); BP_ASSERT(test_expr("25 % cv4", {1, 0, 1})); - + BP_ASSERT(test_expr("[[1,2],[3,4]] @ [5,6]", { 17,39 }, { 2 })); BP_ASSERT(test_expr("[3, 4] @ [[1], [2]]", {11}, {1})); BP_ASSERT(test_expr("[3, 4, 1] @ [[1], [2], [3]]", {14}, {1})); @@ -237,17 +237,13 @@ void test_expression() { BP_ASSERT(test_expr("[[1],[2],[3]] @ [[1,2,3]]", {1, 2, 3, 2, 4, 6, 3, 6, 9}, {3,3})); BP_ASSERT(test_expr("a=[1,2,3]; a[:, None] @ a[None,:]", {1, 2, 3, 2, 4, 6, 3, 6, 9}, {3,3})); - BP_ASSERT(test_expr("diag([1,2,3])", {1, 0, 0, 0, 2, 0, 0, 0, 3}, {3,3})); - BP_ASSERT(test_expr("diag([[1,5],[9,2]])", { 1, 2 }, {2})); - BP_ASSERT(test_expr("diag(diag([1,2,3]))", { 1, 2, 3 }, {3})); - // 2×2 @ 2×2 → 2×2 BP_ASSERT(test_expr( "[[1, 2], [3, 4]] @ [[5, 6], [7, 8]]", {19, 22, 43, 50}, // 1*5+2*7, 1*6+2*8, 3*5+4*7, 3*6+4*8 {2, 2} )); - + // 3×1×2 @ 2×3 → 3×1×3 (batched matmul) BP_ASSERT(test_expr( "[[[1,2]], [[3,4]], [[5,6]]] @ [[7,8,9], [10,11,12]]", @@ -259,6 +255,10 @@ void test_expression() { {3, 1, 3} )); + BP_ASSERT(test_expr("diag([1,2,3])", { 1, 0, 0, 0, 2, 0, 0, 0, 3 }, { 3,3 })); + BP_ASSERT(test_expr("diag([[1,5],[9,2]])", { 1, 2 }, { 2 })); + BP_ASSERT(test_expr("diag(diag([1,2,3]))", { 1, 2, 3 }, { 3 })); + BP_ASSERT(test_expr("abs(-1)+abs(0)+abs(1)", {2})); BP_ASSERT(test_expr("floor(-3.5)", {-4}, {})); BP_ASSERT(test_expr("ceil(-3.5)", {-3}, {})); From ada89c0f86c55ebc149d3d355736f3dcd8d17773 Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Tue, 6 May 2025 20:50:34 +0200 Subject: [PATCH 17/25] FetchContent_Declare --- CMakeLists.txt | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aa6181a..9740d55 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,6 +5,8 @@ option(SANITIZER_ON "Whether to use AddressSanitizer (asan) in the DEBUG configu message(STATUS "CMakeLists.txt - BParser") +include(FetchContent) + # CLANG #set(CMAKE_CXX_FLAGS "-std=c++14 -finline-hint-functions -pedantic-errors -Werror=pedantic -Wall -Wextra -Werror -Wno-long-long -Wno-strict-aliasing -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION") @@ -176,7 +178,14 @@ message(STATUS "=======================================================") message(STATUS "====== EIGEN ==========================================") message(STATUS "=======================================================") -find_package(Eigen3 CONFIG REQUIRED PATHS "/usr/local") #Remember to install Eigen for this to work. See eigen-3.x.x/INSTALL +#find_package(Eigen3 CONFIG REQUIRED) #Remember to install Eigen for this to work. See eigen-3.x.x/INSTALL +FetchContent_Declare( + Eigen + DOWNLOAD_EXTRACT_TIMESTAMP + URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz + FIND_PACKAGE_ARGS NAMES Eigen3 #find_package(Eigen NAMES Eigen3) +) +FetchContent_MakeAvailable(Eigen) message(STATUS "-------------------------------------------------------") message(STATUS "EIGEN_ROOT = ${EIGEN_ROOT}") From be6e36af9c8ebc5777dfa8a976090ea008e74230 Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Tue, 6 May 2025 21:06:16 +0200 Subject: [PATCH 18/25] Eigen install attempt 2 --- CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9740d55..c648884 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,6 +7,10 @@ message(STATUS "CMakeLists.txt - BParser") include(FetchContent) +if(POLICY CMP0135) + cmake_policy(SET CMP0135 NEW) +endif() + # CLANG #set(CMAKE_CXX_FLAGS "-std=c++14 -finline-hint-functions -pedantic-errors -Werror=pedantic -Wall -Wextra -Werror -Wno-long-long -Wno-strict-aliasing -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION") @@ -181,9 +185,9 @@ message(STATUS "=======================================================") #find_package(Eigen3 CONFIG REQUIRED) #Remember to install Eigen for this to work. See eigen-3.x.x/INSTALL FetchContent_Declare( Eigen - DOWNLOAD_EXTRACT_TIMESTAMP URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz - FIND_PACKAGE_ARGS NAMES Eigen3 #find_package(Eigen NAMES Eigen3) + EXCLUDE_FROM_ALL + FIND_PACKAGE_ARGS NAMES Eigen3 #same as find_package(Eigen NAMES Eigen3) ) FetchContent_MakeAvailable(Eigen) From 9bd4c50b2e3cefe42ba31e98efcf8becd4b043e3 Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Tue, 6 May 2025 21:14:35 +0200 Subject: [PATCH 19/25] Eigen install attempt 3 --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index c648884..a4d69fb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -182,6 +182,7 @@ message(STATUS "=======================================================") message(STATUS "====== EIGEN ==========================================") message(STATUS "=======================================================") +set(EIGEN_BUILD_CMAKE_PACKAGE TRUE) #find_package(Eigen3 CONFIG REQUIRED) #Remember to install Eigen for this to work. See eigen-3.x.x/INSTALL FetchContent_Declare( Eigen From 18b139319f67808109358bed4789a56f99d37a20 Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Tue, 6 May 2025 21:26:55 +0200 Subject: [PATCH 20/25] Eigen install attempt 4 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a4d69fb..945530a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -187,7 +187,7 @@ set(EIGEN_BUILD_CMAKE_PACKAGE TRUE) FetchContent_Declare( Eigen URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz - EXCLUDE_FROM_ALL + #EXCLUDE_FROM_ALL FIND_PACKAGE_ARGS NAMES Eigen3 #same as find_package(Eigen NAMES Eigen3) ) FetchContent_MakeAvailable(Eigen) From 5c8edbeebc946edc8564ef809b7108d17e48207e Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Tue, 6 May 2025 21:32:31 +0200 Subject: [PATCH 21/25] Eigen install attempt 5 --- CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 945530a..b7d67de 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -185,12 +185,12 @@ message(STATUS "=======================================================") set(EIGEN_BUILD_CMAKE_PACKAGE TRUE) #find_package(Eigen3 CONFIG REQUIRED) #Remember to install Eigen for this to work. See eigen-3.x.x/INSTALL FetchContent_Declare( - Eigen + Eigen3 URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz - #EXCLUDE_FROM_ALL - FIND_PACKAGE_ARGS NAMES Eigen3 #same as find_package(Eigen NAMES Eigen3) + EXCLUDE_FROM_ALL + FIND_PACKAGE_ARGS CONFIG REQUIRED #same as find_package(Eigen3 CONFIG REQUIRED) ) -FetchContent_MakeAvailable(Eigen) +FetchContent_MakeAvailable(Eigen3) message(STATUS "-------------------------------------------------------") message(STATUS "EIGEN_ROOT = ${EIGEN_ROOT}") From 188cc58efddb2f98cde061376c1d01750e8e6b81 Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Tue, 6 May 2025 21:58:47 +0200 Subject: [PATCH 22/25] Eigen install attempt 6, now via apt-get --- .github/workflows/ccpp.yml | 4 +--- CMakeLists.txt | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ccpp.yml b/.github/workflows/ccpp.yml index 56deb1e..781c7f5 100644 --- a/.github/workflows/ccpp.yml +++ b/.github/workflows/ccpp.yml @@ -266,9 +266,7 @@ jobs: sudo apt-get install -y libboost-all-dev # install eigen - wget https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz &&\ - tar -xzf eigen-3.4.0.tar.gz &&\ - sudo mv eigen-3.4.0 /usr/local + sudo apt install libeigen3-dev diff --git a/CMakeLists.txt b/CMakeLists.txt index b7d67de..ae5cbef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -182,13 +182,12 @@ message(STATUS "=======================================================") message(STATUS "====== EIGEN ==========================================") message(STATUS "=======================================================") -set(EIGEN_BUILD_CMAKE_PACKAGE TRUE) #find_package(Eigen3 CONFIG REQUIRED) #Remember to install Eigen for this to work. See eigen-3.x.x/INSTALL FetchContent_Declare( Eigen3 URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.tar.gz EXCLUDE_FROM_ALL - FIND_PACKAGE_ARGS CONFIG REQUIRED #same as find_package(Eigen3 CONFIG REQUIRED) + FIND_PACKAGE_ARGS CONFIG #same as find_package(Eigen3 CONFIG) ) FetchContent_MakeAvailable(Eigen3) From f8fd43b339e99d025443b85df464b274efa5831c Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Tue, 6 May 2025 22:32:27 +0200 Subject: [PATCH 23/25] forgot delete --- include/arena_alloc.hh | 1 + 1 file changed, 1 insertion(+) diff --git a/include/arena_alloc.hh b/include/arena_alloc.hh index 77b1824..0bde0b6 100644 --- a/include/arena_alloc.hh +++ b/include/arena_alloc.hh @@ -53,6 +53,7 @@ struct ArenaAlloc { //align_free(base_); if (buffer != nullptr) { align_free(buffer); + delete arena; } } From 9b8bd1b3bfbd969d2ecfdb6e328250b7bb97e76d Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Tue, 6 May 2025 23:04:34 +0200 Subject: [PATCH 24/25] PatchArena in test_speed and commented debug print --- include/array.hh | 2 +- test/test_speed.cc | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/array.hh b/include/array.hh index ce120f6..3a56236 100644 --- a/include/array.hh +++ b/include/array.hh @@ -884,7 +884,7 @@ public: using namespace details; Shape trg_shape = index.range_.target_shape(); - std::cout << "Wrapping: " << print_shape(trg_shape) << std::endl; + //std::cout << "Wrapping: " << print_shape(trg_shape) << std::endl; if (trg_shape.size() == 0) { Throw() << "Attempted to wrap scalar into Eigen Matrix"; } diff --git a/test/test_speed.cc b/test/test_speed.cc index 2bdb7ce..6ac4fb4 100644 --- a/test/test_speed.cc +++ b/test/test_speed.cc @@ -23,6 +23,7 @@ #include "test_tools.hh" #include "arena_alloc.hh" +#include "arena_resource.hh" // Optimized structure, holds data in common arena struct ExprData { @@ -31,7 +32,8 @@ struct ExprData { { uint simd_bytes = sizeof(double) * simd_size; - arena = std::make_shared(simd_bytes, 512 * 1012); + patch_arena = std::make_shared(512 * 1012, simd_bytes); + arena = std::make_shared(*patch_arena);//(simd_bytes, 512 * 1012); v1 = arena->create_array(vec_size * 3); fill_seq(v1, 100, 100 + 3 * vec_size); v2 = arena->create_array(vec_size * 3); @@ -54,6 +56,7 @@ struct ExprData { ~ExprData() {} + std::shared_ptr patch_arena; std::shared_ptr arena; uint vec_size; uint simd_size; @@ -266,7 +269,7 @@ void test_expr(std::string expr, uint block_size, void (* func)(ExprData&)) { //std::cout << "vres: " << vres << ", " << vres + block_size << ", " << vres + 2*vec_size << "\n"; //std::cout << "Symbols: " << print_vector(p.symbols()) << "\n"; //std::cout.flush(); - p.compile(data1.arena); + p.compile(data1.patch_arena); std::vector ss = std::vector(data1.subset, data1.subset+vec_size/simd_size); p.set_subset(ss); From 921a13e9394b829628172c2c8979fd15159d61fb Mon Sep 17 00:00:00 2001 From: lukas-valenta-tul Date: Thu, 8 May 2025 17:29:47 +0200 Subject: [PATCH 25/25] tr - matrix trace --- include/array.hh | 14 +++++++++++++- include/grammar.impl.hh | 1 + test/test_parser.cc | 2 ++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/include/array.hh b/include/array.hh index 3a56236..b590b27 100644 --- a/include/array.hh +++ b/include/array.hh @@ -1193,8 +1193,9 @@ public: } static Array diag(const Array& a) { - if (a.shape().size() == 0) + if (a.shape().size() == 0) { return a; + } if (a.shape().size() == 1) { // diag -> matrix return unwrap_array(wrap_array(a).asDiagonal()); @@ -1204,6 +1205,17 @@ public: } + static Array trace(const Array& a) { + if (a.shape().size() != 2) { + Throw() << "Function trace can only be used for matrices" << "\n"; + } + Shape s; //empty Shape for scalar + Array r(s); + r.elements_[0U] = *wrap_array(a).trace(); + return r; + //return full_({}, *wrap_array(a).trace()); + } + static Array flatten(const Array &tensor) { uint n_elements = shape_size(tensor.shape()); Shape res_shape(1, n_elements); diff --git a/include/grammar.impl.hh b/include/grammar.impl.hh index e4ec1c7..8a5ad5c 100644 --- a/include/grammar.impl.hh +++ b/include/grammar.impl.hh @@ -180,6 +180,7 @@ struct grammar : qi::grammar { FN("minimum", binary_array<_min_>()) FN("maximum", binary_array<_max_>()) FN("diag" , &Array::diag) + FN("tr" , &Array::trace) ; unary_op.add diff --git a/test/test_parser.cc b/test/test_parser.cc index 4d20bf1..28b3c20 100644 --- a/test/test_parser.cc +++ b/test/test_parser.cc @@ -259,6 +259,8 @@ void test_expression() { BP_ASSERT(test_expr("diag([[1,5],[9,2]])", { 1, 2 }, { 2 })); BP_ASSERT(test_expr("diag(diag([1,2,3]))", { 1, 2, 3 }, { 3 })); + BP_ASSERT(test_expr("tr([[1,9,9],[9,1,9],[9,9,1]])", { 3 }, {})); + BP_ASSERT(test_expr("abs(-1)+abs(0)+abs(1)", {2})); BP_ASSERT(test_expr("floor(-3.5)", {-4}, {})); BP_ASSERT(test_expr("ceil(-3.5)", {-3}, {}));