diff --git a/include/graphblas/reference/coordinates.hpp b/include/graphblas/reference/coordinates.hpp index 2a1f90137..d808fc631 100644 --- a/include/graphblas/reference/coordinates.hpp +++ b/include/graphblas/reference/coordinates.hpp @@ -59,9 +59,32 @@ namespace grb { /** * This class encapsulates everything needed to store a sparse set of 1D - * coordinates. Its use is internal via, e.g., grb::Vector< T, reference, C >. - * All functions needed to rebuild or update sparsity information are - * encapsulated here. + * coordinates. + * + * Its use is internal via, e.g., grb::Vector< T, reference, C >. All + * functions needed to rebuild or update sparsity information are encapsulated + * here. + * + * An instance of this class should always be initialised using one of the + * following functions: + * -# set + * -# set_seq + * -# set_ompPar + * -# setDense + * The use of both the default constructor and that of setDense does not lead + * to a fully initialised instance and may only be used in restricted use + * cases. + * + * The coordinates are in essense traditional sparse accumulators. There are + * three main arrays in which sparsity information is stored: + * -# _assigned + * -# _stack + * -# _buffer + * + * In the case of shared-memory parallel backends, the size of the buffer + * depends on the number of threads the instance needs to support. (Or + * rather, this is the case for all presently-supported shared-memory parallel + * backends). */ template<> class Coordinates< reference > { @@ -104,6 +127,9 @@ namespace grb { /** Per-thread capacity for parallel stack updates. */ size_t _buf; + /** Number of threads for which these coordinates have been initialised. */ + size_t _threads; + /** * Increments the number of nonzeroes in the current thread-local stack. * @@ -163,6 +189,7 @@ namespace grb { _n = 0; _cap = 0; _buf = 0; + _threads = 0; return; } @@ -182,6 +209,11 @@ namespace grb { // initialise _n = 0; _cap = dim; +#ifdef _H_GRB_REFERENCE_OMP_COORDINATES + _threads = config::OMP::threads(); +#else + _threads = 1; +#endif } /** @@ -370,7 +402,7 @@ namespace grb { /** Base constructor. Creates an empty coordinates list of dimension 0. */ inline Coordinates() noexcept : _assigned( nullptr ), _stack( nullptr ), _buffer( nullptr ), - _n( 0 ), _cap( 0 ), _buf( 0 ) + _n( 0 ), _cap( 0 ), _buf( 0 ), _threads( 0 ) {} /** @@ -379,12 +411,12 @@ namespace grb { */ inline Coordinates( Coordinates &&x ) noexcept : _assigned( x._assigned ), _stack( x._stack ), _buffer( x._buffer ), - _n( x._n ), _cap( x._cap ), _buf( x._buf ) + _n( x._n ), _cap( x._cap ), _buf( x._buf ), _threads( x._threads ) { x._assigned = nullptr; x._stack = nullptr; x._buffer = nullptr; - x._n = x._cap = x._buf = 0; + x._n = x._cap = x._buf = x._threads = 0; } /** @@ -396,7 +428,7 @@ namespace grb { */ inline Coordinates( const Coordinates &x ) noexcept : _assigned( x._assigned ), _stack( x._stack ), _buffer( x._buffer ), - _n( x._n ), _cap( x._cap ), _buf( x._buf ) + _n( x._n ), _cap( x._cap ), _buf( x._buf ), _threads( x._threads ) { // self-assignment is a programming error assert( this != &x ); @@ -426,9 +458,10 @@ namespace grb { _n = x._n; _cap = x._cap; _buf = x._buf; + _threads = x._threads; x._assigned = NULL; x._stack = x._buffer = NULL; - x._n = x._cap = x._buf = 0; + x._n = x._cap = x._buf = x._threads = 0; return *this; } @@ -440,6 +473,105 @@ namespace grb { // blocks are not managed by this class) } + /** + * Checks whether a new OpenMP parallel section returns the same number of + * threads that was given during initialisation of this instance. + * + * This variant automatically determines if it is in a sequential or + * (OpenMP) parallel context. + * + * This is intended exclusively for use within a debug mode, as the test + * has the significant overhead of opening up an OpenMP parallel section. + * + * @returns true if the number of threads matches that during + * setup; + * @returns false otherwise. + * + * An assertion will trip in debug mode instead of returning false, + * however. + */ + bool checkNumThreads() const noexcept { + if( omp_in_parallel() ) { + return checkNumThreadsPar(); + } else { + return checkNumThreadsSeq(); + } + } + + /** + * Checks whether a new OpenMP parallel section returns the same number of + * threads that was given during initialisation of this instance. + * + * This variant should be called from a sequential context. + * + * This is intended exclusively for use within a debug mode, as the test + * has the significant overhead of opening up an OpenMP parallel section. + * + * @returns true if the number of threads matches that during + * setup; + * @returns false otherwise. + * + * An assertion will trip in debug mode instead of returning false, + * however. + */ + bool checkNumThreadsSeq() const noexcept { + // guard against (valid) use of non-initialised coordinates + if( _threads == 0 ) { + return true; + } + // then, get actual number of threads now active and compare + const size_t actualThreads = config::OMP::threads(); + if( actualThreads != _threads ) { + std::cerr << "\t Error: coordinates instance was set for " << _threads + << " threads, however, current OpenMP parallel region reports " + << actualThreads << " threads instead!\n"; +#ifndef NDEBUG + const bool num_omp_threads_has_changed = false; + assert( num_omp_threads_has_changed ); +#endif + return false; + } + return true; + } + + /** + * Checks whether a new OpenMP parallel section returns the same number of + * threads that was given during initialisation of this instance. + * + * This variant should be called from a sequential context. + * + * This is intended for use within a debug mode, but could conceivably be + * used defensively in performance mode as well (there is no significant + * performance overhead for this variant). + * + * @returns true if the number of threads matches that during + * setup; + * @returns false otherwise. + * + * An assertion will trip in debug mode instead of returning false, + * however. + */ + bool checkNumThreadsPar() const noexcept { + // guard against (valid) use of non-initialised coordinates + if( _threads == 0 ) { + return true; + } + // then, get actual number of threads now active and compare + const size_t actualThreads = + static_cast< size_t >( omp_get_num_threads() ); + if( actualThreads != _threads ) { + std::cerr << "\t Error: coordinates instance was set for " << _threads + << " threads, however, current OpenMP parallel region reports " + << actualThreads << " threads instead!\n"; +#ifndef NDEBUG + const bool num_omp_threads_has_changed = false; + assert( num_omp_threads_has_changed ); +#endif + return false; + } + return true; + } + /** * @returns An empty thread-local stack for new nonzeroes. */ @@ -457,8 +589,9 @@ namespace grb { } /** - * Sets the data structure. A call to this function sets the number of - * coordinates to zero. + * Sets the data structure. + * + * A call to this function sets the number of coordinates to zero. * * @param[in] arr Pointer to an array of size #arraySize. This array is * is managed by a container outside this class (and thus @@ -470,6 +603,8 @@ namespace grb { * managed by a container outside this class (and thus will * not be freed on destruction of this instance). * @param[in] dim Size (dimension) of this vector, in number of elements. + * @param[in] threads The number of threads that \a buf has been constructed + * for. * * The memory area \a raw will be reset to reflect an empty coordinate set. * @@ -565,6 +700,7 @@ namespace grb { _n = dim; _cap = dim; _buf = 0; + _threads = 0; } /** diff --git a/include/graphblas/reference/vector.hpp b/include/graphblas/reference/vector.hpp index 4be006ed5..caa58aa4a 100644 --- a/include/graphblas/reference/vector.hpp +++ b/include/graphblas/reference/vector.hpp @@ -653,6 +653,9 @@ namespace grb { /** The maximum value of #position. */ size_t max; + /** The size of the vector (not necessarily equal to \a max). */ + size_t n; + /** The local process ID. */ const size_t s; @@ -689,10 +692,14 @@ namespace grb { // if not, go to the next valid value: if( container->_coordinates.isEmpty() ) { max = 0; - } else if( container->_coordinates.isDense() ) { - max = container->_coordinates.size(); + n = 0; } else { - max = container->_coordinates.nonzeroes(); + n = container->_coordinates.size(); + if( container->_coordinates.isDense() ) { + max = n; + } else { + max = container->_coordinates.nonzeroes(); + } } if( position < max ) { setValue(); @@ -712,7 +719,7 @@ namespace grb { * \note If the \a other iterator is not derived from the same container * as this iterator, the result is undefined. */ - bool equal( const ConstIterator & other ) const noexcept { + bool equal( const ConstIterator &other ) const noexcept { return other.position == position; } @@ -729,7 +736,7 @@ namespace grb { } assert( container->_coordinates.assigned( index ) ); const size_t global_index = ActiveDistribution::local_index_to_global( - index, size( *container ), s, P ); + index, n, s, P ); #ifdef _DEBUG std::cout << "\t ConstIterator at process " << s << " / " << P << " translated index " << index << " to " << global_index << "\n"; @@ -742,7 +749,7 @@ namespace grb { /** Default constructor. */ ConstIterator() noexcept : - container( nullptr ), position( 0 ), max( 0 ), + container( nullptr ), position( 0 ), max( 0 ), n( 0 ), s( grb::spmd< spmd_backend >::pid() ), P( grb::spmd< spmd_backend >::nprocs() ) {} @@ -751,7 +758,7 @@ namespace grb { ConstIterator( const ConstIterator &other ) noexcept : container( other.container ), value( other.value ), position( other.position ), - max( other.max ), + max( other.max ), n( other.n ), s( other.s ), P( other.P ) {} @@ -762,6 +769,7 @@ namespace grb { std::swap( value, other.value ); std::swap( position, other.position ); std::swap( max, other.max ); + std::swap( n, other.n ); } /** Copy assignment. */ @@ -770,6 +778,7 @@ namespace grb { value = other.value; position = other.position; max = other.max; + n = other.n; assert( s == other.s ); assert( P == other.P ); return *this; @@ -781,6 +790,7 @@ namespace grb { std::swap( value, other.value ); std::swap( position, other.position ); std::swap( max, other.max ); + std::swap( n, other.n ); assert( s == other.s ); assert( P == other.P ); return *this; @@ -1308,6 +1318,9 @@ namespace grb { template< typename D, typename C > inline C & getCoordinates( Vector< D, reference, C > &x ) noexcept { +#if defined(_H_GRB_REFERENCE_OMP_VECTOR) && !defined(NDEBUG) + (void) x._coordinates.checkNumThreads(); +#endif return x._coordinates; } @@ -1315,6 +1328,9 @@ namespace grb { inline const C & getCoordinates( const Vector< D, reference, C > &x ) noexcept { +#if defined(_H_GRB_REFERENCE_OMP_VECTOR) && !defined(NDEBUG) + (void) x._coordinates.checkNumThreads(); +#endif return x._coordinates; }