diff --git a/include/graphblas/reference/coordinates.hpp b/include/graphblas/reference/coordinates.hpp
index 2a1f90137..d808fc631 100644
--- a/include/graphblas/reference/coordinates.hpp
+++ b/include/graphblas/reference/coordinates.hpp
@@ -59,9 +59,32 @@ namespace grb {
/**
* This class encapsulates everything needed to store a sparse set of 1D
- * coordinates. Its use is internal via, e.g., grb::Vector< T, reference, C >.
- * All functions needed to rebuild or update sparsity information are
- * encapsulated here.
+ * coordinates.
+ *
+ * Its use is internal via, e.g., grb::Vector< T, reference, C >. All
+ * functions needed to rebuild or update sparsity information are encapsulated
+ * here.
+ *
+ * An instance of this class should always be initialised using one of the
+ * following functions:
+ * -# set
+ * -# set_seq
+ * -# set_ompPar
+ * -# setDense
+ * The use of both the default constructor and that of setDense does not lead
+ * to a fully initialised instance and may only be used in restricted use
+ * cases.
+ *
+ * The coordinates are in essense traditional sparse accumulators. There are
+ * three main arrays in which sparsity information is stored:
+ * -# _assigned
+ * -# _stack
+ * -# _buffer
+ *
+ * In the case of shared-memory parallel backends, the size of the buffer
+ * depends on the number of threads the instance needs to support. (Or
+ * rather, this is the case for all presently-supported shared-memory parallel
+ * backends).
*/
template<>
class Coordinates< reference > {
@@ -104,6 +127,9 @@ namespace grb {
/** Per-thread capacity for parallel stack updates. */
size_t _buf;
+ /** Number of threads for which these coordinates have been initialised. */
+ size_t _threads;
+
/**
* Increments the number of nonzeroes in the current thread-local stack.
*
@@ -163,6 +189,7 @@ namespace grb {
_n = 0;
_cap = 0;
_buf = 0;
+ _threads = 0;
return;
}
@@ -182,6 +209,11 @@ namespace grb {
// initialise
_n = 0;
_cap = dim;
+#ifdef _H_GRB_REFERENCE_OMP_COORDINATES
+ _threads = config::OMP::threads();
+#else
+ _threads = 1;
+#endif
}
/**
@@ -370,7 +402,7 @@ namespace grb {
/** Base constructor. Creates an empty coordinates list of dimension 0. */
inline Coordinates() noexcept :
_assigned( nullptr ), _stack( nullptr ), _buffer( nullptr ),
- _n( 0 ), _cap( 0 ), _buf( 0 )
+ _n( 0 ), _cap( 0 ), _buf( 0 ), _threads( 0 )
{}
/**
@@ -379,12 +411,12 @@ namespace grb {
*/
inline Coordinates( Coordinates &&x ) noexcept :
_assigned( x._assigned ), _stack( x._stack ), _buffer( x._buffer ),
- _n( x._n ), _cap( x._cap ), _buf( x._buf )
+ _n( x._n ), _cap( x._cap ), _buf( x._buf ), _threads( x._threads )
{
x._assigned = nullptr;
x._stack = nullptr;
x._buffer = nullptr;
- x._n = x._cap = x._buf = 0;
+ x._n = x._cap = x._buf = x._threads = 0;
}
/**
@@ -396,7 +428,7 @@ namespace grb {
*/
inline Coordinates( const Coordinates &x ) noexcept :
_assigned( x._assigned ), _stack( x._stack ), _buffer( x._buffer ),
- _n( x._n ), _cap( x._cap ), _buf( x._buf )
+ _n( x._n ), _cap( x._cap ), _buf( x._buf ), _threads( x._threads )
{
// self-assignment is a programming error
assert( this != &x );
@@ -426,9 +458,10 @@ namespace grb {
_n = x._n;
_cap = x._cap;
_buf = x._buf;
+ _threads = x._threads;
x._assigned = NULL;
x._stack = x._buffer = NULL;
- x._n = x._cap = x._buf = 0;
+ x._n = x._cap = x._buf = x._threads = 0;
return *this;
}
@@ -440,6 +473,105 @@ namespace grb {
// blocks are not managed by this class)
}
+ /**
+ * Checks whether a new OpenMP parallel section returns the same number of
+ * threads that was given during initialisation of this instance.
+ *
+ * This variant automatically determines if it is in a sequential or
+ * (OpenMP) parallel context.
+ *
+ * This is intended exclusively for use within a debug mode, as the test
+ * has the significant overhead of opening up an OpenMP parallel section.
+ *
+ * @returns true if the number of threads matches that during
+ * setup;
+ * @returns false otherwise.
+ *
+ * An assertion will trip in debug mode instead of returning false,
+ * however.
+ */
+ bool checkNumThreads() const noexcept {
+ if( omp_in_parallel() ) {
+ return checkNumThreadsPar();
+ } else {
+ return checkNumThreadsSeq();
+ }
+ }
+
+ /**
+ * Checks whether a new OpenMP parallel section returns the same number of
+ * threads that was given during initialisation of this instance.
+ *
+ * This variant should be called from a sequential context.
+ *
+ * This is intended exclusively for use within a debug mode, as the test
+ * has the significant overhead of opening up an OpenMP parallel section.
+ *
+ * @returns true if the number of threads matches that during
+ * setup;
+ * @returns false otherwise.
+ *
+ * An assertion will trip in debug mode instead of returning false,
+ * however.
+ */
+ bool checkNumThreadsSeq() const noexcept {
+ // guard against (valid) use of non-initialised coordinates
+ if( _threads == 0 ) {
+ return true;
+ }
+ // then, get actual number of threads now active and compare
+ const size_t actualThreads = config::OMP::threads();
+ if( actualThreads != _threads ) {
+ std::cerr << "\t Error: coordinates instance was set for " << _threads
+ << " threads, however, current OpenMP parallel region reports "
+ << actualThreads << " threads instead!\n";
+#ifndef NDEBUG
+ const bool num_omp_threads_has_changed = false;
+ assert( num_omp_threads_has_changed );
+#endif
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Checks whether a new OpenMP parallel section returns the same number of
+ * threads that was given during initialisation of this instance.
+ *
+ * This variant should be called from a sequential context.
+ *
+ * This is intended for use within a debug mode, but could conceivably be
+ * used defensively in performance mode as well (there is no significant
+ * performance overhead for this variant).
+ *
+ * @returns true if the number of threads matches that during
+ * setup;
+ * @returns false otherwise.
+ *
+ * An assertion will trip in debug mode instead of returning false,
+ * however.
+ */
+ bool checkNumThreadsPar() const noexcept {
+ // guard against (valid) use of non-initialised coordinates
+ if( _threads == 0 ) {
+ return true;
+ }
+ // then, get actual number of threads now active and compare
+ const size_t actualThreads =
+ static_cast< size_t >( omp_get_num_threads() );
+ if( actualThreads != _threads ) {
+ std::cerr << "\t Error: coordinates instance was set for " << _threads
+ << " threads, however, current OpenMP parallel region reports "
+ << actualThreads << " threads instead!\n";
+#ifndef NDEBUG
+ const bool num_omp_threads_has_changed = false;
+ assert( num_omp_threads_has_changed );
+#endif
+ return false;
+ }
+ return true;
+ }
+
/**
* @returns An empty thread-local stack for new nonzeroes.
*/
@@ -457,8 +589,9 @@ namespace grb {
}
/**
- * Sets the data structure. A call to this function sets the number of
- * coordinates to zero.
+ * Sets the data structure.
+ *
+ * A call to this function sets the number of coordinates to zero.
*
* @param[in] arr Pointer to an array of size #arraySize. This array is
* is managed by a container outside this class (and thus
@@ -470,6 +603,8 @@ namespace grb {
* managed by a container outside this class (and thus will
* not be freed on destruction of this instance).
* @param[in] dim Size (dimension) of this vector, in number of elements.
+ * @param[in] threads The number of threads that \a buf has been constructed
+ * for.
*
* The memory area \a raw will be reset to reflect an empty coordinate set.
*
@@ -565,6 +700,7 @@ namespace grb {
_n = dim;
_cap = dim;
_buf = 0;
+ _threads = 0;
}
/**
diff --git a/include/graphblas/reference/vector.hpp b/include/graphblas/reference/vector.hpp
index 4be006ed5..caa58aa4a 100644
--- a/include/graphblas/reference/vector.hpp
+++ b/include/graphblas/reference/vector.hpp
@@ -653,6 +653,9 @@ namespace grb {
/** The maximum value of #position. */
size_t max;
+ /** The size of the vector (not necessarily equal to \a max). */
+ size_t n;
+
/** The local process ID. */
const size_t s;
@@ -689,10 +692,14 @@ namespace grb {
// if not, go to the next valid value:
if( container->_coordinates.isEmpty() ) {
max = 0;
- } else if( container->_coordinates.isDense() ) {
- max = container->_coordinates.size();
+ n = 0;
} else {
- max = container->_coordinates.nonzeroes();
+ n = container->_coordinates.size();
+ if( container->_coordinates.isDense() ) {
+ max = n;
+ } else {
+ max = container->_coordinates.nonzeroes();
+ }
}
if( position < max ) {
setValue();
@@ -712,7 +719,7 @@ namespace grb {
* \note If the \a other iterator is not derived from the same container
* as this iterator, the result is undefined.
*/
- bool equal( const ConstIterator & other ) const noexcept {
+ bool equal( const ConstIterator &other ) const noexcept {
return other.position == position;
}
@@ -729,7 +736,7 @@ namespace grb {
}
assert( container->_coordinates.assigned( index ) );
const size_t global_index = ActiveDistribution::local_index_to_global(
- index, size( *container ), s, P );
+ index, n, s, P );
#ifdef _DEBUG
std::cout << "\t ConstIterator at process " << s << " / " << P
<< " translated index " << index << " to " << global_index << "\n";
@@ -742,7 +749,7 @@ namespace grb {
/** Default constructor. */
ConstIterator() noexcept :
- container( nullptr ), position( 0 ), max( 0 ),
+ container( nullptr ), position( 0 ), max( 0 ), n( 0 ),
s( grb::spmd< spmd_backend >::pid() ),
P( grb::spmd< spmd_backend >::nprocs() )
{}
@@ -751,7 +758,7 @@ namespace grb {
ConstIterator( const ConstIterator &other ) noexcept :
container( other.container ),
value( other.value ), position( other.position ),
- max( other.max ),
+ max( other.max ), n( other.n ),
s( other.s ), P( other.P )
{}
@@ -762,6 +769,7 @@ namespace grb {
std::swap( value, other.value );
std::swap( position, other.position );
std::swap( max, other.max );
+ std::swap( n, other.n );
}
/** Copy assignment. */
@@ -770,6 +778,7 @@ namespace grb {
value = other.value;
position = other.position;
max = other.max;
+ n = other.n;
assert( s == other.s );
assert( P == other.P );
return *this;
@@ -781,6 +790,7 @@ namespace grb {
std::swap( value, other.value );
std::swap( position, other.position );
std::swap( max, other.max );
+ std::swap( n, other.n );
assert( s == other.s );
assert( P == other.P );
return *this;
@@ -1308,6 +1318,9 @@ namespace grb {
template< typename D, typename C >
inline C & getCoordinates( Vector< D, reference, C > &x ) noexcept {
+#if defined(_H_GRB_REFERENCE_OMP_VECTOR) && !defined(NDEBUG)
+ (void) x._coordinates.checkNumThreads();
+#endif
return x._coordinates;
}
@@ -1315,6 +1328,9 @@ namespace grb {
inline const C & getCoordinates(
const Vector< D, reference, C > &x
) noexcept {
+#if defined(_H_GRB_REFERENCE_OMP_VECTOR) && !defined(NDEBUG)
+ (void) x._coordinates.checkNumThreads();
+#endif
return x._coordinates;
}