diff --git a/portability/toku_pthread.h b/portability/toku_pthread.h index 84c277362..2e404794c 100644 --- a/portability/toku_pthread.h +++ b/portability/toku_pthread.h @@ -41,6 +41,7 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include #include #include +#include #include "toku_assert.h" @@ -247,6 +248,31 @@ static inline void toku_cond_wait(toku_cond_t *cond, toku_mutex_t *mutex) { #if TOKU_PTHREAD_DEBUG invariant(mutex->locked); + + switch (random()%3) { + case 0: + // Sometimes we return immediately. This is one case of a + // spurious wakeup, which cond_wait is allowed to do. We + // want to exercise the users of this code to make sure + // they handle it. + return; + case 1: + // Sometimes we unlock the mutex and relock it. This is + // another case of a spurious wakeup. It's different from + // case 0, in that it gives someone else a chance to run, + // and then we can spuriously wake up after they have + // possibly released the mutex without setting the + // condition to the one that our caller expects. + toku_mutex_unlock(mutex); + toku_mutex_lock(mutex); + return; + default: + // The default is to simply call pthread_cond_wait (which + // may still spuriously wake up, but won't do it reliably + // enough to test the caller's use of the condition.) 
+ break; + } + mutex->locked = false; mutex->owner = 0; #endif diff --git a/util/frwlock.cc b/util/frwlock.cc index ce1e33e85..bbb1816aa 100644 --- a/util/frwlock.cc +++ b/util/frwlock.cc @@ -52,56 +52,19 @@ static int get_local_tid() { } void frwlock::init(toku_mutex_t *const mutex) { + m_queue = new std::queue(); m_mutex = mutex; - m_num_readers = 0; m_num_writers = 0; - m_num_want_write = 0; - m_num_want_read = 0; - m_num_signaled_readers = 0; m_num_expensive_want_write = 0; - - toku_cond_init(&m_wait_read, nullptr); - m_queue_item_read.cond = &m_wait_read; - m_queue_item_read.next = nullptr; - m_wait_read_is_in_queue = false; - m_current_writer_expensive = false; - m_read_wait_expensive = false; + m_current_writer_tid = -1; m_blocking_writer_context_id = CTX_INVALID; - - m_wait_head = nullptr; - m_wait_tail = nullptr; } void frwlock::deinit(void) { - toku_cond_destroy(&m_wait_read); -} - -bool frwlock::queue_is_empty(void) const { - return m_wait_head == nullptr; -} - -void frwlock::enq_item(queue_item *const item) { - paranoid_invariant_null(item->next); - if (m_wait_tail != nullptr) { - m_wait_tail->next = item; - } else { - paranoid_invariant_null(m_wait_head); - m_wait_head = item; - } - m_wait_tail = item; -} - -toku_cond_t *frwlock::deq_item(void) { - paranoid_invariant_notnull(m_wait_head); - paranoid_invariant_notnull(m_wait_tail); - queue_item *item = m_wait_head; - m_wait_head = m_wait_head->next; - if (m_wait_tail == item) { - m_wait_tail = nullptr; - } - return item->cond; + assert(m_queue->empty()); + delete m_queue; } // Prerequisite: Holds m_mutex. @@ -112,32 +75,25 @@ void frwlock::write_lock(bool expensive) { } toku_cond_t cond = TOKU_COND_INITIALIZER; - queue_item item = { .cond = &cond, .next = nullptr }; - this->enq_item(&item); - - // Wait for our turn. 
- ++m_num_want_write; + m_queue->push(frwlock_queueitem(&cond, + get_local_tid(), + toku_thread_get_context()->get_id())); if (expensive) { ++m_num_expensive_want_write; } - if (m_num_writers == 0 && m_num_want_write == 1) { - // We are the first to want a write lock. No new readers can get the lock. - // Set our thread id and context for proper instrumentation. - // see: toku_context_note_frwlock_contention() - m_current_writer_tid = get_local_tid(); - m_blocking_writer_context_id = toku_thread_get_context()->get_id(); + + while (m_num_writers || m_num_readers || m_queue->front().m_cond != &cond) { + // Wait until this cond variable is woken up. We could get a spurious wakeup. + toku_cond_wait(&cond, m_mutex); } - toku_cond_wait(&cond, m_mutex); + m_queue->pop(); toku_cond_destroy(&cond); // Now it's our turn. - paranoid_invariant(m_num_want_write > 0); paranoid_invariant_zero(m_num_readers); paranoid_invariant_zero(m_num_writers); - paranoid_invariant_zero(m_num_signaled_readers); // Not waiting anymore; grab the lock. - --m_num_want_write; if (expensive) { --m_num_expensive_want_write; } @@ -149,12 +105,10 @@ void frwlock::write_lock(bool expensive) { bool frwlock::try_write_lock(bool expensive) { toku_mutex_assert_locked(m_mutex); - if (m_num_readers > 0 || m_num_writers > 0 || m_num_signaled_readers > 0 || m_num_want_write > 0) { + if (m_num_readers > 0 || m_num_writers > 0 || !m_queue->empty()) { return false; } // No one holds the lock. Grant the write lock. - paranoid_invariant_zero(m_num_want_write); - paranoid_invariant_zero(m_num_want_read); m_num_writers = 1; m_current_writer_expensive = expensive; m_current_writer_tid = get_local_tid(); @@ -164,45 +118,38 @@ bool frwlock::try_write_lock(bool expensive) { void frwlock::read_lock(void) { toku_mutex_assert_locked(m_mutex); - if (m_num_writers > 0 || m_num_want_write > 0) { - if (!m_wait_read_is_in_queue) { - // Throw the read cond_t onto the queue. 
- paranoid_invariant(m_num_signaled_readers == m_num_want_read); - m_queue_item_read.next = nullptr; - this->enq_item(&m_queue_item_read); - m_wait_read_is_in_queue = true; - paranoid_invariant(!m_read_wait_expensive); - m_read_wait_expensive = ( - m_current_writer_expensive || - (m_num_expensive_want_write > 0) - ); - } - - // Note this contention event in engine status. + if (this->try_read_lock()) return; + + toku_cond_t cond = TOKU_COND_INITIALIZER; + m_queue->push(frwlock_queueitem(&cond)); + while (m_num_writers || m_queue->front().m_cond != &cond) { + // We know the queue isn't empty (since we are in it), so it's + // safe to call m_queue->front(). toku_context_note_frwlock_contention( toku_thread_get_context()->get_id(), - m_blocking_writer_context_id - ); - - // Wait for our turn. - ++m_num_want_read; - toku_cond_wait(&m_wait_read, m_mutex); - - // Now it's our turn. - paranoid_invariant_zero(m_num_writers); - paranoid_invariant(m_num_want_read > 0); - paranoid_invariant(m_num_signaled_readers > 0); - - // Not waiting anymore; grab the lock. - --m_num_want_read; - --m_num_signaled_readers; + m_blocking_writer_context_id); + toku_cond_wait(&cond, m_mutex); } + m_queue->pop(); + toku_cond_destroy(&cond); + paranoid_invariant_zero(m_num_writers); ++m_num_readers; + if (!m_queue->empty()) { + const frwlock_queueitem &qi = m_queue->front(); + if (qi.m_is_read) { + // The next guy is a reader, so wake him up too. + toku_cond_signal(qi.m_cond); + } else { + // The next guy is a writer, so he's the one whose context should be put. + m_current_writer_tid = qi.m_writer_tid; + m_blocking_writer_context_id = qi.m_writer_context_id; + } + } } bool frwlock::try_read_lock(void) { toku_mutex_assert_locked(m_mutex); - if (m_num_writers > 0 || m_num_want_write > 0) { + if (m_num_writers > 0 || !m_queue->empty()) { return false; } // No writer holds the lock. 
@@ -212,58 +159,19 @@ bool frwlock::try_read_lock(void) { return true; } -void frwlock::maybe_signal_next_writer(void) { - if (m_num_want_write > 0 && m_num_signaled_readers == 0 && m_num_readers == 0) { - toku_cond_t *cond = this->deq_item(); - paranoid_invariant(cond != &m_wait_read); - // Grant write lock to waiting writer. - paranoid_invariant(m_num_want_write > 0); - toku_cond_signal(cond); - } -} - void frwlock::read_unlock(void) { toku_mutex_assert_locked(m_mutex); paranoid_invariant(m_num_writers == 0); paranoid_invariant(m_num_readers > 0); --m_num_readers; - this->maybe_signal_next_writer(); + if (m_num_readers == 0 && !m_queue->empty()) { + toku_cond_signal(m_queue->front().m_cond); + } } bool frwlock::read_lock_is_expensive(void) { toku_mutex_assert_locked(m_mutex); - if (m_wait_read_is_in_queue) { - return m_read_wait_expensive; - } - else { - return m_current_writer_expensive || (m_num_expensive_want_write > 0); - } -} - - -void frwlock::maybe_signal_or_broadcast_next(void) { - paranoid_invariant(m_num_signaled_readers == 0); - - if (this->queue_is_empty()) { - paranoid_invariant(m_num_want_write == 0); - paranoid_invariant(m_num_want_read == 0); - return; - } - toku_cond_t *cond = this->deq_item(); - if (cond == &m_wait_read) { - // Grant read locks to all waiting readers - paranoid_invariant(m_wait_read_is_in_queue); - paranoid_invariant(m_num_want_read > 0); - m_num_signaled_readers = m_num_want_read; - m_wait_read_is_in_queue = false; - m_read_wait_expensive = false; - toku_cond_broadcast(cond); - } - else { - // Grant write lock to waiting writer. 
- paranoid_invariant(m_num_want_write > 0); - toku_cond_signal(cond); - } + return m_num_expensive_want_write > 0 || m_current_writer_expensive; } void frwlock::write_unlock(void) { @@ -273,39 +181,35 @@ void frwlock::write_unlock(void) { m_current_writer_expensive = false; m_current_writer_tid = -1; m_blocking_writer_context_id = CTX_INVALID; - this->maybe_signal_or_broadcast_next(); + if (!m_queue->empty()) { + toku_cond_signal(m_queue->front().m_cond); + } } bool frwlock::write_lock_is_expensive(void) { toku_mutex_assert_locked(m_mutex); - return (m_num_expensive_want_write > 0) || (m_current_writer_expensive); + return (m_num_expensive_want_write > 0) || m_current_writer_expensive; } - uint32_t frwlock::users(void) const { toku_mutex_assert_locked(m_mutex); - return m_num_readers + m_num_writers + m_num_want_read + m_num_want_write; -} -uint32_t frwlock::blocked_users(void) const { - toku_mutex_assert_locked(m_mutex); - return m_num_want_read + m_num_want_write; + return m_num_readers + m_num_writers + m_queue->size(); } + uint32_t frwlock::writers(void) const { - // this is sometimes called as "assert(lock->writers())" when we + // The following comment was found, and if true means the code is + // incorrect (we would at least have to make m_num_writers be an + // atomic variable). Unfortunately, such code actually exists, so I made the m_num_writer be atomic. + + // This is sometimes called as "assert(lock->writers())" when we // assume we have the write lock. 
if that's the assumption, we may // not own the mutex, so we don't assert_locked here + // return m_num_writers; } -uint32_t frwlock::blocked_writers(void) const { - toku_mutex_assert_locked(m_mutex); - return m_num_want_write; -} + uint32_t frwlock::readers(void) const { toku_mutex_assert_locked(m_mutex); return m_num_readers; } -uint32_t frwlock::blocked_readers(void) const { - toku_mutex_assert_locked(m_mutex); - return m_num_want_read; -} } // namespace toku diff --git a/util/frwlock.h b/util/frwlock.h index 52b98a8d7..cc34f1a6d 100644 --- a/util/frwlock.h +++ b/util/frwlock.h @@ -43,14 +43,68 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include #include #include +#include +#include //TODO: update comment, this is from rwlock.h namespace toku { +class frwlock_queueitem { + public: + toku_cond_t *m_cond; + bool m_is_read; + int m_writer_tid; + context_id m_writer_context_id; + frwlock_queueitem(toku_cond_t *cond) + : m_cond(cond) + , m_is_read(true) + , m_writer_tid(-1) + , m_writer_context_id(CTX_INVALID) + {} + frwlock_queueitem(toku_cond_t *cond, int writer_tid, context_id writer_context_id) + : m_cond(cond) + , m_is_read(false) + , m_writer_tid(writer_tid) + , m_writer_context_id(writer_context_id) + {} +}; + class frwlock { public: + // This is a fair readers-writers lock. It has two extra properties + // beyond a vanilla fair rw lock: + // + // 1) It can tell you whether waiting on a lock may be + // "expensive". This is done by requiring anyone who obtains + // a write lock to say whether it's expensive, and keeping + // track of whether any expensive write request is either + // holding the lock or in the queue waiting. + // + // 2) It records the context and thread id of the writer who is + // currently blocking any other thread from getting the lock. 
+ // It does this by recording the context and thread id when a + // writer gets the lock, or when a reader gets the lock and + // the next item in the queue is a writer. + // + // The implementation employs a std::queue of frwlock_queueitems + // (which contain a condition variable, a bool indicating whether + // the request is a reader or a writer, and the threadid and + // context of the requesting thread.) + // + // When a reader or writer tries, and cannot get, the lock, it + // places a condition variable into the queue. + // + // When a reader releases a lock, it checks to see if there are + // any other readers still holding the lock, and if not, it + // signals the next item in the queue (which is responsible for + // removing itself from the queue, and if it is a reader, for + // signaling the next item in the queue if it is a reader). + // + // When a writer releases a lock, it signals the next item in the + // queue. + void init(toku_mutex_t *const mutex); void deinit(void); @@ -66,55 +120,42 @@ class frwlock { // returns true if acquiring a read lock will be expensive bool read_lock_is_expensive(void); + // How many threads are holding or waiting on the lock? (You must hold the lock to call this.) uint32_t users(void) const; - uint32_t blocked_users(void) const; + + // How many writer threads are holding the lock (0 or 1)? (You need not hold the lock to call this.) uint32_t writers(void) const; - uint32_t blocked_writers(void) const; + + // How many readers currently hold the lock? (You must hold the lock to call this.) 
uint32_t readers(void) const; - uint32_t blocked_readers(void) const; private: - struct queue_item { - toku_cond_t *cond; - struct queue_item *next; - }; - - bool queue_is_empty(void) const; - void enq_item(queue_item *const item); - toku_cond_t *deq_item(void); - void maybe_signal_or_broadcast_next(void); - void maybe_signal_next_writer(void); + // the pair is the condition variable and true for read, false for write + std::queue *m_queue; toku_mutex_t *m_mutex; + // How many readers hold the lock? uint32_t m_num_readers; - uint32_t m_num_writers; - uint32_t m_num_want_write; - uint32_t m_num_want_read; - uint32_t m_num_signaled_readers; - // number of writers waiting that are expensive - // MUST be < m_num_want_write + + // How many writers hold the lock? + std::atomic m_num_writers; // this is accessed without a lock, so we make it atomic. + + // Number of writers that are expensive (not including the writer that holds the lock, if any) + // MUST be < the number in the queue that want to write. uint32_t m_num_expensive_want_write; - // bool that states if the current writer is expensive - // if there is no current writer, then is false - bool m_current_writer_expensive; - // bool that states if waiting for a read - // is expensive - // if there are currently no waiting readers, then set to false - bool m_read_wait_expensive; + + // Is the current writer expensive (we must store this separately + // from m_num_expensive_want_write) + bool m_current_writer_expensive; + // thread-id of the current writer int m_current_writer_tid; + // context id describing the context of the current writer blocking // new readers (either because this writer holds the write lock or - // is the first to want the write lock). + // is one of the ones that wants the write lock). 
context_id m_blocking_writer_context_id; - - toku_cond_t m_wait_read; - queue_item m_queue_item_read; - bool m_wait_read_is_in_queue; - - queue_item *m_wait_head; - queue_item *m_wait_tail; }; ENSURE_POD(frwlock);