@@ -1294,8 +1294,21 @@ size_t CacheAllocator<CacheTrait>::wakeUpWaitersLocked(folly::StringPiece key,
12941294}
12951295
12961296template <typename CacheTrait>
1297- void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
1297+ bool CacheAllocator<CacheTrait>::moveRegularItemWithSync(
12981298 Item& oldItem, WriteHandle& newItemHdl) {
1299+ // on function exit - the new item handle is no longer moving
1300+ // and other threads may access it - but in case where
1301+ // we failed to replace in access container we can give the
1302+ // new item back to the allocator
1303+ auto guard = folly::makeGuard ([&]() {
1304+ auto ref = newItemHdl->unmarkMoving ();
1305+ if (UNLIKELY (ref == 0 )) {
1306+ const auto res =
1307+ releaseBackToAllocator (*newItemHdl, RemoveContext::kNormal , false );
1308+ XDCHECK (res == ReleaseRes::kReleased );
1309+ }
1310+ });
1311+
12991312 XDCHECK (oldItem.isMoving ());
13001313 XDCHECK (!oldItem.isExpired ());
13011314 // TODO: should we introduce new latency tracker. E.g. evictRegularLatency_
@@ -1326,6 +1339,22 @@ void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
13261339
13271340 auto replaced = accessContainer_->replaceIf (oldItem, *newItemHdl,
13281341 predicate);
1342+ // another thread may have called insertOrReplace which could have
1343+ // marked this item as inaccessible causing the replaceIf
1344+ // in the access container to fail - in this case we want
1345+ // to abort the move since the item is no longer valid
1346+ if (!replaced) {
1347+ return false ;
1348+ }
1349+ // what if another thread calls insertOrReplace now when
1350+ // the item is moving and already replaced in the hash table?
1351+ // 1. it succeeds in updating the hash table - so there is
1352+ // no guarantee that isAccessible() is true
1353+ // 2. it will then try to remove from MM container
1354+ // - this operation will wait for newItemHdl to
1355+ // be unmarkedMoving via the waitContext
1356+ // 3. replaced handle is returned and eventually drops
1357+ // ref to 0 and the item is recycled back to allocator.
13291358
13301359 if (config_.moveCb ) {
13311360 // Execute the move callback. We cannot make any guarantees about the
@@ -1367,14 +1396,7 @@ void CacheAllocator<CacheTrait>::moveRegularItemWithSync(
13671396 XDCHECK (newItemHdl->hasChainedItem ());
13681397 }
13691398 newItemHdl.unmarkNascent ();
1370- auto ref = newItemHdl->unmarkMoving ();
1371- // remove because there is a chance the new item was not
1372- // added to the access container
1373- if (UNLIKELY (ref == 0 )) {
1374- const auto res =
1375- releaseBackToAllocator (*newItemHdl, RemoveContext::kNormal , false );
1376- XDCHECK (res == ReleaseRes::kReleased );
1377- }
1399+ return true ;
13781400}
13791401
13801402template <typename CacheTrait>
@@ -1529,7 +1551,6 @@ template <typename CacheTrait>
15291551void CacheAllocator<CacheTrait>::unlinkItemForEviction(Item& it) {
15301552 XDCHECK (it.isMarkedForEviction ());
15311553 XDCHECK (it.getRefCount () == 0 );
1532-
15331554 accessContainer_->remove (it);
15341555 removeFromMMContainer (it);
15351556
@@ -1624,28 +1645,43 @@ CacheAllocator<CacheTrait>::findEviction(TierId tid, PoolId pid, ClassId cid) {
16241645 auto evictedToNext = lastTier ? nullptr
16251646 : tryEvictToNextMemoryTier (*candidate, false );
16261647 if (!evictedToNext) {
1627- if (!token.isValid ()) {
1648+ // if insertOrReplace was called during move
1649+ // then candidate will not be accessible (failed replace during tryEvict)
1650+ // - therefore this was why we failed to
1651+ // evict to the next tier and insertOrReplace
1652+ // will remove from NVM cache
1653+ // however, if candidate is accessible
1654+ // that means the allocation in the next
1655+ // tier failed - so we will continue to
1656+ // evict the item to NVM cache
1657+ bool failedToReplace = !candidate->isAccessible ();
1658+ if (!token.isValid () && !failedToReplace) {
16281659 token = createPutToken (*candidate);
16291660 }
1630- // tryEvictToNextMemoryTier should only fail if allocation of the new item fails
1631- // in that case, it should be still possible to mark item as exclusive.
1661+ // tryEvictToNextMemoryTier can fail if:
1662+ // a) allocation of the new item fails - in that case,
1663+ // it should be still possible to mark item for eviction.
1664+ // b) another thread calls insertOrReplace and the item
1665+ // is no longer accessible
16321666 //
16331667 // in case that we are on the last tier, we would have already marked
16341668 // as exclusive since we will not be moving the item to the next tier
16351669 // but rather just evicting all together, no need to
1636- // markExclusiveWhenMoving
1670+ // markForEvictionWhenMoving
16371671 auto ret = lastTier ? true : candidate->markForEvictionWhenMoving ();
16381672 XDCHECK (ret);
16391673
16401674 unlinkItemForEviction (*candidate);
1675+
1676+ if (token.isValid () && shouldWriteToNvmCacheExclusive (*candidate)
1677+ && !failedToReplace) {
1678+ nvmCache_->put (*candidate, std::move (token));
1679+ }
16411680 // wake up any readers that wait for the move to complete
16421681 // it's safe to do now, as we have the item marked exclusive and
16431682 // no other reader can be added to the waiters list
16441683 wakeUpWaiters (*candidate, {});
16451684
1646- if (token.isValid () && shouldWriteToNvmCacheExclusive (*candidate)) {
1647- nvmCache_->put (*candidate, std::move (token));
1648- }
16491685 } else {
16501686 XDCHECK (!evictedToNext->isMarkedForEviction () && !evictedToNext->isMoving ());
16511687 XDCHECK (!candidate->isMarkedForEviction () && !candidate->isMoving ());
@@ -1756,7 +1792,10 @@ CacheAllocator<CacheTrait>::tryEvictToNextMemoryTier(
17561792
17571793 if (newItemHdl) {
17581794 XDCHECK_EQ (newItemHdl->getSize (), item.getSize ());
1759- moveRegularItemWithSync (item, newItemHdl);
1795+ if (!moveRegularItemWithSync (item, newItemHdl)) {
1796+ return WriteHandle{};
1797+ }
1798+ XDCHECK_EQ (newItemHdl->getKey (),item.getKey ());
17601799 item.unmarkMoving ();
17611800 return newItemHdl;
17621801 } else {
@@ -1795,7 +1834,9 @@ CacheAllocator<CacheTrait>::tryPromoteToNextMemoryTier(
17951834
17961835 if (newItemHdl) {
17971836 XDCHECK_EQ (newItemHdl->getSize (), item.getSize ());
1798- moveRegularItemWithSync (item, newItemHdl);
1837+ if (!moveRegularItemWithSync (item, newItemHdl)) {
1838+ return WriteHandle{};
1839+ }
17991840 item.unmarkMoving ();
18001841 return newItemHdl;
18011842 } else {
@@ -3148,9 +3189,23 @@ bool CacheAllocator<CacheTrait>::tryMovingForSlabRelease(
31483189 // TODO: add support for chained items
31493190 return false ;
31503191 } else {
3151- moveRegularItemWithSync (oldItem, newItemHdl);
3152- removeFromMMContainer (oldItem);
3153- return true ;
3192+ // move can fail if another thread calls insertOrReplace
3193+ // in this case oldItem is no longer valid (not accessible),
3194+ // it gets removed from MMContainer and evictForSlabRelease
3195+ // will send it back to the allocator
3196+ bool ret = moveRegularItemWithSync (oldItem, newItemHdl);
3197+ if (!ret) {
3198+ // we failed to move - newItemHdl was released back to allocator
3199+ // by the moveRegularItemWithSync but oldItem is not accessible
3200+ // and no longer valid - we need to clean it up here
3201+ XDCHECK (!oldItem.isAccessible ());
3202+ oldItem.markForEvictionWhenMoving ();
3203+ unlinkItemForEviction (oldItem);
3204+ wakeUpWaiters (oldItem, {});
3205+ } else {
3206+ removeFromMMContainer (oldItem);
3207+ }
3208+ return ret;
31543209 }
31553210 }
31563211}
0 commit comments