Skip to content

Commit

Permalink
Background movers implementation
Browse files Browse the repository at this point in the history
This is the implementation logic for the background eviction (and promotion when multi-tier is enabled).

The main parameters for the background workers are the number of threads (`backgroundMoverThreads`), the batch size (`backgroundEvictionBatch`), which configures the number of items to evict in a batch (while holding the container lock), and `backgroundTargetFree`, which sets the target free percentage of each class. The background workers will work to keep that percentage of space free.

The main result is that SET (allocate) latencies are significantly reduced. Measured with a kvcache workload (40GB DRAM, ampFactor set to 200) via the trace replayer, with throughput set at 1.2M ops/sec:
| Percentile | % Improvement |
|------------|---------------|
| 0.50000    |     70.4      |
| 0.90000    |     23.0      |
| 0.99000    |     12.2      |
| 0.99900    |     89.7      |
| 0.99990    |     24.9      |
| 0.99999    |     0.6       |

The GET (find) latencies are unaffected by the background workers as long as the batch size remains reasonably small (10 in our tests).
  • Loading branch information
byrnedj committed Nov 20, 2024
1 parent efa4506 commit 7105800
Show file tree
Hide file tree
Showing 25 changed files with 1,069 additions and 622 deletions.
226 changes: 156 additions & 70 deletions cachelib/allocator/BackgroundMover.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,51 +16,63 @@

#pragma once

#include "cachelib/allocator/BackgroundMoverStrategy.h"
#include "cachelib/allocator/Cache.h"
#include "cachelib/allocator/CacheStats.h"
#include "cachelib/common/AtomicCounter.h"
#include "cachelib/common/PeriodicWorker.h"

namespace facebook::cachelib {
// wrapper that exposes the private APIs of CacheType that are specifically
// needed for the background mover
template <typename C>
struct BackgroundMoverAPIWrapper {
  // traverse the cache and move items from one tier to another
  // @param cache          the cache interface
  // @param pid            the pool id to traverse
  // @param cid            the class id to traverse
  // @param evictionBatch  number of items to evict in one go
  // @param promotionBatch number of items to promote in one go
  // @return pair of number of items evicted and promoted
  static std::pair<size_t, size_t> traverseAndMoveItems(C& cache,
                                                        PoolId pid,
                                                        ClassId cid,
                                                        size_t evictionBatch,
                                                        size_t promotionBatch) {
    return cache.traverseAndMoveItems(pid, cid, evictionBatch, promotionBatch);
  }

  // @return pair of (active item count, approximate usage fraction) for the
  //         given allocation class; {0, 0.0} until the pool is fully grown
  static std::pair<size_t, double> getApproxUsage(C& cache,
                                                  PoolId pid,
                                                  ClassId cid) {
    const auto& pool = cache.getPool(pid);
    // we wait until all slabs are allocated before we start evicting
    if (!pool.allSlabsAllocated()) {
      return {0, 0.0};
    }
    return pool.getApproxUsage(cid);
  }
};

enum class MoverDir { Evict = 0, Promote };

// Periodic worker that evicts items from tiers in batches
// The primary aim is to reduce insertion times for new items in the
// cache
template <typename CacheT>
class BackgroundMover : public PeriodicWorker {
public:
using ClassBgStatsType =
std::map<MemoryDescriptorType, std::pair<size_t, size_t>>;
using Cache = CacheT;
// @param cache the cache interface
// @param strategy the strategy class that defines how objects are
// moved (promoted vs. evicted and how much)
// @param evictionBatch number of items to evict in one go
// @param promotionBatch number of items to promote in one go
// @param targetFree target free percentage in the class
BackgroundMover(Cache& cache,
std::shared_ptr<BackgroundMoverStrategy> strategy,
MoverDir direction_);
size_t evictionBatch,
size_t promotionBatch,
double targetFree);

~BackgroundMover() override;

BackgroundMoverStats getStats() const noexcept;
std::map<PoolId, std::map<ClassId, uint64_t>> getClassStats() const noexcept;
ClassBgStatsType getPerClassStats() const noexcept { return movesPerClass_; }

void setAssignedMemory(std::vector<MemoryDescriptorType>&& assignedMemory);

Expand All @@ -69,40 +81,75 @@ class BackgroundMover : public PeriodicWorker {
static size_t workerId(PoolId pid, ClassId cid, size_t numWorkers);

private:
std::map<PoolId, std::map<ClassId, uint64_t>> movesPerClass_;
struct TraversalStats {
// record a traversal over all assigned classes
// and its time taken
void recordTraversalTime(uint64_t nsTaken);

uint64_t getAvgTraversalTimeNs(uint64_t numTraversals) const;
uint64_t getMinTraversalTimeNs() const { return minTraversalTimeNs_; }
uint64_t getMaxTraversalTimeNs() const { return maxTraversalTimeNs_; }
uint64_t getLastTraversalTimeNs() const { return lastTraversalTimeNs_; }

private:
// time it took us the last time to traverse the cache.
uint64_t lastTraversalTimeNs_{0};
uint64_t minTraversalTimeNs_{std::numeric_limits<uint64_t>::max()};
uint64_t maxTraversalTimeNs_{0};
uint64_t totalTraversalTimeNs_{0};
};

TraversalStats traversalStats_;
// cache allocator's interface for evicting
using Item = typename Cache::Item;

Cache& cache_;
std::shared_ptr<BackgroundMoverStrategy> strategy_;
MoverDir direction_;

std::function<size_t(Cache&, unsigned int, unsigned int, size_t)> moverFunc;
uint8_t numTiers_{1}; // until we have multi-tier support
size_t evictionBatch_{0};
size_t promotionBatch_{0};
double targetFree_{0.03};

// implements the actual logic of running the background evictor
void work() override final;
void checkAndRun();

AtomicCounter numMovedItems_{0};
AtomicCounter numTraversals_{0};
AtomicCounter totalBytesMoved_{0};
// populates the toFree map for each class with the number of items to free
std::map<MemoryDescriptorType, size_t> getNumItemsToFree(
const std::vector<MemoryDescriptorType>& assignedMemory);

uint64_t numEvictedItems_{0};
uint64_t numPromotedItems_{0};
uint64_t numTraversals_{0};

ClassBgStatsType movesPerClass_;

std::vector<MemoryDescriptorType> assignedMemory_;
folly::DistributedMutex mutex_;
};

// Constructs a background mover bound to one cache instance.
// @param cache          the cache interface
// @param evictionBatch  number of items to evict in one go
// @param promotionBatch number of items to promote in one go
// @param targetFree     target free fraction to maintain in each class
template <typename CacheT>
BackgroundMover<CacheT>::BackgroundMover(Cache& cache,
                                         size_t evictionBatch,
                                         size_t promotionBatch,
                                         double targetFree)
    : cache_(cache),
      evictionBatch_(evictionBatch),
      promotionBatch_(promotionBatch),
      targetFree_(targetFree) {}

// Fold one traversal's duration into the last/min/max/total aggregates.
template <typename CacheT>
void BackgroundMover<CacheT>::TraversalStats::recordTraversalTime(
    uint64_t nsTaken) {
  totalTraversalTimeNs_ += nsTaken;
  lastTraversalTimeNs_ = nsTaken;
  if (nsTaken < minTraversalTimeNs_) {
    minTraversalTimeNs_ = nsTaken;
  }
  if (nsTaken > maxTraversalTimeNs_) {
    maxTraversalTimeNs_ = nsTaken;
  }
}

// Average traversal time over the given number of traversals; 0 when no
// traversals have been recorded (avoids division by zero).
template <typename CacheT>
uint64_t BackgroundMover<CacheT>::TraversalStats::getAvgTraversalTimeNs(
    uint64_t numTraversals) const {
  if (numTraversals == 0) {
    return 0;
  }
  return totalTraversalTimeNs_ / numTraversals;
}

template <typename CacheT>
Expand Down Expand Up @@ -132,50 +179,89 @@ void BackgroundMover<CacheT>::setAssignedMemory(
});
}

// Look for classes that exceed the target memory capacity
// and return those for eviction.
// @param assignedMemory the (pool, class) pairs assigned to this worker
// @return map from class descriptor to the number of items that must be
//         freed to bring the class back to targetFree_ fraction free
template <typename CacheT>
std::map<MemoryDescriptorType, size_t>
BackgroundMover<CacheT>::getNumItemsToFree(
    const std::vector<MemoryDescriptorType>& assignedMemory) {
  std::map<MemoryDescriptorType, size_t> toFree;
  for (const auto& md : assignedMemory) {
    const auto [pid, cid] = md;
    const auto [activeItems, usage] =
        BackgroundMoverAPIWrapper<CacheT>::getApproxUsage(cache_, pid, cid);
    if (usage < 1 - targetFree_) {
      // already at least targetFree_ fraction free — no eviction work needed
      toFree[md] = 0;
    } else {
      // scale the active item count up to the class's capacity, then compute
      // how many items above the target occupancy are currently held
      size_t maxItems = activeItems / usage;
      size_t targetItems = maxItems * (1 - targetFree_);
      toFree[md] = activeItems > targetItems ? activeItems - targetItems : 0;
    }
  }
  return toFree;
}

// Implements one run of the background worker: compute how many items each
// assigned class must free, then evict (and, with multi-tier, promote) in
// batches until every class meets its free target or the worker is stopped.
template <typename CacheT>
void BackgroundMover<CacheT>::checkAndRun() {
  auto assignedMemory = mutex_.lock_combine([this] { return assignedMemory_; });

  // calculate the number of items to free for each assigned class
  auto toFree = getNumItemsToFree(assignedMemory);
  while (true) {
    bool allDone = true;
    for (const auto& md : assignedMemory) {
      const auto [pid, cid] = md;
      size_t evictionBatch = evictionBatch_;
      size_t promotionBatch = 0; // will enable with multi-tier support
      if (toFree[md] == 0) {
        // no eviction work to be done since there is already at least
        // targetFree remaining in the class
        evictionBatch = 0;
      } else {
        allDone = false; // we still have some items to free
      }
      if (promotionBatch + evictionBatch > 0) {
        const auto begin = util::getCurrentTimeNs();
        // try moving BATCH items from the class in order to reach free target
        auto moved = BackgroundMoverAPIWrapper<CacheT>::traverseAndMoveItems(
            cache_, pid, cid, evictionBatch, promotionBatch);
        numEvictedItems_ += moved.first;
        numPromotedItems_ += moved.second;
        // saturating decrement of the remaining free quota for this class
        toFree[md] = toFree[md] > moved.first ? toFree[md] - moved.first : 0;
        // NOTE(review): movesPerClass_ and the counters above are updated
        // without synchronization — confirm only this worker thread writes
        // them while stats readers tolerate stale values
        auto& perClass = movesPerClass_[md];
        perClass.first += moved.first;
        perClass.second += moved.second;
        numTraversals_++;
        auto end = util::getCurrentTimeNs();
        traversalStats_.recordTraversalTime(end > begin ? end - begin : 0);
      }
    }
    if (shouldStopWork() || allDone) {
      break;
    }
  }
}

// Snapshot of this worker's aggregate counters and traversal timings.
// NOTE(review): counters are plain uint64_t written by the worker thread —
// reads here are unsynchronized; confirm callers tolerate stale values.
template <typename CacheT>
BackgroundMoverStats BackgroundMover<CacheT>::getStats() const noexcept {
  BackgroundMoverStats stats;
  stats.numEvictedItems = numEvictedItems_;
  stats.numPromotedItems = numPromotedItems_;
  stats.numTraversals = numTraversals_;
  stats.runCount = getRunCount();
  // guard against 0/0 (NaN) before the first traversal has completed
  stats.avgItemsMoved =
      numTraversals_ == 0
          ? 0.0
          : static_cast<double>(stats.numEvictedItems +
                                stats.numPromotedItems) /
                static_cast<double>(numTraversals_);
  stats.lastTraversalTimeNs = traversalStats_.getLastTraversalTimeNs();
  stats.avgTraversalTimeNs =
      traversalStats_.getAvgTraversalTimeNs(numTraversals_);
  stats.minTraversalTimeNs = traversalStats_.getMinTraversalTimeNs();
  stats.maxTraversalTimeNs = traversalStats_.getMaxTraversalTimeNs();

  return stats;
}

// Returns the per-(pool, class) move counters accumulated by this worker.
// NOTE(review): the declared return type (map<PoolId, map<ClassId, uint64_t>>)
// does not match the ClassBgStatsType (map keyed by MemoryDescriptorType)
// declared for movesPerClass_ above — this looks like a stale remnant of the
// pre-batch API; confirm against the committed file.
template <typename CacheT>
std::map<PoolId, std::map<ClassId, uint64_t>>
BackgroundMover<CacheT>::getClassStats() const noexcept {
return movesPerClass_;
}

template <typename CacheT>
size_t BackgroundMover<CacheT>::workerId(PoolId pid,
ClassId cid,
Expand All @@ -185,4 +271,4 @@ size_t BackgroundMover<CacheT>::workerId(PoolId pid,
// TODO: come up with some better sharding (use hashing?)
return (pid + cid) % numWorkers;
}
} // namespace facebook::cachelib
}; // namespace facebook::cachelib
48 changes: 0 additions & 48 deletions cachelib/allocator/BackgroundMoverStrategy.h

This file was deleted.

1 change: 0 additions & 1 deletion cachelib/allocator/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ add_library (cachelib_allocator
CCacheManager.cpp
ContainerTypes.cpp
FreeMemStrategy.cpp
FreeThresholdStrategy.cpp
HitsPerSlabStrategy.cpp
LruTailAgeStrategy.cpp
MarginalHitsOptimizeStrategy.cpp
Expand Down
15 changes: 15 additions & 0 deletions cachelib/allocator/Cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,21 @@ enum class DestructorContext {
kRemovedFromNVM
};

// identifies a single allocation class as a (pool id, class id) pair
struct MemoryDescriptorType {
  MemoryDescriptorType(PoolId pid, ClassId cid) : pid_(pid), cid_(cid) {}
  PoolId pid_;
  ClassId cid_;

  // lexicographic ordering on (pid_, cid_) so the descriptor can be used as
  // a key in ordered containers such as std::map
  bool operator<(const MemoryDescriptorType& rhs) const {
    return std::tie(pid_, cid_) < std::tie(rhs.pid_, rhs.cid_);
  }

  bool operator==(const MemoryDescriptorType& rhs) const {
    return std::tie(pid_, cid_) == std::tie(rhs.pid_, rhs.cid_);
  }
};

// A base class of cache exposing members and status agnostic of template type.
class CacheBase {
public:
Expand Down
Loading

0 comments on commit 7105800

Please sign in to comment.