Skip to content

Commit

Permalink
Background movers implementation
Browse files Browse the repository at this point in the history
This is the implementation logic for the background eviction (and promotion when multi-tier is enabled).

The main parameters for the background workers are the number of threads (`backgroundMoverThreads`), the batch size (`backgroundEvictionBatch`), which configures the number of items to evict in a batch (while holding the container lock), and `backgroundTargetFree`, which sets the target free percentage of each class. The background workers will work to keep that percentage of space free.

The main result is that SET (allocate) latencies are significantly reduced. Measured with a kvcache workload (40GB DRAM, ampFactor set to 200) via the trace replayer, with throughput set at 1.2M ops/sec:
| Percentile | % Improvement |
|------------|---------------|
| 0.50000    |     70.4      |
| 0.90000    |     23.0      |
| 0.99000    |     12.2      |
| 0.99900    |     89.7      |
| 0.99990    |     24.9      |
| 0.99999    |     0.6       |

The GET (find) latencies are unaffected by the background workers as long as the batch size remains reasonably small (10 in our tests).
  • Loading branch information
byrnedj committed Nov 20, 2024
1 parent efa4506 commit 7105800
Show file tree
Hide file tree
Showing 25 changed files with 1,069 additions and 622 deletions.
226 changes: 156 additions & 70 deletions cachelib/allocator/BackgroundMover.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,51 +16,63 @@

#pragma once

#include "cachelib/allocator/BackgroundMoverStrategy.h"
#include "cachelib/allocator/Cache.h"
#include "cachelib/allocator/CacheStats.h"
#include "cachelib/common/AtomicCounter.h"
#include "cachelib/common/PeriodicWorker.h"

namespace facebook::cachelib {
// wrapper that exposes the private APIs of CacheType that are specifically
// needed for the background mover
template <typename C>
struct BackgroundMoverAPIWrapper {
  // traverse the cache and move items from one tier to another
  // @param cache          the cache interface
  // @param pid            the pool id to traverse
  // @param cid            the class id to traverse
  // @param evictionBatch  number of items to evict in one go
  // @param promotionBatch number of items to promote in one go
  // @return pair of number of items evicted and promoted
  static std::pair<size_t, size_t> traverseAndMoveItems(C& cache,
                                                        PoolId pid,
                                                        ClassId cid,
                                                        size_t evictionBatch,
                                                        size_t promotionBatch) {
    return cache.traverseAndMoveItems(pid, cid, evictionBatch, promotionBatch);
  }

  // @return pair of (active item count, approximate usage fraction) for the
  //         given allocation class; {0, 0.0} until the pool is fully grown
  static std::pair<size_t, double> getApproxUsage(C& cache,
                                                  PoolId pid,
                                                  ClassId cid) {
    const auto& pool = cache.getPool(pid);
    // we wait until all slabs are allocated before we start evicting
    if (!pool.allSlabsAllocated()) {
      return {0, 0.0};
    }
    return pool.getApproxUsage(cid);
  }
};

enum class MoverDir { Evict = 0, Promote };

// Periodic worker that evicts items from tiers in batches
// The primary aim is to reduce insertion times for new items in the
// cache
template <typename CacheT>
class BackgroundMover : public PeriodicWorker {
public:
using ClassBgStatsType =
std::map<MemoryDescriptorType, std::pair<size_t, size_t>>;
using Cache = CacheT;
// @param cache the cache interface
// @param strategy the strategy class that defines how objects are
// moved (promoted vs. evicted and how much)
// @param evictionBatch number of items to evict in one go
// @param promotionBatch number of items to promote in one go
// @param targetFree target free percentage in the class
BackgroundMover(Cache& cache,
std::shared_ptr<BackgroundMoverStrategy> strategy,
MoverDir direction_);
size_t evictionBatch,
size_t promotionBatch,
double targetFree);

~BackgroundMover() override;

BackgroundMoverStats getStats() const noexcept;
std::map<PoolId, std::map<ClassId, uint64_t>> getClassStats() const noexcept;
ClassBgStatsType getPerClassStats() const noexcept { return movesPerClass_; }

void setAssignedMemory(std::vector<MemoryDescriptorType>&& assignedMemory);

Expand All @@ -69,40 +81,75 @@ class BackgroundMover : public PeriodicWorker {
static size_t workerId(PoolId pid, ClassId cid, size_t numWorkers);

private:
std::map<PoolId, std::map<ClassId, uint64_t>> movesPerClass_;
struct TraversalStats {
// record a traversal over all assigned classes
// and its time taken
void recordTraversalTime(uint64_t nsTaken);

uint64_t getAvgTraversalTimeNs(uint64_t numTraversals) const;
uint64_t getMinTraversalTimeNs() const { return minTraversalTimeNs_; }
uint64_t getMaxTraversalTimeNs() const { return maxTraversalTimeNs_; }
uint64_t getLastTraversalTimeNs() const { return lastTraversalTimeNs_; }

private:
// time it took us the last time to traverse the cache.
uint64_t lastTraversalTimeNs_{0};
uint64_t minTraversalTimeNs_{std::numeric_limits<uint64_t>::max()};
uint64_t maxTraversalTimeNs_{0};
uint64_t totalTraversalTimeNs_{0};
};

TraversalStats traversalStats_;
// cache allocator's interface for evicting
using Item = typename Cache::Item;

Cache& cache_;
std::shared_ptr<BackgroundMoverStrategy> strategy_;
MoverDir direction_;

std::function<size_t(Cache&, unsigned int, unsigned int, size_t)> moverFunc;
uint8_t numTiers_{1}; // until we have multi-tier support
size_t evictionBatch_{0};
size_t promotionBatch_{0};
double targetFree_{0.03};

// implements the actual logic of running the background evictor
void work() override final;
void checkAndRun();

AtomicCounter numMovedItems_{0};
AtomicCounter numTraversals_{0};
AtomicCounter totalBytesMoved_{0};
// populates the toFree map for each class with the number of items to free
std::map<MemoryDescriptorType, size_t> getNumItemsToFree(
const std::vector<MemoryDescriptorType>& assignedMemory);

uint64_t numEvictedItems_{0};
uint64_t numPromotedItems_{0};
uint64_t numTraversals_{0};

ClassBgStatsType movesPerClass_;

std::vector<MemoryDescriptorType> assignedMemory_;
folly::DistributedMutex mutex_;
};

// Constructs a background mover bound to one cache instance.
// @param cache          the cache interface
// @param evictionBatch  number of items to evict in one go
// @param promotionBatch number of items to promote in one go
// @param targetFree     target free fraction to maintain in each class
template <typename CacheT>
BackgroundMover<CacheT>::BackgroundMover(Cache& cache,
                                         size_t evictionBatch,
                                         size_t promotionBatch,
                                         double targetFree)
    : cache_(cache),
      evictionBatch_(evictionBatch),
      promotionBatch_(promotionBatch),
      targetFree_(targetFree) {}

// Fold one traversal's duration into the last/min/max/total aggregates.
template <typename CacheT>
void BackgroundMover<CacheT>::TraversalStats::recordTraversalTime(
    uint64_t nsTaken) {
  totalTraversalTimeNs_ += nsTaken;
  lastTraversalTimeNs_ = nsTaken;
  if (nsTaken < minTraversalTimeNs_) {
    minTraversalTimeNs_ = nsTaken;
  }
  if (nsTaken > maxTraversalTimeNs_) {
    maxTraversalTimeNs_ = nsTaken;
  }
}

// Average traversal time over the given number of traversals; 0 when no
// traversals have been recorded (avoids division by zero).
template <typename CacheT>
uint64_t BackgroundMover<CacheT>::TraversalStats::getAvgTraversalTimeNs(
    uint64_t numTraversals) const {
  if (numTraversals == 0) {
    return 0;
  }
  return totalTraversalTimeNs_ / numTraversals;
}

template <typename CacheT>
Expand Down Expand Up @@ -132,50 +179,89 @@ void BackgroundMover<CacheT>::setAssignedMemory(
});
}

// Look for classes that exceed the target memory capacity
// and return those for eviction.
// @param assignedMemory the (pool, class) pairs assigned to this worker
// @return map from class descriptor to the number of items that must be
//         freed to bring the class back to targetFree_ fraction free
template <typename CacheT>
std::map<MemoryDescriptorType, size_t>
BackgroundMover<CacheT>::getNumItemsToFree(
    const std::vector<MemoryDescriptorType>& assignedMemory) {
  std::map<MemoryDescriptorType, size_t> toFree;
  for (const auto& md : assignedMemory) {
    const auto [pid, cid] = md;
    const auto [activeItems, usage] =
        BackgroundMoverAPIWrapper<CacheT>::getApproxUsage(cache_, pid, cid);
    if (usage < 1 - targetFree_) {
      // already at least targetFree_ fraction free — no eviction work needed
      toFree[md] = 0;
    } else {
      // scale the active item count up to the class's capacity, then compute
      // how many items above the target occupancy are currently held
      size_t maxItems = activeItems / usage;
      size_t targetItems = maxItems * (1 - targetFree_);
      toFree[md] = activeItems > targetItems ? activeItems - targetItems : 0;
    }
  }
  return toFree;
}

// Implements one run of the background worker: compute how many items each
// assigned class must free, then evict (and, with multi-tier, promote) in
// batches until every class meets its free target or the worker is stopped.
template <typename CacheT>
void BackgroundMover<CacheT>::checkAndRun() {
  auto assignedMemory = mutex_.lock_combine([this] { return assignedMemory_; });

  // calculate the number of items to free for each assigned class
  auto toFree = getNumItemsToFree(assignedMemory);
  while (true) {
    bool allDone = true;
    for (const auto& md : assignedMemory) {
      const auto [pid, cid] = md;
      size_t evictionBatch = evictionBatch_;
      size_t promotionBatch = 0; // will enable with multi-tier support
      if (toFree[md] == 0) {
        // no eviction work to be done since there is already at least
        // targetFree remaining in the class
        evictionBatch = 0;
      } else {
        allDone = false; // we still have some items to free
      }
      if (promotionBatch + evictionBatch > 0) {
        const auto begin = util::getCurrentTimeNs();
        // try moving BATCH items from the class in order to reach free target
        auto moved = BackgroundMoverAPIWrapper<CacheT>::traverseAndMoveItems(
            cache_, pid, cid, evictionBatch, promotionBatch);
        numEvictedItems_ += moved.first;
        numPromotedItems_ += moved.second;
        // saturating decrement of the remaining free quota for this class
        toFree[md] = toFree[md] > moved.first ? toFree[md] - moved.first : 0;
        // NOTE(review): movesPerClass_ and the counters above are updated
        // without synchronization — confirm only this worker thread writes
        // them while stats readers tolerate stale values
        auto& perClass = movesPerClass_[md];
        perClass.first += moved.first;
        perClass.second += moved.second;
        numTraversals_++;
        auto end = util::getCurrentTimeNs();
        traversalStats_.recordTraversalTime(end > begin ? end - begin : 0);
      }
    }
    if (shouldStopWork() || allDone) {
      break;
    }
  }
}

// Snapshot of this worker's aggregate counters and traversal timings.
// NOTE(review): counters are plain uint64_t written by the worker thread —
// reads here are unsynchronized; confirm callers tolerate stale values.
template <typename CacheT>
BackgroundMoverStats BackgroundMover<CacheT>::getStats() const noexcept {
  BackgroundMoverStats stats;
  stats.numEvictedItems = numEvictedItems_;
  stats.numPromotedItems = numPromotedItems_;
  stats.numTraversals = numTraversals_;
  stats.runCount = getRunCount();
  // guard against 0/0 (NaN) before the first traversal has completed
  stats.avgItemsMoved =
      numTraversals_ == 0
          ? 0.0
          : static_cast<double>(stats.numEvictedItems +
                                stats.numPromotedItems) /
                static_cast<double>(numTraversals_);
  stats.lastTraversalTimeNs = traversalStats_.getLastTraversalTimeNs();
  stats.avgTraversalTimeNs =
      traversalStats_.getAvgTraversalTimeNs(numTraversals_);
  stats.minTraversalTimeNs = traversalStats_.getMinTraversalTimeNs();
  stats.maxTraversalTimeNs = traversalStats_.getMaxTraversalTimeNs();

  return stats;
}

// Returns the per-(pool, class) move counters accumulated by this worker.
// NOTE(review): the declared return type (map<PoolId, map<ClassId, uint64_t>>)
// does not match the ClassBgStatsType (map keyed by MemoryDescriptorType)
// declared for movesPerClass_ above — this looks like a stale remnant of the
// pre-batch API; confirm against the committed file.
template <typename CacheT>
std::map<PoolId, std::map<ClassId, uint64_t>>
BackgroundMover<CacheT>::getClassStats() const noexcept {
return movesPerClass_;
}

template <typename CacheT>
size_t BackgroundMover<CacheT>::workerId(PoolId pid,
ClassId cid,
Expand All @@ -185,4 +271,4 @@ size_t BackgroundMover<CacheT>::workerId(PoolId pid,
// TODO: come up with some better sharding (use hashing?)
return (pid + cid) % numWorkers;
}
} // namespace facebook::cachelib
}; // namespace facebook::cachelib
48 changes: 0 additions & 48 deletions cachelib/allocator/BackgroundMoverStrategy.h

This file was deleted.

1 change: 0 additions & 1 deletion cachelib/allocator/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ add_library (cachelib_allocator
CCacheManager.cpp
ContainerTypes.cpp
FreeMemStrategy.cpp
FreeThresholdStrategy.cpp
HitsPerSlabStrategy.cpp
LruTailAgeStrategy.cpp
MarginalHitsOptimizeStrategy.cpp
Expand Down
15 changes: 15 additions & 0 deletions cachelib/allocator/Cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,21 @@ enum class DestructorContext {
kRemovedFromNVM
};

// identifies a single allocation class as a (pool id, class id) pair
struct MemoryDescriptorType {
  MemoryDescriptorType(PoolId pid, ClassId cid) : pid_(pid), cid_(cid) {}
  PoolId pid_;
  ClassId cid_;

  // lexicographic ordering on (pid_, cid_) so the descriptor can be used as
  // a key in ordered containers such as std::map
  bool operator<(const MemoryDescriptorType& rhs) const {
    return std::tie(pid_, cid_) < std::tie(rhs.pid_, rhs.cid_);
  }

  bool operator==(const MemoryDescriptorType& rhs) const {
    return std::tie(pid_, cid_) == std::tie(rhs.pid_, rhs.cid_);
  }
};

// A base class of cache exposing members and status agnostic of template type.
class CacheBase {
public:
Expand Down
Loading

0 comments on commit 7105800

Please sign in to comment.