Skip to content

Commit

Permalink
[opt] set retry_on_failure to false for tf allocator
Browse files (browse the repository at this point in the history)
  • Loading branch information
LinGeLin authored and rhdong committed May 20, 2024
1 parent 53b5ac8 commit 34cc36d
Showing 1 changed file with 3 additions and 1 deletion.
Original file line number | Diff line number | Diff line change
Expand Up @@ -354,9 +354,11 @@ class TFOrDefaultAllocator : public nv::merlin::BaseAllocator {
void alloc(const NMMemType type, void** ptr, size_t size,
unsigned int pinned_flags = cudaHostAllocDefault) override {
if (!use_default_allocator_) {
+ tensorflow::AllocationAttributes allocation_attr(false, false, nullptr);
switch (type) {
case NMMemType::Device:
- *ptr = tf_device_allocator_->AllocateRaw(kAllocatorAlignment, size);
+ *ptr = tf_device_allocator_->AllocateRaw(kAllocatorAlignment, size,
+ allocation_attr);
if (nullptr == *ptr) {
throw std::runtime_error(
"Failed to allocator gpu memory, please adjust param 'max_hbm' "
Expand Down

0 comments on commit 34cc36d

Please sign in to comment.