From 612ca93804a9f12cd649d51fa7370047b7ef89ae Mon Sep 17 00:00:00 2001 From: Michael Axtmann Date: Mon, 7 Oct 2024 16:19:44 +0000 Subject: [PATCH] Revert "neuron: Disable rdma eager messages by default" This reverts commit bfc2e7c877f4eca2ca574cdf8c8662ede68e6e33. Eager gets re-enabled on Neuron platforms, i.e., the commit above is reverted, since current Neuron features benefit from sending data eagerly due to the missing pre-posting feature. Signed-off-by: Michael Axtmann --- include/nccl_ofi_param.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/include/nccl_ofi_param.h b/include/nccl_ofi_param.h index b51b20e1..3aa65c9f 100644 --- a/include/nccl_ofi_param.h +++ b/include/nccl_ofi_param.h @@ -261,20 +261,8 @@ OFI_NCCL_PARAM_INT(net_latency, "NET_LATENCY", -1); /* * Eager message size limit when using RDMA protocol. Message sizes greater than * this limit will always be sent using RDMA write instead of eagerly. - * - * Neuron perf is better without the eager protocol, so we set the - * default to 0 on Neuron platforms. We really need to have a way to - * tweak defaults from the platform file, but this fits our needs for - * now. */ -OFI_NCCL_PARAM_UINT(eager_max_size, - "EAGER_MAX_SIZE", -#if HAVE_NEURON - 0 -#else - 8192 -#endif -); +OFI_NCCL_PARAM_UINT(eager_max_size, "EAGER_MAX_SIZE", 8192); /* * Decide whether or not mutexes should default to errorcheck mode.