diff --git a/xla/service/cpu/runtime/thunk_executor.cc b/xla/service/cpu/runtime/thunk_executor.cc index 155edd9bdbca6..f25fd6119a284 100644 --- a/xla/service/cpu/runtime/thunk_executor.cc +++ b/xla/service/cpu/runtime/thunk_executor.cc @@ -162,6 +162,12 @@ tsl::AsyncValueRef ThunkExecutor::Execute( Execute(state.get(), params, ReadyQueue(source_.begin(), source_.end()), /*lock=*/params.session.Join()); + // If execution already completed (all kernels executed in the caller thread), + // immediately return the result to avoid wasteful reference counting below. + if (ABSL_PREDICT_TRUE(state->execute_event.IsAvailable())) { + return std::move(state->execute_event); + } + // Move execute state to the execute event callback to ensure that it is kept // alive while thunk executor has pending tasks. auto execute_event = state->execute_event;