Skip to content

Commit

Permalink
feat: Load custom json OV config during runtime latest
Browse files Browse the repository at this point in the history
  • Loading branch information
ankitm3k committed Sep 26, 2024
1 parent a2638c4 commit ab3b83a
Show file tree
Hide file tree
Showing 11 changed files with 136 additions and 20 deletions.
2 changes: 1 addition & 1 deletion cmake/onnxruntime_providers_openvino.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_openvino_cc_srcs})
onnxruntime_add_shared_library_module(onnxruntime_providers_openvino ${onnxruntime_providers_openvino_cc_srcs} "${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc")
onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx)
onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx nlohmann_json::nlohmann_json)
install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/openvino/openvino_provider_factory.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
set_target_properties(onnxruntime_providers_openvino PROPERTIES CXX_STANDARD 20)
Expand Down
27 changes: 27 additions & 0 deletions onnxruntime/core/providers/openvino/backend_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "core/providers/shared_library/provider_api.h"
#include "core/providers/openvino/backend_utils.h"
#include "core/providers/openvino/ov_interface.h"
#include "nlohmann/json.hpp"

using Exception = ov::Exception;

Expand Down Expand Up @@ -267,6 +268,32 @@ void printPerformanceCounts(OVInferRequestPtr request, std::ostream& stream, std
printPerformanceCounts(performanceMap, stream, std::move(deviceName));
}

/// Loads per-device OpenVINO properties from a JSON file into `config`.
///
/// The file must contain a JSON object keyed by device name, where each value
/// is itself an object mapping a property name to a *string* value, e.g.
///   { "CPU": { "SOME_PROPERTY": "value" }, "GPU": { ... } }
/// Every property found is stored as `config[device][property] = value`.
/// Entries already present in `config` are overwritten on key collision and
/// otherwise left untouched.
///
/// @param filename Path of the JSON configuration file to read.
/// @param config   Output map (device name -> property map) that receives the parsed options.
/// @throws (via ORT_THROW) if the file cannot be opened, or if the JSON does not
///         have the layout described above.
/// @throws std::runtime_error if reading/parsing the JSON stream fails.
void LoadConfig(const std::string& filename, std::map<std::string, ov::AnyMap>& config) {
  std::ifstream input_filestream(filename);
  if (!input_filestream.is_open()) {
    ORT_THROW("Can't load config file \"" + filename + "\".");
  }

  nlohmann::json json_config;
  try {
    input_filestream >> json_config;
  } catch (const OnnxRuntimeException& ex) {
    ORT_THROW("Can't parse config file \"" + filename + "\".\n" + ex.what());
  } catch (const std::exception& ex) {
    throw std::runtime_error("Standard exception for config file \"" + filename + "\".\n" + ex.what());
  } catch (...) {
    throw std::runtime_error("Unknown exception for config file \"" + filename + "\".\n");
  }

  // Iterating a non-object and calling key() on the iterator throws an opaque
  // nlohmann invalid_iterator error, so validate the layout up front and report
  // which file is malformed.
  if (!json_config.is_object()) {
    ORT_THROW("Invalid config file \"" + filename +
              "\": expected a JSON object keyed by device name at the root.");
  }

  for (auto item = json_config.cbegin(), end = json_config.cend(); item != end; ++item) {
    const std::string& deviceName = item.key();
    const auto& item_value = item.value();
    if (!item_value.is_object()) {
      ORT_THROW("Invalid config file \"" + filename + "\": entry for device \"" + deviceName +
                "\" must be a JSON object of property name/value pairs.");
    }

    for (auto option = item_value.cbegin(), item_value_end = item_value.cend(); option != item_value_end; ++option) {
      const auto& option_value = option.value();
      // get<std::string>() throws a context-free type_error for numbers, booleans,
      // arrays, etc.; fail with a message that names the offending property instead.
      if (!option_value.is_string()) {
        ORT_THROW("Invalid config file \"" + filename + "\": value of property \"" + option.key() +
                  "\" for device \"" + deviceName + "\" must be a JSON string.");
      }
      config[deviceName][option.key()] = option_value.get<std::string>();
    }
  }
}

} // namespace backend_utils
} // namespace openvino_ep
} // namespace onnxruntime
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/openvino/backend_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,

void printPerformanceCounts(OVInferRequestPtr request, std::ostream& stream, std::string deviceName);

void LoadConfig(const std::string& filename, std::map<std::string, ov::AnyMap>& config);

} // namespace backend_utils
} // namespace openvino_ep
} // namespace onnxruntime
69 changes: 69 additions & 0 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,75 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
}
#endif
}

// Apply user-provided OpenVINO properties from the JSON file named by load_config (if one was given).
if (!global_context_.load_config.empty()) {
  std::map<std::string, ov::AnyMap> target_config;
  LoadConfig(global_context_.load_config, target_config);

  // Splits a composite device string such as "AUTO:CPU,GPU" into {"CPU", "GPU"}.
  // A string without ':' is returned unchanged as the only element.
  auto parse_individual_devices = [&](const std::string& device_type) -> std::vector<std::string> {
    const auto colon_pos = device_type.find(':');
    if (colon_pos == std::string::npos) {
      return {device_type};
    }
    std::vector<std::string> names;
    std::stringstream device_list(device_type.substr(colon_pos + 1));
    for (std::string name; std::getline(device_list, name, ',');) {
      names.emplace_back(name);
    }
    return names;
  };

  // True when `key` appears in `supported_config` and is reported as mutable.
  auto is_supported_and_mutable = [&](const std::string& key,
                                      const std::vector<ov::PropertyName>& supported_config) -> bool {
    for (const ov::PropertyName& property : supported_config) {
      if (property == key && property.is_mutable()) {
        return true;
      }
    }
    return false;
  };

  // Sets each supported, mutable property on `device`; any other property is
  // skipped with a warning.
  auto set_target_properties = [&](const std::string& device, const ov::AnyMap& config_options,
                                   const std::vector<ov::PropertyName>& supported_properties) {
    for (const auto& [key, value] : config_options) {
      if (!is_supported_and_mutable(key, supported_properties)) {
        LOGS_DEFAULT(WARNING) << "WARNING: Property \"" << key
                              << "\" is either unsupported in current OpenVINO version"
                              << " or property is immutable for target device \""
                              << device << "\". Skipping setting this property.";
        continue;
      }
      global_context_.ie_core.Get().set_property(device, ov::AnyMap{{key, value}});
    }
  };

  // AUTO / HETERO / MULTI are configured through their member devices
  // (e.g. "AUTO:CPU,GPU" -> "CPU" and "GPU"); any other device type is configured directly.
  const std::string& requested_device = global_context_.device_type;
  const bool is_composite_device = requested_device.find("AUTO") == 0 ||
                                   requested_device.find("HETERO") == 0 ||
                                   requested_device.find("MULTI") == 0;
  const std::vector<std::string> devices_to_configure =
      is_composite_device ? parse_individual_devices(requested_device)
                          : std::vector<std::string>{requested_device};

  for (const std::string& device : devices_to_configure) {
    // Devices with no entry in the loaded config are left alone.
    if (target_config.count(device) == 0) {
      continue;
    }
    // Query what this device supports, then apply the matching options.
    auto device_properties = global_context_.ie_core.Get().get_property(device, ov::supported_properties);
    set_target_properties(device, target_config.at(device), device_properties);
  }
}
}

void BasicBackend::EnableCaching(ov::AnyMap& device_config) {
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/core/providers/openvino/contexts.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ struct GlobalContext {
std::string precision_str;
std::string model_precision;
std::string cache_dir;
std::string load_config;
std::string model_priority = "DEFAULT";
int num_streams;
std::vector<bool> deviceAvailableList = {true, true, true, true, true, true, true, true};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
global_context_->precision_str = info.precision_;
global_context_->enable_npu_fast_compile = info.enable_npu_fast_compile_;
global_context_->cache_dir = info.cache_dir_;
global_context_->load_config = info.load_config_;
global_context_->model_priority = info.model_priority_;
global_context_->num_streams = info.num_streams_;
global_context_->context = info.context_;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ struct OpenVINOExecutionProviderInfo {
std::string precision_{""};
bool enable_npu_fast_compile_{false};
size_t num_of_threads_{0};
std::string load_config_{""};
std::string cache_dir_{""};
std::string model_priority_{""};
int num_streams_{1};
Expand All @@ -96,14 +97,16 @@ struct OpenVINOExecutionProviderInfo {

explicit OpenVINOExecutionProviderInfo(const std::string& dev_type, const std::string& precision,
bool enable_npu_fast_compile, size_t num_of_threads,
const std::string& cache_dir, const std::string& model_priority,
int num_streams, void* context, bool enable_opencl_throttling,
const std::string& load_config, const std::string& cache_dir,
const std::string& model_priority, int num_streams,
void* context, bool enable_opencl_throttling,
bool disable_dynamic_shapes, bool export_ep_ctx_blob,
bool enable_qdq_optimizer, bool disable_cpu_fallback,
bool so_epctx_embed_mode)
: precision_(std::move(precision)),
enable_npu_fast_compile_(enable_npu_fast_compile),
num_of_threads_(num_of_threads),
load_config_(std::move(load_config)),
cache_dir_(std::move(cache_dir)),
model_priority_(std::move(model_priority)),
num_streams_(num_streams),
Expand Down
36 changes: 21 additions & 15 deletions onnxruntime/core/providers/openvino/openvino_provider_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,20 @@

namespace onnxruntime {
struct OpenVINOProviderFactory : IExecutionProviderFactory {
OpenVINOProviderFactory(const char* device_type, const char* precision,
OpenVINOProviderFactory(const std::string& device_type, const std::string& precision,
bool enable_npu_fast_compile, size_t num_of_threads,
const char* cache_dir, const char* model_priority,
int num_streams, void* context,
const std::string& load_config, const std::string& cache_dir,
const std::string& model_priority, int num_streams, void* context,
bool enable_opencl_throttling, bool disable_dynamic_shapes,
bool export_ep_ctx_blob, bool enable_qdq_optimizer,
bool disable_cpu_fallback,
bool so_epctx_embed_mode)
: precision_(precision),
: device_type_(device_type),
precision_(precision),
enable_npu_fast_compile_(enable_npu_fast_compile),
num_of_threads_(num_of_threads),
load_config_(load_config),
cache_dir_(cache_dir),
model_priority_(model_priority),
num_streams_(num_streams),
context_(context),
Expand All @@ -27,13 +30,9 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
export_ep_ctx_blob_(export_ep_ctx_blob),
enable_qdq_optimizer_(enable_qdq_optimizer),
disable_cpu_fallback_(disable_cpu_fallback),
so_epctx_embed_mode_(so_epctx_embed_mode) {
device_type_ = (device_type == nullptr) ? "" : device_type;
cache_dir_ = (cache_dir == nullptr) ? "" : cache_dir;
}
so_epctx_embed_mode_(so_epctx_embed_mode) {}

~OpenVINOProviderFactory() override {
}
~OpenVINOProviderFactory() override {}

std::unique_ptr<IExecutionProvider> CreateProvider() override;

Expand All @@ -42,6 +41,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
std::string precision_;
bool enable_npu_fast_compile_;
size_t num_of_threads_;
std::string load_config_;
std::string cache_dir_;
std::string model_priority_;
int num_streams_;
Expand All @@ -55,7 +55,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
};

std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
OpenVINOExecutionProviderInfo info(device_type_, precision_, enable_npu_fast_compile_, num_of_threads_,
OpenVINOExecutionProviderInfo info(device_type_, precision_, enable_npu_fast_compile_, num_of_threads_, load_config_,
cache_dir_, model_priority_, num_streams_, context_, enable_opencl_throttling_,
disable_dynamic_shapes_, export_ep_ctx_blob_, enable_qdq_optimizer_,
disable_cpu_fallback_,
Expand Down Expand Up @@ -90,10 +90,11 @@ struct OpenVINO_Provider : Provider {
// speeds up the model's compilation to NPU device specific format.
int num_of_threads = 0; // [num_of_threads]: Overrides the accelerator default value of number of
// threads with this value at runtime.
std::string load_config = ""; // Path to JSON file to load custom OV parameters.
std::string cache_dir = ""; // [cache_dir]: specify the path to
// dump and load the blobs for the model caching/kernel caching (GPU)
// feature. If blob files are already present, it will be directly loaded.
const char* model_priority = "DEFAULT"; // High-level OpenVINO model priority hint
std::string model_priority = "DEFAULT"; // High-level OpenVINO model priority hint
// Defines what model should be provided with more performant
// bounded resource first
int num_streams = 1; // [num_streams]: Option that specifies the number of parallel inference
Expand Down Expand Up @@ -185,6 +186,10 @@ struct OpenVINO_Provider : Provider {
cache_dir = provider_options_map.at("cache_dir");
}

if (provider_options_map.find("load_config") != provider_options_map.end()) {
load_config = provider_options_map.at("load_config");
}

if (provider_options_map.find("context") != provider_options_map.end()) {
std::string str = provider_options_map.at("context");
uint64_t number = std::strtoull(str.c_str(), nullptr, 16);
Expand Down Expand Up @@ -319,11 +324,12 @@ struct OpenVINO_Provider : Provider {
}
}

return std::make_shared<OpenVINOProviderFactory>(const_cast<char*>(device_type.c_str()),
const_cast<char*>(precision.c_str()),
return std::make_shared<OpenVINOProviderFactory>(device_type,
precision,
enable_npu_fast_compile,
num_of_threads,
const_cast<char*>(cache_dir.c_str()),
load_config,
cache_dir,
model_priority,
num_streams,
context,
Expand Down
4 changes: 3 additions & 1 deletion onnxruntime/core/session/provider_bridge_ort.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1810,6 +1810,7 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O
// Add new provider option below
ov_options_converted_map["num_streams"] = "1";
ov_options_converted_map["export_ep_ctx_blob"] = "false";
ov_options_converted_map["load_config"] = "";
ov_options_converted_map["model_priority"] = "DEFAULT";
ov_options_converted_map["enable_qdq_optimizer"] = "false";
return ov_options_converted_map;
Expand Down Expand Up @@ -2104,7 +2105,8 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_MIGraphX, _In
API_IMPL_END
}

ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO, _In_ OrtSessionOptions* options, _In_ const OrtOpenVINOProviderOptions* provider_options) {
ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO, _In_ OrtSessionOptions* options,
_In_ const OrtOpenVINOProviderOptions* provider_options) {
API_IMPL_BEGIN
auto factory = onnxruntime::OpenVINOProviderFactoryCreator::Create(provider_options);
if (!factory) {
Expand Down
3 changes: 3 additions & 0 deletions onnxruntime/python/onnxruntime_pybind_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1087,6 +1087,9 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
} else if (option.first == "num_streams") {
OV_provider_options_map[option.first] = option.second;
continue;
} else if (option.first == "load_config") {
OV_provider_options_map[option.first] = option.second;
continue;
} else if (option.first == "cache_dir") {
OV_provider_options_map[option.first] = option.second;
continue;
Expand Down
4 changes: 3 additions & 1 deletion onnxruntime/test/perftest/ort_test_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,8 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
} else {
ov_options[key] = value;
}
} else if (key == "load_config") {
ov_options[key] = value;
} else if (key == "model_priority") {
ov_options[key] = value;
} else if (key == "cache_dir") {
Expand All @@ -862,7 +864,7 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
use_device_mem = true;
}
} else {
ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_npu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling', 'disable_dynamic_shapes'] \n");
ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_npu_fast_compile', 'num_of_threads', 'load_config', 'cache_dir', 'num_streams', 'enable_opencl_throttling', 'disable_dynamic_shapes'] \n");
}
}
session_options.AppendExecutionProvider_OpenVINO_V2(ov_options);
Expand Down

0 comments on commit ab3b83a

Please sign in to comment.