feat: Load custom json OV config during runtime latest

intel · Sep 26, 2024 · ab3b83a · ab3b83a
1 parent a2638c4
commit ab3b83a
Show file tree

Hide file tree

Showing 11 changed files with 136 additions and 20 deletions.
diff --git a/cmake/onnxruntime_providers_openvino.cmake b/cmake/onnxruntime_providers_openvino.cmake
@@ -37,7 +37,7 @@
 
   source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_openvino_cc_srcs})
   onnxruntime_add_shared_library_module(onnxruntime_providers_openvino ${onnxruntime_providers_openvino_cc_srcs} "${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc")
-  onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx)
+  onnxruntime_add_include_to_target(onnxruntime_providers_openvino onnxruntime_common onnx nlohmann_json::nlohmann_json)
   install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/openvino/openvino_provider_factory.h
     DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
   set_target_properties(onnxruntime_providers_openvino PROPERTIES CXX_STANDARD 20)

diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc
@@ -11,6 +11,7 @@
 #include "core/providers/shared_library/provider_api.h"
 #include "core/providers/openvino/backend_utils.h"
 #include "core/providers/openvino/ov_interface.h"
+#include "nlohmann/json.hpp"
 
 using Exception = ov::Exception;
 
@@ -267,6 +268,32 @@ void printPerformanceCounts(OVInferRequestPtr request, std::ostream& stream, std
   printPerformanceCounts(performanceMap, stream, std::move(deviceName));
 }
 
+// Loads per-device OpenVINO options from the JSON file `filename` and merges them into `config`.
+//
+// Expected layout: a top-level JSON object keyed by device name, where each value is an object
+// mapping property names to string values, e.g. {"CPU": {"SOME_PROPERTY": "value"}}.
+// Entries already present in `config` for the same device/property are overwritten.
+//
+// Throws if the file cannot be opened, cannot be parsed as JSON, or does not match the layout
+// above. `config` is only modified once the whole file has been read successfully.
+void LoadConfig(const std::string& filename, std::map<std::string, ov::AnyMap>& config) {
+  std::ifstream input_filestream(filename);
+  if (!input_filestream.is_open()) {
+    ORT_THROW("Can't load config file \"" + filename + "\".");
+  }
+
+  nlohmann::json json_config;
+  try {
+    input_filestream >> json_config;
+  } catch (const OnnxRuntimeException& ex) {
+    ORT_THROW("Can't parse config file \"" + filename + "\".\n" + ex.what());
+  } catch (const std::exception& ex) {
+    throw std::runtime_error("Standard exception for config file \"" + filename + "\".\n" + ex.what());
+  } catch (...) {
+    throw std::runtime_error("Unknown exception for config file \"" + filename + "\".\n");
+  }
+
+  // Stage into a local map so that a schema error halfway through the file
+  // does not leave the caller's `config` partially updated.
+  std::map<std::string, ov::AnyMap> parsed_config;
+  try {
+    for (auto item = json_config.cbegin(), end = json_config.cend(); item != end; ++item) {
+      const std::string& deviceName = item.key();
+      const auto& item_value = item.value();
+      for (auto option = item_value.cbegin(), item_value_end = item_value.cend(); option != item_value_end; ++option) {
+        parsed_config[deviceName][option.key()] = option.value().get<std::string>();
+      }
+    }
+  } catch (const nlohmann::json::exception& ex) {
+    // key() on a non-object and get<std::string>() on a non-string value both raise
+    // nlohmann exceptions; re-report them with the file name and the expected layout.
+    ORT_THROW("Invalid config file \"" + filename +
+              "\": expected a JSON object mapping device names to objects of string-valued properties.\n" +
+              ex.what());
+  }
+
+  for (auto& [device_name, device_options] : parsed_config) {
+    auto& target_options = config[device_name];
+    for (auto& [option_name, option_value] : device_options) {
+      target_options[option_name] = std::move(option_value);
+    }
+  }
+}
+
 }  // namespace backend_utils
 }  // namespace openvino_ep
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h
@@ -70,6 +70,8 @@ void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
 
 void printPerformanceCounts(OVInferRequestPtr request, std::ostream& stream, std::string deviceName);
 
+// Reads the JSON file `filename` and stores its contents in `config`, keyed by the
+// top-level JSON key (device name) and then by option name; option values are read as strings.
+// Throws if the file cannot be opened or parsed.
+void LoadConfig(const std::string& filename, std::map<std::string, ov::AnyMap>& config);
+
 }  // namespace backend_utils
 }  // namespace openvino_ep
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -178,6 +178,75 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
     }
 #endif
   }
+
+  if (!global_context_.load_config.empty()) {
+    std::map<std::string, ov::AnyMap> target_config;
+    LoadConfig(global_context_.load_config, target_config);
+
+    // Parse device types like "AUTO:CPU,GPU" and extract individual devices
+    auto parse_individual_devices = [&](const std::string& device_type) -> std::vector<std::string> {
+      std::vector<std::string> devices;
+      auto delimiter_pos = device_type.find(':');
+      if (delimiter_pos != std::string::npos) {
+        std::stringstream str_stream(device_type.substr(delimiter_pos + 1));
+        std::string device;
+        while (std::getline(str_stream, device, ',')) {
+          devices.emplace_back(device);
+        }
+      } else {
+        devices.emplace_back(device_type);
+      }
+      return devices;
+    };
+
+    // Check if a property is supported and mutable
+    auto is_supported_and_mutable = [&](const std::string& key,
+                                        const std::vector<ov::PropertyName>& supported_config) -> bool {
+      auto it = std::find_if(supported_config.begin(), supported_config.end(), [&](const ov::PropertyName& property) {
+        return property == key && property.is_mutable();
+      });
+      return it != supported_config.end();
+    };
+
+    // Set each property that is supported and mutable; otherwise log a warning and skip it
+    auto set_target_properties = [&](const std::string& device, const ov::AnyMap& config_options,
+                                     const std::vector<ov::PropertyName>& supported_properties) {
+      for (const auto& [key, value] : config_options) {
+        if (is_supported_and_mutable(key, supported_properties)) {
+          global_context_.ie_core.Get().set_property(device, ov::AnyMap{{key, value}});
+        } else {
+          LOGS_DEFAULT(WARNING) << "WARNING: Property \"" << key
+                                << "\" is either unsupported in current OpenVINO version"
+                                << " or property is immutable for target device \""
+                                << device << "\". Skipping setting this property.";
+        }
+      }
+    };
+
+    // Check if the device type is AUTO, HETERO, or MULTI
+    if (global_context_.device_type.find("AUTO") == 0 ||
+        global_context_.device_type.find("HETERO") == 0 ||
+        global_context_.device_type.find("MULTI") == 0) {
+      // Parse individual devices (e.g., "AUTO:CPU,GPU" -> ["CPU", "GPU"])
+      auto individual_devices = parse_individual_devices(global_context_.device_type);
+      // Set properties only for individual devices (e.g., "CPU", "GPU")
+      for (const std::string& device : individual_devices) {
+        if (target_config.count(device)) {
+          // Get supported properties for each individual device
+          auto device_properties = global_context_.ie_core.Get().get_property(device, ov::supported_properties);
+          // Set properties for the device
+          set_target_properties(device, target_config.at(device), device_properties);
+        }
+      }
+    } else {
+      if (target_config.count(global_context_.device_type)) {
+        auto supported_properties = global_context_.ie_core.Get().get_property(global_context_.device_type,
+                                                                               ov::supported_properties);
+        set_target_properties(global_context_.device_type,
+                              target_config.at(global_context_.device_type), supported_properties);
+      }
+    }
+  }
 }
 
 void BasicBackend::EnableCaching(ov::AnyMap& device_config) {

diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h
@@ -27,6 +27,7 @@ struct GlobalContext {
   std::string precision_str;
   std::string model_precision;
   std::string cache_dir;
+  std::string load_config;
   std::string model_priority = "DEFAULT";
   int num_streams;
   std::vector<bool> deviceAvailableList = {true, true, true, true, true, true, true, true};

diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@@ -27,6 +27,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
   global_context_->precision_str = info.precision_;
   global_context_->enable_npu_fast_compile = info.enable_npu_fast_compile_;
   global_context_->cache_dir = info.cache_dir_;
+  global_context_->load_config = info.load_config_;
   global_context_->model_priority = info.model_priority_;
   global_context_->num_streams = info.num_streams_;
   global_context_->context = info.context_;

diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
@@ -81,6 +81,7 @@ struct OpenVINOExecutionProviderInfo {
   std::string precision_{""};
   bool enable_npu_fast_compile_{false};
   size_t num_of_threads_{0};
+  std::string load_config_{""};
   std::string cache_dir_{""};
   std::string model_priority_{""};
   int num_streams_{1};
@@ -96,14 +97,16 @@ struct OpenVINOExecutionProviderInfo {
 
   explicit OpenVINOExecutionProviderInfo(const std::string& dev_type, const std::string& precision,
                                          bool enable_npu_fast_compile, size_t num_of_threads,
-                                         const std::string& cache_dir, const std::string& model_priority,
-                                         int num_streams, void* context, bool enable_opencl_throttling,
+                                         const std::string& load_config, const std::string& cache_dir,
+                                         const std::string& model_priority, int num_streams,
+                                         void* context, bool enable_opencl_throttling,
                                          bool disable_dynamic_shapes, bool export_ep_ctx_blob,
                                          bool enable_qdq_optimizer, bool disable_cpu_fallback,
                                          bool so_epctx_embed_mode)
       : precision_(std::move(precision)),
         enable_npu_fast_compile_(enable_npu_fast_compile),
         num_of_threads_(num_of_threads),
+        load_config_(std::move(load_config)),
         cache_dir_(std::move(cache_dir)),
         model_priority_(std::move(model_priority)),
         num_streams_(num_streams),

diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@@ -8,17 +8,20 @@
 
 namespace onnxruntime {
 struct OpenVINOProviderFactory : IExecutionProviderFactory {
-  OpenVINOProviderFactory(const char* device_type, const char* precision,
+  OpenVINOProviderFactory(const std::string& device_type, const std::string& precision,
                           bool enable_npu_fast_compile, size_t num_of_threads,
-                          const char* cache_dir, const char* model_priority,
-                          int num_streams, void* context,
+                          const std::string& load_config, const std::string& cache_dir,
+                          const std::string& model_priority, int num_streams, void* context,
                           bool enable_opencl_throttling, bool disable_dynamic_shapes,
                           bool export_ep_ctx_blob, bool enable_qdq_optimizer,
                           bool disable_cpu_fallback,
                           bool so_epctx_embed_mode)
-      : precision_(precision),
+      : device_type_(device_type),
+        precision_(precision),
         enable_npu_fast_compile_(enable_npu_fast_compile),
         num_of_threads_(num_of_threads),
+        load_config_(load_config),
+        cache_dir_(cache_dir),
         model_priority_(model_priority),
         num_streams_(num_streams),
         context_(context),
@@ -27,13 +30,9 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
         export_ep_ctx_blob_(export_ep_ctx_blob),
         enable_qdq_optimizer_(enable_qdq_optimizer),
         disable_cpu_fallback_(disable_cpu_fallback),
-        so_epctx_embed_mode_(so_epctx_embed_mode) {
-    device_type_ = (device_type == nullptr) ? "" : device_type;
-    cache_dir_ = (cache_dir == nullptr) ? "" : cache_dir;
-  }
+        so_epctx_embed_mode_(so_epctx_embed_mode) {}
 
-  ~OpenVINOProviderFactory() override {
-  }
+  ~OpenVINOProviderFactory() override {}
 
   std::unique_ptr<IExecutionProvider> CreateProvider() override;
 
@@ -42,6 +41,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
   std::string precision_;
   bool enable_npu_fast_compile_;
   size_t num_of_threads_;
+  std::string load_config_;
   std::string cache_dir_;
   std::string model_priority_;
   int num_streams_;
@@ -55,7 +55,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
 };
 
 std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
-  OpenVINOExecutionProviderInfo info(device_type_, precision_, enable_npu_fast_compile_, num_of_threads_,
+  OpenVINOExecutionProviderInfo info(device_type_, precision_, enable_npu_fast_compile_, num_of_threads_, load_config_,
                                      cache_dir_, model_priority_, num_streams_, context_, enable_opencl_throttling_,
                                      disable_dynamic_shapes_, export_ep_ctx_blob_, enable_qdq_optimizer_,
                                      disable_cpu_fallback_,
@@ -90,10 +90,11 @@ struct OpenVINO_Provider : Provider {
                                              // speeds up the model's compilation to NPU device specific format.
     int num_of_threads = 0;                  // [num_of_threads]: Overrides the accelerator default value of number of
                                              //  threads with this value at runtime.
+    std::string load_config = "";            // Path to JSON file to load custom OV parameters.
     std::string cache_dir = "";              // [cache_dir]: specify the path to
                                              // dump and load the blobs for the model caching/kernel caching (GPU)
                                              // feature. If blob files are already present, it will be directly loaded.
-    const char* model_priority = "DEFAULT";  // High-level OpenVINO model priority hint
+    std::string model_priority = "DEFAULT";  // High-level OpenVINO model priority hint
                                              // Defines what model should be provided with more performant
                                              // bounded resource first
     int num_streams = 1;                     // [num_streams]: Option that specifies the number of parallel inference
@@ -185,6 +186,10 @@ struct OpenVINO_Provider : Provider {
       cache_dir = provider_options_map.at("cache_dir");
     }
 
+    if (provider_options_map.find("load_config") != provider_options_map.end()) {
+      load_config = provider_options_map.at("load_config");
+    }
+
     if (provider_options_map.find("context") != provider_options_map.end()) {
       std::string str = provider_options_map.at("context");
       uint64_t number = std::strtoull(str.c_str(), nullptr, 16);
@@ -319,11 +324,12 @@ struct OpenVINO_Provider : Provider {
       }
     }
 
-    return std::make_shared<OpenVINOProviderFactory>(const_cast<char*>(device_type.c_str()),
-                                                     const_cast<char*>(precision.c_str()),
+    return std::make_shared<OpenVINOProviderFactory>(device_type,
+                                                     precision,
                                                      enable_npu_fast_compile,
                                                      num_of_threads,
-                                                     const_cast<char*>(cache_dir.c_str()),
+                                                     load_config,
+                                                     cache_dir,
                                                      model_priority,
                                                      num_streams,
                                                      context,

diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc
@@ -1810,6 +1810,7 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O
   // Add new provider option below
   ov_options_converted_map["num_streams"] = "1";
   ov_options_converted_map["export_ep_ctx_blob"] = "false";
+  ov_options_converted_map["load_config"] = "";
   ov_options_converted_map["model_priority"] = "DEFAULT";
   ov_options_converted_map["enable_qdq_optimizer"] = "false";
   return ov_options_converted_map;
@@ -2104,7 +2105,8 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_MIGraphX, _In
   API_IMPL_END
 }
 
-ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO, _In_ OrtSessionOptions* options, _In_ const OrtOpenVINOProviderOptions* provider_options) {
+ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_OpenVINO, _In_ OrtSessionOptions* options,
+                    _In_ const OrtOpenVINOProviderOptions* provider_options) {
   API_IMPL_BEGIN
   auto factory = onnxruntime::OpenVINOProviderFactoryCreator::Create(provider_options);
   if (!factory) {

diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc
@@ -1087,6 +1087,9 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
         } else if (option.first == "num_streams") {
           OV_provider_options_map[option.first] = option.second;
           continue;
+        } else if (option.first == "load_config") {
+          OV_provider_options_map[option.first] = option.second;
+          continue;
         } else if (option.first == "cache_dir") {
           OV_provider_options_map[option.first] = option.second;
           continue;

diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc
@@ -836,6 +836,8 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
         } else {
           ov_options[key] = value;
         }
+      } else if (key == "load_config") {
+        ov_options[key] = value;
       } else if (key == "model_priority") {
         ov_options[key] = value;
       } else if (key == "cache_dir") {
@@ -862,7 +864,7 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)");
           use_device_mem = true;
         }
       } else {
-        ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_npu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling', 'disable_dynamic_shapes'] \n");
+        ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_npu_fast_compile', 'num_of_threads', 'load_config', 'cache_dir', 'num_streams', 'enable_opencl_throttling', 'disable_dynamic_shapes'] \n");
       }
     }
     session_options.AppendExecutionProvider_OpenVINO_V2(ov_options);