Skip to content

Commit

Permalink
Enable QDQ cleanup pass for OVEP
Browse files Browse the repository at this point in the history
- Add QDQ cleanup as a Level 1 optimization only for OpenVINO
- Fix QDQ cleanup pass so it removes all Q-DQ pairs (by removing all
output edges of DQ nodes instead of assuming there is only one output)
- ORT removes remaining DQ layers and adds quantization weights as
initializers of Conv
  • Loading branch information
sspintel authored and ubuntu committed Apr 23, 2024
1 parent e6a677f commit 826bffb
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 19 deletions.
13 changes: 11 additions & 2 deletions onnxruntime/core/optimizer/graph_transformer_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -250,15 +250,20 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
// run TransposeOptimizer last as it works in a slightly different way by moving Transpose nodes around.
// shouldn't affect the end result - just easier to debug any issue if it's last.
transformers.emplace_back(std::make_unique<TransposeOptimizer>(std::move(cpu_allocator)));
#if defined(USE_OPENVINO)
const bool enable_quant_qdq_cleanup =
session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsEnableQuantQDQCleanup, "0") == "1";
// The QDQFinalCleanupTransformer must run AFTER other transformers that fuse Q/DQ nodes. Otherwise, their
// fusions might be prevented if this one removes a Q/DQ node too early.
transformers.emplace_back(std::make_unique<QDQFinalCleanupTransformer>(enable_quant_qdq_cleanup));
#endif
} break;

case TransformerLevel::Level2: {
// we run TransposeOptimizer again in Level2 for some CPU EP specific optimizations that can only be
// applied once nodes are assigned to the CPU EP (which happens between level 1 and level 2).
transformers.emplace_back(std::make_unique<TransposeOptimizer>(std::move(cpu_allocator), kCpuExecutionProvider));

const bool enable_quant_qdq_cleanup =
session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsEnableQuantQDQCleanup, "0") == "1";
#if !defined(DISABLE_CONTRIB_OPS)
const bool qdq_is_int8_allowed =
session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsQDQIsInt8Allowed,
Expand Down Expand Up @@ -354,9 +359,13 @@ InlinedVector<std::unique_ptr<GraphTransformer>> GenerateTransformers(
#endif

#endif // !defined(DISABLE_CONTRIB_OPS)
#if !defined(USE_OPENVINO)
const bool enable_quant_qdq_cleanup =
session_options.config_options.GetConfigOrDefault(kOrtSessionOptionsEnableQuantQDQCleanup, "0") == "1";
// The QDQFinalCleanupTransformer must run AFTER other transformers that fuse Q/DQ nodes. Otherwise, their
// fusions might be prevented if this one removes a Q/DQ node too early.
transformers.emplace_back(std::make_unique<QDQFinalCleanupTransformer>(enable_quant_qdq_cleanup));
#endif

} break;

Expand Down
34 changes: 17 additions & 17 deletions onnxruntime/core/optimizer/qdq_transformer/qdq_final_cleanup.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@ bool CleanUpNodeSequence(NodeSequence node_sequence_type, Graph& graph, NodeInde

// we have a node sequence to clean up

// we support a second_node that produces a graph output if it has no output edges, or a second_node with one output edge.
// we support a second_node that produces a graph output if it has no output edges, or

Check warning on line 59 in onnxruntime/core/optimizer/qdq_transformer/qdq_final_cleanup.cc

View workflow job for this annotation

GitHub Actions / cpplint

[cpplint] onnxruntime/core/optimizer/qdq_transformer/qdq_final_cleanup.cc#L59

Line ends in whitespace. Consider deleting these extra spaces. [whitespace/end_of_line] [4]
Raw output
onnxruntime/core/optimizer/qdq_transformer/qdq_final_cleanup.cc:59:  Line ends in whitespace.  Consider deleting these extra spaces.  [whitespace/end_of_line] [4]
// a second_node with one or more output edges
const bool produces_graph_output = graph.NodeProducesGraphOutput(second_node);
const auto output_edges_count = second_node.GetOutputEdgesCount();

if ((produces_graph_output && output_edges_count != 0) ||
(!produces_graph_output && output_edges_count != 1)) {
if (produces_graph_output && output_edges_count != 0) {
return false;
}

Expand Down Expand Up @@ -92,20 +92,20 @@ bool CleanUpNodeSequence(NodeSequence node_sequence_type, Graph& graph, NodeInde

if (!produces_graph_output) {
// remove edge to downstream node
const Node::EdgeEnd& output_edge = *second_node.OutputEdgesBegin();
downstream_node_idx = output_edge.GetNode().Index();
downstream_arg_idx = output_edge.GetDstArgIndex();

// source arg idx is 0 as Q/DQ only has one output
graph.RemoveEdge(second_node.Index(), downstream_node_idx, 0, downstream_arg_idx);

// replace input on downstream node
Node& downstream_node = *graph.GetNode(downstream_node_idx);
downstream_node.MutableInputDefs()[downstream_arg_idx] = first_node.MutableInputDefs()[0];

// create edge between src_node (if available) and downstream node
if (input_edge) {
graph.AddEdge(src_node_idx, downstream_node_idx, src_arg_idx, downstream_arg_idx);
for (unsigned int idx = 0; idx < output_edges_count; idx++) {
const Node::EdgeEnd& output_edge = *second_node.OutputEdgesBegin();
downstream_node_idx = output_edge.GetNode().Index();
downstream_arg_idx = output_edge.GetDstArgIndex();
graph.RemoveEdge(second_node.Index(), downstream_node_idx, 0, downstream_arg_idx);

// replace input on downstream node
Node& downstream_node = *graph.GetNode(downstream_node_idx);
downstream_node.MutableInputDefs()[downstream_arg_idx] = first_node.MutableInputDefs()[0];

// create edge between src_node (if available) and downstream node
if (input_edge) {
graph.AddEdge(src_node_idx, downstream_node_idx, src_arg_idx, downstream_arg_idx);
}
}
} else {
NodeArg* graph_output_nodearg = second_node.MutableOutputDefs()[0];
Expand Down

0 comments on commit 826bffb

Please sign in to comment.