Add xai for MaskRCNNModel #95

Merged
merged 5 commits on Jul 3, 2023
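For context, a minimal usage sketch of the XAI outputs this PR adds (not part of the diff; the Python attribute names are assumed to mirror the C++ InstanceSegmentationResult fields added below, and the model path and create_model/call pattern are illustrative):

    import cv2
    from openvino.model_api.models import MaskRCNNModel

    # Hypothetical usage; attribute names assumed from the C++ InstanceSegmentationResult.
    model = MaskRCNNModel.create_model("maskrcnn.xml")  # assumed model path
    image = cv2.imread("sample.jpg")
    result = model(image)

    for obj in result.segmentedObjects:  # per-instance boxes, labels, masks
        print(obj)
    for class_id, smap in enumerate(result.saliency_map):  # one aggregated uint8 map per class (may be empty)
        if smap.size:
            cv2.imwrite(f"saliency_{class_id}.png", smap)
    print(result.feature_vector.shape)  # "feature_vector" model output, if the model exposes it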
32 changes: 11 additions & 21 deletions model_api/cpp/models/include/models/results.h
@@ -120,8 +120,7 @@ struct DetectedObject : public cv::Rect2f {
std::string label;
float confidence;

friend std::ostream& operator<< (std::ostream& os, const DetectedObject& detection)
{
friend std::ostream& operator<< (std::ostream& os, const DetectedObject& detection) {
return os << int(detection.x) << ", " << int(detection.y) << ", " << int(detection.x + detection.width)
<< ", " << int(detection.y + detection.height) << ", "
<< detection.labelID << " (" << detection.label << "): " << std::fixed << std::setprecision(3) << detection.confidence;
@@ -167,14 +166,8 @@ struct RetinaFaceDetectionResult : public DetectionResult {
struct SegmentedObject : DetectedObject {
cv::Mat mask;

friend std::ostream& operator<< (std::ostream& stream, const SegmentedObject& segmentation)
{
stream << "(" << int(segmentation.x) << ", " << int(segmentation.y) << ", " << int(segmentation.x + segmentation.width)
<< ", " << int(segmentation.y + segmentation.height) << ", ";
stream << std::fixed;
stream << std::setprecision(3) << segmentation.confidence << ", ";
stream << std::setprecision(-1) << segmentation.labelID << ", " << segmentation.label << ", " << cv::countNonZero(segmentation.mask > 0.5) << ")";
return stream;
friend std::ostream& operator<< (std::ostream& os, const SegmentedObject& prediction) {
return os << static_cast<const DetectedObject&>(prediction) << ", " << cv::countNonZero(prediction.mask > 0.5);
}
};

@@ -183,18 +176,12 @@ struct SegmentedObjectWithRects : SegmentedObject {

SegmentedObjectWithRects(const SegmentedObject& segmented_object) : SegmentedObject(segmented_object) {}

friend std::ostream& operator<< (std::ostream& stream, const SegmentedObjectWithRects& segmentation)
{
stream << "(" << int(segmentation.x) << ", " << int(segmentation.y) << ", " << int(segmentation.x + segmentation.width)
<< ", " << int(segmentation.y + segmentation.height) << ", ";
stream << std::fixed;
stream << std::setprecision(3) << segmentation.confidence << ", ";
stream << segmentation.labelID << ", " << segmentation.label << ", " << cv::countNonZero(segmentation.mask > 0.5);
for (const cv::RotatedRect& rect : segmentation.rotated_rects) {
stream << ", RotatedRect: " << rect.center.x << ' ' << rect.center.y << ' ' << rect.size.width << ' ' << rect.size.height << ' ' << rect.angle;
friend std::ostream& operator<< (std::ostream& os, const SegmentedObjectWithRects& prediction) {
os << static_cast<const SegmentedObject&>(prediction) << std::fixed << std::setprecision(3);
for (const cv::RotatedRect& rect : prediction.rotated_rects) {
os << ", RotatedRect: " << rect.center.x << ' ' << rect.center.y << ' ' << rect.size.width << ' ' << rect.size.height << ' ' << rect.angle;
}
stream << ")";
return stream;
return os;
}
};

@@ -228,6 +215,9 @@ struct InstanceSegmentationResult : ResultBase {
InstanceSegmentationResult(int64_t frameId = -1, const std::shared_ptr<MetaData>& metaData = nullptr)
: ResultBase(frameId, metaData) {}
std::vector<SegmentedObject> segmentedObjects;
// Contains per-class saliency_maps and the "feature_vector" model output if feature_vector exists
std::vector<cv::Mat_<std::uint8_t>> saliency_map;
ov::Tensor feature_vector;
};

struct ImageResult : public ResultBase {
100 changes: 89 additions & 11 deletions model_api/cpp/models/src/instance_segmentation.cpp
@@ -34,6 +34,19 @@
#include "utils/common.hpp"

namespace {
constexpr char saliency_map_name[]{"saliency_map"};
constexpr char feature_vector_name[]{"feature_vector"};

void append_xai_names(const std::vector<ov::Output<ov::Node>>& outputs, std::vector<std::string>& outputNames) {
for (const ov::Output<ov::Node>& output : outputs) {
if (output.get_names().count(saliency_map_name) > 0) {
outputNames.emplace_back(saliency_map_name);
} else if (output.get_names().count(feature_vector_name) > 0) {
outputNames.push_back(feature_vector_name);
}
}
}

cv::Rect expand_box(const cv::Rect2f& box, float scale) {
float w_half = box.width * 0.5f * scale,
h_half = box.height * 0.5f * scale;
@@ -60,6 +73,42 @@ cv::Mat segm_postprocess(const SegmentedObject& box, const cv::Mat& unpadded, in
im_mask(cv::Rect{x0, y0, x1-x0, y1-y0}).setTo(1, resized({cv::Point(x0-extended_box.x, y0-extended_box.y), cv::Point(x1-extended_box.x, y1-extended_box.y)}) > 0.5f);
return im_mask;
}

std::vector<cv::Mat_<std::uint8_t>> average_and_normalize(const std::vector<std::vector<cv::Mat>>& saliency_maps) {
std::vector<cv::Mat_<std::uint8_t>> aggregated;
aggregated.reserve(saliency_maps.size());
for (const std::vector<cv::Mat>& per_class_maps : saliency_maps) {
if (per_class_maps.empty()) {
aggregated.emplace_back();
} else {
cv::Mat_<double> saliency_map{per_class_maps.front().size()};
for (const cv::Mat& per_class_map : per_class_maps) {
if (saliency_map.size != per_class_map.size) {
throw std::runtime_error("saliency_maps must have same size");
} if (per_class_map.channels() != 1) {
throw std::runtime_error("saliency_maps must have one channel");
} if (per_class_map.type() != CV_8U) {
throw std::runtime_error("saliency_maps must have type CV_8U");
}
}
for (int row = 0; row < saliency_map.rows; ++row) {
for (int col = 0; col < saliency_map.cols; ++col) {
double sum = 0.0;
for (const cv::Mat& per_class_map : per_class_maps) {
sum += per_class_map.at<std::uint8_t>(row, col);
}
saliency_map.at<double>(row, col) = sum / per_class_maps.size();
}
}
double min, max;
cv::minMaxLoc(saliency_map, &min, &max);
cv::Mat_<std::uint8_t> converted;
saliency_map.convertTo(converted, CV_8U, 255.0 / (max + 1e-12));
aggregated.push_back(std::move(converted));
}
}
return aggregated;
}
}
std::string MaskRCNNModel::ModelType = "MaskRCNN";

@@ -184,25 +233,38 @@ void MaskRCNNModel::prepareInputsOutputs(std::shared_ptr<ov::Model>& model) {
}

// --------------------------- Prepare output -----------------------------------------------------
if (model->outputs().size() != 3) {
throw std::logic_error("MaskRCNNModel model wrapper supports topologies with only 3 outputs");
struct NameRank {
std::string name;
size_t rank;
};
std::vector<NameRank> filtered;
filtered.reserve(3);
for (ov::Output<ov::Node>& output : model->outputs()) {
const std::unordered_set<std::string>& out_names = output.get_names();
if (out_names.find(saliency_map_name) == out_names.end() && out_names.find(feature_vector_name) == out_names.end()) {
filtered.push_back({output.get_any_name(), output.get_partial_shape().get_max_shape().size()});
}
}
if (filtered.size() != 3) {
throw std::logic_error(std::string{"MaskRCNNModel model wrapper supports topologies with "} + saliency_map_name + ", " + feature_vector_name + " and 3 other outputs");
}
outputNames.resize(3);
for (const auto& output : model->outputs()) {
switch (output.get_partial_shape().get_max_shape().size()) {
for (const NameRank& name_rank : filtered) {
switch (name_rank.rank) {
case 2:
outputNames[0] = output.get_any_name();
outputNames[0] = name_rank.name;
break;
case 3:
outputNames[1] = output.get_any_name();
outputNames[1] = name_rank.name;
break;
case 4:
outputNames[2] = output.get_any_name();
outputNames[2] = name_rank.name;
break;
default:
throw std::runtime_error("Unexpected output: " + output.get_any_name());
throw std::runtime_error("Unexpected output: " + name_rank.name);
}
}
append_xai_names(model->outputs(), outputNames);
}

std::unique_ptr<ResultBase> MaskRCNNModel::postprocess(InferenceResult& infResult) {
@@ -226,9 +288,17 @@ std::unique_ptr<ResultBase> MaskRCNNModel::postprocess(InferenceResult& infResul
const cv::Size& masks_size{int(infResult.outputsData[outputNames[2]].get_shape()[3]), int(infResult.outputsData[outputNames[2]].get_shape()[2])};
InstanceSegmentationResult* result = new InstanceSegmentationResult(infResult.frameId, infResult.metaData);
auto retVal = std::unique_ptr<ResultBase>(result);
std::vector<std::vector<cv::Mat>> saliency_maps;
bool has_feature_vector_name = std::find(outputNames.begin(), outputNames.end(), feature_vector_name) != outputNames.end();
if (has_feature_vector_name) {
if (this->labels.empty()) {
throw std::runtime_error("Can't get number of classes because labels are empty");
}
saliency_maps.resize(this->labels.size());
}
for (size_t i = 0; i < infResult.outputsData[outputNames[0]].get_size(); ++i) {
float confidence = boxes[i * objectSize + 4];
if (confidence <= confidence_threshold) {
if (confidence <= confidence_threshold && !has_feature_vector_name) {
continue;
}
SegmentedObject obj;
@@ -258,8 +328,16 @@ std::unique_ptr<ResultBase> MaskRCNNModel::postprocess(InferenceResult& infResul
} else {
obj.mask = raw_cls_mask;
}
result->segmentedObjects.push_back(obj);

if (confidence > confidence_threshold) {
result->segmentedObjects.push_back(obj);
}
if (has_feature_vector_name) {
saliency_maps[obj.labelID - 1].push_back(obj.mask);
Collaborator:

Is that mask resized, or do we not have a flag similar to postprocess_semantic_masks?

Collaborator Author:

Resized. There's this on top:

        if (postprocess_semantic_masks) {
            obj.mask = segm_postprocess(obj, raw_cls_mask, internalData.inputImgHeight, internalData.inputImgWidth);
        }

}
}
result->saliency_map = average_and_normalize(saliency_maps);
if (has_feature_vector_name) {
result->feature_vector = std::move(infResult.outputsData[feature_vector_name]);
}
return retVal;
}
2 changes: 2 additions & 0 deletions model_api/python/openvino/model_api/models/__init__.py
@@ -48,6 +48,7 @@
DetectionWithLandmarks,
ImageResultWithSoftPrediction,
InputTransform,
InstanceSegmentationResult,
OutputTransform,
SegmentedObject,
SegmentedObjectWithRects,
@@ -92,6 +93,7 @@
"ImageModel",
"ImageResultWithSoftPrediction",
"InputTransform",
"InstanceSegmentationResult",
"MaskRCNNModel",
"Model",
"MonoDepthModel",
88 changes: 67 additions & 21 deletions model_api/python/openvino/model_api/models/instance_segmentation.py
@@ -19,7 +19,7 @@

from .image_model import ImageModel
from .types import BooleanValue, ListValue, NumericalValue, StringValue
from .utils import SegmentedObject, load_labels, nms
from .utils import InstanceSegmentationResult, SegmentedObject, load_labels, nms


class MaskRCNNModel(ImageModel):
@@ -57,8 +57,15 @@ def parameters(cls):
def _get_outputs(self):
if self.is_segmentoly:
return self._get_segmentoly_outputs()
filtered_names = []
for name, output in self.outputs.items():
if (
_saliency_map_name not in output.names
and _feature_vector_name not in output.names
):
filtered_names.append(name)
outputs = {}
for layer_name in self.outputs:
for layer_name in filtered_names:
if layer_name.startswith("TopK"):
continue
layer_shape = self.outputs[layer_name].shape
@@ -70,9 +77,10 @@ def _get_outputs(self):
elif len(layer_shape) == 3:
outputs["masks"] = layer_name
if len(outputs) == 3:
_append_xai_names(self.outputs, outputs)
return outputs
outputs = {}
for layer_name in self.outputs:
for layer_name in filtered_names:
if layer_name.startswith("TopK"):
continue
layer_shape = self.outputs[layer_name].shape
@@ -84,6 +92,7 @@
elif len(layer_shape) == 4:
outputs["masks"] = layer_name
if len(outputs) == 3:
_append_xai_names(self.outputs, outputs)
return outputs
self.raise_error(f"Unexpected outputs: {self.outputs}")

@@ -143,13 +152,8 @@ def postprocess(self, outputs, meta):
if self.is_segmentoly
else outputs[self.output_blob_name["boxes"]][:, 4]
)
detections_filter = scores > self.confidence_threshold
boxes, scores, labels, masks = (
boxes[detections_filter],
scores[detections_filter],
outputs[self.output_blob_name["labels"]][detections_filter],
outputs[self.output_blob_name["masks"]][detections_filter],
)
labels = outputs[self.output_blob_name["labels"]]
masks = outputs[self.output_blob_name["masks"]]
if not self.is_segmentoly:
labels += 1
if self.labels is None:
@@ -187,21 +191,52 @@ def postprocess(self, outputs, meta):
out=boxes,
)

resized_masks = []
for box, cls, raw_mask in zip(boxes, labels, masks):
objects = []
has_feature_vector_name = _feature_vector_name in self.outputs
if has_feature_vector_name:
if not self.labels:
self.raise_error("Can't get number of classes because labels are empty")
saliency_maps = [[] for _ in range(len(self.labels))]
else:
saliency_maps = []
for box, confidence, cls, str_label, raw_mask in zip(
boxes, scores, labels, str_labels, masks
):
if confidence <= self.confidence_threshold and not has_feature_vector_name:
continue
raw_cls_mask = raw_mask[cls, ...] if self.is_segmentoly else raw_mask
if self.postprocess_semantic_masks:
resized_masks.append(
_segm_postprocess(box, raw_cls_mask, *meta["original_shape"][:-1])
resized_mask = _segm_postprocess(
box, raw_cls_mask, *meta["original_shape"][:-1]
)
else:
resized_masks.append(raw_cls_mask)
return [
SegmentedObject(*box, confidence, label, str_label, mask)
for box, confidence, label, str_label, mask in zip(
boxes.astype(int), scores, labels, str_labels, resized_masks
)
]
resized_mask = raw_cls_mask
if confidence > self.confidence_threshold:
objects.append(
SegmentedObject(
*box.astype(int), confidence, cls, str_label, resized_mask
)
)
if has_feature_vector_name:
saliency_maps[cls - 1].append(resized_mask)
return InstanceSegmentationResult(
objects,
_average_and_normalize(saliency_maps),
outputs.get(_feature_vector_name, np.ndarray(0)),
)


def _average_and_normalize(saliency_maps):
aggregated = []
for per_class_maps in saliency_maps:
if per_class_maps:
saliency_map = np.array(per_class_maps).mean(0)
max_values = np.max(saliency_map)
saliency_map = 255 * (saliency_map) / (max_values + 1e-12)
aggregated.append(saliency_map.astype(np.uint8))
else:
aggregated.append(np.ndarray(0))
return aggregated


def _expand_box(box, scale):
Expand Down Expand Up @@ -423,3 +458,14 @@ def _sanitize_coordinates(_x1, _x2, img_size, shift=0, padding=0):
x1 = np.clip(_x1 - padding, 0, img_size)
x2 = np.clip(_x2 + padding, 0, img_size)
return x1, x2


_saliency_map_name = "saliency_map"
_feature_vector_name = "feature_vector"


def _append_xai_names(outputs, output_names):
if _saliency_map_name in outputs:
output_names["saliency_map"] = _saliency_map_name
if _feature_vector_name in outputs:
output_names["feature_vector"] = _feature_vector_name