forked from guichristmann/edge-tpu-tiny-yolo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
inference.py
232 lines (181 loc) · 7.57 KB
/
inference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
import numpy as np
import tensorflow as tf
import sys
import cv2
from time import time
from utils import *
import argparse
EDGETPU_SHARED_LIB = "libedgetpu.so.1"
parser = argparse.ArgumentParser("Run TF-Lite YOLO-V3 Tiny inference.")
parser.add_argument("--model", required=True, help="Model to load.")
parser.add_argument("--anchors", required=True, help="Anchors file.")
parser.add_argument("--classes", required=True, help="Classes (.names) file.")
parser.add_argument("-t", "--threshold", help="Detection threshold.",
type=float,
default=0.25)
parser.add_argument("--edge_tpu",
help="Whether to delegate to Edge TPU or run on CPU.",
action="store_true")
parser.add_argument("--quant",
help="Indicates whether the model is quantized.",
action="store_true")
parser.add_argument("--cam", help="Run inference on webcam.",
action="store_true")
parser.add_argument("--image", help="Run inference on image.")
parser.add_argument("--video", help="Run inference on video.")
args = parser.parse_args()
def make_interpreter(model_path, edge_tpu=False):
# Load the TF-Lite model and delegate to Edge TPU
if edge_tpu:
interpreter = tf.lite.Interpreter(model_path=model_path,
experimental_delegates=[
tf.lite.experimental.load_delegate(EDGETPU_SHARED_LIB)
])
else:
interpreter = tf.lite.Interpreter(model_path=model_path)
return interpreter
# Run YOLO inference on the image, returns detected boxes
def inference(interpreter, img, anchors, n_classes, threshold):
input_details, output_details, net_input_shape = \
get_interpreter_details(interpreter)
img_orig_shape = img.shape
# Crop frame to network input shape
img = letterbox_image(img.copy(), (416, 416))
# Add batch dimension
img = np.expand_dims(img, 0)
if not args.quant:
# Normalize image from 0 to 1
img = np.divide(img, 255.).astype(np.float32)
# Set input tensor
interpreter.set_tensor(input_details[0]['index'], img)
start = time()
# Run model
interpreter.invoke()
inf_time = time() - start
print(f"Net forward-pass time: {inf_time*1000} ms.")
# Retrieve outputs of the network
out1 = interpreter.get_tensor(output_details[0]['index'])
out2 = interpreter.get_tensor(output_details[1]['index'])
# If this is a quantized model, dequantize the outputs
if args.quant:
# Dequantize output
o1_scale, o1_zero = output_details[0]['quantization']
out1 = (out1.astype(np.float32) - o1_zero) * o1_scale
o2_scale, o2_zero = output_details[1]['quantization']
out2 = (out2.astype(np.float32) - o2_zero) * o2_scale
# Get boxes from outputs of network
start = time()
_boxes1, _scores1, _classes1 = featuresToBoxes(out1, anchors[[3, 4, 5]],
n_classes, net_input_shape, img_orig_shape, threshold)
_boxes2, _scores2, _classes2 = featuresToBoxes(out2, anchors[[1, 2, 3]],
n_classes, net_input_shape, img_orig_shape, threshold)
inf_time = time() - start
print(f"Box computation time: {inf_time*1000} ms.")
# This is needed to be able to append nicely when the output layers don't
# return any boxes
if _boxes1.shape[0] == 0:
_boxes1 = np.empty([0, 2, 2])
_scores1 = np.empty([0,])
_classes1 = np.empty([0,])
if _boxes2.shape[0] == 0:
_boxes2 = np.empty([0, 2, 2])
_scores2 = np.empty([0,])
_classes2 = np.empty([0,])
boxes = np.append(_boxes1, _boxes2, axis=0)
scores = np.append(_scores1, _scores2, axis=0)
classes = np.append(_classes1, _classes2, axis=0)
if len(boxes) > 0:
boxes, scores, classes = nms_boxes(boxes, scores, classes)
return boxes, scores, classes
def draw_boxes(image, boxes, scores, classes, class_names):
i = 0
for topleft, botright in boxes:
# Detected class
cl = int(classes[i])
# This stupid thing below is needed for opencv to use as a color
color = tuple(map(int, colors[cl]))
# Box coordinates
topleft = (int(topleft[0]), int(topleft[1]))
botright = (int(botright[0]), int(botright[1]))
# Draw box and class
cv2.rectangle(image, topleft, botright, color, 2)
textpos = (topleft[0]-2, topleft[1] - 3)
score = scores[i] * 100
cl_name = class_names[cl]
text = f"{cl_name} ({score:.1f}%)"
cv2.putText(image, text, textpos, cv2.FONT_HERSHEY_DUPLEX,
0.45, color, 1, cv2.LINE_AA)
i += 1
def get_interpreter_details(interpreter):
# Get input and output tensor details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
input_shape = input_details[0]["shape"]
return input_details, output_details, input_shape
def webcam_inf(interpreter, anchors, classes, threshold=0.25):
cap = cv2.VideoCapture(0)
input_details, output_details, input_shape = \
get_interpreter_details(interpreter)
n_classes = len(classes)
# Load and process image
while True:
# Read frame from webcam
ret, frame = cap.read()
# Run inference, get boxes
boxes, scores, pred_classes = inference(interpreter, frame, anchors, n_classes, threshold)
if len(boxes) > 0:
draw_boxes(frame, boxes, scores, pred_classes, classes)
cv2.imshow("Image", frame)
k = cv2.waitKey(1) & 0xFF
if k == ord('q'):
break
cap.release()
def video_inf(interpreter, anchors, classes, path, threshold=0.25):
cap = cv2.VideoCapture(path)
input_details, output_details, input_shape = \
get_interpreter_details(interpreter)
n_classes = len(classes)
# Load and process image
while True:
# Read frame from webcam
ret, frame = cap.read()
# Run inference, get boxes
start = time()
boxes, scores, pred_classes = inference(interpreter, frame, anchors, n_classes, threshold)
if len(boxes) > 0:
draw_boxes(frame, boxes, scores, pred_classes, classes)
inf_time = time() - start
fps = 1. / inf_time
print(f"Inference time: {inf_time*1000} ms.")
cv2.putText(frame, f"FPS: {fps:.1f}", (20, 20), cv2.FONT_HERSHEY_DUPLEX,
0.45, (200, 0, 200), 1, cv2.LINE_AA)
cv2.imshow("Image", frame)
k = cv2.waitKey(1) & 0xFF
if k == ord('q'):
break
cap.release()
def image_inf(interpreter, anchors, img_fn, classes, threshold):
img = cv2.imread(img_fn)
n_classes = len(classes)
input_details, output_details, input_shape = \
get_interpreter_details(interpreter)
# Run inference, get boxes
boxes, scores, pred_classes = inference(interpreter, img, anchors, n_classes, threshold)
if len(boxes) > 0:
draw_boxes(img, boxes, scores, pred_classes, classes)
cv2.imshow("Image", img)
cv2.waitKey(0)
if __name__ == "__main__":
anchors = get_anchors(args.anchors)
classes = get_classes(args.classes)
# Generate random colors for each detection
colors = np.random.uniform(30, 255, size=(len(classes), 3))
interpreter = make_interpreter(args.model, args.edge_tpu)
print("Allocating tensors.")
interpreter.allocate_tensors()
if args.cam:
webcam_inf(interpreter, anchors, classes, threshold=args.threshold)
elif args.image:
image_inf(interpreter, anchors, args.image, classes, threshold=args.threshold)
elif args.video:
video_inf(interpreter, anchors, classes, args.video, threshold=args.threshold)