15.3.2021
In the previous parts of this series I described how to collect data and train a neural network. So far I had been collecting data only while driving the pásovec by hand. Now the time has come to deploy the trained neural network.
In theory, deploying the network is very simple: take the trained model, open it for inference, feed it an image, and out comes the left-right-straight decision. That is how it can work on a PC with an ordinary graphics card. The Nvidia Jetson Nano, however, does not carry an ordinary desktop graphics card; it has an integrated Nvidia GPU. For inference to run fast enough, the network has to be converted from the TensorFlow format to the TensorRT format. The result is a binary blob that is loaded into the GPU. Working out the TensorFlow-to-TensorRT conversion procedure took an estimated 80% of all the time I have spent on the pásovec, because Nvidia kept throwing one obstacle after another in my way.
But in the end I managed to compile and get the onnx-tensorrt package working, so that I could convert the TensorFlow model into the form the Nvidia Jetson Nano needs.
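For reference, the build went roughly like this; this is a sketch based on the onnx-tensorrt README, and the TENSORRT_ROOT path is an assumption that depends on your JetPack installation:

git clone --recursive https://github.com/onnx/onnx-tensorrt.git
cd onnx-tensorrt
mkdir build && cd build
# Point cmake at the TensorRT headers/libraries shipped with JetPack
cmake .. -DTENSORRT_ROOT=/usr/src/tensorrt
make -j4
sudo make install

With the tool built, the conversion itself takes two steps.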
On the training PC with TensorFlow:
python3 -m tf2onnx.convert --output model.onnx --saved-model model
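Optionally, the exported file can be sanity-checked before moving on; a minimal check with the onnx package (not a required step of the workflow):

import onnx

# Structural validation of the exported graph; raises an exception
# if the model is inconsistent
model = onnx.load("model.onnx")
onnx.checker.check_model(model)
print("model.onnx passed the checker")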
The result is the file model.onnx, which I copied over to the pásovec and converted there to TensorRT:
onnx2trt model.onnx -o model-stereo.trt
This produces a file with a serialized TensorRT engine, which can be loaded into the GPU very quickly (a matter of seconds) and on which inference runs much faster than in TensorFlow.
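For illustration, loading the engine uses the tensorrt Python bindings; a minimal sketch with a timer, so the load-time claim can be checked (the same deserialization calls appear in the full program below):

import time
import tensorrt as trt

logger = trt.Logger(trt.Logger.INFO)
runtime = trt.Runtime(logger)

start = time.time()
with open("model-stereo.trt", "rb") as f:
    # Turn the binary blob back into an executable CUDA engine
    engine = runtime.deserialize_cuda_engine(f.read())
print("engine loaded in {:.1f} s".format(time.time() - start))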
In the third part I described the data-collection program. I have extended that part so that every frame is also processed by the neural network:
import cv2
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import time
import paho.mqtt.client as mqtt
import paho.mqtt.publish as publish

remoteHost = "cincila.hobrasoft.cz"
remotePortMonitor = 5000
remotePortOK = 5001
remotePortXL = 5002
remotePortXX = 5003
remotePortXR = 5004


class Kamera:
    def __init__(self, engine_path):
        # Deserialize the TensorRT engine from the binary blob on disk
        self.trt_logger = trt.Logger(trt.Logger.INFO)
        self.trt_runtime = trt.Runtime(self.trt_logger)
        with open(engine_path, 'rb') as f:
            engine_data = f.read()
        self.engine = self.trt_runtime.deserialize_cuda_engine(engine_data)
        # print(self.engine.get_binding_shape(0))

        # Allocate page-locked host buffers and device buffers
        # for the network input and output
        self.h_input_1 = cuda.pagelocked_empty(
            trt.volume(self.engine.get_binding_shape(0)),
            dtype=trt.nptype(trt.float32))
        self.d_input_1 = cuda.mem_alloc(self.h_input_1.nbytes)
        self.h_output = cuda.pagelocked_empty(
            trt.volume(self.engine.get_binding_shape(1)),
            dtype=trt.nptype(trt.float32))
        self.d_output = cuda.mem_alloc(self.h_output.nbytes)
        self.stream = cuda.Stream()

        # Open cameras and the UDP video outputs
        self.open_cameraL()
        self.open_cameraR()
        self.open_output_XX()
        self.open_output_XL()
        self.open_output_XR()
        self.open_output_OK()
        self.open_output_Monitor()

        # Open MQTT
        publish.single("pasovec/camera", "started", hostname="localhost")
        self.client = mqtt.Client("camera")
        self.client.connect("localhost")
        self.client.subscribe("pasovec/+")
        self.client.on_message = self.on_message

        self.saveOK = False
        self.saveXL = False
        self.saveXR = False
        self.saveXX = False
        self.automat = False

        # RUN
        self.run()

    def on_message(self, client, userdata, message):
        # Keyboard commands: save the current frame into one of the four
        # categories, or switch to autonomous mode
        payload = str(message.payload.decode('utf8'))
        topic = message.topic
        if topic == 'pasovec/kbd':
            self.automat = False
            if payload == "OK":
                self.saveOK = True
            if payload == "XX":
                self.saveXX = True
            if payload == "XL":
                self.saveXL = True
            if payload == "XR":
                self.saveXR = True
            if payload == "auto":
                self.automat = True
        return

    def run(self):
        oldStatus = "OK"
        while True:
            stime = time.time()
            imgL = self.captureL()
            imgR = self.captureR()
            np_img = np.concatenate((imgL, imgR), axis=1)
            OK, XX, XL, XR = self.processImage(np_img)
            status = "OK"
            if XX > 0.5:
                status = "XX"
            if XL > 0.5:
                status = "XL"
            if XR > 0.5:
                status = "XR"
            print("OK: {} XL: {} XX: {} XR: {} time: {} status: {}".format(
                int(100*OK), int(100*XL), int(100*XX), int(100*XR),
                int(1000*(time.time()-stime)), status))
            # Publish only status changes, not every frame
            if oldStatus != status:
                print("Change ", status)
                publish.single("pasovec/camera", status, hostname="localhost")
                oldStatus = status
            self.client.loop(0.05)
            if self.saveOK:
                print("Save OK")
                self.saveOK = False
                np_imgL = np.uint8(imgL)
                np_imgR = np.uint8(imgR)
                np_img = np.concatenate((np_imgL, np_imgR), axis=1)
                self.video_output_OK.write(np_img)
            if self.saveXX:
                print("Save XX")
                self.saveXX = False
                np_imgL = np.uint8(imgL)
                np_imgR = np.uint8(imgR)
                np_img = np.concatenate((np_imgL, np_imgR), axis=1)
                self.video_output_XX.write(np_img)
            if self.saveXL:
                print("Save XL")
                self.saveXL = False
                np_imgL = np.uint8(imgL)
                np_imgR = np.uint8(imgR)
                np_img = np.concatenate((np_imgL, np_imgR), axis=1)
                self.video_output_XL.write(np_img)
            if self.saveXR:
                print("Save XR")
                self.saveXR = False
                np_imgL = np.uint8(imgL)
                np_imgR = np.uint8(imgR)
                np_img = np.concatenate((np_imgL, np_imgR), axis=1)
                self.video_output_XR.write(np_img)
            # Overlay the network's verdict on the monitoring stream
            if XL > 0.5:
                cv2.putText(np_img, "<<<<", (125, 140), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 4)
            if XX > 0.5:
                cv2.putText(np_img, "xxxx", (285, 140), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 4)
            if XR > 0.5:
                cv2.putText(np_img, ">>>>", (445, 140), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 4)
            if self.automat:
                cv2.putText(np_img, "AUTO", (285, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 4)
            np_img = np.uint8(np_img)
            self.video_output_Monitor.write(np_img)

    def open_cameraL(self):
        string = ('nvarguscamerasrc sensor-id=0 ! ' +
                  'video/x-raw(memory:NVMM), width=1280, height=720, format=NV12, framerate=60/1 ! ' +
                  'nvvidconv flip-method=0 left=50 right=1230 top=0 bottom=620 ! ' +
                  'video/x-raw, width=320, height=160, format=BGRx ! videoconvert ! ' +
                  'video/x-raw, format=BGR ! ' +
                  'appsink drop=true sync=false max-lateness=6000')
        self.video_captureL = cv2.VideoCapture(string, cv2.CAP_GSTREAMER)
        assert self.video_captureL.isOpened(), "Could not open left camera"

    def open_cameraR(self):
        string = ('nvarguscamerasrc sensor-id=1 ! ' +
                  'video/x-raw(memory:NVMM), width=1280, height=720, format=NV12, framerate=60/1 ! ' +
                  'nvvidconv flip-method=0 left=45 right=1225 top=52 bottom=672 ! ' +
                  'video/x-raw, width=320, height=160, format=BGRx ! videoconvert ! ' +
                  'video/x-raw, format=BGR ! ' +
                  'appsink drop=true sync=false max-lateness=6000')
        self.video_captureR = cv2.VideoCapture(string, cv2.CAP_GSTREAMER)
        assert self.video_captureR.isOpened(), "Could not open right camera"

    def open_output_XL(self):
        string = ('appsrc ! videoconvert ! ' +
                  'video/x-raw, format=YUY2 ! ' +
                  'jpegenc ! rtpjpegpay ! ' +
                  'udpsink host={} port={}'.format(remoteHost, remotePortXL))
        self.video_output_XL = cv2.VideoWriter(
            string,
            apiPreference=0,
            fourcc=cv2.VideoWriter.fourcc('M', 'J', 'P', 'G'),
            fps=25,
            frameSize=(640, 160),
            isColor=True)
        assert self.video_output_XL.isOpened(), "Error: Could not open video output XL"

    def open_output_XR(self):
        string = ('appsrc ! videoconvert ! ' +
                  'video/x-raw, format=YUY2 ! ' +
                  'jpegenc ! rtpjpegpay ! ' +
                  'udpsink host={} port={}'.format(remoteHost, remotePortXR))
        self.video_output_XR = cv2.VideoWriter(
            string,
            apiPreference=0,
            fourcc=cv2.VideoWriter.fourcc('M', 'J', 'P', 'G'),
            fps=25,
            frameSize=(640, 160),
            isColor=True)
        assert self.video_output_XR.isOpened(), "Error: Could not open video output XR"

    def open_output_XX(self):
        string = ('appsrc ! videoconvert ! ' +
                  'video/x-raw, format=YUY2 ! ' +
                  'jpegenc ! rtpjpegpay ! ' +
                  'udpsink host={} port={}'.format(remoteHost, remotePortXX))
        self.video_output_XX = cv2.VideoWriter(
            string,
            apiPreference=0,
            fourcc=cv2.VideoWriter.fourcc('M', 'J', 'P', 'G'),
            fps=25,
            frameSize=(640, 160),
            isColor=True)
        assert self.video_output_XX.isOpened(), "Error: Could not open video output XX"

    def open_output_OK(self):
        string = ('appsrc ! videoconvert ! ' +
                  'video/x-raw, format=YUY2 ! ' +
                  'jpegenc ! rtpjpegpay ! ' +
                  'udpsink host={} port={}'.format(remoteHost, remotePortOK))
        self.video_output_OK = cv2.VideoWriter(
            string,
            apiPreference=0,
            fourcc=cv2.VideoWriter.fourcc('M', 'J', 'P', 'G'),
            fps=25,
            frameSize=(640, 160),
            isColor=True)
        assert self.video_output_OK.isOpened(), "Error: Could not open video output OK"

    def open_output_Monitor(self):
        string = ('appsrc ! videoconvert ! ' +
                  'video/x-raw, format=YUY2 ! ' +
                  'jpegenc ! rtpjpegpay ! ' +
                  'udpsink host={} port={}'.format(remoteHost, remotePortMonitor))
        self.video_output_Monitor = cv2.VideoWriter(
            string,
            apiPreference=0,
            fourcc=cv2.VideoWriter.fourcc('M', 'J', 'P', 'G'),
            fps=25,
            frameSize=(640, 160),
            isColor=True)
        assert self.video_output_Monitor.isOpened(), "Error: Could not open video output"

    def capture(self):
        return self.captureL(), self.captureR()

    def captureL(self):
        retL, imgL = self.video_captureL.read()
        assert retL, "Could not capture left image"
        return imgL

    def captureR(self):
        retR, imgR = self.video_captureR.read()
        assert retR, "Could not capture right image"
        return imgR

    def processImage(self, img):
        # Flatten the stereo frame into the page-locked input buffer,
        # copy it to the GPU, run inference and copy the result back
        preprocessed = np.asarray(img).ravel()
        np.copyto(self.h_input_1, preprocessed)
        with self.engine.create_execution_context() as context:
            cuda.memcpy_htod_async(self.d_input_1, self.h_input_1, self.stream)
            # context.profiler = trt.Profiler()
            context.execute(batch_size=1, bindings=[int(self.d_input_1), int(self.d_output)])
            cuda.memcpy_dtoh_async(self.h_output, self.d_output, self.stream)
            self.stream.synchronize()
            return (round(self.h_output[0], 2), round(self.h_output[1], 2),
                    round(self.h_output[2], 2), round(self.h_output[3], 2))


if __name__ == "__main__":
    kamera = Kamera("model-stereo.trt")
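The program only publishes the network's verdict on the pasovec/camera topic; acting on it is left to the drive-control process. A hypothetical minimal subscriber, just to illustrate the protocol (the printed reactions stand in for the real motor commands, which are not part of this listing):

import paho.mqtt.client as mqtt

def on_message(client, userdata, message):
    status = message.payload.decode('utf8')
    # Placeholder reactions; the real pásovec drive code is elsewhere
    if status == "XL":
        print("network says: steer left")
    elif status == "XR":
        print("network says: steer right")
    elif status == "XX":
        print("network says: blocked, stop")
    else:
        print("network says: go straight")

client = mqtt.Client("driver")
client.connect("localhost")
client.subscribe("pasovec/camera")
client.on_message = on_message
client.loop_forever()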