#!/usr/bin/env python
"""Detect people in a video stream: background differencing proposes motion
regions, which are then classified by a MobileNet SSD network."""
import argparse
import time
from statistics import median

import cv2
import imutils
import numpy as np
from imutils.video import VideoStream

VISUAL_DEBUG = True


def getArgs():
    """Parse command-line arguments."""
    ap = argparse.ArgumentParser()
    ap.add_argument("-v", "--video", help="path to the video file")
    ap.add_argument("-a", "--min-area", type=int, default=500,
                    help="minimum contour area size")
    ap.add_argument("-t", "--tracker", type=str, default="csrt",
                    help="OpenCV object tracker type")  # parsed but not yet used
    return vars(ap.parse_args())


def main():
    args = getArgs()
    timer = Timer()

    # If the video argument is None, read from the webcam (work in progress);
    # otherwise read from the given video file.
    if args.get("video", None) is None:
        vs = VideoStream(src=0).start()
        time.sleep(2.0)  # give the camera sensor time to warm up
    else:
        vs = cv2.VideoCapture(args["video"])

    cv2.namedWindow("Video stream", cv2.WINDOW_NORMAL)
    detector = DetectionFromFrame(args["min_area"], 0.5)

    # Load the MobileNet SSD model once, up front, rather than once per contour.
    prototxt = r"ML-Models/MobileNetSSD_deploy.prototxt"
    caffemodel = r"ML-Models/MobileNetSSD_deploy.caffemodel"
    net = cv2.dnn.readNetFromCaffe(prototxt, caffemodel)

    while True:
        timer.start_frame_timer()
        detector.currentFrame = vs.read()
        # cv2.VideoCapture.read() returns a (grabbed, frame) tuple, while
        # VideoStream.read() returns the frame directly.
        detector.currentFrame = detector.currentFrame if args.get("video", None) is None else detector.currentFrame[1]
        # If the frame can not be grabbed, we have reached the end of the video.
        if detector.currentFrame is None:
            break

        # Resize the frame to a width of 500 pixels to speed up processing.
        detector.currentFrame = imutils.resize(detector.currentFrame, width=500)
        detector.framecounter += 1
        if detector.framecounter > 1:
            cnts = detector.prepareFrame()

            for c in cnts:
                timer.start_contour_timer()
                # Honor the --min-area argument: ignore tiny motion regions.
                if cv2.contourArea(c) < detector.min_size:
                    continue
                bound_rect = cv2.boundingRect(c)

                # Crop the motion region out of the frame and classify it.
                trackbox = detector.currentFrame[bound_rect[1]:bound_rect[1] + bound_rect[3],
                                                 bound_rect[0]:bound_rect[0] + bound_rect[2]]

                timer.start_detection_timer()
                # MobileNet SSD expects 300x300 input, scaled by 1/127.5
                # (~0.007843) around a mean of 127.5; blobFromImage resizes for us.
                blob = cv2.dnn.blobFromImage(trackbox, 0.007843, (300, 300), 127.5)
                net.setInput(blob)
                detections = net.forward()

                for i in np.arange(0, detections.shape[2]):
                    detector.detectConfidentiallyPeople(i, detections, bound_rect)
                timer.stop_detection_timer()

                # Outline every examined motion region, whatever the detector said.
                cv2.rectangle(detector.currentFrame, (bound_rect[0], bound_rect[1]),
                              (bound_rect[0] + bound_rect[2], bound_rect[1] + bound_rect[3]),
                              (255, 255, 0), 1)
                timer.stop_contour_timer()

        # Show the frame and check whether the user pressed a key.
        cv2.imshow("Video stream", detector.currentFrame)
        key = cv2.waitKey(1) & 0xFF

        # If the `q` key is pressed, break from the loop.
        if key == ord("q"):
            break
        # If the `d` key is pressed, reset the background reference frame.
        if key == ord("d"):
            detector.firstFrame = None
        timer.print_time()

    # Finally, stop the camera/stream and close any open windows.
    vs.stop() if args.get("video", None) is None else vs.release()
    cv2.destroyAllWindows()


class Timer:
    """Collects per-frame, per-contour, and per-detection wall-clock timings."""

    def __init__(self):
        self.frame_timer = None
        self.contour_timer = None
        self.detection_timer = None

        self.contour_time = []
        self.detection_time = []

    def start_frame_timer(self):
        self.frame_timer = time.time()

    def get_frame_time(self):
        return time.time() - self.frame_timer

    def start_contour_timer(self):
        self.contour_timer = time.time()

    def stop_contour_timer(self):
        self.contour_time.append(time.time() - self.contour_timer)

    def start_detection_timer(self):
        self.detection_timer = time.time()

    def stop_detection_timer(self):
        self.detection_time.append(time.time() - self.detection_timer)

    def print_time(self):
        """Print timing aggregates for the current frame and reset the lists."""
        average_contour = 0 if not self.contour_time else sum(self.contour_time) / float(len(self.contour_time))
        average_detection = 0 if not self.detection_time else sum(self.detection_time) / float(len(self.detection_time))
        median_contour = 0 if not self.contour_time else median(self.contour_time)
        median_detection = 0 if not self.detection_time else median(self.detection_time)
        total_contour = sum(self.contour_time)
        total_detection = sum(self.detection_time)

        print("Time for Frame: {:.2f}. Contour Total: {:.2f}. Contour Median: {:.2f}. "
              "Contour Average: {:.2f}. Detection Total: {:.2f}. Detection Median: {:.2f}. "
              "Detection Average: {:.2f}.".format(
                  self.get_frame_time(), total_contour, median_contour, average_contour,
                  total_detection, median_detection, average_detection))

        self.contour_time = []
        self.detection_time = []
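
# A minimal, self-contained usage sketch for Timer (illustrative only: the
# _timer_demo name is not part of the original script, and the sleeps merely
# stand in for real contour/detection work). It is never called by main().
def _timer_demo():
    timer = Timer()
    timer.start_frame_timer()
    for _ in range(3):
        timer.start_contour_timer()
        time.sleep(0.01)  # stand-in for contour processing
        timer.stop_contour_timer()

        timer.start_detection_timer()
        time.sleep(0.02)  # stand-in for a forward pass of the network
        timer.stop_detection_timer()
    timer.print_time()
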
class DetectionFromFrame:
    """Proposes motion regions by differencing against a reference frame and
    draws confident MobileNet SSD detections onto the current frame."""

    def __init__(self, min_size, confidence):
        self.min_size = min_size
        self.confidence_level = confidence

        self.firstFrame = None
        self.currentFrame = None

        # State kept from earlier tracking experiments; currently unused.
        self.initBB2 = None
        self.fps = None
        self.differ = None
        self.now = ''
        self.framecounter = 0
        self.people_count_total = 0

    def prepareFrame(self):
        """Return the contours of regions that differ from the reference frame."""
        gray = cv2.cvtColor(self.currentFrame, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (21, 21), 0)

        # If the reference frame is not set yet, initialize it; no contours yet.
        if self.firstFrame is None:
            self.firstFrame = gray
            return []

        # Compute the absolute difference between the current frame and the
        # reference frame, then threshold it into a binary motion mask.
        frameDelta = cv2.absdiff(self.firstFrame, gray)
        thresh = cv2.threshold(frameDelta, 25, 255, cv2.THRESH_BINARY)[1]

        # Debugging aid: uncomment to inspect the motion mask frame by frame.
        #if VISUAL_DEBUG:
        #    cv2.imshow("debug image", thresh)
        #    cv2.waitKey(0)
        #    cv2.destroyWindow("debug image")

        # Dilate the thresholded image to fill in holes, then find the
        # external contours of the motion mask.
        thresh = cv2.dilate(thresh, None, iterations=2)
        thresh = np.uint8(thresh)
        cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # grab_contours smooths over the differing return signatures of OpenCV 3 and 4.
        cnts = imutils.grab_contours(cnts)
        return cnts

    def detectConfidentiallyPeople(self, i, detections, bound_rect):
        """Draw the i-th detection onto the current frame if its confidence
        clears the threshold. Note that the class index
        (detections[0, 0, i, 1]) is not checked, so any sufficiently
        confident detection is labeled."""
        detected_color = (0, 255, 0)

        confidence = detections[0, 0, i, 2]
        if confidence > self.confidence_level:
            # Draw the bounding box and annotate it with the confidence score.
            label = "{:.2f}%".format(confidence * 100)
            cv2.rectangle(self.currentFrame, (bound_rect[0], bound_rect[1]),
                          (bound_rect[0] + bound_rect[2], bound_rect[1] + bound_rect[3]),
                          detected_color, 3)
            cv2.putText(self.currentFrame, label, (bound_rect[0], bound_rect[1] - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.3, detected_color, 1)


if __name__ == "__main__":
    main()
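
# ---------------------------------------------------------------------------
# Example invocations (a sketch: the script name and sample path below are
# placeholders; the MobileNet SSD files are assumed to sit in ML-Models/ as
# referenced above):
#
#   python detect_people.py --video samples/hallway.mp4 --min-area 500
#   python detect_people.py                  # no --video: read from webcam
#
# While running, press `q` to quit and `d` to reset the background reference
# frame (useful after lighting changes).
# ---------------------------------------------------------------------------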