123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168 |
- #!/usr/bin/env python
-
- from imutils.video import VideoStream
- from imutils.video import FPS
- import argparse
- import imutils
- import cv2
- from datetime import datetime, time
- import numpy as np
- import time as time2
-
# Switch intended for extra visual debugging output. No active code path
# visible in this file reads it at the moment.
VISUAL_DEBUG = True
-
def getArgs(argv=None):
    """Parse the command-line options of the detection script.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, which is what a plain ``getArgs()`` call did
            before this parameter existed.

    Returns:
        dict: The parsed options keyed by destination name:
            ``video`` (path string, or ``None`` when not given),
            ``min_area`` (int, default 500) and
            ``tracker`` (str, default ``"csrt"``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-v", "--video", help="path to the video file")
    parser.add_argument("-a", "--min-area", type=int, default=500, help="minimum area size")
    parser.add_argument("-t", "--tracker", type=str, default="csrt", help="OpenCV object tracker type")
    return vars(parser.parse_args(argv))
-
-
def main():
    """Run the motion-gated detection loop on a webcam or a video file.

    Frames are read from the webcam (no ``--video`` argument) or from the
    given video file, resized to a width of 500 pixels and handed to a
    ``DetectionFromFrame`` instance. From the second frame on, every contour
    returned by ``prepareFrame`` is cropped out of the frame, run through the
    MobileNet-SSD Caffe network, and each detection row is passed to
    ``detectConfidentiallyPeople``. Every candidate region is additionally
    outlined with a thin rectangle.

    Keys handled in the display window:
        q: leave the loop.
        d: clear the detector's reference frame so the next processed frame
           becomes the new reference.

    The stream and all OpenCV windows are released on the way out, also when
    an exception interrupts the loop.
    """
    args = getArgs()
    use_webcam = args.get("video") is None

    if use_webcam:
        # No video argument: read from the webcam (work in progress).
        vs = VideoStream(src=0).start()
        # Presumably gives the camera time to warm up before the first read.
        time2.sleep(2.0)
    else:
        # Otherwise we are reading from a video file.
        vs = cv2.VideoCapture(args["video"])

    try:
        # Load the network once; re-reading the model files for every contour
        # of every frame is pure overhead.
        net = cv2.dnn.readNetFromCaffe(
            r'ML-Models/MobileNetSSD_deploy.prototxt',
            r'ML-Models/MobileNetSSD_deploy.caffemodel',
        )

        cv2.namedWindow('Video stream', cv2.WINDOW_NORMAL)
        detector = DetectionFromFrame(args["min_area"], 0.8)

        while True:
            frame = vs.read()
            if not use_webcam:
                # cv2.VideoCapture.read() returns (grabbed, frame).
                frame = frame[1]
            detector.currentFrame = frame
            # If the frame cannot be grabbed, we have reached the end of the
            # video.
            if frame is None:
                break

            # Resize the frame to a width of 500 pixels.
            detector.currentFrame = imutils.resize(frame, width=500)
            detector.framecounter += 1

            # The very first frame is only displayed; processing starts with
            # the second one.
            if detector.framecounter > 1:
                for contour in detector.prepareFrame():
                    x, y, w, h = bound_rect = cv2.boundingRect(contour)

                    # Crop the candidate region and feed it to the network.
                    # It is resampled straight to the 300x300 network input;
                    # an intermediate 224x224 resize would only blur the crop.
                    crop = detector.currentFrame[y:y + h, x:x + w]
                    # Scale 0.007843 (= 1/127.5) with mean 127.5 maps pixel
                    # values to roughly [-1, 1].
                    blob = cv2.dnn.blobFromImage(
                        cv2.resize(crop, (300, 300)), 0.007843, (300, 300), 127.5)
                    net.setInput(blob)
                    detections = net.forward()

                    for i in range(detections.shape[2]):
                        detector.detectConfidentiallyPeople(i, detections, bound_rect)

                    # Outline every candidate region, drawn after the
                    # detection overlay so the thin outline stays visible.
                    cv2.rectangle(detector.currentFrame, (x, y), (x + w, y + h),
                                  (255, 255, 0), 1)

            # Show the frame and record whether the user pressed a key.
            cv2.imshow("Video stream", detector.currentFrame)
            key = cv2.waitKey(1) & 0xFF

            # If the `q` key is pressed, break from the loop.
            if key == ord("q"):
                break
            # `d` drops the reference frame; the next processed frame
            # replaces it.
            if key == ord("d"):
                detector.firstFrame = None
    finally:
        # Finally, stop the camera/stream and close any open windows.
        if use_webcam:
            vs.stop()
        else:
            vs.release()
        cv2.destroyAllWindows()
-
-
class DetectionFromFrame:
    """Per-stream state for frame differencing and person-detection overlay.

    The caller assigns the frame to analyse to ``currentFrame`` and then calls
    ``prepareFrame`` to obtain the regions that changed relative to the stored
    reference frame, and ``detectConfidentiallyPeople`` to draw accepted
    network detections onto ``currentFrame``.

    Attributes:
        min_size: Minimum contour area (as measured by ``cv2.contourArea``)
            a changed region needs to be reported by ``prepareFrame``.
        confidence_level: Detections must score strictly above this value.
        person_class_id: Class id a detection must carry to be accepted, or
            ``None`` to accept every class.
        firstFrame: Blurred grayscale reference frame, ``None`` until set.
        currentFrame: Frame currently being analysed and drawn on.
        framecounter: Frame counter maintained by the caller.
    """

    # NOTE(review): 15 is "person" in the 21-class PASCAL VOC label list the
    # standard MobileNetSSD_deploy Caffe model uses -- confirm for the model
    # actually deployed, or pass person_class_id=None to disable the check.
    PERSON_CLASS_ID = 15

    def __init__(self, min_size, confidence, person_class_id=PERSON_CLASS_ID):
        """Store the thresholds and reset all per-stream state.

        Args:
            min_size: Minimum contour area for a changed region to count.
            confidence: Confidence a detection has to exceed.
            person_class_id: Class id treated as "person"; ``None`` accepts
                detections of any class.
        """
        self.min_size = min_size
        self.confidence_level = confidence
        self.person_class_id = person_class_id

        self.firstFrame = None
        self.currentFrame = None

        # The attributes below are not used by the methods of this class;
        # they are kept so code reading them keeps working.
        self.initBB2 = None
        self.fps = None
        self.differ = None
        self.now = ''
        self.framecounter = 0
        self.people_count_total = 0

    def prepareFrame(self):
        """Return the contours of regions that differ from the reference frame.

        ``currentFrame`` is converted to grayscale and blurred. When no
        reference frame is stored yet, the blurred frame becomes the
        reference and an empty list is returned. Otherwise the absolute
        difference to the reference is thresholded, dilated and searched for
        external contours.

        Returns:
            list: Contours whose area is at least ``min_size``.
        """
        gray = cv2.cvtColor(self.currentFrame, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (21, 21), 0)

        # If the first frame is None, initialize it.
        if self.firstFrame is None:
            self.firstFrame = gray
            return []

        # Compute the absolute difference between the current frame and the
        # first frame, then binarize it.
        frameDelta = cv2.absdiff(self.firstFrame, gray)
        thresh = cv2.threshold(frameDelta, 25, 255, cv2.THRESH_BINARY)[1]

        # Dilate the thresholded image to fill in holes.
        thresh = cv2.dilate(thresh, None, iterations=2)

        # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x and
        # (image, contours, hierarchy) in 3.x; [-2] is the contour list in
        # every case.
        cnts = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

        # Ignore regions smaller than the configured minimum area.
        return [c for c in cnts if cv2.contourArea(c) >= self.min_size]

    def detectConfidentiallyPeople(self, i, detections, boundRect):
        """Draw detection ``i`` onto ``currentFrame`` if it is a confident person.

        Args:
            i: Row index into the third axis of ``detections``.
            detections: Network output indexed as ``detections[0, 0, i, k]``,
                where ``k == 1`` is read as the class id and ``k == 2`` as
                the confidence.
            boundRect: ``(x, y, w, h)`` of the region the network was run on;
                this rectangle (not the network's own box) is drawn.

        Returns:
            bool: ``True`` when the detection was accepted and drawn,
            ``False`` otherwise.
        """
        confidence = detections[0, 0, i, 2]

        # Written as a negated ">" so a NaN confidence is rejected, too.
        if not (confidence > self.confidence_level):
            return False

        # Only label detections of the person class; without this check any
        # confident detection would be labelled "person".
        if (self.person_class_id is not None
                and int(detections[0, 0, i, 1]) != self.person_class_id):
            return False

        label = "{}: {:.2f}%".format("person", confidence * 100)

        # Draw the prediction on the frame: a thick green box around the
        # analysed region and the label in the top-left corner of the frame.
        x, y, w, h = boundRect
        cv2.rectangle(self.currentFrame, (x, y), (x + w, y + h), (0, 255, 0), 3)
        cv2.putText(self.currentFrame, label, (0, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
        return True
-
-
-
# Start the detection loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|