diff --git a/camera/.vscode/launch.json b/camera/.vscode/launch.json
index fa170e0..1f6f73a 100644
--- a/camera/.vscode/launch.json
+++ b/camera/.vscode/launch.json
@@ -1,6 +1,7 @@
 {
     "version": "0.2.0",
     "configurations": [
+
         {
             "name": "Python: Current File",
             "type": "python",
@@ -15,7 +16,7 @@
             "request": "launch",
             "program": "${file}",
             "console": "integratedTerminal",
-            "args": ["-v", "~/Videos/video.h264"]
+            "args": ["-v", "run.mp4"]
         }
     ]
 }
\ No newline at end of file
diff --git a/camera/person_detection.py b/camera/person_detection.py
new file mode 100755
index 0000000..6543c75
--- /dev/null
+++ b/camera/person_detection.py
@@ -0,0 +1,168 @@
+#!/usr/bin/env python
+
+from imutils.video import VideoStream
+from imutils.video import FPS
+import argparse
+import imutils
+import cv2
+from datetime import datetime, time
+import numpy as np
+import time as time2
+
+VISUAL_DEBUG = True
+
+
+def getArgs():
+    """ Arguments """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("-v", "--video", help="path to the video file")
+    ap.add_argument("-a", "--min-area", type=int, default=500, help="minimum area size")
+    ap.add_argument("-t", "--tracker", type=str, default="csrt", help="OpenCV object tracker type")
+    return vars(ap.parse_args())
+
+
+def main():
+    args = getArgs()
+
+    # if the video argument is None, then the code will read from the webcam (work in progress)
+    if args.get("video", None) is None:
+        vs = VideoStream(src=0).start()
+        time2.sleep(2.0)
+    # otherwise, we are reading from a video file
+    else:
+        vs = cv2.VideoCapture(args["video"])
+
+    cv2.namedWindow('Video stream', cv2.WINDOW_NORMAL)
+    detector = DetectionFromFrame(args["min_area"], 0.8)
+    while True:
+        detector.currentFrame = vs.read()
+        detector.currentFrame = detector.currentFrame if args.get("video", None) is None else detector.currentFrame[1]
+        # if the frame can not be grabbed, then we have reached the end of the video
+        if detector.currentFrame is None:
+            break
+
+        # resize the frame to a width of 500
+        detector.currentFrame = imutils.resize(detector.currentFrame, width=500)
+        detector.framecounter += 1
+        if detector.framecounter > 1:
+            cnts = detector.prepareFrame()
+
+            for c in cnts:
+                boundRect = cv2.boundingRect(c)
+                #(x, y, w, h) = cv2.boundingRect(c)
+                #initBB2 = (x, y, w, h)
+
+                prott1 = r'ML-Models/MobileNetSSD_deploy.prototxt'
+                prott2 = r'ML-Models/MobileNetSSD_deploy.caffemodel'
+                net = cv2.dnn.readNetFromCaffe(prott1, prott2)
+
+                #trackbox = detector.currentFrame[y:y+h, x:x+w]
+                trackbox = detector.currentFrame[boundRect[1]:boundRect[1]+boundRect[3],
+                                                 boundRect[0]:boundRect[0]+boundRect[2]]
+                trackbox = cv2.resize(trackbox, (224, 224))
+                #cv2.imshow('image', trackbox)
+
+                blob = cv2.dnn.blobFromImage(cv2.resize(trackbox, (300, 300)), 0.007843, (300, 300), 127.5)
+                net.setInput(blob)
+                detections = net.forward()
+
+                for i in np.arange(0, detections.shape[2]):
+                    detector.detectConfidentiallyPeople(i, detections, boundRect)
+                cv2.rectangle(detector.currentFrame, (boundRect[0], boundRect[1]),
+                              (boundRect[0] + boundRect[2], boundRect[1] + boundRect[3]), (255, 255, 0), 1)
+
+        # show the frame and record if the user presses a key
+        cv2.imshow("Video stream", detector.currentFrame)
+        key = cv2.waitKey(1) & 0xFF
+
+        # if the `q` key is pressed, break from the loop
+        if key == ord("q"):
+            break
+        if key == ord("d"):
+            detector.firstFrame = None
+            #detector.lastFrame = detector.currentFrame
+
+    # finally, stop the camera/stream and close any open windows
+    vs.stop() if args.get("video", None) is None else vs.release()
+    cv2.destroyAllWindows()
+
+
+class DetectionFromFrame:
+    def __init__(self, min_size, confidence):
+        self.min_size = min_size
+        self.confidence_level = confidence
+
+        self.firstFrame = None
+        self.currentFrame = None
+
+        self.initBB2 = None
+        self.fps = None
+        self.differ = None
+        self.now = ''
+        self.framecounter = 0
+        self.people_count_total = 0
+
+    def prepareFrame(self):
+        gray = cv2.cvtColor(self.currentFrame, cv2.COLOR_BGR2GRAY)
+        gray = cv2.GaussianBlur(gray, (21, 21), 0)
+
+        # if the first frame is None, initialize it
+        if self.firstFrame is None:
+            self.firstFrame = gray
+            return []
+
+        # compute the absolute difference between the current frame and first frame
+        frameDelta = cv2.absdiff(self.firstFrame, gray)
+        thresh = cv2.threshold(frameDelta, 25, 255, cv2.THRESH_BINARY)[1]
+
+        #debug
+        """if VISUAL_DEBUG:
+            cv2.imshow("debug image", thresh)
+            cv2.waitKey(0)
+            cv2.destroyWindow("debug image")
+            #cv2.destroyWindow("threshold image")"""
+
+        # dilate the thresholded image to fill in holes
+        thresh = cv2.dilate(thresh, None, iterations=2)
+
+        # find contours on thresholded image
+        thresh = np.uint8(thresh)
+        cnts, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+        return cnts
+
+    def detectConfidentiallyPeople(self, i, detections, boundRect):
+        CLASSES = ["person"]
+
+        COLORS = [0, 255, 0]
+        #COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))
+
+        confidence = detections[0, 0, i, 2]
+
+        if confidence > self.confidence_level:
+            # extract the index of the class label from the `detections`, then compute the
+            # (x, y)-coordinates of the bounding box for the object
+            #idx = int(detections[0, 0, i, 1])
+            box = detections[0, 0, i, 3:7] * np.array([boundRect[2], boundRect[3], boundRect[2], boundRect[3]])
+            (startX, startY, endX, endY) = box.astype("int")
+
+            # draw the prediction on the frame
+            #label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100)
+            label = "{}: {:.2f}%".format(CLASSES[0], confidence * 100)
+
+            #cv2.rectangle(frame, (startX, startY), (endX, endY), COLORS[idx], 2)
+            cv2.rectangle(self.currentFrame, (boundRect[0], boundRect[1]),
+                          (boundRect[0] + boundRect[2], boundRect[1] + boundRect[3]), (0, 255, 0), 3)
+
+            y = boundRect[1] - 15 if boundRect[1] - 15 > 15 else boundRect[1] + 15
+
+            #cv2.putText(frame, label, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)
+            cv2.putText(self.currentFrame, label, (0, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
+            #cv2.imshow("Video stream", self.currentFrame)
+            #print("Person found")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/camera/video_stream/zmq_video_server.py b/camera/video_stream/zmq_video_server.py
index 3c06787..4882210 100644
--- a/camera/video_stream/zmq_video_server.py
+++ b/camera/video_stream/zmq_video_server.py
@@ -7,4 +7,5 @@ while True:  # show streamed images until Ctrl-C
     rpi_name, image = image_hub.recv_image()
     cv2.imshow(rpi_name, image)  # 1 window for each RPi
     cv2.waitKey(1)
-    image_hub.send_reply(b'OK')
\ No newline at end of file
+    image_hub.send_reply(b'OK')
+
\ No newline at end of file
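
Note (not part of the patch): person_detection.py re-reads the MobileNet SSD prototxt and caffemodel from disk for every contour of every frame. A minimal sketch of loading the network once and reusing it per motion region, assuming the same ML-Models paths; classify_region is a hypothetical helper, not code from this repository:

    import cv2

    PROTOTXT = "ML-Models/MobileNetSSD_deploy.prototxt"
    CAFFEMODEL = "ML-Models/MobileNetSSD_deploy.caffemodel"

    # Load the Caffe model a single time at startup instead of once per contour.
    net = cv2.dnn.readNetFromCaffe(PROTOTXT, CAFFEMODEL)

    def classify_region(frame, bound_rect):
        # Crop the motion region, build a 300x300 blob, and run one forward pass.
        x, y, w, h = bound_rect
        roi = frame[y:y + h, x:x + w]
        blob = cv2.dnn.blobFromImage(cv2.resize(roi, (300, 300)), 0.007843, (300, 300), 127.5)
        net.setInput(blob)
        return net.forward()

Each bounding rectangle returned by prepareFrame() could then be passed to classify_region() inside the existing per-contour loop.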