import cv2
import mediapipe as mp
import numpy as np
import math
import json
from pythonosc import udp_client

# ==================================================
# CAMERA / PERFORMANCE (INTEGER)
# ==================================================
TOUCH_CAM_INDEX = 1
GESTURE_CAM_INDEX = 0
CAMERA_WIDTH = 900
CAMERA_HEIGHT = 500
CAMERA_FPS = 18          # laptop-stable
MODEL_COMPLEXITY = 1     # set to 0 on laptops (lighter, faster model)

# ==================================================
# TOUCH (INTEGER / FRAME BASED)
# ==================================================
MOVE_TOLERANCE = 28          # max px jitter to still count as "holding still"
TOUCH_STILL_FRAMES = 18      # ~1 s @ 18 FPS
TOUCH_COOLDOWN_FRAMES = 12

# ==================================================
# CLAP (INTEGER)
# ==================================================
CLAP_DISTANCE_THRESHOLD = 110   # px between hand centers
CLAP_COOLDOWN_FRAMES = 32

# ==================================================
# DISPLAY SETTINGS (INTEGER)
# 0 = OFF, 1 = 320x240, 2 = 480x360, 3 = 640x480
# ==================================================
DISPLAY_TOUCH_RES = 1
DISPLAY_GESTURE_RES = 1
DISPLAY_RES_MAP = {
    1: (320, 240),
    2: (480, 360),
    3: (640, 480)
}

# ==================================================
# GAME / HOMOGRAPHY
# ==================================================
GAME_SCREEN_WIDTH = 900
GAME_SCREEN_HEIGHT = 600

# ==================================================
client = udp_client.SimpleUDPClient("127.0.0.1", 5005)

# ==================================================
# GLOBAL STATE
# ==================================================
last_finger_pos = None
still_frames = 0
touch_cooldown = 0
clap_cooldown = 0

# ==================================================
# LOAD CALIBRATION
# calibration.json must hold four [x, y] camera-pixel points in the
# order top-left, top-right, bottom-right, bottom-left (matching dst).
# ==================================================
try:
    with open("calibration.json", "r") as f:
        CALIB_POINTS = json.load(f)
    print("📐 Calibration loaded")
except (FileNotFoundError, json.JSONDecodeError):
    CALIB_POINTS = None
    print("⚠️ No calibration found")

H = None
if CALIB_POINTS:
    src = np.array(CALIB_POINTS, dtype=np.float32)
    dst = np.array([
        [0, 0],
        [GAME_SCREEN_WIDTH, 0],
        [GAME_SCREEN_WIDTH, GAME_SCREEN_HEIGHT],
        [0, GAME_SCREEN_HEIGHT]
    ], dtype=np.float32)
    H, _ = cv2.findHomography(src, dst)


def map_point(x, y):
    """Map a camera-pixel point into game-screen coordinates via the homography."""
    if H is None:
        return int(x), int(y)
    p = np.array([[[x, y]]], dtype=np.float32)
    m = cv2.perspectiveTransform(p, H)[0][0]
    return int(m[0]), int(m[1])


# ==================================================
def run():
    global last_finger_pos, still_frames
    global touch_cooldown, clap_cooldown

    mp_hands = mp.solutions.hands
    hands_touch = mp_hands.Hands(
        max_num_hands=1,
        model_complexity=MODEL_COMPLEXITY,
        min_detection_confidence=0.6
    )
    hands_gesture = mp_hands.Hands(
        max_num_hands=2,
        model_complexity=MODEL_COMPLEXITY,
        min_detection_confidence=0.6
    )

    cam_touch = cv2.VideoCapture(TOUCH_CAM_INDEX)
    cam_gest = cv2.VideoCapture(GESTURE_CAM_INDEX)
    for cam in (cam_touch, cam_gest):
        cam.set(cv2.CAP_PROP_FRAME_WIDTH, CAMERA_WIDTH)
        cam.set(cv2.CAP_PROP_FRAME_HEIGHT, CAMERA_HEIGHT)
        cam.set(cv2.CAP_PROP_FPS, CAMERA_FPS)

    frame_delay = int(1000 / CAMERA_FPS)   # ms per frame for waitKey

    while True:
        ok1, frame_touch = cam_touch.read()
        ok2, frame_gest = cam_gest.read()
        if not ok1 or not ok2:
            break

        frame_touch = cv2.flip(frame_touch, -1)   # rotate 180° (flip both axes)
        frame_gest = cv2.flip(frame_gest, 1)      # mirror horizontally

        # ================= TOUCH =================
        rgb_t = cv2.cvtColor(frame_touch, cv2.COLOR_BGR2RGB)
        res_t = hands_touch.process(rgb_t)
        h, w, _ = frame_touch.shape

        if res_t.multi_hand_landmarks:
            lm = res_t.multi_hand_landmarks[0]
            # Index fingertip (landmark 8) below its MCP joint (landmark 5)
            # in image space: the finger is pointing down toward the surface.
            if lm.landmark[8].y > lm.landmark[5].y:
                fx = int(lm.landmark[8].x * w)
                fy = int(lm.landmark[8].y * h)
                sx, sy = map_point(fx, fy)
                current_pos = (fx, fy)

                if last_finger_pos is not None:
                    dist = math.hypot(
                        current_pos[0] - last_finger_pos[0],
                        current_pos[1] - last_finger_pos[1]
                    )
                    if dist < MOVE_TOLERANCE:
                        # Fingertip held still: count dwell frames until a touch fires.
                        still_frames += 1
                        if still_frames >= TOUCH_STILL_FRAMES and touch_cooldown == 0:
                            client.send_message("/touch", [sx, sy])
                            print(f"👉 TOUCH {sx},{sy}")
                            touch_cooldown = TOUCH_COOLDOWN_FRAMES
                            still_frames = 0
                    else:
                        still_frames = 0
                else:
                    still_frames = 0

                last_finger_pos = current_pos
                cv2.circle(frame_touch, (fx, fy), 6, (0, 255, 0), -1)
        else:
            last_finger_pos = None
            still_frames = 0

        if touch_cooldown > 0:
            touch_cooldown -= 1

        # ================= CLAP =================
        rgb_g = cv2.cvtColor(frame_gest, cv2.COLOR_BGR2RGB)
        res_g = hands_gesture.process(rgb_g)
        gh, gw, _ = frame_gest.shape

        if res_g.multi_hand_landmarks and len(res_g.multi_hand_landmarks) == 2:
            h1, h2 = res_g.multi_hand_landmarks
            # Centroid of each hand's 21 landmarks, converted to pixels.
            x1 = np.mean([p.x for p in h1.landmark]) * gw
            y1 = np.mean([p.y for p in h1.landmark]) * gh
            x2 = np.mean([p.x for p in h2.landmark]) * gw
            y2 = np.mean([p.y for p in h2.landmark]) * gh

            if math.hypot(x2 - x1, y2 - y1) < CLAP_DISTANCE_THRESHOLD and clap_cooldown == 0:
                client.send_message("/clap", 1)
                print("👏 CLAP")
                clap_cooldown = CLAP_COOLDOWN_FRAMES

        if clap_cooldown > 0:
            clap_cooldown -= 1

        # ================= DISPLAY =================
        if DISPLAY_TOUCH_RES > 0:
            dw, dh = DISPLAY_RES_MAP[DISPLAY_TOUCH_RES]
            cv2.imshow("Touch-Cam", cv2.resize(frame_touch, (dw, dh)))
        if DISPLAY_GESTURE_RES > 0:
            dw, dh = DISPLAY_RES_MAP[DISPLAY_GESTURE_RES]
            cv2.imshow("Gesture-Cam", cv2.resize(frame_gest, (dw, dh)))

        if cv2.waitKey(frame_delay) & 0xFF == 27:   # ESC quits
            break

    hands_touch.close()
    hands_gesture.close()
    cam_touch.release()
    cam_gest.release()
    cv2.destroyAllWindows()


# ==================================================
if __name__ == "__main__":
    run()
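# ==================================================
# For reference, a minimal sketch of the receiving side, assuming the
# game listens on the same address/port (127.0.0.1:5005) used by the
# sender above. The handler names are hypothetical; only the pythonosc
# Dispatcher / BlockingOSCUDPServer API is real.
#
#   from pythonosc.dispatcher import Dispatcher
#   from pythonosc import osc_server
#
#   def on_touch(address, x, y):    # receives the [sx, sy] ints from /touch
#       print("touch at", x, y)
#
#   def on_clap(address, value):    # receives the 1 sent with /clap
#       print("clap")
#
#   dispatcher = Dispatcher()
#   dispatcher.map("/touch", on_touch)
#   dispatcher.map("/clap", on_clap)
#   osc_server.BlockingOSCUDPServer(("127.0.0.1", 5005), dispatcher).serve_forever()
# ==================================================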