import cv2
import mediapipe as mp
import numpy as np
import math
import time
import json
from pythonosc import udp_client

# -------------------------------
# SETTINGS
# -------------------------------
TOUCH_CAM_INDEX = 1
GESTURE_CAM_INDEX = 0

GAME_SCREEN_WIDTH = 900
GAME_SCREEN_HEIGHT = 600

STILL_REQUIRED = 1.0   # seconds the finger must stay still to count as a touch
MOVE_TOLERANCE = 25    # pixels of jitter allowed while "still"

# -------------------------------
# CAMERA / PERFORMANCE SETTINGS
# -------------------------------
CAMERA_FPS = 15
DISPLAY_WIDTH = 320    # 1280 for full resolution
DISPLAY_HEIGHT = 240   # 720 for full resolution
MODEL_COMPLEXITY = 0   # ✅ 0=fast | 1=balanced | 2=accurate

client = udp_client.SimpleUDPClient("127.0.0.1", 5005)

# -------------------------------
# GLOBAL STATES
# -------------------------------
last_finger_pos = None
finger_still_start = None
prev_touch_time = 0.0
prev_clap_time = 0.0

# -------------------------------
# CALIBRATION + HOMOGRAPHY
# -------------------------------
# calibration.json holds four [x, y] camera-space corners, ordered to match
# the screen corners below: top-left, top-right, bottom-right, bottom-left.
try:
    with open("calibration.json", "r") as f:
        CALIB_POINTS = json.load(f)
    print("📐 Calibration loaded:", CALIB_POINTS)
except (FileNotFoundError, json.JSONDecodeError):
    CALIB_POINTS = None
    print("⚠️ No calibration found")

H = None
if CALIB_POINTS is not None:
    src = np.array(CALIB_POINTS, dtype=np.float32)
    dst = np.array([
        [0, 0],
        [GAME_SCREEN_WIDTH, 0],
        [GAME_SCREEN_WIDTH, GAME_SCREEN_HEIGHT],
        [0, GAME_SCREEN_HEIGHT]
    ], dtype=np.float32)
    H, _ = cv2.findHomography(src, dst)
    print("📐 Homography ready")


def map_point_homography(x, y):
    """Map a camera-space point to game-screen coordinates."""
    if H is None:
        return int(x), int(y)
    p = np.array([[[x, y]]], dtype=np.float32)
    mapped = cv2.perspectiveTransform(p, H)[0][0]
    return int(mapped[0]), int(mapped[1])


# -----------------------------------------------------------------
def run_gesture_input():
    global last_finger_pos, finger_still_start
    global prev_touch_time, prev_clap_time

    mp_hands = mp.solutions.hands
    mp_draw = mp.solutions.drawing_utils

    # ✅ model_complexity applied here
    hands_touch = mp_hands.Hands(
        max_num_hands=1,
        model_complexity=MODEL_COMPLEXITY,
        min_detection_confidence=0.6,
        min_tracking_confidence=0.6
    )
    hands_gesture = mp_hands.Hands(
        max_num_hands=2,
        model_complexity=MODEL_COMPLEXITY,
        min_detection_confidence=0.6,
        min_tracking_confidence=0.6
    )

    cam_touch = cv2.VideoCapture(TOUCH_CAM_INDEX)
    cam_gesture = cv2.VideoCapture(GESTURE_CAM_INDEX)

    for cam in (cam_touch, cam_gesture):
        cam.set(cv2.CAP_PROP_FRAME_WIDTH, DISPLAY_WIDTH)
        cam.set(cv2.CAP_PROP_FRAME_HEIGHT, DISPLAY_HEIGHT)
        cam.set(cv2.CAP_PROP_FPS, CAMERA_FPS)

    clap_cooldown = 1.5
    frame_duration = 1.0 / CAMERA_FPS
    last_frame_time = time.time()

    while True:
        ok1, frame_touch = cam_touch.read()
        ok2, frame_gest = cam_gesture.read()
        if not ok1 or not ok2:
            break

        frame_touch = cv2.flip(frame_touch, -1)  # flip both axes (180° rotation)
        frame_gest = cv2.flip(frame_gest, 1)     # mirror horizontally

        # ---------------- TOUCH ----------------
        res_t = hands_touch.process(cv2.cvtColor(frame_touch, cv2.COLOR_BGR2RGB))
        th, tw, _ = frame_touch.shape

        if res_t.multi_hand_landmarks:
            lm = res_t.multi_hand_landmarks[0]
            mp_draw.draw_landmarks(frame_touch, lm, mp_hands.HAND_CONNECTIONS)

            if lm.landmark[8].y < lm.landmark[5].y:
                # Index fingertip (8) above its knuckle (5): finger raised,
                # not touching -- reset the dwell state but fall through so
                # the clap detector and the display below still run.
                last_finger_pos = None
                finger_still_start = None
            else:
                fx = int(lm.landmark[8].x * tw)
                fy = int(lm.landmark[8].y * th)
                sx, sy = map_point_homography(fx, fy)

                now = time.time()
                curr = (fx, fy)

                if last_finger_pos is None:
                    last_finger_pos = curr
                    finger_still_start = now
                else:
                    dist = math.hypot(curr[0] - last_finger_pos[0],
                                      curr[1] - last_finger_pos[1])
                    if dist < MOVE_TOLERANCE:
                        # Held still long enough -> fire a touch (debounced)
                        if finger_still_start and now - finger_still_start >= STILL_REQUIRED:
                            if now - prev_touch_time > 0.5:
                                client.send_message("/touch", [sx, sy])
                                print(f"👉 TOUCH {sx},{sy}")
                                prev_touch_time = now
                            finger_still_start = None
                    else:
                        # Finger moved: restart the dwell timer
                        finger_still_start = now
                        last_finger_pos = curr

                cv2.circle(frame_touch, (fx, fy), 10, (0, 255, 0), -1)
        else:
            last_finger_pos = None
            finger_still_start = None

        # ---------------- CLAP ----------------
        res_g = hands_gesture.process(cv2.cvtColor(frame_gest, cv2.COLOR_BGR2RGB))
        gh, gw, _ = frame_gest.shape

        if res_g.multi_hand_landmarks and len(res_g.multi_hand_landmarks) == 2:
            # Clap = the two hand centroids coming close together
            h1, h2 = res_g.multi_hand_landmarks
            x1 = np.mean([p.x for p in h1.landmark]) * gw
            y1 = np.mean([p.y for p in h1.landmark]) * gh
            x2 = np.mean([p.x for p in h2.landmark]) * gw
            y2 = np.mean([p.y for p in h2.landmark]) * gh
            dist = math.hypot(x2 - x1, y2 - y1)
            if dist < 100 and time.time() - prev_clap_time > clap_cooldown:
                prev_clap_time = time.time()
                client.send_message("/clap", 1)
                print("👏 CLAP")

        cv2.putText(frame_touch, f"FPS:{CAMERA_FPS} MC:{MODEL_COMPLEXITY}",
                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 0), 2)

        cv2.imshow("Touch-Cam", frame_touch)
        cv2.imshow("Gesture-Cam", frame_gest)

        # FPS limiter
        sleep = frame_duration - (time.time() - last_frame_time)
        if sleep > 0:
            time.sleep(sleep)
        last_frame_time = time.time()

        if cv2.waitKey(1) & 0xFF == 27:  # ESC quits
            break

    hands_touch.close()
    hands_gesture.close()
    cam_touch.release()
    cam_gesture.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    run_gesture_input()
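# -----------------------------------------------------------------
# Minimal OSC receiver sketch for testing the /touch and /clap
# messages without the game running (a sketch, assuming port 5005
# is free; the lambda handlers are illustrative, not part of this
# project):
#
#   from pythonosc.dispatcher import Dispatcher
#   from pythonosc.osc_server import BlockingOSCUDPServer
#
#   disp = Dispatcher()
#   disp.map("/touch", lambda addr, x, y: print("touch", x, y))
#   disp.map("/clap", lambda addr, v: print("clap"))
#   BlockingOSCUDPServer(("127.0.0.1", 5005), disp).serve_forever()
# -----------------------------------------------------------------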