import cv2
import mediapipe as mp
import numpy as np
import math
import time
import json
from pythonosc import udp_client

# =====================================================
# =================== SETTINGS ========================
# =====================================================

# -------- Camera Index --------
TOUCH_CAM_INDEX = 1
GESTURE_CAM_INDEX = 0

# -------- Camera Capture Resolution / FPS --------
CAM_WIDTH = 1280
CAM_HEIGHT = 720
CAM_FPS = 30

# -------- Display Resolution (integer) --------
DISPLAY_WIDTH = 480   # 960
DISPLAY_HEIGHT = 270  # 540

# -------- Screen Mapping --------
GAME_SCREEN_WIDTH = 900
GAME_SCREEN_HEIGHT = 600

# -------- MediaPipe Model Complexity --------
MODEL_COMPLEXITY_TOUCH = 1
MODEL_COMPLEXITY_GESTURE = 0

# -------- Touch Trigger --------
STILL_REQUIRED = 1.0   # seconds the fingertip must hold still to fire a touch
MOVE_TOLERANCE = 25    # max pixel jitter still counted as "holding still"
TOUCH_COOLDOWN = 0.5   # seconds before another touch may fire

# -------- Clap Trigger --------
CLAP_DISTANCE = 100    # max pixel distance between hand centers to count as a clap
CLAP_COOLDOWN = 1.0    # seconds before another clap may fire

# -------- OSC --------
OSC_IP = "127.0.0.1"
OSC_PORT = 5005

# =====================================================
# ================= GLOBAL STATE ======================
# =====================================================
client = udp_client.SimpleUDPClient(OSC_IP, OSC_PORT)

last_finger_pos = None      # anchor position for the stillness check
finger_still_start = None   # timestamp when the fingertip last came to rest
prev_touch_time = 0.0
prev_clap_time = 0.0

# =====================================================
# ============ CALIBRATION / HOMOGRAPHY ===============
# =====================================================
try:
    with open("calibration.json", "r") as f:
        CALIB_POINTS = json.load(f)
    print("📐 Calibration loaded")
except (FileNotFoundError, json.JSONDecodeError):
    CALIB_POINTS = None
    print("⚠️ No calibration found")

H = None
if CALIB_POINTS is not None:
    # Map the four calibrated camera-space corners onto the game screen
    # (clockwise from top-left).
    src = np.array(CALIB_POINTS, dtype=np.float32)
    dst = np.array([
        [0, 0],
        [GAME_SCREEN_WIDTH, 0],
        [GAME_SCREEN_WIDTH, GAME_SCREEN_HEIGHT],
        [0, GAME_SCREEN_HEIGHT]
    ], dtype=np.float32)
    H, _ = cv2.findHomography(src, dst)


def map_point_homography(x, y):
    """Project a camera-space point into game-screen coordinates."""
    if H is None:
        return int(x), int(y)
    p = np.array([[[x, y]]], dtype=np.float32)
    m = cv2.perspectiveTransform(p, H)[0][0]
    return int(m[0]), int(m[1])


# =====================================================
# ===================== MAIN ==========================
# =====================================================
def run_gesture_input():
    global last_finger_pos, finger_still_start
    global prev_touch_time, prev_clap_time

    mp_hands = mp.solutions.hands
    mp_draw = mp.solutions.drawing_utils

    # Heavier model for precise fingertip tracking on the touch camera,
    # lighter model for the coarse two-hand clap detection.
    hands_touch = mp_hands.Hands(
        max_num_hands=1,
        model_complexity=MODEL_COMPLEXITY_TOUCH,
        min_detection_confidence=0.6,
        min_tracking_confidence=0.6
    )
    hands_gesture = mp_hands.Hands(
        max_num_hands=2,
        model_complexity=MODEL_COMPLEXITY_GESTURE,
        min_detection_confidence=0.6,
        min_tracking_confidence=0.6
    )

    cam_touch = cv2.VideoCapture(TOUCH_CAM_INDEX)
    cam_gesture = cv2.VideoCapture(GESTURE_CAM_INDEX)
    for cam in (cam_touch, cam_gesture):
        cam.set(cv2.CAP_PROP_FRAME_WIDTH, CAM_WIDTH)
        cam.set(cv2.CAP_PROP_FRAME_HEIGHT, CAM_HEIGHT)
        cam.set(cv2.CAP_PROP_FPS, CAM_FPS)

    while True:
        ok1, frame_touch = cam_touch.read()
        ok2, frame_gest = cam_gesture.read()
        if not ok1 or not ok2:
            break

        frame_touch = cv2.flip(frame_touch, -1)  # rotate 180° (flip both axes)
        frame_gest = cv2.flip(frame_gest, 1)     # mirror the gesture view

        # ---------------- TOUCH ----------------
        rgb_t = cv2.cvtColor(frame_touch, cv2.COLOR_BGR2RGB)
        res_t = hands_touch.process(rgb_t)
        th, tw, _ = frame_touch.shape

        if res_t.multi_hand_landmarks:
            lm = res_t.multi_hand_landmarks[0]
            mp_draw.draw_landmarks(frame_touch, lm, mp_hands.HAND_CONNECTIONS)

            # Landmark 8 = index fingertip, landmark 5 = index MCP joint.
            # Tip at or below the knuckle in image space means the finger
            # is pointing at the surface.
            if lm.landmark[8].y >= lm.landmark[5].y:
                fx = int(lm.landmark[8].x * tw)
                fy = int(lm.landmark[8].y * th)
                sx, sy = map_point_homography(fx, fy)

                now = time.time()
                cur = (fx, fy)

                if last_finger_pos is None:
                    last_finger_pos = cur
                    finger_still_start = now
                else:
                    dist = math.hypot(cur[0] - last_finger_pos[0],
                                      cur[1] - last_finger_pos[1])
                    if dist < MOVE_TOLERANCE:
                        # Fingertip dwelled long enough and the cooldown has
                        # elapsed: fire a touch at the mapped screen position.
                        if (now - finger_still_start >= STILL_REQUIRED
                                and now - prev_touch_time > TOUCH_COOLDOWN):
                            client.send_message("/touch", [sx, sy])
                            prev_touch_time = now
                            finger_still_start = now
                    else:
                        # Fingertip moved: restart the dwell timer at the new spot.
                        finger_still_start = now
                        last_finger_pos = cur

                cv2.circle(frame_touch, (fx, fy), 10, (0, 255, 0), -1)
        else:
            last_finger_pos = None

        # ---------------- CLAP ----------------
        rgb_g = cv2.cvtColor(frame_gest, cv2.COLOR_BGR2RGB)
        res_g = hands_gesture.process(rgb_g)
        gh, gw, _ = frame_gest.shape

        if res_g.multi_hand_landmarks and len(res_g.multi_hand_landmarks) == 2:
            h1, h2 = res_g.multi_hand_landmarks
            # Hand centers = mean of all 21 landmarks in pixel space.
            c1 = np.mean([[p.x * gw, p.y * gh] for p in h1.landmark], axis=0)
            c2 = np.mean([[p.x * gw, p.y * gh] for p in h2.landmark], axis=0)
            dist = np.linalg.norm(c2 - c1)

            if dist < CLAP_DISTANCE and time.time() - prev_clap_time > CLAP_COOLDOWN:
                prev_clap_time = time.time()
                client.send_message("/clap", 1)

        # ---------------- DISPLAY SCALING ----------------
        disp_touch = cv2.resize(frame_touch, (DISPLAY_WIDTH, DISPLAY_HEIGHT))
        disp_gest = cv2.resize(frame_gest, (DISPLAY_WIDTH, DISPLAY_HEIGHT))
        cv2.imshow("Touch Camera", disp_touch)
        cv2.imshow("Gesture Camera", disp_gest)

        if cv2.waitKey(5) & 0xFF == 27:  # ESC quits
            break

    hands_touch.close()
    hands_gesture.close()
    cam_touch.release()
    cam_gesture.release()
    cv2.destroyAllWindows()


# =====================================================
if __name__ == "__main__":
    run_gesture_input()
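
# =====================================================
# Companion sketch: creating calibration.json. The actual calibration
# tool is not included here; this format is inferred from the loader
# above, which expects a list of four [x, y] camera-pixel corners,
# clockwise from top-left. A minimal version clicks the four projected
# screen corners in the touch-camera view (kept commented out so this
# file stays a single runnable script):
#
#   import cv2, json
#   pts = []
#
#   def on_click(event, x, y, flags, param):
#       if event == cv2.EVENT_LBUTTONDOWN and len(pts) < 4:
#           pts.append([x, y])
#
#   cam = cv2.VideoCapture(1)          # TOUCH_CAM_INDEX
#   cv2.namedWindow("calibrate")
#   cv2.setMouseCallback("calibrate", on_click)
#   while len(pts) < 4:
#       ok, frame = cam.read()
#       if not ok:
#           break
#       frame = cv2.flip(frame, -1)    # same flip as the main loop
#       for p in pts:
#           cv2.circle(frame, tuple(p), 6, (0, 0, 255), -1)
#       cv2.imshow("calibrate", frame)
#       if cv2.waitKey(5) & 0xFF == 27:
#           break
#   cam.release()
#   cv2.destroyAllWindows()
#   if len(pts) == 4:
#       with open("calibration.json", "w") as f:
#           json.dump(pts, f)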
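
# =====================================================
# Companion sketch: the receiving side. This script assumes a game
# process listening for /touch and /clap on OSC_PORT; that side is not
# shown here, so the handler names below are illustrative assumptions.
# A minimal standalone receiver with python-osc (commented out so this
# file stays a single runnable script):
#
#   from pythonosc.dispatcher import Dispatcher
#   from pythonosc import osc_server
#
#   def on_touch(address, x, y):
#       print(f"touch at ({x}, {y})")   # game-screen coordinates
#
#   def on_clap(address, value):
#       print("clap")
#
#   disp = Dispatcher()
#   disp.map("/touch", on_touch)
#   disp.map("/clap", on_clap)
#   server = osc_server.BlockingOSCUDPServer(("127.0.0.1", 5005), disp)
#   server.serve_forever()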