# global_match_memory/gesture_input_osc_test3.py

import cv2
import mediapipe as mp
import numpy as np
import math, time, json
from pythonosc import udp_client
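# Dependencies (PyPI): opencv-python, mediapipe, numpy, python-osc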
# =====================================================
# =================== SETTINGS ========================
# =====================================================
# -------- Camera Index --------
TOUCH_CAM_INDEX = 1
GESTURE_CAM_INDEX = 0
# -------- Camera Capture Resolution / FPS --------
CAM_WIDTH = 1280
CAM_HEIGHT = 720
CAM_FPS = 30
# -------- Display Resolution (INTEGER) --------
DISPLAY_WIDTH = 480   # 960
DISPLAY_HEIGHT = 270  # 540
# -------- Screen Mapping --------
GAME_SCREEN_WIDTH = 900
GAME_SCREEN_HEIGHT = 600
# -------- MediaPipe Model Complexity --------
MODEL_COMPLEXITY_TOUCH = 1
MODEL_COMPLEXITY_GESTURE = 0
# -------- Touch Trigger --------
STILL_REQUIRED = 1.0   # seconds the fingertip must hold still to trigger
MOVE_TOLERANCE = 25    # pixels of allowed jitter while "still"
TOUCH_COOLDOWN = 0.5   # seconds between /touch messages
# -------- Clap Trigger --------
CLAP_DISTANCE = 100    # pixels between hand centroids
CLAP_COOLDOWN = 1.0    # seconds between /clap messages
# -------- OSC --------
OSC_IP = "127.0.0.1"
OSC_PORT = 5005
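# Messages emitted: /touch [x, y] (ints, game-screen pixels) and /clap [1]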
# =====================================================
# ================= GLOBAL STATE ======================
# =====================================================
client = udp_client.SimpleUDPClient(OSC_IP, OSC_PORT)
last_finger_pos = None      # last fingertip position (touch-camera pixels)
finger_still_start = None   # time.time() when the fingertip last came to rest
prev_touch_time = 0.0       # last /touch send, for cooldown
prev_clap_time = 0.0        # last /clap send, for cooldown
# =====================================================
# ============ CALIBRATION / HOMOGRAPHY ===============
# =====================================================
try:
    with open("calibration.json", "r") as f:
        CALIB_POINTS = json.load(f)
    print("📐 Calibration loaded")
except (FileNotFoundError, json.JSONDecodeError):
    CALIB_POINTS = None
    print("⚠️ No calibration found")
H = None
if CALIB_POINTS is not None:
    src = np.array(CALIB_POINTS, dtype=np.float32)
    dst = np.array([
        [0, 0],
        [GAME_SCREEN_WIDTH, 0],
        [GAME_SCREEN_WIDTH, GAME_SCREEN_HEIGHT],
        [0, GAME_SCREEN_HEIGHT]
    ], dtype=np.float32)
    H, _ = cv2.findHomography(src, dst)
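    # (With exactly four point pairs, cv2.getPerspectiveTransform(src, dst)
    # computes the same mapping; findHomography is a drop-in here.)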
def map_point_homography(x, y):
    """Map a touch-camera pixel to game-screen coordinates (identity if uncalibrated)."""
    if H is None:
        return int(x), int(y)
    p = np.array([[[x, y]]], dtype=np.float32)
    m = cv2.perspectiveTransform(p, H)[0][0]
    return int(m[0]), int(m[1])
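
# --------- Sketch (not in the original): collecting calibration points ---------
# One possible way to produce calibration.json: click the four projection
# corners in the touch-camera view, in the order assumed by dst above
# (top-left, top-right, bottom-right, bottom-left).
def collect_calibration_points(cam_index=TOUCH_CAM_INDEX):
    points = []

    def on_mouse(event, x, y, flags, param):
        if event == cv2.EVENT_LBUTTONDOWN and len(points) < 4:
            points.append([x, y])

    cap = cv2.VideoCapture(cam_index)
    cv2.namedWindow("Calibrate")
    cv2.setMouseCallback("Calibrate", on_mouse)
    while len(points) < 4:
        ok, frame = cap.read()
        if not ok:
            break
        frame = cv2.flip(frame, -1)  # match the flip applied in run_gesture_input
        for px, py in points:
            cv2.circle(frame, (px, py), 6, (0, 0, 255), -1)
        cv2.imshow("Calibrate", frame)
        if cv2.waitKey(5) & 0xFF == 27:  # Esc aborts
            break
    cap.release()
    cv2.destroyWindow("Calibrate")
    if len(points) == 4:
        with open("calibration.json", "w") as f:
            json.dump(points, f)
    return points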
# =====================================================
# ===================== MAIN ==========================
# =====================================================
def run_gesture_input():
    global last_finger_pos, finger_still_start
    global prev_touch_time, prev_clap_time
    mp_hands = mp.solutions.hands
    mp_draw = mp.solutions.drawing_utils
    # One detector per camera: heavier model for precise touch tracking,
    # lighter model for the two-hand clap check.
    hands_touch = mp_hands.Hands(
        max_num_hands=1,
        model_complexity=MODEL_COMPLEXITY_TOUCH,
        min_detection_confidence=0.6,
        min_tracking_confidence=0.6
    )
    hands_gesture = mp_hands.Hands(
        max_num_hands=2,
        model_complexity=MODEL_COMPLEXITY_GESTURE,
        min_detection_confidence=0.6,
        min_tracking_confidence=0.6
    )
    cam_touch = cv2.VideoCapture(TOUCH_CAM_INDEX)
    cam_gesture = cv2.VideoCapture(GESTURE_CAM_INDEX)
    for cam in (cam_touch, cam_gesture):
        cam.set(cv2.CAP_PROP_FRAME_WIDTH, CAM_WIDTH)
        cam.set(cv2.CAP_PROP_FRAME_HEIGHT, CAM_HEIGHT)
        cam.set(cv2.CAP_PROP_FPS, CAM_FPS)
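    # Note: CAP_PROP_* calls are requests; some backends silently ignore them,
    # so the actual size/FPS can be read back with cam.get() if it matters.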
    while True:
        ok1, frame_touch = cam_touch.read()
        ok2, frame_gest = cam_gesture.read()
        if not ok1 or not ok2:
            break
        frame_touch = cv2.flip(frame_touch, -1)  # rotate 180° (flip both axes)
        frame_gest = cv2.flip(frame_gest, 1)     # mirror horizontally
        # ---------------- TOUCH ----------------
        rgb_t = cv2.cvtColor(frame_touch, cv2.COLOR_BGR2RGB)
        res_t = hands_touch.process(rgb_t)
        th, tw, _ = frame_touch.shape
        if res_t.multi_hand_landmarks:
            lm = res_t.multi_hand_landmarks[0]
            mp_draw.draw_landmarks(frame_touch, lm, mp_hands.HAND_CONNECTIONS)
            # Index fingertip (landmark 8) at or below its MCP joint (landmark 5)
            # in image coordinates, i.e. the finger points toward the surface.
            if lm.landmark[8].y >= lm.landmark[5].y:
                fx = int(lm.landmark[8].x * tw)
                fy = int(lm.landmark[8].y * th)
                sx, sy = map_point_homography(fx, fy)
                now = time.time()
                cur = (fx, fy)
                if last_finger_pos is None:
                    last_finger_pos = cur
                    finger_still_start = now
                else:
                    dist = math.hypot(cur[0] - last_finger_pos[0], cur[1] - last_finger_pos[1])
                    if dist < MOVE_TOLERANCE:
                        # Dwell trigger: fingertip stayed within MOVE_TOLERANCE px
                        # for STILL_REQUIRED s, and the cooldown has elapsed.
                        if now - finger_still_start >= STILL_REQUIRED and now - prev_touch_time > TOUCH_COOLDOWN:
                            client.send_message("/touch", [sx, sy])
                            prev_touch_time = now
                            finger_still_start = now
                    else:
                        finger_still_start = now
                    last_finger_pos = cur
                cv2.circle(frame_touch, (fx, fy), 10, (0, 255, 0), -1)
        else:
            last_finger_pos = None
        # ---------------- CLAP ----------------
        rgb_g = cv2.cvtColor(frame_gest, cv2.COLOR_BGR2RGB)
        res_g = hands_gesture.process(rgb_g)
        gh, gw, _ = frame_gest.shape
        if res_g.multi_hand_landmarks and len(res_g.multi_hand_landmarks) == 2:
            h1, h2 = res_g.multi_hand_landmarks
            # Centroid of each hand's 21 landmarks, in pixels.
            c1 = np.mean([[p.x * gw, p.y * gh] for p in h1.landmark], axis=0)
            c2 = np.mean([[p.x * gw, p.y * gh] for p in h2.landmark], axis=0)
            dist = np.linalg.norm(c2 - c1)
            if dist < CLAP_DISTANCE and time.time() - prev_clap_time > CLAP_COOLDOWN:
                prev_clap_time = time.time()
                client.send_message("/clap", 1)
        # ---------------- DISPLAY SCALING ----------------
        disp_touch = cv2.resize(frame_touch, (DISPLAY_WIDTH, DISPLAY_HEIGHT))
        disp_gest = cv2.resize(frame_gest, (DISPLAY_WIDTH, DISPLAY_HEIGHT))
        cv2.imshow("Touch Camera", disp_touch)
        cv2.imshow("Gesture Camera", disp_gest)
        if cv2.waitKey(5) & 0xFF == 27:  # Esc quits
            break
    cam_touch.release()
    cam_gesture.release()
    cv2.destroyAllWindows()
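
# --------- Sketch (not in the original): matching OSC receiver ---------
# A minimal python-osc listener for the /touch and /clap messages sent above,
# handy for testing this script without the game running.
def run_osc_receiver():
    from pythonosc import dispatcher, osc_server

    def on_touch(address, x, y):
        print(f"touch at ({x}, {y})")

    def on_clap(address, value):
        print("clap")

    disp = dispatcher.Dispatcher()
    disp.map("/touch", on_touch)
    disp.map("/clap", on_clap)
    server = osc_server.BlockingOSCUDPServer((OSC_IP, OSC_PORT), disp)
    server.serve_forever()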
# =====================================================
if __name__ == "__main__":
    run_gesture_input()