# global_match_memory/gesture_input_osc_test2.py
# 2025-12-11
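"""
Dual-camera gesture input over OSC.

One camera watches the projection surface for "touch" events (an index
fingertip pointing down and held still), the other watches the players for
"clap" events (two hands brought close together). Detected events are sent
as OSC messages (/touch, /clap) to 127.0.0.1:5005.
"""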
import cv2
import mediapipe as mp
import numpy as np
import math, json
from pythonosc import udp_client
# ==================================================
# CAMERA / PERFORMANCE (INTEGER)
# ==================================================
TOUCH_CAM_INDEX = 1
GESTURE_CAM_INDEX = 0
CAMERA_WIDTH = 900
CAMERA_HEIGHT = 500
CAMERA_FPS = 18 # stable on laptops
MODEL_COMPLEXITY = 1 # set to 0 on laptops (lighter, faster model)
# ==================================================
# TOUCH (INTEGER / FRAME BASED)
# ==================================================
MOVE_TOLERANCE = 28
TOUCH_STILL_FRAMES = 18 # ~1s @ 18 FPS
TOUCH_COOLDOWN_FRAMES = 12
# ==================================================
# CLAP (INTEGER)
# ==================================================
CLAP_DISTANCE_THRESHOLD = 110
CLAP_COOLDOWN_FRAMES = 32
# ==================================================
# DISPLAY SETTINGS (INTEGER)
# 0 = OFF, 1 = 320x240, 2 = 480x360, 3 = 640x480
# ==================================================
DISPLAY_TOUCH_RES = 1
DISPLAY_GESTURE_RES = 1
DISPLAY_RES_MAP = {
    1: (320, 240),
    2: (480, 360),
    3: (640, 480)
}
# ==================================================
# GAME / HOMOGRAPHY
# ==================================================
GAME_SCREEN_WIDTH = 900
GAME_SCREEN_HEIGHT = 600
# ==================================================
client = udp_client.SimpleUDPClient("127.0.0.1", 5005)
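# OSC messages sent: /touch [x, y] (ints in game-screen coordinates), /clap 1.
# A minimal pythonosc receiver sketch for testing (hypothetical, not part of
# the game itself):
#   from pythonosc import dispatcher, osc_server
#   d = dispatcher.Dispatcher()
#   d.map("/touch", lambda addr, x, y: print(addr, x, y))
#   d.map("/clap", lambda addr, v: print(addr, v))
#   osc_server.BlockingOSCUDPServer(("127.0.0.1", 5005), d).serve_forever()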
# ==================================================
# GLOBAL STATES
# ==================================================
last_finger_pos = None
still_frames = 0
touch_cooldown = 0
clap_cooldown = 0
# ==================================================
# LOAD CALIBRATION
# ==================================================
try:
    with open("calibration.json", "r") as f:
        CALIB_POINTS = json.load(f)
    print("📐 Calibration loaded")
except (FileNotFoundError, json.JSONDecodeError):
    CALIB_POINTS = None
    print("⚠️ No calibration found")
H = None
if CALIB_POINTS:
    src = np.array(CALIB_POINTS, dtype=np.float32)
    dst = np.array([
        [0, 0],
        [GAME_SCREEN_WIDTH, 0],
        [GAME_SCREEN_WIDTH, GAME_SCREEN_HEIGHT],
        [0, GAME_SCREEN_HEIGHT]
    ], dtype=np.float32)
    H, _ = cv2.findHomography(src, dst)
def map_point(x, y):
    """Map a touch-camera pixel to game-screen coordinates via the
    homography (identity pass-through when no calibration is loaded)."""
    if H is None:
        return int(x), int(y)
    p = np.array([[[x, y]]], dtype=np.float32)
    m = cv2.perspectiveTransform(p, H)[0][0]
    return int(m[0]), int(m[1])
# ==================================================
def run():
    global last_finger_pos, still_frames
    global touch_cooldown, clap_cooldown

    # One tracker per camera: a single hand for touch, two hands for clap
    mp_hands = mp.solutions.hands
    hands_touch = mp_hands.Hands(
        max_num_hands=1,
        model_complexity=MODEL_COMPLEXITY,
        min_detection_confidence=0.6
    )
    hands_gesture = mp_hands.Hands(
        max_num_hands=2,
        model_complexity=MODEL_COMPLEXITY,
        min_detection_confidence=0.6
    )

    cam_touch = cv2.VideoCapture(TOUCH_CAM_INDEX)
    cam_gest = cv2.VideoCapture(GESTURE_CAM_INDEX)
    for cam in (cam_touch, cam_gest):
        cam.set(cv2.CAP_PROP_FRAME_WIDTH, CAMERA_WIDTH)
        cam.set(cv2.CAP_PROP_FRAME_HEIGHT, CAMERA_HEIGHT)
        cam.set(cv2.CAP_PROP_FPS, CAMERA_FPS)

    frame_delay = int(1000 / CAMERA_FPS)  # ms per frame, paces the main loop
    while True:
        ok1, frame_touch = cam_touch.read()
        ok2, frame_gest = cam_gest.read()
        if not ok1 or not ok2:
            break
        frame_touch = cv2.flip(frame_touch, -1)  # rotate 180° (flip both axes)
        frame_gest = cv2.flip(frame_gest, 1)  # mirror horizontally

        # ================= TOUCH =================
        rgb_t = cv2.cvtColor(frame_touch, cv2.COLOR_BGR2RGB)
        res_t = hands_touch.process(rgb_t)
        h, w, _ = frame_touch.shape
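        # Touch heuristic: the index fingertip (landmark 8) must point down
        # (below its base joint, landmark 5) and stay within MOVE_TOLERANCE px
        # of its previous position for TOUCH_STILL_FRAMES consecutive frames;
        # then a /touch is sent and a cooldown suppresses repeats.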
        if res_t.multi_hand_landmarks:
            lm = res_t.multi_hand_landmarks[0]
            if lm.landmark[8].y > lm.landmark[5].y:  # fingertip below base => pointing down
                fx = int(lm.landmark[8].x * w)
                fy = int(lm.landmark[8].y * h)
                sx, sy = map_point(fx, fy)
                current_pos = (fx, fy)
                if last_finger_pos is not None:
                    dist = math.hypot(
                        current_pos[0] - last_finger_pos[0],
                        current_pos[1] - last_finger_pos[1]
                    )
                    if dist < MOVE_TOLERANCE:
                        still_frames += 1
                        if still_frames >= TOUCH_STILL_FRAMES and touch_cooldown == 0:
                            client.send_message("/touch", [sx, sy])
                            print(f"👉 TOUCH {sx},{sy}")
                            touch_cooldown = TOUCH_COOLDOWN_FRAMES
                            still_frames = 0
                    else:
                        still_frames = 0
                else:
                    still_frames = 0
                last_finger_pos = current_pos
                cv2.circle(frame_touch, (fx, fy), 6, (0, 255, 0), -1)
        else:
            last_finger_pos = None
            still_frames = 0
        if touch_cooldown > 0:
            touch_cooldown -= 1
        # ================= CLAP =================
        rgb_g = cv2.cvtColor(frame_gest, cv2.COLOR_BGR2RGB)
        res_g = hands_gesture.process(rgb_g)
        gh, gw, _ = frame_gest.shape
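        # Clap heuristic: when both hands are visible, compare the distance
        # between the two hand-landmark centroids; closer than
        # CLAP_DISTANCE_THRESHOLD px fires /clap, then a cooldown applies.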
        if res_g.multi_hand_landmarks and len(res_g.multi_hand_landmarks) == 2:
            h1, h2 = res_g.multi_hand_landmarks
            # Centroid of each hand's 21 landmarks, in pixels
            x1 = np.mean([p.x for p in h1.landmark]) * gw
            y1 = np.mean([p.y for p in h1.landmark]) * gh
            x2 = np.mean([p.x for p in h2.landmark]) * gw
            y2 = np.mean([p.y for p in h2.landmark]) * gh
            if math.hypot(x2 - x1, y2 - y1) < CLAP_DISTANCE_THRESHOLD and clap_cooldown == 0:
                client.send_message("/clap", 1)
                print("👏 CLAP")
                clap_cooldown = CLAP_COOLDOWN_FRAMES
        if clap_cooldown > 0:
            clap_cooldown -= 1
        # ================= DISPLAY =================
        if DISPLAY_TOUCH_RES > 0:
            dw, dh = DISPLAY_RES_MAP[DISPLAY_TOUCH_RES]
            cv2.imshow("Touch-Cam", cv2.resize(frame_touch, (dw, dh)))
        if DISPLAY_GESTURE_RES > 0:
            dw, dh = DISPLAY_RES_MAP[DISPLAY_GESTURE_RES]
            cv2.imshow("Gesture-Cam", cv2.resize(frame_gest, (dw, dh)))
        if cv2.waitKey(frame_delay) & 0xFF == 27:  # ESC quits
            break

    hands_touch.close()
    hands_gesture.close()
    cam_touch.release()
    cam_gest.release()
    cv2.destroyAllWindows()
# ==================================================
if __name__ == "__main__":
    run()