# global_match_memory/gesture_input_osc_tryfix.py
import cv2
import mediapipe as mp
import numpy as np
import math, time, json
from pythonosc import udp_client
# -------------------------------
# SETTINGS
# -------------------------------
TOUCH_CAM_INDEX = 0
GESTURE_CAM_INDEX = 1
GAME_SCREEN_WIDTH = 900
GAME_SCREEN_HEIGHT = 600
STILL_REQUIRED = 1.0   # seconds the finger must dwell in place to count as a touch
MOVE_TOLERANCE = 25    # pixels of jitter allowed while dwelling
TARGET_FPS = 20
FRAME_TIME = 1.0 / TARGET_FPS
client = udp_client.SimpleUDPClient("127.0.0.1", 5005)
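
# The two messages this script emits (/touch [x, y] and /clap 1) can be
# consumed with pythonosc's server classes. A minimal receiver sketch for
# testing, to be run as a separate script (address and port assumed to match
# the client above):
#
#   from pythonosc import dispatcher, osc_server
#   disp = dispatcher.Dispatcher()
#   disp.map("/touch", lambda addr, x, y: print("touch at", x, y))
#   disp.map("/clap", lambda addr, v: print("clap"))
#   osc_server.BlockingOSCUDPServer(("127.0.0.1", 5005), disp).serve_forever()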
# Global states
last_finger_pos = None
finger_still_start = None
prev_touch_time = 0.0
prev_clap_time = 0.0
# -------------------------------------
# LOAD CALIBRATION + HOMOGRAPHY
# -------------------------------------
try:
    with open("calibration.json", "r") as f:
        CALIB_POINTS = json.load(f)
    print("📐 Calibration loaded:", CALIB_POINTS)
except (FileNotFoundError, json.JSONDecodeError):
    CALIB_POINTS = None
    print("⚠️ No calibration found, using raw coordinates!")
H = None
if CALIB_POINTS is not None:
    src = np.array(CALIB_POINTS, dtype=np.float32)
    dst = np.array([
        [0, 0],
        [GAME_SCREEN_WIDTH, 0],
        [GAME_SCREEN_WIDTH, GAME_SCREEN_HEIGHT],
        [0, GAME_SCREEN_HEIGHT]
    ], dtype=np.float32)
    H, _ = cv2.findHomography(src, dst)
    print("📐 Homography matrix computed!")
def map_point_homography(x, y):
    # Without a calibration, fall back to raw camera coordinates
    if H is None:
        return int(x), int(y)
    p = np.array([[[x, y]]], dtype=np.float32)
    mapped = cv2.perspectiveTransform(p, H)[0][0]
    return int(mapped[0]), int(mapped[1])
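
# Optional sanity check for the mapping above (a sketch, not called anywhere;
# only meaningful once a calibration has been loaded). Each calibration
# corner should land on the matching screen corner.
def _check_homography():
    if CALIB_POINTS is None:
        print("No calibration loaded, nothing to check.")
        return
    for (cx, cy), (ex, ey) in zip(CALIB_POINTS, dst):
        mx, my = map_point_homography(cx, cy)
        print(f"({cx},{cy}) -> ({mx},{my}), expected ({ex:.0f},{ey:.0f})")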
# -----------------------------------------------------------------
def run_gesture_input():
    global last_finger_pos, finger_still_start
    global prev_touch_time, prev_clap_time

    mp_hands = mp.solutions.hands
    mp_draw = mp.solutions.drawing_utils

    # Lightweight hand models (model_complexity=0) for faster inference
    hands_touch = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.6,
                                 model_complexity=0)
    hands_gesture = mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.6,
                                   model_complexity=0)

    # Cameras
    cam_touch = cv2.VideoCapture(TOUCH_CAM_INDEX)
    cam_gesture = cv2.VideoCapture(GESTURE_CAM_INDEX)

    # Set both cameras to 640x480
    for cam in (cam_touch, cam_gesture):
        cam.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    if not cam_touch.isOpened():
        print("❌ Touch camera could NOT be opened!")
    if not cam_gesture.isOpened():
        print("❌ Gesture camera could NOT be opened!")

    clap_cooldown = 1.5
    while True:
        loop_start = time.time()

        ok1, frame_touch = cam_touch.read()
        ok2, frame_gest = cam_gesture.read()
        if not ok1 or not ok2:
            print("❌ One of the cameras is not delivering a frame.")
            break

        # Flip for orientation: touch frame rotated 180°, gesture frame mirrored
        frame_touch = cv2.flip(frame_touch, -1)
        frame_gest = cv2.flip(frame_gest, 1)
        # ---------------- TOUCH detection ----------------
        rgb_t = cv2.cvtColor(frame_touch, cv2.COLOR_BGR2RGB)
        res_t = hands_touch.process(rgb_t)
        th, tw, _ = frame_touch.shape

        if res_t.multi_hand_landmarks:
            lm = res_t.multi_hand_landmarks[0]

            # Finger pointing down? (tip, landmark 8, above the knuckle,
            # landmark 5, in the flipped frame) -> reset and skip this frame
            if lm.landmark[8].y < lm.landmark[5].y:
                last_finger_pos = None
                finger_still_start = None
                continue

            fx = int(lm.landmark[8].x * tw)
            fy = int(lm.landmark[8].y * th)
            sx, sy = map_point_homography(fx, fy)

            now = time.time()
            current_pos = (fx, fy)

            if last_finger_pos is None:
                last_finger_pos = current_pos
                finger_still_start = now
            else:
                dist = math.hypot(current_pos[0] - last_finger_pos[0],
                                  current_pos[1] - last_finger_pos[1])
                if dist < MOVE_TOLERANCE:
                    if finger_still_start is None:
                        finger_still_start = now
                    else:
                        still_time = now - finger_still_start
                        # Fire a touch once the finger has dwelled long enough,
                        # with a 0.5 s debounce between consecutive touches
                        if still_time >= STILL_REQUIRED and (now - prev_touch_time) > 0.5:
                            client.send_message("/touch", [sx, sy])
                            print(f"👉 TOUCH at {sx},{sy} after {still_time:.2f}s")
                            prev_touch_time = now
                            finger_still_start = None
                else:
                    # Finger moved too far: restart the dwell timer
                    finger_still_start = now
                last_finger_pos = current_pos

            cv2.circle(frame_touch, (fx, fy), 10, (0, 255, 0), -1)
            cv2.putText(frame_touch, f"{sx},{sy}", (fx + 10, fy - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
        else:
            last_finger_pos = None
            finger_still_start = None
        # ---------------- CLAP detection ----------------
        rgb_g = cv2.cvtColor(frame_gest, cv2.COLOR_BGR2RGB)
        res_g = hands_gesture.process(rgb_g)
        gh, gw, _ = frame_gest.shape

        if res_g.multi_hand_landmarks and len(res_g.multi_hand_landmarks) == 2:
            h1, h2 = res_g.multi_hand_landmarks

            # Centroid of each hand's landmarks, in pixel coordinates
            x1 = np.mean([p.x for p in h1.landmark]) * gw
            y1 = np.mean([p.y for p in h1.landmark]) * gh
            x2 = np.mean([p.x for p in h2.landmark]) * gw
            y2 = np.mean([p.y for p in h2.landmark]) * gh

            dist = math.hypot(x2 - x1, y2 - y1)

            # Hands closer than 100 px count as a clap, rate-limited by clap_cooldown
            if dist < 100 and (time.time() - prev_clap_time) > clap_cooldown:
                prev_clap_time = time.time()
                client.send_message("/clap", 1)
                print("👏 SEND /clap")
                # Hershey fonts cannot render emoji, so draw plain text instead
                cv2.putText(frame_gest, "CLAP!", (int(gw / 2) - 20, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 255), 3)
        # Display
        cv2.imshow("Touch-Cam", frame_touch)
        cv2.imshow("Gesture-Cam", frame_gest)

        # -------------- FPS LIMITER --------------
        elapsed = time.time() - loop_start
        sleep_time = FRAME_TIME - elapsed
        if sleep_time > 0:
            time.sleep(sleep_time)

        if cv2.waitKey(1) & 0xFF == 27:
            break

    cam_touch.release()
    cam_gesture.release()
    cv2.destroyAllWindows()
if __name__ == "__main__":
    run_gesture_input()