# global_match_memory/gesture_input_osc.py
import cv2
import mediapipe as mp
import numpy as np
import math
import time
import json
from pythonosc import udp_client

# -------------------------------
# SETTINGS
# -------------------------------
TOUCH_CAM_INDEX = 1        # your touch camera
GESTURE_CAM_INDEX = 0      # clap/gesture camera
GAME_SCREEN_WIDTH = 900
GAME_SCREEN_HEIGHT = 600
STILL_REQUIRED = 1.0       # seconds the finger must hold still
MOVE_TOLERANCE = 25        # movement threshold (pixels)

client = udp_client.SimpleUDPClient("127.0.0.1", 5005)
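
# A minimal sketch of the matching receiver (an assumption, not part of this
# file: it presumes the game listens on the same host/port), using
# python-osc's dispatcher:
#
#   from pythonosc.dispatcher import Dispatcher
#   from pythonosc.osc_server import BlockingOSCUDPServer
#
#   def on_touch(address, x, y):
#       print("touch at", x, y)
#
#   dispatcher = Dispatcher()
#   dispatcher.map("/touch", on_touch)
#   dispatcher.map("/clap", lambda address, *args: print("clap"))
#   BlockingOSCUDPServer(("127.0.0.1", 5005), dispatcher).serve_forever()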

# Global state
last_finger_pos = None
finger_still_start = None
prev_touch_time = 0.0
prev_clap_time = 0.0

# -------------------------------------
# LOAD CALIBRATION + HOMOGRAPHY
# -------------------------------------
try:
    with open("calibration.json", "r") as f:
        CALIB_POINTS = json.load(f)
    print("📐 Calibration loaded:", CALIB_POINTS)
except (FileNotFoundError, json.JSONDecodeError):
    CALIB_POINTS = None
    print("⚠️ No calibration found, using raw coordinates!")

H = None
if CALIB_POINTS is not None:
    # Map the four calibrated camera corners onto the screen corners
    # (order: top-left, top-right, bottom-right, bottom-left).
    src = np.array(CALIB_POINTS, dtype=np.float32)
    dst = np.array([
        [0, 0],
        [GAME_SCREEN_WIDTH, 0],
        [GAME_SCREEN_WIDTH, GAME_SCREEN_HEIGHT],
        [0, GAME_SCREEN_HEIGHT]
    ], dtype=np.float32)
    H, _ = cv2.findHomography(src, dst)
    print("📐 Homography matrix computed!")

def map_point_homography(x, y):
    """Convert camera coordinates to screen coordinates."""
    global H
    if H is None:
        # Fallback: no calibration loaded, no scaling at all (pure raw
        # camera coordinates).
        return int(x), int(y)
    p = np.array([[[x, y]]], dtype=np.float32)
    mapped = cv2.perspectiveTransform(p, H)[0][0]
    return int(mapped[0]), int(mapped[1])
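
# Example (assumes a calibration was loaded): the calibration corners map,
# up to float rounding, onto the screen corners:
#
#   map_point_homography(*CALIB_POINTS[0])  # ~ (0, 0)
#   map_point_homography(*CALIB_POINTS[2])  # ~ (GAME_SCREEN_WIDTH, GAME_SCREEN_HEIGHT)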

# -----------------------------------------------------------------
def run_gesture_input():
    global last_finger_pos, finger_still_start
    global prev_touch_time, prev_clap_time

    mp_hands = mp.solutions.hands
    mp_draw = mp.solutions.drawing_utils
    hands_touch = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.6)
    hands_gesture = mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.6)

    cam_touch = cv2.VideoCapture(TOUCH_CAM_INDEX)
    cam_gesture = cv2.VideoCapture(GESTURE_CAM_INDEX)

    if not cam_touch.isOpened():
        print("❌ Touch camera could NOT be opened!")
    else:
        print(f"Touch camera opened (index {TOUCH_CAM_INDEX})")
    if not cam_gesture.isOpened():
        print("❌ Gesture camera could NOT be opened!")
    else:
        print(f"Gesture camera opened (index {GESTURE_CAM_INDEX})")

    clap_cooldown = 1.5  # seconds between clap events

    while True:
        ok1, frame_touch = cam_touch.read()
        ok2, frame_gest = cam_gesture.read()
        if not ok1 or not ok2:
            print("❌ A camera is not delivering frames.")
            break

        # Touch cam is flipped on both axes (previously only mirrored
        # horizontally: cv2.flip(frame_touch, 1)); gesture cam is mirrored.
        frame_touch = cv2.flip(frame_touch, -1)
        frame_gest = cv2.flip(frame_gest, 1)

        rgb_t = cv2.cvtColor(frame_touch, cv2.COLOR_BGR2RGB)
        res_t = hands_touch.process(rgb_t)
        th, tw, _ = frame_touch.shape

        # -------------------------------------------------------------
        # TOUCH detection
        # -------------------------------------------------------------
        if res_t.multi_hand_landmarks:
            lm = res_t.multi_hand_landmarks[0]
            mp_draw.draw_landmarks(frame_touch, lm, mp_hands.HAND_CONNECTIONS)

            # Only accept a finger pointing downward, i.e. the fingertip
            # (landmark 8) below the MCP joint (landmark 5) in image
            # coordinates. Using `continue` here would also skip clap
            # detection and the preview windows, so reset the state and
            # fall through instead.
            if lm.landmark[8].y < lm.landmark[5].y:
                last_finger_pos = None
                finger_still_start = None
            else:
                fx = int(lm.landmark[8].x * tw)
                fy = int(lm.landmark[8].y * th)

                # Apply the homography
                sx, sy = map_point_homography(fx, fy)

                now = time.time()
                current_pos = (fx, fy)

                if last_finger_pos is None:
                    # First sample
                    last_finger_pos = current_pos
                    finger_still_start = now
                else:
                    dist = math.hypot(current_pos[0] - last_finger_pos[0],
                                      current_pos[1] - last_finger_pos[1])
                    if dist < MOVE_TOLERANCE:
                        if finger_still_start is None:
                            finger_still_start = now
                        else:
                            still_time = now - finger_still_start
                            if still_time >= STILL_REQUIRED and (now - prev_touch_time) > 0.5:
                                client.send_message("/touch", [sx, sy])
                                print(f"👉 TOUCH at {sx},{sy} after {still_time:.2f}s")
                                prev_touch_time = now
                                finger_still_start = None
                    else:
                        finger_still_start = now
                    last_finger_pos = current_pos

                cv2.circle(frame_touch, (fx, fy), 10, (0, 255, 0), -1)
                cv2.putText(frame_touch, f"{sx},{sy}", (fx + 10, fy - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
        else:
            last_finger_pos = None
            finger_still_start = None

        # -------------------------------------------------------------
        # GESTURE detection (clap)
        # -------------------------------------------------------------
        rgb_g = cv2.cvtColor(frame_gest, cv2.COLOR_BGR2RGB)
        res_g = hands_gesture.process(rgb_g)
        gh, gw, _ = frame_gest.shape

        if res_g.multi_hand_landmarks and len(res_g.multi_hand_landmarks) == 2:
            h1, h2 = res_g.multi_hand_landmarks
            # Pixel distance between the two hand centroids
            x1 = np.mean([p.x for p in h1.landmark]) * gw
            y1 = np.mean([p.y for p in h1.landmark]) * gh
            x2 = np.mean([p.x for p in h2.landmark]) * gw
            y2 = np.mean([p.y for p in h2.landmark]) * gh
            dist = math.hypot(x2 - x1, y2 - y1)

            if dist < 100 and (time.time() - prev_clap_time) > clap_cooldown:
                prev_clap_time = time.time()
                client.send_message("/clap", 1)
                print("👏 SEND /clap")
                # OpenCV's Hershey fonts cannot render emoji, so draw
                # plain text instead.
                cv2.putText(frame_gest, "CLAP", (int(gw / 2) - 20, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 255), 3)

        cv2.imshow("Touch-Cam", frame_touch)
        cv2.imshow("Gesture-Cam", frame_gest)
        if cv2.waitKey(5) & 0xFF == 27:  # Esc quits
            break

    cam_touch.release()
    cam_gesture.release()
    cv2.destroyAllWindows()
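

# Usage (assumes two webcams are attached; adjust the camera indices in
# SETTINGS if needed):
#
#   python gesture_input_osc.py
#
# Press Esc in either preview window to quit.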
if __name__ == "__main__":
    run_gesture_input()