# global_match_memory/gesture_input_osc_tryfix.py
import cv2
import mediapipe as mp
import numpy as np
import math, time, json
from pythonosc import udp_client
# -------------------------------
# SETTINGS
# -------------------------------
TOUCH_CAM_INDEX = 0
GESTURE_CAM_INDEX = 1
GAME_SCREEN_WIDTH = 900
GAME_SCREEN_HEIGHT = 600
STILL_REQUIRED = 1.0   # seconds the finger must dwell in place to count as a touch
MOVE_TOLERANCE = 25    # pixels of jitter allowed while dwelling
TARGET_FPS = 20
FRAME_TIME = 1.0 / TARGET_FPS
client = udp_client.SimpleUDPClient("127.0.0.1", 5005)
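
# The two messages this script emits (/touch [x, y] and /clap 1) can be
# consumed with pythonosc's server classes. A minimal receiver sketch for
# testing, to be run as a separate script (address and port assumed to match
# the client above):
#
#   from pythonosc import dispatcher, osc_server
#   disp = dispatcher.Dispatcher()
#   disp.map("/touch", lambda addr, x, y: print("touch at", x, y))
#   disp.map("/clap", lambda addr, v: print("clap"))
#   osc_server.BlockingOSCUDPServer(("127.0.0.1", 5005), disp).serve_forever()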
# Global states
last_finger_pos = None
finger_still_start = None
prev_touch_time = 0.0
prev_clap_time = 0.0
# -------------------------------------
# LOAD CALIBRATION + HOMOGRAPHY
# -------------------------------------
try:
    with open("calibration.json", "r") as f:
        CALIB_POINTS = json.load(f)
    print("📐 Calibration loaded:", CALIB_POINTS)
except (FileNotFoundError, json.JSONDecodeError):
    CALIB_POINTS = None
    print("⚠️ No calibration found, using raw coordinates!")
H = None
if CALIB_POINTS is not None:
    src = np.array(CALIB_POINTS, dtype=np.float32)
    dst = np.array([
        [0, 0],
        [GAME_SCREEN_WIDTH, 0],
        [GAME_SCREEN_WIDTH, GAME_SCREEN_HEIGHT],
        [0, GAME_SCREEN_HEIGHT]
    ], dtype=np.float32)
    H, _ = cv2.findHomography(src, dst)
    print("📐 Homography matrix computed!")
def map_point_homography(x, y):
    # Without a calibration, fall back to raw camera coordinates
    if H is None:
        return int(x), int(y)
    p = np.array([[[x, y]]], dtype=np.float32)
    mapped = cv2.perspectiveTransform(p, H)[0][0]
    return int(mapped[0]), int(mapped[1])
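
# Optional sanity check for the mapping above (a sketch, not called anywhere;
# only meaningful once a calibration has been loaded). Each calibration
# corner should land on the matching screen corner.
def _check_homography():
    if CALIB_POINTS is None:
        print("No calibration loaded, nothing to check.")
        return
    for (cx, cy), (ex, ey) in zip(CALIB_POINTS, dst):
        mx, my = map_point_homography(cx, cy)
        print(f"({cx},{cy}) -> ({mx},{my}), expected ({ex:.0f},{ey:.0f})")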
# -----------------------------------------------------------------
def run_gesture_input():
    global last_finger_pos, finger_still_start
    global prev_touch_time, prev_clap_time

    mp_hands = mp.solutions.hands
    mp_draw = mp.solutions.drawing_utils

    # Lightweight hand models (model_complexity=0) for faster inference
    hands_touch = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.6,
                                 model_complexity=0)
    hands_gesture = mp_hands.Hands(max_num_hands=2, min_detection_confidence=0.6,
                                   model_complexity=0)

    # Cameras
    cam_touch = cv2.VideoCapture(TOUCH_CAM_INDEX)
    cam_gesture = cv2.VideoCapture(GESTURE_CAM_INDEX)

    # Set both cameras to 640x480
    for cam in (cam_touch, cam_gesture):
        cam.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    if not cam_touch.isOpened():
        print("❌ Touch camera could NOT be opened!")
    if not cam_gesture.isOpened():
        print("❌ Gesture camera could NOT be opened!")

    clap_cooldown = 1.5
    while True:
        loop_start = time.time()

        ok1, frame_touch = cam_touch.read()
        ok2, frame_gest = cam_gesture.read()
        if not ok1 or not ok2:
            print("❌ One of the cameras is not delivering a frame.")
            break

        # Flip for orientation: touch frame rotated 180°, gesture frame mirrored
        frame_touch = cv2.flip(frame_touch, -1)
        frame_gest = cv2.flip(frame_gest, 1)
        # ---------------- TOUCH detection ----------------
        rgb_t = cv2.cvtColor(frame_touch, cv2.COLOR_BGR2RGB)
        res_t = hands_touch.process(rgb_t)
        th, tw, _ = frame_touch.shape

        if res_t.multi_hand_landmarks:
            lm = res_t.multi_hand_landmarks[0]

            # Finger pointing down? (tip, landmark 8, above the knuckle,
            # landmark 5, in the flipped frame) -> reset and skip this frame
            if lm.landmark[8].y < lm.landmark[5].y:
                last_finger_pos = None
                finger_still_start = None
                continue

            fx = int(lm.landmark[8].x * tw)
            fy = int(lm.landmark[8].y * th)
            sx, sy = map_point_homography(fx, fy)

            now = time.time()
            current_pos = (fx, fy)

            if last_finger_pos is None:
                last_finger_pos = current_pos
                finger_still_start = now
            else:
                dist = math.hypot(current_pos[0] - last_finger_pos[0],
                                  current_pos[1] - last_finger_pos[1])
                if dist < MOVE_TOLERANCE:
                    if finger_still_start is None:
                        finger_still_start = now
                    else:
                        still_time = now - finger_still_start
                        # Fire a touch once the finger has dwelled long enough,
                        # with a 0.5 s debounce between consecutive touches
                        if still_time >= STILL_REQUIRED and (now - prev_touch_time) > 0.5:
                            client.send_message("/touch", [sx, sy])
                            print(f"👉 TOUCH at {sx},{sy} after {still_time:.2f}s")
                            prev_touch_time = now
                            finger_still_start = None
                else:
                    # Finger moved too far: restart the dwell timer
                    finger_still_start = now
                last_finger_pos = current_pos

            cv2.circle(frame_touch, (fx, fy), 10, (0, 255, 0), -1)
            cv2.putText(frame_touch, f"{sx},{sy}", (fx + 10, fy - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
        else:
            last_finger_pos = None
            finger_still_start = None
        # ---------------- CLAP detection ----------------
        rgb_g = cv2.cvtColor(frame_gest, cv2.COLOR_BGR2RGB)
        res_g = hands_gesture.process(rgb_g)
        gh, gw, _ = frame_gest.shape

        if res_g.multi_hand_landmarks and len(res_g.multi_hand_landmarks) == 2:
            h1, h2 = res_g.multi_hand_landmarks

            # Centroid of each hand's landmarks, in pixel coordinates
            x1 = np.mean([p.x for p in h1.landmark]) * gw
            y1 = np.mean([p.y for p in h1.landmark]) * gh
            x2 = np.mean([p.x for p in h2.landmark]) * gw
            y2 = np.mean([p.y for p in h2.landmark]) * gh

            dist = math.hypot(x2 - x1, y2 - y1)

            # Hands closer than 100 px count as a clap, rate-limited by clap_cooldown
            if dist < 100 and (time.time() - prev_clap_time) > clap_cooldown:
                prev_clap_time = time.time()
                client.send_message("/clap", 1)
                print("👏 SEND /clap")
                # Hershey fonts cannot render emoji, so draw plain text instead
                cv2.putText(frame_gest, "CLAP!", (int(gw / 2) - 20, 80),
                            cv2.FONT_HERSHEY_SIMPLEX, 2, (0, 255, 255), 3)
        # Display
        cv2.imshow("Touch-Cam", frame_touch)
        cv2.imshow("Gesture-Cam", frame_gest)

        # -------------- FPS LIMITER --------------
        elapsed = time.time() - loop_start
        sleep_time = FRAME_TIME - elapsed
        if sleep_time > 0:
            time.sleep(sleep_time)

        if cv2.waitKey(1) & 0xFF == 27:
            break

    cam_touch.release()
    cam_gesture.release()
    cv2.destroyAllWindows()
if __name__ == "__main__":
    run_gesture_input()