# global_match_memory/gesture_input_osc_new_test.py

import cv2
import mediapipe as mp
import numpy as np
import math, json
from pythonosc import udp_client
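# Dependencies (pip package names): opencv-python, mediapipe, numpy, python-osc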
# --------------------------------------------------
# SETTINGS (INTEGER ONLY) - adjust to the PC's performance
# --------------------------------------------------
TOUCH_CAM_INDEX = 1
GESTURE_CAM_INDEX = 0          # <-- camera index
CAMERA_WIDTH = 480             # <-- resolution
CAMERA_HEIGHT = 320
CAMERA_FPS = 18                # integer FPS
MODEL_COMPLEXITY = 0           # 0=fast, 1=normal, 2=accurate
MOVE_TOLERANCE = 28            # pixels
TOUCH_STILL_FRAMES = 18        # stable frames required to register a touch
TOUCH_COOLDOWN_FRAMES = 12     # cooldown after a touch
CLAP_DISTANCE_THRESHOLD = 110
CLAP_COOLDOWN_FRAMES = 32      # <-- optional?
GAME_SCREEN_WIDTH = 900
GAME_SCREEN_HEIGHT = 600       # <-- screen control: target coordinate space
# --------------------------------------------------
client = udp_client.SimpleUDPClient("127.0.0.1", 5005)
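# OSC messages emitted: "/touch" with [x, y] in game-screen pixels, and
# "/clap" with the constant 1 (see the receiver sketch at the end of the file).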
# --------------------------------------------------
# GLOBAL STATES
# --------------------------------------------------
last_finger_pos = None
still_frames = 0
touch_cooldown = 0
clap_cooldown = 0
# --------------------------------------------------
# LOAD CALIBRATION
# --------------------------------------------------
try:
    with open("calibration.json", "r") as f:
        CALIB_POINTS = json.load(f)
    print("📐 Calibration loaded")
except (FileNotFoundError, json.JSONDecodeError):
    CALIB_POINTS = None
    print("⚠️ No calibration found, using raw coordinates")
H = None
if CALIB_POINTS:
    src = np.array(CALIB_POINTS, dtype=np.float32)
    dst = np.array([
        [0, 0],
        [GAME_SCREEN_WIDTH, 0],
        [GAME_SCREEN_WIDTH, GAME_SCREEN_HEIGHT],
        [0, GAME_SCREEN_HEIGHT]
    ], dtype=np.float32)
    H, _ = cv2.findHomography(src, dst)
def map_point(x, y):
    """Map a camera-pixel point to game-screen coordinates via the homography."""
    if H is None:
        return int(x), int(y)
    p = np.array([[[x, y]]], dtype=np.float32)
    m = cv2.perspectiveTransform(p, H)[0][0]
    return int(m[0]), int(m[1])
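# Sanity check (hypothetical numbers): if the calibrated source quad spans
# the full 480x320 frame, map_point(240, 160) returns (450, 300), the
# center of the 900x600 game screen.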
# --------------------------------------------------
def run():
    global last_finger_pos, still_frames
    global touch_cooldown, clap_cooldown
    mp_hands = mp.solutions.hands
    # One Hands instance per camera: a single hand for touch, two for claps
    hands_touch = mp_hands.Hands(
        max_num_hands=1,
        model_complexity=MODEL_COMPLEXITY,
        min_detection_confidence=0.6
    )
    hands_gesture = mp_hands.Hands(
        max_num_hands=2,
        model_complexity=MODEL_COMPLEXITY,
        min_detection_confidence=0.6
    )
    cam_touch = cv2.VideoCapture(TOUCH_CAM_INDEX)
    cam_gest = cv2.VideoCapture(GESTURE_CAM_INDEX)
    for cam in (cam_touch, cam_gest):
        cam.set(cv2.CAP_PROP_FRAME_WIDTH, CAMERA_WIDTH)
        cam.set(cv2.CAP_PROP_FRAME_HEIGHT, CAMERA_HEIGHT)
        cam.set(cv2.CAP_PROP_FPS, CAMERA_FPS)
    frame_delay = int(1000 / CAMERA_FPS)  # ms per frame for cv2.waitKey
    while True:
        ok1, frame_touch = cam_touch.read()
        ok2, frame_gest = cam_gest.read()
        if not ok1 or not ok2:
            break
        frame_touch = cv2.flip(frame_touch, -1)  # flip both axes (180° rotation)
        frame_gest = cv2.flip(frame_gest, 1)     # mirror horizontally
        # ---------------- TOUCH ----------------
        rgb_t = cv2.cvtColor(frame_touch, cv2.COLOR_BGR2RGB)
        res_t = hands_touch.process(rgb_t)
        h, w, _ = frame_touch.shape
        if res_t.multi_hand_landmarks:
            lm = res_t.multi_hand_landmarks[0]
            # Index finger pointing down (tip below its base knuckle)?
            if lm.landmark[8].y > lm.landmark[5].y:
                fx = int(lm.landmark[8].x * w)
                fy = int(lm.landmark[8].y * h)
                sx, sy = map_point(fx, fy)
                current_pos = (fx, fy)
                if last_finger_pos is None:
                    still_frames = 0
                else:
                    dist = math.hypot(
                        current_pos[0] - last_finger_pos[0],
                        current_pos[1] - last_finger_pos[1]
                    )
                    if dist < MOVE_TOLERANCE:
                        still_frames += 1
                        if still_frames >= TOUCH_STILL_FRAMES and touch_cooldown == 0:
                            client.send_message("/touch", [sx, sy])
                            print(f"👉 TOUCH {sx},{sy}")
                            touch_cooldown = TOUCH_COOLDOWN_FRAMES
                            still_frames = 0
                    else:
                        still_frames = 0
                last_finger_pos = current_pos
                cv2.circle(frame_touch, (fx, fy), 8, (0, 255, 0), -1)
        else:
            last_finger_pos = None
            still_frames = 0
        if touch_cooldown > 0:
            touch_cooldown -= 1
        # ---------------- CLAP ----------------
        rgb_g = cv2.cvtColor(frame_gest, cv2.COLOR_BGR2RGB)
        res_g = hands_gesture.process(rgb_g)
        gh, gw, _ = frame_gest.shape
        if res_g.multi_hand_landmarks and len(res_g.multi_hand_landmarks) == 2:
            h1, h2 = res_g.multi_hand_landmarks
            # Center of mass of each hand's 21 landmarks, in pixels
            x1 = np.mean([p.x for p in h1.landmark]) * gw
            y1 = np.mean([p.y for p in h1.landmark]) * gh
            x2 = np.mean([p.x for p in h2.landmark]) * gw
            y2 = np.mean([p.y for p in h2.landmark]) * gh
            dist = math.hypot(x2 - x1, y2 - y1)
            if dist < CLAP_DISTANCE_THRESHOLD and clap_cooldown == 0:
                client.send_message("/clap", 1)
                print("👏 CLAP")
                clap_cooldown = CLAP_COOLDOWN_FRAMES
        if clap_cooldown > 0:
            clap_cooldown -= 1
        # ---------------- DISPLAY ----------------
        cv2.imshow("Touch-Cam", frame_touch)
        cv2.imshow("Gesture-Cam", frame_gest)
        if cv2.waitKey(frame_delay) & 0xFF == 27:  # ESC quits
            break
    cam_touch.release()
    cam_gest.release()
    cv2.destroyAllWindows()
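# --------------------------------------------------
# Test helper, never called by this script: a minimal pythonosc receiver
# sketch for verifying the /touch and /clap messages above. This is an
# assumed debugging aid, not part of the game; run it in a separate process.
def _debug_osc_receiver(ip="127.0.0.1", port=5005):
    from pythonosc import dispatcher, osc_server
    disp = dispatcher.Dispatcher()
    disp.map("/touch", lambda addr, x, y: print(f"👉 got touch at {x},{y}"))
    disp.map("/clap", lambda addr, v: print("👏 got clap"))
    # BlockingOSCUDPServer occupies the port until interrupted
    osc_server.BlockingOSCUDPServer((ip, port), disp).serve_forever()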
# --------------------------------------------------
if __name__ == "__main__":
    run()