# global_match_memory/gesture_input_osc_test2.py
# 2025-12-11
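"""
Dual-camera gesture input over OSC.

One camera watches the projection surface for "touch" events (an index
fingertip pointing down and held still), the other watches the players for
"clap" events (two hands brought close together). Detected events are sent
as OSC messages (/touch, /clap) to 127.0.0.1:5005.
"""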
import cv2
import mediapipe as mp
import numpy as np
import math, json
from pythonosc import udp_client
# ==================================================
# CAMERA / PERFORMANCE (INTEGER)
# ==================================================
TOUCH_CAM_INDEX = 1
GESTURE_CAM_INDEX = 0
CAMERA_WIDTH = 900
CAMERA_HEIGHT = 500
CAMERA_FPS = 18 # stable on laptops
MODEL_COMPLEXITY = 1 # set to 0 on laptops (lighter, faster model)
# ==================================================
# TOUCH (INTEGER / FRAME BASED)
# ==================================================
MOVE_TOLERANCE = 28
TOUCH_STILL_FRAMES = 18 # ~1s @ 18 FPS
TOUCH_COOLDOWN_FRAMES = 12
# ==================================================
# CLAP (INTEGER)
# ==================================================
CLAP_DISTANCE_THRESHOLD = 110
CLAP_COOLDOWN_FRAMES = 32
# ==================================================
# DISPLAY SETTINGS (INTEGER)
# 0 = OFF, 1 = 320x240, 2 = 480x360, 3 = 640x480
# ==================================================
DISPLAY_TOUCH_RES = 1
DISPLAY_GESTURE_RES = 1
DISPLAY_RES_MAP = {
    1: (320, 240),
    2: (480, 360),
    3: (640, 480)
}
# ==================================================
# GAME / HOMOGRAPHY
# ==================================================
GAME_SCREEN_WIDTH = 900
GAME_SCREEN_HEIGHT = 600
# ==================================================
client = udp_client.SimpleUDPClient("127.0.0.1", 5005)
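# OSC messages sent: /touch [x, y] (ints in game-screen coordinates), /clap 1.
# A minimal pythonosc receiver sketch for testing (hypothetical, not part of
# the game itself):
#   from pythonosc import dispatcher, osc_server
#   d = dispatcher.Dispatcher()
#   d.map("/touch", lambda addr, x, y: print(addr, x, y))
#   d.map("/clap", lambda addr, v: print(addr, v))
#   osc_server.BlockingOSCUDPServer(("127.0.0.1", 5005), d).serve_forever()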
# ==================================================
# GLOBAL STATES
# ==================================================
last_finger_pos = None
still_frames = 0
touch_cooldown = 0
clap_cooldown = 0
# ==================================================
# LOAD CALIBRATION
# ==================================================
try:
    with open("calibration.json", "r") as f:
        CALIB_POINTS = json.load(f)
    print("📐 Calibration loaded")
except (FileNotFoundError, json.JSONDecodeError):
    CALIB_POINTS = None
    print("⚠️ No calibration found")
H = None
if CALIB_POINTS:
    src = np.array(CALIB_POINTS, dtype=np.float32)
    dst = np.array([
        [0, 0],
        [GAME_SCREEN_WIDTH, 0],
        [GAME_SCREEN_WIDTH, GAME_SCREEN_HEIGHT],
        [0, GAME_SCREEN_HEIGHT]
    ], dtype=np.float32)
    H, _ = cv2.findHomography(src, dst)
def map_point(x, y):
    """Map a touch-camera pixel to game-screen coordinates via the
    homography (identity pass-through when no calibration is loaded)."""
    if H is None:
        return int(x), int(y)
    p = np.array([[[x, y]]], dtype=np.float32)
    m = cv2.perspectiveTransform(p, H)[0][0]
    return int(m[0]), int(m[1])
# ==================================================
def run():
    global last_finger_pos, still_frames
    global touch_cooldown, clap_cooldown

    # One tracker per camera: a single hand for touch, two hands for clap
    mp_hands = mp.solutions.hands
    hands_touch = mp_hands.Hands(
        max_num_hands=1,
        model_complexity=MODEL_COMPLEXITY,
        min_detection_confidence=0.6
    )
    hands_gesture = mp_hands.Hands(
        max_num_hands=2,
        model_complexity=MODEL_COMPLEXITY,
        min_detection_confidence=0.6
    )

    cam_touch = cv2.VideoCapture(TOUCH_CAM_INDEX)
    cam_gest = cv2.VideoCapture(GESTURE_CAM_INDEX)
    for cam in (cam_touch, cam_gest):
        cam.set(cv2.CAP_PROP_FRAME_WIDTH, CAMERA_WIDTH)
        cam.set(cv2.CAP_PROP_FRAME_HEIGHT, CAMERA_HEIGHT)
        cam.set(cv2.CAP_PROP_FPS, CAMERA_FPS)

    frame_delay = int(1000 / CAMERA_FPS)  # ms per frame, paces the main loop
    while True:
        ok1, frame_touch = cam_touch.read()
        ok2, frame_gest = cam_gest.read()
        if not ok1 or not ok2:
            break
        frame_touch = cv2.flip(frame_touch, -1)  # rotate 180° (flip both axes)
        frame_gest = cv2.flip(frame_gest, 1)  # mirror horizontally

        # ================= TOUCH =================
        rgb_t = cv2.cvtColor(frame_touch, cv2.COLOR_BGR2RGB)
        res_t = hands_touch.process(rgb_t)
        h, w, _ = frame_touch.shape
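        # Touch heuristic: the index fingertip (landmark 8) must point down
        # (below its base joint, landmark 5) and stay within MOVE_TOLERANCE px
        # of its previous position for TOUCH_STILL_FRAMES consecutive frames;
        # then a /touch is sent and a cooldown suppresses repeats.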
        if res_t.multi_hand_landmarks:
            lm = res_t.multi_hand_landmarks[0]
            if lm.landmark[8].y > lm.landmark[5].y:  # fingertip below base => pointing down
                fx = int(lm.landmark[8].x * w)
                fy = int(lm.landmark[8].y * h)
                sx, sy = map_point(fx, fy)
                current_pos = (fx, fy)
                if last_finger_pos is not None:
                    dist = math.hypot(
                        current_pos[0] - last_finger_pos[0],
                        current_pos[1] - last_finger_pos[1]
                    )
                    if dist < MOVE_TOLERANCE:
                        still_frames += 1
                        if still_frames >= TOUCH_STILL_FRAMES and touch_cooldown == 0:
                            client.send_message("/touch", [sx, sy])
                            print(f"👉 TOUCH {sx},{sy}")
                            touch_cooldown = TOUCH_COOLDOWN_FRAMES
                            still_frames = 0
                    else:
                        still_frames = 0
                else:
                    still_frames = 0
                last_finger_pos = current_pos
                cv2.circle(frame_touch, (fx, fy), 6, (0, 255, 0), -1)
        else:
            last_finger_pos = None
            still_frames = 0
        if touch_cooldown > 0:
            touch_cooldown -= 1
        # ================= CLAP =================
        rgb_g = cv2.cvtColor(frame_gest, cv2.COLOR_BGR2RGB)
        res_g = hands_gesture.process(rgb_g)
        gh, gw, _ = frame_gest.shape
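        # Clap heuristic: when both hands are visible, compare the distance
        # between the two hand-landmark centroids; closer than
        # CLAP_DISTANCE_THRESHOLD px fires /clap, then a cooldown applies.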
        if res_g.multi_hand_landmarks and len(res_g.multi_hand_landmarks) == 2:
            h1, h2 = res_g.multi_hand_landmarks
            # Centroid of each hand's 21 landmarks, in pixels
            x1 = np.mean([p.x for p in h1.landmark]) * gw
            y1 = np.mean([p.y for p in h1.landmark]) * gh
            x2 = np.mean([p.x for p in h2.landmark]) * gw
            y2 = np.mean([p.y for p in h2.landmark]) * gh
            if math.hypot(x2 - x1, y2 - y1) < CLAP_DISTANCE_THRESHOLD and clap_cooldown == 0:
                client.send_message("/clap", 1)
                print("👏 CLAP")
                clap_cooldown = CLAP_COOLDOWN_FRAMES
        if clap_cooldown > 0:
            clap_cooldown -= 1
        # ================= DISPLAY =================
        if DISPLAY_TOUCH_RES > 0:
            dw, dh = DISPLAY_RES_MAP[DISPLAY_TOUCH_RES]
            cv2.imshow("Touch-Cam", cv2.resize(frame_touch, (dw, dh)))
        if DISPLAY_GESTURE_RES > 0:
            dw, dh = DISPLAY_RES_MAP[DISPLAY_GESTURE_RES]
            cv2.imshow("Gesture-Cam", cv2.resize(frame_gest, (dw, dh)))
        if cv2.waitKey(frame_delay) & 0xFF == 27:  # ESC quits
            break

    hands_touch.close()
    hands_gesture.close()
    cam_touch.release()
    cam_gest.release()
    cv2.destroyAllWindows()
# ==================================================
if __name__ == "__main__":
    run()