import cv2
import mediapipe as mp
import numpy as np
import math
import time
import json

from pythonosc import udp_client

# -------------------------------
# SETTINGS
# -------------------------------
TOUCH_CAM_INDEX = 1
GESTURE_CAM_INDEX = 0

GAME_SCREEN_WIDTH = 900
GAME_SCREEN_HEIGHT = 600

STILL_REQUIRED = 1.0  # seconds the fingertip must hold still to count as a touch
MOVE_TOLERANCE = 25   # max movement (px, camera coords) still treated as "holding still"

# -------------------------------
# CAMERA / PERFORMANCE SETTINGS
# -------------------------------
CAMERA_FPS = 15
DISPLAY_WIDTH = 320   # 1280 for full resolution
DISPLAY_HEIGHT = 240  # 720 for full resolution

MODEL_COMPLEXITY = 0  # ✅ 0=fast | 1=balanced | 2=accurate

client = udp_client.SimpleUDPClient("127.0.0.1", 5005)

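# OSC protocol: whatever listens on 127.0.0.1:5005 (presumably the game)
# receives two messages from this script:
#   /touch [x, y]  -- a dwell-confirmed touch, in game-screen coordinates
#   /clap  1       -- both hands brought together in front of the gesture camera
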
# -------------------------------
# GLOBAL STATES
# -------------------------------
last_finger_pos = None     # last fingertip position (camera px)
finger_still_start = None  # timestamp when the fingertip stopped moving
prev_touch_time = 0.0      # last time a /touch was sent (debounce)
prev_clap_time = 0.0       # last time a /clap was sent (cooldown)

# -------------------------------
# CALIBRATION + HOMOGRAPHY
# -------------------------------
try:
    with open("calibration.json", "r") as f:
        CALIB_POINTS = json.load(f)
    print("📐 Calibration loaded:", CALIB_POINTS)
except (FileNotFoundError, json.JSONDecodeError):
    CALIB_POINTS = None
    print("⚠️ No calibration found")

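# NOTE (assumption): calibration.json is expected to hold the four projection
# corners as [x, y] pairs in camera pixel coordinates, ordered to match `dst`
# below: top-left, top-right, bottom-right, bottom-left. Illustrative values:
#   [[52, 40], [598, 36], [604, 412], [48, 420]]
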
H = None
if CALIB_POINTS is not None:
    src = np.array(CALIB_POINTS, dtype=np.float32)
    dst = np.array([
        [0, 0],
        [GAME_SCREEN_WIDTH, 0],
        [GAME_SCREEN_WIDTH, GAME_SCREEN_HEIGHT],
        [0, GAME_SCREEN_HEIGHT]
    ], dtype=np.float32)
    # Four point pairs determine the perspective transform exactly;
    # the second return value (an inlier mask) is unused here.
    H, _ = cv2.findHomography(src, dst)
    print("📐 Homography ready")


def map_point_homography(x, y):
    """Map a touch-camera pixel to game-screen coordinates via the homography."""
    if H is None:
        return int(x), int(y)
    p = np.array([[[x, y]]], dtype=np.float32)
    mapped = cv2.perspectiveTransform(p, H)[0][0]
    return int(mapped[0]), int(mapped[1])

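# Example (illustrative numbers): with a calibration loaded, a fingertip seen
# at camera pixel (160, 120) maps into the 900x600 game plane:
#   sx, sy = map_point_homography(160, 120)
# The calibration points must be in the same resolution the camera actually
# delivers (DISPLAY_WIDTH x DISPLAY_HEIGHT here), or the mapping will be skewed.
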
# -----------------------------------------------------------------

def run_gesture_input():
    global last_finger_pos, finger_still_start
    global prev_touch_time, prev_clap_time

    mp_hands = mp.solutions.hands
    mp_draw = mp.solutions.drawing_utils

    # ✅ model_complexity applied here
    hands_touch = mp_hands.Hands(
        max_num_hands=1,
        model_complexity=MODEL_COMPLEXITY,
        min_detection_confidence=0.6,
        min_tracking_confidence=0.6
    )

    hands_gesture = mp_hands.Hands(
        max_num_hands=2,
        model_complexity=MODEL_COMPLEXITY,
        min_detection_confidence=0.6,
        min_tracking_confidence=0.6
    )

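    # Two separate Hands instances are used because MediaPipe's tracker keeps
    # state between frames; feeding two camera streams through one instance
    # would confuse its tracking.
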
    cam_touch = cv2.VideoCapture(TOUCH_CAM_INDEX)
    cam_gesture = cv2.VideoCapture(GESTURE_CAM_INDEX)

    # Request reduced resolution and frame rate to keep CPU load down
    # (drivers may not honor every value).
    for cam in (cam_touch, cam_gesture):
        cam.set(cv2.CAP_PROP_FRAME_WIDTH, DISPLAY_WIDTH)
        cam.set(cv2.CAP_PROP_FRAME_HEIGHT, DISPLAY_HEIGHT)
        cam.set(cv2.CAP_PROP_FPS, CAMERA_FPS)

    clap_cooldown = 1.5  # seconds between successive /clap messages
    frame_duration = 1.0 / CAMERA_FPS
    last_frame_time = time.time()

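    # Main loop: read one frame per camera, run the dwell-touch detector on
    # the touch camera and the clap detector on the gesture camera, then
    # throttle to CAMERA_FPS.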
    while True:
        ok1, frame_touch = cam_touch.read()
        ok2, frame_gest = cam_gesture.read()
        if not ok1 or not ok2:
            break

        frame_touch = cv2.flip(frame_touch, -1)  # -1: flip both axes (180° rotation)
        frame_gest = cv2.flip(frame_gest, 1)     # 1: horizontal mirror

        # ---------------- TOUCH ----------------
        res_t = hands_touch.process(cv2.cvtColor(frame_touch, cv2.COLOR_BGR2RGB))
        th, tw, _ = frame_touch.shape

        if res_t.multi_hand_landmarks:
            lm = res_t.multi_hand_landmarks[0]
            mp_draw.draw_landmarks(frame_touch, lm, mp_hands.HAND_CONNECTIONS)

            # Landmark 8 is the index fingertip, 5 the index knuckle (MCP).
            # Tip above the knuckle in the (flipped) image is treated as a
            # raised finger, so the dwell state resets. Note this `continue`
            # also skips clap detection and the display update for this frame.
            if lm.landmark[8].y < lm.landmark[5].y:
                last_finger_pos = None
                finger_still_start = None
                continue

            fx = int(lm.landmark[8].x * tw)
            fy = int(lm.landmark[8].y * th)
            sx, sy = map_point_homography(fx, fy)

            now = time.time()
            curr = (fx, fy)

            if last_finger_pos is None:
                last_finger_pos = curr
                finger_still_start = now
            else:
                dist = math.hypot(curr[0] - last_finger_pos[0],
                                  curr[1] - last_finger_pos[1])
                if dist < MOVE_TOLERANCE:
                    # Fingertip has held still long enough: fire a touch,
                    # debounced to at most one every 0.5 s.
                    if finger_still_start and now - finger_still_start >= STILL_REQUIRED:
                        if now - prev_touch_time > 0.5:
                            client.send_message("/touch", [sx, sy])
                            print(f"👉 TOUCH {sx},{sy}")
                            prev_touch_time = now
                        finger_still_start = None
                else:
                    finger_still_start = now  # moved too far: restart the dwell timer

            last_finger_pos = curr

            cv2.circle(frame_touch, (fx, fy), 10, (0, 255, 0), -1)

        else:
            last_finger_pos = None
            finger_still_start = None

        # ---------------- CLAP ----------------
        res_g = hands_gesture.process(cv2.cvtColor(frame_gest, cv2.COLOR_BGR2RGB))
        gh, gw, _ = frame_gest.shape

        if res_g.multi_hand_landmarks and len(res_g.multi_hand_landmarks) == 2:
            # Compare the centroids of the two detected hands; if they come
            # closer than 100 px (at display resolution), count it as a clap.
            h1, h2 = res_g.multi_hand_landmarks
            x1 = np.mean([p.x for p in h1.landmark]) * gw
            y1 = np.mean([p.y for p in h1.landmark]) * gh
            x2 = np.mean([p.x for p in h2.landmark]) * gw
            y2 = np.mean([p.y for p in h2.landmark]) * gh
            dist = math.hypot(x2 - x1, y2 - y1)

            if dist < 100 and time.time() - prev_clap_time > clap_cooldown:
                prev_clap_time = time.time()
                client.send_message("/clap", 1)
                print("👏 CLAP")

        cv2.putText(frame_touch,
                    f"FPS:{CAMERA_FPS} MC:{MODEL_COMPLEXITY}",
                    (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                    0.8, (255, 255, 0), 2)

        cv2.imshow("Touch-Cam", frame_touch)
        cv2.imshow("Gesture-Cam", frame_gest)

        # FPS limiter: sleep off whatever is left of this frame's time budget.
        sleep = frame_duration - (time.time() - last_frame_time)
        if sleep > 0:
            time.sleep(sleep)
        last_frame_time = time.time()

        if cv2.waitKey(1) & 0xFF == 27:  # ESC quits
            break

    cam_touch.release()
    cam_gesture.release()
    cv2.destroyAllWindows()

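# A minimal sketch of the receiving side (hypothetical, not part of this
# script), using python-osc's dispatcher/server API:
#
#   from pythonosc.dispatcher import Dispatcher
#   from pythonosc.osc_server import BlockingOSCUDPServer
#
#   disp = Dispatcher()
#   disp.map("/touch", lambda addr, x, y: print("touch at", x, y))
#   disp.map("/clap", lambda addr, v: print("clap"))
#   BlockingOSCUDPServer(("127.0.0.1", 5005), disp).serve_forever()
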
if __name__ == "__main__":
    run_gesture_input()