From fd7981f244dbf42a9ac7f2e4bf762e9e9d4706e4 Mon Sep 17 00:00:00 2001
From: Michael
Date: Wed, 10 Dec 2025 19:30:25 +0100
Subject: [PATCH] max dist calculation for eye tracking, new notebook for model training

---
 dataset_creation/maxDist.py         |  72 +++++++
 model_training/VAE_SVM/vaesvm.ipynb | 323 ++++++++++++++++++++++++++++
 2 files changed, 395 insertions(+)
 create mode 100644 dataset_creation/maxDist.py
 create mode 100644 model_training/VAE_SVM/vaesvm.ipynb

diff --git a/dataset_creation/maxDist.py b/dataset_creation/maxDist.py
new file mode 100644
index 0000000..8242101
--- /dev/null
+++ b/dataset_creation/maxDist.py
@@ -0,0 +1,72 @@
+import math
+
+def fixation_radius_normalized(theta_deg: float,
+                               distance_cm: float,
+                               screen_width_cm: float,
+                               screen_height_cm: float,
+                               resolution_x: int,
+                               resolution_y: int,
+                               method: str = "max"):
+    """
+    Compute the PyGaze fixation radius (max_dist) for gaze data normalized to [0, 1].
+    """
+    # Step 1: visual angle → physical distance on the screen (cm)
+    delta_cm = 2 * distance_cm * math.tan(math.radians(theta_deg) / 2)
+
+    # Step 2: physical distance (cm) → pixels
+    delta_px_x = delta_cm * (resolution_x / screen_width_cm)
+    delta_px_y = delta_cm * (resolution_y / screen_height_cm)
+
+    # Radius in pixels
+    if method == "max":
+        r_px = max(delta_px_x, delta_px_y)
+    else:
+        r_px = math.sqrt(delta_px_x**2 + delta_px_y**2)
+
+    # Step 3: pixel radius → normalized radius
+    r_norm_x = r_px / resolution_x
+    r_norm_y = r_px / resolution_y
+
+    if method == "max":
+        return max(r_norm_x, r_norm_y)
+    else:
+        return math.sqrt(r_norm_x**2 + r_norm_y**2)
+
+
+
+
+
+
+# Example: triple 55" 4K monitor setup (three screens side by side)
+screen_width_cm = 3*121.8
+screen_height_cm = 68.5
+resolution_x = 3*3840
+resolution_y = 2160
+distance_to_screen_cm = 120
+method = 'max'
+max_angle = 1.0
+
+maxdist_px = fixation_radius_normalized(theta_deg=max_angle,
+                                        distance_cm=distance_to_screen_cm,
+                                        screen_width_cm=screen_width_cm,
+                                        screen_height_cm=screen_height_cm,
+                                        resolution_x=resolution_x,
+                                        resolution_y=resolution_y,
+                                        method=method)
+
+print("PyGaze max_dist (max):", maxdist_px)
+
+method = 'euclid'
+maxdist_px = fixation_radius_normalized(theta_deg=max_angle,
+                                        distance_cm=distance_to_screen_cm,
+                                        screen_width_cm=screen_width_cm,
+                                        screen_height_cm=screen_height_cm,
+                                        resolution_x=resolution_x,
+                                        resolution_y=resolution_y,
+                                        method=method)
+
+print("PyGaze max_dist (euclid):", maxdist_px)
+
+# TODO: does not yet match the screen width
+# https://osdoc.cogsci.nl/4.0/de/visualangle/
+# https://reference.org/facts/Visual_angle/LUw29zy7
\ No newline at end of file
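As a cross-check for the TODO note above: with gaze coordinates normalized to [0, 1] per axis, a displacement of delta_cm corresponds to delta_cm / screen_width_cm horizontally and delta_cm / screen_height_cm vertically, so the pixel resolution cancels out. The sketch below is illustrative only (the function name is not part of this patch) and assumes the normalized coordinates span the full triple-monitor width and height.

import math

def fixation_radius_normalized_direct(theta_deg, distance_cm,
                                      screen_width_cm, screen_height_cm,
                                      method="max"):
    # Visual angle -> physical displacement on the screen (cm), as in step 1 above.
    delta_cm = 2 * distance_cm * math.tan(math.radians(theta_deg) / 2)
    # Normalized gaze spans [0, 1] over the full screen width/height,
    # so the normalized radius per axis is simply the ratio of centimetres.
    r_norm_x = delta_cm / screen_width_cm
    r_norm_y = delta_cm / screen_height_cm
    if method == "max":
        return max(r_norm_x, r_norm_y)
    return math.sqrt(r_norm_x**2 + r_norm_y**2)

# Triple 55" 4K setup from above: roughly 0.0057 in x and 0.0306 in y for 1 degree.
# The pixel pitch is about 31.5 px/cm in both axes here, so this direct form and the
# pixel-based route in maxDist.py give nearly the same result for this geometry.
print(fixation_radius_normalized_direct(1.0, 120, 3 * 121.8, 68.5, method="max"))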
diff --git a/model_training/VAE_SVM/vaesvm.ipynb b/model_training/VAE_SVM/vaesvm.ipynb
new file mode 100644
index 0000000..80fc0ae
--- /dev/null
+++ b/model_training/VAE_SVM/vaesvm.ipynb
@@ -0,0 +1,323 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "708c9745",
+   "metadata": {},
+   "source": [
+    "### Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "id": "53b10294",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "/home\n"
+     ]
+    },
+    {
+     "ename": "ImportError",
+     "evalue": "cannot import name 'mad_outlier_removal' from 'Fahrsimulator_MSY2526_AI.model_training.tools' (unknown location)",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "\u001b[31mImportError\u001b[39m                               Traceback (most recent call last)",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[61]\u001b[39m\u001b[32m, line 11\u001b[39m\n\u001b[32m      8\u001b[39m sys.path.append(base_dir)\n\u001b[32m      9\u001b[39m \u001b[38;5;28mprint\u001b[39m(base_dir)\n\u001b[32m---> \u001b[39m\u001b[32m11\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mFahrsimulator_MSY2526_AI\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mmodel_training\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mtools\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m evaluation_tools, scaler, mad_outlier_removal\n\u001b[32m     12\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msklearn\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpreprocessing\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m StandardScaler, MinMaxScaler\n\u001b[32m     13\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msklearn\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01msvm\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m OneClassSVM\n",
+      "\u001b[31mImportError\u001b[39m: cannot import name 'mad_outlier_removal' from 'Fahrsimulator_MSY2526_AI.model_training.tools' (unknown location)"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from pathlib import Path\n",
+    "import sys\n",
+    "import os\n",
+    "\n",
+    "base_dir = os.path.abspath(os.path.join(os.getcwd(), \"..\"))\n",
+    "sys.path.append(base_dir)\n",
+    "print(base_dir)\n",
+    "\n",
+    "from Fahrsimulator_MSY2526_AI.model_training.tools import evaluation_tools, scaler, mad_outlier_removal\n",
+    "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
+    "from sklearn.svm import OneClassSVM\n",
+    "from sklearn.model_selection import GridSearchCV, KFold, ParameterGrid, train_test_split\n",
+    "import matplotlib.pyplot as plt\n",
+    "import tensorflow as tf\n",
+    "import pickle\n",
+    "from sklearn.metrics import (roc_auc_score, accuracy_score, precision_score, \n",
+    "                             recall_score, f1_score, confusion_matrix, classification_report) "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "68101229",
+   "metadata": {},
+   "source": [
+    "### Load Dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "24a765e8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/first_AU_dataset/output_windowed.parquet\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "471001b0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_parquet(path=dataset_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0fdecdaa",
+   "metadata": {},
+   "source": [
+    "### Load Performance Data and Subject Split"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "692d1b47",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "performance_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/subject_performance/3new_au_performance.csv\")\n",
+    "performance_df = pd.read_csv(performance_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "ea617e3f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Take the subject IDs from the main dataset\n",
+    "subjects_from_df = df[\"subjectID\"].unique()\n",
+    "\n",
+    "# Performance subset restricted to the subjects that are present\n",
+    "perf_filtered = performance_df[\n",
+    "    performance_df[\"subjectID\"].isin(subjects_from_df)\n",
+    "][[\"subjectID\", \"overall_score\"]]\n",
+    "\n",
+    "# Merge: keep only subjects that appear both in df and in the performance CSV\n",
+    "merged = (\n",
+    "    pd.DataFrame({\"subjectID\": subjects_from_df})\n",
+    "    .merge(perf_filtered, on=\"subjectID\", how=\"inner\")\n",
+    ")\n",
+    "\n",
+    "# Make sure no scores are missing\n",
+    "if merged[\"overall_score\"].isna().any():\n",
+    "    raise ValueError(\"Score values are missing for some subjects.\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "id": "ae43df8d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Final score difference: 0.0020961590397180485\n",
+      "Size of group 1: 6\n",
+      "Size of group 2: 12\n"
+     ]
+    }
+   ],
+   "source": [
+    "merged_sorted = merged.sort_values(\"overall_score\", ascending=False).reset_index(drop=True)\n",
+    "\n",
+    "scores = merged_sorted[\"overall_score\"].values\n",
+    "n_total = len(merged_sorted)\n",
+    "n_small = n_total // 3\n",
+    "n_large = n_total - n_small\n",
+    "\n",
+    "# Step 1: random initial split\n",
+    "idx = np.arange(n_total)\n",
+    "np.random.shuffle(idx)\n",
+    "\n",
+    "small_idx = idx[:n_small]\n",
+    "large_idx = idx[n_small:]\n",
+    "\n",
+    "def score_diff(small_idx, large_idx):\n",
+    "    return abs(scores[small_idx].mean() - scores[large_idx].mean())\n",
+    "\n",
+    "diff = score_diff(small_idx, large_idx)\n",
+    "threshold = 0.01\n",
+    "max_iter = 100\n",
+    "count = 0\n",
+    "\n",
+    "# Step 2: random swaps until the mean difference is small enough\n",
+    "while diff > threshold and count < max_iter:\n",
+    "    # Pick one random element from each group\n",
+    "    si = np.random.choice(small_idx)\n",
+    "    li = np.random.choice(large_idx)\n",
+    "    \n",
+    "    # Perform the swap\n",
+    "    new_small_idx = small_idx.copy()\n",
+    "    new_large_idx = large_idx.copy()\n",
+    "    \n",
+    "    new_small_idx[new_small_idx == si] = li\n",
+    "    new_large_idx[new_large_idx == li] = si\n",
+    "\n",
+    "    # Compute the new difference\n",
+    "    new_diff = score_diff(new_small_idx, new_large_idx)\n",
+    "\n",
+    "    # Accept the swap only if it improves the difference\n",
+    "    if new_diff < diff:\n",
+    "        small_idx = new_small_idx\n",
+    "        large_idx = new_large_idx\n",
+    "        diff = new_diff\n",
+    "\n",
+    "    count += 1\n",
+    "\n",
+    "# Final groups\n",
+    "group_small = merged_sorted.loc[small_idx].reset_index(drop=True)\n",
+    "group_large = merged_sorted.loc[large_idx].reset_index(drop=True)\n",
+    "\n",
+    "print(\"Final score difference:\", diff)\n",
+    "print(\"Size of group 1:\", len(group_small))\n",
+    "print(\"Size of group 2:\", len(group_large))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "id": "9d1b414e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.7895307985978888"
+      ]
+     },
+     "execution_count": 53,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "group_large['overall_score'].mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "id": "fa71f9a5",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.7874346395581707"
+      ]
+     },
+     "execution_count": 54,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "group_small['overall_score'].mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "id": "79ecb4a2",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[22  4 26 16  3 11 18 14 24 13  9 28]\n",
+      "[ 5  6 29  0  7 17]\n"
+     ]
+    }
+   ],
+   "source": [
+    "training_subjects = group_large['subjectID'].values\n",
+    "test_subjects = group_small['subjectID'].values\n",
+    "print(training_subjects)\n",
+    "print(test_subjects)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4353f87c",
+   "metadata": {},
+   "source": [
+    "### Data cleaning with MAD"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "id": "76610052",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(7115, 25)\n",
+      "(7320, 25)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Settings\n",
+    "threshold_mad = 100\n",
+    "column_praefix = 'AU'\n",
+    "\n",
+    "au_columns = [col for col in df.columns if col.startswith(column_praefix)]\n",
+    "cleaned_df = mad_outlier_removal(df, columns=au_columns, threshold=threshold_mad)\n",
+    "print(cleaned_df.shape)\n",
+    "print(df.shape)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
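mad_outlier_removal is imported from Fahrsimulator_MSY2526_AI.model_training.tools, which is not part of this patch. For orientation, a minimal sketch of a MAD-based row filter with the same call signature (df, columns, threshold) could look as follows; the internals here are an assumption, not the project's implementation.

import pandas as pd

def mad_outlier_removal_sketch(df: pd.DataFrame, columns, threshold: float) -> pd.DataFrame:
    """Drop rows whose value in any of `columns` deviates from the column median
    by more than `threshold` times the scaled median absolute deviation."""
    keep = pd.Series(True, index=df.index)
    for col in columns:
        median = df[col].median()
        mad = (df[col] - median).abs().median()
        if mad == 0:
            continue  # constant column, nothing to flag
        # 0.6745 rescales the MAD so the score is comparable to a z-score for normal data
        modified_z = 0.6745 * (df[col] - median).abs() / mad
        keep &= modified_z <= threshold
    return df[keep]

Under this sketch, the threshold_mad value in the cell above acts as a cutoff on the modified z-score, and a row is dropped as soon as a single AU column exceeds it.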