max dist calculation for eye tracking, new notebook for model training
This commit is contained in:
parent
c7295f310c
commit
fd7981f244
72
dataset_creation/maxDist.py
Normal file
72
dataset_creation/maxDist.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
import math
|
||||||
|
|
||||||
|
def fixation_radius_normalized(theta_deg: float,
                               distance_cm: float,
                               screen_width_cm: float,
                               screen_height_cm: float,
                               resolution_x: int,
                               resolution_y: int,
                               method: str = "max") -> float:
    """
    Compute a PyGaze-style fixation radius for gaze data normalised to [0, 1].

    The visual angle is converted to a physical extent on the screen, that
    extent is converted to pixels along each axis, the two pixel values are
    combined into one pixel radius, and that radius is finally divided by
    the screen resolution to express it in normalised coordinates.

    Args:
        theta_deg: Visual angle in degrees.
        distance_cm: Distance between the eye and the screen in cm.
        screen_width_cm: Physical screen width in cm.
        screen_height_cm: Physical screen height in cm.
        resolution_x: Horizontal screen resolution in pixels.
        resolution_y: Vertical screen resolution in pixels.
        method: ``"max"`` takes the larger of the two per-axis values; any
            other value combines them as a Euclidean norm.

    Returns:
        The fixation radius in normalised screen units.

    Raises:
        ValueError: If the distance, a screen dimension or a resolution is
            not strictly positive.
    """
    # Reject degenerate geometry up front: zero sizes would otherwise surface
    # as a bare ZeroDivisionError and negative ones as a meaningless radius.
    geometry = (
        ("distance_cm", distance_cm),
        ("screen_width_cm", screen_width_cm),
        ("screen_height_cm", screen_height_cm),
        ("resolution_x", resolution_x),
        ("resolution_y", resolution_y),
    )
    for label, value in geometry:
        if value <= 0:
            raise ValueError(f"{label} must be positive, got {value!r}")

    use_max = method == "max"

    # Step 1: visual angle -> physical extent on the screen (cm)
    delta_cm = 2 * distance_cm * math.tan(math.radians(theta_deg) / 2)

    # Step 2: physical extent -> pixels, separately per axis
    delta_px_x = delta_cm * (resolution_x / screen_width_cm)
    delta_px_y = delta_cm * (resolution_y / screen_height_cm)

    # Single pixel radius derived from both axes
    if use_max:
        r_px = max(delta_px_x, delta_px_y)
    else:
        r_px = math.sqrt(delta_px_x**2 + delta_px_y**2)

    # Step 3: pixel radius -> normalised radius per axis
    # NOTE(review): the same pixel radius is divided by both resolutions and
    # the per-axis values are combined a second time, so the result is
    # dominated by the smaller resolution. This may be why the value does not
    # match the screen width yet -- confirm the intended definition before
    # changing it.
    r_norm_x = r_px / resolution_x
    r_norm_y = r_px / resolution_y

    if use_max:
        return max(r_norm_x, r_norm_y)
    return math.sqrt(r_norm_x**2 + r_norm_y**2)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Example: 55-inch 4K monitor. Width and horizontal resolution are tripled
# (presumably three such screens side by side -- TODO confirm).
screen_width_cm = 3*121.8
screen_height_cm = 68.5
resolution_x = 3*3840
resolution_y = 2160
distance_to_screen_cm = 120
max_angle = 1.0

# Keyword arguments shared by every call below.
_screen_setup = {
    "theta_deg": max_angle,
    "distance_cm": distance_to_screen_cm,
    "screen_width_cm": screen_width_cm,
    "screen_height_cm": screen_height_cm,
    "resolution_x": resolution_x,
    "resolution_y": resolution_y,
}

# Evaluate both combination methods in turn; after the loop `method` and
# `maxdist_px` hold the values of the last ('euclid') run.
# NOTE: despite its name, `maxdist_px` holds the normalised radius returned
# by fixation_radius_normalized.
for method, _label in (
    ('max', "PyGaze max_dist (max):"),
    ('euclid', "PyGaze max_dist (euclid):"),
):
    maxdist_px = fixation_radius_normalized(method=method, **_screen_setup)
    print(_label, maxdist_px)
|
||||||
|
|
||||||
|
# TODO: the result does not yet match the screen width
|
||||||
|
# https://osdoc.cogsci.nl/4.0/de/visualangle/
|
||||||
|
# https://reference.org/facts/Visual_angle/LUw29zy7
|
||||||
323
model_training/VAE_SVM/vaesvm.ipynb
Normal file
323
model_training/VAE_SVM/vaesvm.ipynb
Normal file
@ -0,0 +1,323 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "708c9745",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Imports"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 61,
|
||||||
|
"id": "53b10294",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"/home\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"ename": "ImportError",
|
||||||
|
"evalue": "cannot import name 'mad_outlier_removal' from 'Fahrsimulator_MSY2526_AI.model_training.tools' (unknown location)",
|
||||||
|
"output_type": "error",
|
||||||
|
"traceback": [
|
||||||
|
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||||
|
"\u001b[31mImportError\u001b[39m Traceback (most recent call last)",
|
||||||
|
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[61]\u001b[39m\u001b[32m, line 11\u001b[39m\n\u001b[32m 8\u001b[39m sys.path.append(base_dir)\n\u001b[32m 9\u001b[39m \u001b[38;5;28mprint\u001b[39m(base_dir)\n\u001b[32m---> \u001b[39m\u001b[32m11\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mFahrsimulator_MSY2526_AI\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mmodel_training\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mtools\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m evaluation_tools, scaler, mad_outlier_removal\n\u001b[32m 12\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msklearn\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpreprocessing\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m StandardScaler, MinMaxScaler\n\u001b[32m 13\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msklearn\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01msvm\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m OneClassSVM\n",
|
||||||
|
"\u001b[31mImportError\u001b[39m: cannot import name 'mad_outlier_removal' from 'Fahrsimulator_MSY2526_AI.model_training.tools' (unknown location)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"from pathlib import Path\n",
|
||||||
|
"import sys\n",
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"base_dir = os.path.abspath(os.path.join(os.getcwd(), \"..\"))\n",
|
||||||
|
"sys.path.append(base_dir)\n",
|
||||||
|
"print(base_dir)\n",
|
||||||
|
"\n",
|
||||||
|
"from Fahrsimulator_MSY2526_AI.model_training.tools import evaluation_tools, scaler, mad_outlier_removal\n",
|
||||||
|
"from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
|
||||||
|
"from sklearn.svm import OneClassSVM\n",
|
||||||
|
"from sklearn.model_selection import GridSearchCV, KFold, ParameterGrid, train_test_split\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"import tensorflow as tf\n",
|
||||||
|
"import pickle\n",
|
||||||
|
"from sklearn.metrics import (roc_auc_score, accuracy_score, precision_score, \n",
|
||||||
|
" recall_score, f1_score, confusion_matrix, classification_report) "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "68101229",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Load Dataset"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 12,
|
||||||
|
"id": "24a765e8",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/first_AU_dataset/output_windowed.parquet\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 13,
|
||||||
|
"id": "471001b0",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"df = pd.read_parquet(path=dataset_path)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "0fdecdaa",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Load Performance data and Subject Split"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 32,
|
||||||
|
"id": "692d1b47",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"performance_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/subject_performance/3new_au_performance.csv\")\n",
|
||||||
|
"performance_df = pd.read_csv(performance_path)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 33,
|
||||||
|
"id": "ea617e3f",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# Subject IDs aus dem Haupt-Dataset nehmen\n",
|
||||||
|
"subjects_from_df = df[\"subjectID\"].unique()\n",
|
||||||
|
"\n",
|
||||||
|
"# Performance-Subset nur für vorhandene Subjects\n",
|
||||||
|
"perf_filtered = performance_df[\n",
|
||||||
|
" performance_df[\"subjectID\"].isin(subjects_from_df)\n",
|
||||||
|
"][[\"subjectID\", \"overall_score\"]]\n",
|
||||||
|
"\n",
|
||||||
|
"# Merge: nur Subjects, die sowohl im df als auch im Performance-CSV vorkommen\n",
|
||||||
|
"merged = (\n",
|
||||||
|
" pd.DataFrame({\"subjectID\": subjects_from_df})\n",
|
||||||
|
" .merge(perf_filtered, on=\"subjectID\", how=\"inner\")\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"# Sicherstellen, dass keine Scores fehlen\n",
|
||||||
|
"if merged[\"overall_score\"].isna().any():\n",
|
||||||
|
" raise ValueError(\"Es fehlen Score-Werte für manche Subjects.\")\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 52,
|
||||||
|
"id": "ae43df8d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Finale Score-Differenz: 0.0020961590397180485\n",
|
||||||
|
"Größe Gruppe 1: 6\n",
|
||||||
|
"Größe Gruppe 2: 12\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"merged_sorted = merged.sort_values(\"overall_score\", ascending=False).reset_index(drop=True)\n",
|
||||||
|
"\n",
|
||||||
|
"scores = merged_sorted[\"overall_score\"].values\n",
|
||||||
|
"n_total = len(merged_sorted)\n",
|
||||||
|
"n_small = n_total // 3\n",
|
||||||
|
"n_large = n_total - n_small\n",
|
||||||
|
"\n",
|
||||||
|
"# Schritt 1: zufällige Start-Aufteilung\n",
|
||||||
|
"idx = np.arange(n_total)\n",
|
||||||
|
"np.random.shuffle(idx)\n",
|
||||||
|
"\n",
|
||||||
|
"small_idx = idx[:n_small]\n",
|
||||||
|
"large_idx = idx[n_small:]\n",
|
||||||
|
"\n",
|
||||||
|
"def score_diff(small_idx, large_idx):\n",
|
||||||
|
" return abs(scores[small_idx].mean() - scores[large_idx].mean())\n",
|
||||||
|
"\n",
|
||||||
|
"diff = score_diff(small_idx, large_idx)\n",
|
||||||
|
"threshold = 0.01\n",
|
||||||
|
"max_iter = 100\n",
|
||||||
|
"count = 0\n",
|
||||||
|
"\n",
|
||||||
|
"# Schritt 2: random swaps bis Differenz klein genug\n",
|
||||||
|
"while diff > threshold and count < max_iter:\n",
|
||||||
|
" # Zwei zufällige Elemente auswählen\n",
|
||||||
|
" si = np.random.choice(small_idx)\n",
|
||||||
|
" li = np.random.choice(large_idx)\n",
|
||||||
|
" \n",
|
||||||
|
" # Tausch durchführen\n",
|
||||||
|
" new_small_idx = small_idx.copy()\n",
|
||||||
|
" new_large_idx = large_idx.copy()\n",
|
||||||
|
" \n",
|
||||||
|
" new_small_idx[new_small_idx == si] = li\n",
|
||||||
|
" new_large_idx[new_large_idx == li] = si\n",
|
||||||
|
"\n",
|
||||||
|
" # neue Differenz berechnen\n",
|
||||||
|
" new_diff = score_diff(new_small_idx, new_large_idx)\n",
|
||||||
|
"\n",
|
||||||
|
" # Swap akzeptieren, wenn es besser wird\n",
|
||||||
|
" if new_diff < diff:\n",
|
||||||
|
" small_idx = new_small_idx\n",
|
||||||
|
" large_idx = new_large_idx\n",
|
||||||
|
" diff = new_diff\n",
|
||||||
|
"\n",
|
||||||
|
" count += 1\n",
|
||||||
|
"\n",
|
||||||
|
"# Finalgruppen\n",
|
||||||
|
"group_small = merged_sorted.loc[small_idx].reset_index(drop=True)\n",
|
||||||
|
"group_large = merged_sorted.loc[large_idx].reset_index(drop=True)\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"Finale Score-Differenz:\", diff)\n",
|
||||||
|
"print(\"Größe Gruppe 1:\", len(group_small))\n",
|
||||||
|
"print(\"Größe Gruppe 2:\", len(group_large))\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 53,
|
||||||
|
"id": "9d1b414e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"0.7895307985978888"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 53,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"group_large['overall_score'].mean()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 54,
|
||||||
|
"id": "fa71f9a5",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"0.7874346395581707"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 54,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"group_small['overall_score'].mean()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 55,
|
||||||
|
"id": "79ecb4a2",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"[22 4 26 16 3 11 18 14 24 13 9 28]\n",
|
||||||
|
"[ 5 6 29 0 7 17]\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"training_subjects = group_large['subjectID'].values\n",
|
||||||
|
"test_subjects = group_small['subjectID'].values\n",
|
||||||
|
"print(training_subjects)\n",
|
||||||
|
"print(test_subjects)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "4353f87c",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Data cleaning with MAD"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 60,
|
||||||
|
"id": "76610052",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"(7115, 25)\n",
|
||||||
|
"(7320, 25)\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# SET\n",
|
||||||
|
"threshold_mad = 100\n",
|
||||||
|
"column_praefix ='AU'\n",
|
||||||
|
"\n",
|
||||||
|
"au_columns = [col for col in df.columns if col.startswith(column_praefix)]\n",
|
||||||
|
"cleaned_df = mad_outlier_removal(df,columns=au_columns, threshold=threshold_mad)\n",
|
||||||
|
"print(cleaned_df.shape)\n",
|
||||||
|
"print(df.shape)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.12.10"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user