Compare commits

..

12 Commits

34 changed files with 724 additions and 15641 deletions

.gitignore vendored
View File

@@ -3,4 +3,5 @@
 !*.py
 !*.ipynb
 !*.md
+!*.parquet
 !.gitignore

View File

@@ -1,259 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "7440a5b3",
"metadata": {},
"outputs": [],
"source": [
"import h5py\n",
"import os\n",
"import warnings\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from pathlib import Path"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2401aaef",
"metadata": {},
"outputs": [],
"source": [
"file_path = \"adabase-public-0020-v_0_0_2.h5py\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "46280999",
"metadata": {},
"outputs": [],
"source": [
"SKT_SR = 100\n",
"ECG_SR = 500\n",
"RSP_SR = 250\n",
"EMG_SR = 1000\n",
"EDA_SR = 500\n",
"EYE_SR = 250"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e23eb552",
"metadata": {},
"outputs": [],
"source": [
"df_signals = pd.read_hdf(file_path, \"SIGNALS\", mode=\"r\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b7f494d1",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_columns', None)\n",
"pd.set_option('display.max_rows', None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dd2f4d84",
"metadata": {},
"outputs": [],
"source": [
"settings = df_signals[['STUDY','PHASE','LEVEL']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1699ddc2",
"metadata": {},
"outputs": [],
"source": [
"settings.value_counts()"
]
},
{
"cell_type": "markdown",
"id": "a4731c56",
"metadata": {},
"source": [
"Actions units"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9db0b4b2",
"metadata": {},
"outputs": [],
"source": [
"df_signals.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3ceccc89",
"metadata": {},
"outputs": [],
"source": [
"au_data = df_signals.iloc[:,-20:]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d4ee088",
"metadata": {},
"outputs": [],
"source": [
"au_data.tail()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5d85a8cb",
"metadata": {},
"outputs": [],
"source": [
"print(au_data.shape)\n",
"print(au_data.isna().sum())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "efff356f",
"metadata": {},
"outputs": [],
"source": [
"clean_au_data = au_data.dropna()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "42ed1bcd",
"metadata": {},
"outputs": [],
"source": [
"clean_au_data.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2c7c3f14",
"metadata": {},
"outputs": [],
"source": [
"for i in range(len(clean_au_data.columns)):\n",
" print(clean_au_data.iloc[:,i].unique())"
]
},
{
"cell_type": "markdown",
"id": "332740a8",
"metadata": {},
"source": [
"Plots"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f30b8814",
"metadata": {},
"outputs": [],
"source": [
"# df_signals_ecg = pd.read_hdf(file_path, \"SIGNALS\", mode=\"r\", columns=[\"STUDY\",\"LEVEL\", \"PHASE\", 'RAW_ECG_I'])\n",
"df_signals_ecg = df_signals[[\"STUDY\",\"LEVEL\", \"PHASE\", 'RAW_ECG_I']]\n",
"df_signals_ecg.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ee80fd79",
"metadata": {},
"outputs": [],
"source": [
"study_filter = df_signals[\"STUDY\"] == \"n-back\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3ef29446",
"metadata": {},
"outputs": [],
"source": [
"fig, ax = plt.subplots(figsize=(16, 2))\n",
"# Set the number of seconds to plot\n",
"seconds = 20\n",
"# Get the ECG signal data\n",
"ecg_signal = df_signals.loc[study_filter, \"RAW_ECG_I\"].dropna()\n",
"# Set the x-axis limits to the number of samples in the specified time range\n",
"num_samples = ECG_SR * seconds\n",
"# Plot the ECG signal\n",
"ax.plot(ecg_signal.index[:num_samples]/1000, ecg_signal[:num_samples]);\n",
"ax.set_title(\"ECG I\");\n",
"ax.set_xlabel('Seconds');\n",
"# Set figure size with a 16:6 aspect ratio\n",
"fig, ax = plt.subplots(figsize=(16, 2))\n",
"# Set the number of seconds to plot\n",
"start_second = 0\n",
"end_second = 60*30\n",
"# Get the EYE signal data - we replace inf with nan to get the original signal.␣\n",
"\n",
"eye_left_signal = df_signals.loc[study_filter, \"LEFT_PUPIL_DIAMETER\"].dropna()\n",
"eye_right_signal = df_signals.loc[study_filter, \"RIGHT_PUPIL_DIAMETER\"].dropna()\n",
"#eye_left_signal = df_signals.loc[:, \"LEFT_PUPIL_DIAMETER\"].replace([np.inf],␣\n",
"\n",
"#eye_right_signal = df_signals.loc[:, \"RIGHT_PUPIL_DIAMETER\"].replace([np.inf],␣\n",
"\n",
"# Set the x-axis limits to the number of samples in the specified time range\n",
"num_samples_start = EYE_SR * start_second\n",
"num_samples_end = EYE_SR * end_second\n",
"ax.plot(eye_left_signal.index[num_samples_start:num_samples_end]/1000,eye_left_signal[num_samples_start:num_samples_end], label=\"Left\")\n",
"ax.plot(eye_right_signal.index[num_samples_start:num_samples_end]/1000,eye_right_signal[num_samples_start:num_samples_end], label=\"Right\")\n",
"ax.set_title(\"Pupil Dilation\")\n",
"ax.set_xlabel('Seconds')\n",
"ax.legend()\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -1,625 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "89d81009",
"metadata": {},
"source": [
"### Imports"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7440a5b3",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from pathlib import Path\n",
"from sklearn.preprocessing import StandardScaler, MinMaxScaler"
]
},
{
"cell_type": "markdown",
"id": "09b7d707",
"metadata": {},
"source": [
"### Config"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2401aaef",
"metadata": {},
"outputs": [],
"source": [
"dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/combined_dataset_25hz.parquet\")\n",
"# dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/60s_combined_dataset_25hz.parquet\")\n",
"# dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/120s_combined_dataset_25hz.parquet\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0282b0b1",
"metadata": {},
"outputs": [],
"source": [
"FILTER_MAD = True\n",
"THRESHOLD = 3.5\n",
"METHOD = 'minmax'\n",
"SCOPE = 'subject'\n",
"FILTER_SUBSETS = True"
]
},
{
"cell_type": "markdown",
"id": "a8f1716b",
"metadata": {},
"source": [
"### Calculations"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ac32444a",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_parquet(dataset_path)\n",
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3ba4401c",
"metadata": {},
"outputs": [],
"source": [
"if(FILTER_SUBSETS):\n",
" # Special filter: Keep only specific subsets\n",
"# - k-drive L1 baseline\n",
"# - n-back L1 baseline \n",
"# - k-drive test with levels 1, 2, 3\n",
"\n",
" df = df[\n",
" (\n",
" # k-drive L1 baseline\n",
" ((df['STUDY'] == 'k-drive') & \n",
" (df['LEVEL'] == 1) & \n",
" (df['PHASE'] == 'baseline'))\n",
" ) | \n",
" (\n",
" # n-back L1 baseline\n",
" ((df['STUDY'] == 'n-back') & \n",
" (df['LEVEL'] == 1) & \n",
" (df['PHASE'] == 'baseline'))\n",
" ) | \n",
" (\n",
" # k-drive test with levels 1, 2, 3\n",
" ((df['STUDY'] == 'k-drive') & \n",
" (df['LEVEL'].isin([1, 2, 3])) & \n",
" (df['PHASE'] == 'test'))\n",
" )].copy()\n",
"\n",
"print(f\"Filtered dataframe shape: {df.shape}\")\n",
"print(f\"Remaining subsets: {df.groupby(['STUDY', 'LEVEL', 'PHASE']).size()}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "77dbd6df",
"metadata": {},
"outputs": [],
"source": [
"face_au_cols = [c for c in df.columns if c.startswith(\"FACE_AU\")]\n",
"eye_cols = ['Fix_count_short_66_150', 'Fix_count_medium_300_500',\n",
" 'Fix_count_long_gt_1000', 'Fix_count_100', 'Fix_mean_duration',\n",
" 'Fix_median_duration', 'Sac_count', 'Sac_mean_amp', 'Sac_mean_dur',\n",
" 'Sac_median_dur', 'Blink_count', 'Blink_mean_dur', 'Blink_median_dur',\n",
" 'Pupil_mean', 'Pupil_IPA']\n",
"eye_cols_without_blink = ['Fix_count_short_66_150', 'Fix_count_medium_300_500',\n",
" 'Fix_count_long_gt_1000', 'Fix_count_100', 'Fix_mean_duration',\n",
" 'Fix_median_duration', 'Sac_count', 'Sac_mean_amp', 'Sac_mean_dur',\n",
" 'Sac_median_dur', 'Pupil_mean', 'Pupil_IPA']\n",
"print(len(eye_cols))\n",
"all_signal_columns = eye_cols+face_au_cols\n",
"print(len(all_signal_columns))"
]
},
{
"cell_type": "markdown",
"id": "d5e9c67a",
"metadata": {},
"source": [
"MAD"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "592291ef",
"metadata": {},
"outputs": [],
"source": [
"def calculate_mad_params(df, columns):\n",
" \"\"\"\n",
" Calculate median and MAD parameters for each column.\n",
" This should be run ONLY on the training data.\n",
" \n",
" Returns a dictionary: {col: (median, mad)}\n",
" \"\"\"\n",
" params = {}\n",
" for col in columns:\n",
" median = df[col].median()\n",
" mad = np.median(np.abs(df[col] - median))\n",
" params[col] = (median, mad)\n",
" return params\n",
"def apply_mad_filter(df, params, threshold=3.5):\n",
" \"\"\"\n",
" Apply MAD-based outlier removal using precomputed parameters.\n",
" Works on training, validation, and test data.\n",
" \n",
" df: DataFrame to filter\n",
" params: dictionary {col: (median, mad)} from training data\n",
" threshold: cutoff for robust Z-score\n",
" \"\"\"\n",
" df_clean = df.copy()\n",
"\n",
" for col, (median, mad) in params.items():\n",
" if mad == 0:\n",
" continue # no spread; nothing to remove for this column\n",
"\n",
" robust_z = 0.6745 * (df_clean[col] - median) / mad\n",
" outlier_mask = np.abs(robust_z) > threshold\n",
"\n",
" # Remove values only in this specific column\n",
" df_clean.loc[outlier_mask, col] = median\n",
" \n",
" \n",
" print(df_clean.shape)\n",
" return df_clean"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ddad4a8",
"metadata": {},
"outputs": [],
"source": [
"if(FILTER_MAD):\n",
" mad_params = calculate_mad_params(df, all_signal_columns)\n",
" df = apply_mad_filter(df, mad_params, THRESHOLD)"
]
},
{
"cell_type": "markdown",
"id": "89387879",
"metadata": {},
"source": [
"Normalizer"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9c129cdd",
"metadata": {},
"outputs": [],
"source": [
"def fit_normalizer(train_data, au_columns, method='standard', scope='global'):\n",
" \"\"\"\n",
" Fit normalization scalers on training data.\n",
" \n",
" Parameters:\n",
" -----------\n",
" train_data : pd.DataFrame\n",
" Training dataframe with AU columns and subjectID\n",
" au_columns : list\n",
" List of AU column names to normalize\n",
" method : str, default='standard'\n",
" Normalization method: 'standard' for StandardScaler or 'minmax' for MinMaxScaler\n",
" scope : str, default='global'\n",
" Normalization scope: 'subject' for per-subject or 'global' for across all subjects\n",
" \n",
" Returns:\n",
" --------\n",
" dict\n",
" Dictionary containing fitted scalers and statistics for new subjects\n",
" \"\"\"\n",
" if method == 'standard':\n",
" Scaler = StandardScaler\n",
" elif method == 'minmax':\n",
" Scaler = MinMaxScaler\n",
" else:\n",
" raise ValueError(\"method must be 'standard' or 'minmax'\")\n",
" \n",
" scalers = {}\n",
" if scope == 'subject':\n",
" # Fit one scaler per subject\n",
" subject_stats = []\n",
" \n",
" for subject in train_data['subjectID'].unique():\n",
" subject_mask = train_data['subjectID'] == subject\n",
" scaler = Scaler()\n",
" scaler.fit(train_data.loc[subject_mask, au_columns].values)\n",
" scalers[subject] = scaler\n",
" \n",
" # Store statistics for averaging\n",
" if method == 'standard':\n",
" subject_stats.append({\n",
" 'mean': scaler.mean_,\n",
" 'std': scaler.scale_\n",
" })\n",
" elif method == 'minmax':\n",
" subject_stats.append({\n",
" 'min': scaler.data_min_,\n",
" 'max': scaler.data_max_\n",
" })\n",
" \n",
" # Calculate average statistics for new subjects\n",
" if method == 'standard':\n",
" avg_mean = np.mean([s['mean'] for s in subject_stats], axis=0)\n",
" avg_std = np.mean([s['std'] for s in subject_stats], axis=0)\n",
" fallback_scaler = StandardScaler()\n",
" fallback_scaler.mean_ = avg_mean\n",
" fallback_scaler.scale_ = avg_std\n",
" fallback_scaler.var_ = avg_std ** 2\n",
" fallback_scaler.n_features_in_ = len(au_columns)\n",
" elif method == 'minmax':\n",
" avg_min = np.mean([s['min'] for s in subject_stats], axis=0)\n",
" avg_max = np.mean([s['max'] for s in subject_stats], axis=0)\n",
" fallback_scaler = MinMaxScaler()\n",
" fallback_scaler.data_min_ = avg_min\n",
" fallback_scaler.data_max_ = avg_max\n",
" fallback_scaler.data_range_ = avg_max - avg_min\n",
" fallback_scaler.scale_ = 1.0 / fallback_scaler.data_range_\n",
" fallback_scaler.min_ = -avg_min * fallback_scaler.scale_\n",
" fallback_scaler.n_features_in_ = len(au_columns)\n",
" \n",
" scalers['_fallback'] = fallback_scaler\n",
" \n",
" elif scope == 'global':\n",
" # Fit one scaler for all subjects\n",
" scaler = Scaler()\n",
" scaler.fit(train_data[au_columns].values)\n",
" scalers['global'] = scaler\n",
" \n",
" else:\n",
" raise ValueError(\"scope must be 'subject' or 'global'\")\n",
" \n",
" return {'scalers': scalers, 'method': method, 'scope': scope}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9cfabd37",
"metadata": {},
"outputs": [],
"source": [
"def apply_normalizer(data, columns, normalizer_dict):\n",
" \"\"\"\n",
" Apply fitted normalization scalers to data.\n",
" \n",
" Parameters:\n",
" -----------\n",
" data : pd.DataFrame\n",
" Dataframe with AU columns and subjectID\n",
" au_columns : list\n",
" List of AU column names to normalize\n",
" normalizer_dict : dict\n",
" Dictionary containing fitted scalers from fit_normalizer()\n",
" \n",
" Returns:\n",
" --------\n",
" pd.DataFrame\n",
" DataFrame with normalized AU columns\n",
" \"\"\"\n",
" normalized_data = data.copy()\n",
" scalers = normalizer_dict['scalers']\n",
" scope = normalizer_dict['scope']\n",
" normalized_data[columns] = normalized_data[columns].astype(np.float64)\n",
"\n",
" if scope == 'subject':\n",
" # Apply per-subject normalization\n",
" for subject in data['subjectID'].unique():\n",
" subject_mask = data['subjectID'] == subject\n",
" \n",
" # Use the subject's scaler if available, otherwise use fallback\n",
" if subject in scalers:\n",
" scaler = scalers[subject]\n",
" else:\n",
" # Use averaged scaler for new subjects\n",
" scaler = scalers['_fallback']\n",
" print(f\"Info: Subject {subject} not in training data. Using averaged scaler from training subjects.\")\n",
" \n",
" normalized_data.loc[subject_mask, columns] = scaler.transform(\n",
" data.loc[subject_mask, columns].values\n",
" )\n",
" \n",
" elif scope == 'global':\n",
" # Apply global normalization\n",
" scaler = scalers['global']\n",
" normalized_data[columns] = scaler.transform(data[columns].values)\n",
" \n",
" return normalized_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4dbbebf7",
"metadata": {},
"outputs": [],
"source": [
"scaler = fit_normalizer(df, all_signal_columns, method=METHOD, scope=SCOPE)\n",
"df_min_max_normalised = apply_normalizer(df, all_signal_columns, scaler)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6b9b2ae8",
"metadata": {},
"outputs": [],
"source": [
"a= df_min_max_normalised[['STUDY','LEVEL','PHASE']]\n",
"print(a.dtypes)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e3e1bc34",
"metadata": {},
"outputs": [],
"source": [
"# Define signal columns (adjust only once)\n",
"signal_columns = all_signal_columns\n",
"\n",
"# Get all unique combinations of STUDY, LEVEL and PHASE\n",
"unique_combinations = df_min_max_normalised[['STUDY', 'LEVEL', 'PHASE']].drop_duplicates().reset_index(drop=True)\n",
"\n",
"# Dictionary to store subsets\n",
"subsets = {}\n",
"subset_sizes = {}\n",
"\n",
"for idx, row in unique_combinations.iterrows():\n",
" study = row['STUDY']\n",
" level = row['LEVEL']\n",
" phase = row['PHASE']\n",
" key = f\"{study}_L{level}_P{phase}\"\n",
" subset = df_min_max_normalised[\n",
" (df_min_max_normalised['STUDY'] == study) & \n",
" (df_min_max_normalised['LEVEL'] == level) & \n",
" (df_min_max_normalised['PHASE'] == phase)\n",
" ]\n",
" subsets[key] = subset\n",
" subset_sizes[key] = len(subset)\n",
"\n",
"# Output subset sizes\n",
"print(\"Number of samples per subset:\")\n",
"print(\"=\" * 40)\n",
"for key, size in subset_sizes.items():\n",
" print(f\"{key}: {size} samples\")\n",
"print(\"=\" * 40)\n",
"print(f\"Total number of subsets: {len(subsets)}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c7fdeb5c",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"# Function to categorize subsets\n",
"def categorize_subset(key):\n",
" \"\"\"Categorizes a subset as 'low' or 'high' based on the given logic\"\"\"\n",
" parts = key.split('_')\n",
" study = parts[0]\n",
" level = int(parts[1][1:]) # 'L1' -> 1\n",
" phase = parts[2][1:] # 'Pbaseline' -> 'baseline'\n",
" \n",
" # LOW: baseline OR (n-back with level 1 or 4)\n",
" if phase == \"baseline\":\n",
" return 'low'\n",
" elif study == \"n-back\" and level in [1, 4]:\n",
" return 'low'\n",
" \n",
" # HIGH: (n-back with level 2,3,5,6 and phase train/test) OR (k-drive not baseline)\n",
" elif study == \"n-back\" and level in [2, 3, 5, 6] and phase in [\"train\", \"test\"]:\n",
" return 'high'\n",
" elif study == \"k-drive\" and phase != \"baseline\":\n",
" return 'high'\n",
" \n",
" return None\n",
"\n",
"# Categorize subsets\n",
"low_subsets = {}\n",
"high_subsets = {}\n",
"\n",
"for key, subset in subsets.items():\n",
" category = categorize_subset(key)\n",
" if category == 'low':\n",
" low_subsets[key] = subset\n",
" elif category == 'high':\n",
" high_subsets[key] = subset\n",
"\n",
"# Output statistics\n",
"print(\"\\n\" + \"=\" * 50)\n",
"print(\"SUBSET CATEGORIZATION\")\n",
"print(\"=\" * 50)\n",
"\n",
"print(\"\\nLOW subsets (Blue):\")\n",
"print(\"-\" * 50)\n",
"low_total = 0\n",
"for key in sorted(low_subsets.keys()):\n",
" size = subset_sizes[key]\n",
" low_total += size\n",
" print(f\" {key}: {size} samples\")\n",
"print(f\"{'TOTAL LOW:':<30} {low_total} samples\")\n",
"print(f\"{'NUMBER OF LOW SUBSETS:':<30} {len(low_subsets)}\")\n",
"\n",
"print(\"\\nHIGH subsets (Red):\")\n",
"print(\"-\" * 50)\n",
"high_total = 0\n",
"for key in sorted(high_subsets.keys()):\n",
" size = subset_sizes[key]\n",
" high_total += size\n",
" print(f\" {key}: {size} samples\")\n",
"print(f\"{'TOTAL HIGH:':<30} {high_total} samples\")\n",
"print(f\"{'NUMBER OF HIGH SUBSETS:':<30} {len(high_subsets)}\")\n",
"\n",
"print(\"\\n\" + \"=\" * 50)\n",
"print(f\"TOTAL SAMPLES: {low_total + high_total}\")\n",
"print(f\"TOTAL SUBSETS: {len(low_subsets) + len(high_subsets)}\")\n",
"print(\"=\" * 50)\n",
"\n",
"# Find minimum subset size\n",
"min_subset_size = min(subset_sizes.values())\n",
"print(f\"\\nMinimum subset size: {min_subset_size}\")\n",
"\n",
"# Number of points to plot per subset (50% of minimum size)\n",
"sampling_factor = 1\n",
"n_samples_per_subset = int(sampling_factor * min_subset_size)\n",
"print(f\"Number of randomly drawn points per subset: {n_samples_per_subset}\")"
]
},
{
"cell_type": "markdown",
"id": "ff363fc5",
"metadata": {},
"source": [
"### Plot"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3a9d9163",
"metadata": {},
"outputs": [],
"source": [
"# Create comparison plots\n",
"fig, axes = plt.subplots(len(signal_columns), 1, figsize=(14, 4 * len(signal_columns)))\n",
"\n",
"# If only one signal column exists, convert axes to list\n",
"if len(signal_columns) == 1:\n",
" axes = [axes]\n",
"\n",
"# Create a plot for each signal column\n",
"for i, signal_col in enumerate(signal_columns):\n",
" ax = axes[i]\n",
" \n",
" y_pos = 0\n",
" labels = []\n",
" \n",
" # First plot all LOW subsets (sorted, blue)\n",
" for label in sorted(low_subsets.keys()):\n",
" subset = low_subsets[label]\n",
" if len(subset) > 0 and signal_col in subset.columns:\n",
" # Draw random sample\n",
" n_samples = min(n_samples_per_subset, len(subset))\n",
" sampled_data = subset[signal_col].sample(n=n_samples, random_state=42)\n",
" \n",
" # Calculate mean and median\n",
" mean_val = subset[signal_col].mean()\n",
" median_val = subset[signal_col].median()\n",
" \n",
" # Plot points in blue\n",
" ax.scatter(sampled_data, [y_pos] * len(sampled_data), \n",
" alpha=0.5, s=30, color='blue')\n",
" \n",
" # Mean as black cross\n",
" ax.plot(mean_val, y_pos, 'x', markersize=12, markeredgewidth=3, \n",
" color='black', zorder=5)\n",
" \n",
" # Median as brown cross\n",
" ax.plot(median_val, y_pos, 'x', markersize=12, markeredgewidth=3, \n",
" color='brown', zorder=5)\n",
" \n",
" labels.append(f\"{label} (n={subset_sizes[label]})\")\n",
" y_pos += 1\n",
" \n",
" # Separation line between LOW and HIGH\n",
" if len(low_subsets) > 0 and len(high_subsets) > 0:\n",
" ax.axhline(y=y_pos - 0.5, color='gray', linestyle='--', linewidth=2, alpha=0.7)\n",
" \n",
" # Then plot all HIGH subsets (sorted, red)\n",
" for label in sorted(high_subsets.keys()):\n",
" subset = high_subsets[label]\n",
" if len(subset) > 0 and signal_col in subset.columns:\n",
" # Draw random sample\n",
" n_samples = min(n_samples_per_subset, len(subset))\n",
" sampled_data = subset[signal_col].sample(n=n_samples, random_state=42)\n",
" \n",
" # Calculate mean and median\n",
" mean_val = subset[signal_col].mean()\n",
" median_val = subset[signal_col].median()\n",
" \n",
" # Plot points in red\n",
" ax.scatter(sampled_data, [y_pos] * len(sampled_data), \n",
" alpha=0.5, s=30, color='red')\n",
" \n",
" # Mean as black cross\n",
" ax.plot(mean_val, y_pos, 'x', markersize=12, markeredgewidth=3, \n",
" color='black', zorder=5)\n",
" \n",
" # Median as brown cross\n",
" ax.plot(median_val, y_pos, 'x', markersize=12, markeredgewidth=3, \n",
" color='brown', zorder=5)\n",
" \n",
" labels.append(f\"{label} (n={subset_sizes[label]})\")\n",
" y_pos += 1\n",
" \n",
" ax.set_yticks(range(len(labels)))\n",
" ax.set_yticklabels(labels)\n",
" ax.set_xlabel(f'{signal_col} value')\n",
" ax.set_title(f'{signal_col}: LOW (Blue) vs HIGH (Red) | {n_samples_per_subset} points/subset | Black X = Mean, Brown X = Median')\n",
" ax.grid(True, alpha=0.3, axis='x')\n",
" ax.axvline(0, color='gray', linestyle='--', alpha=0.5)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"print(f\"\\nNote: {n_samples_per_subset} random points were plotted per subset.\")\n",
"print(\"Blue points = LOW subsets | Red points = HIGH subsets\")\n",
"print(\"Black 'X' = Mean of entire subset | Brown 'X' = Median of entire subset\")\n",
"print(f\"Total subsets plotted: {len(low_subsets)} LOW + {len(high_subsets)} HIGH = {len(low_subsets) + len(high_subsets)} subsets\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -1,166 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "1014c5e0",
"metadata": {},
"source": [
"Imports"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e42f3011",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0a834496",
"metadata": {},
"outputs": [],
"source": [
"path =r\"C:\\Users\\micha\\FAUbox\\WS2526_Fahrsimulator_MSY (Celina Korzer)\\AU_dataset\\output_windowed.parquet\"\n",
"df = pd.read_parquet(path=path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa4759fa",
"metadata": {},
"outputs": [],
"source": [
"high_nback = df[\n",
" (df[\"STUDY\"]==\"n-back\") &\n",
" (df[\"LEVEL\"].isin([2, 3, 5, 6])) &\n",
" (df[\"PHASE\"].isin([\"train\", \"test\"]))\n",
"]\n",
"high_nback.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a2aa0596",
"metadata": {},
"outputs": [],
"source": [
"low_all = df[\n",
" ((df[\"PHASE\"] == \"baseline\") |\n",
" ((df[\"STUDY\"] == \"n-back\") & (df[\"PHASE\"] != \"baseline\") & (df[\"LEVEL\"].isin([1,4]))))\n",
"]\n",
"print(low_all.shape)\n",
"high_kdrive = df[\n",
" (df[\"STUDY\"] == \"k-drive\") & (df[\"PHASE\"] != \"baseline\")\n",
"]\n",
"print(high_kdrive.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f7d446a1",
"metadata": {},
"outputs": [],
"source": [
"print((df.shape[0]==(high_kdrive.shape[0]+high_nback.shape[0]+low_all.shape[0])))\n",
"print(df.shape[0])\n",
"print((high_kdrive.shape[0]+high_nback.shape[0]+low_all.shape[0]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "474e144a",
"metadata": {},
"outputs": [],
"source": [
"high_all = pd.concat([high_nback, high_kdrive])\n",
"high_all.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5dd585c2",
"metadata": {},
"outputs": [],
"source": [
"df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0bd39d9f",
"metadata": {},
"outputs": [],
"source": [
"# Get all columns that start with 'AU'\n",
"au_columns = [col for col in low_all.columns if col.startswith('AU')]\n",
"\n",
"# Calculate number of rows and columns for subplots\n",
"n_cols = len(au_columns)\n",
"n_rows = 4\n",
"n_cols_subplot = 5\n",
"\n",
"# Create figure with subplots\n",
"fig, axes = plt.subplots(n_rows, n_cols_subplot, figsize=(20, 16))\n",
"axes = axes.flatten()\n",
"fig.suptitle('Action Unit (AU) Distributions: Low vs High', fontsize=20, fontweight='bold', y=0.995)\n",
"\n",
"# Create histogram for each AU column\n",
"for idx, col in enumerate(au_columns):\n",
" ax = axes[idx]\n",
" \n",
" # Plot overlapping histograms\n",
" ax.hist(low_all[col].dropna(), bins=30, alpha=0.6, color='blue', label='low_all', edgecolor='black')\n",
" ax.hist(high_all[col].dropna(), bins=30, alpha=0.6, color='red', label='high_all', edgecolor='black')\n",
" \n",
" # Set title and labels\n",
" ax.set_title(col, fontsize=10, fontweight='bold')\n",
" ax.set_xlabel('Value', fontsize=8)\n",
" ax.set_ylabel('Frequency', fontsize=8)\n",
" ax.legend(fontsize=8)\n",
" ax.grid(True, alpha=0.3)\n",
"\n",
"# Hide any unused subplots\n",
"for idx in range(len(au_columns), len(axes)):\n",
" axes[idx].set_visible(False)\n",
"\n",
"# Adjust layout\n",
"plt.tight_layout()\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -1,157 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "aab6b326-a583-47ad-8bb7-723c2fddcc63",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# %pip install pyocclient\n",
"import yaml\n",
"import owncloud\n",
"import pandas as pd\n",
"import time"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4f42846c-27c3-4394-a40a-e22d73c2902e",
"metadata": {},
"outputs": [],
"source": [
"start = time.time()\n",
"\n",
"with open(\"../login.yaml\") as f:\n",
" cfg = yaml.safe_load(f)\n",
"url, password = cfg[0][\"url\"], cfg[1][\"password\"]\n",
"file = \"adabase-public-0022-v_0_0_2.h5py\"\n",
"oc = owncloud.Client.from_public_link(url, folder_password=password)\n",
"\n",
"\n",
"oc.get_file(file, \"tmp22.h5\")\n",
"\n",
"end = time.time()\n",
"print(end - start)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3714dec2-85d0-4f76-af46-ea45ebec2fa3",
"metadata": {},
"outputs": [],
"source": [
"start = time.time()\n",
"df_performance = pd.read_hdf(\"tmp22.h5\", \"PERFORMANCE\")\n",
"end = time.time()\n",
"print(end - start)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f50e97d0",
"metadata": {},
"outputs": [],
"source": [
"print(22)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c131c816",
"metadata": {},
"outputs": [],
"source": [
"df_performance"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6ae47e52-ad86-4f8d-b929-0080dc99f646",
"metadata": {},
"outputs": [],
"source": [
"start = time.time()\n",
"df_4_col = pd.read_hdf(\"tmp.h5\", \"SIGNALS\", mode=\"r\", columns=[\"STUDY\"], start=0, stop=1)\n",
"end = time.time()\n",
"print(end - start)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7c139f3a-ede8-4530-957d-d1bb939f6cb5",
"metadata": {},
"outputs": [],
"source": [
"df_4_col.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a68d58ea-65f2-46c4-a2b2-8c3447c715d7",
"metadata": {},
"outputs": [],
"source": [
"df_4_col.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "95aa4523-3784-4ab6-bf92-0227ce60e863",
"metadata": {},
"outputs": [],
"source": [
"df_4_col.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "defbcaf4-ad1b-453f-9b48-ab0ecfc4b5d5",
"metadata": {},
"outputs": [],
"source": [
"df_4_col.isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72313895-c478-44a5-9108-00b0bec01bb8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -1,213 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "8fb02733",
"metadata": {},
"source": [
"Imports"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "96f3b128",
"metadata": {},
"outputs": [],
"source": [
"import yaml\n",
"import owncloud\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"id": "c20cee7c",
"metadata": {},
"source": [
"Connection to Owncloud"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4c94558",
"metadata": {},
"outputs": [],
"source": [
"# Load credentials\n",
"with open(\"../login.yaml\") as f:\n",
" cfg = yaml.safe_load(f)\n",
" \n",
"url, password = cfg[0][\"url\"], cfg[1][\"password\"]\n",
"\n",
"# Connect once\n",
"oc = owncloud.Client.from_public_link(url, folder_password=password)\n",
"# File pattern\n",
"# base = \"adabase-public-{num:04d}-v_0_0_2.h5py\"\n",
"base = \"{num:04d}-*.h5py\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "07c03d07",
"metadata": {},
"outputs": [],
"source": [
"num_files = 2 # number of files to process (min: 1, max: 30)\n",
"performance_data = []\n",
"\n",
"for i in range(num_files):\n",
" file_pattern = f\"{i:04d}-*\"\n",
" \n",
" # Get list of files matching the pattern\n",
" files = oc.list('.')\n",
" matching_files = [f.get_name() for f in files if f.get_name().startswith(f\"{i:04d}-\")]\n",
" \n",
" if matching_files:\n",
" file_name = matching_files[0] # Take the first matching file\n",
" local_tmp = f\"tmp_{i:04d}.h5\"\n",
" \n",
" oc.get_file(file_name, local_tmp)\n",
" print(f\"{file_name} geöffnet\")\n",
" else:\n",
" print(f\"Keine Datei gefunden für Muster: {file_pattern}\")\n",
" # file_name = base.format(num=i)\n",
" # local_tmp = f\"tmp_{i:04d}.h5\"\n",
"\n",
" # oc.get_file(file_name, local_tmp)\n",
" # print(f\"{file_name} geöffnet\")\n",
"\n",
" # check SIGNALS table for AUs\n",
" with pd.HDFStore(local_tmp, mode=\"r\") as store:\n",
" cols = store.select(\"SIGNALS\", start=0, stop=1).columns\n",
" au_cols = [c for c in cols if c.startswith(\"AU\")]\n",
" if not au_cols:\n",
" print(f\"Subject {i} enthält keine AUs\")\n",
" continue\n",
"\n",
" # load performance table\n",
" with pd.HDFStore(local_tmp, mode=\"r\") as store:\n",
" perf_df = store.select(\"PERFORMANCE\")\n",
"\n",
" f1_cols = [c for c in [\"AUDITIVE F1\", \"VISUAL F1\", \"F1\"] if c in perf_df.columns]\n",
" if not f1_cols:\n",
" print(f\"Subject {i}: keine F1-Spalten gefunden\")\n",
" continue\n",
"\n",
" subject_entry = {\"subjectID\": i}\n",
" valid_scores = []\n",
"\n",
" # iterate rows: each (study, level, phase)\n",
" for _, row in perf_df.iterrows():\n",
" study, level, phase = row[\"STUDY\"], row[\"LEVEL\"], row[\"PHASE\"]\n",
" col_name = f\"STUDY_{study}_LEVEL_{level}_PHASE_{phase}\"\n",
"\n",
" # collect valid F1 values among the three columns\n",
" scores = [row[c] for c in f1_cols if pd.notna(row[c])]\n",
" if scores:\n",
" mean_score = float(np.mean(scores))\n",
" subject_entry[col_name] = mean_score\n",
" valid_scores.extend(scores)\n",
"\n",
" # compute overall average across all valid combinations\n",
" if valid_scores:\n",
" subject_entry[\"overall_score\"] = float(np.mean(valid_scores))\n",
" performance_data.append(subject_entry)\n",
" print(f\"Subject {i}: {len(valid_scores)} gültige Scores, Overall = {subject_entry['overall_score']:.3f}\")\n",
" else:\n",
" print(f\"Subject {i}: keine gültigen F1-Scores\")\n",
"\n",
"# build dataframe\n",
"if performance_data:\n",
" performance_df = pd.DataFrame(performance_data)\n",
" combination_cols = sorted([c for c in performance_df.columns if c.startswith(\"STUDY_\")])\n",
" final_cols = [\"subjectID\", \"overall_score\"] + combination_cols\n",
" performance_df = performance_df[final_cols]\n",
" performance_df.to_csv(\"n_au_performance.csv\", index=False)\n",
"\n",
" print(f\"\\nGesamt Subjects mit Action Units: {len(performance_df)}\")\n",
"else:\n",
" print(\"Keine gültigen Daten gefunden.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0bcaf065",
"metadata": {},
"outputs": [],
"source": [
"performance_df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "db95eea7",
"metadata": {},
"outputs": [],
"source": [
"with pd.HDFStore(\"tmp_0000.h5\", mode=\"r\") as store:\n",
" md = store.select(\"META\")\n",
"print(\"File 0:\")\n",
"print(md)\n",
"with pd.HDFStore(\"tmp_0001.h5\", mode=\"r\") as store:\n",
" md = store.select(\"META\")\n",
"print(\"File 1\")\n",
"print(md)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8067036b",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_columns', None)\n",
"pd.set_option('display.max_rows', None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f18e7385",
"metadata": {},
"outputs": [],
"source": [
"with pd.HDFStore(\"tmp_0000.h5\", mode=\"r\") as store:\n",
" md = store.select(\"SIGNALS\", start=0, stop=1)\n",
"print(\"File 0:\")\n",
"md.head()\n",
"# with pd.HDFStore(\"tmp_0001.h5\", mode=\"r\",start=0, stop=1) as store:\n",
"# md = store.select(\"SIGNALS\")\n",
"# print(\"File 1\")\n",
"# print(md.columns)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -3,11 +3,11 @@ from feat.utils.io import get_test_data_path
 from moviepy.video.io.VideoFileClip import VideoFileClip
 import os
-def extract_aus(path, model, skip_frames):
+def extract_aus(path, model):
     detector = Detector(au_model=model)
     video_prediction = detector.detect(
-        path, data_type="video", skip_frames=skip_frames, face_detection_threshold=0.95 # include a frame every 5 seconds - 24 frames per second
+        path, data_type="video", skip_frames=24*5, face_detection_threshold=0.95 # include a frame every 5 seconds - 24 frames per second
     )
     return video_prediction.aus.sum()
@@ -38,13 +38,13 @@ def split_video(path, chunk_length=120):
     return output_path
-# def start(path):
-#     results = []
-#     clips = split_video(path)
+def start(path):
+    results = []
+    clips = split_video(path)
-#     for clip in clips:
-#         results.append(extract_aus(clip, 'svm', 25*5))
-#     return results
+    for clip in clips:
+        results.append(extract_aus(clip, 'svm'))
+    return results
 if __name__ == "__main__":
     results = []
@@ -53,6 +53,6 @@ if __name__ == "__main__":
     clips = split_video(test_video_path)
     for clippath in clips:
-        results.append(extract_aus(clippath, 'svm', 25*5))
+        results.append(extract_aus(clippath, 'svm'))
     print(results)

View File

@@ -1,158 +0,0 @@
import cv2
import time
import os
import threading
from datetime import datetime
from feat import Detector
import torch
import pandas as pd
# Import your helper functions
# from db_helper import connect_db, disconnect_db, insert_rows_into_table, create_table
import db_helper as db
# Configuration
DB_PATH = "action_units.db" # TODO
CAMERA_INDEX = 0
OUTPUT_DIR = "recordings"
VIDEO_DURATION = 50 # seconds
START_INTERVAL = 5 # seconds until the next recording starts
FPS = 25.0 # fixed FPS
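# NOTE: VIDEO_DURATION > START_INTERVAL, so several overlapping recordings run concurrently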
if not os.path.exists(OUTPUT_DIR):
os.makedirs(OUTPUT_DIR)
# Global detector so it does not have to be reloaded for every video (saves a lot of time/memory)
print("Initializing AU detector (please wait)...")
detector = Detector(au_model="xgb")
def extract_aus(path, skip_frames):
# torch.no_grad() disables gradient computation.
# This resolves the "Can't call numpy() on Tensor that requires grad" error.
with torch.no_grad():
video_prediction = detector.detect_video(
path,
skip_frames=skip_frames,
face_detection_threshold=0.95
)
# If video_prediction or .aus are still tensors,
# make sure they are summed correctly.
try:
# Take the sum of the action units over all detected frames
res = video_prediction.aus.sum()
return res
except Exception as e:
print(f"Error while summing: {e}")
return None
def startAU_creation(video_path, db_path):
"""This function runs in its own thread."""
try:
print(f"\n[THREAD START] Analysis running for: {video_path}")
# compute skip_frames (e.g. every 5 seconds at 25 FPS = 125)
output = extract_aus(video_path, skip_frames=int(FPS*5))
print(f"\n--- Result for {os.path.basename(video_path)} ---")
print(output)
print("--------------------------------------------------\n")
if output is not None:
# Open a connection for this thread (SQLite thread safety)
conn, cursor = db.connect_db(db_path)
# Prepare the data: timestamp + AU results
# Convert the Series/DataFrame into a dictionary
data_to_insert = output.to_dict()
data_to_insert['timestamp'] = [datetime.now().strftime("%Y-%m-%d %H:%M:%S")]
# Since the AU column names are dynamic, make sure the values are lists
# (insert_rows_into_table expects a list for every key)
final_payload = {k: [v] if not isinstance(v, list) else v for k, v in data_to_insert.items()}
db.insert_rows_into_table(conn, cursor, "actionUnits", final_payload)
db.disconnect_db(conn, cursor)
print(f"--- Result for {os.path.basename(video_path)} stored in DB ---")
except Exception as e:
print(f"Error while analyzing {video_path}: {e}")
class VideoRecorder:
def __init__(self, filename, width, height, db_path):
self.filename = filename
self.db_path = db_path
fourcc = cv2.VideoWriter_fourcc(*'XVID')
self.out = cv2.VideoWriter(filename, fourcc, FPS, (width, height))
self.frames_to_record = int(VIDEO_DURATION * FPS)
self.frames_count = 0
self.is_finished = False
def write_frame(self, frame):
if self.frames_count < self.frames_to_record:
self.out.write(frame)
self.frames_count += 1
else:
self.finish()
def finish(self):
if not self.is_finished:
self.out.release()
self.is_finished = True
abs_path = os.path.abspath(self.filename)
print(f"Video fertig gespeichert: {self.filename}")
# --- MULTITHREADING HIER ---
# Wir starten die Analyse in einem neuen Thread, damit main() sofort weiter frames lesen kann
analysis_thread = threading.Thread(target=startAU_creation, args=(abs_path, self.db_path))
analysis_thread.daemon = True # Beendet sich, wenn das Hauptprogramm schließt
analysis_thread.start()
def main():
cap = cv2.VideoCapture(CAMERA_INDEX)
if not cap.isOpened():
print("Fehler: Kamera konnte nicht geöffnet werden.")
return
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
active_recorders = []
last_start_time = 0
print("Aufnahme läuft. Drücke 'q' zum Beenden.")
try:
while True:
ret, frame = cap.read()
if not ret:
break
current_time = time.time()
if current_time - last_start_time >= START_INTERVAL:
timestamp = datetime.now().strftime("%H%M%S")
filename = os.path.join(OUTPUT_DIR, f"rec_{timestamp}.avi")
new_recorder = VideoRecorder(filename, width, height, DB_PATH)
active_recorders.append(new_recorder)
last_start_time = current_time
for rec in active_recorders[:]:
rec.write_frame(frame)
if rec.is_finished:
active_recorders.remove(rec)
cv2.imshow('Camera Livestream', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
time.sleep(1/FPS)
finally:
cap.release()
cv2.destroyAllWindows()
print("Programm beendet. Warte ggf. auf laufende Analysen...")
if __name__ == "__main__":
main()

View File

@@ -1,296 +0,0 @@
import cv2
import time
import os
import threading
from datetime import datetime
from feat import Detector
import torch
import mediapipe as mp
import csv
# Configuration
CAMERA_INDEX = 0
OUTPUT_DIR = "recordings"
VIDEO_DURATION = 10 # seconds
START_INTERVAL = 5 # seconds until the next recording starts
FPS = 25.0 # fixed FPS
if not os.path.exists(OUTPUT_DIR):
os.makedirs(OUTPUT_DIR)
# Global detector so it does not have to be reloaded for every video (saves a lot of time/memory)
print("Initializing AU detector (please wait)...")
detector = Detector(au_model="xgb")
# ===== MediaPipe FaceMesh Setup =====
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
static_image_mode=False,
max_num_faces=1,
refine_landmarks=True, # required for iris landmarks
min_detection_confidence=0.5,
min_tracking_confidence=0.5
)
LEFT_IRIS = [474, 475, 476, 477]
RIGHT_IRIS = [469, 470, 471, 472]
LEFT_EYE_LIDS = (159, 145)
RIGHT_EYE_LIDS = (386, 374)
LEFT_EYE_GAZE_IDXS = (33, 133, 159, 145)
RIGHT_EYE_GAZE_IDXS = (263, 362, 386, 374)
EYE_OPEN_THRESHOLD = 6
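# eyelid gap in pixels below which the eye is treated as closed (see eye_openness)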
# Prepare the CSV output
gaze_csv = open("gaze_data.csv", mode="w", newline="")
gaze_writer = csv.writer(gaze_csv)
gaze_writer.writerow([
"timestamp",
"left_gaze_x",
"left_gaze_y",
"right_gaze_x",
"right_gaze_y",
"left_valid",
"right_valid",
"left_diameter",
"right_diameter"
])
def eye_openness(landmarks, top_idx, bottom_idx, img_height):
top = landmarks[top_idx]
bottom = landmarks[bottom_idx]
return abs(top.y - bottom.y) * img_height
def compute_gaze(landmarks, iris_center, indices, w, h):
idx1, idx2, top_idx, bottom_idx = indices
p1 = landmarks[idx1]
p2 = landmarks[idx2]
top = landmarks[top_idx]
bottom = landmarks[bottom_idx]
x1 = p1.x * w
x2 = p2.x * w
y_top = top.y * h
y_bottom = bottom.y * h
iris_x, iris_y = iris_center
eye_left = min(x1, x2)
eye_right = max(x1, x2)
eye_width = eye_right - eye_left
eye_height = abs(y_bottom - y_top)
if eye_width == 0 or eye_height == 0:
return 0.5, 0.5
gaze_x = (iris_x - eye_left) / eye_width
gaze_y = (iris_y - min(y_top, y_bottom)) / eye_height
gaze_x = max(0, min(1, gaze_x))
gaze_y = max(0, min(1, gaze_y))
return gaze_x, gaze_y
def extract_aus(path, skip_frames):
# torch.no_grad() disables gradient computation.
# This resolves the "Can't call numpy() on Tensor that requires grad" error.
with torch.no_grad():
video_prediction = detector.detect_video(
path,
skip_frames=skip_frames,
face_detection_threshold=0.95
)
# If video_prediction or .aus are still tensors,
# make sure they are summed correctly.
try:
# Take the sum of the action units over all detected frames
res = video_prediction.aus.sum()
return res
except Exception as e:
print(f"Error while summing: {e}")
return 0
def startAU_creation(video_path):
"""This function runs in its own thread."""
try:
print(f"\n[THREAD START] Analysis running for: {video_path}")
# compute skip_frames (e.g. every 5 seconds at 25 FPS = 125)
output = extract_aus(video_path, skip_frames=int(FPS*5))
print(f"\n--- Result for {os.path.basename(video_path)} ---")
print(output)
print("--------------------------------------------------\n")
except Exception as e:
print(f"Error while analyzing {video_path}: {e}")
class VideoRecorder:
def __init__(self, filename, width, height):
self.filename = filename
fourcc = cv2.VideoWriter_fourcc(*'XVID')
self.out = cv2.VideoWriter(filename, fourcc, FPS, (width, height))
self.frames_to_record = int(VIDEO_DURATION * FPS)
self.frames_count = 0
self.is_finished = False
def write_frame(self, frame):
if self.frames_count < self.frames_to_record:
self.out.write(frame)
self.frames_count += 1
else:
self.finish()
def finish(self):
if not self.is_finished:
self.out.release()
self.is_finished = True
abs_path = os.path.abspath(self.filename)
print(f"Video fertig gespeichert: {self.filename}")
# --- MULTITHREADING HIER ---
# Wir starten die Analyse in einem neuen Thread, damit main() sofort weiter frames lesen kann
analysis_thread = threading.Thread(target=startAU_creation, args=(abs_path,))
analysis_thread.daemon = True # Beendet sich, wenn das Hauptprogramm schließt
analysis_thread.start()
def main():
cap = cv2.VideoCapture(CAMERA_INDEX)
if not cap.isOpened():
print("Fehler: Kamera konnte nicht geöffnet werden.")
return
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
active_recorders = []
last_start_time = 0
print("Aufnahme läuft. Drücke 'q' zum Beenden.")
try:
while True:
ret, frame = cap.read()
if not ret:
break
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
h, w, _ = frame.shape
results = face_mesh.process(rgb)
left_valid = 0
right_valid = 0
left_diameter = None
right_diameter = None
left_gaze_x = None
left_gaze_y = None
right_gaze_x = None
right_gaze_y = None
if results.multi_face_landmarks:
face_landmarks = results.multi_face_landmarks[0]
left_open = eye_openness(
face_landmarks.landmark,
LEFT_EYE_LIDS[0],
LEFT_EYE_LIDS[1],
h
)
right_open = eye_openness(
face_landmarks.landmark,
RIGHT_EYE_LIDS[0],
RIGHT_EYE_LIDS[1],
h
)
left_valid = 1 if left_open > EYE_OPEN_THRESHOLD else 0
right_valid = 1 if right_open > EYE_OPEN_THRESHOLD else 0
for eye_name, eye_indices in [("left", LEFT_IRIS), ("right", RIGHT_IRIS)]:
iris_points = []
for idx in eye_indices:
lm = face_landmarks.landmark[idx]
x_i, y_i = int(lm.x * w), int(lm.y * h)
iris_points.append((x_i, y_i))
if len(iris_points) == 4:
cx = int(sum(p[0] for p in iris_points) / 4)
cy = int(sum(p[1] for p in iris_points) / 4)
radius = max(
((x - cx) ** 2 + (y - cy) ** 2) ** 0.5
for (x, y) in iris_points
)
diameter = 2 * radius
cv2.circle(frame, (cx, cy), int(radius), (0, 255, 0), 2)
if eye_name == "left" and left_valid:
left_diameter = diameter
left_gaze_x, left_gaze_y = compute_gaze(
face_landmarks.landmark,
(cx, cy),
LEFT_EYE_GAZE_IDXS,
w, h
)
elif eye_name == "right" and right_valid:
right_diameter = diameter
right_gaze_x, right_gaze_y = compute_gaze(
face_landmarks.landmark,
(cx, cy),
RIGHT_EYE_GAZE_IDXS,
w, h
)
# Write the CSV row
gaze_writer.writerow([
time.time(),
left_gaze_x,
left_gaze_y,
right_gaze_x,
right_gaze_y,
left_valid,
right_valid,
left_diameter,
right_diameter
])
current_time = time.time()
if current_time - last_start_time >= START_INTERVAL:
timestamp = datetime.now().strftime("%H%M%S")
filename = os.path.join(OUTPUT_DIR, f"rec_{timestamp}.avi")
new_recorder = VideoRecorder(filename, width, height)
active_recorders.append(new_recorder)
last_start_time = current_time
for rec in active_recorders[:]:
rec.write_frame(frame)
if rec.is_finished:
active_recorders.remove(rec)
cv2.imshow('Camera Livestream', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
time.sleep(1/FPS)
finally:
gaze_csv.close()
face_mesh.close()
cap.release()
cv2.destroyAllWindows()
print("Programm beendet. Warte ggf. auf laufende Analysen...")
if __name__ == "__main__":
main()

View File

@@ -1,372 +0,0 @@
import warnings
warnings.filterwarnings(
"ignore",
message=r".*SymbolDatabase\.GetPrototype\(\) is deprecated.*",
category=UserWarning,
module=r"google\.protobuf\.symbol_database"
)
import cv2
import time
import os
import threading
from datetime import datetime
from feat import Detector
import torch
import mediapipe as mp
import pandas as pd
from pathlib import Path
from eyeFeature_new import compute_features_from_parquet
# Import your helper functions
# from db_helper import connect_db, disconnect_db, insert_rows_into_table, create_table
import db_helper as db
# Configuration
DB_PATH = Path("~/MSY_FS/databases/database.sqlite").expanduser()
CAMERA_INDEX = 0
OUTPUT_DIR = "recordings"
VIDEO_DURATION = 50 # seconds
START_INTERVAL = 5 # seconds until the next recording starts
FPS = 25.0 # fixed FPS
eye_tracking_features = {}
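# Shared state: written by GazeRecorder.finish(), read by startAU_creation()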
if not os.path.exists(OUTPUT_DIR):
os.makedirs(OUTPUT_DIR)
# Global detector so it does not have to be reloaded for every video (saves a lot of time/memory)
print("Initializing AU detector (please wait)...")
detector = Detector(au_model="xgb")
# ===== MediaPipe FaceMesh Setup =====
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
static_image_mode=False,
max_num_faces=1,
refine_landmarks=True, # required for iris landmarks
min_detection_confidence=0.5,
min_tracking_confidence=0.5
)
LEFT_IRIS = [474, 475, 476, 477]
RIGHT_IRIS = [469, 470, 471, 472]
LEFT_EYE_LIDS = (159, 145)
RIGHT_EYE_LIDS = (386, 374)
EYE_OPEN_THRESHOLD = 6
LEFT_EYE_ALL = [33, 7, 163, 144, 145, 153, 154, 155,
133, 173, 157, 158, 159, 160, 161, 246
]
RIGHT_EYE_ALL = [263, 249, 390, 373, 374, 380, 381, 382,
362, 398, 384, 385, 386, 387, 388, 466
]
def eye_openness(landmarks, top_idx, bottom_idx, img_height):
top = landmarks[top_idx]
bottom = landmarks[bottom_idx]
return abs(top.y - bottom.y) * img_height
def compute_gaze(landmarks, iris_center, eye_indices, w, h):
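# Normalize the iris center to the eye's bounding box (gaze coordinates roughly in [0, 1])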
iris_x, iris_y = iris_center
eye_points = []
for idx in eye_indices:
lm = landmarks[idx]
eye_points.append((lm.x * w, lm.y * h))
xs = [p[0] for p in eye_points]
ys = [p[1] for p in eye_points]
eye_left = min(xs)
eye_right = max(xs)
eye_top = min(ys)
eye_bottom = max(ys)
eye_width = eye_right - eye_left
eye_height = eye_bottom - eye_top
if eye_width < 1 or eye_height < 1:
return 0.5, 0.5
gaze_x = (iris_x - eye_left) / eye_width
gaze_y = (iris_y - eye_top) / eye_height
return gaze_x, gaze_y
def extract_aus(path, skip_frames):
# torch.no_grad() disables gradient computation.
# This resolves the "Can't call numpy() on Tensor that requires grad" error.
with torch.no_grad():
video_prediction = detector.detect_video(
path,
skip_frames=skip_frames,
face_detection_threshold=0.95
)
# If video_prediction or .aus are still tensors,
# make sure they are aggregated correctly.
try:
# Take the mean of the action units over all detected frames
res = video_prediction.aus.mean()
return res
except Exception as e:
print(f"Error while aggregating: {e}")
return None
def startAU_creation(video_path, db_path):
"""This function runs in its own thread."""
try:
print(f"\n[THREAD START] Analysis running for: {video_path}")
# compute skip_frames (e.g. every 5 seconds at 25 FPS = 125)
output = extract_aus(video_path, skip_frames=int(FPS*5))
print(f"\n--- Result for {os.path.basename(video_path)} ---")
print(output)
print("--------------------------------------------------\n")
if output is not None:
# Open a connection for this thread (SQLite thread safety)
conn, cursor = db.connect_db(db_path)
# Prepare the data: timestamp + AU results
# Convert the Series/DataFrame into a dictionary
data_to_insert = output.to_dict()
data_to_insert = {
f"FACE_{k}_mean": v for k, v in data_to_insert.items()
}
now = datetime.now()
ticks = int(time.mktime(now.timetuple()))
data_to_insert['start_time'] = [ticks]
data_to_insert = data_to_insert | eye_tracking_features
#data_to_insert['start_time'] = [datetime.now().strftime("%Y-%m-%d %H:%M:%S")]
# Since the AU column names are dynamic, make sure the values are lists
# (insert_rows_into_table expects a list for every key)
final_payload = {k: [v] if not isinstance(v, list) else v for k, v in data_to_insert.items()}
db.insert_rows_into_table(conn, cursor, "feature_table", final_payload)
db.disconnect_db(conn, cursor)
print(f"--- Result for {os.path.basename(video_path)} stored in DB ---")
os.remove(video_path)
os.remove(video_path.replace(".avi", "_gaze.parquet"))
print(f"Deleted file: {video_path}")
except Exception as e:
print(f"Error while analyzing {video_path}: {e}")
class VideoRecorder:
def __init__(self, filename, width, height, db_path):
self.gaze_data = []
self.filename = filename
self.db_path = db_path
fourcc = cv2.VideoWriter_fourcc(*'XVID')
self.out = cv2.VideoWriter(filename, fourcc, FPS, (width, height))
self.frames_to_record = int(VIDEO_DURATION * FPS)
self.frames_count = 0
self.is_finished = False
def write_frame(self, frame):
if self.frames_count < self.frames_to_record:
self.out.write(frame)
self.frames_count += 1
else:
self.finish()
def finish(self):
if not self.is_finished:
self.out.release()
self.is_finished = True
abs_path = os.path.abspath(self.filename)
print(f"Video fertig gespeichert: {self.filename}")
# --- MULTITHREADING HIER ---
# Wir starten die Analyse in einem neuen Thread, damit main() sofort weiter frames lesen kann
analysis_thread = threading.Thread(target=startAU_creation, args=(abs_path, self.db_path))
analysis_thread.daemon = True # Beendet sich, wenn das Hauptprogramm schließt
analysis_thread.start()
class GazeRecorder:
def __init__(self, filename):
self.filename = filename
self.frames_to_record = int(VIDEO_DURATION * FPS)
self.frames_count = 0
self.gaze_data = []
self.is_finished = False
def write_frame(self, gaze_row):
if self.frames_count < self.frames_to_record:
self.gaze_data.append(gaze_row)
self.frames_count += 1
else:
self.finish()
def finish(self):
global eye_tracking_features # without this, the assignment below would only bind a local variable
if not self.is_finished:
df = pd.DataFrame(self.gaze_data)
df.to_parquet(self.filename, engine="pyarrow", index=False)
print(f"Gaze parquet saved: {self.filename}")
features = compute_features_from_parquet(self.filename)
print("Features:", features)
self.is_finished = True
eye_tracking_features = features
def main():
cap = cv2.VideoCapture(CAMERA_INDEX)
if not cap.isOpened():
print("Fehler: Kamera konnte nicht geöffnet werden.")
return
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
active_video_recorders = []
active_gaze_recorders = []
last_start_time = 0
print("Aufnahme läuft. Drücke 'q' zum Beenden.")
try:
while True:
ret, frame = cap.read()
if not ret:
break
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
h, w, _ = frame.shape
results = face_mesh.process(rgb)
left_valid = 0
right_valid = 0
left_diameter = None
right_diameter = None
left_gaze_x = None
left_gaze_y = None
right_gaze_x = None
right_gaze_y = None
if results.multi_face_landmarks:
face_landmarks = results.multi_face_landmarks[0]
left_open = eye_openness(
face_landmarks.landmark,
LEFT_EYE_LIDS[0],
LEFT_EYE_LIDS[1],
h
)
right_open = eye_openness(
face_landmarks.landmark,
RIGHT_EYE_LIDS[0],
RIGHT_EYE_LIDS[1],
h
)
left_valid = 1 if left_open > EYE_OPEN_THRESHOLD else 0
right_valid = 1 if right_open > EYE_OPEN_THRESHOLD else 0
for eye_name, eye_indices in [("left", LEFT_IRIS), ("right", RIGHT_IRIS)]:
iris_points = []
for idx in eye_indices:
lm = face_landmarks.landmark[idx]
x_i, y_i = int(lm.x * w), int(lm.y * h)
iris_points.append((x_i, y_i))
if len(iris_points) == 4:
cx = int(sum(p[0] for p in iris_points) / 4)
cy = int(sum(p[1] for p in iris_points) / 4)
radius = max(
((x - cx) ** 2 + (y - cy) ** 2) ** 0.5
for (x, y) in iris_points
)
diameter = 2 * radius
cv2.circle(frame, (cx, cy), int(radius), (0, 255, 0), 2)
if eye_name == "left" and left_valid:
left_diameter = diameter
left_gaze_x, left_gaze_y = compute_gaze(
face_landmarks.landmark,
(cx, cy),
RIGHT_EYE_ALL,
w, h
)
elif eye_name == "right" and right_valid:
right_diameter = diameter
right_gaze_x, right_gaze_y = compute_gaze(
face_landmarks.landmark,
(cx, cy),
LEFT_EYE_ALL,
w, h
)
gaze_row = {
"timestamp": time.time(),
"EYE_LEFT_GAZE_POINT_ON_DISPLAY_AREA_X": left_gaze_x,
"EYE_LEFT_GAZE_POINT_ON_DISPLAY_AREA_Y": left_gaze_y,
"EYE_RIGHT_GAZE_POINT_ON_DISPLAY_AREA_X": right_gaze_x,
"EYE_RIGHT_GAZE_POINT_ON_DISPLAY_AREA_Y": right_gaze_y,
"EYE_LEFT_PUPIL_VALIDITY": left_valid,
"EYE_RIGHT_PUPIL_VALIDITY": right_valid,
"EYE_LEFT_PUPIL_DIAMETER": left_diameter,
"EYE_RIGHT_PUPIL_DIAMETER": right_diameter
}
current_time = time.time()
if current_time - last_start_time >= START_INTERVAL:
timestamp = datetime.now().strftime("%H%M%S")
filename = os.path.join(OUTPUT_DIR, f"rec_{timestamp}.avi")
video_recorder = VideoRecorder(filename, width, height, DB_PATH)
gaze_filename = filename.replace(".avi", "_gaze.parquet")
gaze_recorder = GazeRecorder(gaze_filename)
active_video_recorders.append(video_recorder)
active_gaze_recorders.append(gaze_recorder)
last_start_time = current_time
for v_rec, g_rec in zip(active_video_recorders[:], active_gaze_recorders[:]):
v_rec.write_frame(frame)
g_rec.write_frame(gaze_row)
if v_rec.is_finished:
active_video_recorders.remove(v_rec)
if g_rec.is_finished:
active_gaze_recorders.remove(g_rec)
cv2.imshow('Camera Livestream', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
time.sleep(1/FPS)
finally:
face_mesh.close()
cap.release()
cv2.destroyAllWindows()
print("Programm beendet. Warte ggf. auf laufende Analysen...")
if __name__ == "__main__":
main()
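
The recorder above hands each finished video to a background thread so that frame capture never blocks on analysis. A minimal, self-contained sketch of that hand-off pattern — analyze and the file name are placeholders, not the project's startAU_creation; note that daemon threads are not joined at exit, so late analyses can be cut off:

import threading
import time

def analyze(path):
    # stand-in for a long-running job such as startAU_creation
    time.sleep(2)
    print(f"analysis done: {path}")

def hand_off(path):
    t = threading.Thread(target=analyze, args=(path,))
    t.daemon = True  # dies with the main program, like analysis_thread above
    t.start()
    return t

if __name__ == "__main__":
    t = hand_off("rec_demo.avi")
    print("main keeps running while the analysis thread works")
    t.join()  # only for the demo; the recorder script never joins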

View File

@ -1,54 +0,0 @@
import db_helper as db
DB_PATH = "action_units.db"
def setup_test_db():
# 1. Connect (creates the file if it does not exist yet)
conn, cursor = db.connect_db(DB_PATH)
# 2. Define columns
# One column for the timestamp plus a set of example AUs.
# In SQLite, further columns can be added later via the helper.
columns = {
"timestamp": "TEXT",
"AU01": "REAL",
"AU02": "REAL",
"AU04": "REAL",
"AU05": "REAL",
"AU06": "REAL",
"AU07": "REAL",
"AU09": "REAL",
"AU10": "REAL",
"AU11": "REAL",
"AU12": "REAL",
"AU14": "REAL",
"AU15": "REAL",
"AU17": "REAL",
"AU20": "REAL",
"AU23": "REAL",
"AU24": "REAL",
"AU25": "REAL",
"AU26": "REAL",
"AU28": "REAL",
"AU43": "REAL",
}
# Constraints (e.g. the timestamp must not be empty)
constraints = {
"timestamp": ["NOT NULL"]
}
# Define the primary key (combination of timestamp and, if needed, an ID)
primary_key = {"pk_timestamp": ["timestamp"]}
try:
sql = db.create_table(conn, cursor, "actionUnits", columns, constraints, primary_key)
print("Tabelle erfolgreich erstellt!")
print(f"SQL-Befehl:\n{sql}")
except Exception as e:
print(f"Hinweis: {e}")
finally:
db.disconnect_db(conn, cursor)
if __name__ == "__main__":
setup_test_db()
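
db_helper's write API is not shown in this diff; purely as an illustration, inserting one row into the actionUnits table with Python's standard sqlite3 module could look like this (column list shortened, values made up):

import sqlite3
from datetime import datetime

conn = sqlite3.connect("action_units.db")
cur = conn.cursor()
# shortened column list for the sketch; the real table has 20 AU columns
cur.execute(
    "INSERT INTO actionUnits (timestamp, AU01, AU02) VALUES (?, ?, ?)",
    (datetime.now().isoformat(), 0.42, 0.13),
)
conn.commit()
conn.close()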

View File

@ -1,205 +0,0 @@
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler
from scipy.signal import welch
from pygazeanalyser.detectors import fixation_detection, saccade_detection
##############################################################################
# CONFIGURATION
##############################################################################
SAMPLING_RATE = 25 # Hz
MIN_DUR_BLINKS = 2 # in samples; one sample is 40 ms at 25 Hz
##############################################################################
# EYE-TRACKING FUNCTIONS
##############################################################################
def clean_eye_df(df):
"""Extrahiert nur Eye-Tracking Spalten und entfernt leere Zeilen."""
eye_cols = [c for c in df.columns if c.startswith("EYE_")]
if not eye_cols:
return pd.DataFrame()
df_eye = df[eye_cols].copy()
df_eye = df_eye.replace([np.inf, -np.inf], np.nan)
df_eye = df_eye.dropna(subset=eye_cols, how="all")
return df_eye.reset_index(drop=True)
def extract_gaze_signal(df):
"""Extrahiert 2D-Gaze-Positionen, maskiert ungültige Samples und interpoliert."""
gx_L = df["EYE_LEFT_GAZE_POINT_ON_DISPLAY_AREA_X"].astype(float).copy()
gy_L = df["EYE_LEFT_GAZE_POINT_ON_DISPLAY_AREA_Y"].astype(float).copy()
gx_R = df["EYE_RIGHT_GAZE_POINT_ON_DISPLAY_AREA_X"].astype(float).copy()
gy_R = df["EYE_RIGHT_GAZE_POINT_ON_DISPLAY_AREA_Y"].astype(float).copy()
val_L = (df["EYE_LEFT_PUPIL_VALIDITY"] == 1)
val_R = (df["EYE_RIGHT_PUPIL_VALIDITY"] == 1)
# Inf → NaN
for arr in [gx_L, gy_L, gx_R, gy_R]:
arr.replace([np.inf, -np.inf], np.nan, inplace=True)
# Mask invalid samples
gx_L[~val_L] = np.nan
gy_L[~val_L] = np.nan
gx_R[~val_R] = np.nan
gy_R[~val_R] = np.nan
# Average of both eyes
gx = np.mean(np.column_stack([gx_L, gx_R]), axis=1)
gy = np.mean(np.column_stack([gy_L, gy_R]), axis=1)
# Interpolation
gx = pd.Series(gx).interpolate(limit=None, limit_direction="both").bfill().ffill()
gy = pd.Series(gy).interpolate(limit=None, limit_direction="both").bfill().ffill()
# Min-max scaling
xscaler = MinMaxScaler()
gxscale = xscaler.fit_transform(gx.values.reshape(-1, 1))
yscaler = MinMaxScaler()
gyscale = yscaler.fit_transform(gy.values.reshape(-1, 1))
return np.column_stack((gxscale, gyscale))
def extract_pupil(df):
"""Extrahiert Pupillengröße (beide Augen gemittelt)."""
pl = df["EYE_LEFT_PUPIL_DIAMETER"].replace([np.inf, -np.inf], np.nan)
pr = df["EYE_RIGHT_PUPIL_DIAMETER"].replace([np.inf, -np.inf], np.nan)
vl = df.get("EYE_LEFT_PUPIL_VALIDITY")
vr = df.get("EYE_RIGHT_PUPIL_VALIDITY")
if vl is None or vr is None:
validity = (~pl.isna() | ~pr.isna()).astype(int).to_numpy()
else:
validity = ((vl == 1) | (vr == 1)).astype(int).to_numpy()
p = np.mean(np.column_stack([pl, pr]), axis=1)
p = pd.Series(p).interpolate(limit=50, limit_direction="both").bfill().ffill()
return p.to_numpy(), validity
def detect_blinks(pupil_validity, min_duration=5):
"""Erkennt Blinks: Validity=0 → Blink."""
blinks = []
start = None
for i, v in enumerate(pupil_validity):
if v == 0 and start is None:
start = i
elif v == 1 and start is not None:
if i - start >= min_duration:
blinks.append([start, i])
start = None
return blinks
def compute_IPA(pupil, fs=25):
"""Index of Pupillary Activity (Duchowski 2018)."""
f, Pxx = welch(pupil, fs=fs, nperseg=int(fs*2))
hf_band = (f >= 0.6) & (f <= 2.0)
return np.sum(Pxx[hf_band])
def extract_eye_features(df_eye, fs=25, min_dur_blinks=2):
"""
Extrahiert Eye-Tracking Features für ein einzelnes Window.
Gibt Dictionary mit allen Eye-Features zurück.
"""
# Gaze
gaze = extract_gaze_signal(df_eye)
# Pupil
pupil, pupil_validity = extract_pupil(df_eye)
# ----------------------------
# FIXATIONS
# ----------------------------
time_ms = np.arange(len(df_eye)) * 1000.0 / fs
fix, efix = fixation_detection(
x=gaze[:, 0], y=gaze[:, 1], time=time_ms,
missing=0.0, maxdist=0.003, mindur=10
)
fixation_durations = [f[2] for f in efix if np.isfinite(f[2]) and f[2] > 0]
# Duration categories
F_short = sum(66 <= d <= 150 for d in fixation_durations)
F_medium = sum(300 <= d <= 500 for d in fixation_durations)
F_long = sum(d >= 1000 for d in fixation_durations)
F_hundred = sum(d > 100 for d in fixation_durations)
# ----------------------------
# SACCADES
# ----------------------------
sac, esac = saccade_detection(
x=gaze[:, 0], y=gaze[:, 1], time=time_ms,
missing=0, minlen=12, maxvel=0.2, maxacc=1
)
sac_durations = [s[2] for s in esac]
sac_amplitudes = [((s[5]-s[3])**2 + (s[6]-s[4])**2)**0.5 for s in esac]
# ----------------------------
# BLINKS
# ----------------------------
blinks = detect_blinks(pupil_validity, min_duration=min_dur_blinks)
blink_durations = [(b[1] - b[0]) / fs for b in blinks]
# ----------------------------
# PUPIL
# ----------------------------
if np.all(np.isnan(pupil)):
mean_pupil = np.nan
ipa = np.nan
else:
mean_pupil = np.nanmean(pupil)
ipa = compute_IPA(pupil, fs=fs)
# Feature Dictionary
return {
"Fix_count_short_66_150": F_short,
"Fix_count_medium_300_500": F_medium,
"Fix_count_long_gt_1000": F_long,
"Fix_count_100": F_hundred,
"Fix_mean_duration": np.mean(fixation_durations) if fixation_durations else 0,
"Fix_median_duration": np.median(fixation_durations) if fixation_durations else 0,
"Sac_count": len(sac),
"Sac_mean_amp": np.mean(sac_amplitudes) if sac_amplitudes else 0,
"Sac_mean_dur": np.mean(sac_durations) if sac_durations else 0,
"Sac_median_dur": np.median(sac_durations) if sac_durations else 0,
"Blink_count": len(blinks),
"Blink_mean_dur": np.mean(blink_durations) if blink_durations else 0,
"Blink_median_dur": np.median(blink_durations) if blink_durations else 0,
"Pupil_mean": mean_pupil,
"Pupil_IPA": ipa
}
def compute_features_from_parquet(parquet_path):
df = pd.read_parquet(parquet_path)
df_eye = clean_eye_df(df)
if df_eye.empty:
return None
features = extract_eye_features(
df_eye,
fs=SAMPLING_RATE,
min_dur_blinks=MIN_DUR_BLINKS
)
return features
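
The blink detector above is easy to sanity-check in isolation. A toy run, assuming the module above is in scope and 25 Hz sampling (so one sample is 40 ms):

import numpy as np

# 1 = valid sample, 0 = eye closed; one 4-sample gap (160 ms at 25 Hz)
validity = np.array([1, 1, 0, 0, 0, 0, 1, 1, 1])
print(detect_blinks(validity, min_duration=2))  # -> [[2, 6]]

# end-to-end entry point for a recorded window (path is illustrative):
# features = compute_features_from_parquet("rec_123456_gaze.parquet")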

View File

@ -17,8 +17,9 @@
"metadata": {},
"outputs": [],
"source": [
"df= pd.read_parquet(r\" \")\n",
"print(df.shape)"
"df= pd.read_parquet(r\"C:\\Users\\micha\\FAUbox\\WS2526_Fahrsimulator_MSY (Celina Korzer)\\AU_dataset\\output_windowed.parquet\")\n",
"print(df.shape)\n",
"\n"
]
},
{

Binary file not shown.

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,529 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "47f6de7b",
"metadata": {},
"source": [
"Bibliotheken importieren"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "99294260",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd \n",
"import numpy as np \n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns \n",
"import random \n",
"import joblib \n",
"from pathlib import Path \n",
"\n",
"from sklearn.model_selection import GroupKFold, GroupShuffleSplit\n",
"from sklearn.preprocessing import StandardScaler \n",
"from sklearn.metrics import ( \n",
" precision_score, recall_score,\n",
" confusion_matrix, roc_curve, auc, \n",
" precision_recall_curve, f1_score, \n",
" balanced_accuracy_score, accuracy_score\n",
") \n",
"\n",
"import tensorflow as tf \n",
"from tensorflow.keras import Input, layers, models, regularizers"
]
},
{
"cell_type": "markdown",
"id": "52b4ca8c",
"metadata": {},
"source": [
"Seed festlegen"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e49d281",
"metadata": {},
"outputs": [],
"source": [
"SEED = 42 \n",
"np.random.seed(SEED) \n",
"tf.random.set_seed(SEED) \n",
"random.seed(SEED)"
]
},
{
"cell_type": "markdown",
"id": "ae1a715f",
"metadata": {},
"source": [
"Daten laden"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "870f01c3",
"metadata": {},
"outputs": [],
"source": [
"data_path = Path(r\"~/data-paulusjafahrsimulator-gpu/new_datasets/50s_25Hz_dataset.parquet\") \n",
"\n",
"data = pd.read_parquet(path=data_path)"
]
},
{
"cell_type": "markdown",
"id": "bedbc23b",
"metadata": {},
"source": [
"Labels erstellen"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "38848515",
"metadata": {},
"outputs": [],
"source": [
"low_all = data[((data[\"PHASE\"] == \"baseline\") | \n",
" ((data[\"STUDY\"] == \"n-back\") & (data[\"PHASE\"] != \"baseline\") & (data[\"LEVEL\"].isin([1,4]))))].copy() \n",
"\n",
"high_all = pd.concat([ \n",
" data[(data[\"STUDY\"]==\"n-back\") & (data[\"LEVEL\"].isin([2,3,5,6])) & (data[\"PHASE\"].isin([\"train\",\"test\"]))], \n",
" data[(data[\"STUDY\"]==\"k-drive\") & (data[\"PHASE\"]!=\"baseline\")] \n",
"]).copy() \n",
"\n",
"low_all[\"label\"] = 0 \n",
"high_all[\"label\"] = 1 \n",
"data = pd.concat([low_all, high_all], ignore_index=True).drop_duplicates() "
]
},
{
"cell_type": "markdown",
"id": "0b282acf",
"metadata": {},
"source": [
"Features und Labels"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5edb00a0",
"metadata": {},
"outputs": [],
"source": [
"#Face AUs\n",
"au_columns = [col for col in data.columns if \"face\" in col.lower()] \n",
"\n",
"#Eye Features\n",
"eye_columns = [ \n",
" 'Fix_count_short_66_150', \n",
" 'Fix_count_medium_300_500', \n",
" 'Fix_count_long_gt_1000', \n",
" 'Fix_count_100', \n",
" 'Fix_mean_duration', \n",
" 'Fix_median_duration', \n",
" 'Sac_count', \n",
" 'Sac_mean_amp', \n",
" 'Sac_mean_dur', \n",
" 'Sac_median_dur', \n",
" 'Blink_count', \n",
" 'Blink_mean_dur', \n",
" 'Blink_median_dur', \n",
" 'Pupil_mean', \n",
" 'Pupil_IPA' \n",
"]\n",
"\n",
"#Early Fusion\n",
"feature_columns = au_columns + eye_columns\n",
"\n",
"#NaNs entfernen \n",
"data = data.dropna(subset=feature_columns + [\"label\"])\n",
"\n",
"X = data[feature_columns].values[..., np.newaxis] \n",
"y = data[\"label\"].values \n",
"\n",
"groups = data[\"subjectID\"].values\n",
"print(data.columns.tolist())\n",
"\n",
"print(\"Gefundene FACE_AU-Spalten:\", au_columns)\n",
"print(\"Gefundene Eye Features:\" , eye_columns)\n",
"\n",
"print(\"Anzahl FACE_AUs:\", len(au_columns)) \n",
"print(\"Anzahl EYE Features:\", len(eye_columns)) \n",
"print(\"Gesamtzahl Features:\", len(feature_columns))"
]
},
{
"cell_type": "markdown",
"id": "d8689679",
"metadata": {},
"source": [
"Train-Test-Split"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5cf88c3",
"metadata": {},
"outputs": [],
"source": [
"gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)\n",
"train_idx, test_idx = next(gss.split(X, y, groups))\n",
"\n",
"feature_columns_train, feature_columns_test = X[train_idx], X[test_idx]\n",
"y_train, y_test = y[train_idx], y[test_idx]\n",
"groups_train, groups_test = groups[train_idx], groups[test_idx]\n",
"\n",
"print(\"Train:\", len(y_train), \" | Test:\", len(y_test))"
]
},
{
"cell_type": "markdown",
"id": "a539b83b",
"metadata": {},
"source": [
"CNN-Modell"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e4a7f496",
"metadata": {},
"outputs": [],
"source": [
"def build_model(input_shape, lr=1e-4): \n",
" model = models.Sequential([ \n",
" Input(shape=input_shape), \n",
" layers.Conv1D(32, kernel_size=3, activation=\"relu\", kernel_regularizer=regularizers.l2(0.001)), \n",
" layers.BatchNormalization(), \n",
" layers.MaxPooling1D(pool_size=2),\n",
"\n",
" layers.Conv1D(64, kernel_size=3, activation=\"relu\", kernel_regularizer=regularizers.l2(0.001)), \n",
" layers.BatchNormalization(), \n",
" layers.GlobalAveragePooling1D(), \n",
" \n",
" layers.Dense(32, activation=\"relu\", kernel_regularizer=regularizers.l2(0.001)), \n",
" layers.Dropout(0.5), \n",
" layers.Dense(1, activation=\"sigmoid\") \n",
" ]) \n",
" \n",
" model.compile( \n",
" optimizer=tf.keras.optimizers.Adam(learning_rate=lr), \n",
" loss=\"binary_crossentropy\", \n",
" metrics=[\"accuracy\", tf.keras.metrics.AUC(name=\"auc\")] \n",
" ) \n",
" return model"
]
},
{
"cell_type": "markdown",
"id": "5905871b",
"metadata": {},
"source": [
"Cross-Validation"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "90658000",
"metadata": {},
"outputs": [],
"source": [
"gkf = GroupKFold(n_splits=5) \n",
"cv_histories = [] \n",
"cv_results = [] \n",
"fold_subjects = []\n",
"all_conf_matrices = []\n",
"\n",
"for fold, (train_idx, val_idx) in enumerate(gkf.split(X, y, groups)):\n",
" train_subjects = np.unique(groups[train_idx]) \n",
" val_subjects = np.unique(groups[val_idx]) \n",
" fold_subjects.append({\"Fold\": fold+1, \n",
" \"Train_Subjects\": train_subjects, \n",
" \"Val_Subjects\": val_subjects}) \n",
" \n",
" print(f\"\\n--- Fold {fold+1} ---\") \n",
" print(\"Train-Subjects:\", train_subjects) \n",
" print(\"Val-Subjects:\", val_subjects) \n",
"\n",
" #Split\n",
" X_train, X_val = X[train_idx], X[val_idx] \n",
" y_train, y_val = y[train_idx], y[val_idx] # Normalisierung pro Fold \n",
"\n",
" #Normalisierung pro Fold\n",
" scaler = StandardScaler() \n",
" X_train = scaler.fit_transform(X_train.reshape(len(X_train), -1)).reshape(X_train.shape) \n",
" X_val = scaler.transform(X_val.reshape(len(X_val), -1)).reshape(X_val.shape) \n",
"\n",
" # Plausibilitäts-Check \n",
" print(\"Train Mittelwerte (erste 5 Features):\", X_train.mean(axis=0)[:5]) \n",
" print(\"Train Std (erste 5 Features):\", X_train.std(axis=0)[:5]) \n",
" print(\"Val Mittelwerte (erste 5 Features):\", X_val.mean(axis=0)[:5]) \n",
" print(\"Val Std (erste 5 Features):\", X_val.std(axis=0)[:5]) \n",
"\n",
" # Modell \n",
" model = build_model(input_shape=(len(feature_columns_train),1), lr=1e-4) \n",
" model.summary() \n",
"\n",
" callbacks = [ \n",
" tf.keras.callbacks.EarlyStopping(monitor=\"val_loss\", patience=10, restore_best_weights=True), \n",
" tf.keras.callbacks.ReduceLROnPlateau(monitor=\"val_loss\", factor=0.5, patience=5, min_lr=1e-6) \n",
" ] \n",
"\n",
" history = model.fit( \n",
" X_train, y_train, \n",
" validation_data=(X_val, y_val), \n",
" epochs=100, \n",
" batch_size=16, \n",
" callbacks=callbacks, \n",
" verbose=0 \n",
" ) \n",
"\n",
" cv_histories.append(history.history) \n",
" scores = model.evaluate(X_val, y_val, verbose=0) \n",
" cv_results.append(scores) \n",
" print(f\"Fold {fold+1} - Val Loss: {scores[0]:.4f}, Val Acc: {scores[1]:.4f}, Val AUC: {scores[2]:.4f}\")\n",
"\n",
"\n",
" #Konfusionsmatrix \n",
" y_pred = (model.predict(X_val) > 0.5).astype(int) \n",
" cm = confusion_matrix(y_val, y_pred) \n",
" all_conf_matrices.append(cm) \n",
" \n",
" print(f\"Konfusionsmatrix Fold {fold+1}:\\n{cm}\\n\") \n",
" \n",
"# Aggregierte Matrix \n",
"agg_cm = sum(all_conf_matrices) \n",
"print(\"Aggregierte Konfusionsmatrix über alle Folds:\") \n",
"print(agg_cm)\n"
]
},
{
"cell_type": "markdown",
"id": "d10b7e78",
"metadata": {},
"source": [
"Results"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9aeba7f4",
"metadata": {},
"outputs": [],
"source": [
"#results\n",
"cv_results = np.array(cv_results) \n",
"print(\"\\n=== Cross-Validation Ergebnisse ===\") \n",
"print(f\"Durchschnittlicher Val-Loss: {cv_results[:,0].mean():.4f}\") \n",
"print(f\"Durchschnittliche Val-Accuracy: {cv_results[:,1].mean():.4f}\") \n",
"print(f\"Durchschnittliche Val-AUC: {cv_results[:,2].mean():.4f}\")\n",
"\n",
"#Ergebnis-Tabelle erstellen\n",
"results_table = pd.DataFrame({ \n",
" \"Fold\": np.arange(1, len(cv_results)+1), \n",
" \"Val Loss\": cv_results[:,0], \n",
" \"Val Accuracy\": cv_results[:,1], \n",
" \"Val AUC\": cv_results[:,2] }) \n",
"\n",
"# Durchschnittszeile hinzufügen \n",
"avg_row = pd.DataFrame({ \n",
" \"Fold\": [\"Ø\"], \n",
" \"Val Loss\": [cv_results[:,0].mean()], \n",
" \"Val Accuracy\": [cv_results[:,1].mean()], \n",
" \"Val AUC\": [cv_results[:,2].mean()] \n",
"}) \n",
"\n",
"results_table = pd.concat([results_table, avg_row], ignore_index=True) \n",
"\n",
"print(\"\\n=== Ergebnis-Tabelle ===\") \n",
"print(results_table) \n",
"\n",
"#Tabelle speichern \n",
"results_table.to_csv(\"cnn_crossVal_results.csv\", index=False) \n",
"print(\"Ergebnisse gespeichert als 'cnn_crossVal_results.csv'\")"
]
},
{
"cell_type": "markdown",
"id": "fae5df7a",
"metadata": {},
"source": [
"Finales Modell trainieren"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5b3eab61",
"metadata": {},
"outputs": [],
"source": [
"scaler_final = StandardScaler() \n",
"X_scaled = scaler_final.fit_transform(feature_columns_train.reshape(len(feature_columns_train), -1)).reshape(feature_columns_train.shape) \n",
"\n",
"final_model = build_model(input_shape=(len(feature_columns_train),1), lr=1e-4) \n",
"final_model.summary() \n",
"\n",
"final_model.fit( \n",
" X_scaled, y_train, \n",
" epochs=150, \n",
" batch_size=16, \n",
" verbose=1 \n",
")"
]
},
{
"cell_type": "markdown",
"id": "7c7f9cc4",
"metadata": {},
"source": [
"Speichern des Modells"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2d3af5be",
"metadata": {},
"outputs": [],
"source": [
"# final_model.save(\"cnn_crossVal_EarlyFusion_V2.keras\") \n",
"# joblib.dump(scaler_final, \"scaler_crossVal_EarlyFusion_V2.joblib\") \n",
"\n",
"# print(\"Finales Modell und Scaler gespeichert als 'cnn_crossVal_EarlyFusion_V2.keras' und 'scaler_crossVal_EarlyFusion_V2.joblib'\")"
]
},
{
"cell_type": "markdown",
"id": "c11891e0",
"metadata": {},
"source": [
"Plots"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f6a8584",
"metadata": {},
"outputs": [],
"source": [
"#plots\n",
"def plot_cv_histories(cv_histories, metric): \n",
" plt.figure(figsize=(10,6)) \n",
" \n",
" for i, hist in enumerate(cv_histories): \n",
" plt.plot(hist[metric], label=f\"Fold {i+1} Train\", alpha=0.7) \n",
" plt.plot(hist[f\"val_{metric}\"], label=f\"Fold {i+1} Val\", linestyle=\"--\", alpha=0.7) \n",
" plt.xlabel(\"Epochs\") \n",
" plt.ylabel(metric.capitalize()) \n",
" plt.title(f\"Cross-Validation {metric.capitalize()} Verläufe\") \n",
" plt.legend() \n",
" plt.grid(True) \n",
" plt.show()\n",
" \n",
"plot_cv_histories(cv_histories, \"loss\") \n",
"plot_cv_histories(cv_histories, \"accuracy\") \n",
"plot_cv_histories(cv_histories, \"auc\")"
]
},
{
"cell_type": "markdown",
"id": "4aebe6c6",
"metadata": {},
"source": [
"Test"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0d34d6b7",
"metadata": {},
"outputs": [],
"source": [
"# Preprocessing Testdaten \n",
"X_test_scaled = scaler.transform( \n",
" feature_columns_test.reshape(len(feature_columns_test), -1) \n",
").reshape(feature_columns_test.shape) \n",
"\n",
"# Vorhersagen \n",
"y_prob_test = model.predict(X_test_scaled).flatten() \n",
"y_pred_test = (y_prob_test > 0.5).astype(int) \n",
"\n",
"# Konfusionsmatrix \n",
"cm_test = confusion_matrix(y_test, y_pred_test) \n",
"\n",
"plt.figure(figsize=(6,5)) \n",
"sns.heatmap(cm_test, annot=True, fmt=\"d\", cmap=\"Greens\", \n",
" xticklabels=[\"Pred 0\", \"Pred 1\"], \n",
" yticklabels=[\"True 0\", \"True 1\"]) \n",
"plt.title(\"Konfusionsmatrix - Testdaten\") \n",
"plt.show() \n",
"\n",
"# ROC \n",
"fpr, tpr, _ = roc_curve(y_test, y_prob_test) \n",
"roc_auc = auc(fpr, tpr) \n",
"\n",
"plt.figure(figsize=(7,6)) \n",
"plt.plot(fpr, tpr, label=f\"AUC = {roc_auc:.3f}\") \n",
"plt.plot([0,1], [0,1], \"k--\") \n",
"plt.title(\"ROC - Testdaten\") \n",
"plt.legend() \n",
"plt.grid(True) \n",
"plt.show() \n",
"\n",
"# Precision-Recall \n",
"precision, recall, _ = precision_recall_curve(y_test, y_prob_test) \n",
"plt.figure(figsize=(7,6)) \n",
"plt.plot(recall, precision) \n",
"plt.title(\"Precision-Recall - Testdaten\") \n",
"plt.grid(True) \n",
"plt.show() \n",
"\n",
"# Metriken \n",
"print(\"Accuracy:\", accuracy_score(y_test, y_pred_test))\n",
"print(\"F1-Score:\", f1_score(y_test, y_pred_test)) \n",
"print(\"Balanced Accuracy:\", balanced_accuracy_score(y_test, y_pred_test)) \n",
"print(\"Precision:\", precision_score(y_test, y_pred_test)) \n",
"print(\"Recall:\", recall_score(y_test, y_pred_test)) \n",
"print(\"AUC:\", roc_auc)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
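
The input_shape fix above matters because X carries a trailing channel axis: its shape is (n_samples, n_features, 1), so len(X) is the sample count, not the feature count. A tiny NumPy check of the shapes involved (35 features is an arbitrary example):

import numpy as np

X = np.random.rand(100, 35)[..., np.newaxis]  # 100 windows, 35 features, 1 channel
print(X.shape)     # (100, 35, 1)
print(X.shape[1])  # 35 -> use this (or len(feature_columns)) for input_shape=(35, 1)
print(len(X))      # 100 -> sample count; passing this as input_shape was the bug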

File diff suppressed because one or more lines are too long

View File

@ -1,458 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "b65b6b7d",
"metadata": {},
"source": [
"Imports"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "530e70af",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd \n",
"import numpy as np \n",
"import matplotlib.pyplot as plt \n",
"import seaborn as sns \n",
"import random \n",
"import joblib \n",
"from pathlib import Path \n",
"\n",
"from sklearn.model_selection import GroupKFold, GroupShuffleSplit \n",
"from sklearn.preprocessing import StandardScaler \n",
"from sklearn.metrics import ( \n",
" precision_score, recall_score,\n",
" confusion_matrix, roc_curve, auc, \n",
" precision_recall_curve, f1_score, \n",
" balanced_accuracy_score, accuracy_score\n",
") \n",
"\n",
"import tensorflow as tf \n",
"from tensorflow.keras import Input, layers, models"
]
},
{
"cell_type": "markdown",
"id": "0d01127c",
"metadata": {},
"source": [
"Seed"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "67aaf56e",
"metadata": {},
"outputs": [],
"source": [
"SEED = 42 \n",
"np.random.seed(SEED) \n",
"tf.random.set_seed(SEED) \n",
"random.seed(SEED)"
]
},
{
"cell_type": "markdown",
"id": "844e250c",
"metadata": {},
"source": [
"Daten laden "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "73a34b69",
"metadata": {},
"outputs": [],
"source": [
"data_path = Path(r\"~/data-paulusjafahrsimulator-gpu/new_datasets/50s_25Hz_dataset.parquet\") \n",
"data = pd.read_parquet(path=data_path)"
]
},
{
"cell_type": "markdown",
"id": "325179d3",
"metadata": {},
"source": [
"Daten vorbereiten"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a5ad3126",
"metadata": {},
"outputs": [],
"source": [
"low_all = data[ \n",
" ((data[\"PHASE\"] == \"baseline\") | \n",
" ((data[\"STUDY\"] == \"n-back\") & \n",
" (data[\"PHASE\"] != \"baseline\") & \n",
" (data[\"LEVEL\"].isin([1, 4])))) \n",
"].copy() \n",
"\n",
"high_all = pd.concat([ \n",
" data[(data[\"STUDY\"] == \"n-back\") & \n",
" (data[\"LEVEL\"].isin([2, 3, 5, 6])) & \n",
" (data[\"PHASE\"].isin([\"train\", \"test\"]))], \n",
" data[(data[\"STUDY\"] == \"k-drive\") & (data[\"PHASE\"] != \"baseline\")] \n",
"]).copy() \n",
"\n",
"low_all[\"label\"] = 0 \n",
"high_all[\"label\"] = 1 \n",
"\n",
"data = pd.concat([low_all, high_all], ignore_index=True).drop_duplicates()"
]
},
{
"cell_type": "markdown",
"id": "fd843b62",
"metadata": {},
"source": [
"Features"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5f10e6ca",
"metadata": {},
"outputs": [],
"source": [
"au_columns = [col for col in data.columns if \"face\" in col.lower()] \n",
"\n",
"eye_columns = [ \n",
" 'Fix_count_short_66_150','Fix_count_medium_300_500','Fix_count_long_gt_1000', \n",
" 'Fix_count_100','Fix_mean_duration','Fix_median_duration', \n",
" 'Sac_count','Sac_mean_amp','Sac_mean_dur','Sac_median_dur', \n",
" 'Blink_count','Blink_mean_dur','Blink_median_dur', \n",
" 'Pupil_mean','Pupil_IPA' \n",
"] \n",
"\n",
"# NaNs entfernen \n",
"data = data.dropna(subset=au_columns + eye_columns + [\"label\"]) \n",
"\n",
"# Arrays \n",
"X_au = data[au_columns].values[..., np.newaxis] \n",
"X_eye = data[eye_columns].values \n",
"y = data[\"label\"].values \n",
"groups = data[\"subjectID\"].values"
]
},
{
"cell_type": "markdown",
"id": "cabe09af",
"metadata": {},
"source": [
"Train/Test Split"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "52d3b7cf",
"metadata": {},
"outputs": [],
"source": [
"gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)\n",
"train_idx, test_idx = next(gss.split(X_au, y, groups))\n",
"\n",
"X_au_train, X_au_test = X_au[train_idx], X_au[test_idx]\n",
"X_eye_train, X_eye_test = X_eye[train_idx], X_eye[test_idx]\n",
"y_train, y_test = y[train_idx], y[test_idx]\n",
"groups_train, groups_test = groups[train_idx], groups[test_idx]\n",
"\n",
"print(\"Train:\", len(y_train), \" | Test:\", len(y_test))"
]
},
{
"cell_type": "markdown",
"id": "6dedded5",
"metadata": {},
"source": [
"Hybrid CNN-Modell"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "41cc1b30",
"metadata": {},
"outputs": [],
"source": [
"def build_hybrid_model(n_aus, n_eye, lr=1e-4): \n",
" input_au = Input(shape=(n_aus, 1), name=\"au_input\") \n",
" x = layers.Conv1D(32, 3, activation=\"relu\")(input_au) \n",
" x = layers.BatchNormalization()(x) \n",
" x = layers.MaxPooling1D(2)(x) \n",
" x = layers.Conv1D(64, 3, activation=\"relu\")(x) \n",
" x = layers.BatchNormalization()(x) \n",
" x = layers.GlobalAveragePooling1D()(x) \n",
"\n",
" input_eye = Input(shape=(n_eye,), name=\"eye_input\") \n",
" e = layers.Dense(32, activation=\"relu\")(input_eye) \n",
" e = layers.Dropout(0.3)(e) \n",
" e = layers.Dense(16, activation=\"relu\")(e) \n",
"\n",
" fused = layers.concatenate([x, e]) \n",
" z = layers.Dense(32, activation=\"relu\")(fused) \n",
" z = layers.Dropout(0.4)(z) \n",
" output = layers.Dense(1, activation=\"sigmoid\")(z) \n",
"\n",
" model = models.Model(inputs=[input_au, input_eye], outputs=output) \n",
" model.compile( \n",
" optimizer=tf.keras.optimizers.Adam(learning_rate=lr), \n",
" loss=\"binary_crossentropy\", \n",
" metrics=[\"accuracy\", tf.keras.metrics.AUC(name=\"auc\")] \n",
" ) \n",
" \n",
" return model"
]
},
{
"cell_type": "markdown",
"id": "cea6d0d0",
"metadata": {},
"source": [
"Cross Validation (nur Trainingsdaten)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9c390b46",
"metadata": {},
"outputs": [],
"source": [
"gkf = GroupKFold(n_splits=5) \n",
"cv_histories = [] \n",
"cv_results = [] \n",
"all_conf_matrices = [] \n",
"\n",
"for fold, (tr_idx, va_idx) in enumerate(gkf.split(X_au_train, y_train, groups_train)): \n",
" print(f\"\\n===== FOLD {fold+1} =====\") \n",
" \n",
" X_tr_au, X_va_au = X_au_train[tr_idx], X_au_train[va_idx] \n",
" X_tr_eye, X_va_eye = X_eye_train[tr_idx], X_eye_train[va_idx] \n",
" y_tr, y_va = y_train[tr_idx], y_train[va_idx] \n",
" \n",
" # Scaler pro Fold \n",
" scaler_au = StandardScaler() \n",
" scaler_eye = StandardScaler() \n",
" \n",
" X_tr_au = scaler_au.fit_transform(X_tr_au.reshape(len(X_tr_au), -1)).reshape(X_tr_au.shape) \n",
" X_va_au = scaler_au.transform(X_va_au.reshape(len(X_va_au), -1)).reshape(X_va_au.shape) \n",
" \n",
" X_tr_eye = scaler_eye.fit_transform(X_tr_eye) \n",
" X_va_eye = scaler_eye.transform(X_va_eye) \n",
" \n",
" # Modell \n",
" model_cv = build_hybrid_model(len(au_columns), len(eye_columns)) \n",
" \n",
" callbacks = [ \n",
" tf.keras.callbacks.EarlyStopping(monitor=\"val_loss\", patience=10, restore_best_weights=True), \n",
" tf.keras.callbacks.ReduceLROnPlateau(monitor=\"val_loss\", factor=0.5, patience=5, min_lr=1e-6) \n",
" ] \n",
" \n",
" history = model_cv.fit( \n",
" [X_tr_au, X_tr_eye], y_tr, \n",
" validation_data=([X_va_au, X_va_eye], y_va), \n",
" epochs=100, \n",
" batch_size=16, \n",
" verbose=0 \n",
" ) \n",
" \n",
" cv_histories.append(history.history) \n",
" \n",
" # Evaluation \n",
" scores = model_cv.evaluate([X_va_au, X_va_eye], y_va, verbose=0) \n",
" cv_results.append(scores) \n",
" print(f\"Val Loss={scores[0]:.4f} | Val Acc={scores[1]:.4f} | Val AUC={scores[2]:.4f}\") \n",
" \n",
" # Konfusionsmatrix pro Fold \n",
" y_pred_va = (model_cv.predict([X_va_au, X_va_eye]) > 0.5).astype(int) \n",
" cm = confusion_matrix(y_va, y_pred_va) \n",
" all_conf_matrices.append(cm) \n",
" \n",
" plt.figure(figsize=(6,5)) \n",
" sns.heatmap(cm, annot=True, fmt=\"d\", cmap=\"Blues\", \n",
" xticklabels=[\"Pred 0\", \"Pred 1\"], \n",
" yticklabels=[\"True 0\", \"True 1\"]) \n",
" plt.title(f\"Konfusionsmatrix - Fold {fold+1}\") \n",
" plt.show() \n",
" \n",
"# Aggregierte Konfusionsmatrix \n",
"agg_cm = sum(all_conf_matrices) \n",
"\n",
"plt.figure(figsize=(6,5)) \n",
"sns.heatmap(agg_cm, annot=True, fmt=\"d\", cmap=\"Purples\", \n",
" xticklabels=[\"Pred 0\", \"Pred 1\"], \n",
" yticklabels=[\"True 0\", \"True 1\"]) \n",
"plt.title(\"Aggregierte Konfusionsmatrix - alle Folds\") \n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "97df9df1",
"metadata": {},
"source": [
"Results"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9eae5c0f",
"metadata": {},
"outputs": [],
"source": [
"#results\n",
"cv_results = np.array(cv_results) \n",
"print(\"\\n=== Cross-Validation Ergebnisse ===\") \n",
"print(f\"Durchschnittlicher Val-Loss: {cv_results[:,0].mean():.4f}\") \n",
"print(f\"Durchschnittliche Val-Accuracy: {cv_results[:,1].mean():.4f}\") \n",
"print(f\"Durchschnittliche Val-AUC: {cv_results[:,2].mean():.4f}\")\n",
"\n",
"#Ergebnis-Tabelle erstellen\n",
"results_table = pd.DataFrame({ \n",
" \"Fold\": np.arange(1, len(cv_results)+1), \n",
" \"Val Loss\": cv_results[:,0], \n",
" \"Val Accuracy\": cv_results[:,1], \n",
" \"Val AUC\": cv_results[:,2] }) \n",
"\n",
"# Durchschnittszeile hinzufügen \n",
"avg_row = pd.DataFrame({ \n",
" \"Fold\": [\"Ø\"], \n",
" \"Val Loss\": [cv_results[:,0].mean()], \n",
" \"Val Accuracy\": [cv_results[:,1].mean()], \n",
" \"Val AUC\": [cv_results[:,2].mean()] \n",
"}) \n",
"\n",
"results_table = pd.concat([results_table, avg_row], ignore_index=True) \n",
"\n",
"print(\"\\n=== Ergebnis-Tabelle ===\") \n",
"print(results_table) \n",
"\n",
"#Tabelle speichern \n",
"results_table.to_csv(\"cnn_crossVal_results.csv\", index=False) \n",
"print(\"Ergebnisse gespeichert als 'cnn_crossVal_results.csv'\")"
]
},
{
"cell_type": "markdown",
"id": "7e564308",
"metadata": {},
"source": [
"Speichern des Modells"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9afc926b",
"metadata": {},
"outputs": [],
"source": [
"model_cv.save(\"hybrid_fusion_model_Test_group_split.keras\") \n",
"joblib.dump(scaler_au, \"scaler_au_Test_group_split.joblib\") \n",
"joblib.dump(scaler_eye, \"scaler_eye_Test_group_split.joblib\") \n",
"\n",
"print(\"Finales Modell gespeichert.\")"
]
},
{
"cell_type": "markdown",
"id": "391af5d5",
"metadata": {},
"source": [
"Test"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0bb8c14c",
"metadata": {},
"outputs": [],
"source": [
"# Preprocessing Testdaten \n",
"X_au_test_scaled = scaler_au.transform( \n",
" X_au_test.reshape(len(X_au_test), -1) \n",
").reshape(X_au_test.shape) \n",
"\n",
"X_eye_test_scaled = scaler_eye.transform(X_eye_test) \n",
"\n",
"# Vorhersagen \n",
"y_prob_test = model_cv.predict([X_au_test_scaled, X_eye_test_scaled]).flatten() \n",
"y_pred_test = (y_prob_test > 0.5).astype(int) \n",
"\n",
"# Konfusionsmatrix \n",
"cm_test = confusion_matrix(y_test, y_pred_test) \n",
"\n",
"plt.figure(figsize=(6,5)) \n",
"sns.heatmap(cm_test, annot=True, fmt=\"d\", cmap=\"Greens\", \n",
" xticklabels=[\"Pred 0\", \"Pred 1\"], \n",
" yticklabels=[\"True 0\", \"True 1\"]) \n",
"plt.title(\"Konfusionsmatrix - Testdaten\") \n",
"plt.show() \n",
"\n",
"# ROC \n",
"fpr, tpr, _ = roc_curve(y_test, y_prob_test) \n",
"roc_auc = auc(fpr, tpr) \n",
"\n",
"plt.figure(figsize=(7,6)) \n",
"plt.plot(fpr, tpr, label=f\"AUC = {roc_auc:.3f}\") \n",
"plt.plot([0,1], [0,1], \"k--\") \n",
"plt.title(\"ROC - Testdaten\") \n",
"plt.legend() \n",
"plt.grid(True) \n",
"plt.show() \n",
"\n",
"# Precision-Recall \n",
"precision, recall, _ = precision_recall_curve(y_test, y_prob_test) \n",
"plt.figure(figsize=(7,6)) \n",
"plt.plot(recall, precision) \n",
"plt.title(\"Precision-Recall - Testdaten\") \n",
"plt.grid(True) \n",
"plt.show() \n",
"\n",
"# Metriken \n",
"print(\"Accuracy:\", accuracy_score(y_test, y_pred_test))\n",
"print(\"F1-Score:\", f1_score(y_test, y_pred_test)) \n",
"print(\"Balanced Accuracy:\", balanced_accuracy_score(y_test, y_pred_test)) \n",
"print(\"Precision:\", precision_score(y_test, y_pred_test)) \n",
"print(\"Recall:\", recall_score(y_test, y_pred_test)) \n",
"print(\"AUC:\", roc_auc)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
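
A quick smoke test for the two-input hybrid model confirms the expected shapes before any real data is involved. A sketch with random arrays, assuming the notebook's imports and its build_hybrid_model are in scope (batch size and feature counts here are arbitrary):

import numpy as np

n_aus, n_eye = 20, 15
model = build_hybrid_model(n_aus, n_eye)
X_au = np.random.rand(8, n_aus, 1).astype("float32")
X_eye = np.random.rand(8, n_eye).astype("float32")
probs = model.predict([X_au, X_eye])
print(probs.shape)  # (8, 1) -- one sigmoid probability per sample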

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,308 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d48f2e13",
"metadata": {},
"source": [
"Importe"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e34b838d",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np \n",
"import pandas as pd \n",
"import joblib \n",
"import seaborn as sns \n",
"import matplotlib.pyplot as plt \n",
"\n",
"from sklearn.metrics import ( \n",
" confusion_matrix, \n",
" roc_curve, auc, \n",
" precision_recall_curve, \n",
" f1_score, \n",
" balanced_accuracy_score \n",
")\n",
" \n",
"import tensorflow as tf"
]
},
{
"cell_type": "markdown",
"id": "324554b5",
"metadata": {},
"source": [
"Modell und Scaler laden"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4acc3d2f",
"metadata": {},
"outputs": [],
"source": [
"model = tf.keras.models.load_model(\"hybrid_fusion_model_V2.keras\") \n",
"scaler_au = joblib.load(\"scaler_au_V2.joblib\") \n",
"scaler_eye = joblib.load(\"scaler_eye_V2.joblib\")\n",
"\n",
"print(\"Modell & Scaler erfolgreich geladen.\")"
]
},
{
"cell_type": "markdown",
"id": "4271cbee",
"metadata": {},
"source": [
"Features laden"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8342ea10",
"metadata": {},
"outputs": [],
"source": [
"au_columns = [...] \n",
"eye_columns = [...]"
]
},
{
"cell_type": "markdown",
"id": "4a58b20c",
"metadata": {},
"source": [
"Preprocessing"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b683be47",
"metadata": {},
"outputs": [],
"source": [
"def preprocess_sample(df, au_columns, eye_columns, scaler_au, scaler_eye):\n",
" # AUs\n",
" X_au = df[au_columns].values\n",
" X_au = scaler_au.transform(X_au).reshape(len(df), len(au_columns), 1)\n",
"\n",
" # Eye\n",
" X_eye = df[eye_columns].values\n",
" X_eye = scaler_eye.transform(X_eye)\n",
"\n",
" return X_au, X_eye"
]
},
{
"cell_type": "markdown",
"id": "9dc99a3d",
"metadata": {},
"source": [
"Predict-Funktion"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "00295aa6",
"metadata": {},
"outputs": [],
"source": [
"def predict_workload(df, model, au_columns, eye_columns, scaler_au, scaler_eye):\n",
" X_au, X_eye = preprocess_sample(df, au_columns, eye_columns, scaler_au, scaler_eye)\n",
"\n",
" probs = model.predict([X_au, X_eye]).flatten()\n",
" preds = (probs > 0.5).astype(int)\n",
" \n",
" return preds, probs"
]
},
{
"cell_type": "markdown",
"id": "5753516b",
"metadata": {},
"source": [
"Testdaten laden"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8875b0ee",
"metadata": {},
"outputs": [],
"source": [
"test_data = pd.read_csv(\"test_data.csv\") # oder direkt aus Notebook 1 exportieren \n",
"\n",
"X_au_test = test_data[au_columns].values[..., np.newaxis] \n",
"X_eye_test = test_data[eye_columns].values \n",
"y_test = test_data[\"label\"].values \n",
"groups_test = test_data[\"subjectID\"].values \n",
"\n",
"X_au_test_scaled = scaler_au.transform(X_au_test.reshape(len(X_au_test), -1)).reshape(X_au_test.shape) \n",
"X_eye_test_scaled = scaler_eye.transform(X_eye_test)"
]
},
{
"cell_type": "markdown",
"id": "332a3a07",
"metadata": {},
"source": [
"Vorhersagen"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b5f58ece",
"metadata": {},
"outputs": [],
"source": [
"y_prob = model.predict([X_au_test_scaled, X_eye_test_scaled]).flatten() \n",
"y_pred = (y_prob > 0.5).astype(int)"
]
},
{
"cell_type": "markdown",
"id": "3bc5c66c",
"metadata": {},
"source": [
"Konfusionsmatrix"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "40648dd7",
"metadata": {},
"outputs": [],
"source": [
"cm = confusion_matrix(y_test, y_pred) \n",
"plt.figure(figsize=(6,5)) \n",
"sns.heatmap(cm, annot=True, fmt=\"d\", cmap=\"Blues\", \n",
" xticklabels=[\"Pred 0\", \"Pred 1\"], \n",
" yticklabels=[\"True 0\", \"True 1\"]) \n",
"plt.title(\"Konfusionsmatrix - Testdaten\") \n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "e79ad8a6",
"metadata": {},
"source": [
"ROC"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dd93f15c",
"metadata": {},
"outputs": [],
"source": [
"fpr, tpr, _ = roc_curve(y_test, y_prob) \n",
"roc_auc = auc(fpr, tpr) \n",
"\n",
"plt.figure(figsize=(7,6)) \n",
"plt.plot(fpr, tpr, label=f\"AUC = {roc_auc:.3f}\") \n",
"plt.plot([0,1], [0,1], \"k--\") \n",
"plt.xlabel(\"False Positive Rate\") \n",
"plt.ylabel(\"True Positive Rate\") \n",
"plt.title(\"ROCKurve Testdaten\") \n",
"plt.legend() \n",
"plt.grid(True) \n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "2eaaf2a0",
"metadata": {},
"source": [
"Precision-Recall"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "601e5dc9",
"metadata": {},
"outputs": [],
"source": [
"precision, recall, _ = precision_recall_curve(y_test, y_prob) \n",
"plt.figure(figsize=(7,6)) \n",
"plt.plot(recall, precision) \n",
"plt.xlabel(\"Recall\") \n",
"plt.ylabel(\"Precision\") \n",
"plt.title(\"PrecisionRecallKurve Testdaten\")\n",
"plt.grid(True) \n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "270af771",
"metadata": {},
"source": [
"Scores"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e2e7da5b",
"metadata": {},
"outputs": [],
"source": [
"print(\"F1Score:\", f1_score(y_test, y_pred)) \n",
"print(\"Balanced Accuracy:\", balanced_accuracy_score(y_test, y_pred))"
]
},
{
"cell_type": "markdown",
"id": "c6e22e1a",
"metadata": {},
"source": [
"Subject-Performance"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "731aaf73",
"metadata": {},
"outputs": [],
"source": [
"df_eval = pd.DataFrame({ \n",
" \"subject\": groups_test, \n",
" \"y_true\": y_test, \n",
" \"y_pred\": y_pred \n",
"}) \n",
"\n",
"subject_perf = df_eval.groupby(\"subject\").apply( \n",
" lambda x: balanced_accuracy_score(x[\"y_true\"], x[\"y_pred\"]) \n",
") \n",
"\n",
"print(\"\\n=== Balanced Accuracy pro Proband ===\") \n",
"print(subject_perf.sort_values())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
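
The evaluation keeps the default 0.5 cut-off everywhere. When classes are imbalanced, a threshold read off the precision-recall curve can be worth comparing; a sketch (not part of the original notebooks):

import numpy as np
from sklearn.metrics import precision_recall_curve

def best_f1_threshold(y_true, y_prob):
    # scan the PR-curve thresholds for the one maximizing F1
    precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
    f1 = 2 * precision * recall / np.clip(precision + recall, 1e-12, None)
    return thresholds[np.argmax(f1[:-1])]  # the last PR point has no threshold

# e.g. thr = best_f1_threshold(y_test, y_prob); y_pred = (y_prob > thr).astype(int)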

View File

@ -107,8 +107,7 @@
"metadata": {},
"outputs": [],
"source": [
"dataset_path = Path(r\"data-paulusjafahrsimulator-gpu/new_datasets/combined_dataset_25hz.parquet\")\n",
"# dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/120s_combined_dataset_25hz.parquet\")"
"dataset_path = Path(r\"data-paulusjafahrsimulator-gpu/new_datasets/combined_dataset_25hz.parquet\")"
]
},
{
@ -476,7 +475,7 @@
"metadata": {},
"outputs": [],
"source": [
"normalizer_path=Path('data-paulusjafahrsimulator-gpu/saved_models/deepsvdd_save/normalizer_min_max_global.pkl')"
"normalizer_path=Path('data-paulusjafahrsimulator-gpu/saved_models/deepsvdd_save/normalizer.pkl')"
]
},
{
@ -495,7 +494,7 @@
"print(len(eye_cols))\n",
"all_signal_columns = face_au_cols+eye_cols\n",
"print(len(all_signal_columns))\n",
"normalizer = fit_normalizer(train_df, all_signal_columns, method='minmax', scope='global')\n",
"normalizer = fit_normalizer(train_df, all_signal_columns, method='standard', scope='subject')\n",
"save_normalizer(normalizer, normalizer_path )"
]
},
@ -692,10 +691,10 @@
"model = build_intermediate_fusion_autoencoder(\n",
" input_dim_mod1=len(face_au_cols),\n",
" input_dim_mod2=len(eye_cols),\n",
" encoder_hidden_dim_mod1=12, # individuell\n",
" encoder_hidden_dim_mod2=8, # individuell\n",
" latent_dim=4,\n",
" dropout_rate=0.7, # einstellbar\n",
" encoder_hidden_dim_mod1=15, # individuell\n",
" encoder_hidden_dim_mod2=10, # individuell\n",
" latent_dim=8,\n",
" dropout_rate=0.3, # einstellbar\n",
" neg_slope=0.1,\n",
" weight_decay=1e-3\n",
")\n",
@ -709,7 +708,7 @@
" \"recon_modality_1\": 1.0,\n",
" \"recon_modality_2\": 1.0,\n",
" },\n",
" optimizer=tf.keras.optimizers.Adam(1e-3)\n",
" optimizer=tf.keras.optimizers.Adam(1e-2)\n",
" \n",
")\n",
"\n",
@ -740,7 +739,7 @@
" \"recon_modality_1\": 1.0,\n",
" \"recon_modality_2\": 1.0,\n",
" },\n",
" optimizer=tf.keras.optimizers.Adam(1e-4),\n",
" optimizer=tf.keras.optimizers.Adam(1e-5),\n",
")\n",
"model.fit(\n",
" x=[X_face, X_eye],\n",
@ -780,7 +779,7 @@
"metadata": {},
"outputs": [],
"source": [
"encoder_save_path =Path('data-paulusjafahrsimulator-gpu/saved_models/deepsvdd_save/encoder_8_deep.keras')\n",
"encoder_save_path =Path('data-paulusjafahrsimulator-gpu/saved_models/deepsvdd_save/encoder_6_deep.keras')\n",
"encoder.save(encoder_save_path)"
]
},
@ -944,7 +943,7 @@
" return get_radius_from_arrays(nu, X_face, X_eye)\n",
"\n",
"\n",
"nu = 0.25\n",
"nu = 0.05\n",
"\n",
"train_dataset = tf.data.Dataset.from_tensor_slices((X_face, X_eye)).shuffle(64).batch(64)\n",
"# train_dataset = tf.data.Dataset.from_tensor_slices((X_face, X_eye))\n",
@ -1019,7 +1018,7 @@
"metadata": {},
"outputs": [],
"source": [
"deep_svdd_save_path =Path('data-paulusjafahrsimulator-gpu/saved_models/deepsvdd_save/deep_svdd_06.keras')\n",
"deep_svdd_save_path =Path('data-paulusjafahrsimulator-gpu/saved_models/deepsvdd_save/deep_svdd_05.keras')\n",
"deep_svdd_net.save(deep_svdd_save_path)"
]
},
@ -1076,18 +1075,6 @@
"test_predictions = (test_scores > 0).astype(int)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "575dddcf",
"metadata": {},
"outputs": [],
"source": [
"normal_acc = np.mean(test_predictions[y_test == 0] == 0)\n",
"anomaly_acc = np.mean(test_predictions[y_test == 1] == 1)\n",
"print(f'Accuracy on Test set: {accuracy_score(y_test, test_predictions)}')"
]
},
{
"cell_type": "code",
"execution_count": null,

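get_radius_from_arrays itself is not shown in this diff; in Deep SVDD the radius is commonly set so that a fraction nu of the training distances falls outside the sphere, i.e. the (1 - nu)-quantile. A sketch of that rule, under this assumption only:

import numpy as np

def radius_from_distances(nu, distances):
    # (1 - nu)-quantile: about nu of the training points end up outside the sphere
    return float(np.quantile(distances, 1.0 - nu))

print(radius_from_distances(0.05, np.random.rand(1000)))  # close to 0.95 for uniform toy data
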
View File

@ -220,637 +220,14 @@
"outputs": [],
"source": [
"# SET\n",
"threshold_mad = 5\n",
"threshold_mad = 100\n",
"column_praefix ='AU'\n",
"\n",
"au_columns = [col for col in df.columns if col.startswith(column_praefix)]\n",
"cleaned_df = mad_outlier_removal.mad_outlier_removal(df,columns=au_columns, threshold=threshold_mad)\n",
"cleaned_df = mad_outlier_removal(df,columns=au_columns, threshold=threshold_mad)\n",
"print(cleaned_df.shape)\n",
"print(df.shape)"
]
},
{
"cell_type": "markdown",
"id": "9a6c1732",
"metadata": {},
"source": [
"#### TO DO\n",
" * pipeline aus Autoencoder und SVM\n",
" * group k fold\n",
" * AE überpüfen, loss dokumentieren"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "877309d9",
"metadata": {},
"outputs": [],
"source": [
"### Variational Autoencoder with Classifier Head\n",
"import pandas as pd\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"from tensorflow.keras import layers, Model\n",
"from sklearn.model_selection import GroupKFold\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.metrics import (\n",
" accuracy_score, precision_score, recall_score, f1_score, \n",
" roc_auc_score, confusion_matrix, classification_report\n",
")\n",
"import matplotlib.pyplot as plt\n",
"from collections import defaultdict\n",
"\n",
"# ============================================================================\n",
"# 1. CREATE LABELS\n",
"# ============================================================================\n",
"\n",
"# Low workload: baseline + n-back level 1,4\n",
"low_all = cleaned_df[\n",
" ((cleaned_df[\"PHASE\"] == \"baseline\") |\n",
" ((cleaned_df[\"STUDY\"] == \"n-back\") & (cleaned_df[\"PHASE\"] != \"baseline\") & (cleaned_df[\"LEVEL\"].isin([1,4]))))\n",
"].copy()\n",
"low_all['label'] = 0\n",
"print(f\"Low workload samples: {low_all.shape[0]}\")\n",
"\n",
"# High workload n-back: level 2,3,5,6\n",
"high_nback = cleaned_df[\n",
" (cleaned_df[\"STUDY\"]==\"n-back\") &\n",
" (cleaned_df[\"LEVEL\"].isin([2, 3, 5, 6])) &\n",
" (cleaned_df[\"PHASE\"].isin([\"train\", \"test\"]))\n",
"].copy()\n",
"high_nback['label'] = 1\n",
"print(f\"High n-back samples: {high_nback.shape[0]}\")\n",
"\n",
"# High workload k-drive\n",
"high_kdrive = cleaned_df[\n",
" (cleaned_df[\"STUDY\"] == \"k-drive\") & (cleaned_df[\"PHASE\"] != \"baseline\")\n",
"].copy()\n",
"high_kdrive['label'] = 1\n",
"print(f\"High k-drive samples: {high_kdrive.shape[0]}\")\n",
"\n",
"# Combine all high workload\n",
"high_all = pd.concat([high_nback, high_kdrive])\n",
"print(f\"Total high workload samples: {high_all.shape[0]}\")\n",
"\n",
"# Complete labeled dataset\n",
"labeled_df = pd.concat([low_all, high_all]).reset_index(drop=True)\n",
"print(f\"\\nTotal labeled samples: {labeled_df.shape[0]}\")\n",
"print(f\"Class distribution:\\n{labeled_df['label'].value_counts()}\")\n",
"\n",
"# ============================================================================\n",
"# 2. TRAIN/TEST SPLIT BY SUBJECTS\n",
"# ============================================================================\n",
"\n",
"train_df = labeled_df[labeled_df['subjectID'].isin(training_subjects)].copy()\n",
"test_df = labeled_df[labeled_df['subjectID'].isin(test_subjects)].copy()\n",
"\n",
"print(f\"\\nTraining subjects: {training_subjects}\")\n",
"print(f\"Test subjects: {test_subjects}\")\n",
"print(f\"Train samples: {train_df.shape[0]}, Test samples: {test_df.shape[0]}\")\n",
"\n",
"# Extract features and labels\n",
"au_columns = [col for col in labeled_df.columns if col.startswith('AU')]\n",
"print(f\"\\nUsing {len(au_columns)} AU features: {au_columns}\")\n",
"\n",
"X_train = train_df[au_columns].values\n",
"y_train = train_df['label'].values\n",
"groups_train = train_df['subjectID'].values\n",
"\n",
"X_test = test_df[au_columns].values\n",
"y_test = test_df['label'].values\n",
"\n",
"# Normalize features\n",
"scaler = StandardScaler()\n",
"X_train_scaled = scaler.fit_transform(X_train)\n",
"X_test_scaled = scaler.transform(X_test)\n",
"\n",
"print(f\"\\nTrain class distribution: {np.bincount(y_train)}\")\n",
"print(f\"Test class distribution: {np.bincount(y_test)}\")\n",
"\n",
"# ============================================================================\n",
"# 3. VAE WITH CLASSIFIER HEAD MODEL\n",
"# ============================================================================\n",
"\n",
"class Sampling(layers.Layer):\n",
" \"\"\"Reparameterization trick for VAE\"\"\"\n",
" def call(self, inputs):\n",
" z_mean, z_log_var = inputs\n",
" batch = tf.shape(z_mean)[0]\n",
" dim = tf.shape(z_mean)[1]\n",
" epsilon = tf.random.normal(shape=(batch, dim))\n",
" return z_mean + tf.exp(0.5 * z_log_var) * epsilon\n",
"\n",
"def build_vae_classifier(input_dim, latent_dim, encoder_dims=[32, 16], \n",
" decoder_dims=[16, 32], classifier_dims=[16]):\n",
" \"\"\"\n",
" Build VAE with classifier head\n",
" \n",
" Args:\n",
" input_dim: Number of input features (20 AUs)\n",
" latent_dim: Dimension of latent space (2-5)\n",
" encoder_dims: Hidden layer sizes for encoder\n",
" decoder_dims: Hidden layer sizes for decoder\n",
" classifier_dims: Hidden layer sizes for classifier\n",
" \"\"\"\n",
" \n",
" # ---- ENCODER ----\n",
" encoder_inputs = keras.Input(shape=(input_dim,), name='encoder_input')\n",
" x = encoder_inputs\n",
" \n",
" for i, dim in enumerate(encoder_dims):\n",
" x = layers.Dense(dim, activation='relu', name=f'encoder_dense_{i}')(x)\n",
" x = layers.BatchNormalization(name=f'encoder_bn_{i}')(x)\n",
" x = layers.Dropout(0.2, name=f'encoder_dropout_{i}')(x)\n",
" \n",
" z_mean = layers.Dense(latent_dim, name='z_mean')(x)\n",
" z_log_var = layers.Dense(latent_dim, name='z_log_var')(x)\n",
" z = Sampling()([z_mean, z_log_var])\n",
" \n",
" encoder = Model(encoder_inputs, [z_mean, z_log_var, z], name='encoder')\n",
" \n",
" # ---- DECODER ----\n",
" latent_inputs = keras.Input(shape=(latent_dim,), name='latent_input')\n",
" x = latent_inputs\n",
" \n",
" for i, dim in enumerate(decoder_dims):\n",
" x = layers.Dense(dim, activation='relu', name=f'decoder_dense_{i}')(x)\n",
" x = layers.BatchNormalization(name=f'decoder_bn_{i}')(x)\n",
" \n",
" decoder_outputs = layers.Dense(input_dim, activation='linear', name='decoder_output')(x)\n",
" decoder = Model(latent_inputs, decoder_outputs, name='decoder')\n",
" \n",
" # ---- CLASSIFIER HEAD ----\n",
" x = latent_inputs\n",
" for i, dim in enumerate(classifier_dims):\n",
" x = layers.Dense(dim, activation='relu', name=f'classifier_dense_{i}')(x)\n",
" x = layers.Dropout(0.3, name=f'classifier_dropout_{i}')(x)\n",
" \n",
" classifier_output = layers.Dense(1, activation='sigmoid', name='classifier_output')(x)\n",
" classifier = Model(latent_inputs, classifier_output, name='classifier')\n",
" \n",
" # ---- FULL MODEL ----\n",
" inputs = keras.Input(shape=(input_dim,), name='vae_input')\n",
" z_mean, z_log_var, z = encoder(inputs)\n",
" reconstructed = decoder(z)\n",
" classification = classifier(z)\n",
" \n",
" model = Model(inputs, [reconstructed, classification], name='vae_classifier')\n",
" \n",
" return model, encoder, decoder, classifier\n",
"\n",
"# ============================================================================\n",
"# 4. CUSTOM TRAINING LOOP WITH COMBINED LOSS\n",
"# ============================================================================\n",
"\n",
"class VAEClassifier(keras.Model):\n",
" def __init__(self, encoder, decoder, classifier, **kwargs):\n",
" super().__init__(**kwargs)\n",
" self.encoder = encoder\n",
" self.decoder = decoder\n",
" self.classifier = classifier\n",
" self.total_loss_tracker = keras.metrics.Mean(name=\"total_loss\")\n",
" self.reconstruction_loss_tracker = keras.metrics.Mean(name=\"reconstruction_loss\")\n",
" self.kl_loss_tracker = keras.metrics.Mean(name=\"kl_loss\")\n",
" self.classification_loss_tracker = keras.metrics.Mean(name=\"classification_loss\")\n",
" self.accuracy_tracker = keras.metrics.BinaryAccuracy(name=\"accuracy\")\n",
" \n",
" @property\n",
" def metrics(self):\n",
" return [\n",
" self.total_loss_tracker,\n",
" self.reconstruction_loss_tracker,\n",
" self.kl_loss_tracker,\n",
" self.classification_loss_tracker,\n",
" self.accuracy_tracker,\n",
" ]\n",
" \n",
" def train_step(self, data):\n",
" x, y = data\n",
" \n",
" with tf.GradientTape() as tape:\n",
" # Forward pass\n",
" z_mean, z_log_var, z = self.encoder(x, training=True)\n",
" reconstruction = self.decoder(z, training=True)\n",
" classification = self.classifier(z, training=True)\n",
" \n",
" # Reconstruction loss (MSE)\n",
" reconstruction_loss = tf.reduce_mean(\n",
" keras.losses.mse(x, reconstruction))\n",
" \n",
" # KL divergence loss\n",
" kl_loss = -0.5 * tf.reduce_mean(\n",
" tf.reduce_sum(\n",
" 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var),\n",
" axis=1\n",
" )\n",
" )\n",
" \n",
" # Classification loss (binary crossentropy)\n",
" # Classification loss (binary crossentropy)\n",
" classification_loss = tf.reduce_mean(\n",
" keras.losses.binary_crossentropy(tf.expand_dims(y, -1), classification)\n",
" )\n",
" \n",
" # Combined loss with weights\n",
" total_loss = reconstruction_loss + kl_loss + classification_loss\n",
" \n",
" # Backpropagation\n",
" grads = tape.gradient(total_loss, self.trainable_weights)\n",
" self.optimizer.apply_gradients(zip(grads, self.trainable_weights))\n",
" \n",
" # Update metrics\n",
" self.total_loss_tracker.update_state(total_loss)\n",
" self.reconstruction_loss_tracker.update_state(reconstruction_loss)\n",
" self.kl_loss_tracker.update_state(kl_loss)\n",
" self.classification_loss_tracker.update_state(classification_loss)\n",
" self.accuracy_tracker.update_state(y, classification)\n",
" \n",
" return {\n",
" \"total_loss\": self.total_loss_tracker.result(),\n",
" \"reconstruction_loss\": self.reconstruction_loss_tracker.result(),\n",
" \"kl_loss\": self.kl_loss_tracker.result(),\n",
" \"classification_loss\": self.classification_loss_tracker.result(),\n",
" \"accuracy\": self.accuracy_tracker.result(),\n",
" }\n",
" \n",
" def test_step(self, data):\n",
" x, y = data\n",
" \n",
" z_mean, z_log_var, z = self.encoder(x, training=False)\n",
" reconstruction = self.decoder(z, training=False)\n",
" classification = self.classifier(z, training=False)\n",
" \n",
" # Reconstruction loss (MSE)\n",
" reconstruction_loss = tf.reduce_mean(\n",
" keras.losses.mse(x, reconstruction))\n",
" kl_loss = -0.5 * tf.reduce_mean(\n",
" tf.reduce_sum(1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), axis=1)\n",
" )\n",
" # Classification loss (binary crossentropy)\n",
" classification_loss = tf.reduce_mean(\n",
" keras.losses.binary_crossentropy(tf.expand_dims(y, -1), classification)\n",
" )\n",
" total_loss = reconstruction_loss + kl_loss + classification_loss\n",
" \n",
" self.total_loss_tracker.update_state(total_loss)\n",
" self.reconstruction_loss_tracker.update_state(reconstruction_loss)\n",
" self.kl_loss_tracker.update_state(kl_loss)\n",
" self.classification_loss_tracker.update_state(classification_loss)\n",
" self.accuracy_tracker.update_state(y, classification)\n",
" \n",
" return {\n",
" \"total_loss\": self.total_loss_tracker.result(),\n",
" \"reconstruction_loss\": self.reconstruction_loss_tracker.result(),\n",
" \"kl_loss\": self.kl_loss_tracker.result(),\n",
" \"classification_loss\": self.classification_loss_tracker.result(),\n",
" \"accuracy\": self.accuracy_tracker.result(),\n",
" }\n",
"\n",
"# ============================================================================\n",
"# 5. GROUP K-FOLD CROSS-VALIDATION WITH GRID SEARCH\n",
"# ============================================================================\n",
"\n",
"# Hyperparameter grid\n",
"param_grid = {\n",
" 'latent_dim': [2, 5],\n",
" 'encoder_dims': [[32, 16], [64, 32]],\n",
" 'learning_rate': [0.001, 0.005],\n",
" 'batch_size': [32, 64],\n",
"}\n",
"\n",
"# Generate all combinations\n",
"from itertools import product\n",
"keys = param_grid.keys()\n",
"values = param_grid.values()\n",
"param_combinations = [dict(zip(keys, v)) for v in product(*values)]\n",
"\n",
"print(f\"\\nTotal hyperparameter combinations: {len(param_combinations)}\")\n",
"\n",
"# Group K-Fold setup\n",
"n_splits = 5\n",
"gkf = GroupKFold(n_splits=n_splits)\n",
"\n",
"# Store results\n",
"cv_results = []\n",
"\n",
"# Grid search with cross-validation\n",
"for idx, params in enumerate(param_combinations):\n",
" print(f\"\\n{'='*80}\")\n",
" print(f\"Testing combination {idx+1}/{len(param_combinations)}: {params}\")\n",
" print(f\"{'='*80}\")\n",
" \n",
" fold_results = []\n",
" \n",
" for fold, (train_idx, val_idx) in enumerate(gkf.split(X_train_scaled, y_train, groups_train)):\n",
" print(f\"\\nFold {fold+1}/{n_splits}\")\n",
" \n",
" X_fold_train, X_fold_val = X_train_scaled[train_idx], X_train_scaled[val_idx]\n",
" y_fold_train, y_fold_val = y_train[train_idx], y_train[val_idx]\n",
" \n",
" # Build model\n",
" model, encoder, decoder, classifier = build_vae_classifier(\n",
" input_dim=len(au_columns),\n",
" latent_dim=params['latent_dim'],\n",
" encoder_dims=params['encoder_dims'],\n",
" decoder_dims=list(reversed(params['encoder_dims'])),\n",
" classifier_dims=[16]\n",
" )\n",
" \n",
" vae_classifier = VAEClassifier(encoder, decoder, classifier)\n",
" vae_classifier.compile(optimizer=keras.optimizers.Adam(params['learning_rate']))\n",
" \n",
" # Early stopping\n",
" early_stop = keras.callbacks.EarlyStopping(\n",
" monitor='val_total_loss',\n",
" patience=10,\n",
" restore_best_weights=True,\n",
" mode='min'\n",
" )\n",
" \n",
" # Train\n",
" history = vae_classifier.fit(\n",
" X_fold_train, y_fold_train,\n",
" validation_data=(X_fold_val, y_fold_val),\n",
" epochs=60,\n",
" batch_size=params['batch_size'],\n",
" callbacks=[early_stop],\n",
" verbose=0\n",
" )\n",
" \n",
" # Evaluate on validation fold\n",
" z_mean_val, _, _ = encoder.predict(X_fold_val, verbose=0)\n",
" y_pred_proba = classifier.predict(z_mean_val, verbose=0).flatten()\n",
" y_pred = (y_pred_proba > 0.5).astype(int)\n",
" \n",
" fold_metrics = {\n",
" 'accuracy': accuracy_score(y_fold_val, y_pred),\n",
" 'precision': precision_score(y_fold_val, y_pred, zero_division=0),\n",
" 'recall': recall_score(y_fold_val, y_pred, zero_division=0),\n",
" 'f1': f1_score(y_fold_val, y_pred, zero_division=0),\n",
" 'roc_auc': roc_auc_score(y_fold_val, y_pred_proba),\n",
" 'final_recon_loss': history.history['val_reconstruction_loss'][-1],\n",
" 'final_kl_loss': history.history['val_kl_loss'][-1],\n",
" 'final_class_loss': history.history['val_classification_loss'][-1],\n",
" }\n",
" \n",
" fold_results.append(fold_metrics)\n",
" print(f\" Accuracy: {fold_metrics['accuracy']:.4f}, F1: {fold_metrics['f1']:.4f}, AUC: {fold_metrics['roc_auc']:.4f}\")\n",
" \n",
" # Clear session to free memory\n",
" keras.backend.clear_session()\n",
" \n",
" # Average across folds\n",
" avg_results = {\n",
" 'params': params,\n",
" 'mean_accuracy': np.mean([r['accuracy'] for r in fold_results]),\n",
" 'std_accuracy': np.std([r['accuracy'] for r in fold_results]),\n",
" 'mean_f1': np.mean([r['f1'] for r in fold_results]),\n",
" 'std_f1': np.std([r['f1'] for r in fold_results]),\n",
" 'mean_roc_auc': np.mean([r['roc_auc'] for r in fold_results]),\n",
" 'std_roc_auc': np.std([r['roc_auc'] for r in fold_results]),\n",
" 'mean_recon_loss': np.mean([r['final_recon_loss'] for r in fold_results]),\n",
" 'mean_kl_loss': np.mean([r['final_kl_loss'] for r in fold_results]),\n",
" 'mean_class_loss': np.mean([r['final_class_loss'] for r in fold_results]),\n",
" 'fold_results': fold_results\n",
" }\n",
" \n",
" cv_results.append(avg_results)\n",
" \n",
" print(f\"\\nMean CV Accuracy: {avg_results['mean_accuracy']:.4f} ± {avg_results['std_accuracy']:.4f}\")\n",
" print(f\"Mean CV F1: {avg_results['mean_f1']:.4f} ± {avg_results['std_f1']:.4f}\")\n",
" print(f\"Mean CV AUC: {avg_results['mean_roc_auc']:.4f} ± {avg_results['std_roc_auc']:.4f}\")\n",
"\n",
"# ============================================================================\n",
"# 6. SELECT BEST MODEL AND EVALUATE ON TEST SET\n",
"# ============================================================================\n",
"\n",
"# Find best hyperparameters based on mean F1 score\n",
"best_idx = np.argmax([r['mean_f1'] for r in cv_results])\n",
"best_params = cv_results[best_idx]['params']\n",
"\n",
"print(f\"\\n{'='*80}\")\n",
"print(\"BEST HYPERPARAMETERS (based on CV F1 score):\")\n",
"print(f\"{'='*80}\")\n",
"for key, value in best_params.items():\n",
" print(f\"{key}: {value}\")\n",
"print(f\"\\nCV Performance:\")\n",
"print(f\" Accuracy: {cv_results[best_idx]['mean_accuracy']:.4f} ± {cv_results[best_idx]['std_accuracy']:.4f}\")\n",
"print(f\" F1 Score: {cv_results[best_idx]['mean_f1']:.4f} ± {cv_results[best_idx]['std_f1']:.4f}\")\n",
"print(f\" ROC-AUC: {cv_results[best_idx]['mean_roc_auc']:.4f} ± {cv_results[best_idx]['std_roc_auc']:.4f}\")\n",
"\n",
"# Train final model on all training data\n",
"print(f\"\\n{'='*80}\")\n",
"print(\"TRAINING FINAL MODEL ON ALL TRAINING DATA\")\n",
"print(f\"{'='*80}\")\n",
"\n",
"final_model, final_encoder, final_decoder, final_classifier = build_vae_classifier(\n",
" input_dim=len(au_columns),\n",
" latent_dim=best_params['latent_dim'],\n",
" encoder_dims=best_params['encoder_dims'],\n",
" decoder_dims=list(reversed(best_params['encoder_dims'])),\n",
" classifier_dims=[16]\n",
")\n",
"\n",
"final_vae_classifier = VAEClassifier(final_encoder, final_decoder, final_classifier)\n",
"final_vae_classifier.compile(optimizer=keras.optimizers.Adam(best_params['learning_rate']))\n",
"\n",
"final_history = final_vae_classifier.fit(\n",
" X_train_scaled, y_train,\n",
" validation_split=0.2,\n",
" epochs=100,\n",
" batch_size=best_params['batch_size'],\n",
" callbacks=[keras.callbacks.EarlyStopping(monitor='val_total_loss', patience=15, restore_best_weights=True, mode='min')],\n",
" verbose=1\n",
")\n",
"\n",
"# Evaluate on held-out test set\n",
"print(f\"\\n{'='*80}\")\n",
"print(\"EVALUATION ON HELD-OUT TEST SET\")\n",
"print(f\"{'='*80}\")\n",
"\n",
"z_mean_test, _, _ = final_encoder.predict(X_test_scaled, verbose=0)\n",
"y_test_pred_proba = final_classifier.predict(z_mean_test, verbose=0).flatten()\n",
"y_test_pred = (y_test_pred_proba > 0.5).astype(int)\n",
"\n",
"test_metrics = {\n",
" 'accuracy': accuracy_score(y_test, y_test_pred),\n",
" 'precision': precision_score(y_test, y_test_pred),\n",
" 'recall': recall_score(y_test, y_test_pred),\n",
" 'f1': f1_score(y_test, y_test_pred),\n",
" 'roc_auc': roc_auc_score(y_test, y_test_pred_proba),\n",
"}\n",
"\n",
"print(\"\\nTest Set Performance:\")\n",
"for metric, value in test_metrics.items():\n",
" print(f\" {metric.capitalize()}: {value:.4f}\")\n",
"\n",
"print(\"\\nConfusion Matrix:\")\n",
"print(confusion_matrix(y_test, y_test_pred))\n",
"\n",
"print(\"\\nClassification Report:\")\n",
"print(classification_report(y_test, y_test_pred, target_names=['Low Workload', 'High Workload']))\n",
"\n",
"# ============================================================================\n",
"# 7. VISUALIZATION\n",
"# ============================================================================\n",
"\n",
"# Plot training history\n",
"fig, axes = plt.subplots(2, 2, figsize=(15, 10))\n",
"\n",
"axes[0, 0].plot(final_history.history['reconstruction_loss'], label='Train')\n",
"axes[0, 0].plot(final_history.history['val_reconstruction_loss'], label='Val')\n",
"axes[0, 0].set_title('Reconstruction Loss')\n",
"axes[0, 0].set_xlabel('Epoch')\n",
"axes[0, 0].set_ylabel('Loss')\n",
"axes[0, 0].legend()\n",
"axes[0, 0].grid(True)\n",
"\n",
"axes[0, 1].plot(final_history.history['kl_loss'], label='Train')\n",
"axes[0, 1].plot(final_history.history['val_kl_loss'], label='Val')\n",
"axes[0, 1].set_title('KL Divergence Loss')\n",
"axes[0, 1].set_xlabel('Epoch')\n",
"axes[0, 1].set_ylabel('Loss')\n",
"axes[0, 1].legend()\n",
"axes[0, 1].grid(True)\n",
"\n",
"axes[1, 0].plot(final_history.history['classification_loss'], label='Train')\n",
"axes[1, 0].plot(final_history.history['val_classification_loss'], label='Val')\n",
"axes[1, 0].set_title('Classification Loss')\n",
"axes[1, 0].set_xlabel('Epoch')\n",
"axes[1, 0].set_ylabel('Loss')\n",
"axes[1, 0].legend()\n",
"axes[1, 0].grid(True)\n",
"\n",
"axes[1, 1].plot(final_history.history['accuracy'], label='Train')\n",
"axes[1, 1].plot(final_history.history['val_accuracy'], label='Val')\n",
"axes[1, 1].set_title('Classification Accuracy')\n",
"axes[1, 1].set_xlabel('Epoch')\n",
"axes[1, 1].set_ylabel('Accuracy')\n",
"axes[1, 1].legend()\n",
"axes[1, 1].grid(True)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"# Visualize latent space (if 2D or 3D)\n",
"if best_params['latent_dim'] == 2:\n",
" z_mean_train, _, _ = final_encoder.predict(X_train_scaled, verbose=0)\n",
" \n",
" plt.figure(figsize=(10, 8))\n",
" scatter = plt.scatter(z_mean_train[:, 0], z_mean_train[:, 1], \n",
" c=y_train, cmap='RdYlBu', alpha=0.6, edgecolors='k')\n",
" plt.colorbar(scatter, label='Workload (0=Low, 1=High)')\n",
" plt.xlabel('Latent Dimension 1')\n",
" plt.ylabel('Latent Dimension 2')\n",
" plt.title('2D Latent Space Representation (Training Data)')\n",
" plt.grid(True, alpha=0.3)\n",
" plt.show()\n",
" \n",
" # Test set latent space\n",
" plt.figure(figsize=(10, 8))\n",
" scatter = plt.scatter(z_mean_test[:, 0], z_mean_test[:, 1], \n",
" c=y_test, cmap='RdYlBu', alpha=0.6, edgecolors='k')\n",
" plt.colorbar(scatter, label='Workload (0=Low, 1=High)')\n",
" plt.xlabel('Latent Dimension 1')\n",
" plt.ylabel('Latent Dimension 2')\n",
" plt.title('2D Latent Space Representation (Test Data)')\n",
" plt.grid(True, alpha=0.3)\n",
" plt.show()\n",
"\n",
"print(\"\\n\" + \"=\"*80)\n",
"print(\"TRAINING COMPLETE!\")\n",
"print(\"=\"*80)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "79bcfc58",
"metadata": {},
"outputs": [],
"source": [
"### Save Trained VAE Classifier Model\n",
"from pathlib import Path\n",
"from datetime import datetime\n",
"\n",
"# Define save path\n",
"model_dir = Path(\"/home/jovyan/data-paulusjafahrsimulator-gpu/trained_models\")\n",
"model_dir.mkdir(parents=True, exist_ok=True)\n",
"\n",
"timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n",
"model_path = model_dir / f\"vae_classifier_{timestamp}.keras\"\n",
"\n",
"# Save the complete model\n",
"final_vae_classifier.save(model_path)\n",
"\n",
"print(f\"Model saved to: {model_path}\")"
]
},
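{
"cell_type": "markdown",
"id": "a7e3f9c2",
"metadata": {},
"source": [
"Reloading a subclassed `keras.Model` such as `VAEClassifier` from a `.keras` file normally requires the loader to know the custom class (via `custom_objects` or `keras.saving.register_keras_serializable`) plus a matching `get_config`. A minimal, hedged sketch of the reload call, assuming `VAEClassifier` implements the needed config methods:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b8f4a0d3",
"metadata": {},
"outputs": [],
"source": [
"# Sketch (assumption, not verified for this model): subclassed Keras models\n",
"# only reload when the custom class is registered with the loader.\n",
"# reloaded = keras.models.load_model(\n",
"#     model_path,\n",
"#     custom_objects={\"VAEClassifier\": VAEClassifier},\n",
"# )"
]
},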
{
"cell_type": "code",
"execution_count": null,
"id": "d700e517",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "30d8d100",
"metadata": {},
"outputs": [],
"source": [
"### Plot Confusion Matrix for Final Model\n",
"from sklearn.metrics import ConfusionMatrixDisplay\n",
"x = Path(\"/home/jovyan/data-paulusjafahrsimulator-gpu/trained_models/vae_classifier_20251210_230121.keras\")\n",
"# Load the saved model\n",
"print(f\"Loading model from: {x}\")\n",
"# loaded_vae_classifier = tf.keras.models.load_model(x)\n",
"loaded_vae_classifier = final_vae_classifier\n",
"print(\"✓ Model loaded successfully!\")\n",
"\n",
"# Extract encoder and classifier from loaded model\n",
"loaded_encoder = loaded_vae_classifier.encoder\n",
"loaded_classifier = loaded_vae_classifier.classifier\n",
"\n",
"# Get predictions on test set\n",
"z_mean_test, _, _ = loaded_encoder.predict(X_test_scaled, verbose=0)\n",
"y_test_pred_proba = loaded_classifier.predict(z_mean_test, verbose=0).flatten()\n",
"y_test_pred = (y_test_pred_proba > 0.5).astype(int)\n",
"\n",
"# Create and plot confusion matrix\n",
"cm = confusion_matrix(y_test, y_test_pred)\n",
"disp = ConfusionMatrixDisplay(confusion_matrix=cm, \n",
" display_labels=['Low Workload', 'High Workload'])\n",
"\n",
"fig, ax = plt.subplots(figsize=(8, 6))\n",
"disp.plot(ax=ax, cmap='Blues', values_format='d')\n",
"plt.title('Confusion Matrix - Test Set (Loaded Model)')\n",
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"# Print metrics\n",
"print(f\"\\nTest Set Performance (Loaded Model):\")\n",
"print(f\" Accuracy: {accuracy_score(y_test, y_test_pred):.4f}\")\n",
"print(f\" Precision: {precision_score(y_test, y_test_pred):.4f}\")\n",
"print(f\" Recall: {recall_score(y_test, y_test_pred):.4f}\")\n",
"print(f\" F1 Score: {f1_score(y_test, y_test_pred):.4f}\")\n",
"print(f\" ROC-AUC: {roc_auc_score(y_test, y_test_pred_proba):.4f}\")"
]
},
{
"cell_type": "markdown",
"id": "e826a998",
"metadata": {},
"source": [
"TO DO\n",
" * autoencoder langsam anfangen mit 19 schichten\n",
" * dann AE und SVM mit hybridem training wie bei claude?!\n",
" * dataset aus eyetracking verwenden?"
]
}
],
"metadata": {

View File

@ -1,7 +1,5 @@
import pickle
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import pandas as pd
def fit_normalizer(train_data, au_columns, method='standard', scope='global'):
"""
@ -21,8 +19,9 @@ def fit_normalizer(train_data, au_columns, method='standard', scope='global'):
Returns:
--------
dict
Dictionary containing fitted scalers and statistics for new subjects
Dictionary containing fitted scalers
"""
# Select scaler based on method
if method == 'standard':
Scaler = StandardScaler
elif method == 'minmax':
@ -31,54 +30,19 @@ def fit_normalizer(train_data, au_columns, method='standard', scope='global'):
raise ValueError("method must be 'standard' or 'minmax'")
scalers = {}
if scope == 'subject':
# Fit one scaler per subject
subject_stats = []
for subject in train_data['subjectID'].unique():
subject_mask = train_data['subjectID'] == subject
scaler = Scaler()
scaler.fit(train_data.loc[subject_mask, au_columns].values)
scaler.fit(train_data.loc[subject_mask, au_columns])
scalers[subject] = scaler
# Store statistics for averaging
if method == 'standard':
subject_stats.append({
'mean': scaler.mean_,
'std': scaler.scale_
})
elif method == 'minmax':
subject_stats.append({
'min': scaler.data_min_,
'max': scaler.data_max_
})
# Calculate average statistics for new subjects
if method == 'standard':
avg_mean = np.mean([s['mean'] for s in subject_stats], axis=0)
avg_std = np.mean([s['std'] for s in subject_stats], axis=0)
fallback_scaler = StandardScaler()
fallback_scaler.mean_ = avg_mean
fallback_scaler.scale_ = avg_std
fallback_scaler.var_ = avg_std ** 2
fallback_scaler.n_features_in_ = len(au_columns)
elif method == 'minmax':
avg_min = np.mean([s['min'] for s in subject_stats], axis=0)
avg_max = np.mean([s['max'] for s in subject_stats], axis=0)
fallback_scaler = MinMaxScaler()
fallback_scaler.data_min_ = avg_min
fallback_scaler.data_max_ = avg_max
fallback_scaler.data_range_ = avg_max - avg_min
fallback_scaler.scale_ = 1.0 / fallback_scaler.data_range_
fallback_scaler.min_ = -avg_min * fallback_scaler.scale_
fallback_scaler.n_features_in_ = len(au_columns)
scalers['_fallback'] = fallback_scaler
elif scope == 'global':
# Fit one scaler for all subjects
scaler = Scaler()
scaler.fit(train_data[au_columns].values)
scaler.fit(train_data[au_columns])
scalers['global'] = scaler
else:
@ -86,7 +50,7 @@ def fit_normalizer(train_data, au_columns, method='standard', scope='global'):
return {'scalers': scalers, 'method': method, 'scope': scope}
def apply_normalizer(data, columns, normalizer_dict):
def apply_normalizer(data, au_columns, normalizer_dict):
"""
Apply fitted normalization scalers to data.
@ -107,70 +71,28 @@ def apply_normalizer(data, columns, normalizer_dict):
normalized_data = data.copy()
scalers = normalizer_dict['scalers']
scope = normalizer_dict['scope']
normalized_data[columns] = normalized_data[columns].astype(np.float64)
if scope == 'subject':
# Apply per-subject normalization
for subject in data['subjectID'].unique():
subject_mask = data['subjectID'] == subject
# Use the subject's scaler if available, otherwise use fallback
# Use the subject's scaler if available, otherwise use a fitted scaler from training
if subject in scalers:
scaler = scalers[subject]
else:
# Use averaged scaler for new subjects
scaler = scalers['_fallback']
print(f"Info: Subject {subject} not in training data. Using averaged scaler from training subjects.")
# For new subjects not seen in training, use the first available scaler
# (This is a fallback - ideally all test subjects should be in training for subject-level normalization)
print(f"Warning: Subject {subject} not found in training data. Using fallback scaler.")
scaler = list(scalers.values())[0]
normalized_data.loc[subject_mask, columns] = scaler.transform(
data.loc[subject_mask, columns].values
normalized_data.loc[subject_mask, au_columns] = scaler.transform(
data.loc[subject_mask, au_columns]
)
elif scope == 'global':
# Apply global normalization
scaler = scalers['global']
normalized_data[columns] = scaler.transform(data[columns].values)
normalized_data[au_columns] = scaler.transform(data[au_columns])
return normalized_data
def save_normalizer(normalizer_dict, filepath):
"""
Save fitted normalizer to disk.
Parameters:
-----------
normalizer_dict : dict
Dictionary containing fitted scalers from fit_normalizer()
filepath : str
Path to save the normalizer (e.g., 'normalizer.pkl')
"""
# Create directory if it does not exist
dirpath = os.path.dirname(filepath)
if dirpath:
os.makedirs(dirpath, exist_ok=True)
with open(filepath, 'wb') as f:
pickle.dump(normalizer_dict, f)
print(f"Normalizer saved to {filepath}")
def load_normalizer(filepath):
"""
Load fitted normalizer from disk.
Parameters:
-----------
filepath : str
Path to the saved normalizer file
Returns:
--------
dict
Dictionary containing fitted scalers
"""
with open(filepath, 'rb') as f:
normalizer_dict = pickle.load(f)
print(f"Normalizer loaded from {filepath}")
return normalizer_dict
return normalized_data

View File

@ -0,0 +1,11 @@
# from tools import db_helpers
import sys
def main():
print(sys.version)
# db_helpers.add_columns_to_table()
if __name__ == "__main__":
main()

View File

@ -0,0 +1,117 @@
database:
path: "C:\\repo\\Fahrsimulator_MSY2526_AI\\predict_pipeline\\database.sqlite"
table: feature_table
key: _Id
model:
path: "C:\\repo\\Fahrsimulator_MSY2526_AI\\files_for_testing\\xgb_model_3_groupK.joblib"
scaler:
use_scaling: true
path: "C:\\repo\\Fahrsimulator_MSY2526_AI\\predict_pipeline\\normalizer_min_max_global.pkl"
mqtt:
enabled: true
host: "localhost"
port: 1883
topic: "ml/predictions"
client_id: "predictor-01"
qos: 1
retain: false
# username: ""
# password: ""
tls:
enabled: false
# ca_cert: ""
# client_cert: ""
# client_key: ""
publish_format:
result_key: prediction # where to store the predicted value in payload
include_metadata: true # e.g., timestamps, rowid, etc.
sample:
columns:
- _Id
- start_time
- FACE_AU01_mean
- FACE_AU02_mean
- FACE_AU04_mean
- FACE_AU05_mean
- FACE_AU06_mean
- FACE_AU07_mean
- FACE_AU09_mean
- FACE_AU10_mean
- FACE_AU11_mean
- FACE_AU12_mean
- FACE_AU14_mean
- FACE_AU15_mean
- FACE_AU17_mean
- FACE_AU20_mean
- FACE_AU23_mean
- FACE_AU24_mean
- FACE_AU25_mean
- FACE_AU26_mean
- FACE_AU28_mean
- FACE_AU43_mean
- Fix_count_short_66_150
- Fix_count_medium_300_500
- Fix_count_long_gt_1000
- Fix_count_100
- Fix_mean_duration
- Fix_median_duration
- Sac_count
- Sac_mean_amp
- Sac_mean_dur
- Sac_median_dur
- Blink_count
- Blink_mean_dur
- Blink_median_dur
- Pupil_mean
- Pupil_IPA
fill_nan_with_median: true
discard_if_all_nan: true
fallback:
- start_time: 0
- FACE_AU01_mean: 0.5
- FACE_AU02_mean: 0.5
- FACE_AU04_mean: 0.5
- FACE_AU05_mean: 0.5
- FACE_AU06_mean: 0.5
- FACE_AU07_mean: 0.5
- FACE_AU09_mean: 0.5
- FACE_AU10_mean: 0.5
- FACE_AU11_mean: 0.5
- FACE_AU12_mean: 0.5
- FACE_AU14_mean: 0.5
- FACE_AU15_mean: 0.5
- FACE_AU17_mean: 0.5
- FACE_AU20_mean: 0.5
- FACE_AU23_mean: 0.5
- FACE_AU24_mean: 0.5
- FACE_AU25_mean: 0.5
- FACE_AU26_mean: 0.5
- FACE_AU28_mean: 0.5
- FACE_AU43_mean: 0.5
- Fix_count_short_66_150: 2
- Fix_count_medium_300_500: 2
- Fix_count_long_gt_1000: 2
- Fix_count_100: 2
- Fix_mean_duration: 100
- Fix_median_duration: 100
- Sac_count: 2
- Sac_mean_amp: 2
- Sac_mean_dur: 100
- Sac_median_dur: 100
- Blink_count: 2
- Blink_mean_dur: 2
- Blink_median_dur: 2
- Pupil_mean: 2
- Pupil_IPA: 2

View File

@ -0,0 +1,9 @@
import sqlite3
def main():
return 0
if __name__ == "__main__":
main()

View File

@ -0,0 +1,211 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "0d70a13f",
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"sys.path.append('/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/tools')\n",
"import pandas as pd\n",
"from pathlib import Path\n",
"import db_helpers"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce696366",
"metadata": {},
"outputs": [],
"source": [
"database_path = Path(r\"/home/edgekit/MSY_FS/databases/rawdata.sqlite\")\n",
"parquet_path = Path(r\"/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/files_for_testing/both_mod_0000.parquet\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b1aa9398",
"metadata": {},
"outputs": [],
"source": [
"dataset = pd.read_parquet(parquet_path)\n",
"dataset.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b183746e",
"metadata": {},
"outputs": [],
"source": [
"dataset.dtypes"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "24ed769d",
"metadata": {},
"outputs": [],
"source": [
"con, cursor = db_helpers.connect_db(database_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e604ed30",
"metadata": {},
"outputs": [],
"source": [
"df_clean = dataset.drop(columns=['subjectID','rowID', 'STUDY', 'LEVEL', 'PHASE'])\n",
"df_first_100 = df_clean.head(200)\n",
"df_first_100 = df_first_100.reset_index(drop=True)\n",
"df_first_100.insert(0, '_Id', df_first_100.index + 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e77a812e",
"metadata": {},
"outputs": [],
"source": [
"def pandas_to_sqlite_dtype(dtype):\n",
" if pd.api.types.is_integer_dtype(dtype):\n",
" return \"INTEGER\"\n",
" if pd.api.types.is_float_dtype(dtype):\n",
" return \"REAL\"\n",
" if pd.api.types.is_bool_dtype(dtype):\n",
" return \"INTEGER\"\n",
" if pd.api.types.is_datetime64_any_dtype(dtype):\n",
" return \"TEXT\"\n",
" return \"TEXT\"\n"
]
},
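{
"cell_type": "code",
"execution_count": null,
"id": "3c9a1b7e",
"metadata": {},
"outputs": [],
"source": [
"# Illustrative check of the dtype mapping above: every column of\n",
"# df_first_100 should resolve to INTEGER, REAL, or TEXT before the\n",
"# table is created.\n",
"{col: pandas_to_sqlite_dtype(dt) for col, dt in df_first_100.dtypes.head().items()}"
]
},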
{
"cell_type": "code",
"execution_count": null,
"id": "0e8897b2",
"metadata": {},
"outputs": [],
"source": [
"columns = {\n",
" col: pandas_to_sqlite_dtype(dtype)\n",
" for col, dtype in df_first_100.dtypes.items()\n",
"}\n",
"\n",
"constraints = {\n",
" \"_Id\": [\"NOT NULL\"]\n",
"}\n",
"\n",
"primary_key = {\n",
" \"pk_df_first_100\": [\"_Id\"]\n",
"}\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ab57624",
"metadata": {},
"outputs": [],
"source": [
"sql = db_helpers.create_table(\n",
" conn=con,\n",
" cursor=cursor,\n",
" table_name=\"rawdata\",\n",
" columns=columns,\n",
" constraints=constraints,\n",
" primary_key=primary_key,\n",
" commit=True\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "25096a7f",
"metadata": {},
"outputs": [],
"source": [
"columns_to_insert = {\n",
" col: df_first_100[col].tolist()\n",
" for col in df_first_100.columns\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a5a3aa8",
"metadata": {},
"outputs": [],
"source": [
"db_helpers.insert_rows_into_table(\n",
" conn=con,\n",
" cursor=cursor,\n",
" table_name=\"rawdata\",\n",
" columns=columns_to_insert,\n",
" commit=True\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b56beae2",
"metadata": {},
"outputs": [],
"source": [
"a = db_helpers.get_data_from_table(conn=con, table_name='rawdata',columns_list=['*'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a4a74a9d",
"metadata": {},
"outputs": [],
"source": [
"a.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "da0f8737",
"metadata": {},
"outputs": [],
"source": [
"db_helpers.disconnect_db(con, cursor)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "MSY_FS_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,253 @@
# Imports
import pandas as pd
import json
from pathlib import Path
import numpy as np
import sys
import yaml
import pickle
sys.path.append('/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/tools')
# sys.path.append(r"c:\\repo\\Fahrsimulator_MSY2526_AI\\tools")
import db_helpers
import joblib
def _load_serialized(path: Path):
suffix = path.suffix.lower()
if suffix == ".pkl":
with path.open("rb") as f:
return pickle.load(f)
if suffix == ".joblib":
return joblib.load(path)
raise ValueError(f"Unsupported file format: {suffix}. Use .pkl or .joblib.")
def getLastEntryFromSQLite(path, table_name, key="_Id"):
conn, cursor = db_helpers.connect_db(path)
try:
row_df = db_helpers.get_data_from_table(
conn=conn,
table_name=table_name,
order_by={key: "DESC"},
limit=1,
)
finally:
db_helpers.disconnect_db(conn, cursor, commit=False)
if row_df.empty:
return pd.Series(dtype="object")
return row_df.iloc[0]
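# Illustrative usage (path and table name are assumptions from config.yaml):
# last_row = getLastEntryFromSQLite("database.sqlite", "feature_table", key="_Id")
# -> pd.Series of the newest row, or an empty Series if the table is empty.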
def callModel(sample, model_path):
if callable(sample):
raise TypeError(
f"Invalid sample type: got callable `{getattr(sample, '__name__', type(sample).__name__)}`. "
"Expected numpy array / pandas row."
)
model_path = Path(model_path)
if not model_path.is_absolute():
model_path = Path.cwd() / model_path
model_path = model_path.resolve()
suffix = model_path.suffix.lower()
if suffix in {".pkl", ".joblib"}:
model = _load_serialized(model_path)
# elif suffix == ".keras":
# import tensorflow as tf
# model = tf.keras.models.load_model(model_path)
# else:
# raise ValueError(f"Unsupported model format: {suffix}. Use .pkl, .joblib, or .keras.")
x = np.asarray(sample, dtype=np.float32)
if x.ndim == 1:
x = x.reshape(1, -1)
if suffix == ".keras":
x_full = x
# Future model (35 features): keep this call when your new model is active.
# prediction = model.predict(x_full[:, :35], verbose=0)
prediction = model.predict(x_full[:, :20], verbose=0)
else:
if hasattr(model, "predict"):
prediction = model.predict(x[:,:20])
elif callable(model):
prediction = model(x[:,:20])
else:
raise TypeError("Loaded model has no .predict(...) and is not callable.")
prediction = np.asarray(prediction)
if prediction.size == 1:
return prediction.item()
return prediction.squeeze()
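# Illustrative call with a .joblib model (only the first 20 features are used):
# pred = callModel(sample_np, "files_for_testing/xgb_model_3_groupK.joblib")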
def buildMessage(valid, result: np.int32, config_file_path, sample=None):
with Path(config_file_path).open("r", encoding="utf-8") as f:
cfg = yaml.safe_load(f)
mqtt_cfg = cfg.get("mqtt", {})
result_key = mqtt_cfg.get("publish_format", {}).get("result_key", "prediction")
sample_id = None
if isinstance(sample, pd.Series):
sample_id = sample.get("_Id", sample.get("_id"))
elif isinstance(sample, dict):
sample_id = sample.get("_Id", sample.get("_id"))
message = {
"valid": bool(valid),
"_id": sample_id,
result_key: np.asarray(result).tolist() if isinstance(result, np.ndarray) else result,
}
return message
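# Resulting payload shape (values illustrative):
# {"valid": True, "_id": 123, "prediction": 1}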
def convert_int64(obj):
if isinstance(obj, np.int64):
return int(obj)
# If the object is a dictionary or list, recursively convert its values
elif isinstance(obj, dict):
return {key: convert_int64(value) for key, value in obj.items()}
elif isinstance(obj, list):
return [convert_int64(item) for item in obj]
return obj
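# Needed because json.dumps() cannot serialize numpy integer types, e.g.
# convert_int64({"_id": np.int64(7)}) -> {"_id": 7}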
def sendMessage(config_file_path, message):
# Load the configuration
with Path(config_file_path).open("r", encoding="utf-8") as f:
cfg = yaml.safe_load(f)
# Get MQTT configuration
mqtt_cfg = cfg.get("mqtt", {})
topic = mqtt_cfg.get("topic", "ml/predictions")
# Convert message to ensure no np.int64 values remain
message = convert_int64(message)
# Serialize the message to JSON
payload = json.dumps(message, ensure_ascii=False)
print(payload)
# Later: publish via MQTT using config parameters above.
# Example (kept commented intentionally):
# import paho.mqtt.client as mqtt
# client = mqtt.Client(client_id=mqtt_cfg.get("client_id", "predictor-01"))
# if "username" in mqtt_cfg and mqtt_cfg.get("username"):
# client.username_pw_set(mqtt_cfg["username"], mqtt_cfg.get("password"))
# client.connect(mqtt_cfg.get("host", "localhost"), int(mqtt_cfg.get("port", 1883)), 60)
# client.publish(
# topic=topic,
# payload=payload,
# qos=int(mqtt_cfg.get("qos", 1)),
# retain=bool(mqtt_cfg.get("retain", False)),
# )
# client.disconnect()
return
def replace_nan(sample, config_file_path: Path):
with config_file_path.open("r", encoding="utf-8") as f:
cfg = yaml.safe_load(f)
fallback_list = cfg.get("fallback", [])
fallback_map = {}
for item in fallback_list:
if isinstance(item, dict):
fallback_map.update(item)
if sample.empty:
return False, sample
nan_ratio = sample.isna().mean()
valid = nan_ratio <= 0.5
if valid and fallback_map:
sample = sample.fillna(value=fallback_map)
return valid, sample
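# A sample counts as valid when at most half of its fields are NaN; remaining
# NaNs in a valid sample are filled from the per-column fallback values.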
def sample_to_numpy(sample, drop_cols=("_Id", "start_time")):
if isinstance(sample, pd.Series):
sample = sample.drop(labels=list(drop_cols), errors="ignore")
return sample.to_numpy()
if isinstance(sample, pd.DataFrame):
sample = sample.drop(columns=list(drop_cols), errors="ignore")
return sample.to_numpy()
return np.asarray(sample)
def scale_sample(sample, use_scaling=False, scaler_path=None):
if not use_scaling or scaler_path is None:
return sample
scaler_path = Path(scaler_path)
if not scaler_path.is_absolute():
scaler_path = Path.cwd() / scaler_path
scaler_path = scaler_path.resolve()
normalizer = _load_serialized(scaler_path)
# normalizer format from model_training/tools/scaler.py:
# {"scalers": {...}, "method": "...", "scope": "..."}
scalers = normalizer.get("scalers", {}) if isinstance(normalizer, dict) else {}
scope = normalizer.get("scope", "global") if isinstance(normalizer, dict) else "global"
if scope == "global":
scaler = scalers.get("global")
else:
scaler = scalers.get("global", next(iter(scalers.values()), None))
# Optional fallback if the stored object is already a raw scaler.
if scaler is None and hasattr(normalizer, "transform"):
scaler = normalizer
if scaler is None or not hasattr(scaler, "transform"):
return sample
df = sample.to_frame().T if isinstance(sample, pd.Series) else sample.copy()
feature_names = getattr(scaler, "feature_names_in_", None)
if feature_names is None:
return sample
# Keep columns not in the normalizer unchanged.
cols_to_scale = [c for c in df.columns if c in set(feature_names)]
if cols_to_scale:
df.loc[:, cols_to_scale] = scaler.transform(df.loc[:, cols_to_scale])
return df.iloc[0] if isinstance(sample, pd.Series) else df
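# Columns the fitted scaler has never seen (absent from feature_names_in_)
# pass through unchanged, e.g. _Id and start_time.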
def main():
pd.set_option('future.no_silent_downcasting', True) # can probably be removed later
config_file_path = Path("/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/predict_pipeline/config.yaml")
with config_file_path.open("r", encoding="utf-8") as f:
cfg = yaml.safe_load(f)
database_path = cfg["database"]["path"]
table_name = cfg["database"]["table"]
row_key = cfg["database"]["key"]
sample = getLastEntryFromSQLite(database_path, table_name, row_key)
valid, sample = replace_nan(sample, config_file_path=config_file_path)
if not valid:
print("Sample invalid: more than 50% NaN.")
message = buildMessage(valid, None, config_file_path, sample=sample)
sendMessage(config_file_path, message)
return
model_path = cfg["model"]["path"]
scaler_path = cfg["scaler"]["path"]
use_scaling = cfg["scaler"]["use_scaling"]
sample = scale_sample(sample, use_scaling=use_scaling, scaler_path=scaler_path)
sample_np = sample_to_numpy(sample)
prediction = callModel(model_path=model_path, sample=sample_np)
message = buildMessage(valid, prediction, config_file_path, sample=sample)
sendMessage(config_file_path, message)
if __name__ == "__main__":
main()

78
smallerenv.yaml Normal file
View File

@ -0,0 +1,78 @@
# ============================================================
# SMALLER ENVIRONMENT - corrected & extended
# For the Fahrsimulator project with ML & IoT
# ============================================================
name: smaller_env
channels:
- conda-forge
- defaults
dependencies:
# ====== PYTHON ======
- python=3.8 # compatible with the Jetson Nano
# ====== CORE DATA SCIENCE ======
- numpy=1.19.5
- pandas=1.3.5
- scipy=1.7.3
- scikit-learn=1.0.2 # sklearn is an alias
# ====== VISUALIZATION ======
# ====== ML/DL SUPPORT ======
- h5py=3.6.0
- joblib=1.1.0
# ====== VIDEO PROCESSING ======
- moviepy=1.0.3
# ====== MACHINE LEARNING ======
- xgboost=1.5.2
# ====== FILE FORMATS ======
- pyyaml # provides the yaml module
# ====== IoT & COMMUNICATION (NEU) ======
- paho-mqtt=1.6.1 # MQTT Client
# ====== DATABASE (NEW) ======
# sqlite3 is already built into Python!
# ====== UTILITIES ======
- tqdm=4.64.1 # Progress bars
- requests=2.28.1 # HTTP requests
# ====== PIP PACKAGES ======
- pip
- pip:
# TensorFlow (installed separately for the Jetson)
# - tensorflow==2.7.0 # Jetson: install via the NVIDIA repo
# Eye-tracking Analysis
- pygazeanalyser==0.2.0
# ML Detection (if available on PyPI)
# - detectors # check availability
# - feat # check availability
# MQTT additionally via pip if the conda version causes problems
# - paho-mqtt==1.6.1
# ============================================================
# NOTES:
# ============================================================
#
# 3. TENSORFLOW FOR JETSON:
# Install separately after the environment is created:
# pip3 install --extra-index-url https://developer.download.nvidia.com/compute/redist/jp/v46 tensorflow==2.7.0+nv22.1
#
# 4. SQLITE3:
# Already built into Python, no installation needed!
# Import: import sqlite3
#
# 5. MQTT:
# paho-mqtt is the standard MQTT client for Python
# Recommended brokers: Mosquitto, HiveMQ, EMQX