diff --git a/model_training/VAE_SVM/AEdannSVM.ipynb b/model_training/VAE_SVM/AEdannSVM.ipynb index 395863d..88a7ce6 100644 --- a/model_training/VAE_SVM/AEdannSVM.ipynb +++ b/model_training/VAE_SVM/AEdannSVM.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "708c9745", + "id": "bcbd4937", "metadata": {}, "source": [ "### Imports" @@ -11,7 +11,7 @@ { "cell_type": "code", "execution_count": null, - "id": "53b10294", + "id": "7670c30e", "metadata": {}, "outputs": [], "source": [ @@ -24,8 +24,10 @@ "base_dir = os.path.abspath(os.path.join(os.getcwd(), \"..\"))\n", "sys.path.append(base_dir)\n", "print(base_dir)\n", + "print(os.getcwd())\n", + "# from Fahrsimulator_MSY2526_AI.model_training.tools import evaluation_tools, scaler, mad_outlier_removal\n", "\n", - "from Fahrsimulator_MSY2526_AI.model_training.tools import evaluation_tools, scaler, mad_outlier_removal\n", + "from tools import evaluation_tools, scaler, mad_outlier_removal\n", "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n", "from sklearn.svm import OneClassSVM\n", "from sklearn.model_selection import GridSearchCV, KFold, ParameterGrid, train_test_split, GroupKFold\n", @@ -35,9 +37,39 @@ "from sklearn.metrics import (roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, balanced_accuracy_score, ConfusionMatrixDisplay) " ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "59b2b100", + "metadata": {}, + "outputs": [], + "source": [ + "# Check GPU availability\n", + "print(\"TensorFlow version:\", tf.__version__)\n", + "print(\"GPU Available:\", tf.config.list_physical_devices('GPU'))\n", + "print(\"CUDA Available:\", tf.test.is_built_with_cuda())\n", + "\n", + "# Get detailed GPU info\n", + "gpus = tf.config.list_physical_devices('GPU')\n", + "if gpus:\n", + " print(f\"\\nNumber of GPUs: {len(gpus)}\")\n", + " for gpu in gpus:\n", + " print(f\"GPU: {gpu}\")\n", + " \n", + " # Enable memory growth to prevent TF from allocating all GPU memory\n", + " try:\n", + " for gpu in gpus:\n", + " tf.config.experimental.set_memory_growth(gpu, True)\n", + " print(\"\\nGPU memory growth enabled\")\n", + " except RuntimeError as e:\n", + " print(e)\n", + "else:\n", + " print(\"\\nNo GPU found - running on CPU\")" + ] + }, { "cell_type": "markdown", - "id": "68101229", + "id": "b002d3c8", "metadata": {}, "source": [ "### load Dataset" @@ -46,7 +78,7 @@ { "cell_type": "code", "execution_count": null, - "id": "24a765e8", + "id": "1620827e", "metadata": {}, "outputs": [], "source": [ @@ -56,7 +88,7 @@ { "cell_type": "code", "execution_count": null, - "id": "471001b0", + "id": "854240b8", "metadata": {}, "outputs": [], "source": [ @@ -65,7 +97,7 @@ }, { "cell_type": "markdown", - "id": "0fdecdaa", + "id": "69b21772", "metadata": {}, "source": [ "### Load Performance data and Subject Split" @@ -74,7 +106,7 @@ { "cell_type": "code", "execution_count": null, - "id": "692d1b47", + "id": "ff894fda", "metadata": {}, "outputs": [], "source": [ @@ -85,7 +117,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ea617e3f", + "id": "4b2b789b", "metadata": {}, "outputs": [], "source": [ @@ -111,7 +143,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ae43df8d", + "id": "e7336051", "metadata": {}, "outputs": [], "source": [ @@ -173,7 +205,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9d1b414e", + "id": "96d9241d", "metadata": {}, "outputs": [], "source": [ @@ -183,7 +215,7 @@ { "cell_type": "code", "execution_count": null, 
- "id": "fa71f9a5", + "id": "8c41544e", "metadata": {}, "outputs": [], "source": [ @@ -193,7 +225,7 @@ { "cell_type": "code", "execution_count": null, - "id": "79ecb4a2", + "id": "5a110ca6", "metadata": {}, "outputs": [], "source": [ @@ -206,7 +238,7 @@ { "cell_type": "code", "execution_count": null, - "id": "87f9fe7d", + "id": "b64d8c2b", "metadata": {}, "outputs": [], "source": [ @@ -215,7 +247,7 @@ }, { "cell_type": "markdown", - "id": "009d268b", + "id": "3d7adcd9", "metadata": {}, "source": [ "Labeling" @@ -224,7 +256,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4fa79163", + "id": "e563d890", "metadata": {}, "outputs": [], "source": [ @@ -253,7 +285,7 @@ { "cell_type": "code", "execution_count": null, - "id": "82b17d0b", + "id": "c44eafa9", "metadata": {}, "outputs": [], "source": [ @@ -272,7 +304,7 @@ }, { "cell_type": "markdown", - "id": "4353f87c", + "id": "d110bd77", "metadata": {}, "source": [ "### Data cleaning with mad" @@ -281,7 +313,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c9afaf61", + "id": "1cea8fa4", "metadata": {}, "outputs": [], "source": [ @@ -327,7 +359,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4a286665", + "id": "8aa01ada", "metadata": {}, "outputs": [], "source": [ @@ -339,21 +371,21 @@ { "cell_type": "code", "execution_count": null, - "id": "2671e0f4", + "id": "857c0ffd", "metadata": {}, "outputs": [], "source": [ "params = calculate_mad_params(train_df, au_columns)\n", "\n", "# Step 2: Apply filter consistently\n", - "train_outlier_removed = apply_mad_filter(train_df, params, threshold=3.5)\n", - "test_outlier_removed = apply_mad_filter(test_df, params, threshold=3.5)\n", + "train_outlier_removed = apply_mad_filter(train_df, params, threshold=7)\n", + "test_outlier_removed = apply_mad_filter(test_df, params, threshold=7)\n", "print(train_outlier_removed.shape, test_outlier_removed.shape)" ] }, { "cell_type": "markdown", - "id": "6c39b37f", + "id": "f9c5b562", "metadata": {}, "source": [ "Normalisierung der Daten" @@ -362,7 +394,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5e6c654f", + "id": "162163ae", "metadata": {}, "outputs": [], "source": [ @@ -373,7 +405,7 @@ }, { "cell_type": "markdown", - "id": "b6d25e7b", + "id": "ec1548c2", "metadata": {}, "source": [ "to do insert group k fold for train_df_normal" @@ -381,7 +413,7 @@ }, { "cell_type": "markdown", - "id": "e826a998", + "id": "be77010e", "metadata": {}, "source": [ "### AE first" @@ -390,7 +422,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e6421371", + "id": "462d33eb", "metadata": {}, "outputs": [], "source": [ @@ -399,8 +431,8 @@ "y_train_full = train_outlier_removed.loc[X_train_full.index, 'label'].values\n", "groups_train = train_outlier_removed.loc[X_train_full.index, 'subjectID'].values\n", "\n", - "print(f\"Training data shape: {X_train_full.shape}\")\n", - "print(f\"Label distribution in training: {pd.Series(y_train_full).value_counts()}\")\n", + "print(f\"Training data shape (before balancing): {X_train_full.shape}\")\n", + "print(f\"Label distribution (before balancing): {pd.Series(y_train_full).value_counts()}\")\n", "\n", "# Test data\n", "X_test = test_outlier_removed[au_columns].dropna()\n", @@ -410,9 +442,59 @@ "print(f\"Label distribution in test: {pd.Series(y_test).value_counts()}\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc757b7d", + "metadata": {}, + "outputs": [], + "source": [ + "# Class balancing durch Undersampling der Mehrheitsklasse\n", + "from sklearn.utils 
import resample\n", + "\n", + "# Separate by label\n", + "X_train_class0 = X_train_full[y_train_full == 0]\n", + "X_train_class1 = X_train_full[y_train_full == 1]\n", + "groups_class0 = groups_train[y_train_full == 0]\n", + "groups_class1 = groups_train[y_train_full == 1]\n", + "\n", + "print(f\"\\nBefore balancing - Class 0: {len(X_train_class0)}, Class 1: {len(X_train_class1)}\")\n", + "\n", + "# Undersample the majority class (class 1)\n", + "n_samples = min(len(X_train_class0), len(X_train_class1))\n", + "\n", + "X_class1_downsampled, groups_class1_downsampled = resample(\n", + " X_train_class1, \n", + " groups_class1,\n", + " n_samples=n_samples,\n", + " random_state=42,\n", + " replace=False\n", + ")\n", + "\n", + "# Combine the balanced data\n", + "X_train_full = pd.concat([X_train_class0, X_class1_downsampled]).reset_index(drop=True)\n", + "y_train_full = np.concatenate([\n", + " np.zeros(len(X_train_class0)),\n", + " np.ones(len(X_class1_downsampled))\n", + "])\n", + "groups_train = np.concatenate([groups_class0, groups_class1_downsampled])\n", + "\n", + "# Shuffle\n", + "shuffle_idx = np.random.permutation(len(X_train_full))\n", + "X_train_full = X_train_full.iloc[shuffle_idx].reset_index(drop=True)\n", + "y_train_full = y_train_full[shuffle_idx]\n", + "groups_train = groups_train[shuffle_idx]\n", + "\n", + "# Verify balancing worked\n", + "print(\"\\n=== DATA CHECK AFTER BALANCING ===\")\n", + "print(f\"Training - Class 0: {(y_train_full==0).sum()}, Class 1: {(y_train_full==1).sum()}\")\n", + "print(f\"Test - Class 0: {(y_test==0).sum()}, Class 1: {(y_test==1).sum()}\")\n", + "print(f\"Training balanced: {(y_train_full==0).sum() == (y_train_full==1).sum()}\")" ] }, { "cell_type": "markdown", - "id": "d982e47a", + "id": "530e4acf", "metadata": {}, "source": [ "### Custom SVM Layer (differentiable approximation)" ] }, @@ -421,7 +503,7 @@ { "cell_type": "code", "execution_count": null, - "id": "50fbda1a", + "id": "4abbabe8", "metadata": {}, "outputs": [], "source": [ @@ -480,81 +562,124 @@ { "cell_type": "code", "execution_count": null, - "id": "e7def811", + "id": "61b8978e", "metadata": {}, "outputs": [], "source": [ "class JointAESVM(tf.keras.Model):\n", " \"\"\"\n", - " Joint Autoencoder + SVM Model\n", - " Loss = reconstruction_loss + svm_loss\n", + " Joint Autoencoder + SVM Model with Batch Normalization and Dropout\n", " \"\"\"\n", " def __init__(self, input_dim, latent_dim=5, hidden_dim=16, ae_weight=1.0, \n", - " svm_weight=1.0, svm_C=1.0, reg=0.0001, **kwargs):\n", + " svm_weight=1.0, svm_C=1.0, reg=0.0001, \n", + " use_batchnorm=True, dropout_rate=0.3, **kwargs):\n", " super(JointAESVM, self).__init__(**kwargs)\n", " \n", " self.ae_weight = ae_weight\n", " self.svm_weight = svm_weight\n", + " self.use_batchnorm = use_batchnorm\n", + " self.dropout_rate = dropout_rate\n", " \n", - " # Encoder\n", - " self.encoder = tf.keras.Sequential([\n", - " tf.keras.layers.Dense(input_dim, activation='relu', \n", - " kernel_regularizer=tf.keras.regularizers.l2(reg)),\n", - " tf.keras.layers.Dense(hidden_dim, activation='relu',\n", - " kernel_regularizer=tf.keras.regularizers.l2(reg)),\n", - " tf.keras.layers.Dense(latent_dim, activation='relu',\n", - " kernel_regularizer=tf.keras.regularizers.l2(reg))\n", - " ], name='encoder')\n", + " # Encoder with BatchNorm and Dropout\n", + " encoder_layers = []\n", " \n", - " # Decoder\n", - " self.decoder = tf.keras.Sequential([\n", - " tf.keras.layers.Dense(latent_dim, activation='relu',\n", - " 
kernel_regularizer=tf.keras.regularizers.l2(reg)),\n", - " tf.keras.layers.Dense(hidden_dim, activation='relu',\n", - " kernel_regularizer=tf.keras.regularizers.l2(reg)),\n", - " tf.keras.layers.Dense(input_dim, activation='linear',\n", - " kernel_regularizer=tf.keras.regularizers.l2(reg))\n", - " ], name='decoder')\n", + " encoder_layers.append(tf.keras.layers.Dense(\n", + " input_dim, \n", + " activation=None,\n", + " kernel_regularizer=tf.keras.regularizers.l2(reg)\n", + " ))\n", + " if use_batchnorm:\n", + " encoder_layers.append(tf.keras.layers.BatchNormalization())\n", + " encoder_layers.append(tf.keras.layers.Activation('relu'))\n", + " if dropout_rate > 0:\n", + " encoder_layers.append(tf.keras.layers.Dropout(dropout_rate))\n", + " \n", + " encoder_layers.append(tf.keras.layers.Dense(\n", + " hidden_dim,\n", + " activation=None,\n", + " kernel_regularizer=tf.keras.regularizers.l2(reg)\n", + " ))\n", + " if use_batchnorm:\n", + " encoder_layers.append(tf.keras.layers.BatchNormalization())\n", + " encoder_layers.append(tf.keras.layers.Activation('relu'))\n", + " if dropout_rate > 0:\n", + " encoder_layers.append(tf.keras.layers.Dropout(dropout_rate))\n", + " \n", + " encoder_layers.append(tf.keras.layers.Dense(\n", + " latent_dim,\n", + " activation=None,\n", + " kernel_regularizer=tf.keras.regularizers.l2(reg)\n", + " ))\n", + " if use_batchnorm:\n", + " encoder_layers.append(tf.keras.layers.BatchNormalization())\n", + " encoder_layers.append(tf.keras.layers.Activation('relu'))\n", + " # No dropout on the latent layer!\n", + " \n", + " self.encoder = tf.keras.Sequential(encoder_layers, name='encoder')\n", + " \n", + " # Decoder with BatchNorm and Dropout\n", + " decoder_layers = []\n", + " \n", + " decoder_layers.append(tf.keras.layers.Dense(\n", + " latent_dim,\n", + " activation=None,\n", + " kernel_regularizer=tf.keras.regularizers.l2(reg)\n", + " ))\n", + " if use_batchnorm:\n", + " decoder_layers.append(tf.keras.layers.BatchNormalization())\n", + " decoder_layers.append(tf.keras.layers.Activation('relu'))\n", + " if dropout_rate > 0:\n", + " decoder_layers.append(tf.keras.layers.Dropout(dropout_rate))\n", + " \n", + " decoder_layers.append(tf.keras.layers.Dense(\n", + " hidden_dim,\n", + " activation=None,\n", + " kernel_regularizer=tf.keras.regularizers.l2(reg)\n", + " ))\n", + " if use_batchnorm:\n", + " decoder_layers.append(tf.keras.layers.BatchNormalization())\n", + " decoder_layers.append(tf.keras.layers.Activation('relu'))\n", + " if dropout_rate > 0:\n", + " decoder_layers.append(tf.keras.layers.Dropout(dropout_rate))\n", + " \n", + " decoder_layers.append(tf.keras.layers.Dense(\n", + " input_dim,\n", + " activation='linear',\n", + " kernel_regularizer=tf.keras.regularizers.l2(reg)\n", + " ))\n", + " \n", + " self.decoder = tf.keras.Sequential(decoder_layers, name='decoder')\n", " \n", " # SVM Layer\n", " self.svm = DifferentiableSVM(C=svm_C, name='svm')\n", " \n", " def call(self, inputs, training=False):\n", - " # Encode\n", " encoded = self.encoder(inputs, training=training)\n", - " \n", - " # Decode (for reconstruction)\n", " decoded = self.decoder(encoded, training=training)\n", - " \n", - " # SVM decision (for classification)\n", " svm_output = self.svm(encoded)\n", " \n", " return decoded, svm_output, encoded\n", " \n", " def compute_loss(self, x, y_true):\n", - " # Forward pass\n", " x_reconstructed, svm_decision, encoded = self(x, training=True)\n", " \n", - " # Reconstruction loss (MSE)\n", " reconstruction_loss = tf.reduce_mean(\n", " tf.square(x - 
x_reconstructed)\n", " )\n", " \n", - " # SVM loss (hinge)\n", " svm_loss = self.svm.compute_loss(encoded, y_true)\n", " \n", - " # Total loss\n", " total_loss = (self.ae_weight * reconstruction_loss + \n", " self.svm_weight * svm_loss)\n", " \n", " return total_loss, reconstruction_loss, svm_loss\n", "\n", - "print(\"Joint AE-SVM Model class defined\")" + "print(\"Joint AE-SVM Model class defined (with BatchNorm + Dropout)\")" ] }, { "cell_type": "markdown", - "id": "541085f3", + "id": "445e10ff", "metadata": {}, "source": [ "Train function" ] }, @@ -563,14 +688,15 @@ { "cell_type": "code", "execution_count": null, - "id": "d0bf18e3", + "id": "ae498e69", "metadata": {}, "outputs": [], "source": [ "def train_joint_model(X_train, y_train, groups, model_params, \n", - " epochs=200, batch_size=64, learning_rate=0.0001):\n", + " epochs=200, batch_size=64, learning_rate=0.0001,\n", + " use_batchnorm=True, dropout_rate=0.3):\n", " \"\"\"\n", - " Train joint model on given data\n", + " Train joint model on given data - GPU optimized\n", " \"\"\"\n", " # Build model\n", " model = JointAESVM(\n", @@ -580,27 +706,38 @@ " ae_weight=model_params['ae_weight'],\n", " svm_weight=model_params['svm_weight'],\n", " svm_C=model_params['svm_C'],\n", - " reg=model_params['reg']\n", + " reg=model_params['reg'],\n", + " use_batchnorm=use_batchnorm,\n", + " dropout_rate=dropout_rate # pass dropout setting through to the model\n", " )\n", " \n", " optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)\n", " \n", - " # Training history\n", " history = {\n", " 'total_loss': [],\n", " 'recon_loss': [],\n", " 'svm_loss': []\n", " }\n", " \n", - " # Convert to tensors\n", " X_train_tf = tf.constant(X_train.values, dtype=tf.float32)\n", " y_train_tf = tf.constant(y_train, dtype=tf.float32)\n", " \n", - " # Create dataset\n", " dataset = tf.data.Dataset.from_tensor_slices((X_train_tf, y_train_tf))\n", - " dataset = dataset.shuffle(buffer_size=1024).batch(batch_size)\n", + " dataset = dataset.shuffle(buffer_size=min(10000, len(X_train)), \n", + " reshuffle_each_iteration=True)\n", + " dataset = dataset.batch(batch_size)\n", + " dataset = dataset.prefetch(tf.data.AUTOTUNE)\n", + " \n", + " @tf.function\n", + " def train_step(x_batch, y_batch):\n", + " with tf.GradientTape() as tape:\n", + " total_loss, recon_loss, svm_loss = model.compute_loss(x_batch, y_batch)\n", + " \n", + " gradients = tape.gradient(total_loss, model.trainable_variables)\n", + " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n", + " \n", + " return total_loss, recon_loss, svm_loss\n", " \n", - " # Training loop\n", " for epoch in range(epochs):\n", " epoch_loss = 0.0\n", " epoch_recon = 0.0\n", @@ -608,24 +745,18 @@ " n_batches = 0\n", " \n", " for x_batch, y_batch in dataset:\n", - " with tf.GradientTape() as tape:\n", - " total_loss, recon_loss, svm_loss = model.compute_loss(x_batch, y_batch)\n", - " \n", - " # Backpropagation\n", - " gradients = tape.gradient(total_loss, model.trainable_variables)\n", - " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n", + " total_loss, recon_loss, svm_loss = train_step(x_batch, y_batch)\n", " \n", " epoch_loss += total_loss.numpy()\n", " epoch_recon += recon_loss.numpy()\n", " epoch_svm += svm_loss.numpy()\n", " n_batches += 1\n", " \n", - " # Average losses\n", " history['total_loss'].append(epoch_loss / n_batches)\n", " history['recon_loss'].append(epoch_recon / n_batches)\n", " history['svm_loss'].append(epoch_svm / n_batches)\n", " \n", - " if (epoch + 1) % 20 == 0:\n", + " if (epoch + 1) % 25 == 
0:\n", " print(f\"Epoch {epoch+1}/{epochs} - \"\n", " f\"Total: {history['total_loss'][-1]:.4f}, \"\n", " f\"Recon: {history['recon_loss'][-1]:.4f}, \"\n", @@ -633,13 +764,13 @@ " \n", " return model, history\n", "\n", - "print(\"Training function defined\")" + "print(\"Training function defined (with Dropout)\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "b6a04540", + "id": "ded352fe", "metadata": {}, "outputs": [], "source": [ @@ -648,23 +779,32 @@ " 'latent_dim': [5, 8],\n", " 'hidden_dim': [10, 16],\n", " 'ae_weight': [0.5, 1.0],\n", - " 'svm_weight': [0.5, 1.0, 2.0],\n", + " 'svm_weight': [0.5, 2.0],\n", " 'svm_C': [0.1, 1.0, 10.0],\n", - " 'reg': [0.0001, 0.001]\n", + " 'reg': [0.01, 0.001]\n", "}\n", "\n", - "n_splits = 5 # Weniger Splits wegen Rechenzeit\n", + "n_splits = 5\n", + "\n", + "# GPU-optimized batch size\n", + "gpus = tf.config.list_physical_devices('GPU')\n", + "if gpus:\n", + " BATCH_SIZE = 256 # larger batches for GPU\n", + " print(\"GPU detected - using batch size:\", BATCH_SIZE)\n", + "else:\n", + " BATCH_SIZE = 64 # smaller batches for CPU\n", + " print(\"CPU only - using batch size:\", BATCH_SIZE)\n", + "\n", "gkf = GroupKFold(n_splits=n_splits)\n", "\n", "print(f\"Starting Grid Search with {n_splits}-fold GroupKFold\")\n", - "print(f\"Parameter combinations: {len(list(ParameterGrid(param_grid)))}\")\n", - "print(\"This will take a while...\")" + "print(f\"Parameter combinations: {len(list(ParameterGrid(param_grid)))}\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "228463ce", + "id": "5c36cc9d", "metadata": {}, "outputs": [], "source": [ @@ -685,7 +825,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c945fc87", + "id": "92216898", "metadata": {}, "outputs": [], "source": [ @@ -709,7 +849,7 @@ " for fold, (train_idx, val_idx) in enumerate(gkf.split(X_train_array, y_train_array, groups_train)):\n", " print(f\"\\nFold {fold + 1}/{n_splits}\")\n", " \n", - " X_fold_train = pd.DataFrame(X_train_array[train_idx], columns=X_train_full.columns)\n", + " X_fold_train = pd.DataFrame(X_train_array[train_idx], columns=X_train_full.columns) # DataFrame needed because train_joint_model calls X_train.values\n", " y_fold_train = y_train_array[train_idx]\n", " X_fold_val = X_train_array[val_idx]\n", " y_fold_val = y_train_array[val_idx]\n", @@ -718,10 +858,11 @@ " model, history = train_joint_model(\n", " X_fold_train, y_fold_train, groups_train[train_idx],\n", " model_params=params,\n", - " epochs=100, # Weniger Epochen für Grid Search\n", - " batch_size=64,\n", - " learning_rate=0.0001\n", - " )\n", + " epochs=100,\n", + " batch_size=BATCH_SIZE,\n", + " learning_rate=0.0001,\n", + " use_batchnorm=True,\n", + " dropout_rate=0.3)\n", " \n", " # Validate\n", " val_bal_acc, _ = evaluate_model(model, X_fold_val, y_fold_val)\n", @@ -755,12 +896,12 @@ { "cell_type": "code", "execution_count": null, - "id": "0a0606f5", + "id": "91182740", "metadata": {}, "outputs": [], "source": [ "results_df = pd.DataFrame(all_results)\n", - "results_df = results_df.sort_values('mean_cv_accuracy', ascending=False)\n", + "results_df = results_df.sort_values('mean_cv_bal_accuracy', ascending=False)\n", "\n", "print(\"\\nTop 10 configurations:\")\n", "print(results_df.head(10))\n", @@ -768,10 +909,10 @@ "# Plot\n", "plt.figure(figsize=(12, 6))\n", "plt.barh(range(min(10, len(results_df))), \n", - " results_df['mean_cv_accuracy'].head(10))\n", + " results_df['mean_cv_bal_accuracy'].head(10))\n", "plt.yticks(range(min(10, len(results_df))), \n", " [f\"Config {i+1}\" for i in 
range(min(10, len(results_df)))])\n", - "plt.xlabel('Mean CV Accuracy')\n", + "plt.xlabel('Mean CV Balanced Accuracy')\n", "plt.title('Top 10 Configurations')\n", "plt.tight_layout()\n", "plt.show()" ] }, @@ -780,7 +921,7 @@ { "cell_type": "code", "execution_count": null, - "id": "87906b05", + "id": "c6769a88", "metadata": {}, "outputs": [], "source": [ @@ -790,9 +931,11 @@ "final_model, final_history = train_joint_model(\n", " X_train_full, y_train_full, groups_train,\n", " model_params=best_params,\n", - " epochs=300, # Mehr Epochen für finales Training\n", - " batch_size=64,\n", - " learning_rate=0.0001\n", + " epochs=400, # more epochs for the final fit than during grid search\n", + " batch_size=BATCH_SIZE,\n", + " learning_rate=0.0001,\n", + " use_batchnorm=True,\n", + " dropout_rate=0.3 # same regularization settings as in the CV folds\n", ")\n", "\n", "print(\"\\nFinal model training completed!\")" ] }, @@ -801,7 +944,7 @@ { "cell_type": "code", "execution_count": null, - "id": "718137a8", + "id": "6dfeaa54", "metadata": {}, "outputs": [], "source": [ @@ -832,7 +975,7 @@ { "cell_type": "code", "execution_count": null, - "id": "02fbc5a2", + "id": "b14b63f3", "metadata": {}, "outputs": [], "source": [ @@ -858,7 +1001,7 @@ "print(f\"ROC-AUC: {roc_auc_score(y_test, decision_scaled):.4f}\")\n", "\n", "print(\"\\nConfusion Matrix:\")\n", - "cm = confusion_matrix(y_test, y_pred)\n", + "cm = confusion_matrix(y_test, y_pred, normalize='true')\n", "print(cm)\n", "\n", "print(\"\\nClassification Report:\")\n", @@ -867,7 +1010,7 @@ "# Visualize Confusion Matrix\n", "fig, ax = plt.subplots(figsize=(8, 6))\n", "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Low Load (0)', 'High Load (1)'])\n", - "disp.plot(cmap='Blues', ax=ax, colorbar=True, values_format='d')\n", + "disp.plot(cmap='Blues', ax=ax, colorbar=True)\n", "ax.set_title('Confusion Matrix - Test Set', fontsize=14, fontweight='bold')\n", "plt.tight_layout()\n", "plt.show()" ] }, @@ -876,27 +1019,74 @@ { "cell_type": "code", "execution_count": null, - "id": "4c524bce", + "id": "a8787bc7", "metadata": {}, "outputs": [], "source": [ - "# Save entire model\n", - "final_model.save_weights('joint_ae_svm_weights.h5')\n", - "print(\"Model weights saved as 'joint_ae_svm_weights.h5'\")\n", + "import json\n", + "from datetime import datetime\n", "\n", - "# Save encoder separately\n", - "final_model.encoder.save('encoder_joint.keras')\n", - "print(\"Encoder saved as 'encoder_joint.keras'\")\n", + "# Timestamp for unique file names\n", + "timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n", "\n", - "# Save best parameters\n", - "with open('best_params_joint.pkl', 'wb') as f:\n", - " pickle.dump(best_params, f)\n", - "print(\"Best parameters saved as 'best_params_joint.pkl'\")" + "# 1. Save model weights\n", + "weights_path = f'joint_ae_svm_weights_{timestamp}.h5'\n", + "final_model.save_weights(weights_path)\n", + "print(f\"Model weights saved as '{weights_path}'\")\n", + "\n", + "# 2. Save encoder separately\n", + "encoder_path = f'encoder_joint_{timestamp}.keras'\n", + "final_model.encoder.save(encoder_path)\n", + "print(f\"Encoder saved as '{encoder_path}'\")\n", + "\n", + "# 3. 
Save best parameters + model architecture info\n", + "model_config = {\n", + " 'best_params': best_params,\n", + " 'input_dim': X_train_full.shape[1],\n", + " 'au_columns': au_columns,\n", + " 'timestamp': timestamp,\n", + " 'training_samples': len(X_train_full),\n", + " 'test_samples': len(X_test)\n", + "}\n", + "\n", + "config_pkl_path = f'model_config_joint_{timestamp}.pkl'\n", + "with open(config_pkl_path, 'wb') as f:\n", + " pickle.dump(model_config, f)\n", + "print(f\"Model config saved as '{config_pkl_path}'\")\n", + "\n", + "# 4. Also save as JSON (human-readable)\n", + "config_json_path = f'model_config_joint_{timestamp}.json'\n", + "with open(config_json_path, 'w') as f:\n", + " json_data = {k: v.tolist() if isinstance(v, np.ndarray) else v \n", + " for k, v in model_config.items() if k != 'au_columns'}\n", + " json_data['au_columns'] = au_columns # list is JSON-serializable\n", + " json.dump(json_data, f, indent=2)\n", + "print(f\"Model config (JSON) saved as '{config_json_path}'\")\n", + " \n", + "# 5. Save SVM weights separately\n", + "svm_weights_path = f'svm_weights_joint_{timestamp}.pkl'\n", + "svm_weights = {\n", + " 'w': final_model.svm.w.numpy(),\n", + " 'b': final_model.svm.b.numpy()\n", + "}\n", + "with open(svm_weights_path, 'wb') as f:\n", + " pickle.dump(svm_weights, f)\n", + "print(f\"SVM weights saved as '{svm_weights_path}'\")\n", + "\n", + "# 6. Save Grid Search Results\n", + "results_path = f'grid_search_results_{timestamp}.pkl'\n", + "with open(results_path, 'wb') as f:\n", + " pickle.dump(all_results, f)\n", + "print(f\"Grid search results saved as '{results_path}'\")\n", + "\n", + "print(f\"\\n✓ All models and configs saved with timestamp: {timestamp}\")\n", + "print(f\"\\nTo load this model later, use:\")\n", + "print(f\" load_joint_model('{weights_path}', '{config_pkl_path}')\")" ] }, { "cell_type": "markdown", - "id": "792c658d", + "id": "06a538c4", "metadata": {}, "source": [ "* doch mal svm ae pipeline?\n", @@ -904,6 +1094,198 @@ "* label hinzufügen\n", "* mad von CT verwenden oder wert anpassen, ggf. vergleich welches label wie oft vorkommt vorher und nachher. 
--> labelling schritt von CT übernehmen\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9da57ed", + "metadata": {}, + "outputs": [], + "source": [ + "def load_joint_model(weights_path='joint_ae_svm.weights.h5',\n", + " config_path='model_config_joint.pkl'):\n", + " \"\"\"\n", + " Load the trained joint AE-SVM model\n", + " \n", + " Returns: model, config dict\n", + " \"\"\"\n", + " # Load config\n", + " with open(config_path, 'rb') as f:\n", + " config = pickle.load(f)\n", + " \n", + " params = config['best_params']\n", + " input_dim = config['input_dim']\n", + " \n", + " # Rebuild model with same architecture\n", + " model = JointAESVM(\n", + " input_dim=input_dim,\n", + " latent_dim=params['latent_dim'],\n", + " hidden_dim=params['hidden_dim'],\n", + " ae_weight=params['ae_weight'],\n", + " svm_weight=params['svm_weight'],\n", + " svm_C=params['svm_C'],\n", + " reg=params['reg']\n", + " )\n", + " \n", + " # Dummy forward pass to build weights\n", + " dummy_input = tf.random.normal((1, input_dim))\n", + " _ = model(dummy_input, training=False)\n", + " \n", + " # Load weights\n", + " model.load_weights(weights_path)\n", + " print(f\"✓ Model loaded from {weights_path}\")\n", + " \n", + " return model, config\n", + "\n", + "print(\"Load function defined\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1a9c9fb", + "metadata": {}, + "outputs": [], + "source": [ + "# Test: load the model that was just saved (use the timestamped paths from the save cell)\n", + "loaded_model, loaded_config = load_joint_model(weights_path, config_pkl_path)\n", + "\n", + "# Test prediction\n", + "test_sample = X_test.values[:5]\n", + "test_sample_tf = tf.constant(test_sample, dtype=tf.float32)\n", + "_, svm_out, encoded = loaded_model(test_sample_tf, training=False)\n", + "\n", + "print(\"Test prediction successful!\")\n", + "print(f\"Encoded shape: {encoded.shape}\")\n", + "print(f\"SVM decisions: {svm_out.numpy().flatten()}\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e4528f50", + "metadata": {}, + "outputs": [], + "source": [ + "def predict_flexible(model, X_data, use_gpu=None):\n", + " \"\"\"\n", + " Predict on CPU or GPU\n", + " \n", + " Args:\n", + " model: the loaded model\n", + " X_data: Input data (numpy array or DataFrame)\n", + " use_gpu: True/False/None (None = auto-detect)\n", + " \n", + " Returns:\n", + " predictions, decision_values, encoded_features\n", + " \"\"\"\n", + " # Auto-detect GPU\n", + " if use_gpu is None:\n", + " gpus = tf.config.list_physical_devices('GPU')\n", + " use_gpu = len(gpus) > 0\n", + " \n", + " # Force CPU or GPU\n", + " device = '/GPU:0' if use_gpu else '/CPU:0'\n", + " \n", + " print(f\"Running prediction on: {device}\")\n", + " \n", + " with tf.device(device):\n", + " if isinstance(X_data, pd.DataFrame):\n", + " X_data = X_data.values\n", + " \n", + " X_tf = tf.constant(X_data, dtype=tf.float32)\n", + " _, svm_decision, encoded = model(X_tf, training=False)\n", + " \n", + " # Predictions: decision > 0 -> class 1\n", + " y_pred = (svm_decision.numpy().flatten() > 0).astype(int)\n", + " \n", + " return y_pred, svm_decision.numpy().flatten(), encoded.numpy()\n", + "\n", + "# Test on CPU\n", + "y_pred_cpu, decisions_cpu, encoded_cpu = predict_flexible(\n", + " loaded_model, X_test.values[:10], use_gpu=False\n", + ")\n", + "print(f\"CPU Predictions: {y_pred_cpu}\")\n", + "\n", + "# Test on GPU\n", + "y_pred_gpu, decisions_gpu, encoded_gpu = predict_flexible(\n", + " loaded_model, X_test.values[:10], use_gpu=True\n", + ")\n", + "print(f\"GPU Predictions: {y_pred_gpu}\")\n", + 
"\n", + "# Verify sie sind identisch\n", + "print(f\"\\nResults identical: {np.allclose(decisions_cpu, decisions_gpu)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1447bfbd", + "metadata": {}, + "outputs": [], + "source": [ + "# Diagnose: Was lernt das Modell?\n", + "print(\"=== MODEL DIAGNOSIS ===\\n\")\n", + "\n", + "# Check SVM weights\n", + "print(\"SVM Weights (w):\", final_model.svm.w.numpy()[:10], \"...\")\n", + "print(\"SVM Bias (b):\", final_model.svm.b.numpy())\n", + "print(\"SVM weight norm:\", np.linalg.norm(final_model.svm.w.numpy()))\n", + "\n", + "# Check predictions distribution\n", + "X_train_tf = tf.constant(X_train_full.values, dtype=tf.float32)\n", + "_, train_decisions, train_encoded = final_model(X_train_tf, training=False)\n", + "train_decisions = train_decisions.numpy().flatten()\n", + "train_encoded = train_encoded.numpy() # KORREKTUR!\n", + "\n", + "print(f\"\\nTraining set decisions:\")\n", + "print(f\" Min: {train_decisions.min():.4f}\")\n", + "print(f\" Max: {train_decisions.max():.4f}\")\n", + "print(f\" Mean: {train_decisions.mean():.4f}\")\n", + "print(f\" Std: {train_decisions.std():.4f}\")\n", + "\n", + "train_pred = (train_decisions > 0).astype(int)\n", + "print(f\"\\nTraining predictions distribution:\")\n", + "print(pd.Series(train_pred).value_counts())\n", + "print(f\"Training balanced accuracy: {balanced_accuracy_score(y_train_full, train_pred):.4f}\")\n", + "\n", + "# Check encoded features\n", + "print(f\"\\nEncoded features stats:\")\n", + "print(f\" Mean: {train_encoded.mean():.4f}\")\n", + "print(f\" Std: {train_encoded.std():.4f}\")\n", + "print(f\" Min: {train_encoded.min():.4f}\")\n", + "print(f\" Max: {train_encoded.max():.4f}\")\n", + "\n", + "# Check per class\n", + "print(f\"\\nEncoded features per class:\")\n", + "for label in [0, 1]:\n", + " mask = y_train_full == label\n", + " enc_class = train_encoded[mask]\n", + " print(f\" Class {label}: mean={enc_class.mean():.4f}, std={enc_class.std():.4f}\")\n", + "\n", + "# Test set diagnosis\n", + "print(\"\\n=== TEST SET DIAGNOSIS ===\\n\")\n", + "X_test_tf = tf.constant(X_test.values, dtype=tf.float32)\n", + "_, test_decisions, test_encoded = final_model(X_test_tf, training=False)\n", + "test_decisions = test_decisions.numpy().flatten()\n", + "test_encoded = test_encoded.numpy()\n", + "\n", + "print(f\"Test set decisions:\")\n", + "print(f\" Min: {test_decisions.min():.4f}\")\n", + "print(f\" Max: {test_decisions.max():.4f}\")\n", + "print(f\" Mean: {test_decisions.mean():.4f}\")\n", + "print(f\" Std: {test_decisions.std():.4f}\")\n", + "\n", + "test_pred = (test_decisions > 0).astype(int)\n", + "print(f\"\\nTest predictions distribution:\")\n", + "print(pd.Series(test_pred).value_counts())\n", + "print(f\"Test balanced accuracy: {balanced_accuracy_score(y_test, test_pred):.4f}\")\n", + "\n", + "# Vergleich Train vs Test encoded features\n", + "print(f\"\\n=== TRAIN vs TEST Encoded Features ===\")\n", + "print(f\"Train encoded - Mean: {train_encoded.mean():.4f}, Std: {train_encoded.std():.4f}\")\n", + "print(f\"Test encoded - Mean: {test_encoded.mean():.4f}, Std: {test_encoded.std():.4f}\")" + ] } ], "metadata": { @@ -911,6 +1293,18 @@ "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 
"3.12.10" } }, "nbformat": 4,