diff --git a/model_training/VAE_SVM/AEdannSVM.ipynb b/model_training/VAE_SVM/AEdannSVM.ipynb index 395863d..88a7ce6 100644 --- a/model_training/VAE_SVM/AEdannSVM.ipynb +++ b/model_training/VAE_SVM/AEdannSVM.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "708c9745", + "id": "bcbd4937", "metadata": {}, "source": [ "### Imports" @@ -11,7 +11,7 @@ { "cell_type": "code", "execution_count": null, - "id": "53b10294", + "id": "7670c30e", "metadata": {}, "outputs": [], "source": [ @@ -24,8 +24,10 @@ "base_dir = os.path.abspath(os.path.join(os.getcwd(), \"..\"))\n", "sys.path.append(base_dir)\n", "print(base_dir)\n", + "print(os.getcwd())\n", + "# from Fahrsimulator_MSY2526_AI.model_training.tools import evaluation_tools, scaler, mad_outlier_removal\n", "\n", - "from Fahrsimulator_MSY2526_AI.model_training.tools import evaluation_tools, scaler, mad_outlier_removal\n", + "from tools import evaluation_tools, scaler, mad_outlier_removal\n", "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n", "from sklearn.svm import OneClassSVM\n", "from sklearn.model_selection import GridSearchCV, KFold, ParameterGrid, train_test_split, GroupKFold\n", @@ -35,9 +37,39 @@ "from sklearn.metrics import (roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, balanced_accuracy_score, ConfusionMatrixDisplay) " ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "59b2b100", + "metadata": {}, + "outputs": [], + "source": [ + "# Check GPU availability\n", + "print(\"TensorFlow version:\", tf.__version__)\n", + "print(\"GPU Available:\", tf.config.list_physical_devices('GPU'))\n", + "print(\"CUDA Available:\", tf.test.is_built_with_cuda())\n", + "\n", + "# Get detailed GPU info\n", + "gpus = tf.config.list_physical_devices('GPU')\n", + "if gpus:\n", + " print(f\"\\nNumber of GPUs: {len(gpus)}\")\n", + " for gpu in gpus:\n", + " print(f\"GPU: {gpu}\")\n", + " \n", + " # Enable memory growth to prevent TF from allocating all GPU memory\n", + " try:\n", + " for gpu in gpus:\n", + " tf.config.experimental.set_memory_growth(gpu, True)\n", + " print(\"\\nGPU memory growth enabled\")\n", + " except RuntimeError as e:\n", + " print(e)\n", + "else:\n", + " print(\"\\nNo GPU found - running on CPU\")" + ] + }, { "cell_type": "markdown", - "id": "68101229", + "id": "b002d3c8", "metadata": {}, "source": [ "### load Dataset" @@ -46,7 +78,7 @@ { "cell_type": "code", "execution_count": null, - "id": "24a765e8", + "id": "1620827e", "metadata": {}, "outputs": [], "source": [ @@ -56,7 +88,7 @@ { "cell_type": "code", "execution_count": null, - "id": "471001b0", + "id": "854240b8", "metadata": {}, "outputs": [], "source": [ @@ -65,7 +97,7 @@ }, { "cell_type": "markdown", - "id": "0fdecdaa", + "id": "69b21772", "metadata": {}, "source": [ "### Load Performance data and Subject Split" @@ -74,7 +106,7 @@ { "cell_type": "code", "execution_count": null, - "id": "692d1b47", + "id": "ff894fda", "metadata": {}, "outputs": [], "source": [ @@ -85,7 +117,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ea617e3f", + "id": "4b2b789b", "metadata": {}, "outputs": [], "source": [ @@ -111,7 +143,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ae43df8d", + "id": "e7336051", "metadata": {}, "outputs": [], "source": [ @@ -173,7 +205,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9d1b414e", + "id": "96d9241d", "metadata": {}, "outputs": [], "source": [ @@ -183,7 +215,7 @@ { "cell_type": "code", "execution_count": null, 
- "id": "fa71f9a5", + "id": "8c41544e", "metadata": {}, "outputs": [], "source": [ @@ -193,7 +225,7 @@ { "cell_type": "code", "execution_count": null, - "id": "79ecb4a2", + "id": "5a110ca6", "metadata": {}, "outputs": [], "source": [ @@ -206,7 +238,7 @@ { "cell_type": "code", "execution_count": null, - "id": "87f9fe7d", + "id": "b64d8c2b", "metadata": {}, "outputs": [], "source": [ @@ -215,7 +247,7 @@ }, { "cell_type": "markdown", - "id": "009d268b", + "id": "3d7adcd9", "metadata": {}, "source": [ "Labeling" @@ -224,7 +256,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4fa79163", + "id": "e563d890", "metadata": {}, "outputs": [], "source": [ @@ -253,7 +285,7 @@ { "cell_type": "code", "execution_count": null, - "id": "82b17d0b", + "id": "c44eafa9", "metadata": {}, "outputs": [], "source": [ @@ -272,7 +304,7 @@ }, { "cell_type": "markdown", - "id": "4353f87c", + "id": "d110bd77", "metadata": {}, "source": [ "### Data cleaning with mad" @@ -281,7 +313,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c9afaf61", + "id": "1cea8fa4", "metadata": {}, "outputs": [], "source": [ @@ -327,7 +359,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4a286665", + "id": "8aa01ada", "metadata": {}, "outputs": [], "source": [ @@ -339,21 +371,21 @@ { "cell_type": "code", "execution_count": null, - "id": "2671e0f4", + "id": "857c0ffd", "metadata": {}, "outputs": [], "source": [ "params = calculate_mad_params(train_df, au_columns)\n", "\n", "# Step 2: Apply filter consistently\n", - "train_outlier_removed = apply_mad_filter(train_df, params, threshold=3.5)\n", - "test_outlier_removed = apply_mad_filter(test_df, params, threshold=3.5)\n", + "train_outlier_removed = apply_mad_filter(train_df, params, threshold=7)\n", + "test_outlier_removed = apply_mad_filter(test_df, params, threshold=7)\n", "print(train_outlier_removed.shape, test_outlier_removed.shape)" ] }, { "cell_type": "markdown", - "id": "6c39b37f", + "id": "f9c5b562", "metadata": {}, "source": [ "Normalisierung der Daten" @@ -362,7 +394,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5e6c654f", + "id": "162163ae", "metadata": {}, "outputs": [], "source": [ @@ -373,7 +405,7 @@ }, { "cell_type": "markdown", - "id": "b6d25e7b", + "id": "ec1548c2", "metadata": {}, "source": [ "to do insert group k fold for train_df_normal" @@ -381,7 +413,7 @@ }, { "cell_type": "markdown", - "id": "e826a998", + "id": "be77010e", "metadata": {}, "source": [ "### AE first" @@ -390,7 +422,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e6421371", + "id": "462d33eb", "metadata": {}, "outputs": [], "source": [ @@ -399,8 +431,8 @@ "y_train_full = train_outlier_removed.loc[X_train_full.index, 'label'].values\n", "groups_train = train_outlier_removed.loc[X_train_full.index, 'subjectID'].values\n", "\n", - "print(f\"Training data shape: {X_train_full.shape}\")\n", - "print(f\"Label distribution in training: {pd.Series(y_train_full).value_counts()}\")\n", + "print(f\"Training data shape (before balancing): {X_train_full.shape}\")\n", + "print(f\"Label distribution (before balancing): {pd.Series(y_train_full).value_counts()}\")\n", "\n", "# Test data\n", "X_test = test_outlier_removed[au_columns].dropna()\n", @@ -410,9 +442,59 @@ "print(f\"Label distribution in test: {pd.Series(y_test).value_counts()}\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc757b7d", + "metadata": {}, + "outputs": [], + "source": [ + "# Class balancing durch Undersampling der Mehrheitsklasse\n", + "from sklearn.utils 
import resample\n", + "\n", + "# Separate by label\n", + "X_train_class0 = X_train_full[y_train_full == 0]\n", + "X_train_class1 = X_train_full[y_train_full == 1]\n", + "groups_class0 = groups_train[y_train_full == 0]\n", + "groups_class1 = groups_train[y_train_full == 1]\n", + "\n", + "print(f\"\\nBefore balancing - Class 0: {len(X_train_class0)}, Class 1: {len(X_train_class1)}\")\n", + "\n", + "# Undersample the majority class (class 1)\n", + "n_samples = min(len(X_train_class0), len(X_train_class1))\n", + "\n", + "X_class1_downsampled, groups_class1_downsampled = resample(\n", + " X_train_class1, \n", + " groups_class1,\n", + " n_samples=n_samples,\n", + " random_state=42,\n", + " replace=False\n", + ")\n", + "\n", + "# Combine the balanced data\n", + "X_train_full = pd.concat([X_train_class0, X_class1_downsampled]).reset_index(drop=True)\n", + "y_train_full = np.concatenate([\n", + " np.zeros(len(X_train_class0)),\n", + " np.ones(len(X_class1_downsampled))\n", + "])\n", + "groups_train = np.concatenate([groups_class0, groups_class1_downsampled])\n", + "\n", + "# Shuffle\n", + "shuffle_idx = np.random.permutation(len(X_train_full))\n", + "X_train_full = X_train_full.iloc[shuffle_idx].reset_index(drop=True)\n", + "y_train_full = y_train_full[shuffle_idx]\n", + "groups_train = groups_train[shuffle_idx]\n", + "\n", + "# Verify balancing worked\n", + "print(\"\\n=== DATA CHECK AFTER BALANCING ===\")\n", + "print(f\"Training - Class 0: {(y_train_full==0).sum()}, Class 1: {(y_train_full==1).sum()}\")\n", + "print(f\"Test - Class 0: {(y_test==0).sum()}, Class 1: {(y_test==1).sum()}\")\n", + "print(f\"Training balanced: {(y_train_full==0).sum() == (y_train_full==1).sum()}\")" ] }, { "cell_type": "markdown", - "id": "d982e47a", + "id": "530e4acf", "metadata": {}, "source": [ "### Custom SVM Layer (differentiable approximation)" ] }, @@ -421,7 +503,7 @@ { "cell_type": "code", "execution_count": null, - "id": "50fbda1a", + "id": "4abbabe8", "metadata": {}, "outputs": [], "source": [ @@ -480,81 +562,124 @@ { "cell_type": "code", "execution_count": null, - "id": "e7def811", + "id": "61b8978e", "metadata": {}, "outputs": [], "source": [ "class JointAESVM(tf.keras.Model):\n", " \"\"\"\n", - " Joint Autoencoder + SVM Model\n", - " Loss = reconstruction_loss + svm_loss\n", + " Joint Autoencoder + SVM Model with Batch Normalization and Dropout\n", " \"\"\"\n", " def __init__(self, input_dim, latent_dim=5, hidden_dim=16, ae_weight=1.0, \n", - " svm_weight=1.0, svm_C=1.0, reg=0.0001, **kwargs):\n", + " svm_weight=1.0, svm_C=1.0, reg=0.0001, \n", + " use_batchnorm=True, dropout_rate=0.3, **kwargs):\n", " super(JointAESVM, self).__init__(**kwargs)\n", " \n", " self.ae_weight = ae_weight\n", " self.svm_weight = svm_weight\n", + " self.use_batchnorm = use_batchnorm\n", + " self.dropout_rate = dropout_rate\n", " \n", - " # Encoder\n", - " self.encoder = tf.keras.Sequential([\n", - " tf.keras.layers.Dense(input_dim, activation='relu', \n", - " kernel_regularizer=tf.keras.regularizers.l2(reg)),\n", - " tf.keras.layers.Dense(hidden_dim, activation='relu',\n", - " kernel_regularizer=tf.keras.regularizers.l2(reg)),\n", - " tf.keras.layers.Dense(latent_dim, activation='relu',\n", - " kernel_regularizer=tf.keras.regularizers.l2(reg))\n", - " ], name='encoder')\n", + " # Encoder with BatchNorm and Dropout\n", + " encoder_layers = []\n", " \n", - " # Decoder\n", - " self.decoder = tf.keras.Sequential([\n", - " tf.keras.layers.Dense(latent_dim, activation='relu',\n", - " 
kernel_regularizer=tf.keras.regularizers.l2(reg)),\n", - " tf.keras.layers.Dense(hidden_dim, activation='relu',\n", - " kernel_regularizer=tf.keras.regularizers.l2(reg)),\n", - " tf.keras.layers.Dense(input_dim, activation='linear',\n", - " kernel_regularizer=tf.keras.regularizers.l2(reg))\n", - " ], name='decoder')\n", + " encoder_layers.append(tf.keras.layers.Dense(\n", + " input_dim, \n", + " activation=None,\n", + " kernel_regularizer=tf.keras.regularizers.l2(reg)\n", + " ))\n", + " if use_batchnorm:\n", + " encoder_layers.append(tf.keras.layers.BatchNormalization())\n", + " encoder_layers.append(tf.keras.layers.Activation('relu'))\n", + " if dropout_rate > 0:\n", + " encoder_layers.append(tf.keras.layers.Dropout(dropout_rate))\n", + " \n", + " encoder_layers.append(tf.keras.layers.Dense(\n", + " hidden_dim,\n", + " activation=None,\n", + " kernel_regularizer=tf.keras.regularizers.l2(reg)\n", + " ))\n", + " if use_batchnorm:\n", + " encoder_layers.append(tf.keras.layers.BatchNormalization())\n", + " encoder_layers.append(tf.keras.layers.Activation('relu'))\n", + " if dropout_rate > 0:\n", + " encoder_layers.append(tf.keras.layers.Dropout(dropout_rate))\n", + " \n", + " encoder_layers.append(tf.keras.layers.Dense(\n", + " latent_dim,\n", + " activation=None,\n", + " kernel_regularizer=tf.keras.regularizers.l2(reg)\n", + " ))\n", + " if use_batchnorm:\n", + " encoder_layers.append(tf.keras.layers.BatchNormalization())\n", + " encoder_layers.append(tf.keras.layers.Activation('relu'))\n", + " # No dropout on the latent layer!\n", + " \n", + " self.encoder = tf.keras.Sequential(encoder_layers, name='encoder')\n", + " \n", + " # Decoder with BatchNorm and Dropout\n", + " decoder_layers = []\n", + " \n", + " decoder_layers.append(tf.keras.layers.Dense(\n", + " latent_dim,\n", + " activation=None,\n", + " kernel_regularizer=tf.keras.regularizers.l2(reg)\n", + " ))\n", + " if use_batchnorm:\n", + " decoder_layers.append(tf.keras.layers.BatchNormalization())\n", + " decoder_layers.append(tf.keras.layers.Activation('relu'))\n", + " if dropout_rate > 0:\n", + " decoder_layers.append(tf.keras.layers.Dropout(dropout_rate))\n", + " \n", + " decoder_layers.append(tf.keras.layers.Dense(\n", + " hidden_dim,\n", + " activation=None,\n", + " kernel_regularizer=tf.keras.regularizers.l2(reg)\n", + " ))\n", + " if use_batchnorm:\n", + " decoder_layers.append(tf.keras.layers.BatchNormalization())\n", + " decoder_layers.append(tf.keras.layers.Activation('relu'))\n", + " if dropout_rate > 0:\n", + " decoder_layers.append(tf.keras.layers.Dropout(dropout_rate))\n", + " \n", + " decoder_layers.append(tf.keras.layers.Dense(\n", + " input_dim,\n", + " activation='linear',\n", + " kernel_regularizer=tf.keras.regularizers.l2(reg)\n", + " ))\n", + " \n", + " self.decoder = tf.keras.Sequential(decoder_layers, name='decoder')\n", " \n", " # SVM Layer\n", " self.svm = DifferentiableSVM(C=svm_C, name='svm')\n", " \n", " def call(self, inputs, training=False):\n", - " # Encode\n", " encoded = self.encoder(inputs, training=training)\n", - " \n", - " # Decode (for reconstruction)\n", " decoded = self.decoder(encoded, training=training)\n", - " \n", - " # SVM decision (for classification)\n", " svm_output = self.svm(encoded)\n", " \n", " return decoded, svm_output, encoded\n", " \n", " def compute_loss(self, x, y_true):\n", - " # Forward pass\n", " x_reconstructed, svm_decision, encoded = self(x, training=True)\n", " \n", - " # Reconstruction loss (MSE)\n", " reconstruction_loss = tf.reduce_mean(\n", " tf.square(x - 
x_reconstructed)\n", " )\n", " \n", - " # SVM loss (hinge)\n", " svm_loss = self.svm.compute_loss(encoded, y_true)\n", " \n", - " # Total loss\n", " total_loss = (self.ae_weight * reconstruction_loss + \n", " self.svm_weight * svm_loss)\n", " \n", " return total_loss, reconstruction_loss, svm_loss\n", "\n", - "print(\"Joint AE-SVM Model class defined\")" + "print(\"Joint AE-SVM Model class defined (with BatchNorm + Dropout)\")" ] }, { "cell_type": "markdown", - "id": "541085f3", + "id": "445e10ff", "metadata": {}, "source": [ "Train function" ] }, @@ -563,14 +688,15 @@ { "cell_type": "code", "execution_count": null, - "id": "d0bf18e3", + "id": "ae498e69", "metadata": {}, "outputs": [], "source": [ "def train_joint_model(X_train, y_train, groups, model_params, \n", - " epochs=200, batch_size=64, learning_rate=0.0001):\n", + " epochs=200, batch_size=64, learning_rate=0.0001,\n", + " use_batchnorm=True, dropout_rate=0.3):\n", " \"\"\"\n", - " Train joint model on given data\n", + " Train joint model on given data - GPU optimized\n", " \"\"\"\n", " # Build model\n", " model = JointAESVM(\n", @@ -580,27 +706,38 @@ " ae_weight=model_params['ae_weight'],\n", " svm_weight=model_params['svm_weight'],\n", " svm_C=model_params['svm_C'],\n", - " reg=model_params['reg']\n", + " reg=model_params['reg'],\n", + " use_batchnorm=use_batchnorm,\n", + " dropout_rate=dropout_rate # pass dropout setting through to the model\n", " )\n", " \n", " optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)\n", " \n", - " # Training history\n", " history = {\n", " 'total_loss': [],\n", " 'recon_loss': [],\n", " 'svm_loss': []\n", " }\n", " \n", - " # Convert to tensors\n", " X_train_tf = tf.constant(X_train.values, dtype=tf.float32)\n", " y_train_tf = tf.constant(y_train, dtype=tf.float32)\n", " \n", - " # Create dataset\n", " dataset = tf.data.Dataset.from_tensor_slices((X_train_tf, y_train_tf))\n", - " dataset = dataset.shuffle(buffer_size=1024).batch(batch_size)\n", + " dataset = dataset.shuffle(buffer_size=min(10000, len(X_train)), \n", + " reshuffle_each_iteration=True)\n", + " dataset = dataset.batch(batch_size)\n", + " dataset = dataset.prefetch(tf.data.AUTOTUNE)\n", + " \n", + " @tf.function\n", + " def train_step(x_batch, y_batch):\n", + " with tf.GradientTape() as tape:\n", + " total_loss, recon_loss, svm_loss = model.compute_loss(x_batch, y_batch)\n", + " \n", + " gradients = tape.gradient(total_loss, model.trainable_variables)\n", + " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n", + " \n", + " return total_loss, recon_loss, svm_loss\n", " \n", - " # Training loop\n", " for epoch in range(epochs):\n", " epoch_loss = 0.0\n", " epoch_recon = 0.0\n", @@ -608,24 +745,18 @@ " n_batches = 0\n", " \n", " for x_batch, y_batch in dataset:\n", - " with tf.GradientTape() as tape:\n", - " total_loss, recon_loss, svm_loss = model.compute_loss(x_batch, y_batch)\n", - " \n", - " # Backpropagation\n", - " gradients = tape.gradient(total_loss, model.trainable_variables)\n", - " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n", + " total_loss, recon_loss, svm_loss = train_step(x_batch, y_batch)\n", " \n", " epoch_loss += total_loss.numpy()\n", " epoch_recon += recon_loss.numpy()\n", " epoch_svm += svm_loss.numpy()\n", " n_batches += 1\n", " \n", - " # Average losses\n", " history['total_loss'].append(epoch_loss / n_batches)\n", " history['recon_loss'].append(epoch_recon / n_batches)\n", " history['svm_loss'].append(epoch_svm / n_batches)\n", " \n", - " if (epoch + 1) % 20 == 0:\n", + " if (epoch + 1) % 25 == 
0:\n", " print(f\"Epoch {epoch+1}/{epochs} - \"\n", " f\"Total: {history['total_loss'][-1]:.4f}, \"\n", " f\"Recon: {history['recon_loss'][-1]:.4f}, \"\n", @@ -633,13 +764,13 @@ " \n", " return model, history\n", "\n", - "print(\"Training function defined\")" + "print(\"Training function defined (with Dropout)\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "b6a04540", + "id": "ded352fe", "metadata": {}, "outputs": [], "source": [ @@ -648,23 +779,32 @@ " 'latent_dim': [5, 8],\n", " 'hidden_dim': [10, 16],\n", " 'ae_weight': [0.5, 1.0],\n", - " 'svm_weight': [0.5, 1.0, 2.0],\n", + " 'svm_weight': [0.5, 2.0],\n", " 'svm_C': [0.1, 1.0, 10.0],\n", - " 'reg': [0.0001, 0.001]\n", + " 'reg': [0.01, 0.001]\n", "}\n", "\n", - "n_splits = 5 # Weniger Splits wegen Rechenzeit\n", + "n_splits = 5\n", + "\n", + "# GPU-optimized batch size\n", + "gpus = tf.config.list_physical_devices('GPU')\n", + "if gpus:\n", + " BATCH_SIZE = 256 # larger batches for GPU\n", + " print(\"GPU detected - using batch size:\", BATCH_SIZE)\n", + "else:\n", + " BATCH_SIZE = 64 # smaller batches for CPU\n", + " print(\"CPU only - using batch size:\", BATCH_SIZE)\n", + "\n", "gkf = GroupKFold(n_splits=n_splits)\n", "\n", "print(f\"Starting Grid Search with {n_splits}-fold GroupKFold\")\n", - "print(f\"Parameter combinations: {len(list(ParameterGrid(param_grid)))}\")\n", - "print(\"This will take a while...\")" + "print(f\"Parameter combinations: {len(list(ParameterGrid(param_grid)))}\")" ] }, { "cell_type": "code", "execution_count": null, - "id": "228463ce", + "id": "5c36cc9d", "metadata": {}, "outputs": [], "source": [ @@ -685,7 +825,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c945fc87", + "id": "92216898", "metadata": {}, "outputs": [], "source": [ @@ -709,7 +849,7 @@ " for fold, (train_idx, val_idx) in enumerate(gkf.split(X_train_array, y_train_array, groups_train)):\n", " print(f\"\\nFold {fold + 1}/{n_splits}\")\n", " \n", - " X_fold_train = pd.DataFrame(X_train_array[train_idx], columns=X_train_full.columns)\n", + " X_fold_train = pd.DataFrame(X_train_array[train_idx], columns=X_train_full.columns) # DataFrame needed because train_joint_model calls X_train.values\n", " y_fold_train = y_train_array[train_idx]\n", " X_fold_val = X_train_array[val_idx]\n", " y_fold_val = y_train_array[val_idx]\n", @@ -718,10 +858,11 @@ " model, history = train_joint_model(\n", " X_fold_train, y_fold_train, groups_train[train_idx],\n", " model_params=params,\n", - " epochs=100, # Weniger Epochen für Grid Search\n", - " batch_size=64,\n", - " learning_rate=0.0001\n", - " )\n", + " epochs=100,\n", + " batch_size=BATCH_SIZE,\n", + " learning_rate=0.0001,\n", + " use_batchnorm=True,\n", + " dropout_rate=0.3)\n", " \n", " # Validate\n", " val_bal_acc, _ = evaluate_model(model, X_fold_val, y_fold_val)\n", @@ -755,12 +896,12 @@ { "cell_type": "code", "execution_count": null, - "id": "0a0606f5", + "id": "91182740", "metadata": {}, "outputs": [], "source": [ "results_df = pd.DataFrame(all_results)\n", - "results_df = results_df.sort_values('mean_cv_accuracy', ascending=False)\n", + "results_df = results_df.sort_values('mean_cv_bal_accuracy', ascending=False)\n", "\n", "print(\"\\nTop 10 configurations:\")\n", "print(results_df.head(10))\n", @@ -768,10 +909,10 @@ "# Plot\n", "plt.figure(figsize=(12, 6))\n", "plt.barh(range(min(10, len(results_df))), \n", - " results_df['mean_cv_accuracy'].head(10))\n", + " results_df['mean_cv_bal_accuracy'].head(10))\n", "plt.yticks(range(min(10, len(results_df))), \n", " [f\"Config {i+1}\" for i in 
range(min(10, len(results_df)))])\n", - "plt.xlabel('Mean CV Accuracy')\n", + "plt.xlabel('Mean CV Balanced Accuracy')\n", "plt.title('Top 10 Configurations')\n", "plt.tight_layout()\n", "plt.show()" ] }, @@ -780,7 +921,7 @@ { "cell_type": "code", "execution_count": null, - "id": "87906b05", + "id": "c6769a88", "metadata": {}, "outputs": [], "source": [ @@ -790,9 +931,11 @@ "final_model, final_history = train_joint_model(\n", " X_train_full, y_train_full, groups_train,\n", " model_params=best_params,\n", - " epochs=300, # Mehr Epochen für finales Training\n", - " batch_size=64,\n", - " learning_rate=0.0001\n", + " epochs=400, # more epochs for the final fit than during grid search\n", + " batch_size=BATCH_SIZE,\n", + " learning_rate=0.0001,\n", + " use_batchnorm=True,\n", + " dropout_rate=0.3 # same regularization settings as in the CV folds\n", ")\n", "\n", "print(\"\\nFinal model training completed!\")" ] }, @@ -801,7 +944,7 @@ { "cell_type": "code", "execution_count": null, - "id": "718137a8", + "id": "6dfeaa54", "metadata": {}, "outputs": [], "source": [ @@ -832,7 +975,7 @@ { "cell_type": "code", "execution_count": null, - "id": "02fbc5a2", + "id": "b14b63f3", "metadata": {}, "outputs": [], "source": [ @@ -858,7 +1001,7 @@ "print(f\"ROC-AUC: {roc_auc_score(y_test, decision_scaled):.4f}\")\n", "\n", "print(\"\\nConfusion Matrix:\")\n", - "cm = confusion_matrix(y_test, y_pred)\n", + "cm = confusion_matrix(y_test, y_pred, normalize='true')\n", "print(cm)\n", "\n", "print(\"\\nClassification Report:\")\n", @@ -867,7 +1010,7 @@ "# Visualize Confusion Matrix\n", "fig, ax = plt.subplots(figsize=(8, 6))\n", "disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Low Load (0)', 'High Load (1)'])\n", - "disp.plot(cmap='Blues', ax=ax, colorbar=True, values_format='d')\n", + "disp.plot(cmap='Blues', ax=ax, colorbar=True)\n", "ax.set_title('Confusion Matrix - Test Set', fontsize=14, fontweight='bold')\n", "plt.tight_layout()\n", "plt.show()" ] }, @@ -876,27 +1019,74 @@ { "cell_type": "code", "execution_count": null, - "id": "4c524bce", + "id": "a8787bc7", "metadata": {}, "outputs": [], "source": [ - "# Save entire model\n", - "final_model.save_weights('joint_ae_svm_weights.h5')\n", - "print(\"Model weights saved as 'joint_ae_svm_weights.h5'\")\n", + "import json\n", + "from datetime import datetime\n", "\n", - "# Save encoder separately\n", - "final_model.encoder.save('encoder_joint.keras')\n", - "print(\"Encoder saved as 'encoder_joint.keras'\")\n", + "# Timestamp for unique file names\n", + "timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n", "\n", - "# Save best parameters\n", - "with open('best_params_joint.pkl', 'wb') as f:\n", - " pickle.dump(best_params, f)\n", - "print(\"Best parameters saved as 'best_params_joint.pkl'\")" + "# 1. Save model weights\n", + "weights_path = f'joint_ae_svm_weights_{timestamp}.h5'\n", + "final_model.save_weights(weights_path)\n", + "print(f\"Model weights saved as '{weights_path}'\")\n", + "\n", + "# 2. Save encoder separately\n", + "encoder_path = f'encoder_joint_{timestamp}.keras'\n", + "final_model.encoder.save(encoder_path)\n", + "print(f\"Encoder saved as '{encoder_path}'\")\n", + "\n", + "# 3. 
Save best parameters + model architecture info\n", + "model_config = {\n", + " 'best_params': best_params,\n", + " 'input_dim': X_train_full.shape[1],\n", + " 'au_columns': au_columns,\n", + " 'timestamp': timestamp,\n", + " 'training_samples': len(X_train_full),\n", + " 'test_samples': len(X_test)\n", + "}\n", + "\n", + "config_pkl_path = f'model_config_joint_{timestamp}.pkl'\n", + "with open(config_pkl_path, 'wb') as f:\n", + " pickle.dump(model_config, f)\n", + "print(f\"Model config saved as '{config_pkl_path}'\")\n", + "\n", + "# 4. Also save as JSON (human-readable)\n", + "config_json_path = f'model_config_joint_{timestamp}.json'\n", + "with open(config_json_path, 'w') as f:\n", + " json_data = {k: v.tolist() if isinstance(v, np.ndarray) else v \n", + " for k, v in model_config.items() if k != 'au_columns'}\n", + " json_data['au_columns'] = au_columns # list is JSON-serializable\n", + " json.dump(json_data, f, indent=2)\n", + "print(f\"Model config (JSON) saved as '{config_json_path}'\")\n", + " \n", + "# 5. Save SVM weights separately\n", + "svm_weights_path = f'svm_weights_joint_{timestamp}.pkl'\n", + "svm_weights = {\n", + " 'w': final_model.svm.w.numpy(),\n", + " 'b': final_model.svm.b.numpy()\n", + "}\n", + "with open(svm_weights_path, 'wb') as f:\n", + " pickle.dump(svm_weights, f)\n", + "print(f\"SVM weights saved as '{svm_weights_path}'\")\n", + "\n", + "# 6. Save Grid Search Results\n", + "results_path = f'grid_search_results_{timestamp}.pkl'\n", + "with open(results_path, 'wb') as f:\n", + " pickle.dump(all_results, f)\n", + "print(f\"Grid search results saved as '{results_path}'\")\n", + "\n", + "print(f\"\\n✓ All models and configs saved with timestamp: {timestamp}\")\n", + "print(f\"\\nTo load this model later, use:\")\n", + "print(f\" load_joint_model('{weights_path}', '{config_pkl_path}')\")" ] }, { "cell_type": "markdown", - "id": "792c658d", + "id": "06a538c4", "metadata": {}, "source": [ "* doch mal svm ae pipeline?\n", @@ -904,6 +1094,198 @@ "* label hinzufügen\n", "* mad von CT verwenden oder wert anpassen, ggf. vergleich welches label wie oft vorkommt vorher und nachher. 
--> labelling schritt von CT übernehmen\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9da57ed", + "metadata": {}, + "outputs": [], + "source": [ + "def load_joint_model(weights_path='joint_ae_svm.weights.h5',\n", + " config_path='model_config_joint.pkl'):\n", + " \"\"\"\n", + " Load the trained joint AE-SVM model\n", + " \n", + " Returns: model, config dict\n", + " \"\"\"\n", + " # Load config\n", + " with open(config_path, 'rb') as f:\n", + " config = pickle.load(f)\n", + " \n", + " params = config['best_params']\n", + " input_dim = config['input_dim']\n", + " \n", + " # Rebuild model with same architecture\n", + " model = JointAESVM(\n", + " input_dim=input_dim,\n", + " latent_dim=params['latent_dim'],\n", + " hidden_dim=params['hidden_dim'],\n", + " ae_weight=params['ae_weight'],\n", + " svm_weight=params['svm_weight'],\n", + " svm_C=params['svm_C'],\n", + " reg=params['reg']\n", + " )\n", + " \n", + " # Dummy forward pass to build weights\n", + " dummy_input = tf.random.normal((1, input_dim))\n", + " _ = model(dummy_input, training=False)\n", + " \n", + " # Load weights\n", + " model.load_weights(weights_path)\n", + " print(f\"✓ Model loaded from {weights_path}\")\n", + " \n", + " return model, config\n", + "\n", + "print(\"Load function defined\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1a9c9fb", + "metadata": {}, + "outputs": [], + "source": [ + "# Test: load the model that was just saved (use the timestamped paths from the save cell)\n", + "loaded_model, loaded_config = load_joint_model(weights_path, config_pkl_path)\n", + "\n", + "# Test prediction\n", + "test_sample = X_test.values[:5]\n", + "test_sample_tf = tf.constant(test_sample, dtype=tf.float32)\n", + "_, svm_out, encoded = loaded_model(test_sample_tf, training=False)\n", + "\n", + "print(\"Test prediction successful!\")\n", + "print(f\"Encoded shape: {encoded.shape}\")\n", + "print(f\"SVM decisions: {svm_out.numpy().flatten()}\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e4528f50", + "metadata": {}, + "outputs": [], + "source": [ + "def predict_flexible(model, X_data, use_gpu=None):\n", + " \"\"\"\n", + " Predict on CPU or GPU\n", + " \n", + " Args:\n", + " model: the loaded model\n", + " X_data: Input data (numpy array or DataFrame)\n", + " use_gpu: True/False/None (None = auto-detect)\n", + " \n", + " Returns:\n", + " predictions, decision_values, encoded_features\n", + " \"\"\"\n", + " # Auto-detect GPU\n", + " if use_gpu is None:\n", + " gpus = tf.config.list_physical_devices('GPU')\n", + " use_gpu = len(gpus) > 0\n", + " \n", + " # Force CPU or GPU\n", + " device = '/GPU:0' if use_gpu else '/CPU:0'\n", + " \n", + " print(f\"Running prediction on: {device}\")\n", + " \n", + " with tf.device(device):\n", + " if isinstance(X_data, pd.DataFrame):\n", + " X_data = X_data.values\n", + " \n", + " X_tf = tf.constant(X_data, dtype=tf.float32)\n", + " _, svm_decision, encoded = model(X_tf, training=False)\n", + " \n", + " # Predictions: decision > 0 -> class 1\n", + " y_pred = (svm_decision.numpy().flatten() > 0).astype(int)\n", + " \n", + " return y_pred, svm_decision.numpy().flatten(), encoded.numpy()\n", + "\n", + "# Test on CPU\n", + "y_pred_cpu, decisions_cpu, encoded_cpu = predict_flexible(\n", + " loaded_model, X_test.values[:10], use_gpu=False\n", + ")\n", + "print(f\"CPU Predictions: {y_pred_cpu}\")\n", + "\n", + "# Test on GPU\n", + "y_pred_gpu, decisions_gpu, encoded_gpu = predict_flexible(\n", + " loaded_model, X_test.values[:10], use_gpu=True\n", + ")\n", + "print(f\"GPU Predictions: {y_pred_gpu}\")\n", + 
"\n", + "# Verify sie sind identisch\n", + "print(f\"\\nResults identical: {np.allclose(decisions_cpu, decisions_gpu)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1447bfbd", + "metadata": {}, + "outputs": [], + "source": [ + "# Diagnose: Was lernt das Modell?\n", + "print(\"=== MODEL DIAGNOSIS ===\\n\")\n", + "\n", + "# Check SVM weights\n", + "print(\"SVM Weights (w):\", final_model.svm.w.numpy()[:10], \"...\")\n", + "print(\"SVM Bias (b):\", final_model.svm.b.numpy())\n", + "print(\"SVM weight norm:\", np.linalg.norm(final_model.svm.w.numpy()))\n", + "\n", + "# Check predictions distribution\n", + "X_train_tf = tf.constant(X_train_full.values, dtype=tf.float32)\n", + "_, train_decisions, train_encoded = final_model(X_train_tf, training=False)\n", + "train_decisions = train_decisions.numpy().flatten()\n", + "train_encoded = train_encoded.numpy() # KORREKTUR!\n", + "\n", + "print(f\"\\nTraining set decisions:\")\n", + "print(f\" Min: {train_decisions.min():.4f}\")\n", + "print(f\" Max: {train_decisions.max():.4f}\")\n", + "print(f\" Mean: {train_decisions.mean():.4f}\")\n", + "print(f\" Std: {train_decisions.std():.4f}\")\n", + "\n", + "train_pred = (train_decisions > 0).astype(int)\n", + "print(f\"\\nTraining predictions distribution:\")\n", + "print(pd.Series(train_pred).value_counts())\n", + "print(f\"Training balanced accuracy: {balanced_accuracy_score(y_train_full, train_pred):.4f}\")\n", + "\n", + "# Check encoded features\n", + "print(f\"\\nEncoded features stats:\")\n", + "print(f\" Mean: {train_encoded.mean():.4f}\")\n", + "print(f\" Std: {train_encoded.std():.4f}\")\n", + "print(f\" Min: {train_encoded.min():.4f}\")\n", + "print(f\" Max: {train_encoded.max():.4f}\")\n", + "\n", + "# Check per class\n", + "print(f\"\\nEncoded features per class:\")\n", + "for label in [0, 1]:\n", + " mask = y_train_full == label\n", + " enc_class = train_encoded[mask]\n", + " print(f\" Class {label}: mean={enc_class.mean():.4f}, std={enc_class.std():.4f}\")\n", + "\n", + "# Test set diagnosis\n", + "print(\"\\n=== TEST SET DIAGNOSIS ===\\n\")\n", + "X_test_tf = tf.constant(X_test.values, dtype=tf.float32)\n", + "_, test_decisions, test_encoded = final_model(X_test_tf, training=False)\n", + "test_decisions = test_decisions.numpy().flatten()\n", + "test_encoded = test_encoded.numpy()\n", + "\n", + "print(f\"Test set decisions:\")\n", + "print(f\" Min: {test_decisions.min():.4f}\")\n", + "print(f\" Max: {test_decisions.max():.4f}\")\n", + "print(f\" Mean: {test_decisions.mean():.4f}\")\n", + "print(f\" Std: {test_decisions.std():.4f}\")\n", + "\n", + "test_pred = (test_decisions > 0).astype(int)\n", + "print(f\"\\nTest predictions distribution:\")\n", + "print(pd.Series(test_pred).value_counts())\n", + "print(f\"Test balanced accuracy: {balanced_accuracy_score(y_test, test_pred):.4f}\")\n", + "\n", + "# Vergleich Train vs Test encoded features\n", + "print(f\"\\n=== TRAIN vs TEST Encoded Features ===\")\n", + "print(f\"Train encoded - Mean: {train_encoded.mean():.4f}, Std: {train_encoded.std():.4f}\")\n", + "print(f\"Test encoded - Mean: {test_encoded.mean():.4f}, Std: {test_encoded.std():.4f}\")" + ] } ], "metadata": { @@ -911,6 +1293,18 @@ "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 
"3.12.10" } }, "nbformat": 4,