{
"cells": [
{
"cell_type": "markdown",
"id": "bcbd4937",
"metadata": {},
"source": [
"### Imports"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7670c30e",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from pathlib import Path\n",
"import sys\n",
"import os\n",
"\n",
"base_dir = os.path.abspath(os.path.join(os.getcwd(), \"..\"))\n",
"sys.path.append(base_dir)\n",
"print(base_dir)\n",
"print(os.getcwd())\n",
"# from Fahrsimulator_MSY2526_AI.model_training.tools import evaluation_tools, scaler, mad_outlier_removal\n",
"\n",
"from tools import evaluation_tools, scaler, mad_outlier_removal\n",
"from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
"from sklearn.svm import OneClassSVM\n",
"from sklearn.model_selection import GridSearchCV, KFold, ParameterGrid, train_test_split, GroupKFold\n",
"import matplotlib.pyplot as plt\n",
"import tensorflow as tf\n",
"import pickle\n",
"from sklearn.metrics import (roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, balanced_accuracy_score, ConfusionMatrixDisplay)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "59b2b100",
"metadata": {},
"outputs": [],
"source": [
"# Check GPU availability\n",
"print(\"TensorFlow version:\", tf.__version__)\n",
"print(\"GPU Available:\", tf.config.list_physical_devices('GPU'))\n",
"print(\"CUDA Available:\", tf.test.is_built_with_cuda())\n",
"\n",
"# Get detailed GPU info\n",
"gpus = tf.config.list_physical_devices('GPU')\n",
"if gpus:\n",
"    print(f\"\\nNumber of GPUs: {len(gpus)}\")\n",
"    for gpu in gpus:\n",
"        print(f\"GPU: {gpu}\")\n",
"\n",
"    # Enable memory growth to prevent TF from allocating all GPU memory\n",
"    try:\n",
"        for gpu in gpus:\n",
"            tf.config.experimental.set_memory_growth(gpu, True)\n",
"        print(\"\\nGPU memory growth enabled\")\n",
"    except RuntimeError as e:\n",
"        print(e)\n",
"else:\n",
"    print(\"\\nNo GPU found - running on CPU\")"
]
},
{
"cell_type": "markdown",
"id": "b002d3c8",
"metadata": {},
"source": [
"### Load Dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1620827e",
"metadata": {},
"outputs": [],
"source": [
"dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/first_AU_dataset/output_windowed.parquet\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "854240b8",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_parquet(path=dataset_path)"
]
},
{
"cell_type": "markdown",
"id": "69b21772",
"metadata": {},
"source": [
"### Load Performance data and Subject Split"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ff894fda",
"metadata": {},
"outputs": [],
"source": [
"performance_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/subject_performance/3new_au_performance.csv\")\n",
"performance_df = pd.read_csv(performance_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4b2b789b",
"metadata": {},
"outputs": [],
"source": [
"# Take the subject IDs from the main dataset\n",
"subjects_from_df = df[\"subjectID\"].unique()\n",
"\n",
"# Performance subset restricted to subjects present in df\n",
"perf_filtered = performance_df[\n",
"    performance_df[\"subjectID\"].isin(subjects_from_df)\n",
"][[\"subjectID\", \"overall_score\"]]\n",
"\n",
"# Merge: keep only subjects that appear in both df and the performance CSV\n",
"merged = (\n",
"    pd.DataFrame({\"subjectID\": subjects_from_df})\n",
"    .merge(perf_filtered, on=\"subjectID\", how=\"inner\")\n",
")\n",
"\n",
"# Make sure no scores are missing\n",
"if merged[\"overall_score\"].isna().any():\n",
"    raise ValueError(\"Score values are missing for some subjects.\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e7336051",
"metadata": {},
"outputs": [],
"source": [
"merged_sorted = merged.sort_values(\"overall_score\", ascending=False).reset_index(drop=True)\n",
"\n",
"scores = merged_sorted[\"overall_score\"].values\n",
"n_total = len(merged_sorted)\n",
"n_small = n_total // 3\n",
"n_large = n_total - n_small\n",
"\n",
"# Step 1: random initial split\n",
"# (call np.random.seed(...) beforehand if the split should be reproducible)\n",
"idx = np.arange(n_total)\n",
"np.random.shuffle(idx)\n",
"\n",
"small_idx = idx[:n_small]\n",
"large_idx = idx[n_small:]\n",
"\n",
"def score_diff(small_idx, large_idx):\n",
"    return abs(scores[small_idx].mean() - scores[large_idx].mean())\n",
"\n",
"diff = score_diff(small_idx, large_idx)\n",
"threshold = 0.01\n",
"max_iter = 100\n",
"count = 0\n",
"\n",
"# Step 2: random swaps until the mean difference is small enough\n",
"while diff > threshold and count < max_iter:\n",
"    # Pick one random element from each group\n",
"    si = np.random.choice(small_idx)\n",
"    li = np.random.choice(large_idx)\n",
"\n",
"    # Perform the swap\n",
"    new_small_idx = small_idx.copy()\n",
"    new_large_idx = large_idx.copy()\n",
"\n",
"    new_small_idx[new_small_idx == si] = li\n",
"    new_large_idx[new_large_idx == li] = si\n",
"\n",
"    # Compute the new difference\n",
"    new_diff = score_diff(new_small_idx, new_large_idx)\n",
"\n",
"    # Accept the swap only if it improves the difference\n",
"    if new_diff < diff:\n",
"        small_idx = new_small_idx\n",
"        large_idx = new_large_idx\n",
"        diff = new_diff\n",
"\n",
"    count += 1\n",
"\n",
"# Final groups\n",
"group_small = merged_sorted.loc[small_idx].reset_index(drop=True)\n",
"group_large = merged_sorted.loc[large_idx].reset_index(drop=True)\n",
"\n",
"print(\"Final score difference:\", diff)\n",
"print(\"Group 1 size:\", len(group_small))\n",
"print(\"Group 2 size:\", len(group_large))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "96d9241d",
"metadata": {},
"outputs": [],
"source": [
"group_large['overall_score'].mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8c41544e",
"metadata": {},
"outputs": [],
"source": [
"group_small['overall_score'].mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5a110ca6",
"metadata": {},
"outputs": [],
"source": [
"training_subjects = group_large['subjectID'].values\n",
"test_subjects = group_small['subjectID'].values\n",
"print(training_subjects)\n",
"print(test_subjects)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b64d8c2b",
"metadata": {},
"outputs": [],
"source": [
"au_columns = [col for col in df.columns if col.lower().startswith(\"au\")]"
]
},
{
"cell_type": "markdown",
"id": "3d7adcd9",
"metadata": {},
"source": [
"Labeling"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e563d890",
"metadata": {},
"outputs": [],
"source": [
"low_all = df[\n",
"    ((df[\"PHASE\"] == \"baseline\") |\n",
"    ((df[\"STUDY\"] == \"n-back\") & (df[\"PHASE\"] != \"baseline\") & (df[\"LEVEL\"].isin([1, 4]))))\n",
"]\n",
"print(f\"low all: {low_all.shape}\")\n",
"\n",
"high_nback = df[\n",
"    (df[\"STUDY\"]==\"n-back\") &\n",
"    (df[\"LEVEL\"].isin([2, 3, 5, 6])) &\n",
"    (df[\"PHASE\"].isin([\"train\", \"test\"]))\n",
"]\n",
"print(f\"high n-back: {high_nback.shape}\")\n",
"\n",
"high_kdrive = df[\n",
"    (df[\"STUDY\"] == \"k-drive\") & (df[\"PHASE\"] != \"baseline\")\n",
"]\n",
"print(f\"high k-drive: {high_kdrive.shape}\")\n",
"\n",
"high_all = pd.concat([high_nback, high_kdrive])\n",
"print(f\"high all: {high_all.shape}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c44eafa9",
"metadata": {},
"outputs": [],
"source": [
"low = low_all.copy()\n",
"high = high_all.copy()\n",
"\n",
"low[\"label\"] = 0\n",
"high[\"label\"] = 1\n",
"\n",
"data = pd.concat([low, high], ignore_index=True)\n",
"df = data.drop_duplicates()\n",
"\n",
"print(\"Label distribution:\")\n",
"print(df[\"label\"].value_counts())"
]
},
{
"cell_type": "markdown",
"id": "d110bd77",
"metadata": {},
"source": [
"### Data Cleaning with MAD"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1cea8fa4",
"metadata": {},
"outputs": [],
"source": [
"# Method from CT\n",
"def calculate_mad_params(df, columns):\n",
"    \"\"\"\n",
"    Calculate median and MAD parameters for each column.\n",
"    This should be run ONLY on the training data.\n",
"\n",
"    Returns a dictionary: {col: (median, mad)}\n",
"    \"\"\"\n",
"    params = {}\n",
"    for col in columns:\n",
"        median = df[col].median()\n",
"        mad = np.median(np.abs(df[col] - median))\n",
"        params[col] = (median, mad)\n",
"    return params\n",
"\n",
"def apply_mad_filter(df, params, threshold=3.5):\n",
"    \"\"\"\n",
"    Apply MAD-based outlier removal using precomputed parameters.\n",
"    Works on training, validation, and test data.\n",
"\n",
"    df: DataFrame to filter\n",
"    params: dictionary {col: (median, mad)} from training data\n",
"    threshold: cutoff for robust Z-score\n",
"    \"\"\"\n",
"    df_clean = df.copy()\n",
"\n",
"    for col, (median, mad) in params.items():\n",
"        if mad == 0:\n",
"            continue  # no spread; nothing to remove for this column\n",
"\n",
"        robust_z = 0.6745 * (df_clean[col] - median) / mad\n",
"        outlier_mask = np.abs(robust_z) > threshold\n",
"\n",
"        # Remove values only in this specific column\n",
"        df_clean.loc[outlier_mask, col] = np.nan\n",
"\n",
"    return df_clean"
]
},
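{
"cell_type": "markdown",
"id": "mad-robust-z-note",
"metadata": {},
"source": [
"The filter above uses the modified z-score $z_i = 0.6745 \\cdot (x_i - \\tilde{x}) / \\mathrm{MAD}$, where $\\tilde{x}$ is the training median and $\\mathrm{MAD} = \\mathrm{median}(|x_i - \\tilde{x}|)$. The constant $0.6745 \\approx \\Phi^{-1}(0.75)$ rescales the MAD so that it estimates the standard deviation under normality, which makes the threshold comparable to an ordinary z-score cutoff."
]
},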
{
"cell_type": "code",
"execution_count": null,
"id": "8aa01ada",
"metadata": {},
"outputs": [],
"source": [
"train_df = df[df.subjectID.isin(training_subjects)]\n",
"test_df = df[df.subjectID.isin(test_subjects)]\n",
"print(train_df.shape, test_df.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "857c0ffd",
"metadata": {},
"outputs": [],
"source": [
"# Step 1: Compute MAD parameters on the training data only\n",
"params = calculate_mad_params(train_df, au_columns)\n",
"\n",
"# Step 2: Apply the filter consistently to train and test\n",
"train_outlier_removed = apply_mad_filter(train_df, params, threshold=7)\n",
"test_outlier_removed = apply_mad_filter(test_df, params, threshold=7)\n",
"print(train_outlier_removed.shape, test_outlier_removed.shape)"
]
},
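{
"cell_type": "markdown",
"id": "mad-check-note",
"metadata": {},
"source": [
"Optional sanity check, a minimal sketch using only variables defined above: count how many values the MAD filter set to NaN per AU column, to judge whether `threshold=7` removes a plausible amount."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "mad-check-code",
"metadata": {},
"outputs": [],
"source": [
"# Values newly set to NaN by the MAD filter, per AU column (sketch)\n",
"removed_per_col = (\n",
"    train_outlier_removed[au_columns].isna().sum()\n",
"    - train_df[au_columns].isna().sum()\n",
")\n",
"print(removed_per_col.sort_values(ascending=False).head(10))\n",
"print(f\"Total removed: {removed_per_col.sum()} of {train_df[au_columns].size} values\")"
]
},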
{
"cell_type": "markdown",
"id": "f9c5b562",
"metadata": {},
"source": [
"Normalization of the data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "162163ae",
"metadata": {},
"outputs": [],
"source": [
"normalizer = scaler.fit_normalizer(train_df, au_columns=au_columns, method='standard', scope='global')\n",
"train_df_normal = scaler.apply_normalizer(train_df, au_columns=au_columns, normalizer_dict=normalizer)\n",
"test_df_normal = scaler.apply_normalizer(test_df, au_columns=au_columns, normalizer_dict=normalizer)\n",
"# Note: train_df_normal / test_df_normal are not used further below yet;\n",
"# the AE cells currently train on the MAD-filtered, unnormalized features."
]
},
{
"cell_type": "markdown",
"id": "ec1548c2",
"metadata": {},
"source": [
"TODO: insert GroupKFold for train_df_normal (see the sketch below)"
]
},
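{
"cell_type": "markdown",
"id": "gkf-sketch-note",
"metadata": {},
"source": [
"A minimal sketch for the TODO above, assuming `train_df_normal` still carries a `subjectID` column after `scaler.apply_normalizer` (a custom helper, so verify). The names `gkf_normal`, `X_normal`, and `groups_normal` are new here; this only prints the fold layout, and the per-fold training still has to be plugged in."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "gkf-sketch-code",
"metadata": {},
"outputs": [],
"source": [
"# Sketch: subject-wise CV folds over the normalized training data\n",
"gkf_normal = GroupKFold(n_splits=5)\n",
"X_normal = train_df_normal[au_columns]\n",
"groups_normal = train_df_normal[\"subjectID\"]\n",
"\n",
"for fold, (tr_idx, va_idx) in enumerate(gkf_normal.split(X_normal, groups=groups_normal)):\n",
"    print(f\"Fold {fold + 1}: {len(tr_idx)} train rows \"\n",
"          f\"({groups_normal.iloc[tr_idx].nunique()} subjects), \"\n",
"          f\"{len(va_idx)} val rows \"\n",
"          f\"({groups_normal.iloc[va_idx].nunique()} subjects)\")"
]
},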
{
"cell_type": "markdown",
"id": "be77010e",
"metadata": {},
"source": [
"### AE first"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "462d33eb",
"metadata": {},
"outputs": [],
"source": [
"# Both classes for AE and SVM training\n",
"X_train_full = train_outlier_removed[au_columns].dropna()\n",
"y_train_full = train_outlier_removed.loc[X_train_full.index, 'label'].values\n",
"groups_train = train_outlier_removed.loc[X_train_full.index, 'subjectID'].values\n",
"\n",
"print(f\"Training data shape (before balancing): {X_train_full.shape}\")\n",
"print(f\"Label distribution (before balancing): {pd.Series(y_train_full).value_counts()}\")\n",
"\n",
"# Test data\n",
"X_test = test_outlier_removed[au_columns].dropna()\n",
"y_test = test_outlier_removed.loc[X_test.index, 'label'].values\n",
"\n",
"print(f\"Test data shape: {X_test.shape}\")\n",
"print(f\"Label distribution in test: {pd.Series(y_test).value_counts()}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dc757b7d",
"metadata": {},
"outputs": [],
"source": [
"# Class balancing by undersampling the majority class\n",
"from sklearn.utils import resample\n",
"\n",
"# Split by label\n",
"X_train_class0 = X_train_full[y_train_full == 0]\n",
"X_train_class1 = X_train_full[y_train_full == 1]\n",
"groups_class0 = groups_train[y_train_full == 0]\n",
"groups_class1 = groups_train[y_train_full == 1]\n",
"\n",
"print(f\"\\nBefore balancing - Class 0: {len(X_train_class0)}, Class 1: {len(X_train_class1)}\")\n",
"\n",
"# Undersample the majority class (assumed here to be class 1;\n",
"# the final check below verifies the result is balanced)\n",
"n_samples = min(len(X_train_class0), len(X_train_class1))\n",
"\n",
"X_class1_downsampled, groups_class1_downsampled = resample(\n",
"    X_train_class1, \n",
"    groups_class1,\n",
"    n_samples=n_samples,\n",
"    random_state=42,\n",
"    replace=False\n",
")\n",
"\n",
"# Combine the balanced data\n",
"X_train_full = pd.concat([X_train_class0, X_class1_downsampled]).reset_index(drop=True)\n",
"y_train_full = np.concatenate([\n",
"    np.zeros(len(X_train_class0)),\n",
"    np.ones(len(X_class1_downsampled))\n",
"])\n",
"groups_train = np.concatenate([groups_class0, groups_class1_downsampled])\n",
"\n",
"# Shuffle\n",
"shuffle_idx = np.random.permutation(len(X_train_full))\n",
"X_train_full = X_train_full.iloc[shuffle_idx].reset_index(drop=True)\n",
"y_train_full = y_train_full[shuffle_idx]\n",
"groups_train = groups_train[shuffle_idx]\n",
"\n",
"# Verify balancing worked\n",
"print(\"\\n=== DATA CHECK AFTER BALANCING ===\")\n",
"print(f\"Training - Class 0: {(y_train_full==0).sum()}, Class 1: {(y_train_full==1).sum()}\")\n",
"print(f\"Test - Class 0: {(y_test==0).sum()}, Class 1: {(y_test==1).sum()}\")\n",
"print(f\"Training balanced: {(y_train_full==0).sum() == (y_train_full==1).sum()}\")"
]
},
{
"cell_type": "markdown",
"id": "530e4acf",
"metadata": {},
"source": [
"### Custom SVM Layer (differentiable approximation)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4abbabe8",
"metadata": {},
"outputs": [],
"source": [
"class DifferentiableSVM(tf.keras.layers.Layer):\n",
"    \"\"\"\n",
"    Differentiable SVM Layer using hinge loss.\n",
"    This allows backpropagation through the SVM to the encoder.\n",
"    \"\"\"\n",
"    def __init__(self, C=1.0, **kwargs):\n",
"        super(DifferentiableSVM, self).__init__(**kwargs)\n",
"        self.C = C\n",
"\n",
"    def build(self, input_shape):\n",
"        # SVM weights: w and bias b\n",
"        self.w = self.add_weight(\n",
"            shape=(input_shape[-1],),\n",
"            initializer='glorot_uniform',\n",
"            trainable=True,\n",
"            name='svm_w'\n",
"        )\n",
"        self.b = self.add_weight(\n",
"            shape=(1,),\n",
"            initializer='zeros',\n",
"            trainable=True,\n",
"            name='svm_b'\n",
"        )\n",
"\n",
"    def call(self, inputs):\n",
"        # Decision function: w^T * x + b\n",
"        decision = tf.reduce_sum(inputs * self.w, axis=1, keepdims=True) + self.b\n",
"        return decision\n",
"\n",
"    def compute_loss(self, inputs, labels):\n",
"        \"\"\"\n",
"        Hinge loss for SVM: max(0, 1 - y * (w^T * x + b))\n",
"        labels should be -1 or +1\n",
"        \"\"\"\n",
"        decision = self.call(inputs)\n",
"\n",
"        # Convert labels from 0/1 to -1/+1\n",
"        labels_svm = tf.where(labels == 0, -1.0, 1.0)\n",
"        labels_svm = tf.cast(labels_svm, tf.float32)\n",
"        labels_svm = tf.reshape(labels_svm, (-1, 1))\n",
"\n",
"        # Hinge loss\n",
"        hinge_loss = tf.reduce_mean(\n",
"            tf.maximum(0.0, 1.0 - labels_svm * decision)\n",
"        )\n",
"\n",
"        # L2 regularization\n",
"        l2_loss = 0.5 * tf.reduce_sum(tf.square(self.w))\n",
"\n",
"        return self.C * hinge_loss + l2_loss"
]
},
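{
"cell_type": "markdown",
"id": "svm-objective-note",
"metadata": {},
"source": [
"The layer above minimizes the linear soft-margin SVM objective\n",
"\n",
"$$\\min_{w,b} \\; \\tfrac{1}{2}\\lVert w \\rVert^2 + C \\cdot \\tfrac{1}{N} \\sum_{i=1}^{N} \\max\\bigl(0, 1 - y_i (w^\\top x_i + b)\\bigr),$$\n",
"\n",
"with labels mapped from $\\{0, 1\\}$ to $\\{-1, +1\\}$, exactly as in `compute_loss`. Note that the hinge term is averaged over the batch, so `C` scales the mean hinge loss rather than a per-sample sum."
]
},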
{
"cell_type": "code",
"execution_count": null,
"id": "61b8978e",
"metadata": {},
"outputs": [],
"source": [
"class JointAESVM(tf.keras.Model):\n",
"    \"\"\"\n",
"    Joint Autoencoder + SVM Model with Batch Normalization and Dropout\n",
"    \"\"\"\n",
"    def __init__(self, input_dim, latent_dim=5, hidden_dim=16, ae_weight=1.0, \n",
"                 svm_weight=1.0, svm_C=1.0, reg=0.0001, \n",
"                 use_batchnorm=True, dropout_rate=0.3, **kwargs):\n",
"        super(JointAESVM, self).__init__(**kwargs)\n",
"\n",
"        self.ae_weight = ae_weight\n",
"        self.svm_weight = svm_weight\n",
"        self.use_batchnorm = use_batchnorm\n",
"        self.dropout_rate = dropout_rate\n",
"\n",
"        # Encoder with BatchNorm and Dropout\n",
"        encoder_layers = []\n",
"\n",
"        encoder_layers.append(tf.keras.layers.Dense(\n",
"            input_dim, \n",
"            activation=None,\n",
"            kernel_regularizer=tf.keras.regularizers.l2(reg)\n",
"        ))\n",
"        if use_batchnorm:\n",
"            encoder_layers.append(tf.keras.layers.BatchNormalization())\n",
"        encoder_layers.append(tf.keras.layers.Activation('relu'))\n",
"        if dropout_rate > 0:\n",
"            encoder_layers.append(tf.keras.layers.Dropout(dropout_rate))\n",
"\n",
"        encoder_layers.append(tf.keras.layers.Dense(\n",
"            hidden_dim,\n",
"            activation=None,\n",
"            kernel_regularizer=tf.keras.regularizers.l2(reg)\n",
"        ))\n",
"        if use_batchnorm:\n",
"            encoder_layers.append(tf.keras.layers.BatchNormalization())\n",
"        encoder_layers.append(tf.keras.layers.Activation('relu'))\n",
"        if dropout_rate > 0:\n",
"            encoder_layers.append(tf.keras.layers.Dropout(dropout_rate))\n",
"\n",
"        encoder_layers.append(tf.keras.layers.Dense(\n",
"            latent_dim,\n",
"            activation=None,\n",
"            kernel_regularizer=tf.keras.regularizers.l2(reg)\n",
"        ))\n",
"        if use_batchnorm:\n",
"            encoder_layers.append(tf.keras.layers.BatchNormalization())\n",
"        encoder_layers.append(tf.keras.layers.Activation('relu'))\n",
"        # No dropout on the latent layer!\n",
"\n",
"        self.encoder = tf.keras.Sequential(encoder_layers, name='encoder')\n",
"\n",
"        # Decoder with BatchNorm and Dropout\n",
"        decoder_layers = []\n",
"\n",
"        decoder_layers.append(tf.keras.layers.Dense(\n",
"            latent_dim,\n",
"            activation=None,\n",
"            kernel_regularizer=tf.keras.regularizers.l2(reg)\n",
"        ))\n",
"        if use_batchnorm:\n",
"            decoder_layers.append(tf.keras.layers.BatchNormalization())\n",
"        decoder_layers.append(tf.keras.layers.Activation('relu'))\n",
"        if dropout_rate > 0:\n",
"            decoder_layers.append(tf.keras.layers.Dropout(dropout_rate))\n",
"\n",
"        decoder_layers.append(tf.keras.layers.Dense(\n",
"            hidden_dim,\n",
"            activation=None,\n",
"            kernel_regularizer=tf.keras.regularizers.l2(reg)\n",
"        ))\n",
"        if use_batchnorm:\n",
"            decoder_layers.append(tf.keras.layers.BatchNormalization())\n",
"        decoder_layers.append(tf.keras.layers.Activation('relu'))\n",
"        if dropout_rate > 0:\n",
"            decoder_layers.append(tf.keras.layers.Dropout(dropout_rate))\n",
"\n",
"        decoder_layers.append(tf.keras.layers.Dense(\n",
"            input_dim,\n",
"            activation='linear',\n",
"            kernel_regularizer=tf.keras.regularizers.l2(reg)\n",
"        ))\n",
"\n",
"        self.decoder = tf.keras.Sequential(decoder_layers, name='decoder')\n",
"\n",
"        # SVM Layer\n",
"        self.svm = DifferentiableSVM(C=svm_C, name='svm')\n",
"\n",
"    def call(self, inputs, training=False):\n",
"        encoded = self.encoder(inputs, training=training)\n",
"        decoded = self.decoder(encoded, training=training)\n",
"        svm_output = self.svm(encoded)\n",
"\n",
"        return decoded, svm_output, encoded\n",
"\n",
"    def compute_loss(self, x, y_true):\n",
"        x_reconstructed, svm_decision, encoded = self(x, training=True)\n",
"\n",
"        reconstruction_loss = tf.reduce_mean(\n",
"            tf.square(x - x_reconstructed)\n",
"        )\n",
"\n",
"        svm_loss = self.svm.compute_loss(encoded, y_true)\n",
"\n",
"        # Note: the kernel_regularizer penalties accumulate in self.losses and are\n",
"        # not added automatically in this custom loop; include tf.add_n(self.losses)\n",
"        # here if the 'reg' L2 terms are meant to affect training.\n",
"        total_loss = (self.ae_weight * reconstruction_loss + \n",
"                      self.svm_weight * svm_loss)\n",
"\n",
"        return total_loss, reconstruction_loss, svm_loss\n",
"\n",
"print(\"Joint AE-SVM Model class defined (with BatchNorm + Dropout)\")"
]
},
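{
"cell_type": "markdown",
"id": "joint-loss-note",
"metadata": {},
"source": [
"The joint objective optimized below is\n",
"\n",
"$$\\mathcal{L} = \\lambda_{\\mathrm{AE}} \\cdot \\mathrm{MSE}(x, \\hat{x}) + \\lambda_{\\mathrm{SVM}} \\cdot \\bigl(C \\cdot \\mathrm{hinge} + \\tfrac{1}{2}\\lVert w \\rVert^2\\bigr),$$\n",
"\n",
"where $\\lambda_{\\mathrm{AE}}$ and $\\lambda_{\\mathrm{SVM}}$ are `ae_weight` and `svm_weight`. Because the hinge loss is computed on the encoder output, its gradients flow back into the encoder, so the latent space is shaped jointly for reconstruction and class separability."
]
},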
{
"cell_type": "markdown",
"id": "445e10ff",
"metadata": {},
"source": [
"Train function"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ae498e69",
"metadata": {},
"outputs": [],
"source": [
"def train_joint_model(X_train, y_train, groups, model_params, \n",
"                      epochs=200, batch_size=64, learning_rate=0.0001,\n",
"                      use_batchnorm=True, dropout_rate=0.3):\n",
"    \"\"\"\n",
"    Train the joint model on the given data - GPU optimized.\n",
"    'groups' is accepted for symmetry with the CV loop but is not used here.\n",
"    \"\"\"\n",
"    # Build model\n",
"    model = JointAESVM(\n",
"        input_dim=X_train.shape[1],\n",
"        latent_dim=model_params['latent_dim'],\n",
"        hidden_dim=model_params['hidden_dim'],\n",
"        ae_weight=model_params['ae_weight'],\n",
"        svm_weight=model_params['svm_weight'],\n",
"        svm_C=model_params['svm_C'],\n",
"        reg=model_params['reg'],\n",
"        use_batchnorm=use_batchnorm,\n",
"        dropout_rate=dropout_rate\n",
"    )\n",
"\n",
"    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)\n",
"\n",
"    history = {\n",
"        'total_loss': [],\n",
"        'recon_loss': [],\n",
"        'svm_loss': []\n",
"    }\n",
"\n",
"    X_train_tf = tf.constant(X_train.values, dtype=tf.float32)\n",
"    y_train_tf = tf.constant(y_train, dtype=tf.float32)\n",
"\n",
"    dataset = tf.data.Dataset.from_tensor_slices((X_train_tf, y_train_tf))\n",
"    dataset = dataset.shuffle(buffer_size=min(10000, len(X_train)), \n",
"                              reshuffle_each_iteration=True)\n",
"    dataset = dataset.batch(batch_size)\n",
"    dataset = dataset.prefetch(tf.data.AUTOTUNE)\n",
"\n",
"    @tf.function\n",
"    def train_step(x_batch, y_batch):\n",
"        with tf.GradientTape() as tape:\n",
"            total_loss, recon_loss, svm_loss = model.compute_loss(x_batch, y_batch)\n",
"\n",
"        gradients = tape.gradient(total_loss, model.trainable_variables)\n",
"        optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n",
"\n",
"        return total_loss, recon_loss, svm_loss\n",
"\n",
"    for epoch in range(epochs):\n",
"        epoch_loss = 0.0\n",
"        epoch_recon = 0.0\n",
"        epoch_svm = 0.0\n",
"        n_batches = 0\n",
"\n",
"        for x_batch, y_batch in dataset:\n",
"            total_loss, recon_loss, svm_loss = train_step(x_batch, y_batch)\n",
"\n",
"            epoch_loss += total_loss.numpy()\n",
"            epoch_recon += recon_loss.numpy()\n",
"            epoch_svm += svm_loss.numpy()\n",
"            n_batches += 1\n",
"\n",
"        history['total_loss'].append(epoch_loss / n_batches)\n",
"        history['recon_loss'].append(epoch_recon / n_batches)\n",
"        history['svm_loss'].append(epoch_svm / n_batches)\n",
"\n",
"        if (epoch + 1) % 25 == 0:\n",
"            print(f\"Epoch {epoch+1}/{epochs} - \"\n",
"                  f\"Total: {history['total_loss'][-1]:.4f}, \"\n",
"                  f\"Recon: {history['recon_loss'][-1]:.4f}, \"\n",
"                  f\"SVM: {history['svm_loss'][-1]:.4f}\")\n",
"\n",
"    return model, history\n",
"\n",
"print(\"Training function defined (with Dropout)\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ded352fe",
"metadata": {},
"outputs": [],
"source": [
"# Parameter grid\n",
"param_grid = {\n",
"    'latent_dim': [5, 8],\n",
"    'hidden_dim': [10, 16],\n",
"    'ae_weight': [0.5, 1.0],\n",
"    'svm_weight': [0.5, 2.0],\n",
"    'svm_C': [0.1, 1.0, 10.0],\n",
"    'reg': [0.01, 0.001]\n",
"}\n",
"\n",
"n_splits = 5\n",
"\n",
"# GPU-optimized batch size\n",
"gpus = tf.config.list_physical_devices('GPU')\n",
"if gpus:\n",
"    BATCH_SIZE = 256  # larger batches for the GPU\n",
"    print(\"GPU detected - using batch size:\", BATCH_SIZE)\n",
"else:\n",
"    BATCH_SIZE = 64  # smaller batches for the CPU\n",
"    print(\"CPU only - using batch size:\", BATCH_SIZE)\n",
"\n",
"gkf = GroupKFold(n_splits=n_splits)\n",
"\n",
"print(f\"Starting Grid Search with {n_splits}-fold GroupKFold\")\n",
"print(f\"Parameter combinations: {len(list(ParameterGrid(param_grid)))}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5c36cc9d",
"metadata": {},
"outputs": [],
"source": [
"def evaluate_model(model, X, y):\n",
"    \"\"\"Evaluate joint model\"\"\"\n",
"    X_tf = tf.constant(X, dtype=tf.float32)\n",
"    _, svm_decision, _ = model(X_tf, training=False)\n",
"\n",
"    # Predict: decision > 0 -> class 1, else class 0\n",
"    y_pred = (svm_decision.numpy().flatten() > 0).astype(int)\n",
"\n",
"    bal_accuracy = balanced_accuracy_score(y, y_pred)\n",
"    return bal_accuracy, y_pred\n",
"\n",
"print(\"Evaluation function defined\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "92216898",
"metadata": {},
"outputs": [],
"source": [
"# Grid Search\n",
"best_score = -np.inf\n",
"best_params = None\n",
"best_model = None\n",
"all_results = []\n",
"\n",
"X_train_array = X_train_full.values\n",
"y_train_array = y_train_full\n",
"\n",
"for param_idx, params in enumerate(ParameterGrid(param_grid)):\n",
"    print(f\"\\n{'='*60}\")\n",
"    print(f\"Testing parameters {param_idx + 1}/{len(list(ParameterGrid(param_grid)))}\")\n",
"    print(f\"Params: {params}\")\n",
"    print(f\"{'='*60}\")\n",
"\n",
"    fold_scores = []\n",
"\n",
"    for fold, (train_idx, val_idx) in enumerate(gkf.split(X_train_array, y_train_array, groups_train)):\n",
"        print(f\"\\nFold {fold + 1}/{n_splits}\")\n",
"\n",
"        # DataFrame here because train_joint_model accesses .values\n",
"        X_fold_train = pd.DataFrame(X_train_array[train_idx], columns=X_train_full.columns)\n",
"        y_fold_train = y_train_array[train_idx]\n",
"        X_fold_val = X_train_array[val_idx]\n",
"        y_fold_val = y_train_array[val_idx]\n",
"\n",
"        # Train model\n",
"        model, history = train_joint_model(\n",
"            X_fold_train, y_fold_train, groups_train[train_idx],\n",
"            model_params=params,\n",
"            epochs=100,\n",
"            batch_size=BATCH_SIZE,\n",
"            learning_rate=0.0001,\n",
"            use_batchnorm=True,\n",
"            dropout_rate=0.3)\n",
"\n",
"        # Validate\n",
"        val_bal_acc, _ = evaluate_model(model, X_fold_val, y_fold_val)\n",
"        fold_scores.append(val_bal_acc)\n",
"        print(f\"Fold {fold + 1} Validation balanced Accuracy: {val_bal_acc:.4f}\")\n",
"\n",
"    mean_score = np.mean(fold_scores)\n",
"    std_score = np.std(fold_scores)\n",
"\n",
"    result = {\n",
"        **params,\n",
"        'mean_cv_bal_accuracy': mean_score,\n",
"        'std_cv_bal_accuracy': std_score\n",
"    }\n",
"    all_results.append(result)\n",
"\n",
"    print(f\"\\nMean CV bal. Accuracy: {mean_score:.4f} ± {std_score:.4f}\")\n",
"\n",
"    if mean_score > best_score:\n",
"        best_score = mean_score\n",
"        best_params = params\n",
"        print(\"*** NEW BEST PARAMETERS ***\")\n",
"\n",
"print(f\"\\n{'='*60}\")\n",
"print(\"GRID SEARCH COMPLETED\")\n",
"print(f\"{'='*60}\")\n",
"print(f\"Best parameters: {best_params}\")\n",
"print(f\"Best CV bal. accuracy: {best_score:.4f}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "91182740",
"metadata": {},
"outputs": [],
"source": [
"results_df = pd.DataFrame(all_results)\n",
"results_df = results_df.sort_values('mean_cv_bal_accuracy', ascending=False)\n",
"\n",
"print(\"\\nTop 10 configurations:\")\n",
"print(results_df.head(10))\n",
"\n",
"# Plot\n",
"plt.figure(figsize=(12, 6))\n",
"plt.barh(range(min(10, len(results_df))), \n",
"         results_df['mean_cv_bal_accuracy'].head(10))\n",
"plt.yticks(range(min(10, len(results_df))), \n",
"           [f\"Config {i+1}\" for i in range(min(10, len(results_df)))])\n",
"plt.xlabel('Mean CV bal Accuracy')\n",
"plt.title('Top 10 Configurations')\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c6769a88",
"metadata": {},
"outputs": [],
"source": [
"print(\"Training final model on all training data...\")\n",
"print(f\"Best parameters: {best_params}\")\n",
"\n",
"final_model, final_history = train_joint_model(\n",
"    X_train_full, y_train_full, groups_train,\n",
"    model_params=best_params,\n",
"    epochs=400,  # 2000 is too many; 300-400 is enough\n",
"    batch_size=BATCH_SIZE,\n",
"    learning_rate=0.0001,\n",
"    use_batchnorm=True,\n",
"    dropout_rate=0.3\n",
")\n",
"\n",
"print(\"\\nFinal model training completed!\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6dfeaa54",
"metadata": {},
"outputs": [],
"source": [
"fig, axes = plt.subplots(1, 3, figsize=(15, 4))\n",
"\n",
"axes[0].plot(final_history['total_loss'])\n",
"axes[0].set_title('Total Loss')\n",
"axes[0].set_xlabel('Epoch')\n",
"axes[0].set_ylabel('Loss')\n",
"axes[0].grid(True, alpha=0.3)\n",
"\n",
"axes[1].plot(final_history['recon_loss'])\n",
"axes[1].set_title('Reconstruction Loss')\n",
"axes[1].set_xlabel('Epoch')\n",
"axes[1].set_ylabel('Loss')\n",
"axes[1].grid(True, alpha=0.3)\n",
"\n",
"axes[2].plot(final_history['svm_loss'])\n",
"axes[2].set_title('SVM Loss')\n",
"axes[2].set_xlabel('Epoch')\n",
"axes[2].set_ylabel('Loss')\n",
"axes[2].grid(True, alpha=0.3)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b14b63f3",
"metadata": {},
"outputs": [],
"source": [
"# Get predictions\n",
"test_acc, y_pred = evaluate_model(final_model, X_test.values, y_test)\n",
"\n",
"# Get SVM decision values for ROC-AUC\n",
"X_test_tf = tf.constant(X_test.values, dtype=tf.float32)\n",
"_, svm_decision, _ = final_model(X_test_tf, training=False)\n",
"y_pred_decision = svm_decision.numpy().flatten()\n",
"\n",
"# Metrics\n",
"print(\"=\" * 50)\n",
"print(\"TEST SET EVALUATION\")\n",
"print(\"=\" * 50)\n",
"print(f\"\\nAccuracy: {accuracy_score(y_test, y_pred):.4f}\")\n",
"print(f\"Precision: {precision_score(y_test, y_pred):.4f}\")\n",
"print(f\"Recall: {recall_score(y_test, y_pred):.4f}\")\n",
"print(f\"F1-Score: {f1_score(y_test, y_pred):.4f}\")\n",
"\n",
"# ROC-AUC (decision values as probability proxy; the min-max rescaling is\n",
"# monotonic, so the AUC equals that of the raw decision values)\n",
"decision_scaled = MinMaxScaler().fit_transform(y_pred_decision.reshape(-1, 1)).flatten()\n",
"print(f\"ROC-AUC: {roc_auc_score(y_test, decision_scaled):.4f}\")\n",
"\n",
"print(\"\\nConfusion Matrix:\")\n",
"cm = confusion_matrix(y_test, y_pred, normalize='true')\n",
"print(cm)\n",
"\n",
"print(\"\\nClassification Report:\")\n",
"print(classification_report(y_test, y_pred))\n",
"\n",
"# Visualize Confusion Matrix\n",
"fig, ax = plt.subplots(figsize=(8, 6))\n",
"disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Low Load (0)', 'High Load (1)'])\n",
"disp.plot(cmap='Blues', ax=ax, colorbar=True)\n",
"ax.set_title('Confusion Matrix - Test Set', fontsize=14, fontweight='bold')\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a8787bc7",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"from datetime import datetime\n",
"\n",
"# Timestamp for unique filenames\n",
"timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n",
"\n",
"# 1. Save model weights (Keras 3 expects the '.weights.h5' suffix for save_weights)\n",
"weights_path = f'joint_ae_svm_{timestamp}.weights.h5'\n",
"final_model.save_weights(weights_path)\n",
"print(f\"Model weights saved as '{weights_path}'\")\n",
"\n",
"# 2. Save encoder separately\n",
"encoder_path = f'encoder_joint_{timestamp}.keras'\n",
"final_model.encoder.save(encoder_path)\n",
"print(f\"Encoder saved as '{encoder_path}'\")\n",
"\n",
"# 3. Save best parameters + model architecture info\n",
"model_config = {\n",
"    'best_params': best_params,\n",
"    'input_dim': X_train_full.shape[1],\n",
"    'au_columns': au_columns,\n",
"    'timestamp': timestamp,\n",
"    'training_samples': len(X_train_full),\n",
"    'test_samples': len(X_test)\n",
"}\n",
"\n",
"config_pkl_path = f'model_config_joint_{timestamp}.pkl'\n",
"with open(config_pkl_path, 'wb') as f:\n",
"    pickle.dump(model_config, f)\n",
"print(f\"Model config saved as '{config_pkl_path}'\")\n",
"\n",
"# 4. Also save as JSON (human-readable)\n",
"config_json_path = f'model_config_joint_{timestamp}.json'\n",
"with open(config_json_path, 'w') as f:\n",
"    json_data = {k: v.tolist() if isinstance(v, np.ndarray) else v \n",
"                 for k, v in model_config.items() if k != 'au_columns'}\n",
"    json_data['au_columns'] = au_columns  # a list is JSON-serializable\n",
"    json.dump(json_data, f, indent=2)\n",
"print(f\"Model config (JSON) saved as '{config_json_path}'\")\n",
"\n",
"# 5. Save SVM weights separately\n",
"svm_weights_path = f'svm_weights_joint_{timestamp}.pkl'\n",
"svm_weights = {\n",
"    'w': final_model.svm.w.numpy(),\n",
"    'b': final_model.svm.b.numpy()\n",
"}\n",
"with open(svm_weights_path, 'wb') as f:\n",
"    pickle.dump(svm_weights, f)\n",
"print(f\"SVM weights saved as '{svm_weights_path}'\")\n",
"\n",
"# 6. Save Grid Search Results\n",
"results_path = f'grid_search_results_{timestamp}.pkl'\n",
"with open(results_path, 'wb') as f:\n",
"    pickle.dump(all_results, f)\n",
"print(f\"Grid search results saved as '{results_path}'\")\n",
"\n",
"print(f\"\\n✓ All models and configs saved with timestamp: {timestamp}\")\n",
"print(f\"\\nTo load this model later, use:\")\n",
"print(f\"  load_joint_model('{weights_path}', '{config_pkl_path}')\")"
]
},
{
"cell_type": "markdown",
"id": "06a538c4",
"metadata": {},
"source": [
"* try the SVM-AE pipeline after all?\n",
"* just try 20, 13, 5\n",
"* add labels\n",
"* use the MAD from CT or adjust the value; possibly compare how often each label occurs before and after. --> adopt the labelling step from CT\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a9da57ed",
"metadata": {},
"outputs": [],
"source": [
"def load_joint_model(weights_path='joint_ae_svm.weights.h5',\n",
"                     config_path='model_config_joint.pkl'):\n",
"    \"\"\"\n",
"    Load the trained joint AE-SVM model\n",
"\n",
"    Returns: model, config dict\n",
"    \"\"\"\n",
"    # Load config\n",
"    with open(config_path, 'rb') as f:\n",
"        config = pickle.load(f)\n",
"\n",
"    params = config['best_params']\n",
"    input_dim = config['input_dim']\n",
"\n",
"    # Rebuild model with same architecture\n",
"    model = JointAESVM(\n",
"        input_dim=input_dim,\n",
"        latent_dim=params['latent_dim'],\n",
"        hidden_dim=params['hidden_dim'],\n",
"        ae_weight=params['ae_weight'],\n",
"        svm_weight=params['svm_weight'],\n",
"        svm_C=params['svm_C'],\n",
"        reg=params['reg']\n",
"    )\n",
"\n",
"    # Dummy forward pass to build weights\n",
"    dummy_input = tf.random.normal((1, input_dim))\n",
"    _ = model(dummy_input, training=False)\n",
"\n",
"    # Load weights\n",
"    model.load_weights(weights_path)\n",
"    print(f\"✓ Model loaded from {weights_path}\")\n",
"\n",
"    return model, config\n",
"\n",
"print(\"Load function defined\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a1a9c9fb",
"metadata": {},
"outputs": [],
"source": [
"# Test: load the model (pass the timestamped paths printed by the save cell\n",
"# if files with the default names do not exist)\n",
"loaded_model, loaded_config = load_joint_model()\n",
"\n",
"# Test prediction\n",
"test_sample = X_test.values[:5]\n",
"test_sample_tf = tf.constant(test_sample, dtype=tf.float32)\n",
"_, svm_out, encoded = loaded_model(test_sample_tf, training=False)\n",
"\n",
"print(\"Test prediction successful!\")\n",
"print(f\"Encoded shape: {encoded.shape}\")\n",
"print(f\"SVM decisions: {svm_out.numpy().flatten()}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e4528f50",
"metadata": {},
"outputs": [],
"source": [
"def predict_flexible(model, X_data, use_gpu=None):\n",
"    \"\"\"\n",
"    Run prediction on CPU or GPU\n",
"\n",
"    Args:\n",
"        model: the loaded model\n",
"        X_data: input data (numpy array or DataFrame)\n",
"        use_gpu: True/False/None (None = auto-detect)\n",
"\n",
"    Returns:\n",
"        predictions, decision_values, encoded_features\n",
"    \"\"\"\n",
"    # Auto-detect GPU\n",
"    if use_gpu is None:\n",
"        gpus = tf.config.list_physical_devices('GPU')\n",
"        use_gpu = len(gpus) > 0\n",
"\n",
"    # Select CPU or GPU explicitly\n",
"    device = '/GPU:0' if use_gpu else '/CPU:0'\n",
"\n",
"    print(f\"Running prediction on: {device}\")\n",
"\n",
"    with tf.device(device):\n",
"        if isinstance(X_data, pd.DataFrame):\n",
"            X_data = X_data.values\n",
"\n",
"        X_tf = tf.constant(X_data, dtype=tf.float32)\n",
"        _, svm_decision, encoded = model(X_tf, training=False)\n",
"\n",
"        # Predictions: decision > 0 -> class 1\n",
"        y_pred = (svm_decision.numpy().flatten() > 0).astype(int)\n",
"\n",
"        return y_pred, svm_decision.numpy().flatten(), encoded.numpy()\n",
"\n",
"# Test on CPU\n",
"y_pred_cpu, decisions_cpu, encoded_cpu = predict_flexible(\n",
"    loaded_model, X_test.values[:10], use_gpu=False\n",
")\n",
"print(f\"CPU Predictions: {y_pred_cpu}\")\n",
"\n",
"# Test on GPU (requires an available GPU)\n",
"y_pred_gpu, decisions_gpu, encoded_gpu = predict_flexible(\n",
"    loaded_model, X_test.values[:10], use_gpu=True\n",
")\n",
"print(f\"GPU Predictions: {y_pred_gpu}\")\n",
"\n",
"# Verify the results are identical\n",
"print(f\"\\nResults identical: {np.allclose(decisions_cpu, decisions_gpu)}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1447bfbd",
"metadata": {},
"outputs": [],
"source": [
"# Diagnosis: what is the model learning?\n",
"print(\"=== MODEL DIAGNOSIS ===\\n\")\n",
"\n",
"# Check SVM weights\n",
"print(\"SVM Weights (w):\", final_model.svm.w.numpy()[:10], \"...\")\n",
"print(\"SVM Bias (b):\", final_model.svm.b.numpy())\n",
"print(\"SVM weight norm:\", np.linalg.norm(final_model.svm.w.numpy()))\n",
"\n",
"# Check predictions distribution\n",
"X_train_tf = tf.constant(X_train_full.values, dtype=tf.float32)\n",
"_, train_decisions, train_encoded = final_model(X_train_tf, training=False)\n",
"train_decisions = train_decisions.numpy().flatten()\n",
"train_encoded = train_encoded.numpy()  # convert EagerTensor to numpy\n",
"\n",
"print(f\"\\nTraining set decisions:\")\n",
"print(f\"  Min: {train_decisions.min():.4f}\")\n",
"print(f\"  Max: {train_decisions.max():.4f}\")\n",
"print(f\"  Mean: {train_decisions.mean():.4f}\")\n",
"print(f\"  Std: {train_decisions.std():.4f}\")\n",
"\n",
"train_pred = (train_decisions > 0).astype(int)\n",
"print(f\"\\nTraining predictions distribution:\")\n",
"print(pd.Series(train_pred).value_counts())\n",
"print(f\"Training balanced accuracy: {balanced_accuracy_score(y_train_full, train_pred):.4f}\")\n",
"\n",
"# Check encoded features\n",
"print(f\"\\nEncoded features stats:\")\n",
"print(f\"  Mean: {train_encoded.mean():.4f}\")\n",
"print(f\"  Std: {train_encoded.std():.4f}\")\n",
"print(f\"  Min: {train_encoded.min():.4f}\")\n",
"print(f\"  Max: {train_encoded.max():.4f}\")\n",
"\n",
"# Check per class\n",
"print(f\"\\nEncoded features per class:\")\n",
"for label in [0, 1]:\n",
"    mask = y_train_full == label\n",
"    enc_class = train_encoded[mask]\n",
"    print(f\"  Class {label}: mean={enc_class.mean():.4f}, std={enc_class.std():.4f}\")\n",
"\n",
"# Test set diagnosis\n",
"print(\"\\n=== TEST SET DIAGNOSIS ===\\n\")\n",
"X_test_tf = tf.constant(X_test.values, dtype=tf.float32)\n",
"_, test_decisions, test_encoded = final_model(X_test_tf, training=False)\n",
"test_decisions = test_decisions.numpy().flatten()\n",
"test_encoded = test_encoded.numpy()\n",
"\n",
"print(f\"Test set decisions:\")\n",
"print(f\"  Min: {test_decisions.min():.4f}\")\n",
"print(f\"  Max: {test_decisions.max():.4f}\")\n",
"print(f\"  Mean: {test_decisions.mean():.4f}\")\n",
"print(f\"  Std: {test_decisions.std():.4f}\")\n",
"\n",
"test_pred = (test_decisions > 0).astype(int)\n",
"print(f\"\\nTest predictions distribution:\")\n",
"print(pd.Series(test_pred).value_counts())\n",
"print(f\"Test balanced accuracy: {balanced_accuracy_score(y_test, test_pred):.4f}\")\n",
"\n",
"# Compare train vs. test encoded features\n",
"print(f\"\\n=== TRAIN vs TEST Encoded Features ===\")\n",
"print(f\"Train encoded - Mean: {train_encoded.mean():.4f}, Std: {train_encoded.std():.4f}\")\n",
"print(f\"Test encoded  - Mean: {test_encoded.mean():.4f}, Std: {test_encoded.std():.4f}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}