3 changed files with 307 additions and 135 deletions
--- a/model_training/OCSVM/ocsvm_with_AE.ipynb
+++ b/model_training/OCSVM/ocsvm_with_AE.ipynb
@ -20,10 +20,10 @@
    "from pathlib import Path\n",
    "import sys\n",
    "import os\n",
    "import tensorflow as tf\n",
    "\n",
    "base_dir = os.path.abspath(os.path.join(os.getcwd(), \"..\"))\n",
    "sys.path.append(base_dir)\n",
    "print(base_dir)\n",
    "\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.svm import OneClassSVM\n",
@ -31,7 +31,7 @@
    "import matplotlib.pyplot as plt\n",
    "import tensorflow as tf\n",
    "import pickle\n",
-    "from Fahrsimulator_MSY2526_AI.model_training.tools import evaluation_tools, scaler\n",
+    "from tools import evaluation_tools, scaler\n",
    "from sklearn.metrics import (balanced_accuracy_score, accuracy_score, precision_score, \n",
    "                                recall_score, f1_score, confusion_matrix, classification_report)  "
   ]
@ -44,17 +44,6 @@
    "### Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "30a4c885",
   "metadata": {},
   "outputs": [],
   "source": [
    "enconder_path = Path(\".keras\")\n",
    "model_path = Path(\".pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
@ -62,7 +51,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "data_path = Path(r\".parquet\")"
+    "data_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/first_AU_dataset/output_windowed.parquet\")"
   ]
  },
  {
@ -72,8 +61,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "df = pd.read_parquet(path=data_path)\n",
+    "df = pd.read_parquet(path=data_path)"
    "df = df.dropna()"
   ]
  },
  {
@ -162,29 +150,20 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "au_columns = [col for col in low_all.columns if \"face\" in col.lower()] \n",
+    "au_columns = [col for col in low_all.columns if col.startswith('AU')]\n",
    "\n",
    "eye_columns = [ \n",
    "    'Fix_count_short_66_150','Fix_count_medium_300_500','Fix_count_long_gt_1000', \n",
    "    'Fix_count_100','Fix_mean_duration','Fix_median_duration', \n",
    "    'Sac_count','Sac_mean_amp','Sac_mean_dur','Sac_median_dur', \n",
    "    'Blink_count','Blink_mean_dur','Blink_median_dur', \n",
    "    'Pupil_mean','Pupil_IPA' \n",
    "] \n",
    "cols = au_columns +eye_columns\n",
    "\n",
    "# Prepare training data (only normal/low data)\n",
-    "train_data = low_all[low_all['subjectID'].isin(train_subjects)][['subjectID'] + cols].copy()\n",
+    "train_data = low_all[low_all['subjectID'].isin(train_subjects)][['subjectID'] + au_columns].copy()\n",
    "\n",
    "# Prepare validation data (normal and anomaly) \n",
-    "val_normal_data = low_all[low_all['subjectID'].isin(val_subjects)][['subjectID'] + cols].copy()\n",
+    "val_normal_data = low_all[low_all['subjectID'].isin(val_subjects)][['subjectID'] + au_columns].copy()\n",
-    "val_high_data = high_all[high_all['subjectID'].isin(val_subjects)][['subjectID'] + cols].copy()\n",
+    "val_high_data = high_all[high_all['subjectID'].isin(val_subjects)][['subjectID'] + au_columns].copy()\n",
-    "val_normal_data = val_normal_data.sample(n=500, random_state=42)\n",
+    "val_normal_data = val_normal_data.sample(n=1000, random_state=42)\n",
-    "val_high_data = val_high_data.sample(n=500, random_state=42)\n",
+    "val_high_data = val_high_data.sample(n=1000, random_state=42)\n",
    "\n",
    "# Prepare test data (normal and anomaly) - 500 samples each\n",
-    "test_normal_data = low_all[low_all['subjectID'].isin(test_subjects)][['subjectID'] + cols].copy()\n",
+    "test_normal_data = low_all[low_all['subjectID'].isin(test_subjects)][['subjectID'] + au_columns].copy()\n",
-    "test_high_data = high_all[high_all['subjectID'].isin(test_subjects)][['subjectID'] + cols].copy()\n",
+    "test_high_data = high_all[high_all['subjectID'].isin(test_subjects)][['subjectID'] + au_columns].copy()\n",
    "test_normal_data = test_normal_data.sample(n=500, random_state=42)\n",
    "test_high_data = test_high_data.sample(n=500, random_state=42)\n",
    "\n",
@ -207,7 +186,7 @@
   "outputs": [],
   "source": [
    "# Cell 3: Fit normalizer on training data\n",
-    "normalizer = scaler.fit_normalizer(train_data, cols, method='minmax', scope='global')\n",
+    "normalizer = scaler.fit_normalizer(train_data, au_columns, method='minmax', scope='global')\n",
    "print(\"Normalizer fitted on training data\")"
   ]
  },
@ -219,11 +198,11 @@
   "outputs": [],
   "source": [
    "# Cell 4: Apply normalization to all datasets\n",
-    "train_normalized = scaler.apply_normalizer(train_data, cols, normalizer)\n",
+    "train_normalized = scaler.apply_normalizer(train_data, au_columns, normalizer)\n",
-    "val_normal_normalized = scaler.apply_normalizer(val_normal_data, cols, normalizer)\n",
+    "val_normal_normalized = scaler.apply_normalizer(val_normal_data, au_columns, normalizer)\n",
-    "val_high_normalized = scaler.apply_normalizer(val_high_data, cols, normalizer)\n",
+    "val_high_normalized = scaler.apply_normalizer(val_high_data, au_columns, normalizer)\n",
-    "test_normal_normalized = scaler.apply_normalizer(test_normal_data, cols, normalizer)\n",
+    "test_normal_normalized = scaler.apply_normalizer(test_normal_data, au_columns, normalizer)\n",
-    "test_high_normalized = scaler.apply_normalizer(test_high_data, cols, normalizer)\n",
+    "test_high_normalized = scaler.apply_normalizer(test_high_data, au_columns, normalizer)\n",
    "\n",
    "print(\"Normalization applied to all datasets\")"
   ]
@ -235,11 +214,13 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "X_train = train_normalized[cols].copy()\n",
+    "# Cell 5: Extract AU columns and create labels for grid search\n",
-    "X_val_normal = val_normal_normalized[cols].copy()\n",
+    "# Extract only AU columns (drop subjectID)\n",
-    "X_val_high = val_high_normalized[cols].copy()\n",
+    "X_train = train_normalized[au_columns].copy()\n",
-    "X_test_high = test_high_normalized[cols].copy()\n",
+    "X_val_normal = val_normal_normalized[au_columns].copy()\n",
-    "X_test_normal = test_normal_normalized[cols].copy()\n",
+    "X_val_high = val_high_normalized[au_columns].copy()\n",
    "X_test_high = test_high_normalized[au_columns].copy()\n",
    "X_test_normal = test_normal_normalized[au_columns].copy()\n",
    "\n",
    "\n",
    "# Create labels for grid search\n",
@ -260,12 +241,116 @@
    "X_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "50fc80dc-fe16-4917-aad6-0dbaa1ce5ef9",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install keras-tuner --quiet  # nur einmal nötig\n",
    "\n",
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "from kerastuner import HyperModel\n",
    "from kerastuner.tuners import RandomSearch\n",
    "\n",
    "# 1️⃣ HyperModel definieren\n",
    "class AutoencoderHyperModel(HyperModel):\n",
    "    def __init__(self, input_dim):\n",
    "        self.input_dim = input_dim\n",
    "\n",
    "    def build(self, hp):\n",
    "        reg = hp.Float(\"l2_reg\", min_value=1e-5, max_value=0.01, sampling=\"log\")\n",
    "        lr = hp.Float(\"learning_rate\", 1e-4, 1e-2, sampling=\"log\")\n",
    "\n",
    "        # Encoder\n",
    "        encoder = keras.Sequential([\n",
    "            keras.layers.Dense(\n",
    "                units=hp.Int(\"enc_units1\", min_value=10, max_value=self.input_dim, step=10),\n",
    "                activation=None,\n",
    "                kernel_regularizer=keras.regularizers.l2(reg)\n",
    "            ),\n",
    "            keras.layers.LeakyReLU(alpha=0.1),\n",
    "            keras.layers.Dense(\n",
    "                units=hp.Int(\"enc_units2\", min_value=5, max_value=20, step=1),\n",
    "                activation=tf.keras.layers.LeakyReLU(alpha=0.1),\n",
    "                kernel_regularizer=keras.regularizers.l2(reg)\n",
    "            ),\n",
    "            keras.layers.Dense(\n",
    "                units=2,  # Bottleneck\n",
    "                activation='linear',\n",
    "                kernel_regularizer=keras.regularizers.l2(reg)\n",
    "            ),\n",
    "        ])\n",
    "\n",
    "        # Decoder\n",
    "        decoder = keras.Sequential([\n",
    "            keras.layers.Dense(\n",
    "                units=hp.Int(\"dec_units1\", min_value=5, max_value=20, step=1),\n",
    "                activation=tf.keras.layers.LeakyReLU(alpha=0.1),\n",
    "                kernel_regularizer=keras.regularizers.l2(reg)\n",
    "            ),\n",
    "            keras.layers.Dense(\n",
    "                units=hp.Int(\"dec_units2\", min_value=10, max_value=self.input_dim, step=10),\n",
    "                activation=tf.keras.layers.LeakyReLU(alpha=0.1),\n",
    "                kernel_regularizer=keras.regularizers.l2(reg)\n",
    "            ),\n",
    "            keras.layers.Dense(\n",
    "                units=self.input_dim,\n",
    "                activation='linear',\n",
    "                kernel_regularizer=keras.regularizers.l2(reg)\n",
    "            ),\n",
    "        ])\n",
    "\n",
    "        # Autoencoder\n",
    "        inputs = keras.Input(shape=(self.input_dim,))\n",
    "        encoded = encoder(inputs)\n",
    "        decoded = decoder(encoded)\n",
    "        autoencoder = keras.Model(inputs, decoded)\n",
    "\n",
    "        autoencoder.compile(\n",
    "            optimizer=keras.optimizers.Adam(learning_rate=lr),\n",
    "            loss='mse'\n",
    "        )\n",
    "\n",
    "        return autoencoder\n",
    "\n",
    "# 2️⃣ RandomSearch-Tuner\n",
    "hypermodel = AutoencoderHyperModel(input_dim=X_train.shape[1])\n",
    "\n",
    "tuner = RandomSearch(\n",
    "    hypermodel,\n",
    "    objective='val_loss',\n",
    "    max_trials=10,            # Anzahl der getesteten Kombinationen\n",
    "    executions_per_trial=1,   # Anzahl Trainings pro Kombination\n",
    "    directory='tuner_dir',\n",
    "    project_name='oc_ae'\n",
    ")\n",
    "\n",
    "# 3️⃣ Hyperparameter-Tuning starten\n",
    "tuner.search(\n",
    "    X_train, X_train,\n",
    "    epochs=100,\n",
    "    batch_size=64,\n",
    "    validation_data=(X_val_normal, X_val_normal),\n",
    "    verbose=0\n",
    ")\n",
    "\n",
    "# 4️⃣ Beste Architektur holen\n",
    "best_model = tuner.get_best_models(num_models=1)[0]\n",
    "best_hyperparameters = tuner.get_best_hyperparameters(1)[0]\n",
    "\n",
    "print(\"Beste Hyperparameter:\", best_hyperparameters.values)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "362c0a6f",
   "metadata": {},
   "source": [
-    "Build model"
+    "\n",
    "Beste Hyperparameter: {'l2_reg': 1.3757411430582133e-05, 'learning_rate': 0.007321002854350309, 'enc_units1': 20, 'enc_units2': 16, 'dec_units1': 14, 'dec_units2': 10}"
   ]
  },
  {
@ -275,6 +360,26 @@
   "metadata": {},
   "outputs": [],
   "source": [
    "# reg = 0.1\n",
    "# encoder = tf.keras.Sequential(\n",
    "#     [\n",
    "#         tf.keras.layers.Dense(units=X_train.shape[1], activation='relu', kernel_regularizer=tf.keras.regularizers.l2(reg)),\n",
    "#         tf.keras.layers.Dense(units=10, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(reg)),\n",
    "#         tf.keras.layers.Dense(units=5, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(reg)),\n",
    "        \n",
    "#     ]\n",
    "# )\n",
    "\n",
    "# decoder = tf.keras.Sequential(\n",
    "#     [\n",
    "#         tf.keras.layers.Dense(units=5,activation='relu', kernel_regularizer=tf.keras.regularizers.l2(reg)),\n",
    "#         tf.keras.layers.Dense(units=10, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(reg)),\n",
    "#         tf.keras.layers.Dense(units=X_train.shape[1], activation='linear', kernel_regularizer=tf.keras.regularizers.l2(reg))\n",
    "#     ]\n",
    "# )\n",
    "\n",
    "\n",
    "\n",
    "reg = 1e-5\n",
    "\n",
    "# ENCODER\n",
@ -284,15 +389,14 @@
    "        activation=None,\n",
    "        kernel_regularizer=tf.keras.regularizers.l2(reg)\n",
    "    ),\n",
-    "    tf.keras.layers.LeakyReLU(negative_slope=0.1),\n",
+    "    tf.keras.layers.LeakyReLU(alpha=0.1),\n",
-    "\n",
+    "    \n",
    "    tf.keras.layers.Dense(\n",
    "        units=12,\n",
-    "        activation=None,\n",
+    "        activation=tf.keras.layers.LeakyReLU(negative_slope=0.1),\n",
    "        kernel_regularizer=tf.keras.regularizers.l2(reg)\n",
    "    ),\n",
-    "    tf.keras.layers.LeakyReLU(negative_slope=0.1),\n",
+    "       \n",
    "\n",
    "    tf.keras.layers.Dense(\n",
    "        units=8,\n",
    "        activation='linear',  # Bottleneck stays linear\n",
@ -304,24 +408,20 @@
    "decoder = tf.keras.Sequential([\n",
    "    tf.keras.layers.Dense(\n",
    "        units=8,\n",
-    "        activation=None,\n",
+    "        activation=tf.keras.layers.LeakyReLU(negative_slope=0.1),\n",
    "        kernel_regularizer=tf.keras.regularizers.l2(reg)\n",
    "    ),\n",
    "    tf.keras.layers.LeakyReLU(negative_slope=0.1),\n",
    "\n",
    "    tf.keras.layers.Dense(\n",
    "        units=12,\n",
-    "        activation=None,\n",
+    "        activation=tf.keras.layers.LeakyReLU(negative_slope=0.1),\n",
    "        kernel_regularizer=tf.keras.regularizers.l2(reg)\n",
    "    ),\n",
    "    tf.keras.layers.LeakyReLU(negative_slope=0.1),\n",
    "\n",
    "    tf.keras.layers.Dense(\n",
    "        units=X_train.shape[1],\n",
    "        activation='linear',\n",
    "        kernel_regularizer=tf.keras.regularizers.l2(reg)\n",
    "    ),\n",
-    "])"
+    "])\n"
   ]
  },
  {
@ -356,7 +456,7 @@
   "source": [
    "history = autoencoder.fit(\n",
    "    X_train, X_train,  # Input and target are the same for autoencoder\n",
-    "    epochs=50,\n",
+    "    epochs=200,\n",
    "    batch_size=64,\n",
    "    validation_data=(X_val_normal, X_val_normal),\n",
    "    verbose=1\n",
@ -370,7 +470,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "encoder.save(enconder_path)"
+    "save_path = Path(\"/home/jovyan/data-paulusjafahrsimulator-gpu/saved_models/encoder_model_2_neurons_minmax.keras\")\n",
    "encoder.save(save_path)"
   ]
  },
  {
@ -388,7 +489,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "encoder = tf.keras.models.load_model(enconder_path)"
+    "load_path = Path(\"/home/jovyan/data-paulusjafahrsimulator-gpu/saved_models/encoder_model_2_neurons_minmax.keras\")\n",
    "encoder = tf.keras.models.load_model(load_path)"
   ]
  },
  {
@ -439,6 +541,25 @@
    "test_predictions.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "759118d8-989d-489c-9d35-331454b4795e",
   "metadata": {},
   "outputs": [],
   "source": [
    "all_zero = {\n",
    "    \"X_train_encoded\":        np.all(X_train_encoded == 0),\n",
    "    \"X_val_normal_encoded\":   np.all(X_val_normal_encoded == 0),\n",
    "    \"X_val_high_encoded\":     np.all(X_val_high_encoded == 0),\n",
    "    \"X_test_normal_encoded\":  np.all(X_test_normal_encoded == 0),\n",
    "    \"X_test_high_encoded\":    np.all(X_test_high_encoded == 0),\n",
    "}\n",
    "\n",
    "print(all_zero)\n",
    "print(X_train_encoded.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
@ -446,49 +567,81 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "fig, axes = plt.subplots(1, 3, figsize=(18, 5))\n",
+    "# fig, axes = plt.subplots(1, 3, figsize=(18, 5))\n",
    "\n",
-    "# Subplot A: Normal\n",
+    "# # Subplot A: Normal\n",
-    "axes[0].scatter(\n",
+    "# axes[0].scatter(\n",
-    "    X_val_normal_encoded[:, 0],\n",
+    "#     X_val_normal_encoded[:, 0],\n",
-    "    X_val_normal_encoded[:, 1],\n",
+    "#     X_val_normal_encoded[:, 1],\n",
-    "    color=\"blue\",\n",
+    "#     color=\"blue\",\n",
-    "    label=\"Normal\"\n",
+    "#     label=\"Normal\"\n",
-    ")\n",
+    "# )\n",
-    "axes[0].set_title(\"Val Normal (encoded)\")\n",
+    "# axes[0].set_title(\"Val Normal (encoded)\")\n",
-    "axes[0].set_xlabel(\"latent feature 1\")\n",
+    "# axes[0].set_xlabel(\"latent feature 1\")\n",
-    "axes[0].set_ylabel(\"latent feature 2\")\n",
+    "# axes[0].set_ylabel(\"latent feature 2\")\n",
-    "axes[0].legend()\n",
+    "# axes[0].legend()\n",
    "\n",
    "# # Subplot B: High\n",
    "# axes[1].scatter(\n",
    "#     X_val_high_encoded[:, 0],\n",
    "#     X_val_high_encoded[:, 1],\n",
    "#     color=\"orange\",\n",
    "#     label=\"High\"\n",
    "# )\n",
    "# axes[1].set_title(\"ValHigh (encoded)\")\n",
    "# axes[1].set_xlabel(\"latent feature 1\")\n",
    "# axes[1].set_ylabel(\"latent feature 2\")\n",
    "# axes[1].legend()\n",
    "\n",
    "# # Subplot C: Both\n",
    "# axes[2].scatter(\n",
    "#     X_val_normal_encoded[:, 0],\n",
    "#     X_val_normal_encoded[:, 1],\n",
    "#     color=\"blue\",\n",
    "#     label=\"Normal\"\n",
    "# )\n",
    "# axes[2].scatter(\n",
    "#     X_val_high_encoded[:, 0],\n",
    "#     X_val_high_encoded[:, 1],\n",
    "#     color=\"orange\",\n",
    "#     label=\"High\"\n",
    "# )\n",
    "# axes[2].set_title(\"Normal vs High (encoded)\")\n",
    "# axes[2].set_xlabel(\"latent feature 1\")\n",
    "# axes[2].set_ylabel(\"latent feature 2\")\n",
    "# axes[2].legend()\n",
    "\n",
    "\n",
    "\n",
    "latent_dim = 8\n",
    "fig, axes = plt.subplots(2, 4, figsize=(20, 10))\n",
    "axes = axes.flatten()  # flatten to index easily\n",
    "\n",
    "for i in range(latent_dim):\n",
    "    axes[i].scatter(\n",
    "        X_val_normal_encoded[:, i],\n",
    "        [0]*X_val_normal_encoded.shape[0],  # optional: place on a line for 1D visualization\n",
    "        color='blue',\n",
    "        label='Normal',\n",
    "        alpha=0.6\n",
    "    )\n",
    "    axes[i].scatter(\n",
    "        X_val_high_encoded[:, i],\n",
    "        [0]*X_val_high_encoded.shape[0],  # same for High\n",
    "        color='orange',\n",
    "        label='High',\n",
    "        alpha=0.6\n",
    "    )\n",
    "    axes[i].set_title(f'Latent dim {i+1}')\n",
    "    axes[i].set_xlabel(f'Feature {i+1}')\n",
    "    axes[i].set_yticks([])  # hide y-axis as it's just a 1D comparison\n",
    "    axes[i].legend()\n",
    "    axes[i].grid(True)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# Subplot B: High\n",
    "axes[1].scatter(\n",
    "    X_val_high_encoded[:, 0],\n",
    "    X_val_high_encoded[:, 1],\n",
    "    color=\"orange\",\n",
    "    label=\"High\"\n",
    ")\n",
    "axes[1].set_title(\"ValHigh (encoded)\")\n",
    "axes[1].set_xlabel(\"latent feature 1\")\n",
    "axes[1].set_ylabel(\"latent feature 2\")\n",
    "axes[1].legend()\n",
    "\n",
    "# Subplot C: Both\n",
    "axes[2].scatter(\n",
    "    X_val_normal_encoded[:, 0],\n",
    "    X_val_normal_encoded[:, 1],\n",
    "    color=\"blue\",\n",
    "    label=\"Normal\"\n",
    ")\n",
    "axes[2].scatter(\n",
    "    X_val_high_encoded[:, 0],\n",
    "    X_val_high_encoded[:, 1],\n",
    "    color=\"orange\",\n",
    "    label=\"High\"\n",
    ")\n",
    "axes[2].set_title(\"Normal vs High (encoded)\")\n",
    "axes[2].set_xlabel(\"latent feature 1\")\n",
    "axes[2].set_ylabel(\"latent feature 2\")\n",
    "axes[2].legend()\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
@ -555,11 +708,11 @@
   "outputs": [],
   "source": [
    "# Save\n",
-    "with open(model_path, \"wb\") as f:\n",
+    "with open('ocsvm_model.pkl', 'wb') as f:\n",
    "    pickle.dump(ocsvm, f)\n",
    "\n",
    "# Load later\n",
-    "with open(model_path, \"rb\") as f:\n",
+    "with open('ocsvm_model.pkl', 'rb') as f:\n",
    "    ocsvm_loaded = pickle.load(f)"
   ]
  },
@ -578,6 +731,11 @@
   "metadata": {},
   "outputs": [],
   "source": [
    "# X_combined = np.concatenate([X_train_encoded, X_val_normal_encoded, X_val_high_encoded], axis=0)\n",
    "# y_combined = np.concatenate([\n",
    "#     np.ones(X_train_encoded.shape[0]+X_val_normal_encoded.shape[0]),      # normal = 1\n",
    "#     -np.ones(X_val_high_encoded.shape[0])   # anomaly = -1\n",
    "# ], axis=0)\n",
    "X_combined = np.concatenate([X_train_encoded, X_val_high_encoded], axis=0)\n",
    "y_combined = np.concatenate([\n",
    "    np.ones(X_train_encoded.shape[0]),      # normal = 1\n",
@ -698,6 +856,14 @@
   "source": [
    "f1_score(y_true=np.concatenate([y_test_normal, y_test_high]), y_pred=predictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9e412041-7534-40fe-8486-ee97349a6168",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
--- a/predict_pipeline/config.yaml
+++ b/predict_pipeline/config.yaml
@ -1,20 +1,20 @@
 database:
-  path: "/home/edgekit/MSY_FS/databases/database.sqlite"
+  path: "C:\\repo\\Fahrsimulator_MSY2526_AI\\predict_pipeline\\database.sqlite"
  table: feature_table
  key: _Id
 model:
-  path: "/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/predict_pipeline/xgb_model_3_groupK.joblib"
+  path: "C:\\repo\\Fahrsimulator_MSY2526_AI\\files_for_testing\\xgb_model_3_groupK.joblib"
 scaler:
-  use_scaling: true
+  use_scaling: True
-  path: "/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/predict_pipeline/normalizer_min_max_global.pkl"
+  path: "C:\\repo\\Fahrsimulator_MSY2526_AI\\predict_pipeline\\normalizer_min_max_global.pkl"
 mqtt:
  enabled: true
  host: "141.75.215.233"
  port: 1883
-  topic: "PREDICTION"
+  topic: "PREDICTIONS"
  client_id: "jetson-board"
  qos: 0
  retain: false
@ -107,4 +107,4 @@ fallback:
  Blink_mean_dur: 0.38857142857142857
  Blink_median_dur: 0.2
  Pupil_mean: 3.2823675201416016
-  Pupil_IPA: 0.0036347377340156025
+  Pupil_IPA: 0.0036347377340156025
--- a/predict_pipeline/predict_sample.py
+++ b/predict_pipeline/predict_sample.py
@ -7,10 +7,9 @@ import sys
 import yaml
 import pickle
 sys.path.append('/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/tools')
-
+# sys.path.append(r"c:\\repo\\Fahrsimulator_MSY2526_AI\\tools")
 import db_helpers
 import joblib
 import paho.mqtt.client as mqtt
 def _load_serialized(path: Path):
    suffix = path.suffix.lower()
@ -53,11 +52,11 @@ def callModel(sample, model_path):
    suffix = model_path.suffix.lower()
    if suffix in {".pkl", ".joblib"}:
        model = _load_serialized(model_path)
-    elif suffix == ".keras":
+    # elif suffix == ".keras":
-        import tensorflow as tf
+       # import tensorflow as tf
-        model = tf.keras.models.load_model(model_path)
+        # model = tf.keras.models.load_model(model_path)
-    else:
+    # else:
-        raise ValueError(f"Unsupported model format: {suffix}. Use .pkl, .joblib, or .keras.")
+        # raise ValueError(f"Unsupported model format: {suffix}. Use .pkl, .joblib, or .keras.")
    x = np.asarray(sample, dtype=np.float32)
    if x.ndim == 1:
@ -126,37 +125,44 @@ def sendMessage(config_file_path, message):
    # Serialize the message to JSON
    payload = json.dumps(message, ensure_ascii=False)
-    print(payload) # for debugging purposes
+    print(payload)
-    
+    # Later: publish via MQTT using config parameters above.
-    client = mqtt.Client(client_id=mqtt_cfg.get("client_id", "predictor-01"))
+    # Example (kept commented intentionally):
-    if "username" in mqtt_cfg and mqtt_cfg.get("username"):
+    # import paho.mqtt.client as mqtt
-        client.username_pw_set(mqtt_cfg["username"], mqtt_cfg.get("password"))
+    # client = mqtt.Client(client_id=mqtt_cfg.get("client_id", "predictor-01"))
-    client.connect(mqtt_cfg.get("host", "localhost"), int(mqtt_cfg.get("port", 1883)), 60)
+    # if "username" in mqtt_cfg and mqtt_cfg.get("username"):
-    client.publish(
+    #     client.username_pw_set(mqtt_cfg["username"], mqtt_cfg.get("password"))
-        topic=topic,
+    # client.connect(mqtt_cfg.get("host", "localhost"), int(mqtt_cfg.get("port", 1883)), 60)
-        payload=payload,
+    # client.publish(
-        qos=int(mqtt_cfg.get("qos", 1)),
+    #     topic=topic,
-        retain=bool(mqtt_cfg.get("retain", False)),
+    #     payload=payload,
-    )
+    #     qos=int(mqtt_cfg.get("qos", 1)),
-    client.disconnect()
+    #     retain=bool(mqtt_cfg.get("retain", False)),
    # )
    # client.disconnect()
    return
 def replace_nan(sample, config_file_path: Path):
    """Fill NaNs in ``sample`` with the ``fallback`` values from the YAML config.

    Returns ``(valid, sample)``. An empty sample is returned unchanged with
    ``valid=False``. Otherwise ``valid`` is ``sample.isna().mean() <= 0.5``,
    and NaNs are filled only when the sample is valid and fallback values
    were found in the config.

    The ``fallback`` section may be a mapping ``{column: value}`` or a list
    of such mappings; both layouts are merged into one lookup dict.
    NOTE(review): ``if valid`` presumably expects ``sample`` to be a Series
    so that ``valid`` is a scalar - confirm against the caller.
    """
    with config_file_path.open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)
-    
+
-    fallback_map = cfg.get("fallback", {})
+    fallback_cfg = cfg.get("fallback") or {}
-    
+    fallback_map = {}
    if isinstance(fallback_cfg, dict):
        # Mapping layout: iterating a dict only yields its keys (strings),
        # so it must be merged directly rather than walked item by item.
        fallback_map.update(fallback_cfg)
    else:
        # List layout: merge every mapping entry, ignore anything else.
        for item in fallback_cfg:
            if isinstance(item, dict):
                fallback_map.update(item)
    if sample.empty:
        return False, sample
-    
+
    nan_ratio = sample.isna().mean()
    valid = nan_ratio <= 0.5
-    
+
    if valid and fallback_map:
        sample = sample.fillna(value=fallback_map)
-    
+
    return valid, sample
 def sample_to_numpy(sample, drop_cols=("_Id", "start_time")):
@ -207,7 +213,7 @@ def scale_sample(sample, use_scaling=False, scaler_path=None):
    return df.iloc[0] if isinstance(sample, pd.Series) else df
 def main():
-    pd.set_option('future.no_silent_downcasting', True) 
+    pd.set_option('future.no_silent_downcasting', True) # kann ggf raus
    config_file_path = Path("/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/predict_pipeline/config.yaml")
    with config_file_path.open("r", encoding="utf-8") as f: