Compare commits

...

2 Commits

Author SHA1 Message Date
537b452449 changed leaky relu syntax to remove keras bug 2026-03-04 17:59:05 +01:00
3169c29319 mini changes in predict pipeline 2026-03-04 17:01:43 +01:00
3 changed files with 135 additions and 307 deletions

View File

@@ -20,10 +20,10 @@
"from pathlib import Path\n", "from pathlib import Path\n",
"import sys\n", "import sys\n",
"import os\n", "import os\n",
"import tensorflow as tf\n",
"\n", "\n",
"base_dir = os.path.abspath(os.path.join(os.getcwd(), \"..\"))\n", "base_dir = os.path.abspath(os.path.join(os.getcwd(), \"..\"))\n",
"sys.path.append(base_dir)\n", "sys.path.append(base_dir)\n",
"print(base_dir)\n",
"\n", "\n",
"from sklearn.pipeline import Pipeline\n", "from sklearn.pipeline import Pipeline\n",
"from sklearn.svm import OneClassSVM\n", "from sklearn.svm import OneClassSVM\n",
@@ -31,7 +31,7 @@
"import matplotlib.pyplot as plt\n", "import matplotlib.pyplot as plt\n",
"import tensorflow as tf\n", "import tensorflow as tf\n",
"import pickle\n", "import pickle\n",
"from tools import evaluation_tools, scaler\n", "from Fahrsimulator_MSY2526_AI.model_training.tools import evaluation_tools, scaler\n",
"from sklearn.metrics import (balanced_accuracy_score, accuracy_score, precision_score, \n", "from sklearn.metrics import (balanced_accuracy_score, accuracy_score, precision_score, \n",
" recall_score, f1_score, confusion_matrix, classification_report) " " recall_score, f1_score, confusion_matrix, classification_report) "
] ]
@@ -44,6 +44,17 @@
"### Load data" "### Load data"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "30a4c885",
"metadata": {},
"outputs": [],
"source": [
"enconder_path = Path(\".keras\")\n",
"model_path = Path(\".pkl\")"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -51,7 +62,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"data_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/first_AU_dataset/output_windowed.parquet\")" "data_path = Path(r\".parquet\")"
] ]
}, },
{ {
@@ -61,7 +72,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"df = pd.read_parquet(path=data_path)" "df = pd.read_parquet(path=data_path)\n",
"df = df.dropna()"
] ]
}, },
{ {
@@ -150,20 +162,29 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"au_columns = [col for col in low_all.columns if col.startswith('AU')]\n", "au_columns = [col for col in low_all.columns if \"face\" in col.lower()] \n",
"\n",
"eye_columns = [ \n",
" 'Fix_count_short_66_150','Fix_count_medium_300_500','Fix_count_long_gt_1000', \n",
" 'Fix_count_100','Fix_mean_duration','Fix_median_duration', \n",
" 'Sac_count','Sac_mean_amp','Sac_mean_dur','Sac_median_dur', \n",
" 'Blink_count','Blink_mean_dur','Blink_median_dur', \n",
" 'Pupil_mean','Pupil_IPA' \n",
"] \n",
"cols = au_columns +eye_columns\n",
"\n", "\n",
"# Prepare training data (only normal/low data)\n", "# Prepare training data (only normal/low data)\n",
"train_data = low_all[low_all['subjectID'].isin(train_subjects)][['subjectID'] + au_columns].copy()\n", "train_data = low_all[low_all['subjectID'].isin(train_subjects)][['subjectID'] + cols].copy()\n",
"\n", "\n",
"# Prepare validation data (normal and anomaly) \n", "# Prepare validation data (normal and anomaly) \n",
"val_normal_data = low_all[low_all['subjectID'].isin(val_subjects)][['subjectID'] + au_columns].copy()\n", "val_normal_data = low_all[low_all['subjectID'].isin(val_subjects)][['subjectID'] + cols].copy()\n",
"val_high_data = high_all[high_all['subjectID'].isin(val_subjects)][['subjectID'] + au_columns].copy()\n", "val_high_data = high_all[high_all['subjectID'].isin(val_subjects)][['subjectID'] + cols].copy()\n",
"val_normal_data = val_normal_data.sample(n=1000, random_state=42)\n", "val_normal_data = val_normal_data.sample(n=500, random_state=42)\n",
"val_high_data = val_high_data.sample(n=1000, random_state=42)\n", "val_high_data = val_high_data.sample(n=500, random_state=42)\n",
"\n", "\n",
"# Prepare test data (normal and anomaly) - 1000 samples each\n", "# Prepare test data (normal and anomaly) - 1000 samples each\n",
"test_normal_data = low_all[low_all['subjectID'].isin(test_subjects)][['subjectID'] + au_columns].copy()\n", "test_normal_data = low_all[low_all['subjectID'].isin(test_subjects)][['subjectID'] + cols].copy()\n",
"test_high_data = high_all[high_all['subjectID'].isin(test_subjects)][['subjectID'] + au_columns].copy()\n", "test_high_data = high_all[high_all['subjectID'].isin(test_subjects)][['subjectID'] + cols].copy()\n",
"test_normal_data = test_normal_data.sample(n=500, random_state=42)\n", "test_normal_data = test_normal_data.sample(n=500, random_state=42)\n",
"test_high_data = test_high_data.sample(n=500, random_state=42)\n", "test_high_data = test_high_data.sample(n=500, random_state=42)\n",
"\n", "\n",
@@ -186,7 +207,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# Cell 3: Fit normalizer on training data\n", "# Cell 3: Fit normalizer on training data\n",
"normalizer = scaler.fit_normalizer(train_data, au_columns, method='minmax', scope='global')\n", "normalizer = scaler.fit_normalizer(train_data, cols, method='minmax', scope='global')\n",
"print(\"Normalizer fitted on training data\")" "print(\"Normalizer fitted on training data\")"
] ]
}, },
@@ -198,11 +219,11 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# Cell 4: Apply normalization to all datasets\n", "# Cell 4: Apply normalization to all datasets\n",
"train_normalized = scaler.apply_normalizer(train_data, au_columns, normalizer)\n", "train_normalized = scaler.apply_normalizer(train_data, cols, normalizer)\n",
"val_normal_normalized = scaler.apply_normalizer(val_normal_data, au_columns, normalizer)\n", "val_normal_normalized = scaler.apply_normalizer(val_normal_data, cols, normalizer)\n",
"val_high_normalized = scaler.apply_normalizer(val_high_data, au_columns, normalizer)\n", "val_high_normalized = scaler.apply_normalizer(val_high_data, cols, normalizer)\n",
"test_normal_normalized = scaler.apply_normalizer(test_normal_data, au_columns, normalizer)\n", "test_normal_normalized = scaler.apply_normalizer(test_normal_data, cols, normalizer)\n",
"test_high_normalized = scaler.apply_normalizer(test_high_data, au_columns, normalizer)\n", "test_high_normalized = scaler.apply_normalizer(test_high_data, cols, normalizer)\n",
"\n", "\n",
"print(\"Normalization applied to all datasets\")" "print(\"Normalization applied to all datasets\")"
] ]
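fit_normalizer and apply_normalizer come from the project's scaler module, whose internals are outside this diff. The pattern is the usual one: fit scaling statistics on the training data only, then apply them unchanged to validation and test. A minimal sketch with sklearn's MinMaxScaler, assuming scope='global' means statistics pooled over all training subjects:

import pandas as pd
from sklearn.preprocessing import MinMaxScaler

def fit_normalizer(train_df: pd.DataFrame, cols) -> MinMaxScaler:
    # Fit on training rows only so val/test statistics never leak in.
    return MinMaxScaler().fit(train_df[cols])

def apply_normalizer(df: pd.DataFrame, cols, norm: MinMaxScaler) -> pd.DataFrame:
    out = df.copy()
    out[cols] = norm.transform(out[cols])
    return out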
@@ -214,13 +235,11 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Cell 5: Extract AU columns and create labels for grid search\n", "X_train = train_normalized[cols].copy()\n",
"# Extract only AU columns (drop subjectID)\n", "X_val_normal = val_normal_normalized[cols].copy()\n",
"X_train = train_normalized[au_columns].copy()\n", "X_val_high = val_high_normalized[cols].copy()\n",
"X_val_normal = val_normal_normalized[au_columns].copy()\n", "X_test_high = test_high_normalized[cols].copy()\n",
"X_val_high = val_high_normalized[au_columns].copy()\n", "X_test_normal = test_normal_normalized[cols].copy()\n",
"X_test_high = test_high_normalized[au_columns].copy()\n",
"X_test_normal = test_normal_normalized[au_columns].copy()\n",
"\n", "\n",
"\n", "\n",
"# Create labels for grid search\n", "# Create labels for grid search\n",
@@ -241,116 +260,12 @@
"X_train.shape" "X_train.shape"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "50fc80dc-fe16-4917-aad6-0dbaa1ce5ef9",
"metadata": {},
"outputs": [],
"source": [
"!pip install keras-tuner --quiet # nur einmal nötig\n",
"\n",
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"from kerastuner import HyperModel\n",
"from kerastuner.tuners import RandomSearch\n",
"\n",
"# 1⃣ HyperModel definieren\n",
"class AutoencoderHyperModel(HyperModel):\n",
" def __init__(self, input_dim):\n",
" self.input_dim = input_dim\n",
"\n",
" def build(self, hp):\n",
" reg = hp.Float(\"l2_reg\", min_value=1e-5, max_value=0.01, sampling=\"log\")\n",
" lr = hp.Float(\"learning_rate\", 1e-4, 1e-2, sampling=\"log\")\n",
"\n",
" # Encoder\n",
" encoder = keras.Sequential([\n",
" keras.layers.Dense(\n",
" units=hp.Int(\"enc_units1\", min_value=10, max_value=self.input_dim, step=10),\n",
" activation=None,\n",
" kernel_regularizer=keras.regularizers.l2(reg)\n",
" ),\n",
" keras.layers.LeakyReLU(alpha=0.1),\n",
" keras.layers.Dense(\n",
" units=hp.Int(\"enc_units2\", min_value=5, max_value=20, step=1),\n",
" activation=tf.keras.layers.LeakyReLU(alpha=0.1),\n",
" kernel_regularizer=keras.regularizers.l2(reg)\n",
" ),\n",
" keras.layers.Dense(\n",
" units=2, # Bottleneck\n",
" activation='linear',\n",
" kernel_regularizer=keras.regularizers.l2(reg)\n",
" ),\n",
" ])\n",
"\n",
" # Decoder\n",
" decoder = keras.Sequential([\n",
" keras.layers.Dense(\n",
" units=hp.Int(\"dec_units1\", min_value=5, max_value=20, step=1),\n",
" activation=tf.keras.layers.LeakyReLU(alpha=0.1),\n",
" kernel_regularizer=keras.regularizers.l2(reg)\n",
" ),\n",
" keras.layers.Dense(\n",
" units=hp.Int(\"dec_units2\", min_value=10, max_value=self.input_dim, step=10),\n",
" activation=tf.keras.layers.LeakyReLU(alpha=0.1),\n",
" kernel_regularizer=keras.regularizers.l2(reg)\n",
" ),\n",
" keras.layers.Dense(\n",
" units=self.input_dim,\n",
" activation='linear',\n",
" kernel_regularizer=keras.regularizers.l2(reg)\n",
" ),\n",
" ])\n",
"\n",
" # Autoencoder\n",
" inputs = keras.Input(shape=(self.input_dim,))\n",
" encoded = encoder(inputs)\n",
" decoded = decoder(encoded)\n",
" autoencoder = keras.Model(inputs, decoded)\n",
"\n",
" autoencoder.compile(\n",
" optimizer=keras.optimizers.Adam(learning_rate=lr),\n",
" loss='mse'\n",
" )\n",
"\n",
" return autoencoder\n",
"\n",
"# 2⃣ RandomSearch-Tuner\n",
"hypermodel = AutoencoderHyperModel(input_dim=X_train.shape[1])\n",
"\n",
"tuner = RandomSearch(\n",
" hypermodel,\n",
" objective='val_loss',\n",
" max_trials=10, # Anzahl der getesteten Kombinationen\n",
" executions_per_trial=1, # Anzahl Trainings pro Kombination\n",
" directory='tuner_dir',\n",
" project_name='oc_ae'\n",
")\n",
"\n",
"# 3⃣ Hyperparameter-Tuning starten\n",
"tuner.search(\n",
" X_train, X_train,\n",
" epochs=100,\n",
" batch_size=64,\n",
" validation_data=(X_val_normal, X_val_normal),\n",
" verbose=0\n",
")\n",
"\n",
"# 4⃣ Beste Architektur holen\n",
"best_model = tuner.get_best_models(num_models=1)[0]\n",
"best_hyperparameters = tuner.get_best_hyperparameters(1)[0]\n",
"\n",
"print(\"Beste Hyperparameter:\", best_hyperparameters.values)\n"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "362c0a6f", "id": "362c0a6f",
"metadata": {}, "metadata": {},
"source": [ "source": [
"\n", "Build model"
"Beste Hyperparameter: {'l2_reg': 1.3757411430582133e-05, 'learning_rate': 0.007321002854350309, 'enc_units1': 20, 'enc_units2': 16, 'dec_units1': 14, 'dec_units2': 10}"
] ]
}, },
{ {
@@ -360,26 +275,6 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# reg = 0.1\n",
"# encoder = tf.keras.Sequential(\n",
"# [\n",
"# tf.keras.layers.Dense(units=X_train.shape[1], activation='relu', kernel_regularizer=tf.keras.regularizers.l2(reg)),\n",
"# tf.keras.layers.Dense(units=10, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(reg)),\n",
"# tf.keras.layers.Dense(units=5, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(reg)),\n",
" \n",
"# ]\n",
"# )\n",
"\n",
"# decoder = tf.keras.Sequential(\n",
"# [\n",
"# tf.keras.layers.Dense(units=5,activation='relu', kernel_regularizer=tf.keras.regularizers.l2(reg)),\n",
"# tf.keras.layers.Dense(units=10, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(reg)),\n",
"# tf.keras.layers.Dense(units=X_train.shape[1], activation='linear', kernel_regularizer=tf.keras.regularizers.l2(reg))\n",
"# ]\n",
"# )\n",
"\n",
"\n",
"\n",
"reg = 1e-5\n", "reg = 1e-5\n",
"\n", "\n",
"# ENCODER\n", "# ENCODER\n",
@@ -389,14 +284,15 @@
" activation=None,\n", " activation=None,\n",
" kernel_regularizer=tf.keras.regularizers.l2(reg)\n", " kernel_regularizer=tf.keras.regularizers.l2(reg)\n",
" ),\n", " ),\n",
" tf.keras.layers.LeakyReLU(alpha=0.1),\n", " tf.keras.layers.LeakyReLU(negative_slope=0.1),\n",
" \n", "\n",
" tf.keras.layers.Dense(\n", " tf.keras.layers.Dense(\n",
" units=12,\n", " units=12,\n",
" activation=tf.keras.layers.LeakyReLU(negative_slope=0.1),\n", " activation=None,\n",
" kernel_regularizer=tf.keras.regularizers.l2(reg)\n", " kernel_regularizer=tf.keras.regularizers.l2(reg)\n",
" ),\n", " ),\n",
" \n", " tf.keras.layers.LeakyReLU(negative_slope=0.1),\n",
"\n",
" tf.keras.layers.Dense(\n", " tf.keras.layers.Dense(\n",
" units=8,\n", " units=8,\n",
" activation='linear', # Bottleneck stays linear\n", " activation='linear', # Bottleneck stays linear\n",
@@ -408,20 +304,24 @@
"decoder = tf.keras.Sequential([\n", "decoder = tf.keras.Sequential([\n",
" tf.keras.layers.Dense(\n", " tf.keras.layers.Dense(\n",
" units=8,\n", " units=8,\n",
" activation=tf.keras.layers.LeakyReLU(negative_slope=0.1),\n", " activation=None,\n",
" kernel_regularizer=tf.keras.regularizers.l2(reg)\n", " kernel_regularizer=tf.keras.regularizers.l2(reg)\n",
" ),\n", " ),\n",
" tf.keras.layers.LeakyReLU(negative_slope=0.1),\n",
"\n",
" tf.keras.layers.Dense(\n", " tf.keras.layers.Dense(\n",
" units=12,\n", " units=12,\n",
" activation=tf.keras.layers.LeakyReLU(negative_slope=0.1),\n", " activation=None,\n",
" kernel_regularizer=tf.keras.regularizers.l2(reg)\n", " kernel_regularizer=tf.keras.regularizers.l2(reg)\n",
" ),\n", " ),\n",
" tf.keras.layers.LeakyReLU(negative_slope=0.1),\n",
"\n",
" tf.keras.layers.Dense(\n", " tf.keras.layers.Dense(\n",
" units=X_train.shape[1],\n", " units=X_train.shape[1],\n",
" activation='linear',\n", " activation='linear',\n",
" kernel_regularizer=tf.keras.regularizers.l2(reg)\n", " kernel_regularizer=tf.keras.regularizers.l2(reg)\n",
" ),\n", " ),\n",
"])\n" "])"
] ]
}, },
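Per the commit message, this hunk is the LeakyReLU syntax fix: Keras 3 renamed the layer's alpha argument to negative_slope, and the activation now sits as its own layer after a linear Dense rather than being passed as a layer instance to Dense(activation=...), presumably what tripped the Keras bug the message refers to. A minimal before/after sketch:

import tensorflow as tf

# Keras 2 spellings removed by this commit:
#   tf.keras.layers.LeakyReLU(alpha=0.1)
#   tf.keras.layers.Dense(12, activation=tf.keras.layers.LeakyReLU(alpha=0.1))
# Keras 3 spelling used above: a linear Dense followed by a standalone layer.
block = tf.keras.Sequential([
    tf.keras.layers.Dense(12, activation=None),
    tf.keras.layers.LeakyReLU(negative_slope=0.1),
])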
{ {
@@ -456,7 +356,7 @@
"source": [ "source": [
"history = autoencoder.fit(\n", "history = autoencoder.fit(\n",
" X_train, X_train, # Input and target are the same for autoencoder\n", " X_train, X_train, # Input and target are the same for autoencoder\n",
" epochs=200,\n", " epochs=50,\n",
" batch_size=64,\n", " batch_size=64,\n",
" validation_data=(X_val_normal, X_val_normal),\n", " validation_data=(X_val_normal, X_val_normal),\n",
" verbose=1\n", " verbose=1\n",
@@ -470,8 +370,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"save_path = Path(\"/home/jovyan/data-paulusjafahrsimulator-gpu/saved_models/encoder_model_2_neurons_minmax.keras\")\n", "encoder.save(enconder_path)"
"encoder.save(save_path)"
] ]
}, },
{ {
@@ -489,8 +388,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"load_path = Path(\"/home/jovyan/data-paulusjafahrsimulator-gpu/saved_models/encoder_model_2_neurons_minmax.keras\")\n", "encoder = tf.keras.models.load_model(enconder_path)"
"encoder = tf.keras.models.load_model(load_path)"
] ]
}, },
{ {
@@ -541,25 +439,6 @@
"test_predictions.shape" "test_predictions.shape"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "759118d8-989d-489c-9d35-331454b4795e",
"metadata": {},
"outputs": [],
"source": [
"all_zero = {\n",
" \"X_train_encoded\": np.all(X_train_encoded == 0),\n",
" \"X_val_normal_encoded\": np.all(X_val_normal_encoded == 0),\n",
" \"X_val_high_encoded\": np.all(X_val_high_encoded == 0),\n",
" \"X_test_normal_encoded\": np.all(X_test_normal_encoded == 0),\n",
" \"X_test_high_encoded\": np.all(X_test_high_encoded == 0),\n",
"}\n",
"\n",
"print(all_zero)\n",
"print(X_train_encoded.shape)"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
@@ -567,81 +446,49 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# fig, axes = plt.subplots(1, 3, figsize=(18, 5))\n", "fig, axes = plt.subplots(1, 3, figsize=(18, 5))\n",
"\n", "\n",
"# # Subplot A: Normal\n", "# Subplot A: Normal\n",
"# axes[0].scatter(\n", "axes[0].scatter(\n",
"# X_val_normal_encoded[:, 0],\n", " X_val_normal_encoded[:, 0],\n",
"# X_val_normal_encoded[:, 1],\n", " X_val_normal_encoded[:, 1],\n",
"# color=\"blue\",\n", " color=\"blue\",\n",
"# label=\"Normal\"\n", " label=\"Normal\"\n",
"# )\n", ")\n",
"# axes[0].set_title(\"Val Normal (encoded)\")\n", "axes[0].set_title(\"Val Normal (encoded)\")\n",
"# axes[0].set_xlabel(\"latent feature 1\")\n", "axes[0].set_xlabel(\"latent feature 1\")\n",
"# axes[0].set_ylabel(\"latent feature 2\")\n", "axes[0].set_ylabel(\"latent feature 2\")\n",
"# axes[0].legend()\n", "axes[0].legend()\n",
"\n",
"# # Subplot B: High\n",
"# axes[1].scatter(\n",
"# X_val_high_encoded[:, 0],\n",
"# X_val_high_encoded[:, 1],\n",
"# color=\"orange\",\n",
"# label=\"High\"\n",
"# )\n",
"# axes[1].set_title(\"ValHigh (encoded)\")\n",
"# axes[1].set_xlabel(\"latent feature 1\")\n",
"# axes[1].set_ylabel(\"latent feature 2\")\n",
"# axes[1].legend()\n",
"\n",
"# # Subplot C: Both\n",
"# axes[2].scatter(\n",
"# X_val_normal_encoded[:, 0],\n",
"# X_val_normal_encoded[:, 1],\n",
"# color=\"blue\",\n",
"# label=\"Normal\"\n",
"# )\n",
"# axes[2].scatter(\n",
"# X_val_high_encoded[:, 0],\n",
"# X_val_high_encoded[:, 1],\n",
"# color=\"orange\",\n",
"# label=\"High\"\n",
"# )\n",
"# axes[2].set_title(\"Normal vs High (encoded)\")\n",
"# axes[2].set_xlabel(\"latent feature 1\")\n",
"# axes[2].set_ylabel(\"latent feature 2\")\n",
"# axes[2].legend()\n",
"\n",
"\n",
"\n",
"latent_dim = 8\n",
"fig, axes = plt.subplots(2, 4, figsize=(20, 10))\n",
"axes = axes.flatten() # flatten to index easily\n",
"\n",
"for i in range(latent_dim):\n",
" axes[i].scatter(\n",
" X_val_normal_encoded[:, i],\n",
" [0]*X_val_normal_encoded.shape[0], # optional: place on a line for 1D visualization\n",
" color='blue',\n",
" label='Normal',\n",
" alpha=0.6\n",
" )\n",
" axes[i].scatter(\n",
" X_val_high_encoded[:, i],\n",
" [0]*X_val_high_encoded.shape[0], # same for High\n",
" color='orange',\n",
" label='High',\n",
" alpha=0.6\n",
" )\n",
" axes[i].set_title(f'Latent dim {i+1}')\n",
" axes[i].set_xlabel(f'Feature {i+1}')\n",
" axes[i].set_yticks([]) # hide y-axis as it's just a 1D comparison\n",
" axes[i].legend()\n",
" axes[i].grid(True)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n",
"\n", "\n",
"# Subplot B: High\n",
"axes[1].scatter(\n",
" X_val_high_encoded[:, 0],\n",
" X_val_high_encoded[:, 1],\n",
" color=\"orange\",\n",
" label=\"High\"\n",
")\n",
"axes[1].set_title(\"ValHigh (encoded)\")\n",
"axes[1].set_xlabel(\"latent feature 1\")\n",
"axes[1].set_ylabel(\"latent feature 2\")\n",
"axes[1].legend()\n",
"\n", "\n",
"# Subplot C: Both\n",
"axes[2].scatter(\n",
" X_val_normal_encoded[:, 0],\n",
" X_val_normal_encoded[:, 1],\n",
" color=\"blue\",\n",
" label=\"Normal\"\n",
")\n",
"axes[2].scatter(\n",
" X_val_high_encoded[:, 0],\n",
" X_val_high_encoded[:, 1],\n",
" color=\"orange\",\n",
" label=\"High\"\n",
")\n",
"axes[2].set_title(\"Normal vs High (encoded)\")\n",
"axes[2].set_xlabel(\"latent feature 1\")\n",
"axes[2].set_ylabel(\"latent feature 2\")\n",
"axes[2].legend()\n",
"\n", "\n",
"plt.tight_layout()\n", "plt.tight_layout()\n",
"plt.show()\n", "plt.show()\n",
@@ -708,11 +555,11 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# Save\n", "# Save\n",
"with open('ocsvm_model.pkl', 'wb') as f:\n", "with open(model_path, \"wb\") as f:\n",
" pickle.dump(ocsvm, f)\n", " pickle.dump(ocsvm, f)\n",
"\n", "\n",
"# Load later\n", "# Load later\n",
"with open('ocsvm_model.pkl', 'rb') as f:\n", "with open(model_path, \"rb\") as f:\n",
" ocsvm_loaded = pickle.load(f)" " ocsvm_loaded = pickle.load(f)"
] ]
}, },
@@ -731,11 +578,6 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# X_combined = np.concatenate([X_train_encoded, X_val_normal_encoded, X_val_high_encoded], axis=0)\n",
"# y_combined = np.concatenate([\n",
"# np.ones(X_train_encoded.shape[0]+X_val_normal_encoded.shape[0]), # normal = 1\n",
"# -np.ones(X_val_high_encoded.shape[0]) # anomaly = -1\n",
"# ], axis=0)\n",
"X_combined = np.concatenate([X_train_encoded, X_val_high_encoded], axis=0)\n", "X_combined = np.concatenate([X_train_encoded, X_val_high_encoded], axis=0)\n",
"y_combined = np.concatenate([\n", "y_combined = np.concatenate([\n",
" np.ones(X_train_encoded.shape[0]), # normal = 1\n", " np.ones(X_train_encoded.shape[0]), # normal = 1\n",
@@ -856,14 +698,6 @@
"source": [ "source": [
"f1_score(y_true=np.concatenate([y_test_normal, y_test_high]), y_pred=predictions)" "f1_score(y_true=np.concatenate([y_test_normal, y_test_high]), y_pred=predictions)"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9e412041-7534-40fe-8486-ee97349a6168",
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {

View File

@@ -1,20 +1,20 @@
database: database:
path: "C:\\repo\\Fahrsimulator_MSY2526_AI\\predict_pipeline\\database.sqlite" path: "/home/edgekit/MSY_FS/databases/database.sqlite"
table: feature_table table: feature_table
key: _Id key: _Id
model: model:
path: "C:\\repo\\Fahrsimulator_MSY2526_AI\\files_for_testing\\xgb_model_3_groupK.joblib" path: "/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/predict_pipeline/xgb_model_3_groupK.joblib"
scaler: scaler:
use_scaling: True use_scaling: true
path: "C:\\repo\\Fahrsimulator_MSY2526_AI\\predict_pipeline\\normalizer_min_max_global.pkl" path: "/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/predict_pipeline/normalizer_min_max_global.pkl"
mqtt: mqtt:
enabled: true enabled: true
host: "141.75.215.233" host: "141.75.215.233"
port: 1883 port: 1883
topic: "PREDICTIONS" topic: "PREDICTION"
client_id: "jetson-board" client_id: "jetson-board"
qos: 0 qos: 0
retain: false retain: false
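The use_scaling change from True to true is purely stylistic: PyYAML's safe_load resolves both spellings to a Python bool under its YAML 1.1 rules, as a quick check shows:

import yaml

print(yaml.safe_load("use_scaling: True"))  # {'use_scaling': True}
print(yaml.safe_load("use_scaling: true"))  # {'use_scaling': True}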

View File

@ -7,9 +7,10 @@ import sys
import yaml import yaml
import pickle import pickle
sys.path.append('/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/tools') sys.path.append('/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/tools')
# sys.path.append(r"c:\\repo\\Fahrsimulator_MSY2526_AI\\tools")
import db_helpers import db_helpers
import joblib import joblib
import paho.mqtt.client as mqtt
def _load_serialized(path: Path): def _load_serialized(path: Path):
suffix = path.suffix.lower() suffix = path.suffix.lower()
@ -52,11 +53,11 @@ def callModel(sample, model_path):
suffix = model_path.suffix.lower() suffix = model_path.suffix.lower()
if suffix in {".pkl", ".joblib"}: if suffix in {".pkl", ".joblib"}:
model = _load_serialized(model_path) model = _load_serialized(model_path)
# elif suffix == ".keras": elif suffix == ".keras":
# import tensorflow as tf import tensorflow as tf
# model = tf.keras.models.load_model(model_path) model = tf.keras.models.load_model(model_path)
# else: else:
# raise ValueError(f"Unsupported model format: {suffix}. Use .pkl, .joblib, or .keras.") raise ValueError(f"Unsupported model format: {suffix}. Use .pkl, .joblib, or .keras.")
x = np.asarray(sample, dtype=np.float32) x = np.asarray(sample, dtype=np.float32)
if x.ndim == 1: if x.ndim == 1:
@ -125,33 +126,27 @@ def sendMessage(config_file_path, message):
# Serialize the message to JSON # Serialize the message to JSON
payload = json.dumps(message, ensure_ascii=False) payload = json.dumps(message, ensure_ascii=False)
print(payload) print(payload) # for debugging purposes
# Later: publish via MQTT using config parameters above.
# Example (kept commented intentionally): client = mqtt.Client(client_id=mqtt_cfg.get("client_id", "predictor-01"))
# import paho.mqtt.client as mqtt if "username" in mqtt_cfg and mqtt_cfg.get("username"):
# client = mqtt.Client(client_id=mqtt_cfg.get("client_id", "predictor-01")) client.username_pw_set(mqtt_cfg["username"], mqtt_cfg.get("password"))
# if "username" in mqtt_cfg and mqtt_cfg.get("username"): client.connect(mqtt_cfg.get("host", "localhost"), int(mqtt_cfg.get("port", 1883)), 60)
# client.username_pw_set(mqtt_cfg["username"], mqtt_cfg.get("password")) client.publish(
# client.connect(mqtt_cfg.get("host", "localhost"), int(mqtt_cfg.get("port", 1883)), 60) topic=topic,
# client.publish( payload=payload,
# topic=topic, qos=int(mqtt_cfg.get("qos", 1)),
# payload=payload, retain=bool(mqtt_cfg.get("retain", False)),
# qos=int(mqtt_cfg.get("qos", 1)), )
# retain=bool(mqtt_cfg.get("retain", False)), client.disconnect()
# )
# client.disconnect()
return return
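The uncommented publish sequence matches the paho-mqtt 1.x constructor; if the board ever moves to paho-mqtt 2.x, mqtt.Client additionally requires a callback API version as its first argument. A version-tolerant sketch (host, port and topic here are placeholders, not values from config.yaml):

import json
import paho.mqtt.client as mqtt

try:
    # paho-mqtt >= 2.0 requires the callback API version up front.
    client = mqtt.Client(mqtt.CallbackAPIVersion.VERSION2, client_id="predictor-01")
except AttributeError:
    # paho-mqtt 1.x has no CallbackAPIVersion.
    client = mqtt.Client(client_id="predictor-01")
client.connect("localhost", 1883, 60)
client.publish(topic="PREDICTION", payload=json.dumps({"ok": True}), qos=0, retain=False)
client.disconnect()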
def replace_nan(sample, config_file_path: Path): def replace_nan(sample, config_file_path: Path):
with config_file_path.open("r", encoding="utf-8") as f: with config_file_path.open("r", encoding="utf-8") as f:
cfg = yaml.safe_load(f) cfg = yaml.safe_load(f)
fallback_list = cfg.get("fallback", []) fallback_map = cfg.get("fallback", {})
fallback_map = {}
for item in fallback_list:
if isinstance(item, dict):
fallback_map.update(item)
if sample.empty: if sample.empty:
return False, sample return False, sample
@ -162,7 +157,6 @@ def replace_nan(sample, config_file_path: Path):
if valid and fallback_map: if valid and fallback_map:
sample = sample.fillna(value=fallback_map) sample = sample.fillna(value=fallback_map)
return valid, sample return valid, sample
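The rewritten replace_nan reads fallback as a flat mapping, where the old code unpacked a list of single-key mappings, so the config.yaml entry has to change shape accordingly. A small sketch of the new shape and the resulting fillna call, with hypothetical feature names borrowed from the notebook's eye columns:

import pandas as pd

# New config shape (flat mapping), matching cfg.get("fallback", {}):
#   fallback:
#     Pupil_mean: 0.0
#     Blink_count: 0
# Old shape was a list of single-key mappings:
#   fallback:
#     - Pupil_mean: 0.0
#     - Blink_count: 0
fallback_map = {"Pupil_mean": 0.0, "Blink_count": 0}
sample = pd.DataFrame({"Pupil_mean": [float("nan")], "Blink_count": [3]})
print(sample.fillna(value=fallback_map))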
def sample_to_numpy(sample, drop_cols=("_Id", "start_time")): def sample_to_numpy(sample, drop_cols=("_Id", "start_time")):
@ -213,7 +207,7 @@ def scale_sample(sample, use_scaling=False, scaler_path=None):
return df.iloc[0] if isinstance(sample, pd.Series) else df return df.iloc[0] if isinstance(sample, pd.Series) else df
def main(): def main():
pd.set_option('future.no_silent_downcasting', True) # can possibly be removed pd.set_option('future.no_silent_downcasting', True)
config_file_path = Path("/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/predict_pipeline/config.yaml") config_file_path = Path("/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/predict_pipeline/config.yaml")
with config_file_path.open("r", encoding="utf-8") as f: with config_file_path.open("r", encoding="utf-8") as f: