diff --git a/model_training/CNN/CNN_crossVal_EarlyFusion_Test_Eval.ipynb b/model_training/CNN/CNN_crossVal_EarlyFusion_Test_Eval.ipynb index 4ef2cb8..631fa5c 100644 --- a/model_training/CNN/CNN_crossVal_EarlyFusion_Test_Eval.ipynb +++ b/model_training/CNN/CNN_crossVal_EarlyFusion_Test_Eval.ipynb @@ -180,11 +180,16 @@ "gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)\n", "train_idx, test_idx = next(gss.split(X, y, groups))\n", "\n", - "feature_columns_train, feature_columns_test = X[train_idx], X[test_idx]\n", + "#feature_columns_train, feature_columns_test = X[train_idx], X[test_idx]\n", + "X_train, X_test = X[train_idx], X[test_idx]\n", "y_train, y_test = y[train_idx], y[test_idx]\n", "groups_train, groups_test = groups[train_idx], groups[test_idx]\n", "\n", - "print(\"Train:\", len(y_train), \" | Test:\", len(y_test))" + "print(\"Train:\", len(y_train), \" | Test:\", len(y_test))\n", + "print(\"Train:\", len(X_train), \" | Test:\", len(X_test))\n", + "print(train_idx)\n", + "print(test_idx)\n", + "print(np.intersect1d(train_idx,test_idx))" ] }, { @@ -247,9 +252,9 @@ "fold_subjects = []\n", "all_conf_matrices = []\n", "\n", - "for fold, (train_idx, val_idx) in enumerate(gkf.split(X, y, groups)):\n", - " train_subjects = np.unique(groups[train_idx]) \n", - " val_subjects = np.unique(groups[val_idx]) \n", + "for fold, (tr_idx, val_idx) in enumerate(gkf.split(X_train, y_train, groups_train)):\n", + " train_subjects = np.unique(groups_train[tr_idx]) \n", + " val_subjects = np.unique(groups_train[val_idx]) \n", " fold_subjects.append({\"Fold\": fold+1, \n", " \"Train_Subjects\": train_subjects, \n", " \"Val_Subjects\": val_subjects}) \n", @@ -259,23 +264,17 @@ " print(\"Val-Subjects:\", val_subjects) \n", "\n", " #Split\n", - " X_train, X_val = X[train_idx], X[val_idx] \n", - " y_train, y_val = y[train_idx], y[val_idx] # Normalisierung pro Fold \n", + " X_tr, X_val = X_train[tr_idx], X_train[val_idx] \n", + " y_tr, y_val = y_train[tr_idx], y_train[val_idx] # Normalisierung pro Fold \n", "\n", " #Normalisierung pro Fold\n", " scaler = StandardScaler() \n", - " X_train = scaler.fit_transform(X_train.reshape(len(X_train), -1)).reshape(X_train.shape) \n", + " X_tr = scaler.fit_transform(X_tr.reshape(len(X_tr), -1)).reshape(X_tr.shape) \n", " X_val = scaler.transform(X_val.reshape(len(X_val), -1)).reshape(X_val.shape) \n", "\n", - " # Plausibilitäts-Check \n", - " print(\"Train Mittelwerte (erste 5 Features):\", X_train.mean(axis=0)[:5]) \n", - " print(\"Train Std (erste 5 Features):\", X_train.std(axis=0)[:5]) \n", - " print(\"Val Mittelwerte (erste 5 Features):\", X_val.mean(axis=0)[:5]) \n", - " print(\"Val Std (erste 5 Features):\", X_val.std(axis=0)[:5]) \n", - "\n", " # Modell \n", - " model = build_model(input_shape=(len(feature_columns_train),1), lr=1e-4) \n", - " model.summary() \n", + " model = build_model(input_shape=(len(feature_columns),1), lr=1e-4) \n", + " #model.summary() \n", "\n", " callbacks = [ \n", " tf.keras.callbacks.EarlyStopping(monitor=\"val_loss\", patience=10, restore_best_weights=True), \n", @@ -283,9 +282,9 @@ " ] \n", "\n", " history = model.fit( \n", - " X_train, y_train, \n", + " X_tr, y_tr, \n", " validation_data=(X_val, y_val), \n", - " epochs=100, \n", + " epochs=50,\n", " batch_size=16, \n", " callbacks=callbacks, \n", " verbose=0 \n", @@ -373,14 +372,14 @@ "outputs": [], "source": [ "scaler_final = StandardScaler() \n", - "X_scaled = scaler_final.fit_transform(feature_columns_train.reshape(len(feature_columns_train), -1)).reshape(feature_columns_train.shape) \n", + "X_train_scaled = scaler_final.fit_transform( X_train.reshape(len(X_train), -1) ).reshape(X_train.shape)\n", "\n", - "final_model = build_model(input_shape=(len(feature_columns_train),1), lr=1e-4) \n", - "final_model.summary() \n", + "final_model = build_model(input_shape=(len(feature_columns),1), lr=1e-4) \n", + "#final_model.summary() \n", "\n", "final_model.fit( \n", - " X_scaled, y_train, \n", - " epochs=150, \n", + " X_train_scaled, y_train,\n", + " epochs=50, \n", " batch_size=16, \n", " verbose=1 \n", ")" @@ -401,8 +400,8 @@ "metadata": {}, "outputs": [], "source": [ - "# final_model.save(\"cnn_crossVal_EarlyFusion_V2.keras\") \n", - "# joblib.dump(scaler_final, \"scaler_crossVal_EarlyFusion_V2.joblib\") \n", + "final_model.save(\"cnn_crossVal_EarlyFusion_V2_0103.keras\") \n", + "joblib.dump(scaler_final, \"scaler_crossVal_EarlyFusion_V2_0103.joblib\") \n", "\n", "# print(\"Finales Modell und Scaler gespeichert als 'cnn_crossVal_EarlyFusion_V2.keras' und 'scaler_crossVal_EarlyFusion_V2.joblib'\")" ] @@ -458,8 +457,8 @@ "source": [ "# Preprocessing Testdaten \n", "X_test_scaled = scaler.transform( \n", - " feature_columns_test.reshape(len(feature_columns_test), -1) \n", - ").reshape(feature_columns_test.shape) \n", + " X_test.reshape(len(X_test), -1) \n", + ").reshape(X_test.shape) \n", "\n", "# Vorhersagen \n", "y_prob_test = model.predict(X_test_scaled).flatten() \n", diff --git a/model_training/CNN/CNN_crossVal_HybridFusion_Test_Eval.ipynb b/model_training/CNN/CNN_crossVal_HybridFusion_Test_Eval.ipynb index 07a7964..dacf1db 100644 --- a/model_training/CNN/CNN_crossVal_HybridFusion_Test_Eval.ipynb +++ b/model_training/CNN/CNN_crossVal_HybridFusion_Test_Eval.ipynb @@ -140,8 +140,10 @@ "data = data.dropna(subset=au_columns + eye_columns + [\"label\"]) \n", "\n", "# Arrays \n", + "print(data[au_columns].shape)\n", "X_au = data[au_columns].values[..., np.newaxis] \n", - "X_eye = data[eye_columns].values \n", + "X_eye = data[eye_columns].values\n", + "print(X_au.shape)\n", "y = data[\"label\"].values \n", "groups = data[\"subjectID\"].values" ] @@ -169,7 +171,9 @@ "y_train, y_test = y[train_idx], y[test_idx]\n", "groups_train, groups_test = groups[train_idx], groups[test_idx]\n", "\n", - "print(\"Train:\", len(y_train), \" | Test:\", len(y_test))" + "print(\"Train:\", len(y_train), \" | Test:\", len(y_test))\n", + "print(np.unique(groups_test))\n", + "print(np.unique(groups_train))" ] }, { @@ -235,9 +239,19 @@ "cv_histories = [] \n", "cv_results = [] \n", "all_conf_matrices = [] \n", + "fold_subjects = []\n", "\n", "for fold, (tr_idx, va_idx) in enumerate(gkf.split(X_au_train, y_train, groups_train)): \n", - " print(f\"\\n===== FOLD {fold+1} =====\") \n", + " \n", + " train_subjects = np.unique(groups_train[tr_idx]) \n", + " val_subjects = np.unique(groups_train[va_idx]) \n", + " fold_subjects.append({\"Fold\": fold+1, \n", + " \"Train_Subjects\": train_subjects, \n", + " \"Val_Subjects\": val_subjects}) \n", + " \n", + " print(f\"\\n--- Fold {fold+1} ---\") \n", + " print(\"Train-Subjects:\", train_subjects) \n", + " print(\"Val-Subjects:\", val_subjects) \n", " \n", " X_tr_au, X_va_au = X_au_train[tr_idx], X_au_train[va_idx] \n", " X_tr_eye, X_va_eye = X_eye_train[tr_idx], X_eye_train[va_idx] \n", @@ -361,9 +375,9 @@ "metadata": {}, "outputs": [], "source": [ - "model_cv.save(\"hybrid_fusion_model_Test_group_split.keras\") \n", - "joblib.dump(scaler_au, \"scaler_au_Test_group_split.joblib\") \n", - "joblib.dump(scaler_eye, \"scaler_eye_Test_group_split.joblib\") \n", + "model_cv.save(\"hybrid_fusion_model_Test_group_split_0103.keras\") \n", + "joblib.dump(scaler_au, \"scaler_au_Test_group_split_0103.joblib\") \n", + "joblib.dump(scaler_eye, \"scaler_eye_Test_group_split_0103.joblib\") \n", "\n", "print(\"Finales Modell gespeichert.\")" ]