Compare commits
3 Commits
de0084dc09
...
13bd76631f
| Author | SHA1 | Date | |
|---|---|---|---|
| 13bd76631f | |||
| 6cc38291df | |||
| 8b6c547387 |
@ -41,7 +41,6 @@
|
|||||||
"import time\n",
|
"import time\n",
|
||||||
"base_dir = os.path.abspath(os.path.join(os.getcwd(), \"..\"))\n",
|
"base_dir = os.path.abspath(os.path.join(os.getcwd(), \"..\"))\n",
|
||||||
"sys.path.append(base_dir)\n",
|
"sys.path.append(base_dir)\n",
|
||||||
"print(base_dir)\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"from Fahrsimulator_MSY2526_AI.model_training.tools import evaluation_tools, scaler, mad_outlier_removal, performance_split\n",
|
"from Fahrsimulator_MSY2526_AI.model_training.tools import evaluation_tools, scaler, mad_outlier_removal, performance_split\n",
|
||||||
"from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
|
"from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
|
||||||
@ -51,7 +50,7 @@
|
|||||||
"import tensorflow as tf\n",
|
"import tensorflow as tf\n",
|
||||||
"from tensorflow.keras import layers, models, regularizers\n",
|
"from tensorflow.keras import layers, models, regularizers\n",
|
||||||
"import pickle\n",
|
"import pickle\n",
|
||||||
"from sklearn.metrics import (roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, balanced_accuracy_score, ConfusionMatrixDisplay, auc, roc_curve) "
|
"from sklearn.metrics import (accuracy_score, auc, roc_curve, f1_score) "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -89,15 +88,40 @@
|
|||||||
"id": "f00a477c",
|
"id": "f00a477c",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### Data Preprocessing"
|
"### Configuration of paths and data preprocessing"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "code",
|
||||||
"id": "504c1df7",
|
"execution_count": null,
|
||||||
|
"id": "5136fcec",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"Laden der Daten"
|
"# TODO: set path where to save normalizer\n",
|
||||||
|
"normalizer_path=Path('.pkl') # TODO: set manually"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "c2115f65",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"performance_path = Path(r\".csv\") # TODO: set manually\n",
|
||||||
|
"performance_df = pd.read_csv(performance_path)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "559eb8d2",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"encoder_save_path = Path('.keras') # TODO: set manually\n",
|
||||||
|
"deep_svdd_save_path = Path('.keras') # TODO: set manually"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -107,8 +131,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"dataset_path = Path(r\"data-paulusjafahrsimulator-gpu/new_datasets/combined_dataset_25hz.parquet\")\n",
|
"dataset_path = Path(r\".parquet\") # TODO: set manually"
|
||||||
"# dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/120s_combined_dataset_25hz.parquet\")"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -121,17 +144,6 @@
|
|||||||
"df = pd.read_parquet(path=dataset_path)"
|
"df = pd.read_parquet(path=dataset_path)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "c2115f65",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"performance_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/subject_performance/3new_au_performance.csv\")\n",
|
|
||||||
"performance_df = pd.read_csv(performance_path)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "c045c46d",
|
"id": "c045c46d",
|
||||||
@ -215,7 +227,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"data = pd.concat([low, high], ignore_index=True)\n",
|
"data = pd.concat([low, high], ignore_index=True)\n",
|
||||||
"df = data.drop_duplicates()\n",
|
"df = data.drop_duplicates()\n",
|
||||||
"\n",
|
"df = df.dropna()\n",
|
||||||
"print(\"Label distribution:\")\n",
|
"print(\"Label distribution:\")\n",
|
||||||
"print(df[\"label\"].value_counts())"
|
"print(df[\"label\"].value_counts())"
|
||||||
]
|
]
|
||||||
@ -275,210 +287,6 @@
|
|||||||
"Normalization"
|
"Normalization"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "acec4a03",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def fit_normalizer(train_data, au_columns, method='standard', scope='global'):\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Fit normalization scalers on training data.\n",
|
|
||||||
" \n",
|
|
||||||
" Parameters:\n",
|
|
||||||
" -----------\n",
|
|
||||||
" train_data : pd.DataFrame\n",
|
|
||||||
" Training dataframe with AU columns and subjectID\n",
|
|
||||||
" au_columns : list\n",
|
|
||||||
" List of AU column names to normalize\n",
|
|
||||||
" method : str, default='standard'\n",
|
|
||||||
" Normalization method: 'standard' for StandardScaler or 'minmax' for MinMaxScaler\n",
|
|
||||||
" scope : str, default='global'\n",
|
|
||||||
" Normalization scope: 'subject' for per-subject or 'global' for across all subjects\n",
|
|
||||||
" \n",
|
|
||||||
" Returns:\n",
|
|
||||||
" --------\n",
|
|
||||||
" dict\n",
|
|
||||||
" Dictionary containing fitted scalers and statistics for new subjects\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" if method == 'standard':\n",
|
|
||||||
" Scaler = StandardScaler\n",
|
|
||||||
" elif method == 'minmax':\n",
|
|
||||||
" Scaler = MinMaxScaler\n",
|
|
||||||
" else:\n",
|
|
||||||
" raise ValueError(\"method must be 'standard' or 'minmax'\")\n",
|
|
||||||
" \n",
|
|
||||||
" scalers = {}\n",
|
|
||||||
" if scope == 'subject':\n",
|
|
||||||
" # Fit one scaler per subject\n",
|
|
||||||
" subject_stats = []\n",
|
|
||||||
" \n",
|
|
||||||
" for subject in train_data['subjectID'].unique():\n",
|
|
||||||
" subject_mask = train_data['subjectID'] == subject\n",
|
|
||||||
" scaler = Scaler()\n",
|
|
||||||
" scaler.fit(train_data.loc[subject_mask, au_columns].values)\n",
|
|
||||||
" scalers[subject] = scaler\n",
|
|
||||||
" \n",
|
|
||||||
" # Store statistics for averaging\n",
|
|
||||||
" if method == 'standard':\n",
|
|
||||||
" subject_stats.append({\n",
|
|
||||||
" 'mean': scaler.mean_,\n",
|
|
||||||
" 'std': scaler.scale_\n",
|
|
||||||
" })\n",
|
|
||||||
" elif method == 'minmax':\n",
|
|
||||||
" subject_stats.append({\n",
|
|
||||||
" 'min': scaler.data_min_,\n",
|
|
||||||
" 'max': scaler.data_max_\n",
|
|
||||||
" })\n",
|
|
||||||
" \n",
|
|
||||||
" # Calculate average statistics for new subjects\n",
|
|
||||||
" if method == 'standard':\n",
|
|
||||||
" avg_mean = np.mean([s['mean'] for s in subject_stats], axis=0)\n",
|
|
||||||
" avg_std = np.mean([s['std'] for s in subject_stats], axis=0)\n",
|
|
||||||
" fallback_scaler = StandardScaler()\n",
|
|
||||||
" fallback_scaler.mean_ = avg_mean\n",
|
|
||||||
" fallback_scaler.scale_ = avg_std\n",
|
|
||||||
" fallback_scaler.var_ = avg_std ** 2\n",
|
|
||||||
" fallback_scaler.n_features_in_ = len(au_columns)\n",
|
|
||||||
" elif method == 'minmax':\n",
|
|
||||||
" avg_min = np.mean([s['min'] for s in subject_stats], axis=0)\n",
|
|
||||||
" avg_max = np.mean([s['max'] for s in subject_stats], axis=0)\n",
|
|
||||||
" fallback_scaler = MinMaxScaler()\n",
|
|
||||||
" fallback_scaler.data_min_ = avg_min\n",
|
|
||||||
" fallback_scaler.data_max_ = avg_max\n",
|
|
||||||
" fallback_scaler.data_range_ = avg_max - avg_min\n",
|
|
||||||
" fallback_scaler.scale_ = 1.0 / fallback_scaler.data_range_\n",
|
|
||||||
" fallback_scaler.min_ = -avg_min * fallback_scaler.scale_\n",
|
|
||||||
" fallback_scaler.n_features_in_ = len(au_columns)\n",
|
|
||||||
" \n",
|
|
||||||
" scalers['_fallback'] = fallback_scaler\n",
|
|
||||||
" \n",
|
|
||||||
" elif scope == 'global':\n",
|
|
||||||
" # Fit one scaler for all subjects\n",
|
|
||||||
" scaler = Scaler()\n",
|
|
||||||
" scaler.fit(train_data[au_columns].values)\n",
|
|
||||||
" scalers['global'] = scaler\n",
|
|
||||||
" \n",
|
|
||||||
" else:\n",
|
|
||||||
" raise ValueError(\"scope must be 'subject' or 'global'\")\n",
|
|
||||||
" \n",
|
|
||||||
" return {'scalers': scalers, 'method': method, 'scope': scope}\n",
|
|
||||||
"\n",
|
|
||||||
"def apply_normalizer(data, columns, normalizer_dict):\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Apply fitted normalization scalers to data.\n",
|
|
||||||
" \n",
|
|
||||||
" Parameters:\n",
|
|
||||||
" -----------\n",
|
|
||||||
" data : pd.DataFrame\n",
|
|
||||||
" Dataframe with AU columns and subjectID\n",
|
|
||||||
" au_columns : list\n",
|
|
||||||
" List of AU column names to normalize\n",
|
|
||||||
" normalizer_dict : dict\n",
|
|
||||||
" Dictionary containing fitted scalers from fit_normalizer()\n",
|
|
||||||
" \n",
|
|
||||||
" Returns:\n",
|
|
||||||
" --------\n",
|
|
||||||
" pd.DataFrame\n",
|
|
||||||
" DataFrame with normalized AU columns\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" normalized_data = data.copy()\n",
|
|
||||||
" scalers = normalizer_dict['scalers']\n",
|
|
||||||
" scope = normalizer_dict['scope']\n",
|
|
||||||
" normalized_data[columns] = normalized_data[columns].astype(np.float64)\n",
|
|
||||||
"\n",
|
|
||||||
" if scope == 'subject':\n",
|
|
||||||
" # Apply per-subject normalization\n",
|
|
||||||
" for subject in data['subjectID'].unique():\n",
|
|
||||||
" subject_mask = data['subjectID'] == subject\n",
|
|
||||||
" \n",
|
|
||||||
" # Use the subject's scaler if available, otherwise use fallback\n",
|
|
||||||
" if subject in scalers:\n",
|
|
||||||
" scaler = scalers[subject]\n",
|
|
||||||
" else:\n",
|
|
||||||
" # Use averaged scaler for new subjects\n",
|
|
||||||
" scaler = scalers['_fallback']\n",
|
|
||||||
" print(f\"Info: Subject {subject} not in training data. Using averaged scaler from training subjects.\")\n",
|
|
||||||
" \n",
|
|
||||||
" normalized_data.loc[subject_mask, columns] = scaler.transform(\n",
|
|
||||||
" data.loc[subject_mask, columns].values\n",
|
|
||||||
" )\n",
|
|
||||||
" \n",
|
|
||||||
" elif scope == 'global':\n",
|
|
||||||
" # Apply global normalization\n",
|
|
||||||
" scaler = scalers['global']\n",
|
|
||||||
" normalized_data[columns] = scaler.transform(data[columns].values)\n",
|
|
||||||
" \n",
|
|
||||||
" return normalized_data\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "53c6ee6f",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def save_normalizer(normalizer_dict, filepath):\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Save fitted normalizer to disk.\n",
|
|
||||||
"\n",
|
|
||||||
" Parameters:\n",
|
|
||||||
" -----------\n",
|
|
||||||
" normalizer_dict : dict\n",
|
|
||||||
" Dictionary containing fitted scalers from fit_normalizer()\n",
|
|
||||||
" filepath : str\n",
|
|
||||||
" Path to save the normalizer (e.g., 'normalizer.pkl')\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" # Create directory if it does not exist\n",
|
|
||||||
" dirpath = os.path.dirname(filepath)\n",
|
|
||||||
" if dirpath:\n",
|
|
||||||
" os.makedirs(dirpath, exist_ok=True)\n",
|
|
||||||
"\n",
|
|
||||||
" with open(filepath, 'wb') as f:\n",
|
|
||||||
" pickle.dump(normalizer_dict, f)\n",
|
|
||||||
"\n",
|
|
||||||
" print(f\"Normalizer saved to {filepath}\")\n",
|
|
||||||
"\n",
|
|
||||||
"def load_normalizer(filepath):\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Load fitted normalizer from disk.\n",
|
|
||||||
" \n",
|
|
||||||
" Parameters:\n",
|
|
||||||
" -----------\n",
|
|
||||||
" filepath : str\n",
|
|
||||||
" Path to the saved normalizer file\n",
|
|
||||||
" \n",
|
|
||||||
" Returns:\n",
|
|
||||||
" --------\n",
|
|
||||||
" dict\n",
|
|
||||||
" Dictionary containing fitted scalers\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" with open(filepath, 'rb') as f:\n",
|
|
||||||
" normalizer_dict = pickle.load(f)\n",
|
|
||||||
" print(f\"Normalizer loaded from {filepath}\")\n",
|
|
||||||
" return normalizer_dict"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "7280f64f",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"save Normalizer"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "8420afc2",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"normalizer_path=Path('data-paulusjafahrsimulator-gpu/saved_models/deepsvdd_save/normalizer_min_max_global.pkl')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@ -495,8 +303,10 @@
|
|||||||
"print(len(eye_cols))\n",
|
"print(len(eye_cols))\n",
|
||||||
"all_signal_columns = face_au_cols+eye_cols\n",
|
"all_signal_columns = face_au_cols+eye_cols\n",
|
||||||
"print(len(all_signal_columns))\n",
|
"print(len(all_signal_columns))\n",
|
||||||
"normalizer = fit_normalizer(train_df, all_signal_columns, method='minmax', scope='global')\n",
|
"\n",
|
||||||
"save_normalizer(normalizer, normalizer_path )"
|
"# fit and save normalizer\n",
|
||||||
|
"normalizer = scaler.fit_normalizer(train_df, all_signal_columns, method='minmax', scope='global')\n",
|
||||||
|
"scaler.save_normalizer(normalizer, normalizer_path )"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -506,11 +316,11 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"normalizer = load_normalizer(normalizer_path)\n",
|
"normalizer = scaler.load_normalizer(normalizer_path)\n",
|
||||||
"# 3. Apply normalization to all sets\n",
|
"# Apply normalization to all sets\n",
|
||||||
"train_df_norm = apply_normalizer(train_df, all_signal_columns, normalizer)\n",
|
"train_df_norm = scaler.apply_normalizer(train_df, all_signal_columns, normalizer)\n",
|
||||||
"val_df_norm = apply_normalizer(val_df, all_signal_columns, normalizer)\n",
|
"val_df_norm = scaler.apply_normalizer(val_df, all_signal_columns, normalizer)\n",
|
||||||
"test_df_norm = apply_normalizer(test_df, all_signal_columns, normalizer)"
|
"test_df_norm = scaler.apply_normalizer(test_df, all_signal_columns, normalizer)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -566,13 +376,13 @@
|
|||||||
"def build_intermediate_fusion_autoencoder(\n",
|
"def build_intermediate_fusion_autoencoder(\n",
|
||||||
" input_dim_mod1=15,\n",
|
" input_dim_mod1=15,\n",
|
||||||
" input_dim_mod2=20,\n",
|
" input_dim_mod2=20,\n",
|
||||||
" encoder_hidden_dim_mod1=12, # individuell\n",
|
" encoder_hidden_dim_mod1=12, # TODO: set manually\n",
|
||||||
" encoder_hidden_dim_mod2=20, # individuell\n",
|
" encoder_hidden_dim_mod2=20, # TODO: set manually\n",
|
||||||
" latent_dim=6, # Änderung: Bottleneck vergrößert für stabilere Repräsentation\n",
|
" latent_dim=6, # TODO: set manually\n",
|
||||||
" dropout_rate=0.4, # Dropout in Hidden Layers\n",
|
" dropout_rate=0.4, # TODO: set manually\n",
|
||||||
" neg_slope=0.1,\n",
|
" neg_slope=0.1, # TODO: set manually\n",
|
||||||
" weight_decay=1e-4,\n",
|
" weight_decay=1e-4, # TODO: set manually\n",
|
||||||
" decoder_hidden_dims=[16, 32] # Änderung: Decoder größer für bessere Rekonstruktion\n",
|
" decoder_hidden_dims=[16, 32] # TODO: set manually\n",
|
||||||
"):\n",
|
"):\n",
|
||||||
" \"\"\"\n",
|
" \"\"\"\n",
|
||||||
" Verbesserter Intermediate-Fusion Autoencoder für Deep SVDD.\n",
|
" Verbesserter Intermediate-Fusion Autoencoder für Deep SVDD.\n",
|
||||||
@ -597,10 +407,10 @@
|
|||||||
" kernel_regularizer=l2\n",
|
" kernel_regularizer=l2\n",
|
||||||
" )(x1_in)\n",
|
" )(x1_in)\n",
|
||||||
" e1 = act(e1)\n",
|
" e1 = act(e1)\n",
|
||||||
" e1 = layers.Dropout(dropout_rate)(e1) # Dropout nur hier\n",
|
" e1 = layers.Dropout(dropout_rate)(e1) \n",
|
||||||
"\n",
|
"\n",
|
||||||
" e1 = layers.Dense(\n",
|
" e1 = layers.Dense(\n",
|
||||||
" 16, # Änderung: Hidden Layer größer für stabilere Fusion\n",
|
" 16, \n",
|
||||||
" use_bias=False,\n",
|
" use_bias=False,\n",
|
||||||
" kernel_regularizer=l2\n",
|
" kernel_regularizer=l2\n",
|
||||||
" )(e1)\n",
|
" )(e1)\n",
|
||||||
@ -613,20 +423,20 @@
|
|||||||
" kernel_regularizer=l2\n",
|
" kernel_regularizer=l2\n",
|
||||||
" )(x2_in)\n",
|
" )(x2_in)\n",
|
||||||
" e2 = act(e2)\n",
|
" e2 = act(e2)\n",
|
||||||
" e2 = layers.Dropout(dropout_rate)(e2) # Dropout nur hier\n",
|
" e2 = layers.Dropout(dropout_rate)(e2) \n",
|
||||||
"\n",
|
"\n",
|
||||||
" e2 = layers.Dense(\n",
|
" e2 = layers.Dense(\n",
|
||||||
" 16, # Änderung: Hidden Layer größer\n",
|
" 16, \n",
|
||||||
" use_bias=False,\n",
|
" use_bias=False,\n",
|
||||||
" kernel_regularizer=l2\n",
|
" kernel_regularizer=l2\n",
|
||||||
" )(e2)\n",
|
" )(e2)\n",
|
||||||
" e2 = act(e2)\n",
|
" e2 = act(e2)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # -------- Intermediate Fusion --------\n",
|
" # -------- Intermediate Fusion --------\n",
|
||||||
" fused = layers.Concatenate(name=\"fusion\")([e1, e2]) # 16+16=32 Dimensionen\n",
|
" fused = layers.Concatenate(name=\"fusion\")([e1, e2]) # 16+16=32 dimensions\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # -------- Joint Encoder / Bottleneck --------\n",
|
" # -------- Joint Encoder / Bottleneck --------\n",
|
||||||
" # sinnvoll kleiner als Fusion\n",
|
"\n",
|
||||||
" h = layers.Dense(\n",
|
" h = layers.Dense(\n",
|
||||||
" latent_dim,\n",
|
" latent_dim,\n",
|
||||||
" use_bias=False,\n",
|
" use_bias=False,\n",
|
||||||
@ -637,16 +447,16 @@
|
|||||||
"\n",
|
"\n",
|
||||||
" z = layers.Dense(\n",
|
" z = layers.Dense(\n",
|
||||||
" latent_dim,\n",
|
" latent_dim,\n",
|
||||||
" activation=None, # linear, für Deep SVDD\n",
|
" activation=None, # linear for Deep SVDD\n",
|
||||||
" use_bias=False,\n",
|
" use_bias=False,\n",
|
||||||
" kernel_regularizer=l2,\n",
|
" kernel_regularizer=l2,\n",
|
||||||
" name=\"latent\"\n",
|
" name=\"latent\"\n",
|
||||||
" )(h)\n",
|
" )(h)\n",
|
||||||
" # Dropout entfernt direkt vor Bottleneck\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # -------- Decoder --------\n",
|
" # -------- Decoder --------\n",
|
||||||
" d = layers.Dense(\n",
|
" d = layers.Dense(\n",
|
||||||
" decoder_hidden_dims[0], # größerer Decoder\n",
|
" decoder_hidden_dims[0], \n",
|
||||||
" use_bias=False,\n",
|
" use_bias=False,\n",
|
||||||
" kernel_regularizer=l2\n",
|
" kernel_regularizer=l2\n",
|
||||||
" )(z)\n",
|
" )(z)\n",
|
||||||
@ -692,10 +502,10 @@
|
|||||||
"model = build_intermediate_fusion_autoencoder(\n",
|
"model = build_intermediate_fusion_autoencoder(\n",
|
||||||
" input_dim_mod1=len(face_au_cols),\n",
|
" input_dim_mod1=len(face_au_cols),\n",
|
||||||
" input_dim_mod2=len(eye_cols),\n",
|
" input_dim_mod2=len(eye_cols),\n",
|
||||||
" encoder_hidden_dim_mod1=12, # individuell\n",
|
" encoder_hidden_dim_mod1=12, # TODO: set manually\n",
|
||||||
" encoder_hidden_dim_mod2=8, # individuell\n",
|
" encoder_hidden_dim_mod2=8, # TODO: set manually\n",
|
||||||
" latent_dim=4,\n",
|
" latent_dim=4,\n",
|
||||||
" dropout_rate=0.7, # einstellbar\n",
|
" dropout_rate=0.7, # TODO: set manually\n",
|
||||||
" neg_slope=0.1,\n",
|
" neg_slope=0.1,\n",
|
||||||
" weight_decay=1e-3\n",
|
" weight_decay=1e-3\n",
|
||||||
")\n",
|
")\n",
|
||||||
@ -780,7 +590,6 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"encoder_save_path =Path('data-paulusjafahrsimulator-gpu/saved_models/deepsvdd_save/encoder_8_deep.keras')\n",
|
|
||||||
"encoder.save(encoder_save_path)"
|
"encoder.save(encoder_save_path)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -876,22 +685,6 @@
|
|||||||
"center = get_center(deep_svdd_net, [X_face, X_eye])"
|
"center = get_center(deep_svdd_net, [X_face, X_eye])"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "da140072",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# def get_radius(nu, dataset):\n",
|
|
||||||
"# x_face, x_eye = dataset # <-- zwingend entpacken\n",
|
|
||||||
"\n",
|
|
||||||
"# dataset_tuple=[x_face, x_eye]\n",
|
|
||||||
"\n",
|
|
||||||
"# dists = dist_per_sample(deep_svdd_net.predict(dataset_tuple), center)\n",
|
|
||||||
"# return np.quantile(np.sqrt(dists), 1-nu).astype(np.float32)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@ -944,10 +737,9 @@
|
|||||||
" return get_radius_from_arrays(nu, X_face, X_eye)\n",
|
" return get_radius_from_arrays(nu, X_face, X_eye)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"nu = 0.25\n",
|
"nu = 0.05 # Set nu respectively\n",
|
||||||
"\n",
|
"\n",
|
||||||
"train_dataset = tf.data.Dataset.from_tensor_slices((X_face, X_eye)).shuffle(64).batch(64)\n",
|
"train_dataset = tf.data.Dataset.from_tensor_slices((X_face, X_eye)).shuffle(64).batch(64)\n",
|
||||||
"# train_dataset = tf.data.Dataset.from_tensor_slices((X_face, X_eye))\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"optimizer = tf.keras.optimizers.Adam(1e-3)\n",
|
"optimizer = tf.keras.optimizers.Adam(1e-3)\n",
|
||||||
"train(train_dataset, epochs=150, nu=nu)\n",
|
"train(train_dataset, epochs=150, nu=nu)\n",
|
||||||
@ -1019,7 +811,6 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"deep_svdd_save_path =Path('data-paulusjafahrsimulator-gpu/saved_models/deepsvdd_save/deep_svdd_06.keras')\n",
|
|
||||||
"deep_svdd_net.save(deep_svdd_save_path)"
|
"deep_svdd_net.save(deep_svdd_save_path)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@ -28,7 +28,7 @@
|
|||||||
"sys.path.append(base_dir)\n",
|
"sys.path.append(base_dir)\n",
|
||||||
"print(base_dir)\n",
|
"print(base_dir)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from Fahrsimulator_MSY2526_AI.model_training.tools import evaluation_tools\n",
|
"from Fahrsimulator_MSY2526_AI.model_training.tools import evaluation_tools, scaler\n",
|
||||||
"from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
|
"from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
|
||||||
"from sklearn.ensemble import IsolationForest\n",
|
"from sklearn.ensemble import IsolationForest\n",
|
||||||
"from sklearn.model_selection import GridSearchCV, KFold\n",
|
"from sklearn.model_selection import GridSearchCV, KFold\n",
|
||||||
@ -52,7 +52,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"data_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/50s_25Hz_dataset.parquet\")"
|
"data_path = Path(r\".parquet\") # TODO: set manually"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -115,118 +115,6 @@
|
|||||||
"print(f\"high all: {high_all.shape}\")"
|
"print(f\"high all: {high_all.shape}\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "47a0f44d",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def fit_normalizer(train_data, au_columns, method='standard', scope='global'):\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Fit normalization scalers on training data.\n",
|
|
||||||
" \n",
|
|
||||||
" Parameters:\n",
|
|
||||||
" -----------\n",
|
|
||||||
" train_data : pd.DataFrame\n",
|
|
||||||
" Training dataframe with AU columns and subjectID\n",
|
|
||||||
" au_columns : list\n",
|
|
||||||
" List of AU column names to normalize\n",
|
|
||||||
" method : str, default='standard'\n",
|
|
||||||
" Normalization method: 'standard' for StandardScaler or 'minmax' for MinMaxScaler\n",
|
|
||||||
" scope : str, default='global'\n",
|
|
||||||
" Normalization scope: 'subject' for per-subject or 'global' for across all subjects\n",
|
|
||||||
" \n",
|
|
||||||
" Returns:\n",
|
|
||||||
" --------\n",
|
|
||||||
" dict\n",
|
|
||||||
" Dictionary containing fitted scalers\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" # Select scaler based on method\n",
|
|
||||||
" if method == 'standard':\n",
|
|
||||||
" Scaler = StandardScaler\n",
|
|
||||||
" elif method == 'minmax':\n",
|
|
||||||
" Scaler = MinMaxScaler\n",
|
|
||||||
" else:\n",
|
|
||||||
" raise ValueError(\"method must be 'standard' or 'minmax'\")\n",
|
|
||||||
" \n",
|
|
||||||
" scalers = {}\n",
|
|
||||||
" \n",
|
|
||||||
" if scope == 'subject':\n",
|
|
||||||
" # Fit one scaler per subject\n",
|
|
||||||
" for subject in train_data['subjectID'].unique():\n",
|
|
||||||
" subject_mask = train_data['subjectID'] == subject\n",
|
|
||||||
" scaler = Scaler()\n",
|
|
||||||
" scaler.fit(train_data.loc[subject_mask, au_columns])\n",
|
|
||||||
" scalers[subject] = scaler\n",
|
|
||||||
" \n",
|
|
||||||
" elif scope == 'global':\n",
|
|
||||||
" # Fit one scaler for all subjects\n",
|
|
||||||
" scaler = Scaler()\n",
|
|
||||||
" scaler.fit(train_data[au_columns])\n",
|
|
||||||
" scalers['global'] = scaler\n",
|
|
||||||
" \n",
|
|
||||||
" else:\n",
|
|
||||||
" raise ValueError(\"scope must be 'subject' or 'global'\")\n",
|
|
||||||
" \n",
|
|
||||||
" return {'scalers': scalers, 'method': method, 'scope': scope}"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "642d0017",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def apply_normalizer(data, au_columns, normalizer_dict):\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Apply fitted normalization scalers to data.\n",
|
|
||||||
" \n",
|
|
||||||
" Parameters:\n",
|
|
||||||
" -----------\n",
|
|
||||||
" data : pd.DataFrame\n",
|
|
||||||
" Dataframe with AU columns and subjectID\n",
|
|
||||||
" au_columns : list\n",
|
|
||||||
" List of AU column names to normalize\n",
|
|
||||||
" normalizer_dict : dict\n",
|
|
||||||
" Dictionary containing fitted scalers from fit_normalizer()\n",
|
|
||||||
" \n",
|
|
||||||
" Returns:\n",
|
|
||||||
" --------\n",
|
|
||||||
" pd.DataFrame\n",
|
|
||||||
" DataFrame with normalized AU columns\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" normalized_data = data.copy()\n",
|
|
||||||
" scalers = normalizer_dict['scalers']\n",
|
|
||||||
" scope = normalizer_dict['scope']\n",
|
|
||||||
" \n",
|
|
||||||
" if scope == 'subject':\n",
|
|
||||||
" # Apply per-subject normalization\n",
|
|
||||||
" for subject in data['subjectID'].unique():\n",
|
|
||||||
" subject_mask = data['subjectID'] == subject\n",
|
|
||||||
" \n",
|
|
||||||
" # Use the subject's scaler if available, otherwise use a fitted scaler from training\n",
|
|
||||||
" if subject in scalers:\n",
|
|
||||||
" scaler = scalers[subject]\n",
|
|
||||||
" else:\n",
|
|
||||||
" # For new subjects not seen in training, use the first available scaler\n",
|
|
||||||
" # (This is a fallback - ideally all test subjects should be in training for subject-level normalization)\n",
|
|
||||||
" print(f\"Warning: Subject {subject} not found in training data. Using fallback scaler.\")\n",
|
|
||||||
" scaler = list(scalers.values())[0]\n",
|
|
||||||
" \n",
|
|
||||||
" normalized_data.loc[subject_mask, au_columns] = scaler.transform(\n",
|
|
||||||
" data.loc[subject_mask, au_columns]\n",
|
|
||||||
" )\n",
|
|
||||||
" \n",
|
|
||||||
" elif scope == 'global':\n",
|
|
||||||
" # Apply global normalization\n",
|
|
||||||
" scaler = scalers['global']\n",
|
|
||||||
" normalized_data[au_columns] = scaler.transform(data[au_columns])\n",
|
|
||||||
" \n",
|
|
||||||
" return normalized_data"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "697b3cf7",
|
"id": "697b3cf7",
|
||||||
@ -335,7 +223,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Fit normalizer on training data\n",
|
"# Fit normalizer on training data\n",
|
||||||
"normalizer = fit_normalizer(train_data, cols, method='minmax', scope='global')\n",
|
"normalizer = scaler.fit_normalizer(train_data, cols, method='minmax', scope='global')\n",
|
||||||
"print(\"Normalizer fitted on training data\")"
|
"print(\"Normalizer fitted on training data\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -347,11 +235,11 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Apply normalization to all datasets\n",
|
"# Apply normalization to all datasets\n",
|
||||||
"train_normalized = apply_normalizer(train_data, cols, normalizer)\n",
|
"train_normalized = scaler.apply_normalizer(train_data, cols, normalizer)\n",
|
||||||
"val_normal_normalized = apply_normalizer(val_normal_data, cols, normalizer)\n",
|
"val_normal_normalized = scaler.apply_normalizer(val_normal_data, cols, normalizer)\n",
|
||||||
"val_high_normalized = apply_normalizer(val_high_data, cols, normalizer)\n",
|
"val_high_normalized = scaler.apply_normalizer(val_high_data, cols, normalizer)\n",
|
||||||
"test_normal_normalized = apply_normalizer(test_normal_data, cols, normalizer)\n",
|
"test_normal_normalized = scaler.apply_normalizer(test_normal_data, cols, normalizer)\n",
|
||||||
"test_high_normalized = apply_normalizer(test_high_data, cols, normalizer)\n",
|
"test_high_normalized = scaler.apply_normalizer(test_high_data, cols, normalizer)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(\"Normalization applied to all datasets\")"
|
"print(\"Normalization applied to all datasets\")"
|
||||||
]
|
]
|
||||||
@ -490,18 +378,6 @@
|
|||||||
"display_name": "Python 3 (ipykernel)",
|
"display_name": "Python 3 (ipykernel)",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.12.10"
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@ -1,877 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "708c9745",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Imports"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "53b10294",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"from pathlib import Path\n",
|
|
||||||
"import sys\n",
|
|
||||||
"import os\n",
|
|
||||||
"\n",
|
|
||||||
"base_dir = os.path.abspath(os.path.join(os.getcwd(), \"..\"))\n",
|
|
||||||
"sys.path.append(base_dir)\n",
|
|
||||||
"print(base_dir)\n",
|
|
||||||
"\n",
|
|
||||||
"from Fahrsimulator_MSY2526_AI.model_training.tools import evaluation_tools, scaler, mad_outlier_removal\n",
|
|
||||||
"from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
|
|
||||||
"from sklearn.svm import OneClassSVM\n",
|
|
||||||
"from sklearn.model_selection import GridSearchCV, KFold, ParameterGrid, train_test_split\n",
|
|
||||||
"import matplotlib.pyplot as plt\n",
|
|
||||||
"import tensorflow as tf\n",
|
|
||||||
"import pickle\n",
|
|
||||||
"from sklearn.metrics import (roc_auc_score, accuracy_score, precision_score, \n",
|
|
||||||
" recall_score, f1_score, confusion_matrix, classification_report) "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "68101229",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Load dataset"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "24a765e8",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/first_AU_dataset/output_windowed.parquet\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "471001b0",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"df = pd.read_parquet(path=dataset_path)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "0fdecdaa",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Load Performance data and Subject Split"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "692d1b47",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"performance_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/subject_performance/3new_au_performance.csv\")\n",
|
|
||||||
"performance_df = pd.read_csv(performance_path)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "ea617e3f",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# Subject IDs aus dem Haupt-Dataset nehmen\n",
|
|
||||||
"subjects_from_df = df[\"subjectID\"].unique()\n",
|
|
||||||
"\n",
|
|
||||||
"# Performance-Subset nur für vorhandene Subjects\n",
|
|
||||||
"perf_filtered = performance_df[\n",
|
|
||||||
" performance_df[\"subjectID\"].isin(subjects_from_df)\n",
|
|
||||||
"][[\"subjectID\", \"overall_score\"]]\n",
|
|
||||||
"\n",
|
|
||||||
"# Merge: nur Subjects, die sowohl im df als auch im Performance-CSV vorkommen\n",
|
|
||||||
"merged = (\n",
|
|
||||||
" pd.DataFrame({\"subjectID\": subjects_from_df})\n",
|
|
||||||
" .merge(perf_filtered, on=\"subjectID\", how=\"inner\")\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
|
||||||
"# Sicherstellen, dass keine Scores fehlen\n",
|
|
||||||
"if merged[\"overall_score\"].isna().any():\n",
|
|
||||||
" raise ValueError(\"Es fehlen Score-Werte für manche Subjects.\")\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "ae43df8d",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"merged_sorted = merged.sort_values(\"overall_score\", ascending=False).reset_index(drop=True)\n",
|
|
||||||
"\n",
|
|
||||||
"scores = merged_sorted[\"overall_score\"].values\n",
|
|
||||||
"n_total = len(merged_sorted)\n",
|
|
||||||
"n_small = n_total // 3\n",
|
|
||||||
"n_large = n_total - n_small\n",
|
|
||||||
"\n",
|
|
||||||
"# Schritt 1: zufällige Start-Aufteilung\n",
|
|
||||||
"idx = np.arange(n_total)\n",
|
|
||||||
"np.random.shuffle(idx)\n",
|
|
||||||
"\n",
|
|
||||||
"small_idx = idx[:n_small]\n",
|
|
||||||
"large_idx = idx[n_small:]\n",
|
|
||||||
"\n",
|
|
||||||
"def score_diff(small_idx, large_idx):\n",
|
|
||||||
" return abs(scores[small_idx].mean() - scores[large_idx].mean())\n",
|
|
||||||
"\n",
|
|
||||||
"diff = score_diff(small_idx, large_idx)\n",
|
|
||||||
"threshold = 0.01\n",
|
|
||||||
"max_iter = 100\n",
|
|
||||||
"count = 0\n",
|
|
||||||
"\n",
|
|
||||||
"# Schritt 2: random swaps bis Differenz klein genug\n",
|
|
||||||
"while diff > threshold and count < max_iter:\n",
|
|
||||||
" # Zwei zufällige Elemente auswählen\n",
|
|
||||||
" si = np.random.choice(small_idx)\n",
|
|
||||||
" li = np.random.choice(large_idx)\n",
|
|
||||||
" \n",
|
|
||||||
" # Tausch durchführen\n",
|
|
||||||
" new_small_idx = small_idx.copy()\n",
|
|
||||||
" new_large_idx = large_idx.copy()\n",
|
|
||||||
" \n",
|
|
||||||
" new_small_idx[new_small_idx == si] = li\n",
|
|
||||||
" new_large_idx[new_large_idx == li] = si\n",
|
|
||||||
"\n",
|
|
||||||
" # neue Differenz berechnen\n",
|
|
||||||
" new_diff = score_diff(new_small_idx, new_large_idx)\n",
|
|
||||||
"\n",
|
|
||||||
" # Swap akzeptieren, wenn es besser wird\n",
|
|
||||||
" if new_diff < diff:\n",
|
|
||||||
" small_idx = new_small_idx\n",
|
|
||||||
" large_idx = new_large_idx\n",
|
|
||||||
" diff = new_diff\n",
|
|
||||||
"\n",
|
|
||||||
" count += 1\n",
|
|
||||||
"\n",
|
|
||||||
"# Finalgruppen\n",
|
|
||||||
"group_small = merged_sorted.loc[small_idx].reset_index(drop=True)\n",
|
|
||||||
"group_large = merged_sorted.loc[large_idx].reset_index(drop=True)\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"Finale Score-Differenz:\", diff)\n",
|
|
||||||
"print(\"Größe Gruppe 1:\", len(group_small))\n",
|
|
||||||
"print(\"Größe Gruppe 2:\", len(group_large))\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "9d1b414e",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"group_large['overall_score'].mean()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "fa71f9a5",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"group_small['overall_score'].mean()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "79ecb4a2",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"training_subjects = group_large['subjectID'].values\n",
|
|
||||||
"test_subjects = group_small['subjectID'].values\n",
|
|
||||||
"print(training_subjects)\n",
|
|
||||||
"print(test_subjects)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "4353f87c",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Data cleaning with MAD"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "76610052",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"# SET\n",
|
|
||||||
"threshold_mad = 5\n",
|
|
||||||
"column_praefix ='AU'\n",
|
|
||||||
"\n",
|
|
||||||
"au_columns = [col for col in df.columns if col.startswith(column_praefix)]\n",
|
|
||||||
"cleaned_df = mad_outlier_removal.mad_outlier_removal(df,columns=au_columns, threshold=threshold_mad)\n",
|
|
||||||
"print(cleaned_df.shape)\n",
|
|
||||||
"print(df.shape)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "9a6c1732",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"#### TO DO\n",
|
|
||||||
" * pipeline aus Autoencoder und SVM\n",
|
|
||||||
" * group k fold\n",
|
|
||||||
" * AE überprüfen, loss dokumentieren"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "877309d9",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"### Variational Autoencoder with Classifier Head\n",
|
|
||||||
"import pandas as pd\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"import tensorflow as tf\n",
|
|
||||||
"from tensorflow import keras\n",
|
|
||||||
"from tensorflow.keras import layers, Model\n",
|
|
||||||
"from sklearn.model_selection import GroupKFold\n",
|
|
||||||
"from sklearn.preprocessing import StandardScaler\n",
|
|
||||||
"from sklearn.metrics import (\n",
|
|
||||||
" accuracy_score, precision_score, recall_score, f1_score, \n",
|
|
||||||
" roc_auc_score, confusion_matrix, classification_report\n",
|
|
||||||
")\n",
|
|
||||||
"import matplotlib.pyplot as plt\n",
|
|
||||||
"from collections import defaultdict\n",
|
|
||||||
"\n",
|
|
||||||
"# ============================================================================\n",
|
|
||||||
"# 1. CREATE LABELS\n",
|
|
||||||
"# ============================================================================\n",
|
|
||||||
"\n",
|
|
||||||
"# Low workload: baseline + n-back level 1,4\n",
|
|
||||||
"low_all = cleaned_df[\n",
|
|
||||||
" ((cleaned_df[\"PHASE\"] == \"baseline\") |\n",
|
|
||||||
" ((cleaned_df[\"STUDY\"] == \"n-back\") & (cleaned_df[\"PHASE\"] != \"baseline\") & (cleaned_df[\"LEVEL\"].isin([1,4]))))\n",
|
|
||||||
"].copy()\n",
|
|
||||||
"low_all['label'] = 0\n",
|
|
||||||
"print(f\"Low workload samples: {low_all.shape[0]}\")\n",
|
|
||||||
"\n",
|
|
||||||
"# High workload n-back: level 2,3,5,6\n",
|
|
||||||
"high_nback = cleaned_df[\n",
|
|
||||||
" (cleaned_df[\"STUDY\"]==\"n-back\") &\n",
|
|
||||||
" (cleaned_df[\"LEVEL\"].isin([2, 3, 5, 6])) &\n",
|
|
||||||
" (cleaned_df[\"PHASE\"].isin([\"train\", \"test\"]))\n",
|
|
||||||
"].copy()\n",
|
|
||||||
"high_nback['label'] = 1\n",
|
|
||||||
"print(f\"High n-back samples: {high_nback.shape[0]}\")\n",
|
|
||||||
"\n",
|
|
||||||
"# High workload k-drive\n",
|
|
||||||
"high_kdrive = cleaned_df[\n",
|
|
||||||
" (cleaned_df[\"STUDY\"] == \"k-drive\") & (cleaned_df[\"PHASE\"] != \"baseline\")\n",
|
|
||||||
"].copy()\n",
|
|
||||||
"high_kdrive['label'] = 1\n",
|
|
||||||
"print(f\"High k-drive samples: {high_kdrive.shape[0]}\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Combine all high workload\n",
|
|
||||||
"high_all = pd.concat([high_nback, high_kdrive])\n",
|
|
||||||
"print(f\"Total high workload samples: {high_all.shape[0]}\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Complete labeled dataset\n",
|
|
||||||
"labeled_df = pd.concat([low_all, high_all]).reset_index(drop=True)\n",
|
|
||||||
"print(f\"\\nTotal labeled samples: {labeled_df.shape[0]}\")\n",
|
|
||||||
"print(f\"Class distribution:\\n{labeled_df['label'].value_counts()}\")\n",
|
|
||||||
"\n",
|
|
||||||
"# ============================================================================\n",
|
|
||||||
"# 2. TRAIN/TEST SPLIT BY SUBJECTS\n",
|
|
||||||
"# ============================================================================\n",
|
|
||||||
"\n",
|
|
||||||
"train_df = labeled_df[labeled_df['subjectID'].isin(training_subjects)].copy()\n",
|
|
||||||
"test_df = labeled_df[labeled_df['subjectID'].isin(test_subjects)].copy()\n",
|
|
||||||
"\n",
|
|
||||||
"print(f\"\\nTraining subjects: {training_subjects}\")\n",
|
|
||||||
"print(f\"Test subjects: {test_subjects}\")\n",
|
|
||||||
"print(f\"Train samples: {train_df.shape[0]}, Test samples: {test_df.shape[0]}\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Extract features and labels\n",
|
|
||||||
"au_columns = [col for col in labeled_df.columns if col.startswith('AU')]\n",
|
|
||||||
"print(f\"\\nUsing {len(au_columns)} AU features: {au_columns}\")\n",
|
|
||||||
"\n",
|
|
||||||
"X_train = train_df[au_columns].values\n",
|
|
||||||
"y_train = train_df['label'].values\n",
|
|
||||||
"groups_train = train_df['subjectID'].values\n",
|
|
||||||
"\n",
|
|
||||||
"X_test = test_df[au_columns].values\n",
|
|
||||||
"y_test = test_df['label'].values\n",
|
|
||||||
"\n",
|
|
||||||
"# Normalize features\n",
|
|
||||||
"scaler = StandardScaler()\n",
|
|
||||||
"X_train_scaled = scaler.fit_transform(X_train)\n",
|
|
||||||
"X_test_scaled = scaler.transform(X_test)\n",
|
|
||||||
"\n",
|
|
||||||
"print(f\"\\nTrain class distribution: {np.bincount(y_train)}\")\n",
|
|
||||||
"print(f\"Test class distribution: {np.bincount(y_test)}\")\n",
|
|
||||||
"\n",
|
|
||||||
"# ============================================================================\n",
|
|
||||||
"# 3. VAE WITH CLASSIFIER HEAD MODEL\n",
|
|
||||||
"# ============================================================================\n",
|
|
||||||
"\n",
|
|
||||||
"class Sampling(layers.Layer):\n",
|
|
||||||
" \"\"\"Reparameterization trick for VAE\"\"\"\n",
|
|
||||||
" def call(self, inputs):\n",
|
|
||||||
" z_mean, z_log_var = inputs\n",
|
|
||||||
" batch = tf.shape(z_mean)[0]\n",
|
|
||||||
" dim = tf.shape(z_mean)[1]\n",
|
|
||||||
" epsilon = tf.random.normal(shape=(batch, dim))\n",
|
|
||||||
" return z_mean + tf.exp(0.5 * z_log_var) * epsilon\n",
|
|
||||||
"\n",
|
|
||||||
"def build_vae_classifier(input_dim, latent_dim, encoder_dims=[32, 16], \n",
|
|
||||||
" decoder_dims=[16, 32], classifier_dims=[16]):\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Build VAE with classifier head\n",
|
|
||||||
" \n",
|
|
||||||
" Args:\n",
|
|
||||||
" input_dim: Number of input features (20 AUs)\n",
|
|
||||||
" latent_dim: Dimension of latent space (2-5)\n",
|
|
||||||
" encoder_dims: Hidden layer sizes for encoder\n",
|
|
||||||
" decoder_dims: Hidden layer sizes for decoder\n",
|
|
||||||
" classifier_dims: Hidden layer sizes for classifier\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" \n",
|
|
||||||
" # ---- ENCODER ----\n",
|
|
||||||
" encoder_inputs = keras.Input(shape=(input_dim,), name='encoder_input')\n",
|
|
||||||
" x = encoder_inputs\n",
|
|
||||||
" \n",
|
|
||||||
" for i, dim in enumerate(encoder_dims):\n",
|
|
||||||
" x = layers.Dense(dim, activation='relu', name=f'encoder_dense_{i}')(x)\n",
|
|
||||||
" x = layers.BatchNormalization(name=f'encoder_bn_{i}')(x)\n",
|
|
||||||
" x = layers.Dropout(0.2, name=f'encoder_dropout_{i}')(x)\n",
|
|
||||||
" \n",
|
|
||||||
" z_mean = layers.Dense(latent_dim, name='z_mean')(x)\n",
|
|
||||||
" z_log_var = layers.Dense(latent_dim, name='z_log_var')(x)\n",
|
|
||||||
" z = Sampling()([z_mean, z_log_var])\n",
|
|
||||||
" \n",
|
|
||||||
" encoder = Model(encoder_inputs, [z_mean, z_log_var, z], name='encoder')\n",
|
|
||||||
" \n",
|
|
||||||
" # ---- DECODER ----\n",
|
|
||||||
" latent_inputs = keras.Input(shape=(latent_dim,), name='latent_input')\n",
|
|
||||||
" x = latent_inputs\n",
|
|
||||||
" \n",
|
|
||||||
" for i, dim in enumerate(decoder_dims):\n",
|
|
||||||
" x = layers.Dense(dim, activation='relu', name=f'decoder_dense_{i}')(x)\n",
|
|
||||||
" x = layers.BatchNormalization(name=f'decoder_bn_{i}')(x)\n",
|
|
||||||
" \n",
|
|
||||||
" decoder_outputs = layers.Dense(input_dim, activation='linear', name='decoder_output')(x)\n",
|
|
||||||
" decoder = Model(latent_inputs, decoder_outputs, name='decoder')\n",
|
|
||||||
" \n",
|
|
||||||
" # ---- CLASSIFIER HEAD ----\n",
|
|
||||||
" x = latent_inputs\n",
|
|
||||||
" for i, dim in enumerate(classifier_dims):\n",
|
|
||||||
" x = layers.Dense(dim, activation='relu', name=f'classifier_dense_{i}')(x)\n",
|
|
||||||
" x = layers.Dropout(0.3, name=f'classifier_dropout_{i}')(x)\n",
|
|
||||||
" \n",
|
|
||||||
" classifier_output = layers.Dense(1, activation='sigmoid', name='classifier_output')(x)\n",
|
|
||||||
" classifier = Model(latent_inputs, classifier_output, name='classifier')\n",
|
|
||||||
" \n",
|
|
||||||
" # ---- FULL MODEL ----\n",
|
|
||||||
" inputs = keras.Input(shape=(input_dim,), name='vae_input')\n",
|
|
||||||
" z_mean, z_log_var, z = encoder(inputs)\n",
|
|
||||||
" reconstructed = decoder(z)\n",
|
|
||||||
" classification = classifier(z)\n",
|
|
||||||
" \n",
|
|
||||||
" model = Model(inputs, [reconstructed, classification], name='vae_classifier')\n",
|
|
||||||
" \n",
|
|
||||||
" return model, encoder, decoder, classifier\n",
|
|
||||||
"\n",
|
|
||||||
"# ============================================================================\n",
|
|
||||||
"# 4. CUSTOM TRAINING LOOP WITH COMBINED LOSS\n",
|
|
||||||
"# ============================================================================\n",
|
|
||||||
"\n",
|
|
||||||
"class VAEClassifier(keras.Model):\n",
|
|
||||||
" def __init__(self, encoder, decoder, classifier, **kwargs):\n",
|
|
||||||
" super().__init__(**kwargs)\n",
|
|
||||||
" self.encoder = encoder\n",
|
|
||||||
" self.decoder = decoder\n",
|
|
||||||
" self.classifier = classifier\n",
|
|
||||||
" self.total_loss_tracker = keras.metrics.Mean(name=\"total_loss\")\n",
|
|
||||||
" self.reconstruction_loss_tracker = keras.metrics.Mean(name=\"reconstruction_loss\")\n",
|
|
||||||
" self.kl_loss_tracker = keras.metrics.Mean(name=\"kl_loss\")\n",
|
|
||||||
" self.classification_loss_tracker = keras.metrics.Mean(name=\"classification_loss\")\n",
|
|
||||||
" self.accuracy_tracker = keras.metrics.BinaryAccuracy(name=\"accuracy\")\n",
|
|
||||||
" \n",
|
|
||||||
" @property\n",
|
|
||||||
" def metrics(self):\n",
|
|
||||||
" return [\n",
|
|
||||||
" self.total_loss_tracker,\n",
|
|
||||||
" self.reconstruction_loss_tracker,\n",
|
|
||||||
" self.kl_loss_tracker,\n",
|
|
||||||
" self.classification_loss_tracker,\n",
|
|
||||||
" self.accuracy_tracker,\n",
|
|
||||||
" ]\n",
|
|
||||||
" \n",
|
|
||||||
" def train_step(self, data):\n",
|
|
||||||
" x, y = data\n",
|
|
||||||
" \n",
|
|
||||||
" with tf.GradientTape() as tape:\n",
|
|
||||||
" # Forward pass\n",
|
|
||||||
" z_mean, z_log_var, z = self.encoder(x, training=True)\n",
|
|
||||||
" reconstruction = self.decoder(z, training=True)\n",
|
|
||||||
" classification = self.classifier(z, training=True)\n",
|
|
||||||
" \n",
|
|
||||||
" # Reconstruction loss (MSE)\n",
|
|
||||||
" reconstruction_loss = tf.reduce_mean(\n",
|
|
||||||
" keras.losses.mse(x, reconstruction))\n",
|
|
||||||
" \n",
|
|
||||||
" # KL divergence loss\n",
|
|
||||||
" kl_loss = -0.5 * tf.reduce_mean(\n",
|
|
||||||
" tf.reduce_sum(\n",
|
|
||||||
" 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var),\n",
|
|
||||||
" axis=1\n",
|
|
||||||
" )\n",
|
|
||||||
" )\n",
|
|
||||||
" \n",
|
|
||||||
" # Classification loss (binary crossentropy)\n",
|
|
||||||
" # Classification loss (binary crossentropy)\n",
|
|
||||||
" classification_loss = tf.reduce_mean(\n",
|
|
||||||
" keras.losses.binary_crossentropy(tf.expand_dims(y, -1), classification)\n",
|
|
||||||
" )\n",
|
|
||||||
" \n",
|
|
||||||
" # Combined loss: unweighted sum of reconstruction, KL and classification terms\n",
|
|
||||||
" total_loss = reconstruction_loss + kl_loss + classification_loss\n",
|
|
||||||
" \n",
|
|
||||||
" # Backpropagation\n",
|
|
||||||
" grads = tape.gradient(total_loss, self.trainable_weights)\n",
|
|
||||||
" self.optimizer.apply_gradients(zip(grads, self.trainable_weights))\n",
|
|
||||||
" \n",
|
|
||||||
" # Update metrics\n",
|
|
||||||
" self.total_loss_tracker.update_state(total_loss)\n",
|
|
||||||
" self.reconstruction_loss_tracker.update_state(reconstruction_loss)\n",
|
|
||||||
" self.kl_loss_tracker.update_state(kl_loss)\n",
|
|
||||||
" self.classification_loss_tracker.update_state(classification_loss)\n",
|
|
||||||
" self.accuracy_tracker.update_state(y, classification)\n",
|
|
||||||
" \n",
|
|
||||||
" return {\n",
|
|
||||||
" \"total_loss\": self.total_loss_tracker.result(),\n",
|
|
||||||
" \"reconstruction_loss\": self.reconstruction_loss_tracker.result(),\n",
|
|
||||||
" \"kl_loss\": self.kl_loss_tracker.result(),\n",
|
|
||||||
" \"classification_loss\": self.classification_loss_tracker.result(),\n",
|
|
||||||
" \"accuracy\": self.accuracy_tracker.result(),\n",
|
|
||||||
" }\n",
|
|
||||||
" \n",
|
|
||||||
" def test_step(self, data):\n",
|
|
||||||
" x, y = data\n",
|
|
||||||
" \n",
|
|
||||||
" z_mean, z_log_var, z = self.encoder(x, training=False)\n",
|
|
||||||
" reconstruction = self.decoder(z, training=False)\n",
|
|
||||||
" classification = self.classifier(z, training=False)\n",
|
|
||||||
" \n",
|
|
||||||
" # Reconstruction loss (MSE)\n",
|
|
||||||
" reconstruction_loss = tf.reduce_mean(\n",
|
|
||||||
" keras.losses.mse(x, reconstruction))\n",
|
|
||||||
" kl_loss = -0.5 * tf.reduce_mean(\n",
|
|
||||||
" tf.reduce_sum(1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), axis=1)\n",
|
|
||||||
" )\n",
|
|
||||||
" # Classification loss (binary crossentropy)\n",
|
|
||||||
" classification_loss = tf.reduce_mean(\n",
|
|
||||||
" keras.losses.binary_crossentropy(tf.expand_dims(y, -1), classification)\n",
|
|
||||||
" )\n",
|
|
||||||
" total_loss = reconstruction_loss + kl_loss + classification_loss\n",
|
|
||||||
" \n",
|
|
||||||
" self.total_loss_tracker.update_state(total_loss)\n",
|
|
||||||
" self.reconstruction_loss_tracker.update_state(reconstruction_loss)\n",
|
|
||||||
" self.kl_loss_tracker.update_state(kl_loss)\n",
|
|
||||||
" self.classification_loss_tracker.update_state(classification_loss)\n",
|
|
||||||
" self.accuracy_tracker.update_state(y, classification)\n",
|
|
||||||
" \n",
|
|
||||||
" return {\n",
|
|
||||||
" \"total_loss\": self.total_loss_tracker.result(),\n",
|
|
||||||
" \"reconstruction_loss\": self.reconstruction_loss_tracker.result(),\n",
|
|
||||||
" \"kl_loss\": self.kl_loss_tracker.result(),\n",
|
|
||||||
" \"classification_loss\": self.classification_loss_tracker.result(),\n",
|
|
||||||
" \"accuracy\": self.accuracy_tracker.result(),\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
"# ============================================================================\n",
|
|
||||||
"# 5. GROUP K-FOLD CROSS-VALIDATION WITH GRID SEARCH\n",
|
|
||||||
"# ============================================================================\n",
|
|
||||||
"\n",
|
|
||||||
"# Hyperparameter grid\n",
|
|
||||||
"param_grid = {\n",
|
|
||||||
" 'latent_dim': [2, 5],\n",
|
|
||||||
" 'encoder_dims': [[32, 16], [64, 32]],\n",
|
|
||||||
" 'learning_rate': [0.001, 0.005],\n",
|
|
||||||
" 'batch_size': [32, 64],\n",
|
|
||||||
"}\n",
|
|
||||||
"\n",
|
|
||||||
"# Generate all combinations\n",
|
|
||||||
"from itertools import product\n",
|
|
||||||
"keys = param_grid.keys()\n",
|
|
||||||
"values = param_grid.values()\n",
|
|
||||||
"param_combinations = [dict(zip(keys, v)) for v in product(*values)]\n",
|
|
||||||
"\n",
|
|
||||||
"print(f\"\\nTotal hyperparameter combinations: {len(param_combinations)}\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Group K-Fold setup\n",
|
|
||||||
"n_splits = 5\n",
|
|
||||||
"gkf = GroupKFold(n_splits=n_splits)\n",
|
|
||||||
"\n",
|
|
||||||
"# Store results\n",
|
|
||||||
"cv_results = []\n",
|
|
||||||
"\n",
|
|
||||||
"# Grid search with cross-validation\n",
|
|
||||||
"for idx, params in enumerate(param_combinations):\n",
|
|
||||||
" print(f\"\\n{'='*80}\")\n",
|
|
||||||
" print(f\"Testing combination {idx+1}/{len(param_combinations)}: {params}\")\n",
|
|
||||||
" print(f\"{'='*80}\")\n",
|
|
||||||
" \n",
|
|
||||||
" fold_results = []\n",
|
|
||||||
" \n",
|
|
||||||
" for fold, (train_idx, val_idx) in enumerate(gkf.split(X_train_scaled, y_train, groups_train)):\n",
|
|
||||||
" print(f\"\\nFold {fold+1}/{n_splits}\")\n",
|
|
||||||
" \n",
|
|
||||||
" X_fold_train, X_fold_val = X_train_scaled[train_idx], X_train_scaled[val_idx]\n",
|
|
||||||
" y_fold_train, y_fold_val = y_train[train_idx], y_train[val_idx]\n",
|
|
||||||
" \n",
|
|
||||||
" # Build model\n",
|
|
||||||
" model, encoder, decoder, classifier = build_vae_classifier(\n",
|
|
||||||
" input_dim=len(au_columns),\n",
|
|
||||||
" latent_dim=params['latent_dim'],\n",
|
|
||||||
" encoder_dims=params['encoder_dims'],\n",
|
|
||||||
" decoder_dims=list(reversed(params['encoder_dims'])),\n",
|
|
||||||
" classifier_dims=[16]\n",
|
|
||||||
" )\n",
|
|
||||||
" \n",
|
|
||||||
" vae_classifier = VAEClassifier(encoder, decoder, classifier)\n",
|
|
||||||
" vae_classifier.compile(optimizer=keras.optimizers.Adam(params['learning_rate']))\n",
|
|
||||||
" \n",
|
|
||||||
" # Early stopping\n",
|
|
||||||
" early_stop = keras.callbacks.EarlyStopping(\n",
|
|
||||||
" monitor='val_total_loss',\n",
|
|
||||||
" patience=10,\n",
|
|
||||||
" restore_best_weights=True,\n",
|
|
||||||
" mode='min'\n",
|
|
||||||
" )\n",
|
|
||||||
" \n",
|
|
||||||
" # Train\n",
|
|
||||||
" history = vae_classifier.fit(\n",
|
|
||||||
" X_fold_train, y_fold_train,\n",
|
|
||||||
" validation_data=(X_fold_val, y_fold_val),\n",
|
|
||||||
" epochs=60,\n",
|
|
||||||
" batch_size=params['batch_size'],\n",
|
|
||||||
" callbacks=[early_stop],\n",
|
|
||||||
" verbose=0\n",
|
|
||||||
" )\n",
|
|
||||||
" \n",
|
|
||||||
" # Evaluate on validation fold\n",
|
|
||||||
" z_mean_val, _, _ = encoder.predict(X_fold_val, verbose=0)\n",
|
|
||||||
" y_pred_proba = classifier.predict(z_mean_val, verbose=0).flatten()\n",
|
|
||||||
" y_pred = (y_pred_proba > 0.5).astype(int)\n",
|
|
||||||
" \n",
|
|
||||||
" fold_metrics = {\n",
|
|
||||||
" 'accuracy': accuracy_score(y_fold_val, y_pred),\n",
|
|
||||||
" 'precision': precision_score(y_fold_val, y_pred, zero_division=0),\n",
|
|
||||||
" 'recall': recall_score(y_fold_val, y_pred, zero_division=0),\n",
|
|
||||||
" 'f1': f1_score(y_fold_val, y_pred, zero_division=0),\n",
|
|
||||||
" 'roc_auc': roc_auc_score(y_fold_val, y_pred_proba),\n",
|
|
||||||
" 'final_recon_loss': history.history['val_reconstruction_loss'][-1],\n",
|
|
||||||
" 'final_kl_loss': history.history['val_kl_loss'][-1],\n",
|
|
||||||
" 'final_class_loss': history.history['val_classification_loss'][-1],\n",
|
|
||||||
" }\n",
|
|
||||||
" \n",
|
|
||||||
" fold_results.append(fold_metrics)\n",
|
|
||||||
" print(f\" Accuracy: {fold_metrics['accuracy']:.4f}, F1: {fold_metrics['f1']:.4f}, AUC: {fold_metrics['roc_auc']:.4f}\")\n",
|
|
||||||
" \n",
|
|
||||||
" # Clear session to free memory\n",
|
|
||||||
" keras.backend.clear_session()\n",
|
|
||||||
" \n",
|
|
||||||
" # Average across folds\n",
|
|
||||||
" avg_results = {\n",
|
|
||||||
" 'params': params,\n",
|
|
||||||
" 'mean_accuracy': np.mean([r['accuracy'] for r in fold_results]),\n",
|
|
||||||
" 'std_accuracy': np.std([r['accuracy'] for r in fold_results]),\n",
|
|
||||||
" 'mean_f1': np.mean([r['f1'] for r in fold_results]),\n",
|
|
||||||
" 'std_f1': np.std([r['f1'] for r in fold_results]),\n",
|
|
||||||
" 'mean_roc_auc': np.mean([r['roc_auc'] for r in fold_results]),\n",
|
|
||||||
" 'std_roc_auc': np.std([r['roc_auc'] for r in fold_results]),\n",
|
|
||||||
" 'mean_recon_loss': np.mean([r['final_recon_loss'] for r in fold_results]),\n",
|
|
||||||
" 'mean_kl_loss': np.mean([r['final_kl_loss'] for r in fold_results]),\n",
|
|
||||||
" 'mean_class_loss': np.mean([r['final_class_loss'] for r in fold_results]),\n",
|
|
||||||
" 'fold_results': fold_results\n",
|
|
||||||
" }\n",
|
|
||||||
" \n",
|
|
||||||
" cv_results.append(avg_results)\n",
|
|
||||||
" \n",
|
|
||||||
" print(f\"\\nMean CV Accuracy: {avg_results['mean_accuracy']:.4f} ± {avg_results['std_accuracy']:.4f}\")\n",
|
|
||||||
" print(f\"Mean CV F1: {avg_results['mean_f1']:.4f} ± {avg_results['std_f1']:.4f}\")\n",
|
|
||||||
" print(f\"Mean CV AUC: {avg_results['mean_roc_auc']:.4f} ± {avg_results['std_roc_auc']:.4f}\")\n",
|
|
||||||
"\n",
|
|
||||||
"# ============================================================================\n",
|
|
||||||
"# 6. SELECT BEST MODEL AND EVALUATE ON TEST SET\n",
|
|
||||||
"# ============================================================================\n",
|
|
||||||
"\n",
|
|
||||||
"# Find best hyperparameters based on mean F1 score\n",
|
|
||||||
"best_idx = np.argmax([r['mean_f1'] for r in cv_results])\n",
|
|
||||||
"best_params = cv_results[best_idx]['params']\n",
|
|
||||||
"\n",
|
|
||||||
"print(f\"\\n{'='*80}\")\n",
|
|
||||||
"print(\"BEST HYPERPARAMETERS (based on CV F1 score):\")\n",
|
|
||||||
"print(f\"{'='*80}\")\n",
|
|
||||||
"for key, value in best_params.items():\n",
|
|
||||||
" print(f\"{key}: {value}\")\n",
|
|
||||||
"print(f\"\\nCV Performance:\")\n",
|
|
||||||
"print(f\" Accuracy: {cv_results[best_idx]['mean_accuracy']:.4f} ± {cv_results[best_idx]['std_accuracy']:.4f}\")\n",
|
|
||||||
"print(f\" F1 Score: {cv_results[best_idx]['mean_f1']:.4f} ± {cv_results[best_idx]['std_f1']:.4f}\")\n",
|
|
||||||
"print(f\" ROC-AUC: {cv_results[best_idx]['mean_roc_auc']:.4f} ± {cv_results[best_idx]['std_roc_auc']:.4f}\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Train final model on all training data\n",
|
|
||||||
"print(f\"\\n{'='*80}\")\n",
|
|
||||||
"print(\"TRAINING FINAL MODEL ON ALL TRAINING DATA\")\n",
|
|
||||||
"print(f\"{'='*80}\")\n",
|
|
||||||
"\n",
|
|
||||||
"final_model, final_encoder, final_decoder, final_classifier = build_vae_classifier(\n",
|
|
||||||
" input_dim=len(au_columns),\n",
|
|
||||||
" latent_dim=best_params['latent_dim'],\n",
|
|
||||||
" encoder_dims=best_params['encoder_dims'],\n",
|
|
||||||
" decoder_dims=list(reversed(best_params['encoder_dims'])),\n",
|
|
||||||
" classifier_dims=[16]\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
|
||||||
"final_vae_classifier = VAEClassifier(final_encoder, final_decoder, final_classifier)\n",
|
|
||||||
"final_vae_classifier.compile(optimizer=keras.optimizers.Adam(best_params['learning_rate']))\n",
|
|
||||||
"\n",
|
|
||||||
"final_history = final_vae_classifier.fit(\n",
|
|
||||||
" X_train_scaled, y_train,\n",
|
|
||||||
" validation_split=0.2,\n",
|
|
||||||
" epochs=100,\n",
|
|
||||||
" batch_size=best_params['batch_size'],\n",
|
|
||||||
" callbacks=[keras.callbacks.EarlyStopping(monitor='val_total_loss', patience=15, restore_best_weights=True, mode='min')],\n",
|
|
||||||
" verbose=1\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
|
||||||
"# Evaluate on held-out test set\n",
|
|
||||||
"print(f\"\\n{'='*80}\")\n",
|
|
||||||
"print(\"EVALUATION ON HELD-OUT TEST SET\")\n",
|
|
||||||
"print(f\"{'='*80}\")\n",
|
|
||||||
"\n",
|
|
||||||
"z_mean_test, _, _ = final_encoder.predict(X_test_scaled, verbose=0)\n",
|
|
||||||
"y_test_pred_proba = final_classifier.predict(z_mean_test, verbose=0).flatten()\n",
|
|
||||||
"y_test_pred = (y_test_pred_proba > 0.5).astype(int)\n",
|
|
||||||
"\n",
|
|
||||||
"test_metrics = {\n",
|
|
||||||
" 'accuracy': accuracy_score(y_test, y_test_pred),\n",
|
|
||||||
" 'precision': precision_score(y_test, y_test_pred),\n",
|
|
||||||
" 'recall': recall_score(y_test, y_test_pred),\n",
|
|
||||||
" 'f1': f1_score(y_test, y_test_pred),\n",
|
|
||||||
" 'roc_auc': roc_auc_score(y_test, y_test_pred_proba),\n",
|
|
||||||
"}\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"\\nTest Set Performance:\")\n",
|
|
||||||
"for metric, value in test_metrics.items():\n",
|
|
||||||
" print(f\" {metric.capitalize()}: {value:.4f}\")\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"\\nConfusion Matrix:\")\n",
|
|
||||||
"print(confusion_matrix(y_test, y_test_pred))\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"\\nClassification Report:\")\n",
|
|
||||||
"print(classification_report(y_test, y_test_pred, target_names=['Low Workload', 'High Workload']))\n",
|
|
||||||
"\n",
|
|
||||||
"# ============================================================================\n",
|
|
||||||
"# 7. VISUALIZATION\n",
|
|
||||||
"# ============================================================================\n",
|
|
||||||
"\n",
|
|
||||||
"# Plot training history\n",
|
|
||||||
"fig, axes = plt.subplots(2, 2, figsize=(15, 10))\n",
|
|
||||||
"\n",
|
|
||||||
"axes[0, 0].plot(final_history.history['reconstruction_loss'], label='Train')\n",
|
|
||||||
"axes[0, 0].plot(final_history.history['val_reconstruction_loss'], label='Val')\n",
|
|
||||||
"axes[0, 0].set_title('Reconstruction Loss')\n",
|
|
||||||
"axes[0, 0].set_xlabel('Epoch')\n",
|
|
||||||
"axes[0, 0].set_ylabel('Loss')\n",
|
|
||||||
"axes[0, 0].legend()\n",
|
|
||||||
"axes[0, 0].grid(True)\n",
|
|
||||||
"\n",
|
|
||||||
"axes[0, 1].plot(final_history.history['kl_loss'], label='Train')\n",
|
|
||||||
"axes[0, 1].plot(final_history.history['val_kl_loss'], label='Val')\n",
|
|
||||||
"axes[0, 1].set_title('KL Divergence Loss')\n",
|
|
||||||
"axes[0, 1].set_xlabel('Epoch')\n",
|
|
||||||
"axes[0, 1].set_ylabel('Loss')\n",
|
|
||||||
"axes[0, 1].legend()\n",
|
|
||||||
"axes[0, 1].grid(True)\n",
|
|
||||||
"\n",
|
|
||||||
"axes[1, 0].plot(final_history.history['classification_loss'], label='Train')\n",
|
|
||||||
"axes[1, 0].plot(final_history.history['val_classification_loss'], label='Val')\n",
|
|
||||||
"axes[1, 0].set_title('Classification Loss')\n",
|
|
||||||
"axes[1, 0].set_xlabel('Epoch')\n",
|
|
||||||
"axes[1, 0].set_ylabel('Loss')\n",
|
|
||||||
"axes[1, 0].legend()\n",
|
|
||||||
"axes[1, 0].grid(True)\n",
|
|
||||||
"\n",
|
|
||||||
"axes[1, 1].plot(final_history.history['accuracy'], label='Train')\n",
|
|
||||||
"axes[1, 1].plot(final_history.history['val_accuracy'], label='Val')\n",
|
|
||||||
"axes[1, 1].set_title('Classification Accuracy')\n",
|
|
||||||
"axes[1, 1].set_xlabel('Epoch')\n",
|
|
||||||
"axes[1, 1].set_ylabel('Accuracy')\n",
|
|
||||||
"axes[1, 1].legend()\n",
|
|
||||||
"axes[1, 1].grid(True)\n",
|
|
||||||
"\n",
|
|
||||||
"plt.tight_layout()\n",
|
|
||||||
"plt.show()\n",
|
|
||||||
"\n",
|
|
||||||
"# Visualize latent space (if 2D or 3D)\n",
|
|
||||||
"if best_params['latent_dim'] == 2:\n",
|
|
||||||
" z_mean_train, _, _ = final_encoder.predict(X_train_scaled, verbose=0)\n",
|
|
||||||
" \n",
|
|
||||||
" plt.figure(figsize=(10, 8))\n",
|
|
||||||
" scatter = plt.scatter(z_mean_train[:, 0], z_mean_train[:, 1], \n",
|
|
||||||
" c=y_train, cmap='RdYlBu', alpha=0.6, edgecolors='k')\n",
|
|
||||||
" plt.colorbar(scatter, label='Workload (0=Low, 1=High)')\n",
|
|
||||||
" plt.xlabel('Latent Dimension 1')\n",
|
|
||||||
" plt.ylabel('Latent Dimension 2')\n",
|
|
||||||
" plt.title('2D Latent Space Representation (Training Data)')\n",
|
|
||||||
" plt.grid(True, alpha=0.3)\n",
|
|
||||||
" plt.show()\n",
|
|
||||||
" \n",
|
|
||||||
" # Test set latent space\n",
|
|
||||||
" plt.figure(figsize=(10, 8))\n",
|
|
||||||
" scatter = plt.scatter(z_mean_test[:, 0], z_mean_test[:, 1], \n",
|
|
||||||
" c=y_test, cmap='RdYlBu', alpha=0.6, edgecolors='k')\n",
|
|
||||||
" plt.colorbar(scatter, label='Workload (0=Low, 1=High)')\n",
|
|
||||||
" plt.xlabel('Latent Dimension 1')\n",
|
|
||||||
" plt.ylabel('Latent Dimension 2')\n",
|
|
||||||
" plt.title('2D Latent Space Representation (Test Data)')\n",
|
|
||||||
" plt.grid(True, alpha=0.3)\n",
|
|
||||||
" plt.show()\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"\\n\" + \"=\"*80)\n",
|
|
||||||
"print(\"TRAINING COMPLETE!\")\n",
|
|
||||||
"print(\"=\"*80)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "79bcfc58",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"### Save Trained VAE Classifier Model\n",
|
|
||||||
"from pathlib import Path\n",
|
|
||||||
"from datetime import datetime\n",
|
|
||||||
"\n",
|
|
||||||
"# Define save path\n",
|
|
||||||
"model_dir = Path(\"/home/jovyan/data-paulusjafahrsimulator-gpu/trained_models\")\n",
|
|
||||||
"model_dir.mkdir(parents=True, exist_ok=True)\n",
|
|
||||||
"\n",
|
|
||||||
"timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n",
|
|
||||||
"model_path = model_dir / f\"vae_classifier_{timestamp}.keras\"\n",
|
|
||||||
"\n",
|
|
||||||
"# Save the complete model\n",
|
|
||||||
"final_vae_classifier.save(model_path)\n",
|
|
||||||
"\n",
|
|
||||||
"print(f\"Model saved to: {model_path}\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "d700e517",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "30d8d100",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"### Plot Confusion Matrix for Final Model\n",
|
|
||||||
"from sklearn.metrics import ConfusionMatrixDisplay\n",
|
|
||||||
"x = Path(\"/home/jovyan/data-paulusjafahrsimulator-gpu/trained_models/vae_classifier_20251210_230121.keras\")\n",
|
|
||||||
"# Load the saved model\n",
|
|
||||||
"print(f\"Loading model from: {x}\")\n",
|
|
||||||
"# loaded_vae_classifier = tf.keras.models.load_model(x)\n",
|
|
||||||
"loaded_vae_classifier = final_vae_classifier\n",
|
|
||||||
"print(\"✓ Model loaded successfully!\")\n",
|
|
||||||
"\n",
|
|
||||||
"# Extract encoder and classifier from loaded model\n",
|
|
||||||
"loaded_encoder = loaded_vae_classifier.encoder\n",
|
|
||||||
"loaded_classifier = loaded_vae_classifier.classifier\n",
|
|
||||||
"\n",
|
|
||||||
"# Get predictions on test set\n",
|
|
||||||
"z_mean_test, _, _ = loaded_encoder.predict(X_test_scaled, verbose=0)\n",
|
|
||||||
"y_test_pred_proba = loaded_classifier.predict(z_mean_test, verbose=0).flatten()\n",
|
|
||||||
"y_test_pred = (y_test_pred_proba > 0.5).astype(int)\n",
|
|
||||||
"\n",
|
|
||||||
"# Create and plot confusion matrix\n",
|
|
||||||
"cm = confusion_matrix(y_test, y_test_pred)\n",
|
|
||||||
"disp = ConfusionMatrixDisplay(confusion_matrix=cm, \n",
|
|
||||||
" display_labels=['Low Workload', 'High Workload'])\n",
|
|
||||||
"\n",
|
|
||||||
"fig, ax = plt.subplots(figsize=(8, 6))\n",
|
|
||||||
"disp.plot(ax=ax, cmap='Blues', values_format='d')\n",
|
|
||||||
"plt.title('Confusion Matrix - Test Set (Loaded Model)')\n",
|
|
||||||
"plt.tight_layout()\n",
|
|
||||||
"plt.show()\n",
|
|
||||||
"\n",
|
|
||||||
"# Print metrics\n",
|
|
||||||
"print(f\"\\nTest Set Performance (Loaded Model):\")\n",
|
|
||||||
"print(f\" Accuracy: {accuracy_score(y_test, y_test_pred):.4f}\")\n",
|
|
||||||
"print(f\" Precision: {precision_score(y_test, y_test_pred):.4f}\")\n",
|
|
||||||
"print(f\" Recall: {recall_score(y_test, y_test_pred):.4f}\")\n",
|
|
||||||
"print(f\" F1 Score: {f1_score(y_test, y_test_pred):.4f}\")\n",
|
|
||||||
"print(f\" ROC-AUC: {roc_auc_score(y_test, y_test_pred_proba):.4f}\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"id": "e826a998",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"TO DO\n",
|
|
||||||
" * autoencoder langsam anfangen mit 19 schichten\n",
|
|
||||||
" * dann AE und SVM mit hybridem training wie bei claude?!\n",
|
|
||||||
" * dataset aus eyetracking verwenden?"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3 (ipykernel)",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.12.10"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
||||||
@ -1,11 +0,0 @@
|
|||||||
# from tools import db_helpers
|
|
||||||
import sys
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
print(sys.version)
|
|
||||||
# db_helpers.add_columns_to_table()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@ -1,9 +0,0 @@
|
|||||||
import sqlite3
|
|
||||||
|
|
||||||
def main():
|
|
||||||
|
|
||||||
return 0
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@ -1,5 +1,13 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "fb68b447",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Database creation and filling (for live system) "
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@ -21,8 +29,10 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"database_path = Path(r\"/home/edgekit/MSY_FS/databases/rawdata.sqlite\")\n",
|
"# TODO: set paths and table name\n",
|
||||||
"parquet_path = Path(r\"/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/files_for_testing/both_mod_0000.parquet\")"
|
"database_path = Path(r\"database.sqlite\") # this path references an empty, but already created sqlite file\n",
|
||||||
|
"parquet_path = Path(r\"...parquet\") # this path leads to the data that should be used to fill the database\n",
|
||||||
|
"table_name = \"XXX\" # name of the new table"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -56,6 +66,14 @@
|
|||||||
"con, cursor = db_helpers.connect_db(database_path)"
|
"con, cursor = db_helpers.connect_db(database_path)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "7007c68f",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Select a subset to insert into database "
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@ -69,6 +87,14 @@
|
|||||||
"df_first_100.insert(0, '_Id', df_first_100.index + 1)"
|
"df_first_100.insert(0, '_Id', df_first_100.index + 1)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "92171186",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Type conversion"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@ -88,6 +114,14 @@
|
|||||||
" return \"TEXT\"\n"
|
" return \"TEXT\"\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "45af9956",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Define constraints and primary key"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@ -109,6 +143,14 @@
|
|||||||
"}\n"
|
"}\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "133e92ee",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Create the table"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
@ -119,7 +161,7 @@
|
|||||||
"sql = db_helpers.create_table(\n",
|
"sql = db_helpers.create_table(\n",
|
||||||
" conn=con,\n",
|
" conn=con,\n",
|
||||||
" cursor=cursor,\n",
|
" cursor=cursor,\n",
|
||||||
" table_name=\"rawdata\",\n",
|
" table_name=table_name,\n",
|
||||||
" columns=columns,\n",
|
" columns=columns,\n",
|
||||||
" constraints=constraints,\n",
|
" constraints=constraints,\n",
|
||||||
" primary_key=primary_key,\n",
|
" primary_key=primary_key,\n",
|
||||||
@ -150,7 +192,7 @@
|
|||||||
"db_helpers.insert_rows_into_table(\n",
|
"db_helpers.insert_rows_into_table(\n",
|
||||||
" conn=con,\n",
|
" conn=con,\n",
|
||||||
" cursor=cursor,\n",
|
" cursor=cursor,\n",
|
||||||
" table_name=\"rawdata\",\n",
|
" table_name=table_name,\n",
|
||||||
" columns=columns_to_insert,\n",
|
" columns=columns_to_insert,\n",
|
||||||
" commit=True\n",
|
" commit=True\n",
|
||||||
")\n"
|
")\n"
|
||||||
@ -163,7 +205,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"a = db_helpers.get_data_from_table(conn=con, table_name='rawdata',columns_list=['*'])"
|
"request = db_helpers.get_data_from_table(conn=con, table_name='rawdata',columns_list=['*'])"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -173,7 +215,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"a.head()"
|
"request.head()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -189,7 +231,7 @@
|
|||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "MSY_FS_env",
|
"display_name": "310",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
@ -203,7 +245,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.12.12"
|
"version": "3.10.19"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user