Compare commits
12 Commits
main
...
deployment
| Author | SHA1 | Date | |
|---|---|---|---|
| 4eab3c9876 | |||
| 2b01085a9e | |||
| 0088cef32a | |||
| cf88f88814 | |||
| 2a014e1e4e | |||
| 3d86bfe6d0 | |||
| 9b7bb945bc | |||
| a9ff3880e2 | |||
| 5a216b22fd | |||
| 0294d4e584 | |||
| 4f6c3b7370 | |||
| 5f2db4d0c9 |
1
.gitignore
vendored
1
.gitignore
vendored
@ -3,4 +3,5 @@
|
||||
!*.py
|
||||
!*.ipynb
|
||||
!*.md
|
||||
!*.parquet
|
||||
!.gitignore
|
||||
|
||||
259
EDA/EDA.ipynb
259
EDA/EDA.ipynb
@ -1,259 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "7440a5b3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import h5py\n",
|
||||
"import os\n",
|
||||
"import warnings\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from pathlib import Path"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2401aaef",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"file_path = \"adabase-public-0020-v_0_0_2.h5py\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "46280999",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"SKT_SR = 100\n",
|
||||
"ECG_SR = 500\n",
|
||||
"RSP_SR = 250\n",
|
||||
"EMG_SR = 1000\n",
|
||||
"EDA_SR = 500\n",
|
||||
"EYE_SR = 250"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e23eb552",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_signals = pd.read_hdf(file_path, \"SIGNALS\", mode=\"r\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b7f494d1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pd.set_option('display.max_columns', None)\n",
|
||||
"pd.set_option('display.max_rows', None)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dd2f4d84",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"settings = df_signals[['STUDY','PHASE','LEVEL']]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1699ddc2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"settings.value_counts()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a4731c56",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Actions units"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9db0b4b2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_signals.columns"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3ceccc89",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"au_data = df_signals.iloc[:,-20:]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3d4ee088",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"au_data.tail()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5d85a8cb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(au_data.shape)\n",
|
||||
"print(au_data.isna().sum())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "efff356f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"clean_au_data = au_data.dropna()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "42ed1bcd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"clean_au_data.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2c7c3f14",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for i in range(len(clean_au_data.columns)):\n",
|
||||
" print(clean_au_data.iloc[:,i].unique())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "332740a8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Plots"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f30b8814",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# df_signals_ecg = pd.read_hdf(file_path, \"SIGNALS\", mode=\"r\", columns=[\"STUDY\",\"LEVEL\", \"PHASE\", 'RAW_ECG_I'])\n",
|
||||
"df_signals_ecg = df_signals[[\"STUDY\",\"LEVEL\", \"PHASE\", 'RAW_ECG_I']]\n",
|
||||
"df_signals_ecg.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ee80fd79",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"study_filter = df_signals[\"STUDY\"] == \"n-back\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3ef29446",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fig, ax = plt.subplots(figsize=(16, 2))\n",
|
||||
"# Set the number of seconds to plot\n",
|
||||
"seconds = 20\n",
|
||||
"# Get the ECG signal data\n",
|
||||
"ecg_signal = df_signals.loc[study_filter, \"RAW_ECG_I\"].dropna()\n",
|
||||
"# Set the x-axis limits to the number of samples in the specified time range\n",
|
||||
"num_samples = ECG_SR * seconds\n",
|
||||
"# Plot the ECG signal\n",
|
||||
"ax.plot(ecg_signal.index[:num_samples]/1000, ecg_signal[:num_samples]);\n",
|
||||
"ax.set_title(\"ECG I\");\n",
|
||||
"ax.set_xlabel('Seconds');\n",
|
||||
"# Set figure size with a 16:6 aspect ratio\n",
|
||||
"fig, ax = plt.subplots(figsize=(16, 2))\n",
|
||||
"# Set the number of seconds to plot\n",
|
||||
"start_second = 0\n",
|
||||
"end_second = 60*30\n",
|
||||
"# Get the EYE signal data - we replace inf with nan to get the original signal.␣\n",
|
||||
"\n",
|
||||
"eye_left_signal = df_signals.loc[study_filter, \"LEFT_PUPIL_DIAMETER\"].dropna()\n",
|
||||
"eye_right_signal = df_signals.loc[study_filter, \"RIGHT_PUPIL_DIAMETER\"].dropna()\n",
|
||||
"#eye_left_signal = df_signals.loc[:, \"LEFT_PUPIL_DIAMETER\"].replace([np.inf],␣\n",
|
||||
"\n",
|
||||
"#eye_right_signal = df_signals.loc[:, \"RIGHT_PUPIL_DIAMETER\"].replace([np.inf],␣\n",
|
||||
"\n",
|
||||
"# Set the x-axis limits to the number of samples in the specified time range\n",
|
||||
"num_samples_start = EYE_SR * start_second\n",
|
||||
"num_samples_end = EYE_SR * end_second\n",
|
||||
"ax.plot(eye_left_signal.index[num_samples_start:num_samples_end]/1000,eye_left_signal[num_samples_start:num_samples_end], label=\"Left\")\n",
|
||||
"ax.plot(eye_right_signal.index[num_samples_start:num_samples_end]/1000,eye_right_signal[num_samples_start:num_samples_end], label=\"Right\")\n",
|
||||
"ax.set_title(\"Pupil Dilation\")\n",
|
||||
"ax.set_xlabel('Seconds')\n",
|
||||
"ax.legend()\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@ -1,625 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "89d81009",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Imports"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7440a5b3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from pathlib import Path\n",
|
||||
"from sklearn.preprocessing import StandardScaler, MinMaxScaler"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "09b7d707",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Config"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2401aaef",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/combined_dataset_25hz.parquet\")\n",
|
||||
"# dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/60s_combined_dataset_25hz.parquet\")\n",
|
||||
"# dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/120s_combined_dataset_25hz.parquet\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0282b0b1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"FILTER_MAD = True\n",
|
||||
"THRESHOLD = 3.5\n",
|
||||
"METHOD = 'minmax'\n",
|
||||
"SCOPE = 'subject'\n",
|
||||
"FILTER_SUBSETS = True"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a8f1716b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Calculations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ac32444a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = pd.read_parquet(dataset_path)\n",
|
||||
"df.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3ba4401c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if(FILTER_SUBSETS):\n",
|
||||
" # Special filter: Keep only specific subsets\n",
|
||||
"# - k-drive L1 baseline\n",
|
||||
"# - n-back L1 baseline \n",
|
||||
"# - k-drive test with levels 1, 2, 3\n",
|
||||
"\n",
|
||||
" df = df[\n",
|
||||
" (\n",
|
||||
" # k-drive L1 baseline\n",
|
||||
" ((df['STUDY'] == 'k-drive') & \n",
|
||||
" (df['LEVEL'] == 1) & \n",
|
||||
" (df['PHASE'] == 'baseline'))\n",
|
||||
" ) | \n",
|
||||
" (\n",
|
||||
" # n-back L1 baseline\n",
|
||||
" ((df['STUDY'] == 'n-back') & \n",
|
||||
" (df['LEVEL'] == 1) & \n",
|
||||
" (df['PHASE'] == 'baseline'))\n",
|
||||
" ) | \n",
|
||||
" (\n",
|
||||
" # k-drive test with levels 1, 2, 3\n",
|
||||
" ((df['STUDY'] == 'k-drive') & \n",
|
||||
" (df['LEVEL'].isin([1, 2, 3])) & \n",
|
||||
" (df['PHASE'] == 'test'))\n",
|
||||
" )].copy()\n",
|
||||
"\n",
|
||||
"print(f\"Filtered dataframe shape: {df.shape}\")\n",
|
||||
"print(f\"Remaining subsets: {df.groupby(['STUDY', 'LEVEL', 'PHASE']).size()}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "77dbd6df",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"face_au_cols = [c for c in df.columns if c.startswith(\"FACE_AU\")]\n",
|
||||
"eye_cols = ['Fix_count_short_66_150', 'Fix_count_medium_300_500',\n",
|
||||
" 'Fix_count_long_gt_1000', 'Fix_count_100', 'Fix_mean_duration',\n",
|
||||
" 'Fix_median_duration', 'Sac_count', 'Sac_mean_amp', 'Sac_mean_dur',\n",
|
||||
" 'Sac_median_dur', 'Blink_count', 'Blink_mean_dur', 'Blink_median_dur',\n",
|
||||
" 'Pupil_mean', 'Pupil_IPA']\n",
|
||||
"eye_cols_without_blink = ['Fix_count_short_66_150', 'Fix_count_medium_300_500',\n",
|
||||
" 'Fix_count_long_gt_1000', 'Fix_count_100', 'Fix_mean_duration',\n",
|
||||
" 'Fix_median_duration', 'Sac_count', 'Sac_mean_amp', 'Sac_mean_dur',\n",
|
||||
" 'Sac_median_dur', 'Pupil_mean', 'Pupil_IPA']\n",
|
||||
"print(len(eye_cols))\n",
|
||||
"all_signal_columns = eye_cols+face_au_cols\n",
|
||||
"print(len(all_signal_columns))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d5e9c67a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"MAD"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "592291ef",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def calculate_mad_params(df, columns):\n",
|
||||
" \"\"\"\n",
|
||||
" Calculate median and MAD parameters for each column.\n",
|
||||
" This should be run ONLY on the training data.\n",
|
||||
" \n",
|
||||
" Returns a dictionary: {col: (median, mad)}\n",
|
||||
" \"\"\"\n",
|
||||
" params = {}\n",
|
||||
" for col in columns:\n",
|
||||
" median = df[col].median()\n",
|
||||
" mad = np.median(np.abs(df[col] - median))\n",
|
||||
" params[col] = (median, mad)\n",
|
||||
" return params\n",
|
||||
"def apply_mad_filter(df, params, threshold=3.5):\n",
|
||||
" \"\"\"\n",
|
||||
" Apply MAD-based outlier removal using precomputed parameters.\n",
|
||||
" Works on training, validation, and test data.\n",
|
||||
" \n",
|
||||
" df: DataFrame to filter\n",
|
||||
" params: dictionary {col: (median, mad)} from training data\n",
|
||||
" threshold: cutoff for robust Z-score\n",
|
||||
" \"\"\"\n",
|
||||
" df_clean = df.copy()\n",
|
||||
"\n",
|
||||
" for col, (median, mad) in params.items():\n",
|
||||
" if mad == 0:\n",
|
||||
" continue # no spread; nothing to remove for this column\n",
|
||||
"\n",
|
||||
" robust_z = 0.6745 * (df_clean[col] - median) / mad\n",
|
||||
" outlier_mask = np.abs(robust_z) > threshold\n",
|
||||
"\n",
|
||||
" # Remove values only in this specific column\n",
|
||||
" df_clean.loc[outlier_mask, col] = median\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" print(df_clean.shape)\n",
|
||||
" return df_clean"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4ddad4a8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"if(FILTER_MAD):\n",
|
||||
" mad_params = calculate_mad_params(df, all_signal_columns)\n",
|
||||
" df = apply_mad_filter(df, mad_params, THRESHOLD)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "89387879",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Normalizer"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9c129cdd",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def fit_normalizer(train_data, au_columns, method='standard', scope='global'):\n",
|
||||
" \"\"\"\n",
|
||||
" Fit normalization scalers on training data.\n",
|
||||
" \n",
|
||||
" Parameters:\n",
|
||||
" -----------\n",
|
||||
" train_data : pd.DataFrame\n",
|
||||
" Training dataframe with AU columns and subjectID\n",
|
||||
" au_columns : list\n",
|
||||
" List of AU column names to normalize\n",
|
||||
" method : str, default='standard'\n",
|
||||
" Normalization method: 'standard' for StandardScaler or 'minmax' for MinMaxScaler\n",
|
||||
" scope : str, default='global'\n",
|
||||
" Normalization scope: 'subject' for per-subject or 'global' for across all subjects\n",
|
||||
" \n",
|
||||
" Returns:\n",
|
||||
" --------\n",
|
||||
" dict\n",
|
||||
" Dictionary containing fitted scalers and statistics for new subjects\n",
|
||||
" \"\"\"\n",
|
||||
" if method == 'standard':\n",
|
||||
" Scaler = StandardScaler\n",
|
||||
" elif method == 'minmax':\n",
|
||||
" Scaler = MinMaxScaler\n",
|
||||
" else:\n",
|
||||
" raise ValueError(\"method must be 'standard' or 'minmax'\")\n",
|
||||
" \n",
|
||||
" scalers = {}\n",
|
||||
" if scope == 'subject':\n",
|
||||
" # Fit one scaler per subject\n",
|
||||
" subject_stats = []\n",
|
||||
" \n",
|
||||
" for subject in train_data['subjectID'].unique():\n",
|
||||
" subject_mask = train_data['subjectID'] == subject\n",
|
||||
" scaler = Scaler()\n",
|
||||
" scaler.fit(train_data.loc[subject_mask, au_columns].values)\n",
|
||||
" scalers[subject] = scaler\n",
|
||||
" \n",
|
||||
" # Store statistics for averaging\n",
|
||||
" if method == 'standard':\n",
|
||||
" subject_stats.append({\n",
|
||||
" 'mean': scaler.mean_,\n",
|
||||
" 'std': scaler.scale_\n",
|
||||
" })\n",
|
||||
" elif method == 'minmax':\n",
|
||||
" subject_stats.append({\n",
|
||||
" 'min': scaler.data_min_,\n",
|
||||
" 'max': scaler.data_max_\n",
|
||||
" })\n",
|
||||
" \n",
|
||||
" # Calculate average statistics for new subjects\n",
|
||||
" if method == 'standard':\n",
|
||||
" avg_mean = np.mean([s['mean'] for s in subject_stats], axis=0)\n",
|
||||
" avg_std = np.mean([s['std'] for s in subject_stats], axis=0)\n",
|
||||
" fallback_scaler = StandardScaler()\n",
|
||||
" fallback_scaler.mean_ = avg_mean\n",
|
||||
" fallback_scaler.scale_ = avg_std\n",
|
||||
" fallback_scaler.var_ = avg_std ** 2\n",
|
||||
" fallback_scaler.n_features_in_ = len(au_columns)\n",
|
||||
" elif method == 'minmax':\n",
|
||||
" avg_min = np.mean([s['min'] for s in subject_stats], axis=0)\n",
|
||||
" avg_max = np.mean([s['max'] for s in subject_stats], axis=0)\n",
|
||||
" fallback_scaler = MinMaxScaler()\n",
|
||||
" fallback_scaler.data_min_ = avg_min\n",
|
||||
" fallback_scaler.data_max_ = avg_max\n",
|
||||
" fallback_scaler.data_range_ = avg_max - avg_min\n",
|
||||
" fallback_scaler.scale_ = 1.0 / fallback_scaler.data_range_\n",
|
||||
" fallback_scaler.min_ = -avg_min * fallback_scaler.scale_\n",
|
||||
" fallback_scaler.n_features_in_ = len(au_columns)\n",
|
||||
" \n",
|
||||
" scalers['_fallback'] = fallback_scaler\n",
|
||||
" \n",
|
||||
" elif scope == 'global':\n",
|
||||
" # Fit one scaler for all subjects\n",
|
||||
" scaler = Scaler()\n",
|
||||
" scaler.fit(train_data[au_columns].values)\n",
|
||||
" scalers['global'] = scaler\n",
|
||||
" \n",
|
||||
" else:\n",
|
||||
" raise ValueError(\"scope must be 'subject' or 'global'\")\n",
|
||||
" \n",
|
||||
" return {'scalers': scalers, 'method': method, 'scope': scope}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9cfabd37",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def apply_normalizer(data, columns, normalizer_dict):\n",
|
||||
" \"\"\"\n",
|
||||
" Apply fitted normalization scalers to data.\n",
|
||||
" \n",
|
||||
" Parameters:\n",
|
||||
" -----------\n",
|
||||
" data : pd.DataFrame\n",
|
||||
" Dataframe with AU columns and subjectID\n",
|
||||
" au_columns : list\n",
|
||||
" List of AU column names to normalize\n",
|
||||
" normalizer_dict : dict\n",
|
||||
" Dictionary containing fitted scalers from fit_normalizer()\n",
|
||||
" \n",
|
||||
" Returns:\n",
|
||||
" --------\n",
|
||||
" pd.DataFrame\n",
|
||||
" DataFrame with normalized AU columns\n",
|
||||
" \"\"\"\n",
|
||||
" normalized_data = data.copy()\n",
|
||||
" scalers = normalizer_dict['scalers']\n",
|
||||
" scope = normalizer_dict['scope']\n",
|
||||
" normalized_data[columns] = normalized_data[columns].astype(np.float64)\n",
|
||||
"\n",
|
||||
" if scope == 'subject':\n",
|
||||
" # Apply per-subject normalization\n",
|
||||
" for subject in data['subjectID'].unique():\n",
|
||||
" subject_mask = data['subjectID'] == subject\n",
|
||||
" \n",
|
||||
" # Use the subject's scaler if available, otherwise use fallback\n",
|
||||
" if subject in scalers:\n",
|
||||
" scaler = scalers[subject]\n",
|
||||
" else:\n",
|
||||
" # Use averaged scaler for new subjects\n",
|
||||
" scaler = scalers['_fallback']\n",
|
||||
" print(f\"Info: Subject {subject} not in training data. Using averaged scaler from training subjects.\")\n",
|
||||
" \n",
|
||||
" normalized_data.loc[subject_mask, columns] = scaler.transform(\n",
|
||||
" data.loc[subject_mask, columns].values\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" elif scope == 'global':\n",
|
||||
" # Apply global normalization\n",
|
||||
" scaler = scalers['global']\n",
|
||||
" normalized_data[columns] = scaler.transform(data[columns].values)\n",
|
||||
" \n",
|
||||
" return normalized_data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4dbbebf7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"scaler = fit_normalizer(df, all_signal_columns, method=METHOD, scope=SCOPE)\n",
|
||||
"df_min_max_normalised = apply_normalizer(df, all_signal_columns, scaler)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6b9b2ae8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"a= df_min_max_normalised[['STUDY','LEVEL','PHASE']]\n",
|
||||
"print(a.dtypes)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e3e1bc34",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Define signal columns (adjust only once)\n",
|
||||
"signal_columns = all_signal_columns\n",
|
||||
"\n",
|
||||
"# Get all unique combinations of STUDY, LEVEL and PHASE\n",
|
||||
"unique_combinations = df_min_max_normalised[['STUDY', 'LEVEL', 'PHASE']].drop_duplicates().reset_index(drop=True)\n",
|
||||
"\n",
|
||||
"# Dictionary to store subsets\n",
|
||||
"subsets = {}\n",
|
||||
"subset_sizes = {}\n",
|
||||
"\n",
|
||||
"for idx, row in unique_combinations.iterrows():\n",
|
||||
" study = row['STUDY']\n",
|
||||
" level = row['LEVEL']\n",
|
||||
" phase = row['PHASE']\n",
|
||||
" key = f\"{study}_L{level}_P{phase}\"\n",
|
||||
" subset = df_min_max_normalised[\n",
|
||||
" (df_min_max_normalised['STUDY'] == study) & \n",
|
||||
" (df_min_max_normalised['LEVEL'] == level) & \n",
|
||||
" (df_min_max_normalised['PHASE'] == phase)\n",
|
||||
" ]\n",
|
||||
" subsets[key] = subset\n",
|
||||
" subset_sizes[key] = len(subset)\n",
|
||||
"\n",
|
||||
"# Output subset sizes\n",
|
||||
"print(\"Number of samples per subset:\")\n",
|
||||
"print(\"=\" * 40)\n",
|
||||
"for key, size in subset_sizes.items():\n",
|
||||
" print(f\"{key}: {size} samples\")\n",
|
||||
"print(\"=\" * 40)\n",
|
||||
"print(f\"Total number of subsets: {len(subsets)}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c7fdeb5c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"# Function to categorize subsets\n",
|
||||
"def categorize_subset(key):\n",
|
||||
" \"\"\"Categorizes a subset as 'low' or 'high' based on the given logic\"\"\"\n",
|
||||
" parts = key.split('_')\n",
|
||||
" study = parts[0]\n",
|
||||
" level = int(parts[1][1:]) # 'L1' -> 1\n",
|
||||
" phase = parts[2][1:] # 'Pbaseline' -> 'baseline'\n",
|
||||
" \n",
|
||||
" # LOW: baseline OR (n-back with level 1 or 4)\n",
|
||||
" if phase == \"baseline\":\n",
|
||||
" return 'low'\n",
|
||||
" elif study == \"n-back\" and level in [1, 4]:\n",
|
||||
" return 'low'\n",
|
||||
" \n",
|
||||
" # HIGH: (n-back with level 2,3,5,6 and phase train/test) OR (k-drive not baseline)\n",
|
||||
" elif study == \"n-back\" and level in [2, 3, 5, 6] and phase in [\"train\", \"test\"]:\n",
|
||||
" return 'high'\n",
|
||||
" elif study == \"k-drive\" and phase != \"baseline\":\n",
|
||||
" return 'high'\n",
|
||||
" \n",
|
||||
" return None\n",
|
||||
"\n",
|
||||
"# Categorize subsets\n",
|
||||
"low_subsets = {}\n",
|
||||
"high_subsets = {}\n",
|
||||
"\n",
|
||||
"for key, subset in subsets.items():\n",
|
||||
" category = categorize_subset(key)\n",
|
||||
" if category == 'low':\n",
|
||||
" low_subsets[key] = subset\n",
|
||||
" elif category == 'high':\n",
|
||||
" high_subsets[key] = subset\n",
|
||||
"\n",
|
||||
"# Output statistics\n",
|
||||
"print(\"\\n\" + \"=\" * 50)\n",
|
||||
"print(\"SUBSET CATEGORIZATION\")\n",
|
||||
"print(\"=\" * 50)\n",
|
||||
"\n",
|
||||
"print(\"\\nLOW subsets (Blue):\")\n",
|
||||
"print(\"-\" * 50)\n",
|
||||
"low_total = 0\n",
|
||||
"for key in sorted(low_subsets.keys()):\n",
|
||||
" size = subset_sizes[key]\n",
|
||||
" low_total += size\n",
|
||||
" print(f\" {key}: {size} samples\")\n",
|
||||
"print(f\"{'TOTAL LOW:':<30} {low_total} samples\")\n",
|
||||
"print(f\"{'NUMBER OF LOW SUBSETS:':<30} {len(low_subsets)}\")\n",
|
||||
"\n",
|
||||
"print(\"\\nHIGH subsets (Red):\")\n",
|
||||
"print(\"-\" * 50)\n",
|
||||
"high_total = 0\n",
|
||||
"for key in sorted(high_subsets.keys()):\n",
|
||||
" size = subset_sizes[key]\n",
|
||||
" high_total += size\n",
|
||||
" print(f\" {key}: {size} samples\")\n",
|
||||
"print(f\"{'TOTAL HIGH:':<30} {high_total} samples\")\n",
|
||||
"print(f\"{'NUMBER OF HIGH SUBSETS:':<30} {len(high_subsets)}\")\n",
|
||||
"\n",
|
||||
"print(\"\\n\" + \"=\" * 50)\n",
|
||||
"print(f\"TOTAL SAMPLES: {low_total + high_total}\")\n",
|
||||
"print(f\"TOTAL SUBSETS: {len(low_subsets) + len(high_subsets)}\")\n",
|
||||
"print(\"=\" * 50)\n",
|
||||
"\n",
|
||||
"# Find minimum subset size\n",
|
||||
"min_subset_size = min(subset_sizes.values())\n",
|
||||
"print(f\"\\nMinimum subset size: {min_subset_size}\")\n",
|
||||
"\n",
|
||||
"# Number of points to plot per subset (50% of minimum size)\n",
|
||||
"sampling_factor = 1\n",
|
||||
"n_samples_per_subset = int(sampling_factor * min_subset_size)\n",
|
||||
"print(f\"Number of randomly drawn points per subset: {n_samples_per_subset}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ff363fc5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Plot"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3a9d9163",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Create comparison plots\n",
|
||||
"fig, axes = plt.subplots(len(signal_columns), 1, figsize=(14, 4 * len(signal_columns)))\n",
|
||||
"\n",
|
||||
"# If only one signal column exists, convert axes to list\n",
|
||||
"if len(signal_columns) == 1:\n",
|
||||
" axes = [axes]\n",
|
||||
"\n",
|
||||
"# Create a plot for each signal column\n",
|
||||
"for i, signal_col in enumerate(signal_columns):\n",
|
||||
" ax = axes[i]\n",
|
||||
" \n",
|
||||
" y_pos = 0\n",
|
||||
" labels = []\n",
|
||||
" \n",
|
||||
" # First plot all LOW subsets (sorted, blue)\n",
|
||||
" for label in sorted(low_subsets.keys()):\n",
|
||||
" subset = low_subsets[label]\n",
|
||||
" if len(subset) > 0 and signal_col in subset.columns:\n",
|
||||
" # Draw random sample\n",
|
||||
" n_samples = min(n_samples_per_subset, len(subset))\n",
|
||||
" sampled_data = subset[signal_col].sample(n=n_samples, random_state=42)\n",
|
||||
" \n",
|
||||
" # Calculate mean and median\n",
|
||||
" mean_val = subset[signal_col].mean()\n",
|
||||
" median_val = subset[signal_col].median()\n",
|
||||
" \n",
|
||||
" # Plot points in blue\n",
|
||||
" ax.scatter(sampled_data, [y_pos] * len(sampled_data), \n",
|
||||
" alpha=0.5, s=30, color='blue')\n",
|
||||
" \n",
|
||||
" # Mean as black cross\n",
|
||||
" ax.plot(mean_val, y_pos, 'x', markersize=12, markeredgewidth=3, \n",
|
||||
" color='black', zorder=5)\n",
|
||||
" \n",
|
||||
" # Median as brown cross\n",
|
||||
" ax.plot(median_val, y_pos, 'x', markersize=12, markeredgewidth=3, \n",
|
||||
" color='brown', zorder=5)\n",
|
||||
" \n",
|
||||
" labels.append(f\"{label} (n={subset_sizes[label]})\")\n",
|
||||
" y_pos += 1\n",
|
||||
" \n",
|
||||
" # Separation line between LOW and HIGH\n",
|
||||
" if len(low_subsets) > 0 and len(high_subsets) > 0:\n",
|
||||
" ax.axhline(y=y_pos - 0.5, color='gray', linestyle='--', linewidth=2, alpha=0.7)\n",
|
||||
" \n",
|
||||
" # Then plot all HIGH subsets (sorted, red)\n",
|
||||
" for label in sorted(high_subsets.keys()):\n",
|
||||
" subset = high_subsets[label]\n",
|
||||
" if len(subset) > 0 and signal_col in subset.columns:\n",
|
||||
" # Draw random sample\n",
|
||||
" n_samples = min(n_samples_per_subset, len(subset))\n",
|
||||
" sampled_data = subset[signal_col].sample(n=n_samples, random_state=42)\n",
|
||||
" \n",
|
||||
" # Calculate mean and median\n",
|
||||
" mean_val = subset[signal_col].mean()\n",
|
||||
" median_val = subset[signal_col].median()\n",
|
||||
" \n",
|
||||
" # Plot points in red\n",
|
||||
" ax.scatter(sampled_data, [y_pos] * len(sampled_data), \n",
|
||||
" alpha=0.5, s=30, color='red')\n",
|
||||
" \n",
|
||||
" # Mean as black cross\n",
|
||||
" ax.plot(mean_val, y_pos, 'x', markersize=12, markeredgewidth=3, \n",
|
||||
" color='black', zorder=5)\n",
|
||||
" \n",
|
||||
" # Median as brown cross\n",
|
||||
" ax.plot(median_val, y_pos, 'x', markersize=12, markeredgewidth=3, \n",
|
||||
" color='brown', zorder=5)\n",
|
||||
" \n",
|
||||
" labels.append(f\"{label} (n={subset_sizes[label]})\")\n",
|
||||
" y_pos += 1\n",
|
||||
" \n",
|
||||
" ax.set_yticks(range(len(labels)))\n",
|
||||
" ax.set_yticklabels(labels)\n",
|
||||
" ax.set_xlabel(f'{signal_col} value')\n",
|
||||
" ax.set_title(f'{signal_col}: LOW (Blue) vs HIGH (Red) | {n_samples_per_subset} points/subset | Black X = Mean, Brown X = Median')\n",
|
||||
" ax.grid(True, alpha=0.3, axis='x')\n",
|
||||
" ax.axvline(0, color='gray', linestyle='--', alpha=0.5)\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"print(f\"\\nNote: {n_samples_per_subset} random points were plotted per subset.\")\n",
|
||||
"print(\"Blue points = LOW subsets | Red points = HIGH subsets\")\n",
|
||||
"print(\"Black 'X' = Mean of entire subset | Brown 'X' = Median of entire subset\")\n",
|
||||
"print(f\"Total subsets plotted: {len(low_subsets)} LOW + {len(high_subsets)} HIGH = {len(low_subsets) + len(high_subsets)} subsets\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@ -1,166 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1014c5e0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Imports"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e42f3011",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0a834496",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"path =r\"C:\\Users\\micha\\FAUbox\\WS2526_Fahrsimulator_MSY (Celina Korzer)\\AU_dataset\\output_windowed.parquet\"\n",
|
||||
"df = pd.read_parquet(path=path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "aa4759fa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"high_nback = df[\n",
|
||||
" (df[\"STUDY\"]==\"n-back\") &\n",
|
||||
" (df[\"LEVEL\"].isin([2, 3, 5, 6])) &\n",
|
||||
" (df[\"PHASE\"].isin([\"train\", \"test\"]))\n",
|
||||
"]\n",
|
||||
"high_nback.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a2aa0596",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"low_all = df[\n",
|
||||
" ((df[\"PHASE\"] == \"baseline\") |\n",
|
||||
" ((df[\"STUDY\"] == \"n-back\") & (df[\"PHASE\"] != \"baseline\") & (df[\"LEVEL\"].isin([1,4]))))\n",
|
||||
"]\n",
|
||||
"print(low_all.shape)\n",
|
||||
"high_kdrive = df[\n",
|
||||
" (df[\"STUDY\"] == \"k-drive\") & (df[\"PHASE\"] != \"baseline\")\n",
|
||||
"]\n",
|
||||
"print(high_kdrive.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f7d446a1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print((df.shape[0]==(high_kdrive.shape[0]+high_nback.shape[0]+low_all.shape[0])))\n",
|
||||
"print(df.shape[0])\n",
|
||||
"print((high_kdrive.shape[0]+high_nback.shape[0]+low_all.shape[0]))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "474e144a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"high_all = pd.concat([high_nback, high_kdrive])\n",
|
||||
"high_all.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5dd585c2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df.dtypes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0bd39d9f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Get all columns that start with 'AU'\n",
|
||||
"au_columns = [col for col in low_all.columns if col.startswith('AU')]\n",
|
||||
"\n",
|
||||
"# Calculate number of rows and columns for subplots\n",
|
||||
"n_cols = len(au_columns)\n",
|
||||
"n_rows = 4\n",
|
||||
"n_cols_subplot = 5\n",
|
||||
"\n",
|
||||
"# Create figure with subplots\n",
|
||||
"fig, axes = plt.subplots(n_rows, n_cols_subplot, figsize=(20, 16))\n",
|
||||
"axes = axes.flatten()\n",
|
||||
"fig.suptitle('Action Unit (AU) Distributions: Low vs High', fontsize=20, fontweight='bold', y=0.995)\n",
|
||||
"\n",
|
||||
"# Create histogram for each AU column\n",
|
||||
"for idx, col in enumerate(au_columns):\n",
|
||||
" ax = axes[idx]\n",
|
||||
" \n",
|
||||
" # Plot overlapping histograms\n",
|
||||
" ax.hist(low_all[col].dropna(), bins=30, alpha=0.6, color='blue', label='low_all', edgecolor='black')\n",
|
||||
" ax.hist(high_all[col].dropna(), bins=30, alpha=0.6, color='red', label='high_all', edgecolor='black')\n",
|
||||
" \n",
|
||||
" # Set title and labels\n",
|
||||
" ax.set_title(col, fontsize=10, fontweight='bold')\n",
|
||||
" ax.set_xlabel('Value', fontsize=8)\n",
|
||||
" ax.set_ylabel('Frequency', fontsize=8)\n",
|
||||
" ax.legend(fontsize=8)\n",
|
||||
" ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"# Hide any unused subplots\n",
|
||||
"for idx in range(len(au_columns), len(axes)):\n",
|
||||
" axes[idx].set_visible(False)\n",
|
||||
"\n",
|
||||
"# Adjust layout\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@ -1,157 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "aab6b326-a583-47ad-8bb7-723c2fddcc63",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# %pip install pyocclient\n",
|
||||
"import yaml\n",
|
||||
"import owncloud\n",
|
||||
"import pandas as pd\n",
|
||||
"import time"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4f42846c-27c3-4394-a40a-e22d73c2902e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"start = time.time()\n",
|
||||
"\n",
|
||||
"with open(\"../login.yaml\") as f:\n",
|
||||
" cfg = yaml.safe_load(f)\n",
|
||||
"url, password = cfg[0][\"url\"], cfg[1][\"password\"]\n",
|
||||
"file = \"adabase-public-0022-v_0_0_2.h5py\"\n",
|
||||
"oc = owncloud.Client.from_public_link(url, folder_password=password)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"oc.get_file(file, \"tmp22.h5\")\n",
|
||||
"\n",
|
||||
"end = time.time()\n",
|
||||
"print(end - start)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3714dec2-85d0-4f76-af46-ea45ebec2fa3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"start = time.time()\n",
|
||||
"df_performance = pd.read_hdf(\"tmp22.h5\", \"PERFORMANCE\")\n",
|
||||
"end = time.time()\n",
|
||||
"print(end - start)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f50e97d0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(22)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c131c816",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_performance"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6ae47e52-ad86-4f8d-b929-0080dc99f646",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"start = time.time()\n",
|
||||
"df_4_col = pd.read_hdf(\"tmp.h5\", \"SIGNALS\", mode=\"r\", columns=[\"STUDY\"], start=0, stop=1)\n",
|
||||
"end = time.time()\n",
|
||||
"print(end - start)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7c139f3a-ede8-4530-957d-d1bb939f6cb5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_4_col.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a68d58ea-65f2-46c4-a2b2-8c3447c715d7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_4_col.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "95aa4523-3784-4ab6-bf92-0227ce60e863",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_4_col.info()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "defbcaf4-ad1b-453f-9b48-ab0ecfc4b5d5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_4_col.isna().sum()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "72313895-c478-44a5-9108-00b0bec01bb8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@ -1,213 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8fb02733",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Imports"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "96f3b128",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import yaml\n",
|
||||
"import owncloud\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c20cee7c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Connection to Owncloud"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c4c94558",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Load credentials\n",
|
||||
"with open(\"../login.yaml\") as f:\n",
|
||||
" cfg = yaml.safe_load(f)\n",
|
||||
" \n",
|
||||
"url, password = cfg[0][\"url\"], cfg[1][\"password\"]\n",
|
||||
"\n",
|
||||
"# Connect once\n",
|
||||
"oc = owncloud.Client.from_public_link(url, folder_password=password)\n",
|
||||
"# File pattern\n",
|
||||
"# base = \"adabase-public-{num:04d}-v_0_0_2.h5py\"\n",
|
||||
"base = \"{num:04d}-*.h5py\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "07c03d07",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"num_files = 2 # number of files to process (min: 1, max: 30)\n",
|
||||
"performance_data = []\n",
|
||||
"\n",
|
||||
"for i in range(num_files):\n",
|
||||
" file_pattern = f\"{i:04d}-*\"\n",
|
||||
" \n",
|
||||
" # Get list of files matching the pattern\n",
|
||||
" files = oc.list('.')\n",
|
||||
" matching_files = [f.get_name() for f in files if f.get_name().startswith(f\"{i:04d}-\")]\n",
|
||||
" \n",
|
||||
" if matching_files:\n",
|
||||
" file_name = matching_files[0] # Take the first matching file\n",
|
||||
" local_tmp = f\"tmp_{i:04d}.h5\"\n",
|
||||
" \n",
|
||||
" oc.get_file(file_name, local_tmp)\n",
|
||||
" print(f\"{file_name} geöffnet\")\n",
|
||||
" else:\n",
|
||||
" print(f\"Keine Datei gefunden für Muster: {file_pattern}\")\n",
|
||||
" # file_name = base.format(num=i)\n",
|
||||
" # local_tmp = f\"tmp_{i:04d}.h5\"\n",
|
||||
"\n",
|
||||
" # oc.get_file(file_name, local_tmp)\n",
|
||||
" # print(f\"{file_name} geöffnet\")\n",
|
||||
"\n",
|
||||
" # check SIGNALS table for AUs\n",
|
||||
" with pd.HDFStore(local_tmp, mode=\"r\") as store:\n",
|
||||
" cols = store.select(\"SIGNALS\", start=0, stop=1).columns\n",
|
||||
" au_cols = [c for c in cols if c.startswith(\"AU\")]\n",
|
||||
" if not au_cols:\n",
|
||||
" print(f\"Subject {i} enthält keine AUs\")\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" # load performance table\n",
|
||||
" with pd.HDFStore(local_tmp, mode=\"r\") as store:\n",
|
||||
" perf_df = store.select(\"PERFORMANCE\")\n",
|
||||
"\n",
|
||||
" f1_cols = [c for c in [\"AUDITIVE F1\", \"VISUAL F1\", \"F1\"] if c in perf_df.columns]\n",
|
||||
" if not f1_cols:\n",
|
||||
" print(f\"Subject {i}: keine F1-Spalten gefunden\")\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" subject_entry = {\"subjectID\": i}\n",
|
||||
" valid_scores = []\n",
|
||||
"\n",
|
||||
" # iterate rows: each (study, level, phase)\n",
|
||||
" for _, row in perf_df.iterrows():\n",
|
||||
" study, level, phase = row[\"STUDY\"], row[\"LEVEL\"], row[\"PHASE\"]\n",
|
||||
" col_name = f\"STUDY_{study}_LEVEL_{level}_PHASE_{phase}\"\n",
|
||||
"\n",
|
||||
" # collect valid F1 values among the three columns\n",
|
||||
" scores = [row[c] for c in f1_cols if pd.notna(row[c])]\n",
|
||||
" if scores:\n",
|
||||
" mean_score = float(np.mean(scores))\n",
|
||||
" subject_entry[col_name] = mean_score\n",
|
||||
" valid_scores.extend(scores)\n",
|
||||
"\n",
|
||||
" # compute overall average across all valid combinations\n",
|
||||
" if valid_scores:\n",
|
||||
" subject_entry[\"overall_score\"] = float(np.mean(valid_scores))\n",
|
||||
" performance_data.append(subject_entry)\n",
|
||||
" print(f\"Subject {i}: {len(valid_scores)} gültige Scores, Overall = {subject_entry['overall_score']:.3f}\")\n",
|
||||
" else:\n",
|
||||
" print(f\"Subject {i}: keine gültigen F1-Scores\")\n",
|
||||
"\n",
|
||||
"# build dataframe\n",
|
||||
"if performance_data:\n",
|
||||
" performance_df = pd.DataFrame(performance_data)\n",
|
||||
" combination_cols = sorted([c for c in performance_df.columns if c.startswith(\"STUDY_\")])\n",
|
||||
" final_cols = [\"subjectID\", \"overall_score\"] + combination_cols\n",
|
||||
" performance_df = performance_df[final_cols]\n",
|
||||
" performance_df.to_csv(\"n_au_performance.csv\", index=False)\n",
|
||||
"\n",
|
||||
" print(f\"\\nGesamt Subjects mit Action Units: {len(performance_df)}\")\n",
|
||||
"else:\n",
|
||||
" print(\"Keine gültigen Daten gefunden.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0bcaf065",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"performance_df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "db95eea7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with pd.HDFStore(\"tmp_0000.h5\", mode=\"r\") as store:\n",
|
||||
" md = store.select(\"META\")\n",
|
||||
"print(\"File 0:\")\n",
|
||||
"print(md)\n",
|
||||
"with pd.HDFStore(\"tmp_0001.h5\", mode=\"r\") as store:\n",
|
||||
" md = store.select(\"META\")\n",
|
||||
"print(\"File 1\")\n",
|
||||
"print(md)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8067036b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pd.set_option('display.max_columns', None)\n",
|
||||
"pd.set_option('display.max_rows', None)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f18e7385",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"with pd.HDFStore(\"tmp_0000.h5\", mode=\"r\") as store:\n",
|
||||
" md = store.select(\"SIGNALS\", start=0, stop=1)\n",
|
||||
"print(\"File 0:\")\n",
|
||||
"md.head()\n",
|
||||
"# with pd.HDFStore(\"tmp_0001.h5\", mode=\"r\",start=0, stop=1) as store:\n",
|
||||
"# md = store.select(\"SIGNALS\")\n",
|
||||
"# print(\"File 1\")\n",
|
||||
"# print(md.columns)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@ -3,11 +3,11 @@ from feat.utils.io import get_test_data_path
|
||||
from moviepy.video.io.VideoFileClip import VideoFileClip
|
||||
import os
|
||||
|
||||
def extract_aus(path, model, skip_frames):
|
||||
def extract_aus(path, model):
|
||||
detector = Detector(au_model=model)
|
||||
|
||||
video_prediction = detector.detect(
|
||||
path, data_type="video", skip_frames=skip_frames, face_detection_threshold=0.95 # alle 5 Sekunden einbeziehen - 24 Frames pro Sekunde
|
||||
path, data_type="video", skip_frames=24*5, face_detection_threshold=0.95 # alle 5 Sekunden einbeziehen - 24 Frames pro Sekunde
|
||||
)
|
||||
|
||||
return video_prediction.aus.sum()
|
||||
@ -38,13 +38,13 @@ def split_video(path, chunk_length=120):
|
||||
|
||||
return output_path
|
||||
|
||||
# def start(path):
|
||||
# results = []
|
||||
# clips = split_video(path)
|
||||
def start(path):
|
||||
results = []
|
||||
clips = split_video(path)
|
||||
|
||||
# for clip in clips:
|
||||
# results.append(extract_aus(clip, 'svm', 25*5))
|
||||
# return results
|
||||
for clip in clips:
|
||||
results.append(extract_aus(clip, 'svm'))
|
||||
return results
|
||||
|
||||
if __name__ == "__main__":
|
||||
results = []
|
||||
@ -53,6 +53,6 @@ if __name__ == "__main__":
|
||||
clips = split_video(test_video_path)
|
||||
|
||||
for clippath in clips:
|
||||
results.append(extract_aus(clippath, 'svm', 25*5))
|
||||
results.append(extract_aus(clippath, 'svm'))
|
||||
|
||||
print(results)
|
||||
@ -1,158 +0,0 @@
|
||||
import cv2
|
||||
import time
|
||||
import os
|
||||
import threading
|
||||
from datetime import datetime
|
||||
from feat import Detector
|
||||
import torch
|
||||
import pandas as pd
|
||||
|
||||
# Import your helper functions
|
||||
# from db_helper import connect_db, disconnect_db, insert_rows_into_table, create_table
|
||||
import db_helper as db
|
||||
|
||||
|
||||
# Configuration
DB_PATH = "action_units.db"  # TODO
CAMERA_INDEX = 0
OUTPUT_DIR = "recordings"
VIDEO_DURATION = 50  # seconds per recorded clip
START_INTERVAL = 5  # seconds until the next recorder is started
FPS = 25.0  # fixed frame rate used for writing and pacing

# Make sure the recordings folder exists before any VideoRecorder writes to it.
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# Global detector so it is not reloaded for every video (saves a lot of time/memory).
# NOTE(review): constructed at import time — loading the model may take a while.
print("Initialisiere AU-Detector (bitte warten)...")
detector = Detector(au_model="xgb")
|
||||
|
||||
def extract_aus(path, skip_frames):
    """Run the global py-feat detector over a video and aggregate Action Units.

    Args:
        path: Path of the video file to analyze.
        skip_frames: Number of frames to skip between analyzed frames.

    Returns:
        The per-AU sum over all detected frames (pandas-style ``.sum()`` of
        the detector's ``aus`` table), or None when the aggregation fails.
    """
    # torch.no_grad() disables gradient tracking.
    # This avoids the "Can't call numpy() on Tensor that requires grad" error.
    with torch.no_grad():
        video_prediction = detector.detect_video(
            path,
            skip_frames=skip_frames,
            face_detection_threshold=0.95
        )

    # In case video_prediction (or .aus) is still tensor-like,
    # make sure the summation is guarded.
    try:
        # Sum of the Action Units over all detected frames.
        res = video_prediction.aus.sum()
        return res
    except Exception as e:
        print(f"Fehler bei der Summenbildung: {e}")
        return None
|
||||
|
||||
def startAU_creation(video_path, db_path):
    """Analyze one recorded clip and persist the AU sums; runs in its own thread.

    Args:
        video_path: Absolute path of the finished video file.
        db_path: Path of the SQLite database the results are written to.

    Any exception is caught and printed so a failing analysis never kills
    the capturing main loop.
    """
    try:
        print(f"\n[THREAD START] Analyse läuft für: {video_path}")
        # Compute skip_frames (e.g. every 5 seconds at 25 FPS = 125).
        output = extract_aus(video_path, skip_frames=int(FPS*5))

        print(f"\n--- Ergebnis für {os.path.basename(video_path)} ---")
        print(output)
        print("--------------------------------------------------\n")
        if output is not None:
            # Open a fresh connection for this thread (SQLite thread safety).
            conn, cursor = db.connect_db(db_path)

            # Prepare the payload: timestamp + AU results.
            # The Series/DataFrame result is converted to a dictionary.
            data_to_insert = output.to_dict()
            data_to_insert['timestamp'] = [datetime.now().strftime("%Y-%m-%d %H:%M:%S")]

            # The AU column names are dynamic, so ensure every value is a list
            # (insert_rows_into_table expects a list per key).
            final_payload = {k: [v] if not isinstance(v, list) else v for k, v in data_to_insert.items()}


            db.insert_rows_into_table(conn, cursor, "actionUnits", final_payload)

            db.disconnect_db(conn, cursor)
            print(f"--- Ergebnis für {os.path.basename(video_path)} in DB gespeichert ---")
    except Exception as e:
        print(f"Fehler bei der Analyse von {video_path}: {e}")
|
||||
|
||||
class VideoRecorder:
    """Writes a fixed-length clip frame by frame and, once full, releases the
    file and kicks off AU analysis on a daemon thread."""

    def __init__(self, filename, width, height, db_path):
        # filename: output path of the clip; db_path: DB the analysis writes to.
        self.filename = filename
        self.db_path = db_path
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        self.out = cv2.VideoWriter(filename, fourcc, FPS, (width, height))
        # Total number of frames this recorder accepts (duration * frame rate).
        self.frames_to_record = int(VIDEO_DURATION * FPS)
        self.frames_count = 0
        self.is_finished = False

    def write_frame(self, frame):
        """Append one frame; once the quota is reached, finalize the clip."""
        if self.frames_count < self.frames_to_record:
            self.out.write(frame)
            self.frames_count += 1
        else:
            self.finish()

    def finish(self):
        """Release the writer exactly once and start the analysis thread."""
        if not self.is_finished:
            self.out.release()
            self.is_finished = True
            abs_path = os.path.abspath(self.filename)
            print(f"Video fertig gespeichert: {self.filename}")

            # --- MULTITHREADING HERE ---
            # Run the analysis in a new thread so main() can keep reading frames.
            analysis_thread = threading.Thread(target=startAU_creation, args=(abs_path, self.db_path))
            analysis_thread.daemon = True  # terminates when the main program exits
            analysis_thread.start()
|
||||
|
||||
def main():
    """Capture webcam frames, spawn overlapping recorders every
    START_INTERVAL seconds, and let finished clips trigger AU analysis."""
    cap = cv2.VideoCapture(CAMERA_INDEX)
    if not cap.isOpened():
        print("Fehler: Kamera konnte nicht geöffnet werden.")
        return

    # Recorders are created with the camera's native frame size.
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    active_recorders = []
    last_start_time = 0

    print("Aufnahme läuft. Drücke 'q' zum Beenden.")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            current_time = time.time()

            # Start a new (overlapping) recorder every START_INTERVAL seconds.
            if current_time - last_start_time >= START_INTERVAL:
                timestamp = datetime.now().strftime("%H%M%S")
                filename = os.path.join(OUTPUT_DIR, f"rec_{timestamp}.avi")
                new_recorder = VideoRecorder(filename, width, height, DB_PATH)
                active_recorders.append(new_recorder)
                last_start_time = current_time

            # Feed the frame to every active recorder and drop finished ones.
            # Iterate over a copy because the list is mutated inside the loop.
            for rec in active_recorders[:]:
                rec.write_frame(frame)
                if rec.is_finished:
                    active_recorders.remove(rec)

            cv2.imshow('Kamera Livestream', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            # Crude pacing toward FPS; capture/display time is not subtracted,
            # so the effective rate is somewhat below FPS.
            time.sleep(1/FPS)

    finally:
        cap.release()
        cv2.destroyAllWindows()
        print("Programm beendet. Warte ggf. auf laufende Analysen...")
|
||||
|
||||
# Script entry point: start the capture/record/analyze loop.
if __name__ == "__main__":
    main()
|
||||
@ -1,296 +0,0 @@
|
||||
import cv2
|
||||
import time
|
||||
import os
|
||||
import threading
|
||||
from datetime import datetime
|
||||
from feat import Detector
|
||||
import torch
|
||||
import mediapipe as mp
|
||||
import csv
|
||||
|
||||
# Configuration
CAMERA_INDEX = 0
OUTPUT_DIR = "recordings"
VIDEO_DURATION = 10  # seconds per recorded clip
START_INTERVAL = 5  # seconds until the next recorder is started
FPS = 25.0  # fixed frame rate used for writing and pacing

# Make sure the recordings folder exists before any VideoRecorder writes to it.
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# Global detector so it is not reloaded for every video (saves a lot of time/memory).
print("Initialisiere AU-Detector (bitte warten)...")
detector = Detector(au_model="xgb")

# ===== MediaPipe FaceMesh Setup =====
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    refine_landmarks=True,  # important for the iris landmarks
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

# FaceMesh landmark indices used below.
# NOTE(review): these follow MediaPipe's 478-point refined mesh — verify
# against the MediaPipe FaceMesh landmark map.
LEFT_IRIS = [474, 475, 476, 477]
RIGHT_IRIS = [469, 470, 471, 472]

# (upper lid, lower lid) landmark pairs per eye.
LEFT_EYE_LIDS = (159, 145)
RIGHT_EYE_LIDS = (386, 374)

# (corner, corner, upper lid, lower lid) used by compute_gaze.
LEFT_EYE_GAZE_IDXS = (33, 133, 159, 145)
RIGHT_EYE_GAZE_IDXS = (263, 362, 386, 374)

# Minimum lid gap in pixels for an eye to count as open.
EYE_OPEN_THRESHOLD = 6

# Prepare the CSV output.
# NOTE(review): the file is opened at import time and only closed in main()'s
# finally block — importing this module as a library leaks the handle.
gaze_csv = open("gaze_data.csv", mode="w", newline="")
gaze_writer = csv.writer(gaze_csv)
gaze_writer.writerow([
    "timestamp",
    "left_gaze_x",
    "left_gaze_y",
    "right_gaze_x",
    "right_gaze_y",
    "left_valid",
    "right_valid",
    "left_diameter",
    "right_diameter"
])
|
||||
|
||||
def eye_openness(landmarks, top_idx, bottom_idx, img_height):
    """Return the vertical eyelid gap in pixels.

    The normalized y-coordinates of the two lid landmarks are compared and
    the absolute difference is scaled by the image height.
    """
    delta_y = landmarks[top_idx].y - landmarks[bottom_idx].y
    return abs(delta_y) * img_height
|
||||
|
||||
|
||||
def compute_gaze(landmarks, iris_center, indices, w, h):
    """Return the iris position normalized to the eye box as (gaze_x, gaze_y).

    Both coordinates are clamped to [0, 1]; a degenerate (zero-size) eye box
    yields the neutral gaze (0.5, 0.5).
    """
    corner_a_idx, corner_b_idx, lid_top_idx, lid_bottom_idx = indices

    # Landmark coordinates are normalized; scale into pixel space.
    corner_a_x = landmarks[corner_a_idx].x * w
    corner_b_x = landmarks[corner_b_idx].x * w
    lid_top_y = landmarks[lid_top_idx].y * h
    lid_bottom_y = landmarks[lid_bottom_idx].y * h

    iris_x, iris_y = iris_center

    box_left = min(corner_a_x, corner_b_x)
    box_width = max(corner_a_x, corner_b_x) - box_left
    box_top = min(lid_top_y, lid_bottom_y)
    box_height = abs(lid_bottom_y - lid_top_y)

    # A collapsed eye box would divide by zero -> report neutral gaze.
    if box_width == 0 or box_height == 0:
        return 0.5, 0.5

    gaze_x = max(0, min(1, (iris_x - box_left) / box_width))
    gaze_y = max(0, min(1, (iris_y - box_top) / box_height))

    return gaze_x, gaze_y
|
||||
|
||||
def extract_aus(path, skip_frames):
    """Run the global py-feat detector over a video and aggregate Action Units.

    Args:
        path: Path of the video file to analyze.
        skip_frames: Number of frames to skip between analyzed frames.

    Returns:
        The per-AU sum over all detected frames, or 0 when the aggregation
        fails (unlike the DB variant of this script, which returns None).
    """
    # torch.no_grad() disables gradient tracking.
    # This avoids the "Can't call numpy() on Tensor that requires grad" error.
    with torch.no_grad():
        video_prediction = detector.detect_video(
            path,
            skip_frames=skip_frames,
            face_detection_threshold=0.95
        )

    # In case video_prediction (or .aus) is still tensor-like,
    # make sure the summation is guarded.
    try:
        # Sum of the Action Units over all detected frames.
        res = video_prediction.aus.sum()
        return res
    except Exception as e:
        print(f"Fehler bei der Summenbildung: {e}")
        return 0
|
||||
|
||||
def startAU_creation(video_path):
    """Analyze one finished recording in a worker thread and print the AU sums."""
    try:
        print(f"\n[THREAD START] Analyse läuft für: {video_path}")
        # Analyze one frame every 5 seconds (at 25 FPS that skips 125 frames).
        au_totals = extract_aus(video_path, skip_frames=int(FPS * 5))

        print(f"\n--- Ergebnis für {os.path.basename(video_path)} ---")
        print(au_totals)
        print("--------------------------------------------------\n")
    except Exception as e:
        # Never let a failing analysis kill the capturing main loop.
        print(f"Fehler bei der Analyse von {video_path}: {e}")
|
||||
|
||||
class VideoRecorder:
    """Writes a fixed-length clip frame by frame and, once full, releases the
    file and kicks off AU analysis on a daemon thread."""

    def __init__(self, filename, width, height):
        # filename: output path of the clip being written.
        self.filename = filename
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        self.out = cv2.VideoWriter(filename, fourcc, FPS, (width, height))
        # Total number of frames this recorder accepts (duration * frame rate).
        self.frames_to_record = int(VIDEO_DURATION * FPS)
        self.frames_count = 0
        self.is_finished = False

    def write_frame(self, frame):
        """Append one frame; once the quota is reached, finalize the clip."""
        if self.frames_count < self.frames_to_record:
            self.out.write(frame)
            self.frames_count += 1
        else:
            self.finish()

    def finish(self):
        """Release the writer exactly once and start the analysis thread."""
        if not self.is_finished:
            self.out.release()
            self.is_finished = True
            abs_path = os.path.abspath(self.filename)
            print(f"Video fertig gespeichert: {self.filename}")

            # --- MULTITHREADING HERE ---
            # Run the analysis in a new thread so main() can keep reading frames.
            analysis_thread = threading.Thread(target=startAU_creation, args=(abs_path,))
            analysis_thread.daemon = True  # terminates when the main program exits
            analysis_thread.start()
|
||||
|
||||
def main():
    """Capture webcam frames, extract per-frame gaze/pupil features via
    MediaPipe FaceMesh into gaze_data.csv, and record overlapping clips
    that are analyzed for AUs on background threads."""
    cap = cv2.VideoCapture(CAMERA_INDEX)
    if not cap.isOpened():
        print("Fehler: Kamera konnte nicht geöffnet werden.")
        return

    # Recorders are created with the camera's native frame size.
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    active_recorders = []
    last_start_time = 0

    print("Aufnahme läuft. Drücke 'q' zum Beenden.")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # FaceMesh expects RGB input, OpenCV delivers BGR.
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            h, w, _ = frame.shape
            results = face_mesh.process(rgb)

            # Defaults written to the CSV when no face/eye is detected.
            left_valid = 0
            right_valid = 0
            left_diameter = None
            right_diameter = None

            left_gaze_x = None
            left_gaze_y = None
            right_gaze_x = None
            right_gaze_y = None

            if results.multi_face_landmarks:
                # Only one face is tracked (max_num_faces=1).
                face_landmarks = results.multi_face_landmarks[0]

                left_open = eye_openness(
                    face_landmarks.landmark,
                    LEFT_EYE_LIDS[0],
                    LEFT_EYE_LIDS[1],
                    h
                )

                right_open = eye_openness(
                    face_landmarks.landmark,
                    RIGHT_EYE_LIDS[0],
                    RIGHT_EYE_LIDS[1],
                    h
                )

                # An eye counts as open when the lid gap exceeds the threshold.
                left_valid = 1 if left_open > EYE_OPEN_THRESHOLD else 0
                right_valid = 1 if right_open > EYE_OPEN_THRESHOLD else 0

                for eye_name, eye_indices in [("left", LEFT_IRIS), ("right", RIGHT_IRIS)]:
                    iris_points = []

                    # Collect the four iris landmarks in pixel coordinates.
                    for idx in eye_indices:
                        lm = face_landmarks.landmark[idx]
                        x_i, y_i = int(lm.x * w), int(lm.y * h)
                        iris_points.append((x_i, y_i))

                    if len(iris_points) == 4:
                        # Iris center = mean of the four boundary points.
                        cx = int(sum(p[0] for p in iris_points) / 4)
                        cy = int(sum(p[1] for p in iris_points) / 4)

                        # Radius = farthest boundary point from the center.
                        radius = max(
                            ((x - cx) ** 2 + (y - cy) ** 2) ** 0.5
                            for (x, y) in iris_points
                        )

                        diameter = 2 * radius

                        # Debug overlay on the live preview.
                        cv2.circle(frame, (cx, cy), int(radius), (0, 255, 0), 2)

                        if eye_name == "left" and left_valid:
                            left_diameter = diameter
                            left_gaze_x, left_gaze_y = compute_gaze(
                                face_landmarks.landmark,
                                (cx, cy),
                                LEFT_EYE_GAZE_IDXS,
                                w, h
                            )

                        elif eye_name == "right" and right_valid:
                            right_diameter = diameter
                            right_gaze_x, right_gaze_y = compute_gaze(
                                face_landmarks.landmark,
                                (cx, cy),
                                RIGHT_EYE_GAZE_IDXS,
                                w, h
                            )

            # Write one CSV row per frame (columns match the header above).
            gaze_writer.writerow([
                time.time(),
                left_gaze_x,
                left_gaze_y,
                right_gaze_x,
                right_gaze_y,
                left_valid,
                right_valid,
                left_diameter,
                right_diameter
            ])

            current_time = time.time()

            # Start a new (overlapping) recorder every START_INTERVAL seconds.
            if current_time - last_start_time >= START_INTERVAL:
                timestamp = datetime.now().strftime("%H%M%S")
                filename = os.path.join(OUTPUT_DIR, f"rec_{timestamp}.avi")
                new_recorder = VideoRecorder(filename, width, height)
                active_recorders.append(new_recorder)
                last_start_time = current_time

            # Feed the frame to every active recorder and drop finished ones.
            # Iterate over a copy because the list is mutated inside the loop.
            for rec in active_recorders[:]:
                rec.write_frame(frame)
                if rec.is_finished:
                    active_recorders.remove(rec)

            cv2.imshow('Kamera Livestream', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            # Crude pacing toward FPS; processing time is not subtracted,
            # so the effective rate is somewhat below FPS.
            time.sleep(1/FPS)

    finally:
        gaze_csv.close()
        face_mesh.close()
        cap.release()
        cv2.destroyAllWindows()
        print("Programm beendet. Warte ggf. auf laufende Analysen...")
|
||||
|
||||
# Script entry point: start the capture/gaze-logging/record loop.
if __name__ == "__main__":
    main()
|
||||
@ -1,372 +0,0 @@
|
||||
import warnings
|
||||
|
||||
warnings.filterwarnings(
|
||||
"ignore",
|
||||
message=r".*SymbolDatabase\.GetPrototype\(\) is deprecated.*",
|
||||
category=UserWarning,
|
||||
module=r"google\.protobuf\.symbol_database"
|
||||
)
|
||||
import cv2
|
||||
import time
|
||||
import os
|
||||
import threading
|
||||
from datetime import datetime
|
||||
from feat import Detector
|
||||
import torch
|
||||
import mediapipe as mp
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
from eyeFeature_new import compute_features_from_parquet
|
||||
|
||||
# Import your helper functions
|
||||
# from db_helper import connect_db, disconnect_db, insert_rows_into_table, create_table
|
||||
import db_helper as db
|
||||
|
||||
# Konfiguration
|
||||
DB_PATH = Path("~/MSY_FS/databases/database.sqlite").expanduser()
|
||||
CAMERA_INDEX = 0
|
||||
OUTPUT_DIR = "recordings"
|
||||
VIDEO_DURATION = 50 # Sekunden
|
||||
START_INTERVAL = 5 # Sekunden bis zum nächsten Start
|
||||
FPS = 25.0 # Feste FPS
|
||||
|
||||
eye_tracking_features = {}
|
||||
|
||||
if not os.path.exists(OUTPUT_DIR):
|
||||
os.makedirs(OUTPUT_DIR)
|
||||
|
||||
# Globaler Detector, um ihn nicht bei jedem Video neu laden zu müssen (spart massiv Zeit/Speicher)
|
||||
print("Initialisiere AU-Detector (bitte warten)...")
|
||||
detector = Detector(au_model="xgb")
|
||||
|
||||
# ===== MediaPipe FaceMesh Setup =====
|
||||
mp_face_mesh = mp.solutions.face_mesh
|
||||
face_mesh = mp_face_mesh.FaceMesh(
|
||||
static_image_mode=False,
|
||||
max_num_faces=1,
|
||||
refine_landmarks=True, # wichtig für Iris
|
||||
min_detection_confidence=0.5,
|
||||
min_tracking_confidence=0.5
|
||||
)
|
||||
|
||||
LEFT_IRIS = [474, 475, 476, 477]
|
||||
RIGHT_IRIS = [469, 470, 471, 472]
|
||||
|
||||
LEFT_EYE_LIDS = (159, 145)
|
||||
RIGHT_EYE_LIDS = (386, 374)
|
||||
|
||||
EYE_OPEN_THRESHOLD = 6
|
||||
|
||||
LEFT_EYE_ALL = [33, 7, 163, 144, 145, 153, 154, 155,
|
||||
133, 173, 157, 158, 159, 160, 161, 246
|
||||
]
|
||||
|
||||
RIGHT_EYE_ALL = [263, 249, 390, 373, 374, 380, 381, 382,
|
||||
362, 398, 384, 385, 386, 387, 388, 466
|
||||
]
|
||||
|
||||
|
||||
|
||||
def eye_openness(landmarks, top_idx, bottom_idx, img_height):
    """Return the vertical eyelid gap in pixels.

    Args:
        landmarks: sequence of MediaPipe landmarks (normalised coords).
        top_idx: index of the upper-eyelid landmark.
        bottom_idx: index of the lower-eyelid landmark.
        img_height: frame height in pixels, used to de-normalise y.
    """
    # Landmark y-coordinates are normalised to [0, 1]; scale by frame height.
    upper_y = landmarks[top_idx].y
    lower_y = landmarks[bottom_idx].y
    return img_height * abs(upper_y - lower_y)
|
||||
|
||||
|
||||
def compute_gaze(landmarks, iris_center, eye_indices, w, h):
    """Map the iris centre into the eye's bounding box.

    Args:
        landmarks: sequence of MediaPipe landmarks (normalised coords).
        iris_center: (x, y) iris centre in pixel coordinates.
        eye_indices: landmark indices outlining the eye contour.
        w, h: frame width and height in pixels.

    Returns:
        (gaze_x, gaze_y) in [0, 1] relative to the eye box; (0.5, 0.5)
        when the box is degenerate (eye closed / landmarks collapsed).
    """
    iris_x, iris_y = iris_center

    # Pixel coordinates of the eye-contour landmarks.
    xs = [landmarks[i].x * w for i in eye_indices]
    ys = [landmarks[i].y * h for i in eye_indices]

    box_left, box_right = min(xs), max(xs)
    box_top, box_bottom = min(ys), max(ys)

    box_width = box_right - box_left
    box_height = box_bottom - box_top

    # Degenerate bounding box: fall back to a centred gaze.
    if box_width < 1 or box_height < 1:
        return 0.5, 0.5

    return (iris_x - box_left) / box_width, (iris_y - box_top) / box_height
|
||||
|
||||
def extract_aus(path, skip_frames):
    """Run the global AU detector on a video and return per-AU mean values.

    Args:
        path: path of the video file to analyse.
        skip_frames: number of frames to skip between analysed frames.

    Returns:
        Mean action-unit activations over all detected frames, or None
        when the aggregation fails.
    """
    # no_grad() disables autograd tracking; the detector otherwise raises
    # "Can't call numpy() on Tensor that requires grad".
    with torch.no_grad():
        prediction = detector.detect_video(
            path,
            skip_frames=skip_frames,
            face_detection_threshold=0.95,
        )

    try:
        # Average each action unit over all detected frames.
        return prediction.aus.mean()
    except Exception as e:
        print(f"Fehler bei der Summenbildung: {e}")
        return None
|
||||
|
||||
def startAU_creation(video_path, db_path):
    """Analyse a finished recording and persist its features (runs in its own thread).

    Extracts mean action units from the video, merges them with the
    module-level eye-tracking features, inserts a single row into
    ``feature_table`` and finally deletes the video plus its gaze parquet.

    Args:
        video_path: absolute path of the recorded .avi file.
        db_path: path of the SQLite database file.
    """
    try:
        print(f"\n[THREAD START] Analyse läuft für: {video_path}")
        # skip_frames: roughly one analysed frame every 5 seconds (FPS * 5).
        output = extract_aus(video_path, skip_frames=int(FPS*5))

        print(f"\n--- Ergebnis für {os.path.basename(video_path)} ---")
        print(output)
        print("--------------------------------------------------\n")
        if output is not None:
            # Open a dedicated connection inside this thread (SQLite
            # connections must not be shared across threads).
            conn, cursor = db.connect_db(db_path)

            # Series -> dict, then prefix AU names to match the DB schema.
            data_to_insert = output.to_dict()

            data_to_insert = {
                f"FACE_{k}_mean": v for k, v in data_to_insert.items()
            }

            # Unix timestamp (whole seconds) used as the row's start_time.
            now = datetime.now()
            ticks = int(time.mktime(now.timetuple()))

            data_to_insert['start_time'] = [ticks]
            # Merge in the latest eye-tracking features published by the
            # gaze pipeline (module-level dict, {} until first computed).
            data_to_insert = data_to_insert | eye_tracking_features

            #data_to_insert['start_time'] = [datetime.now().strftime("%Y-%m-%d %H:%M:%S")]

            # insert_rows_into_table expects a list of values per column,
            # so wrap every scalar in a one-element list.
            final_payload = {k: [v] if not isinstance(v, list) else v for k, v in data_to_insert.items()}

            db.insert_rows_into_table(conn, cursor, "feature_table", final_payload)

            db.disconnect_db(conn, cursor)
            print(f"--- Ergebnis für {os.path.basename(video_path)} in DB gespeichert ---")
            # Recording and its gaze file are no longer needed once stored.
            os.remove(video_path)
            os.remove(video_path.replace(".avi", "_gaze.parquet"))
            print(f"Löschen der Datei: {video_path}")
    except Exception as e:
        print(f"Fehler bei der Analyse von {video_path}: {e}")
|
||||
|
||||
|
||||
class VideoRecorder:
    """Writes a fixed-length .avi clip and kicks off AU analysis when done.

    Frames are accepted via write_frame() until VIDEO_DURATION seconds
    worth of frames (at FPS) have been stored; the next call finalises the
    file and launches the analysis thread.
    """

    def __init__(self, filename, width, height, db_path):
        self.gaze_data = []
        self.filename = filename
        self.db_path = db_path
        # XVID-encoded AVI container at the globally configured frame rate.
        codec = cv2.VideoWriter_fourcc(*'XVID')
        self.out = cv2.VideoWriter(filename, codec, FPS, (width, height))
        self.frames_to_record = int(VIDEO_DURATION * FPS)
        self.frames_count = 0
        self.is_finished = False

    def write_frame(self, frame):
        """Append one frame; finalise the clip once the quota is reached."""
        if self.frames_count >= self.frames_to_record:
            self.finish()
            return
        self.out.write(frame)
        self.frames_count += 1

    def finish(self):
        """Release the writer and start AU extraction in a background thread."""
        if self.is_finished:
            return
        self.out.release()
        self.is_finished = True
        full_path = os.path.abspath(self.filename)
        print(f"Video fertig gespeichert: {self.filename}")

        # Run the (slow) analysis off the capture loop so frame grabbing
        # keeps up; daemon thread dies together with the main process.
        worker = threading.Thread(target=startAU_creation, args=(full_path, self.db_path))
        worker.daemon = True
        worker.start()
|
||||
|
||||
class GazeRecorder:
    """Buffers per-frame gaze samples and writes them as a Parquet file.

    Mirrors VideoRecorder's lifetime: collects rows via write_frame() until
    VIDEO_DURATION seconds worth of samples (at FPS) are stored, then
    finish() dumps them to Parquet, computes the eye-tracking features and
    publishes them in the module-level ``eye_tracking_features`` dict.
    """

    def __init__(self, filename):
        self.filename = filename  # target .parquet path
        self.frames_to_record = int(VIDEO_DURATION * FPS)  # sample quota
        self.frames_count = 0
        self.gaze_data = []  # list of per-frame gaze dicts
        self.is_finished = False

    def write_frame(self, gaze_row):
        """Append one gaze sample (dict); finalise once the quota is reached."""
        if self.frames_count < self.frames_to_record:
            self.gaze_data.append(gaze_row)
            self.frames_count += 1
        else:
            self.finish()

    def finish(self):
        """Write the Parquet file and publish the computed eye features.

        BUG FIX: the original assigned ``eye_tracking_features = features``
        as a *local* variable, so the module-level dict that
        startAU_creation() merges into the DB row was never updated;
        declare it global here.
        """
        global eye_tracking_features
        if not self.is_finished:
            df = pd.DataFrame(self.gaze_data)
            df.to_parquet(self.filename, engine="pyarrow", index=False)
            print(f"Gaze-Parquet gespeichert: {self.filename}")
            features = compute_features_from_parquet(self.filename)
            print("Features:", features)
            self.is_finished = True
            # compute_features_from_parquet returns None for empty data;
            # keep the previous features instead of breaking the
            # ``dict | eye_tracking_features`` merge in startAU_creation.
            if features is not None:
                eye_tracking_features = features
|
||||
|
||||
def main():
    """Capture loop: webcam -> iris/gaze tracking -> staggered video + gaze recording."""
    cap = cv2.VideoCapture(CAMERA_INDEX)
    if not cap.isOpened():
        print("Fehler: Kamera konnte nicht geöffnet werden.")
        return

    # Native camera resolution; the video writers must match it.
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Overlapping recorders: a new video/gaze pair is started every
    # START_INTERVAL seconds, each running for VIDEO_DURATION seconds.
    active_video_recorders = []
    active_gaze_recorders = []
    last_start_time = 0

    print("Aufnahme läuft. Drücke 'q' zum Beenden.")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # MediaPipe expects RGB; OpenCV delivers BGR.
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            h, w, _ = frame.shape
            results = face_mesh.process(rgb)

            # Per-frame defaults used when no face / no open eye is found.
            left_valid = 0
            right_valid = 0
            left_diameter = None
            right_diameter = None

            left_gaze_x = None
            left_gaze_y = None
            right_gaze_x = None
            right_gaze_y = None

            if results.multi_face_landmarks:
                face_landmarks = results.multi_face_landmarks[0]

                # Eyelid gap in pixels, used as the open/closed criterion.
                left_open = eye_openness(
                    face_landmarks.landmark,
                    LEFT_EYE_LIDS[0],
                    LEFT_EYE_LIDS[1],
                    h
                )

                right_open = eye_openness(
                    face_landmarks.landmark,
                    RIGHT_EYE_LIDS[0],
                    RIGHT_EYE_LIDS[1],
                    h
                )

                left_valid = 1 if left_open > EYE_OPEN_THRESHOLD else 0
                right_valid = 1 if right_open > EYE_OPEN_THRESHOLD else 0

                for eye_name, eye_indices in [("left", LEFT_IRIS), ("right", RIGHT_IRIS)]:
                    iris_points = []

                    for idx in eye_indices:
                        lm = face_landmarks.landmark[idx]
                        x_i, y_i = int(lm.x * w), int(lm.y * h)
                        iris_points.append((x_i, y_i))

                    if len(iris_points) == 4:
                        # Iris centre = mean of the four iris landmarks.
                        cx = int(sum(p[0] for p in iris_points) / 4)
                        cy = int(sum(p[1] for p in iris_points) / 4)

                        # Radius = farthest iris landmark from the centre.
                        radius = max(
                            ((x - cx) ** 2 + (y - cy) ** 2) ** 0.5
                            for (x, y) in iris_points
                        )

                        diameter = 2 * radius

                        # Visual feedback: draw the detected iris circle.
                        cv2.circle(frame, (cx, cy), int(radius), (0, 255, 0), 2)

                        # NOTE(review): the "left" branch passes RIGHT_EYE_ALL
                        # (and vice versa) — presumably to compensate for the
                        # mirrored camera image; confirm this is intentional.
                        if eye_name == "left" and left_valid:
                            left_diameter = diameter
                            left_gaze_x, left_gaze_y = compute_gaze(
                                face_landmarks.landmark,
                                (cx, cy),
                                RIGHT_EYE_ALL,
                                w, h
                            )

                        elif eye_name == "right" and right_valid:
                            right_diameter = diameter
                            right_gaze_x, right_gaze_y = compute_gaze(
                                face_landmarks.landmark,
                                (cx, cy),
                                LEFT_EYE_ALL,
                                w, h
                            )

            # One gaze sample per frame; column names follow the
            # eye-feature module's expected EYE_* schema.
            gaze_row = {
                "timestamp": time.time(),
                "EYE_LEFT_GAZE_POINT_ON_DISPLAY_AREA_X": left_gaze_x,
                "EYE_LEFT_GAZE_POINT_ON_DISPLAY_AREA_Y": left_gaze_y,
                "EYE_RIGHT_GAZE_POINT_ON_DISPLAY_AREA_X": right_gaze_x,
                "EYE_RIGHT_GAZE_POINT_ON_DISPLAY_AREA_Y": right_gaze_y,
                "EYE_LEFT_PUPIL_VALIDITY": left_valid,
                "EYE_RIGHT_PUPIL_VALIDITY": right_valid,
                "EYE_LEFT_PUPIL_DIAMETER": left_diameter,
                "EYE_RIGHT_PUPIL_DIAMETER": right_diameter
            }

            current_time = time.time()

            # Start a new (video, gaze) recorder pair on the interval grid.
            if current_time - last_start_time >= START_INTERVAL:
                timestamp = datetime.now().strftime("%H%M%S")
                filename = os.path.join(OUTPUT_DIR, f"rec_{timestamp}.avi")
                video_recorder = VideoRecorder(filename, width, height, DB_PATH)

                gaze_filename = filename.replace(".avi", "_gaze.parquet")
                gaze_recorder = GazeRecorder(gaze_filename)

                active_video_recorders.append(video_recorder)
                active_gaze_recorders.append(gaze_recorder)

                last_start_time = current_time

            # Feed every active recorder; iterate over copies so finished
            # recorders can be removed from the live lists safely.
            for v_rec, g_rec in zip(active_video_recorders[:], active_gaze_recorders[:]):

                v_rec.write_frame(frame)
                g_rec.write_frame(gaze_row)

                if v_rec.is_finished:
                    active_video_recorders.remove(v_rec)

                if g_rec.is_finished:
                    active_gaze_recorders.remove(g_rec)

            cv2.imshow('Kamera Livestream', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            # Crude pacing toward the target FPS (capture/processing time
            # is not subtracted, so the effective rate is somewhat lower).
            time.sleep(1/FPS)

    finally:
        face_mesh.close()
        cap.release()
        cv2.destroyAllWindows()

        print("Programm beendet. Warte ggf. auf laufende Analysen...")


if __name__ == "__main__":
    main()
|
||||
@ -1,54 +0,0 @@
|
||||
import db_helper as db
|
||||
|
||||
DB_PATH = "action_units.db"
|
||||
|
||||
def setup_test_db():
    """Create the 'actionUnits' table in the SQLite test database."""
    # 1. Connecting creates the database file if it does not exist yet.
    conn, cursor = db.connect_db(DB_PATH)

    # 2. Schema: sample timestamp plus one REAL column per action unit.
    #    Further columns can be added later via the db_helper utilities.
    au_names = [
        "AU01", "AU02", "AU04", "AU05", "AU06", "AU07", "AU09", "AU10",
        "AU11", "AU12", "AU14", "AU15", "AU17", "AU20", "AU23", "AU24",
        "AU25", "AU26", "AU28", "AU43",
    ]
    columns = {"timestamp": "TEXT", **{name: "REAL" for name in au_names}}

    # A row without a timestamp is meaningless.
    constraints = {
        "timestamp": ["NOT NULL"]
    }

    # Primary key over the timestamp column.
    primary_key = {"pk_timestamp": ["timestamp"]}

    try:
        sql = db.create_table(conn, cursor, "actionUnits", columns, constraints, primary_key)
        print("Tabelle erfolgreich erstellt!")
        print(f"SQL-Befehl:\n{sql}")
    except Exception as e:
        print(f"Hinweis: {e}")
    finally:
        db.disconnect_db(conn, cursor)


if __name__ == "__main__":
    setup_test_db()
|
||||
@ -1,205 +0,0 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
from sklearn.preprocessing import MinMaxScaler
|
||||
from scipy.signal import welch
|
||||
from pygazeanalyser.detectors import fixation_detection, saccade_detection
|
||||
|
||||
|
||||
##############################################################################
|
||||
# KONFIGURATION
|
||||
##############################################################################
|
||||
|
||||
SAMPLING_RATE = 25 # Hz
|
||||
MIN_DUR_BLINKS = 2 # x * 40ms
|
||||
|
||||
|
||||
##############################################################################
|
||||
# EYE-TRACKING FUNKTIONEN
|
||||
##############################################################################
|
||||
|
||||
def clean_eye_df(df):
    """Keep only the EYE_* columns and drop rows where all of them are missing.

    Args:
        df: raw DataFrame, possibly containing non-eye columns.

    Returns:
        DataFrame with only EYE_* columns, inf replaced by NaN, all-NaN
        rows removed and the index reset; empty DataFrame when there are
        no eye columns at all.
    """
    eye_cols = [c for c in df.columns if c.startswith("EYE_")]

    if not eye_cols:
        return pd.DataFrame()

    cleaned = (
        df[eye_cols]
        .copy()
        .replace([np.inf, -np.inf], np.nan)
        .dropna(subset=eye_cols, how="all")
    )
    return cleaned.reset_index(drop=True)
|
||||
|
||||
|
||||
def extract_gaze_signal(df):
    """Combine left/right gaze into one interpolated, min-max-scaled array.

    Args:
        df: DataFrame with the EYE_*_GAZE_POINT_* and *_PUPIL_VALIDITY columns.

    Returns:
        (N, 2) ndarray of gaze positions, each axis scaled to [0, 1].
    """
    valid_left = (df["EYE_LEFT_PUPIL_VALIDITY"] == 1)
    valid_right = (df["EYE_RIGHT_PUPIL_VALIDITY"] == 1)

    def _axis(column, valid_mask):
        # Float copy with inf removed and invalid samples masked as NaN.
        series = df[column].astype(float).copy()
        series.replace([np.inf, -np.inf], np.nan, inplace=True)
        series[~valid_mask] = np.nan
        return series

    x_pair = np.column_stack([
        _axis("EYE_LEFT_GAZE_POINT_ON_DISPLAY_AREA_X", valid_left),
        _axis("EYE_RIGHT_GAZE_POINT_ON_DISPLAY_AREA_X", valid_right),
    ])
    y_pair = np.column_stack([
        _axis("EYE_LEFT_GAZE_POINT_ON_DISPLAY_AREA_Y", valid_left),
        _axis("EYE_RIGHT_GAZE_POINT_ON_DISPLAY_AREA_Y", valid_right),
    ])

    # NOTE: np.mean (not nanmean) — a sample is NaN unless BOTH eyes are
    # valid; such gaps are then bridged by the unlimited interpolation below.
    gx = pd.Series(np.mean(x_pair, axis=1)).interpolate(limit=None, limit_direction="both").bfill().ffill()
    gy = pd.Series(np.mean(y_pair, axis=1)).interpolate(limit=None, limit_direction="both").bfill().ffill()

    # Scale each axis independently to [0, 1].
    scaled_x = MinMaxScaler().fit_transform(gx.values.reshape(-1, 1))
    scaled_y = MinMaxScaler().fit_transform(gy.values.reshape(-1, 1))

    return np.column_stack((scaled_x, scaled_y))
|
||||
|
||||
|
||||
def extract_pupil(df):
    """Mean pupil diameter of both eyes plus a per-sample validity mask.

    Args:
        df: DataFrame with EYE_*_PUPIL_DIAMETER (and optionally
            EYE_*_PUPIL_VALIDITY) columns.

    Returns:
        Tuple of (pupil ndarray, validity ndarray of 0/1 ints).
    """
    left = df["EYE_LEFT_PUPIL_DIAMETER"].replace([np.inf, -np.inf], np.nan)
    right = df["EYE_RIGHT_PUPIL_DIAMETER"].replace([np.inf, -np.inf], np.nan)

    v_left = df.get("EYE_LEFT_PUPIL_VALIDITY")
    v_right = df.get("EYE_RIGHT_PUPIL_VALIDITY")

    if v_left is None or v_right is None:
        # No explicit validity columns: a sample counts as valid when at
        # least one eye produced a finite diameter.
        validity = (~left.isna() | ~right.isna()).astype(int).to_numpy()
    else:
        validity = ((v_left == 1) | (v_right == 1)).astype(int).to_numpy()

    # NOTE: np.mean yields NaN when either eye is NaN; interpolation
    # (limited to 50 samples) plus edge fills bridge those gaps.
    merged = np.mean(np.column_stack([left, right]), axis=1)
    merged = pd.Series(merged).interpolate(limit=50, limit_direction="both").bfill().ffill()

    return merged.to_numpy(), validity
|
||||
|
||||
|
||||
def detect_blinks(pupil_validity, min_duration=5):
    """Detect blinks as runs of invalid samples (validity == 0).

    Args:
        pupil_validity: sequence of 0/1 validity flags.
        min_duration: minimum run length in samples to count as a blink.

    Returns:
        List of [start, end) sample-index pairs.
    """
    blinks = []
    run_start = None

    for idx, flag in enumerate(pupil_validity):
        if flag == 0 and run_start is None:
            run_start = idx  # a blink candidate opens
        elif flag == 1 and run_start is not None:
            if idx - run_start >= min_duration:
                blinks.append([run_start, idx])
            run_start = None

    # NOTE: a run still open at the end of the signal is not reported.
    return blinks
|
||||
|
||||
|
||||
def compute_IPA(pupil, fs=25):
    """Index of Pupillary Activity (Duchowski 2018).

    Estimates the power spectral density of the pupil signal with Welch's
    method (2-second segments) and sums the power in the 0.6–2.0 Hz band.

    Args:
        pupil: 1-D pupil-diameter signal.
        fs: sampling rate in Hz.
    """
    freqs, power = welch(pupil, fs=fs, nperseg=int(fs * 2))
    in_band = (freqs >= 0.6) & (freqs <= 2.0)
    return np.sum(power[in_band])
|
||||
|
||||
|
||||
def extract_eye_features(df_eye, fs=25, min_dur_blinks=2):
    """
    Extract eye-tracking features for a single window.

    Args:
        df_eye: DataFrame containing only EYE_* columns (see clean_eye_df).
        fs: sampling rate of the signal in Hz.
        min_dur_blinks: minimum blink length in samples.

    Returns:
        Dictionary with fixation, saccade, blink and pupil features.
    """
    # Gaze: (N, 2) min-max-scaled positions averaged over both eyes.
    gaze = extract_gaze_signal(df_eye)

    # Pupil diameter trace plus per-sample validity mask (0 = invalid).
    pupil, pupil_validity = extract_pupil(df_eye)

    # ----------------------------
    # FIXATIONS
    # ----------------------------
    # Timestamps in milliseconds, as expected by pygazeanalyser.
    time_ms = np.arange(len(df_eye)) * 1000.0 / fs

    fix, efix = fixation_detection(
        x=gaze[:, 0], y=gaze[:, 1], time=time_ms,
        missing=0.0, maxdist=0.003, mindur=10
    )

    # f[2] is the fixation duration (presumably ms, per pygazeanalyser's
    # event tuples — verify against the library); keep finite, positive ones.
    fixation_durations = [f[2] for f in efix if np.isfinite(f[2]) and f[2] > 0]

    # Duration categories; note the bands are deliberately non-contiguous.
    F_short = sum(66 <= d <= 150 for d in fixation_durations)
    F_medium = sum(300 <= d <= 500 for d in fixation_durations)
    F_long = sum(d >= 1000 for d in fixation_durations)
    F_hundred = sum(d > 100 for d in fixation_durations)

    # ----------------------------
    # SACCADES
    # ----------------------------
    sac, esac = saccade_detection(
        x=gaze[:, 0], y=gaze[:, 1], time=time_ms,
        missing=0, minlen=12, maxvel=0.2, maxacc=1
    )

    # s[2] = duration; s[3..6] presumably start/end coordinates, so the
    # amplitude is the Euclidean start-to-end distance — confirm with the
    # pygazeanalyser event format.
    sac_durations = [s[2] for s in esac]
    sac_amplitudes = [((s[5]-s[3])**2 + (s[6]-s[4])**2)**0.5 for s in esac]

    # ----------------------------
    # BLINKS
    # ----------------------------
    blinks = detect_blinks(pupil_validity, min_duration=min_dur_blinks)
    # Sample counts divided by the sampling rate -> durations in seconds.
    blink_durations = [(b[1] - b[0]) / fs for b in blinks]

    # ----------------------------
    # PUPIL
    # ----------------------------
    # Guard against an all-NaN pupil trace (no valid samples at all).
    if np.all(np.isnan(pupil)):
        mean_pupil = np.nan
        ipa = np.nan
    else:
        mean_pupil = np.nanmean(pupil)
        ipa = compute_IPA(pupil, fs=fs)

    # Feature Dictionary (empty event lists fall back to 0, not NaN).
    return {
        "Fix_count_short_66_150": F_short,
        "Fix_count_medium_300_500": F_medium,
        "Fix_count_long_gt_1000": F_long,
        "Fix_count_100": F_hundred,
        "Fix_mean_duration": np.mean(fixation_durations) if fixation_durations else 0,
        "Fix_median_duration": np.median(fixation_durations) if fixation_durations else 0,
        "Sac_count": len(sac),
        "Sac_mean_amp": np.mean(sac_amplitudes) if sac_amplitudes else 0,
        "Sac_mean_dur": np.mean(sac_durations) if sac_durations else 0,
        "Sac_median_dur": np.median(sac_durations) if sac_durations else 0,
        "Blink_count": len(blinks),
        "Blink_mean_dur": np.mean(blink_durations) if blink_durations else 0,
        "Blink_median_dur": np.median(blink_durations) if blink_durations else 0,
        "Pupil_mean": mean_pupil,
        "Pupil_IPA": ipa
    }
|
||||
|
||||
def compute_features_from_parquet(parquet_path):
    """Load a gaze Parquet file and return its eye-feature dictionary.

    Args:
        parquet_path: path of a Parquet file with EYE_* columns.

    Returns:
        Feature dict from extract_eye_features, or None when the file
        contains no usable eye-tracking rows.
    """
    raw = pd.read_parquet(parquet_path)
    eye_df = clean_eye_df(raw)

    if eye_df.empty:
        return None

    # Module-level configuration supplies sampling rate and blink threshold.
    return extract_eye_features(
        eye_df,
        fs=SAMPLING_RATE,
        min_dur_blinks=MIN_DUR_BLINKS,
    )
|
||||
@ -17,8 +17,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df= pd.read_parquet(r\" \")\n",
|
||||
"print(df.shape)"
|
||||
"df= pd.read_parquet(r\"C:\\Users\\micha\\FAUbox\\WS2526_Fahrsimulator_MSY (Celina Korzer)\\AU_dataset\\output_windowed.parquet\")\n",
|
||||
"print(df.shape)\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
BIN
files_for_testing/50s_25Hz_dataset.parquet
Normal file
BIN
files_for_testing/50s_25Hz_dataset.parquet
Normal file
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1,529 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "47f6de7b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Bibliotheken importieren"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "99294260",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd \n",
|
||||
"import numpy as np \n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns \n",
|
||||
"import random \n",
|
||||
"import joblib \n",
|
||||
"from pathlib import Path \n",
|
||||
"\n",
|
||||
"from sklearn.model_selection import GroupKFold, GroupShuffleSplit\n",
|
||||
"from sklearn.preprocessing import StandardScaler \n",
|
||||
"from sklearn.metrics import ( \n",
|
||||
" precision_score, recall_score,\n",
|
||||
" confusion_matrix, roc_curve, auc, \n",
|
||||
" precision_recall_curve, f1_score, \n",
|
||||
" balanced_accuracy_score, accuracy_score\n",
|
||||
") \n",
|
||||
"\n",
|
||||
"import tensorflow as tf \n",
|
||||
"from tensorflow.keras import Input, layers, models, regularizers"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "52b4ca8c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Seed festlegen"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6e49d281",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"SEED = 42 \n",
|
||||
"np.random.seed(SEED) \n",
|
||||
"tf.random.set_seed(SEED) \n",
|
||||
"random.seed(SEED)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ae1a715f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Daten laden"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "870f01c3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data_path = Path(r\"~/data-paulusjafahrsimulator-gpu/new_datasets/50s_25Hz_dataset.parquet\") \n",
|
||||
"\n",
|
||||
"data = pd.read_parquet(path=data_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bedbc23b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Labels erstellen"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "38848515",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"low_all = data[((data[\"PHASE\"] == \"baseline\") | \n",
|
||||
" ((data[\"STUDY\"] == \"n-back\") & (data[\"PHASE\"] != \"baseline\") & (data[\"LEVEL\"].isin([1,4]))))].copy() \n",
|
||||
"\n",
|
||||
"high_all = pd.concat([ \n",
|
||||
" data[(data[\"STUDY\"]==\"n-back\") & (data[\"LEVEL\"].isin([2,3,5,6])) & (data[\"PHASE\"].isin([\"train\",\"test\"]))], \n",
|
||||
" data[(data[\"STUDY\"]==\"k-drive\") & (data[\"PHASE\"]!=\"baseline\")] \n",
|
||||
"]).copy() \n",
|
||||
"\n",
|
||||
"low_all[\"label\"] = 0 \n",
|
||||
"high_all[\"label\"] = 1 \n",
|
||||
"data = pd.concat([low_all, high_all], ignore_index=True).drop_duplicates() "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0b282acf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Features und Labels"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5edb00a0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Face AUs\n",
|
||||
"au_columns = [col for col in data.columns if \"face\" in col.lower()] \n",
|
||||
"\n",
|
||||
"#Eye Features\n",
|
||||
"eye_columns = [ \n",
|
||||
" 'Fix_count_short_66_150', \n",
|
||||
" 'Fix_count_medium_300_500', \n",
|
||||
" 'Fix_count_long_gt_1000', \n",
|
||||
" 'Fix_count_100', \n",
|
||||
" 'Fix_mean_duration', \n",
|
||||
" 'Fix_median_duration', \n",
|
||||
" 'Sac_count', \n",
|
||||
" 'Sac_mean_amp', \n",
|
||||
" 'Sac_mean_dur', \n",
|
||||
" 'Sac_median_dur', \n",
|
||||
" 'Blink_count', \n",
|
||||
" 'Blink_mean_dur', \n",
|
||||
" 'Blink_median_dur', \n",
|
||||
" 'Pupil_mean', \n",
|
||||
" 'Pupil_IPA' \n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"#Early Fusion\n",
|
||||
"feature_columns = au_columns + eye_columns\n",
|
||||
"\n",
|
||||
"#NaNs entfernen \n",
|
||||
"data = data.dropna(subset=feature_columns + [\"label\"])\n",
|
||||
"\n",
|
||||
"X = data[feature_columns].values[..., np.newaxis] \n",
|
||||
"y = data[\"label\"].values \n",
|
||||
"\n",
|
||||
"groups = data[\"subjectID\"].values\n",
|
||||
"print(data.columns.tolist())\n",
|
||||
"\n",
|
||||
"print(\"Gefundene FACE_AU-Spalten:\", au_columns)\n",
|
||||
"print(\"Gefundene Eye Features:\" , eye_columns)\n",
|
||||
"\n",
|
||||
"print(\"Anzahl FACE_AUs:\", len(au_columns)) \n",
|
||||
"print(\"Anzahl EYE Features:\", len(eye_columns)) \n",
|
||||
"print(\"Gesamtzahl Features:\", len(feature_columns))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d8689679",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Train-Test-Split"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b5cf88c3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)\n",
|
||||
"train_idx, test_idx = next(gss.split(X, y, groups))\n",
|
||||
"\n",
|
||||
"feature_columns_train, feature_columns_test = X[train_idx], X[test_idx]\n",
|
||||
"y_train, y_test = y[train_idx], y[test_idx]\n",
|
||||
"groups_train, groups_test = groups[train_idx], groups[test_idx]\n",
|
||||
"\n",
|
||||
"print(\"Train:\", len(y_train), \" | Test:\", len(y_test))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a539b83b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"CNN-Modell"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e4a7f496",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def build_model(input_shape, lr=1e-4): \n",
|
||||
" model = models.Sequential([ \n",
|
||||
" Input(shape=input_shape), \n",
|
||||
" layers.Conv1D(32, kernel_size=3, activation=\"relu\", kernel_regularizer=regularizers.l2(0.001)), \n",
|
||||
" layers.BatchNormalization(), \n",
|
||||
" layers.MaxPooling1D(pool_size=2),\n",
|
||||
"\n",
|
||||
" layers.Conv1D(64, kernel_size=3, activation=\"relu\", kernel_regularizer=regularizers.l2(0.001)), \n",
|
||||
" layers.BatchNormalization(), \n",
|
||||
" layers.GlobalAveragePooling1D(), \n",
|
||||
" \n",
|
||||
" layers.Dense(32, activation=\"relu\", kernel_regularizer=regularizers.l2(0.001)), \n",
|
||||
" layers.Dropout(0.5), \n",
|
||||
" layers.Dense(1, activation=\"sigmoid\") \n",
|
||||
" ]) \n",
|
||||
" \n",
|
||||
" model.compile( \n",
|
||||
" optimizer=tf.keras.optimizers.Adam(learning_rate=lr), \n",
|
||||
" loss=\"binary_crossentropy\", \n",
|
||||
" metrics=[\"accuracy\", tf.keras.metrics.AUC(name=\"auc\")] \n",
|
||||
" ) \n",
|
||||
" return model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5905871b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Cross-Validation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "90658000",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"gkf = GroupKFold(n_splits=5) \n",
|
||||
"cv_histories = [] \n",
|
||||
"cv_results = [] \n",
|
||||
"fold_subjects = []\n",
|
||||
"all_conf_matrices = []\n",
|
||||
"\n",
|
||||
"for fold, (train_idx, val_idx) in enumerate(gkf.split(X, y, groups)):\n",
|
||||
" train_subjects = np.unique(groups[train_idx]) \n",
|
||||
" val_subjects = np.unique(groups[val_idx]) \n",
|
||||
" fold_subjects.append({\"Fold\": fold+1, \n",
|
||||
" \"Train_Subjects\": train_subjects, \n",
|
||||
" \"Val_Subjects\": val_subjects}) \n",
|
||||
" \n",
|
||||
" print(f\"\\n--- Fold {fold+1} ---\") \n",
|
||||
" print(\"Train-Subjects:\", train_subjects) \n",
|
||||
" print(\"Val-Subjects:\", val_subjects) \n",
|
||||
"\n",
|
||||
" #Split\n",
|
||||
" X_train, X_val = X[train_idx], X[val_idx] \n",
|
||||
" y_train, y_val = y[train_idx], y[val_idx] # Normalisierung pro Fold \n",
|
||||
"\n",
|
||||
" #Normalisierung pro Fold\n",
|
||||
" scaler = StandardScaler() \n",
|
||||
" X_train = scaler.fit_transform(X_train.reshape(len(X_train), -1)).reshape(X_train.shape) \n",
|
||||
" X_val = scaler.transform(X_val.reshape(len(X_val), -1)).reshape(X_val.shape) \n",
|
||||
"\n",
|
||||
" # Plausibilitäts-Check \n",
|
||||
" print(\"Train Mittelwerte (erste 5 Features):\", X_train.mean(axis=0)[:5]) \n",
|
||||
" print(\"Train Std (erste 5 Features):\", X_train.std(axis=0)[:5]) \n",
|
||||
" print(\"Val Mittelwerte (erste 5 Features):\", X_val.mean(axis=0)[:5]) \n",
|
||||
" print(\"Val Std (erste 5 Features):\", X_val.std(axis=0)[:5]) \n",
|
||||
"\n",
|
||||
" # Modell \n",
|
||||
" model = build_model(input_shape=(len(feature_columns_train),1), lr=1e-4) \n",
|
||||
" model.summary() \n",
|
||||
"\n",
|
||||
" callbacks = [ \n",
|
||||
" tf.keras.callbacks.EarlyStopping(monitor=\"val_loss\", patience=10, restore_best_weights=True), \n",
|
||||
" tf.keras.callbacks.ReduceLROnPlateau(monitor=\"val_loss\", factor=0.5, patience=5, min_lr=1e-6) \n",
|
||||
" ] \n",
|
||||
"\n",
|
||||
" history = model.fit( \n",
|
||||
" X_train, y_train, \n",
|
||||
" validation_data=(X_val, y_val), \n",
|
||||
" epochs=100, \n",
|
||||
" batch_size=16, \n",
|
||||
" callbacks=callbacks, \n",
|
||||
" verbose=0 \n",
|
||||
" ) \n",
|
||||
"\n",
|
||||
" cv_histories.append(history.history) \n",
|
||||
" scores = model.evaluate(X_val, y_val, verbose=0) \n",
|
||||
" cv_results.append(scores) \n",
|
||||
" print(f\"Fold {fold+1} - Val Loss: {scores[0]:.4f}, Val Acc: {scores[1]:.4f}, Val AUC: {scores[2]:.4f}\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" #Konfusionsmatrix \n",
|
||||
" y_pred = (model.predict(X_val) > 0.5).astype(int) \n",
|
||||
" cm = confusion_matrix(y_val, y_pred) \n",
|
||||
" all_conf_matrices.append(cm) \n",
|
||||
" \n",
|
||||
" print(f\"Konfusionsmatrix Fold {fold+1}:\\n{cm}\\n\") \n",
|
||||
" \n",
|
||||
"# Aggregierte Matrix \n",
|
||||
"agg_cm = sum(all_conf_matrices) \n",
|
||||
"print(\"Aggregierte Konfusionsmatrix über alle Folds:\") \n",
|
||||
"print(agg_cm)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d10b7e78",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9aeba7f4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#results\n",
|
||||
"cv_results = np.array(cv_results) \n",
|
||||
"print(\"\\n=== Cross-Validation Ergebnisse ===\") \n",
|
||||
"print(f\"Durchschnittlicher Val-Loss: {cv_results[:,0].mean():.4f}\") \n",
|
||||
"print(f\"Durchschnittliche Val-Accuracy: {cv_results[:,1].mean():.4f}\") \n",
|
||||
"print(f\"Durchschnittliche Val-AUC: {cv_results[:,2].mean():.4f}\")\n",
|
||||
"\n",
|
||||
"#Ergebnis-Tabelle erstellen\n",
|
||||
"results_table = pd.DataFrame({ \n",
|
||||
" \"Fold\": np.arange(1, len(cv_results)+1), \n",
|
||||
" \"Val Loss\": cv_results[:,0], \n",
|
||||
" \"Val Accuracy\": cv_results[:,1], \n",
|
||||
" \"Val AUC\": cv_results[:,2] }) \n",
|
||||
"\n",
|
||||
"# Durchschnittszeile hinzufügen \n",
|
||||
"avg_row = pd.DataFrame({ \n",
|
||||
" \"Fold\": [\"Ø\"], \n",
|
||||
" \"Val Loss\": [cv_results[:,0].mean()], \n",
|
||||
" \"Val Accuracy\": [cv_results[:,1].mean()], \n",
|
||||
" \"Val AUC\": [cv_results[:,2].mean()] \n",
|
||||
"}) \n",
|
||||
"\n",
|
||||
"results_table = pd.concat([results_table, avg_row], ignore_index=True) \n",
|
||||
"\n",
|
||||
"print(\"\\n=== Ergebnis-Tabelle ===\") \n",
|
||||
"print(results_table) \n",
|
||||
"\n",
|
||||
"#Tabelle speichern \n",
|
||||
"results_table.to_csv(\"cnn_crossVal_results.csv\", index=False) \n",
|
||||
"print(\"Ergebnisse gespeichert als 'cnn_crossVal_results.csv'\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fae5df7a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finales Modell trainieren"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5b3eab61",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"scaler_final = StandardScaler() \n",
|
||||
"X_scaled = scaler_final.fit_transform(feature_columns_train.reshape(len(feature_columns_train), -1)).reshape(feature_columns_train.shape) \n",
|
||||
"\n",
|
||||
"final_model = build_model(input_shape=(len(feature_columns_train),1), lr=1e-4) \n",
|
||||
"final_model.summary() \n",
|
||||
"\n",
|
||||
"final_model.fit( \n",
|
||||
" X_scaled, y_train, \n",
|
||||
" epochs=150, \n",
|
||||
" batch_size=16, \n",
|
||||
" verbose=1 \n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7c7f9cc4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Speichern des Modells"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "2d3af5be",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# final_model.save(\"cnn_crossVal_EarlyFusion_V2.keras\") \n",
|
||||
"# joblib.dump(scaler_final, \"scaler_crossVal_EarlyFusion_V2.joblib\") \n",
|
||||
"\n",
|
||||
"# print(\"Finales Modell und Scaler gespeichert als 'cnn_crossVal_EarlyFusion_V2.keras' und 'scaler_crossVal_EarlyFusion_V2.joblib'\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c11891e0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Plots"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9f6a8584",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#plots\n",
|
||||
"def plot_cv_histories(cv_histories, metric): \n",
|
||||
" plt.figure(figsize=(10,6)) \n",
|
||||
" \n",
|
||||
" for i, hist in enumerate(cv_histories): \n",
|
||||
" plt.plot(hist[metric], label=f\"Fold {i+1} Train\", alpha=0.7) \n",
|
||||
" plt.plot(hist[f\"val_{metric}\"], label=f\"Fold {i+1} Val\", linestyle=\"--\", alpha=0.7) \n",
|
||||
" plt.xlabel(\"Epochs\") \n",
|
||||
" plt.ylabel(metric.capitalize()) \n",
|
||||
" plt.title(f\"Cross-Validation {metric.capitalize()} Verläufe\") \n",
|
||||
" plt.legend() \n",
|
||||
" plt.grid(True) \n",
|
||||
" plt.show()\n",
|
||||
" \n",
|
||||
"plot_cv_histories(cv_histories, \"loss\") \n",
|
||||
"plot_cv_histories(cv_histories, \"accuracy\") \n",
|
||||
"plot_cv_histories(cv_histories, \"auc\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4aebe6c6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0d34d6b7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Preprocessing Testdaten \n",
|
||||
"X_test_scaled = scaler.transform( \n",
|
||||
" feature_columns_test.reshape(len(feature_columns_test), -1) \n",
|
||||
").reshape(feature_columns_test.shape) \n",
|
||||
"\n",
|
||||
"# Vorhersagen \n",
|
||||
"y_prob_test = model.predict(X_test_scaled).flatten() \n",
|
||||
"y_pred_test = (y_prob_test > 0.5).astype(int) \n",
|
||||
"\n",
|
||||
"# Konfusionsmatrix \n",
|
||||
"cm_test = confusion_matrix(y_test, y_pred_test) \n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(6,5)) \n",
|
||||
"sns.heatmap(cm_test, annot=True, fmt=\"d\", cmap=\"Greens\", \n",
|
||||
" xticklabels=[\"Pred 0\", \"Pred 1\"], \n",
|
||||
" yticklabels=[\"True 0\", \"True 1\"]) \n",
|
||||
"plt.title(\"Konfusionsmatrix - Testdaten\") \n",
|
||||
"plt.show() \n",
|
||||
"\n",
|
||||
"# ROC \n",
|
||||
"fpr, tpr, _ = roc_curve(y_test, y_prob_test) \n",
|
||||
"roc_auc = auc(fpr, tpr) \n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(7,6)) \n",
|
||||
"plt.plot(fpr, tpr, label=f\"AUC = {roc_auc:.3f}\") \n",
|
||||
"plt.plot([0,1], [0,1], \"k--\") \n",
|
||||
"plt.title(\"ROC - Testdaten\") \n",
|
||||
"plt.legend() \n",
|
||||
"plt.grid(True) \n",
|
||||
"plt.show() \n",
|
||||
"\n",
|
||||
"# Precision-Recall \n",
|
||||
"precision, recall, _ = precision_recall_curve(y_test, y_prob_test) \n",
|
||||
"plt.figure(figsize=(7,6)) \n",
|
||||
"plt.plot(recall, precision) \n",
|
||||
"plt.title(\"Precision-Recall - Testdaten\") \n",
|
||||
"plt.grid(True) \n",
|
||||
"plt.show() \n",
|
||||
"\n",
|
||||
"# Metriken \n",
|
||||
"print(\"Accuracy:\", accuracy_score(y_test, y_pred_test))\n",
|
||||
"print(\"F1-Score:\", f1_score(y_test, y_pred_test)) \n",
|
||||
"print(\"Balanced Accuracy:\", balanced_accuracy_score(y_test, y_pred_test)) \n",
|
||||
"print(\"Precision:\", precision_score(y_test, y_pred_test)) \n",
|
||||
"print(\"Recall:\", recall_score(y_test, y_pred_test)) \n",
|
||||
"print(\"AUC:\", roc_auc)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@ -1,458 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b65b6b7d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Imports"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "530e70af",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import pandas as pd \n",
|
||||
"import numpy as np \n",
|
||||
"import matplotlib.pyplot as plt \n",
|
||||
"import seaborn as sns \n",
|
||||
"import random \n",
|
||||
"import joblib \n",
|
||||
"from pathlib import Path \n",
|
||||
"\n",
|
||||
"from sklearn.model_selection import GroupKFold, GroupShuffleSplit \n",
|
||||
"from sklearn.preprocessing import StandardScaler \n",
|
||||
"from sklearn.metrics import ( \n",
|
||||
" precision_score, recall_score,\n",
|
||||
" confusion_matrix, roc_curve, auc, \n",
|
||||
" precision_recall_curve, f1_score, \n",
|
||||
" balanced_accuracy_score, accuracy_score\n",
|
||||
") \n",
|
||||
"\n",
|
||||
"import tensorflow as tf \n",
|
||||
"from tensorflow.keras import Input, layers, models"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0d01127c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Seed"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "67aaf56e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"SEED = 42 \n",
|
||||
"np.random.seed(SEED) \n",
|
||||
"tf.random.set_seed(SEED) \n",
|
||||
"random.seed(SEED)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "844e250c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Daten laden "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "73a34b69",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data_path = Path(r\"~/data-paulusjafahrsimulator-gpu/new_datasets/50s_25Hz_dataset.parquet\") \n",
|
||||
"data = pd.read_parquet(path=data_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "325179d3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Daten vorbereiten"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a5ad3126",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"low_all = data[ \n",
|
||||
" ((data[\"PHASE\"] == \"baseline\") | \n",
|
||||
" ((data[\"STUDY\"] == \"n-back\") & \n",
|
||||
" (data[\"PHASE\"] != \"baseline\") & \n",
|
||||
" (data[\"LEVEL\"].isin([1, 4])))) \n",
|
||||
"].copy() \n",
|
||||
"\n",
|
||||
"high_all = pd.concat([ \n",
|
||||
" data[(data[\"STUDY\"] == \"n-back\") & \n",
|
||||
" (data[\"LEVEL\"].isin([2, 3, 5, 6])) & \n",
|
||||
" (data[\"PHASE\"].isin([\"train\", \"test\"]))], \n",
|
||||
" data[(data[\"STUDY\"] == \"k-drive\") & (data[\"PHASE\"] != \"baseline\")] \n",
|
||||
"]).copy() \n",
|
||||
"\n",
|
||||
"low_all[\"label\"] = 0 \n",
|
||||
"high_all[\"label\"] = 1 \n",
|
||||
"\n",
|
||||
"data = pd.concat([low_all, high_all], ignore_index=True).drop_duplicates()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fd843b62",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Features"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5f10e6ca",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"au_columns = [col for col in data.columns if \"face\" in col.lower()] \n",
|
||||
"\n",
|
||||
"eye_columns = [ \n",
|
||||
" 'Fix_count_short_66_150','Fix_count_medium_300_500','Fix_count_long_gt_1000', \n",
|
||||
" 'Fix_count_100','Fix_mean_duration','Fix_median_duration', \n",
|
||||
" 'Sac_count','Sac_mean_amp','Sac_mean_dur','Sac_median_dur', \n",
|
||||
" 'Blink_count','Blink_mean_dur','Blink_median_dur', \n",
|
||||
" 'Pupil_mean','Pupil_IPA' \n",
|
||||
"] \n",
|
||||
"\n",
|
||||
"# NaNs entfernen \n",
|
||||
"data = data.dropna(subset=au_columns + eye_columns + [\"label\"]) \n",
|
||||
"\n",
|
||||
"# Arrays \n",
|
||||
"X_au = data[au_columns].values[..., np.newaxis] \n",
|
||||
"X_eye = data[eye_columns].values \n",
|
||||
"y = data[\"label\"].values \n",
|
||||
"groups = data[\"subjectID\"].values"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cabe09af",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Train/Test Split"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "52d3b7cf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)\n",
|
||||
"train_idx, test_idx = next(gss.split(X_au, y, groups))\n",
|
||||
"\n",
|
||||
"X_au_train, X_au_test = X_au[train_idx], X_au[test_idx]\n",
|
||||
"X_eye_train, X_eye_test = X_eye[train_idx], X_eye[test_idx]\n",
|
||||
"y_train, y_test = y[train_idx], y[test_idx]\n",
|
||||
"groups_train, groups_test = groups[train_idx], groups[test_idx]\n",
|
||||
"\n",
|
||||
"print(\"Train:\", len(y_train), \" | Test:\", len(y_test))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6dedded5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Hybrid CNN-Modell"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "41cc1b30",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def build_hybrid_model(n_aus, n_eye, lr=1e-4): \n",
|
||||
" input_au = Input(shape=(n_aus, 1), name=\"au_input\") \n",
|
||||
" x = layers.Conv1D(32, 3, activation=\"relu\")(input_au) \n",
|
||||
" x = layers.BatchNormalization()(x) \n",
|
||||
" x = layers.MaxPooling1D(2)(x) \n",
|
||||
" x = layers.Conv1D(64, 3, activation=\"relu\")(x) \n",
|
||||
" x = layers.BatchNormalization()(x) \n",
|
||||
" x = layers.GlobalAveragePooling1D()(x) \n",
|
||||
"\n",
|
||||
" input_eye = Input(shape=(n_eye,), name=\"eye_input\") \n",
|
||||
" e = layers.Dense(32, activation=\"relu\")(input_eye) \n",
|
||||
" e = layers.Dropout(0.3)(e) \n",
|
||||
" e = layers.Dense(16, activation=\"relu\")(e) \n",
|
||||
"\n",
|
||||
" fused = layers.concatenate([x, e]) \n",
|
||||
" z = layers.Dense(32, activation=\"relu\")(fused) \n",
|
||||
" z = layers.Dropout(0.4)(z) \n",
|
||||
" output = layers.Dense(1, activation=\"sigmoid\")(z) \n",
|
||||
"\n",
|
||||
" model = models.Model(inputs=[input_au, input_eye], outputs=output) \n",
|
||||
" model.compile( \n",
|
||||
" optimizer=tf.keras.optimizers.Adam(learning_rate=lr), \n",
|
||||
" loss=\"binary_crossentropy\", \n",
|
||||
" metrics=[\"accuracy\", tf.keras.metrics.AUC(name=\"auc\")] \n",
|
||||
" ) \n",
|
||||
" \n",
|
||||
" return model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "cea6d0d0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Cross Validation (nur Trainingsdaten)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9c390b46",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"gkf = GroupKFold(n_splits=5) \n",
|
||||
"cv_histories = [] \n",
|
||||
"cv_results = [] \n",
|
||||
"all_conf_matrices = [] \n",
|
||||
"\n",
|
||||
"for fold, (tr_idx, va_idx) in enumerate(gkf.split(X_au_train, y_train, groups_train)): \n",
|
||||
" print(f\"\\n===== FOLD {fold+1} =====\") \n",
|
||||
" \n",
|
||||
" X_tr_au, X_va_au = X_au_train[tr_idx], X_au_train[va_idx] \n",
|
||||
" X_tr_eye, X_va_eye = X_eye_train[tr_idx], X_eye_train[va_idx] \n",
|
||||
" y_tr, y_va = y_train[tr_idx], y_train[va_idx] \n",
|
||||
" \n",
|
||||
" # Scaler pro Fold \n",
|
||||
" scaler_au = StandardScaler() \n",
|
||||
" scaler_eye = StandardScaler() \n",
|
||||
" \n",
|
||||
" X_tr_au = scaler_au.fit_transform(X_tr_au.reshape(len(X_tr_au), -1)).reshape(X_tr_au.shape) \n",
|
||||
" X_va_au = scaler_au.transform(X_va_au.reshape(len(X_va_au), -1)).reshape(X_va_au.shape) \n",
|
||||
" \n",
|
||||
" X_tr_eye = scaler_eye.fit_transform(X_tr_eye) \n",
|
||||
" X_va_eye = scaler_eye.transform(X_va_eye) \n",
|
||||
" \n",
|
||||
" # Modell \n",
|
||||
" model_cv = build_hybrid_model(len(au_columns), len(eye_columns)) \n",
|
||||
" \n",
|
||||
" callbacks = [ \n",
|
||||
" tf.keras.callbacks.EarlyStopping(monitor=\"val_loss\", patience=10, restore_best_weights=True), \n",
|
||||
" tf.keras.callbacks.ReduceLROnPlateau(monitor=\"val_loss\", factor=0.5, patience=5, min_lr=1e-6) \n",
|
||||
" ] \n",
|
||||
" \n",
|
||||
" history = model_cv.fit( \n",
|
||||
" [X_tr_au, X_tr_eye], y_tr, \n",
|
||||
" validation_data=([X_va_au, X_va_eye], y_va), \n",
|
||||
" epochs=100, \n",
|
||||
" batch_size=16, \n",
|
||||
" verbose=0 \n",
|
||||
" ) \n",
|
||||
" \n",
|
||||
" cv_histories.append(history.history) \n",
|
||||
" \n",
|
||||
" # Evaluation \n",
|
||||
" scores = model_cv.evaluate([X_va_au, X_va_eye], y_va, verbose=0) \n",
|
||||
" cv_results.append(scores) \n",
|
||||
" print(f\"Val Loss={scores[0]:.4f} | Val Acc={scores[1]:.4f} | Val AUC={scores[2]:.4f}\") \n",
|
||||
" \n",
|
||||
" # Konfusionsmatrix pro Fold \n",
|
||||
" y_pred_va = (model_cv.predict([X_va_au, X_va_eye]) > 0.5).astype(int) \n",
|
||||
" cm = confusion_matrix(y_va, y_pred_va) \n",
|
||||
" all_conf_matrices.append(cm) \n",
|
||||
" \n",
|
||||
" plt.figure(figsize=(6,5)) \n",
|
||||
" sns.heatmap(cm, annot=True, fmt=\"d\", cmap=\"Blues\", \n",
|
||||
" xticklabels=[\"Pred 0\", \"Pred 1\"], \n",
|
||||
" yticklabels=[\"True 0\", \"True 1\"]) \n",
|
||||
" plt.title(f\"Konfusionsmatrix - Fold {fold+1}\") \n",
|
||||
" plt.show() \n",
|
||||
" \n",
|
||||
"# Aggregierte Konfusionsmatrix \n",
|
||||
"agg_cm = sum(all_conf_matrices) \n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(6,5)) \n",
|
||||
"sns.heatmap(agg_cm, annot=True, fmt=\"d\", cmap=\"Purples\", \n",
|
||||
" xticklabels=[\"Pred 0\", \"Pred 1\"], \n",
|
||||
" yticklabels=[\"True 0\", \"True 1\"]) \n",
|
||||
"plt.title(\"Aggregierte Konfusionsmatrix - alle Folds\") \n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "97df9df1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9eae5c0f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#results\n",
|
||||
"cv_results = np.array(cv_results) \n",
|
||||
"print(\"\\n=== Cross-Validation Ergebnisse ===\") \n",
|
||||
"print(f\"Durchschnittlicher Val-Loss: {cv_results[:,0].mean():.4f}\") \n",
|
||||
"print(f\"Durchschnittliche Val-Accuracy: {cv_results[:,1].mean():.4f}\") \n",
|
||||
"print(f\"Durchschnittliche Val-AUC: {cv_results[:,2].mean():.4f}\")\n",
|
||||
"\n",
|
||||
"#Ergebnis-Tabelle erstellen\n",
|
||||
"results_table = pd.DataFrame({ \n",
|
||||
" \"Fold\": np.arange(1, len(cv_results)+1), \n",
|
||||
" \"Val Loss\": cv_results[:,0], \n",
|
||||
" \"Val Accuracy\": cv_results[:,1], \n",
|
||||
" \"Val AUC\": cv_results[:,2] }) \n",
|
||||
"\n",
|
||||
"# Durchschnittszeile hinzufügen \n",
|
||||
"avg_row = pd.DataFrame({ \n",
|
||||
" \"Fold\": [\"Ø\"], \n",
|
||||
" \"Val Loss\": [cv_results[:,0].mean()], \n",
|
||||
" \"Val Accuracy\": [cv_results[:,1].mean()], \n",
|
||||
" \"Val AUC\": [cv_results[:,2].mean()] \n",
|
||||
"}) \n",
|
||||
"\n",
|
||||
"results_table = pd.concat([results_table, avg_row], ignore_index=True) \n",
|
||||
"\n",
|
||||
"print(\"\\n=== Ergebnis-Tabelle ===\") \n",
|
||||
"print(results_table) \n",
|
||||
"\n",
|
||||
"#Tabelle speichern \n",
|
||||
"results_table.to_csv(\"cnn_crossVal_results.csv\", index=False) \n",
|
||||
"print(\"Ergebnisse gespeichert als 'cnn_crossVal_results.csv'\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "7e564308",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Speichern des Modells"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9afc926b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_cv.save(\"hybrid_fusion_model_Test_group_split.keras\") \n",
|
||||
"joblib.dump(scaler_au, \"scaler_au_Test_group_split.joblib\") \n",
|
||||
"joblib.dump(scaler_eye, \"scaler_eye_Test_group_split.joblib\") \n",
|
||||
"\n",
|
||||
"print(\"Finales Modell gespeichert.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "391af5d5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0bb8c14c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Preprocessing Testdaten \n",
|
||||
"X_au_test_scaled = scaler_au.transform( \n",
|
||||
" X_au_test.reshape(len(X_au_test), -1) \n",
|
||||
").reshape(X_au_test.shape) \n",
|
||||
"\n",
|
||||
"X_eye_test_scaled = scaler_eye.transform(X_eye_test) \n",
|
||||
"\n",
|
||||
"# Vorhersagen \n",
|
||||
"y_prob_test = model_cv.predict([X_au_test_scaled, X_eye_test_scaled]).flatten() \n",
|
||||
"y_pred_test = (y_prob_test > 0.5).astype(int) \n",
|
||||
"\n",
|
||||
"# Konfusionsmatrix \n",
|
||||
"cm_test = confusion_matrix(y_test, y_pred_test) \n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(6,5)) \n",
|
||||
"sns.heatmap(cm_test, annot=True, fmt=\"d\", cmap=\"Greens\", \n",
|
||||
" xticklabels=[\"Pred 0\", \"Pred 1\"], \n",
|
||||
" yticklabels=[\"True 0\", \"True 1\"]) \n",
|
||||
"plt.title(\"Konfusionsmatrix - Testdaten\") \n",
|
||||
"plt.show() \n",
|
||||
"\n",
|
||||
"# ROC \n",
|
||||
"fpr, tpr, _ = roc_curve(y_test, y_prob_test) \n",
|
||||
"roc_auc = auc(fpr, tpr) \n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(7,6)) \n",
|
||||
"plt.plot(fpr, tpr, label=f\"AUC = {roc_auc:.3f}\") \n",
|
||||
"plt.plot([0,1], [0,1], \"k--\") \n",
|
||||
"plt.title(\"ROC - Testdaten\") \n",
|
||||
"plt.legend() \n",
|
||||
"plt.grid(True) \n",
|
||||
"plt.show() \n",
|
||||
"\n",
|
||||
"# Precision-Recall \n",
|
||||
"precision, recall, _ = precision_recall_curve(y_test, y_prob_test) \n",
|
||||
"plt.figure(figsize=(7,6)) \n",
|
||||
"plt.plot(recall, precision) \n",
|
||||
"plt.title(\"Precision-Recall - Testdaten\") \n",
|
||||
"plt.grid(True) \n",
|
||||
"plt.show() \n",
|
||||
"\n",
|
||||
"# Metriken \n",
|
||||
"print(\"Accuracy:\", accuracy_score(y_test, y_pred_test))\n",
|
||||
"print(\"F1-Score:\", f1_score(y_test, y_pred_test)) \n",
|
||||
"print(\"Balanced Accuracy:\", balanced_accuracy_score(y_test, y_pred_test)) \n",
|
||||
"print(\"Precision:\", precision_score(y_test, y_pred_test)) \n",
|
||||
"print(\"Recall:\", recall_score(y_test, y_pred_test)) \n",
|
||||
"print(\"AUC:\", roc_auc)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1,308 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d48f2e13",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Importe"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e34b838d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np \n",
|
||||
"import pandas as pd \n",
|
||||
"import joblib \n",
|
||||
"import seaborn as sns \n",
|
||||
"import matplotlib.pyplot as plt \n",
|
||||
"\n",
|
||||
"from sklearn.metrics import ( \n",
|
||||
" confusion_matrix, \n",
|
||||
" roc_curve, auc, \n",
|
||||
" precision_recall_curve, \n",
|
||||
" f1_score, \n",
|
||||
" balanced_accuracy_score \n",
|
||||
")\n",
|
||||
" \n",
|
||||
"import tensorflow as tf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "324554b5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Modell und Scaler laden"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4acc3d2f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = tf.keras.models.load_model(\"hybrid_fusion_model_V2.keras\") \n",
|
||||
"scaler_au = joblib.load(\"scaler_au_V2.joblib\") \n",
|
||||
"scaler_eye = joblib.load(\"scaler_eye_V2.joblib\")\n",
|
||||
"\n",
|
||||
"print(\"Modell & Scaler erfolgreich geladen.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4271cbee",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Features laden"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8342ea10",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"au_columns = [...] \n",
|
||||
"eye_columns = [...]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4a58b20c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Preprocessing"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b683be47",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def preprocess_sample(df, au_columns, eye_columns, scaler_au, scaler_eye):\n",
|
||||
" # AUs\n",
|
||||
" X_au = df[au_columns].values\n",
|
||||
" X_au = scaler_au.transform(X_au).reshape(len(df), len(au_columns), 1)\n",
|
||||
"\n",
|
||||
" # Eye\n",
|
||||
" X_eye = df[eye_columns].values\n",
|
||||
" X_eye = scaler_eye.transform(X_eye)\n",
|
||||
"\n",
|
||||
" return X_au, X_eye"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9dc99a3d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Predict-Funktion"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "00295aa6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def predict_workload(df, model, au_columns, eye_columns, scaler_au, scaler_eye):\n",
|
||||
" X_au, X_eye = preprocess_sample(df, au_columns, eye_columns, scaler_au, scaler_eye)\n",
|
||||
"\n",
|
||||
" probs = model.predict([X_au, X_eye]).flatten()\n",
|
||||
" preds = (probs > 0.5).astype(int)\n",
|
||||
" \n",
|
||||
" return preds, probs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5753516b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Testdaten laden"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8875b0ee",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test_data = pd.read_csv(\"test_data.csv\") # oder direkt aus Notebook 1 exportieren \n",
|
||||
"\n",
|
||||
"X_au_test = test_data[au_columns].values[..., np.newaxis] \n",
|
||||
"X_eye_test = test_data[eye_columns].values \n",
|
||||
"y_test = test_data[\"label\"].values \n",
|
||||
"groups_test = test_data[\"subjectID\"].values \n",
|
||||
"\n",
|
||||
"X_au_test_scaled = scaler_au.transform(X_au_test.reshape(len(X_au_test), -1)).reshape(X_au_test.shape) \n",
|
||||
"X_eye_test_scaled = scaler_eye.transform(X_eye_test)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "332a3a07",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Vorhersagen"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b5f58ece",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"y_prob = model.predict([X_au_test_scaled, X_eye_test_scaled]).flatten() \n",
|
||||
"y_pred = (y_prob > 0.5).astype(int)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3bc5c66c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Konfusionsmatrix"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "40648dd7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"cm = confusion_matrix(y_test, y_pred) \n",
|
||||
"plt.figure(figsize=(6,5)) \n",
|
||||
"sns.heatmap(cm, annot=True, fmt=\"d\", cmap=\"Blues\", \n",
|
||||
" xticklabels=[\"Pred 0\", \"Pred 1\"], \n",
|
||||
" yticklabels=[\"True 0\", \"True 1\"]) \n",
|
||||
"plt.title(\"Konfusionsmatrix - Testdaten\") \n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e79ad8a6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"ROC"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dd93f15c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fpr, tpr, _ = roc_curve(y_test, y_prob) \n",
|
||||
"roc_auc = auc(fpr, tpr) \n",
|
||||
"\n",
|
||||
"plt.figure(figsize=(7,6)) \n",
|
||||
"plt.plot(fpr, tpr, label=f\"AUC = {roc_auc:.3f}\") \n",
|
||||
"plt.plot([0,1], [0,1], \"k--\") \n",
|
||||
"plt.xlabel(\"False Positive Rate\") \n",
|
||||
"plt.ylabel(\"True Positive Rate\") \n",
|
||||
"plt.title(\"ROC‑Kurve – Testdaten\") \n",
|
||||
"plt.legend() \n",
|
||||
"plt.grid(True) \n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2eaaf2a0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Precision-Recall"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "601e5dc9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"precision, recall, _ = precision_recall_curve(y_test, y_prob) \n",
|
||||
"plt.figure(figsize=(7,6)) \n",
|
||||
"plt.plot(recall, precision) \n",
|
||||
"plt.xlabel(\"Recall\") \n",
|
||||
"plt.ylabel(\"Precision\") \n",
|
||||
"plt.title(\"Precision‑Recall‑Kurve – Testdaten\")\n",
|
||||
"plt.grid(True) \n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "270af771",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Scores"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e2e7da5b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(\"F1‑Score:\", f1_score(y_test, y_pred)) \n",
|
||||
"print(\"Balanced Accuracy:\", balanced_accuracy_score(y_test, y_pred))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c6e22e1a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Subject-Performance"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "731aaf73",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_eval = pd.DataFrame({ \n",
|
||||
" \"subject\": groups_test, \n",
|
||||
" \"y_true\": y_test, \n",
|
||||
" \"y_pred\": y_pred \n",
|
||||
"}) \n",
|
||||
"\n",
|
||||
"subject_perf = df_eval.groupby(\"subject\").apply( \n",
|
||||
" lambda x: balanced_accuracy_score(x[\"y_true\"], x[\"y_pred\"]) \n",
|
||||
") \n",
|
||||
"\n",
|
||||
"print(\"\\n=== Balanced Accuracy pro Proband ===\") \n",
|
||||
"print(subject_perf.sort_values())"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@ -107,8 +107,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset_path = Path(r\"data-paulusjafahrsimulator-gpu/new_datasets/combined_dataset_25hz.parquet\")\n",
|
||||
"# dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/120s_combined_dataset_25hz.parquet\")"
|
||||
"dataset_path = Path(r\"data-paulusjafahrsimulator-gpu/new_datasets/combined_dataset_25hz.parquet\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -476,7 +475,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"normalizer_path=Path('data-paulusjafahrsimulator-gpu/saved_models/deepsvdd_save/normalizer_min_max_global.pkl')"
|
||||
"normalizer_path=Path('data-paulusjafahrsimulator-gpu/saved_models/deepsvdd_save/normalizer.pkl')"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -495,7 +494,7 @@
|
||||
"print(len(eye_cols))\n",
|
||||
"all_signal_columns = face_au_cols+eye_cols\n",
|
||||
"print(len(all_signal_columns))\n",
|
||||
"normalizer = fit_normalizer(train_df, all_signal_columns, method='minmax', scope='global')\n",
|
||||
"normalizer = fit_normalizer(train_df, all_signal_columns, method='standard', scope='subject')\n",
|
||||
"save_normalizer(normalizer, normalizer_path )"
|
||||
]
|
||||
},
|
||||
@ -692,10 +691,10 @@
|
||||
"model = build_intermediate_fusion_autoencoder(\n",
|
||||
" input_dim_mod1=len(face_au_cols),\n",
|
||||
" input_dim_mod2=len(eye_cols),\n",
|
||||
" encoder_hidden_dim_mod1=12, # individuell\n",
|
||||
" encoder_hidden_dim_mod2=8, # individuell\n",
|
||||
" latent_dim=4,\n",
|
||||
" dropout_rate=0.7, # einstellbar\n",
|
||||
" encoder_hidden_dim_mod1=15, # individuell\n",
|
||||
" encoder_hidden_dim_mod2=10, # individuell\n",
|
||||
" latent_dim=8,\n",
|
||||
" dropout_rate=0.3, # einstellbar\n",
|
||||
" neg_slope=0.1,\n",
|
||||
" weight_decay=1e-3\n",
|
||||
")\n",
|
||||
@ -709,7 +708,7 @@
|
||||
" \"recon_modality_1\": 1.0,\n",
|
||||
" \"recon_modality_2\": 1.0,\n",
|
||||
" },\n",
|
||||
" optimizer=tf.keras.optimizers.Adam(1e-3)\n",
|
||||
" optimizer=tf.keras.optimizers.Adam(1e-2)\n",
|
||||
" \n",
|
||||
")\n",
|
||||
"\n",
|
||||
@ -740,7 +739,7 @@
|
||||
" \"recon_modality_1\": 1.0,\n",
|
||||
" \"recon_modality_2\": 1.0,\n",
|
||||
" },\n",
|
||||
" optimizer=tf.keras.optimizers.Adam(1e-4),\n",
|
||||
" optimizer=tf.keras.optimizers.Adam(1e-5),\n",
|
||||
")\n",
|
||||
"model.fit(\n",
|
||||
" x=[X_face, X_eye],\n",
|
||||
@ -780,7 +779,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"encoder_save_path =Path('data-paulusjafahrsimulator-gpu/saved_models/deepsvdd_save/encoder_8_deep.keras')\n",
|
||||
"encoder_save_path =Path('data-paulusjafahrsimulator-gpu/saved_models/deepsvdd_save/encoder_6_deep.keras')\n",
|
||||
"encoder.save(encoder_save_path)"
|
||||
]
|
||||
},
|
||||
@ -944,7 +943,7 @@
|
||||
" return get_radius_from_arrays(nu, X_face, X_eye)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"nu = 0.25\n",
|
||||
"nu = 0.05\n",
|
||||
"\n",
|
||||
"train_dataset = tf.data.Dataset.from_tensor_slices((X_face, X_eye)).shuffle(64).batch(64)\n",
|
||||
"# train_dataset = tf.data.Dataset.from_tensor_slices((X_face, X_eye))\n",
|
||||
@ -1019,7 +1018,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"deep_svdd_save_path =Path('data-paulusjafahrsimulator-gpu/saved_models/deepsvdd_save/deep_svdd_06.keras')\n",
|
||||
"deep_svdd_save_path =Path('data-paulusjafahrsimulator-gpu/saved_models/deepsvdd_save/deep_svdd_05.keras')\n",
|
||||
"deep_svdd_net.save(deep_svdd_save_path)"
|
||||
]
|
||||
},
|
||||
@ -1076,18 +1075,6 @@
|
||||
"test_predictions = (test_scores > 0).astype(int)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "575dddcf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"normal_acc = np.mean(test_predictions[y_test == 0] == 0)\n",
|
||||
"anomaly_acc = np.mean(test_predictions[y_test == 1] == 1)\n",
|
||||
"print(f'Accuracy on Test set: {accuracy_score(y_test, test_predictions)}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
||||
@ -220,637 +220,14 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# SET\n",
|
||||
"threshold_mad = 5\n",
|
||||
"threshold_mad = 100\n",
|
||||
"column_praefix ='AU'\n",
|
||||
"\n",
|
||||
"au_columns = [col for col in df.columns if col.startswith(column_praefix)]\n",
|
||||
"cleaned_df = mad_outlier_removal.mad_outlier_removal(df,columns=au_columns, threshold=threshold_mad)\n",
|
||||
"cleaned_df = mad_outlier_removal(df,columns=au_columns, threshold=threshold_mad)\n",
|
||||
"print(cleaned_df.shape)\n",
|
||||
"print(df.shape)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9a6c1732",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### TO DO\n",
|
||||
" * pipeline aus Autoencoder und SVM\n",
|
||||
" * group k fold\n",
|
||||
" * AE überpüfen, loss dokumentieren"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "877309d9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"### Variational Autoencoder with Classifier Head\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import tensorflow as tf\n",
|
||||
"from tensorflow import keras\n",
|
||||
"from tensorflow.keras import layers, Model\n",
|
||||
"from sklearn.model_selection import GroupKFold\n",
|
||||
"from sklearn.preprocessing import StandardScaler\n",
|
||||
"from sklearn.metrics import (\n",
|
||||
" accuracy_score, precision_score, recall_score, f1_score, \n",
|
||||
" roc_auc_score, confusion_matrix, classification_report\n",
|
||||
")\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from collections import defaultdict\n",
|
||||
"\n",
|
||||
"# ============================================================================\n",
|
||||
"# 1. CREATE LABELS\n",
|
||||
"# ============================================================================\n",
|
||||
"\n",
|
||||
"# Low workload: baseline + n-back level 1,4\n",
|
||||
"low_all = cleaned_df[\n",
|
||||
" ((cleaned_df[\"PHASE\"] == \"baseline\") |\n",
|
||||
" ((cleaned_df[\"STUDY\"] == \"n-back\") & (cleaned_df[\"PHASE\"] != \"baseline\") & (cleaned_df[\"LEVEL\"].isin([1,4]))))\n",
|
||||
"].copy()\n",
|
||||
"low_all['label'] = 0\n",
|
||||
"print(f\"Low workload samples: {low_all.shape[0]}\")\n",
|
||||
"\n",
|
||||
"# High workload n-back: level 2,3,5,6\n",
|
||||
"high_nback = cleaned_df[\n",
|
||||
" (cleaned_df[\"STUDY\"]==\"n-back\") &\n",
|
||||
" (cleaned_df[\"LEVEL\"].isin([2, 3, 5, 6])) &\n",
|
||||
" (cleaned_df[\"PHASE\"].isin([\"train\", \"test\"]))\n",
|
||||
"].copy()\n",
|
||||
"high_nback['label'] = 1\n",
|
||||
"print(f\"High n-back samples: {high_nback.shape[0]}\")\n",
|
||||
"\n",
|
||||
"# High workload k-drive\n",
|
||||
"high_kdrive = cleaned_df[\n",
|
||||
" (cleaned_df[\"STUDY\"] == \"k-drive\") & (cleaned_df[\"PHASE\"] != \"baseline\")\n",
|
||||
"].copy()\n",
|
||||
"high_kdrive['label'] = 1\n",
|
||||
"print(f\"High k-drive samples: {high_kdrive.shape[0]}\")\n",
|
||||
"\n",
|
||||
"# Combine all high workload\n",
|
||||
"high_all = pd.concat([high_nback, high_kdrive])\n",
|
||||
"print(f\"Total high workload samples: {high_all.shape[0]}\")\n",
|
||||
"\n",
|
||||
"# Complete labeled dataset\n",
|
||||
"labeled_df = pd.concat([low_all, high_all]).reset_index(drop=True)\n",
|
||||
"print(f\"\\nTotal labeled samples: {labeled_df.shape[0]}\")\n",
|
||||
"print(f\"Class distribution:\\n{labeled_df['label'].value_counts()}\")\n",
|
||||
"\n",
|
||||
"# ============================================================================\n",
|
||||
"# 2. TRAIN/TEST SPLIT BY SUBJECTS\n",
|
||||
"# ============================================================================\n",
|
||||
"\n",
|
||||
"train_df = labeled_df[labeled_df['subjectID'].isin(training_subjects)].copy()\n",
|
||||
"test_df = labeled_df[labeled_df['subjectID'].isin(test_subjects)].copy()\n",
|
||||
"\n",
|
||||
"print(f\"\\nTraining subjects: {training_subjects}\")\n",
|
||||
"print(f\"Test subjects: {test_subjects}\")\n",
|
||||
"print(f\"Train samples: {train_df.shape[0]}, Test samples: {test_df.shape[0]}\")\n",
|
||||
"\n",
|
||||
"# Extract features and labels\n",
|
||||
"au_columns = [col for col in labeled_df.columns if col.startswith('AU')]\n",
|
||||
"print(f\"\\nUsing {len(au_columns)} AU features: {au_columns}\")\n",
|
||||
"\n",
|
||||
"X_train = train_df[au_columns].values\n",
|
||||
"y_train = train_df['label'].values\n",
|
||||
"groups_train = train_df['subjectID'].values\n",
|
||||
"\n",
|
||||
"X_test = test_df[au_columns].values\n",
|
||||
"y_test = test_df['label'].values\n",
|
||||
"\n",
|
||||
"# Normalize features\n",
|
||||
"scaler = StandardScaler()\n",
|
||||
"X_train_scaled = scaler.fit_transform(X_train)\n",
|
||||
"X_test_scaled = scaler.transform(X_test)\n",
|
||||
"\n",
|
||||
"print(f\"\\nTrain class distribution: {np.bincount(y_train)}\")\n",
|
||||
"print(f\"Test class distribution: {np.bincount(y_test)}\")\n",
|
||||
"\n",
|
||||
"# ============================================================================\n",
|
||||
"# 3. VAE WITH CLASSIFIER HEAD MODEL\n",
|
||||
"# ============================================================================\n",
|
||||
"\n",
|
||||
"class Sampling(layers.Layer):\n",
|
||||
" \"\"\"Reparameterization trick for VAE\"\"\"\n",
|
||||
" def call(self, inputs):\n",
|
||||
" z_mean, z_log_var = inputs\n",
|
||||
" batch = tf.shape(z_mean)[0]\n",
|
||||
" dim = tf.shape(z_mean)[1]\n",
|
||||
" epsilon = tf.random.normal(shape=(batch, dim))\n",
|
||||
" return z_mean + tf.exp(0.5 * z_log_var) * epsilon\n",
|
||||
"\n",
|
||||
"def build_vae_classifier(input_dim, latent_dim, encoder_dims=[32, 16], \n",
|
||||
" decoder_dims=[16, 32], classifier_dims=[16]):\n",
|
||||
" \"\"\"\n",
|
||||
" Build VAE with classifier head\n",
|
||||
" \n",
|
||||
" Args:\n",
|
||||
" input_dim: Number of input features (20 AUs)\n",
|
||||
" latent_dim: Dimension of latent space (2-5)\n",
|
||||
" encoder_dims: Hidden layer sizes for encoder\n",
|
||||
" decoder_dims: Hidden layer sizes for decoder\n",
|
||||
" classifier_dims: Hidden layer sizes for classifier\n",
|
||||
" \"\"\"\n",
|
||||
" \n",
|
||||
" # ---- ENCODER ----\n",
|
||||
" encoder_inputs = keras.Input(shape=(input_dim,), name='encoder_input')\n",
|
||||
" x = encoder_inputs\n",
|
||||
" \n",
|
||||
" for i, dim in enumerate(encoder_dims):\n",
|
||||
" x = layers.Dense(dim, activation='relu', name=f'encoder_dense_{i}')(x)\n",
|
||||
" x = layers.BatchNormalization(name=f'encoder_bn_{i}')(x)\n",
|
||||
" x = layers.Dropout(0.2, name=f'encoder_dropout_{i}')(x)\n",
|
||||
" \n",
|
||||
" z_mean = layers.Dense(latent_dim, name='z_mean')(x)\n",
|
||||
" z_log_var = layers.Dense(latent_dim, name='z_log_var')(x)\n",
|
||||
" z = Sampling()([z_mean, z_log_var])\n",
|
||||
" \n",
|
||||
" encoder = Model(encoder_inputs, [z_mean, z_log_var, z], name='encoder')\n",
|
||||
" \n",
|
||||
" # ---- DECODER ----\n",
|
||||
" latent_inputs = keras.Input(shape=(latent_dim,), name='latent_input')\n",
|
||||
" x = latent_inputs\n",
|
||||
" \n",
|
||||
" for i, dim in enumerate(decoder_dims):\n",
|
||||
" x = layers.Dense(dim, activation='relu', name=f'decoder_dense_{i}')(x)\n",
|
||||
" x = layers.BatchNormalization(name=f'decoder_bn_{i}')(x)\n",
|
||||
" \n",
|
||||
" decoder_outputs = layers.Dense(input_dim, activation='linear', name='decoder_output')(x)\n",
|
||||
" decoder = Model(latent_inputs, decoder_outputs, name='decoder')\n",
|
||||
" \n",
|
||||
" # ---- CLASSIFIER HEAD ----\n",
|
||||
" x = latent_inputs\n",
|
||||
" for i, dim in enumerate(classifier_dims):\n",
|
||||
" x = layers.Dense(dim, activation='relu', name=f'classifier_dense_{i}')(x)\n",
|
||||
" x = layers.Dropout(0.3, name=f'classifier_dropout_{i}')(x)\n",
|
||||
" \n",
|
||||
" classifier_output = layers.Dense(1, activation='sigmoid', name='classifier_output')(x)\n",
|
||||
" classifier = Model(latent_inputs, classifier_output, name='classifier')\n",
|
||||
" \n",
|
||||
" # ---- FULL MODEL ----\n",
|
||||
" inputs = keras.Input(shape=(input_dim,), name='vae_input')\n",
|
||||
" z_mean, z_log_var, z = encoder(inputs)\n",
|
||||
" reconstructed = decoder(z)\n",
|
||||
" classification = classifier(z)\n",
|
||||
" \n",
|
||||
" model = Model(inputs, [reconstructed, classification], name='vae_classifier')\n",
|
||||
" \n",
|
||||
" return model, encoder, decoder, classifier\n",
|
||||
"\n",
|
||||
"# ============================================================================\n",
|
||||
"# 4. CUSTOM TRAINING LOOP WITH COMBINED LOSS\n",
|
||||
"# ============================================================================\n",
|
||||
"\n",
|
||||
"class VAEClassifier(keras.Model):\n",
|
||||
" def __init__(self, encoder, decoder, classifier, **kwargs):\n",
|
||||
" super().__init__(**kwargs)\n",
|
||||
" self.encoder = encoder\n",
|
||||
" self.decoder = decoder\n",
|
||||
" self.classifier = classifier\n",
|
||||
" self.total_loss_tracker = keras.metrics.Mean(name=\"total_loss\")\n",
|
||||
" self.reconstruction_loss_tracker = keras.metrics.Mean(name=\"reconstruction_loss\")\n",
|
||||
" self.kl_loss_tracker = keras.metrics.Mean(name=\"kl_loss\")\n",
|
||||
" self.classification_loss_tracker = keras.metrics.Mean(name=\"classification_loss\")\n",
|
||||
" self.accuracy_tracker = keras.metrics.BinaryAccuracy(name=\"accuracy\")\n",
|
||||
" \n",
|
||||
" @property\n",
|
||||
" def metrics(self):\n",
|
||||
" return [\n",
|
||||
" self.total_loss_tracker,\n",
|
||||
" self.reconstruction_loss_tracker,\n",
|
||||
" self.kl_loss_tracker,\n",
|
||||
" self.classification_loss_tracker,\n",
|
||||
" self.accuracy_tracker,\n",
|
||||
" ]\n",
|
||||
" \n",
|
||||
" def train_step(self, data):\n",
|
||||
" x, y = data\n",
|
||||
" \n",
|
||||
" with tf.GradientTape() as tape:\n",
|
||||
" # Forward pass\n",
|
||||
" z_mean, z_log_var, z = self.encoder(x, training=True)\n",
|
||||
" reconstruction = self.decoder(z, training=True)\n",
|
||||
" classification = self.classifier(z, training=True)\n",
|
||||
" \n",
|
||||
" # Reconstruction loss (MSE)\n",
|
||||
" reconstruction_loss = tf.reduce_mean(\n",
|
||||
" keras.losses.mse(x, reconstruction))\n",
|
||||
" \n",
|
||||
" # KL divergence loss\n",
|
||||
" kl_loss = -0.5 * tf.reduce_mean(\n",
|
||||
" tf.reduce_sum(\n",
|
||||
" 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var),\n",
|
||||
" axis=1\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" # Classification loss (binary crossentropy)\n",
|
||||
" # Classification loss (binary crossentropy)\n",
|
||||
" classification_loss = tf.reduce_mean(\n",
|
||||
" keras.losses.binary_crossentropy(tf.expand_dims(y, -1), classification)\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" # Combined loss with weights\n",
|
||||
" total_loss = reconstruction_loss + kl_loss + classification_loss\n",
|
||||
" \n",
|
||||
" # Backpropagation\n",
|
||||
" grads = tape.gradient(total_loss, self.trainable_weights)\n",
|
||||
" self.optimizer.apply_gradients(zip(grads, self.trainable_weights))\n",
|
||||
" \n",
|
||||
" # Update metrics\n",
|
||||
" self.total_loss_tracker.update_state(total_loss)\n",
|
||||
" self.reconstruction_loss_tracker.update_state(reconstruction_loss)\n",
|
||||
" self.kl_loss_tracker.update_state(kl_loss)\n",
|
||||
" self.classification_loss_tracker.update_state(classification_loss)\n",
|
||||
" self.accuracy_tracker.update_state(y, classification)\n",
|
||||
" \n",
|
||||
" return {\n",
|
||||
" \"total_loss\": self.total_loss_tracker.result(),\n",
|
||||
" \"reconstruction_loss\": self.reconstruction_loss_tracker.result(),\n",
|
||||
" \"kl_loss\": self.kl_loss_tracker.result(),\n",
|
||||
" \"classification_loss\": self.classification_loss_tracker.result(),\n",
|
||||
" \"accuracy\": self.accuracy_tracker.result(),\n",
|
||||
" }\n",
|
||||
" \n",
|
||||
" def test_step(self, data):\n",
|
||||
" x, y = data\n",
|
||||
" \n",
|
||||
" z_mean, z_log_var, z = self.encoder(x, training=False)\n",
|
||||
" reconstruction = self.decoder(z, training=False)\n",
|
||||
" classification = self.classifier(z, training=False)\n",
|
||||
" \n",
|
||||
" # Reconstruction loss (MSE)\n",
|
||||
" reconstruction_loss = tf.reduce_mean(\n",
|
||||
" keras.losses.mse(x, reconstruction))\n",
|
||||
" kl_loss = -0.5 * tf.reduce_mean(\n",
|
||||
" tf.reduce_sum(1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var), axis=1)\n",
|
||||
" )\n",
|
||||
" # Classification loss (binary crossentropy)\n",
|
||||
" classification_loss = tf.reduce_mean(\n",
|
||||
" keras.losses.binary_crossentropy(tf.expand_dims(y, -1), classification)\n",
|
||||
" )\n",
|
||||
" total_loss = reconstruction_loss + kl_loss + classification_loss\n",
|
||||
" \n",
|
||||
" self.total_loss_tracker.update_state(total_loss)\n",
|
||||
" self.reconstruction_loss_tracker.update_state(reconstruction_loss)\n",
|
||||
" self.kl_loss_tracker.update_state(kl_loss)\n",
|
||||
" self.classification_loss_tracker.update_state(classification_loss)\n",
|
||||
" self.accuracy_tracker.update_state(y, classification)\n",
|
||||
" \n",
|
||||
" return {\n",
|
||||
" \"total_loss\": self.total_loss_tracker.result(),\n",
|
||||
" \"reconstruction_loss\": self.reconstruction_loss_tracker.result(),\n",
|
||||
" \"kl_loss\": self.kl_loss_tracker.result(),\n",
|
||||
" \"classification_loss\": self.classification_loss_tracker.result(),\n",
|
||||
" \"accuracy\": self.accuracy_tracker.result(),\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
"# ============================================================================\n",
|
||||
"# 5. GROUP K-FOLD CROSS-VALIDATION WITH GRID SEARCH\n",
|
||||
"# ============================================================================\n",
|
||||
"\n",
|
||||
"# Hyperparameter grid\n",
|
||||
"param_grid = {\n",
|
||||
" 'latent_dim': [2, 5],\n",
|
||||
" 'encoder_dims': [[32, 16], [64, 32]],\n",
|
||||
" 'learning_rate': [0.001, 0.005],\n",
|
||||
" 'batch_size': [32, 64],\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"# Generate all combinations\n",
|
||||
"from itertools import product\n",
|
||||
"keys = param_grid.keys()\n",
|
||||
"values = param_grid.values()\n",
|
||||
"param_combinations = [dict(zip(keys, v)) for v in product(*values)]\n",
|
||||
"\n",
|
||||
"print(f\"\\nTotal hyperparameter combinations: {len(param_combinations)}\")\n",
|
||||
"\n",
|
||||
"# Group K-Fold setup\n",
|
||||
"n_splits = 5\n",
|
||||
"gkf = GroupKFold(n_splits=n_splits)\n",
|
||||
"\n",
|
||||
"# Store results\n",
|
||||
"cv_results = []\n",
|
||||
"\n",
|
||||
"# Grid search with cross-validation\n",
|
||||
"for idx, params in enumerate(param_combinations):\n",
|
||||
" print(f\"\\n{'='*80}\")\n",
|
||||
" print(f\"Testing combination {idx+1}/{len(param_combinations)}: {params}\")\n",
|
||||
" print(f\"{'='*80}\")\n",
|
||||
" \n",
|
||||
" fold_results = []\n",
|
||||
" \n",
|
||||
" for fold, (train_idx, val_idx) in enumerate(gkf.split(X_train_scaled, y_train, groups_train)):\n",
|
||||
" print(f\"\\nFold {fold+1}/{n_splits}\")\n",
|
||||
" \n",
|
||||
" X_fold_train, X_fold_val = X_train_scaled[train_idx], X_train_scaled[val_idx]\n",
|
||||
" y_fold_train, y_fold_val = y_train[train_idx], y_train[val_idx]\n",
|
||||
" \n",
|
||||
" # Build model\n",
|
||||
" model, encoder, decoder, classifier = build_vae_classifier(\n",
|
||||
" input_dim=len(au_columns),\n",
|
||||
" latent_dim=params['latent_dim'],\n",
|
||||
" encoder_dims=params['encoder_dims'],\n",
|
||||
" decoder_dims=list(reversed(params['encoder_dims'])),\n",
|
||||
" classifier_dims=[16]\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" vae_classifier = VAEClassifier(encoder, decoder, classifier)\n",
|
||||
" vae_classifier.compile(optimizer=keras.optimizers.Adam(params['learning_rate']))\n",
|
||||
" \n",
|
||||
" # Early stopping\n",
|
||||
" early_stop = keras.callbacks.EarlyStopping(\n",
|
||||
" monitor='val_total_loss',\n",
|
||||
" patience=10,\n",
|
||||
" restore_best_weights=True,\n",
|
||||
" mode='min'\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" # Train\n",
|
||||
" history = vae_classifier.fit(\n",
|
||||
" X_fold_train, y_fold_train,\n",
|
||||
" validation_data=(X_fold_val, y_fold_val),\n",
|
||||
" epochs=60,\n",
|
||||
" batch_size=params['batch_size'],\n",
|
||||
" callbacks=[early_stop],\n",
|
||||
" verbose=0\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" # Evaluate on validation fold\n",
|
||||
" z_mean_val, _, _ = encoder.predict(X_fold_val, verbose=0)\n",
|
||||
" y_pred_proba = classifier.predict(z_mean_val, verbose=0).flatten()\n",
|
||||
" y_pred = (y_pred_proba > 0.5).astype(int)\n",
|
||||
" \n",
|
||||
" fold_metrics = {\n",
|
||||
" 'accuracy': accuracy_score(y_fold_val, y_pred),\n",
|
||||
" 'precision': precision_score(y_fold_val, y_pred, zero_division=0),\n",
|
||||
" 'recall': recall_score(y_fold_val, y_pred, zero_division=0),\n",
|
||||
" 'f1': f1_score(y_fold_val, y_pred, zero_division=0),\n",
|
||||
" 'roc_auc': roc_auc_score(y_fold_val, y_pred_proba),\n",
|
||||
" 'final_recon_loss': history.history['val_reconstruction_loss'][-1],\n",
|
||||
" 'final_kl_loss': history.history['val_kl_loss'][-1],\n",
|
||||
" 'final_class_loss': history.history['val_classification_loss'][-1],\n",
|
||||
" }\n",
|
||||
" \n",
|
||||
" fold_results.append(fold_metrics)\n",
|
||||
" print(f\" Accuracy: {fold_metrics['accuracy']:.4f}, F1: {fold_metrics['f1']:.4f}, AUC: {fold_metrics['roc_auc']:.4f}\")\n",
|
||||
" \n",
|
||||
" # Clear session to free memory\n",
|
||||
" keras.backend.clear_session()\n",
|
||||
" \n",
|
||||
" # Average across folds\n",
|
||||
" avg_results = {\n",
|
||||
" 'params': params,\n",
|
||||
" 'mean_accuracy': np.mean([r['accuracy'] for r in fold_results]),\n",
|
||||
" 'std_accuracy': np.std([r['accuracy'] for r in fold_results]),\n",
|
||||
" 'mean_f1': np.mean([r['f1'] for r in fold_results]),\n",
|
||||
" 'std_f1': np.std([r['f1'] for r in fold_results]),\n",
|
||||
" 'mean_roc_auc': np.mean([r['roc_auc'] for r in fold_results]),\n",
|
||||
" 'std_roc_auc': np.std([r['roc_auc'] for r in fold_results]),\n",
|
||||
" 'mean_recon_loss': np.mean([r['final_recon_loss'] for r in fold_results]),\n",
|
||||
" 'mean_kl_loss': np.mean([r['final_kl_loss'] for r in fold_results]),\n",
|
||||
" 'mean_class_loss': np.mean([r['final_class_loss'] for r in fold_results]),\n",
|
||||
" 'fold_results': fold_results\n",
|
||||
" }\n",
|
||||
" \n",
|
||||
" cv_results.append(avg_results)\n",
|
||||
" \n",
|
||||
" print(f\"\\nMean CV Accuracy: {avg_results['mean_accuracy']:.4f} ± {avg_results['std_accuracy']:.4f}\")\n",
|
||||
" print(f\"Mean CV F1: {avg_results['mean_f1']:.4f} ± {avg_results['std_f1']:.4f}\")\n",
|
||||
" print(f\"Mean CV AUC: {avg_results['mean_roc_auc']:.4f} ± {avg_results['std_roc_auc']:.4f}\")\n",
|
||||
"\n",
|
||||
"# ============================================================================\n",
|
||||
"# 6. SELECT BEST MODEL AND EVALUATE ON TEST SET\n",
|
||||
"# ============================================================================\n",
|
||||
"\n",
|
||||
"# Find best hyperparameters based on mean F1 score\n",
|
||||
"best_idx = np.argmax([r['mean_f1'] for r in cv_results])\n",
|
||||
"best_params = cv_results[best_idx]['params']\n",
|
||||
"\n",
|
||||
"print(f\"\\n{'='*80}\")\n",
|
||||
"print(\"BEST HYPERPARAMETERS (based on CV F1 score):\")\n",
|
||||
"print(f\"{'='*80}\")\n",
|
||||
"for key, value in best_params.items():\n",
|
||||
" print(f\"{key}: {value}\")\n",
|
||||
"print(f\"\\nCV Performance:\")\n",
|
||||
"print(f\" Accuracy: {cv_results[best_idx]['mean_accuracy']:.4f} ± {cv_results[best_idx]['std_accuracy']:.4f}\")\n",
|
||||
"print(f\" F1 Score: {cv_results[best_idx]['mean_f1']:.4f} ± {cv_results[best_idx]['std_f1']:.4f}\")\n",
|
||||
"print(f\" ROC-AUC: {cv_results[best_idx]['mean_roc_auc']:.4f} ± {cv_results[best_idx]['std_roc_auc']:.4f}\")\n",
|
||||
"\n",
|
||||
"# Train final model on all training data\n",
|
||||
"print(f\"\\n{'='*80}\")\n",
|
||||
"print(\"TRAINING FINAL MODEL ON ALL TRAINING DATA\")\n",
|
||||
"print(f\"{'='*80}\")\n",
|
||||
"\n",
|
||||
"final_model, final_encoder, final_decoder, final_classifier = build_vae_classifier(\n",
|
||||
" input_dim=len(au_columns),\n",
|
||||
" latent_dim=best_params['latent_dim'],\n",
|
||||
" encoder_dims=best_params['encoder_dims'],\n",
|
||||
" decoder_dims=list(reversed(best_params['encoder_dims'])),\n",
|
||||
" classifier_dims=[16]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"final_vae_classifier = VAEClassifier(final_encoder, final_decoder, final_classifier)\n",
|
||||
"final_vae_classifier.compile(optimizer=keras.optimizers.Adam(best_params['learning_rate']))\n",
|
||||
"\n",
|
||||
"final_history = final_vae_classifier.fit(\n",
|
||||
" X_train_scaled, y_train,\n",
|
||||
" validation_split=0.2,\n",
|
||||
" epochs=100,\n",
|
||||
" batch_size=best_params['batch_size'],\n",
|
||||
" callbacks=[keras.callbacks.EarlyStopping(monitor='val_total_loss', patience=15, restore_best_weights=True, mode='min')],\n",
|
||||
" verbose=1\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Evaluate on held-out test set\n",
|
||||
"print(f\"\\n{'='*80}\")\n",
|
||||
"print(\"EVALUATION ON HELD-OUT TEST SET\")\n",
|
||||
"print(f\"{'='*80}\")\n",
|
||||
"\n",
|
||||
"z_mean_test, _, _ = final_encoder.predict(X_test_scaled, verbose=0)\n",
|
||||
"y_test_pred_proba = final_classifier.predict(z_mean_test, verbose=0).flatten()\n",
|
||||
"y_test_pred = (y_test_pred_proba > 0.5).astype(int)\n",
|
||||
"\n",
|
||||
"test_metrics = {\n",
|
||||
" 'accuracy': accuracy_score(y_test, y_test_pred),\n",
|
||||
" 'precision': precision_score(y_test, y_test_pred),\n",
|
||||
" 'recall': recall_score(y_test, y_test_pred),\n",
|
||||
" 'f1': f1_score(y_test, y_test_pred),\n",
|
||||
" 'roc_auc': roc_auc_score(y_test, y_test_pred_proba),\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"print(\"\\nTest Set Performance:\")\n",
|
||||
"for metric, value in test_metrics.items():\n",
|
||||
" print(f\" {metric.capitalize()}: {value:.4f}\")\n",
|
||||
"\n",
|
||||
"print(\"\\nConfusion Matrix:\")\n",
|
||||
"print(confusion_matrix(y_test, y_test_pred))\n",
|
||||
"\n",
|
||||
"print(\"\\nClassification Report:\")\n",
|
||||
"print(classification_report(y_test, y_test_pred, target_names=['Low Workload', 'High Workload']))\n",
|
||||
"\n",
|
||||
"# ============================================================================\n",
|
||||
"# 7. VISUALIZATION\n",
|
||||
"# ============================================================================\n",
|
||||
"\n",
|
||||
"# Plot training history\n",
|
||||
"fig, axes = plt.subplots(2, 2, figsize=(15, 10))\n",
|
||||
"\n",
|
||||
"axes[0, 0].plot(final_history.history['reconstruction_loss'], label='Train')\n",
|
||||
"axes[0, 0].plot(final_history.history['val_reconstruction_loss'], label='Val')\n",
|
||||
"axes[0, 0].set_title('Reconstruction Loss')\n",
|
||||
"axes[0, 0].set_xlabel('Epoch')\n",
|
||||
"axes[0, 0].set_ylabel('Loss')\n",
|
||||
"axes[0, 0].legend()\n",
|
||||
"axes[0, 0].grid(True)\n",
|
||||
"\n",
|
||||
"axes[0, 1].plot(final_history.history['kl_loss'], label='Train')\n",
|
||||
"axes[0, 1].plot(final_history.history['val_kl_loss'], label='Val')\n",
|
||||
"axes[0, 1].set_title('KL Divergence Loss')\n",
|
||||
"axes[0, 1].set_xlabel('Epoch')\n",
|
||||
"axes[0, 1].set_ylabel('Loss')\n",
|
||||
"axes[0, 1].legend()\n",
|
||||
"axes[0, 1].grid(True)\n",
|
||||
"\n",
|
||||
"axes[1, 0].plot(final_history.history['classification_loss'], label='Train')\n",
|
||||
"axes[1, 0].plot(final_history.history['val_classification_loss'], label='Val')\n",
|
||||
"axes[1, 0].set_title('Classification Loss')\n",
|
||||
"axes[1, 0].set_xlabel('Epoch')\n",
|
||||
"axes[1, 0].set_ylabel('Loss')\n",
|
||||
"axes[1, 0].legend()\n",
|
||||
"axes[1, 0].grid(True)\n",
|
||||
"\n",
|
||||
"axes[1, 1].plot(final_history.history['accuracy'], label='Train')\n",
|
||||
"axes[1, 1].plot(final_history.history['val_accuracy'], label='Val')\n",
|
||||
"axes[1, 1].set_title('Classification Accuracy')\n",
|
||||
"axes[1, 1].set_xlabel('Epoch')\n",
|
||||
"axes[1, 1].set_ylabel('Accuracy')\n",
|
||||
"axes[1, 1].legend()\n",
|
||||
"axes[1, 1].grid(True)\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"# Visualize latent space (if 2D or 3D)\n",
|
||||
"if best_params['latent_dim'] == 2:\n",
|
||||
" z_mean_train, _, _ = final_encoder.predict(X_train_scaled, verbose=0)\n",
|
||||
" \n",
|
||||
" plt.figure(figsize=(10, 8))\n",
|
||||
" scatter = plt.scatter(z_mean_train[:, 0], z_mean_train[:, 1], \n",
|
||||
" c=y_train, cmap='RdYlBu', alpha=0.6, edgecolors='k')\n",
|
||||
" plt.colorbar(scatter, label='Workload (0=Low, 1=High)')\n",
|
||||
" plt.xlabel('Latent Dimension 1')\n",
|
||||
" plt.ylabel('Latent Dimension 2')\n",
|
||||
" plt.title('2D Latent Space Representation (Training Data)')\n",
|
||||
" plt.grid(True, alpha=0.3)\n",
|
||||
" plt.show()\n",
|
||||
" \n",
|
||||
" # Test set latent space\n",
|
||||
" plt.figure(figsize=(10, 8))\n",
|
||||
" scatter = plt.scatter(z_mean_test[:, 0], z_mean_test[:, 1], \n",
|
||||
" c=y_test, cmap='RdYlBu', alpha=0.6, edgecolors='k')\n",
|
||||
" plt.colorbar(scatter, label='Workload (0=Low, 1=High)')\n",
|
||||
" plt.xlabel('Latent Dimension 1')\n",
|
||||
" plt.ylabel('Latent Dimension 2')\n",
|
||||
" plt.title('2D Latent Space Representation (Test Data)')\n",
|
||||
" plt.grid(True, alpha=0.3)\n",
|
||||
" plt.show()\n",
|
||||
"\n",
|
||||
"print(\"\\n\" + \"=\"*80)\n",
|
||||
"print(\"TRAINING COMPLETE!\")\n",
|
||||
"print(\"=\"*80)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "79bcfc58",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"### Save Trained VAE Classifier Model\n",
|
||||
"from pathlib import Path\n",
|
||||
"from datetime import datetime\n",
|
||||
"\n",
|
||||
"# Define save path\n",
|
||||
"model_dir = Path(\"/home/jovyan/data-paulusjafahrsimulator-gpu/trained_models\")\n",
|
||||
"model_dir.mkdir(parents=True, exist_ok=True)\n",
|
||||
"\n",
|
||||
"timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n",
|
||||
"model_path = model_dir / f\"vae_classifier_{timestamp}.keras\"\n",
|
||||
"\n",
|
||||
"# Save the complete model\n",
|
||||
"final_vae_classifier.save(model_path)\n",
|
||||
"\n",
|
||||
"print(f\"Model saved to: {model_path}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d700e517",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "30d8d100",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"### Plot Confusion Matrix for Final Model\n",
|
||||
"from sklearn.metrics import ConfusionMatrixDisplay\n",
|
||||
"x = Path(\"/home/jovyan/data-paulusjafahrsimulator-gpu/trained_models/vae_classifier_20251210_230121.keras\")\n",
|
||||
"# Load the saved model\n",
|
||||
"print(f\"Loading model from: {x}\")\n",
|
||||
"# loaded_vae_classifier = tf.keras.models.load_model(x)\n",
|
||||
"loaded_vae_classifier = final_vae_classifier\n",
|
||||
"print(\"✓ Model loaded successfully!\")\n",
|
||||
"\n",
|
||||
"# Extract encoder and classifier from loaded model\n",
|
||||
"loaded_encoder = loaded_vae_classifier.encoder\n",
|
||||
"loaded_classifier = loaded_vae_classifier.classifier\n",
|
||||
"\n",
|
||||
"# Get predictions on test set\n",
|
||||
"z_mean_test, _, _ = loaded_encoder.predict(X_test_scaled, verbose=0)\n",
|
||||
"y_test_pred_proba = loaded_classifier.predict(z_mean_test, verbose=0).flatten()\n",
|
||||
"y_test_pred = (y_test_pred_proba > 0.5).astype(int)\n",
|
||||
"\n",
|
||||
"# Create and plot confusion matrix\n",
|
||||
"cm = confusion_matrix(y_test, y_test_pred)\n",
|
||||
"disp = ConfusionMatrixDisplay(confusion_matrix=cm, \n",
|
||||
" display_labels=['Low Workload', 'High Workload'])\n",
|
||||
"\n",
|
||||
"fig, ax = plt.subplots(figsize=(8, 6))\n",
|
||||
"disp.plot(ax=ax, cmap='Blues', values_format='d')\n",
|
||||
"plt.title('Confusion Matrix - Test Set (Loaded Model)')\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"# Print metrics\n",
|
||||
"print(f\"\\nTest Set Performance (Loaded Model):\")\n",
|
||||
"print(f\" Accuracy: {accuracy_score(y_test, y_test_pred):.4f}\")\n",
|
||||
"print(f\" Precision: {precision_score(y_test, y_test_pred):.4f}\")\n",
|
||||
"print(f\" Recall: {recall_score(y_test, y_test_pred):.4f}\")\n",
|
||||
"print(f\" F1 Score: {f1_score(y_test, y_test_pred):.4f}\")\n",
|
||||
"print(f\" ROC-AUC: {roc_auc_score(y_test, y_test_pred_proba):.4f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "e826a998",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"TO DO\n",
|
||||
" * autoencoder langsam anfangen mit 19 schichten\n",
|
||||
" * dann AE und SVM mit hybridem training wie bei claude?!\n",
|
||||
" * dataset aus eyetracking verwenden?"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
||||
@ -1,7 +1,5 @@
|
||||
import pickle
|
||||
from sklearn.preprocessing import StandardScaler, MinMaxScaler
|
||||
import numpy as np
|
||||
import os
|
||||
from sklearn.preprocessing import MinMaxScaler, StandardScaler
|
||||
import pandas as pd
|
||||
|
||||
def fit_normalizer(train_data, au_columns, method='standard', scope='global'):
|
||||
"""
|
||||
@ -21,8 +19,9 @@ def fit_normalizer(train_data, au_columns, method='standard', scope='global'):
|
||||
Returns:
|
||||
--------
|
||||
dict
|
||||
Dictionary containing fitted scalers and statistics for new subjects
|
||||
Dictionary containing fitted scalers
|
||||
"""
|
||||
# Select scaler based on method
|
||||
if method == 'standard':
|
||||
Scaler = StandardScaler
|
||||
elif method == 'minmax':
|
||||
@ -31,54 +30,19 @@ def fit_normalizer(train_data, au_columns, method='standard', scope='global'):
|
||||
raise ValueError("method must be 'standard' or 'minmax'")
|
||||
|
||||
scalers = {}
|
||||
|
||||
if scope == 'subject':
|
||||
# Fit one scaler per subject
|
||||
subject_stats = []
|
||||
|
||||
for subject in train_data['subjectID'].unique():
|
||||
subject_mask = train_data['subjectID'] == subject
|
||||
scaler = Scaler()
|
||||
scaler.fit(train_data.loc[subject_mask, au_columns].values)
|
||||
scaler.fit(train_data.loc[subject_mask, au_columns])
|
||||
scalers[subject] = scaler
|
||||
|
||||
# Store statistics for averaging
|
||||
if method == 'standard':
|
||||
subject_stats.append({
|
||||
'mean': scaler.mean_,
|
||||
'std': scaler.scale_
|
||||
})
|
||||
elif method == 'minmax':
|
||||
subject_stats.append({
|
||||
'min': scaler.data_min_,
|
||||
'max': scaler.data_max_
|
||||
})
|
||||
|
||||
# Calculate average statistics for new subjects
|
||||
if method == 'standard':
|
||||
avg_mean = np.mean([s['mean'] for s in subject_stats], axis=0)
|
||||
avg_std = np.mean([s['std'] for s in subject_stats], axis=0)
|
||||
fallback_scaler = StandardScaler()
|
||||
fallback_scaler.mean_ = avg_mean
|
||||
fallback_scaler.scale_ = avg_std
|
||||
fallback_scaler.var_ = avg_std ** 2
|
||||
fallback_scaler.n_features_in_ = len(au_columns)
|
||||
elif method == 'minmax':
|
||||
avg_min = np.mean([s['min'] for s in subject_stats], axis=0)
|
||||
avg_max = np.mean([s['max'] for s in subject_stats], axis=0)
|
||||
fallback_scaler = MinMaxScaler()
|
||||
fallback_scaler.data_min_ = avg_min
|
||||
fallback_scaler.data_max_ = avg_max
|
||||
fallback_scaler.data_range_ = avg_max - avg_min
|
||||
fallback_scaler.scale_ = 1.0 / fallback_scaler.data_range_
|
||||
fallback_scaler.min_ = -avg_min * fallback_scaler.scale_
|
||||
fallback_scaler.n_features_in_ = len(au_columns)
|
||||
|
||||
scalers['_fallback'] = fallback_scaler
|
||||
|
||||
elif scope == 'global':
|
||||
# Fit one scaler for all subjects
|
||||
scaler = Scaler()
|
||||
scaler.fit(train_data[au_columns].values)
|
||||
scaler.fit(train_data[au_columns])
|
||||
scalers['global'] = scaler
|
||||
|
||||
else:
|
||||
@ -86,7 +50,7 @@ def fit_normalizer(train_data, au_columns, method='standard', scope='global'):
|
||||
|
||||
return {'scalers': scalers, 'method': method, 'scope': scope}
|
||||
|
||||
def apply_normalizer(data, columns, normalizer_dict):
|
||||
def apply_normalizer(data, au_columns, normalizer_dict):
|
||||
"""
|
||||
Apply fitted normalization scalers to data.
|
||||
|
||||
@ -107,70 +71,28 @@ def apply_normalizer(data, columns, normalizer_dict):
|
||||
normalized_data = data.copy()
|
||||
scalers = normalizer_dict['scalers']
|
||||
scope = normalizer_dict['scope']
|
||||
normalized_data[columns] = normalized_data[columns].astype(np.float64)
|
||||
|
||||
|
||||
if scope == 'subject':
|
||||
# Apply per-subject normalization
|
||||
for subject in data['subjectID'].unique():
|
||||
subject_mask = data['subjectID'] == subject
|
||||
|
||||
# Use the subject's scaler if available, otherwise use fallback
|
||||
# Use the subject's scaler if available, otherwise use a fitted scaler from training
|
||||
if subject in scalers:
|
||||
scaler = scalers[subject]
|
||||
else:
|
||||
# Use averaged scaler for new subjects
|
||||
scaler = scalers['_fallback']
|
||||
print(f"Info: Subject {subject} not in training data. Using averaged scaler from training subjects.")
|
||||
# For new subjects not seen in training, use the first available scaler
|
||||
# (This is a fallback - ideally all test subjects should be in training for subject-level normalization)
|
||||
print(f"Warning: Subject {subject} not found in training data. Using fallback scaler.")
|
||||
scaler = list(scalers.values())[0]
|
||||
|
||||
normalized_data.loc[subject_mask, columns] = scaler.transform(
|
||||
data.loc[subject_mask, columns].values
|
||||
normalized_data.loc[subject_mask, au_columns] = scaler.transform(
|
||||
data.loc[subject_mask, au_columns]
|
||||
)
|
||||
|
||||
elif scope == 'global':
|
||||
# Apply global normalization
|
||||
scaler = scalers['global']
|
||||
normalized_data[columns] = scaler.transform(data[columns].values)
|
||||
normalized_data[au_columns] = scaler.transform(data[au_columns])
|
||||
|
||||
return normalized_data
|
||||
|
||||
|
||||
|
||||
def save_normalizer(normalizer_dict, filepath):
|
||||
"""
|
||||
Save fitted normalizer to disk.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
normalizer_dict : dict
|
||||
Dictionary containing fitted scalers from fit_normalizer()
|
||||
filepath : str
|
||||
Path to save the normalizer (e.g., 'normalizer.pkl')
|
||||
"""
|
||||
# Create directory if it does not exist
|
||||
dirpath = os.path.dirname(filepath)
|
||||
if dirpath:
|
||||
os.makedirs(dirpath, exist_ok=True)
|
||||
|
||||
with open(filepath, 'wb') as f:
|
||||
pickle.dump(normalizer_dict, f)
|
||||
|
||||
print(f"Normalizer saved to {filepath}")
|
||||
|
||||
def load_normalizer(filepath):
|
||||
"""
|
||||
Load fitted normalizer from disk.
|
||||
|
||||
Parameters:
|
||||
-----------
|
||||
filepath : str
|
||||
Path to the saved normalizer file
|
||||
|
||||
Returns:
|
||||
--------
|
||||
dict
|
||||
Dictionary containing fitted scalers
|
||||
"""
|
||||
with open(filepath, 'rb') as f:
|
||||
normalizer_dict = pickle.load(f)
|
||||
print(f"Normalizer loaded from {filepath}")
|
||||
return normalizer_dict
|
||||
return normalized_data
|
||||
11
predict_pipeline/check_python_version.py
Normal file
11
predict_pipeline/check_python_version.py
Normal file
@ -0,0 +1,11 @@
|
||||
# from tools import db_helpers
|
||||
import sys
|
||||
|
||||
|
||||
def main():
    """Print the running interpreter's version string (deployment sanity check)."""
    version_string = sys.version
    print(version_string)
    # db_helpers.add_columns_to_table()


if __name__ == "__main__":
    main()
||||
117
predict_pipeline/config.yaml
Normal file
117
predict_pipeline/config.yaml
Normal file
@ -0,0 +1,117 @@
|
||||
database:
|
||||
path: "C:\\repo\\Fahrsimulator_MSY2526_AI\\predict_pipeline\\database.sqlite"
|
||||
table: feature_table
|
||||
key: _Id
|
||||
|
||||
model:
|
||||
path: "C:\\repo\\Fahrsimulator_MSY2526_AI\\files_for_testing\\xgb_model_3_groupK.joblib"
|
||||
|
||||
scaler:
|
||||
use_scaling: True
|
||||
path: "C:\\repo\\Fahrsimulator_MSY2526_AI\\predict_pipeline\\normalizer_min_max_global.pkl"
|
||||
|
||||
mqtt:
|
||||
enabled: true
|
||||
host: "localhost"
|
||||
port: 1883
|
||||
topic: "ml/predictions"
|
||||
client_id: "predictor-01"
|
||||
qos: 1
|
||||
retain: false
|
||||
# username: ""
|
||||
# password: ""
|
||||
tls:
|
||||
enabled: false
|
||||
# ca_cert: ""
|
||||
# client_cert: ""
|
||||
# client_key: ""
|
||||
publish_format:
|
||||
result_key: prediction # where to store the predicted value in payload
|
||||
include_metadata: true # e.g., timestamps, rowid, etc.
|
||||
|
||||
sample:
|
||||
columns:
|
||||
- _Id
|
||||
- start_time
|
||||
- FACE_AU01_mean
|
||||
- FACE_AU02_mean
|
||||
- FACE_AU04_mean
|
||||
- FACE_AU05_mean
|
||||
- FACE_AU06_mean
|
||||
- FACE_AU07_mean
|
||||
- FACE_AU09_mean
|
||||
- FACE_AU10_mean
|
||||
- FACE_AU11_mean
|
||||
- FACE_AU12_mean
|
||||
- FACE_AU14_mean
|
||||
- FACE_AU15_mean
|
||||
- FACE_AU17_mean
|
||||
- FACE_AU20_mean
|
||||
- FACE_AU23_mean
|
||||
- FACE_AU24_mean
|
||||
- FACE_AU25_mean
|
||||
- FACE_AU26_mean
|
||||
- FACE_AU28_mean
|
||||
- FACE_AU43_mean
|
||||
- Fix_count_short_66_150
|
||||
- Fix_count_medium_300_500
|
||||
- Fix_count_long_gt_1000
|
||||
- Fix_count_100
|
||||
- Fix_mean_duration
|
||||
- Fix_median_duration
|
||||
- Sac_count
|
||||
- Sac_mean_amp
|
||||
- Sac_mean_dur
|
||||
- Sac_median_dur
|
||||
- Blink_count
|
||||
- Blink_mean_dur
|
||||
- Blink_median_dur
|
||||
- Pupil_mean
|
||||
- Pupil_IPA
|
||||
|
||||
fill_nan_with_median: true
|
||||
discard_if_all_nan: true
|
||||
|
||||
fallback:
|
||||
- start_time: 0
|
||||
- FACE_AU01_mean: 0.5
|
||||
- FACE_AU02_mean: 0.5
|
||||
- FACE_AU04_mean: 0.5
|
||||
- FACE_AU05_mean: 0.5
|
||||
- FACE_AU06_mean: 0.5
|
||||
- FACE_AU07_mean: 0.5
|
||||
- FACE_AU09_mean: 0.5
|
||||
- FACE_AU10_mean: 0.5
|
||||
- FACE_AU11_mean: 0.5
|
||||
- FACE_AU12_mean: 0.5
|
||||
- FACE_AU14_mean: 0.5
|
||||
- FACE_AU15_mean: 0.5
|
||||
- FACE_AU17_mean: 0.5
|
||||
- FACE_AU20_mean: 0.5
|
||||
- FACE_AU23_mean: 0.5
|
||||
- FACE_AU24_mean: 0.5
|
||||
- FACE_AU25_mean: 0.5
|
||||
- FACE_AU26_mean: 0.5
|
||||
- FACE_AU28_mean: 0.5
|
||||
- FACE_AU43_mean: 0.5
|
||||
- Fix_count_short_66_150: 2
|
||||
- Fix_count_medium_300_500: 2
|
||||
- Fix_count_long_gt_1000: 2
|
||||
- Fix_count_100: 2
|
||||
- Fix_mean_duration: 100
|
||||
- Fix_median_duration: 100
|
||||
- Sac_count: 2
|
||||
- Sac_mean_amp: 2
|
||||
- Sac_mean_dur: 100
|
||||
- Sac_median_dur: 100
|
||||
- Blink_count: 2
|
||||
- Blink_mean_dur: 2
|
||||
- Blink_median_dur: 2
|
||||
- Pupil_mean: 2
|
||||
- Pupil_IPA: 2
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
9
predict_pipeline/feature_extraction.py
Normal file
9
predict_pipeline/feature_extraction.py
Normal file
@ -0,0 +1,9 @@
|
||||
import sqlite3
|
||||
|
||||
def main():
    """Placeholder entry point for the feature-extraction step.

    Not implemented yet; returns 0 as a neutral success code.
    """
    return 0


if __name__ == "__main__":
    main()
||||
211
predict_pipeline/fill_db.ipynb
Normal file
211
predict_pipeline/fill_db.ipynb
Normal file
@ -0,0 +1,211 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0d70a13f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"sys.path.append('/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/tools')\n",
|
||||
"import pandas as pd\n",
|
||||
"from pathlib import Path\n",
|
||||
"import db_helpers"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ce696366",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"database_path = Path(r\"/home/edgekit/MSY_FS/databases/rawdata.sqlite\")\n",
|
||||
"parquet_path = Path(r\"/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/files_for_testing/both_mod_0000.parquet\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b1aa9398",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset = pd.read_parquet(parquet_path)\n",
|
||||
"dataset.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b183746e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset.dtypes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "24ed769d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"con, cursor = db_helpers.connect_db(database_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e604ed30",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df_clean = dataset.drop(columns=['subjectID','rowID', 'STUDY', 'LEVEL', 'PHASE'])\n",
|
||||
"df_first_100 = df_clean.head(200)\n",
|
||||
"df_first_100 = df_first_100.reset_index(drop=True)\n",
|
||||
"df_first_100.insert(0, '_Id', df_first_100.index + 1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e77a812e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def pandas_to_sqlite_dtype(dtype):\n",
|
||||
" if pd.api.types.is_integer_dtype(dtype):\n",
|
||||
" return \"INTEGER\"\n",
|
||||
" if pd.api.types.is_float_dtype(dtype):\n",
|
||||
" return \"REAL\"\n",
|
||||
" if pd.api.types.is_bool_dtype(dtype):\n",
|
||||
" return \"INTEGER\"\n",
|
||||
" if pd.api.types.is_datetime64_any_dtype(dtype):\n",
|
||||
" return \"TEXT\"\n",
|
||||
" return \"TEXT\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0e8897b2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"columns = {\n",
|
||||
" col: pandas_to_sqlite_dtype(dtype)\n",
|
||||
" for col, dtype in df_first_100.dtypes.items()\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"constraints = {\n",
|
||||
" \"_Id\": [\"NOT NULL\"]\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"primary_key = {\n",
|
||||
" \"pk_df_first_100\": [\"_Id\"]\n",
|
||||
"}\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4ab57624",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sql = db_helpers.create_table(\n",
|
||||
" conn=con,\n",
|
||||
" cursor=cursor,\n",
|
||||
" table_name=\"rawdata\",\n",
|
||||
" columns=columns,\n",
|
||||
" constraints=constraints,\n",
|
||||
" primary_key=primary_key,\n",
|
||||
" commit=True\n",
|
||||
")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "25096a7f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"columns_to_insert = {\n",
|
||||
" col: df_first_100[col].tolist()\n",
|
||||
" for col in df_first_100.columns\n",
|
||||
"}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7a5a3aa8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db_helpers.insert_rows_into_table(\n",
|
||||
" conn=con,\n",
|
||||
" cursor=cursor,\n",
|
||||
" table_name=\"rawdata\",\n",
|
||||
" columns=columns_to_insert,\n",
|
||||
" commit=True\n",
|
||||
")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b56beae2",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"a = db_helpers.get_data_from_table(conn=con, table_name='rawdata',columns_list=['*'])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a4a74a9d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"a.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "da0f8737",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"db_helpers.disconnect_db(con, cursor)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "MSY_FS_env",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
253
predict_pipeline/predict_sample.py
Normal file
253
predict_pipeline/predict_sample.py
Normal file
@ -0,0 +1,253 @@
|
||||
# Imports
|
||||
import pandas as pd
|
||||
import json
|
||||
from pathlib import Path
|
||||
import numpy as np
|
||||
import sys
|
||||
import yaml
|
||||
import pickle
|
||||
sys.path.append('/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/tools')
|
||||
# sys.path.append(r"c:\\repo\\Fahrsimulator_MSY2526_AI\\tools")
|
||||
import db_helpers
|
||||
import joblib
|
||||
|
||||
def _load_serialized(path: Path):
|
||||
suffix = path.suffix.lower()
|
||||
if suffix == ".pkl":
|
||||
with path.open("rb") as f:
|
||||
return pickle.load(f)
|
||||
if suffix == ".joblib":
|
||||
return joblib.load(path)
|
||||
raise ValueError(f"Unsupported file format: {suffix}. Use .pkl or .joblib.")
|
||||
|
||||
def getLastEntryFromSQLite(path, table_name, key="_Id"):
    """Fetch the most recent row (highest *key*) from *table_name*.

    Opens a connection via ``db_helpers``, always closes it (no commit),
    and returns the row as a pandas Series. An empty Series of dtype
    ``object`` is returned when the table has no rows.
    """
    conn, cursor = db_helpers.connect_db(path)
    try:
        latest = db_helpers.get_data_from_table(
            conn=conn,
            table_name=table_name,
            order_by={key: "DESC"},
            limit=1,
        )
    finally:
        # Read-only access: release the connection without committing.
        db_helpers.disconnect_db(conn, cursor, commit=False)

    if latest.empty:
        return pd.Series(dtype="object")
    return latest.iloc[0]
|
||||
def callModel(sample, model_path):
    """Load a serialized model and return its prediction for *sample*.

    Parameters
    ----------
    sample : array-like
        1-D feature vector or 2-D batch of feature vectors.
    model_path : str | Path
        Path to a ``.pkl`` or ``.joblib`` model file; relative paths are
        resolved against the current working directory.

    Returns
    -------
    A Python scalar when the model emits a single value, otherwise a
    squeezed numpy array.

    Raises
    ------
    TypeError
        If *sample* is a callable (common mistake: passing the function
        instead of its result) or the loaded object cannot predict.
    ValueError
        For unsupported model file extensions.
    """
    if callable(sample):
        raise TypeError(
            f"Invalid sample type: got callable `{getattr(sample, '__name__', type(sample).__name__)}`. "
            "Expected numpy array / pandas row."
        )

    model_path = Path(model_path)
    if not model_path.is_absolute():
        model_path = Path.cwd() / model_path
    model_path = model_path.resolve()

    suffix = model_path.suffix.lower()
    if suffix in {".pkl", ".joblib"}:
        model = _load_serialized(model_path)
    # elif suffix == ".keras":
    #     import tensorflow as tf
    #     model = tf.keras.models.load_model(model_path)
    else:
        # BUG FIX: previously unsupported suffixes (including ".keras",
        # whose loader is commented out) fell through with `model` unbound
        # and crashed below with a confusing NameError.
        raise ValueError(f"Unsupported model format: {suffix}. Use .pkl or .joblib.")

    x = np.asarray(sample, dtype=np.float32)
    if x.ndim == 1:
        # Models expect a 2-D batch; promote a single vector to shape (1, n).
        x = x.reshape(1, -1)

    # The current model consumes only the first 20 features.
    # NOTE(review): keep this slice in sync with the feature list in
    # config.yaml; a future 35-feature model must widen it to [:, :35].
    if hasattr(model, "predict"):
        prediction = model.predict(x[:, :20])
    elif callable(model):
        prediction = model(x[:, :20])
    else:
        raise TypeError("Loaded model has no .predict(...) and is not callable.")

    prediction = np.asarray(prediction)
    if prediction.size == 1:
        return prediction.item()
    return prediction.squeeze()
|
||||
def buildMessage(valid, result: np.int32, config_file_path, sample=None):
    """Assemble the MQTT payload dict for one prediction.

    The key under which the prediction is stored comes from
    ``mqtt.publish_format.result_key`` in the YAML config (default
    ``"prediction"``). The sample's ``_Id``/``_id`` is attached when
    *sample* is a Series or dict.
    """
    with Path(config_file_path).open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    publish_format = cfg.get("mqtt", {}).get("publish_format", {})
    result_key = publish_format.get("result_key", "prediction")

    # Both pandas Series and dict expose .get with a default.
    sample_id = None
    if isinstance(sample, (pd.Series, dict)):
        sample_id = sample.get("_Id", sample.get("_id"))

    if isinstance(result, np.ndarray):
        payload_value = np.asarray(result).tolist()
    else:
        payload_value = result

    return {
        "valid": bool(valid),
        "_id": sample_id,
        result_key: payload_value,
    }
|
||||
def convert_int64(obj):
    """Recursively convert numpy scalars to native Python types for JSON.

    The original version handled only ``np.int64``, but the pipeline can
    produce other numpy scalars (the prediction is annotated as
    ``np.int32`` in buildMessage, and models may return float32), all of
    which ``json.dumps`` rejects. ``np.generic`` covers every numpy
    scalar type and ``.item()`` yields the matching Python value.

    Dicts and lists are walked recursively; any other object is returned
    unchanged.
    """
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, dict):
        return {key: convert_int64(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [convert_int64(item) for item in obj]
    return obj
|
||||
def sendMessage(config_file_path, message):
    """Serialize *message* to JSON and (for now) print it.

    The MQTT publish itself is stubbed out; the commented code below
    shows the intended paho-mqtt call driven by the ``mqtt`` section of
    the YAML config.
    """
    # Load the configuration
    with Path(config_file_path).open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    # Get MQTT configuration
    mqtt_cfg = cfg.get("mqtt", {})
    topic = mqtt_cfg.get("topic", "ml/predictions")

    # Strip numpy scalar types so json.dumps does not fail.
    cleaned = convert_int64(message)

    payload = json.dumps(cleaned, ensure_ascii=False)
    print(payload)

    # Later: publish via MQTT using config parameters above.
    # Example (kept commented intentionally):
    # import paho.mqtt.client as mqtt
    # client = mqtt.Client(client_id=mqtt_cfg.get("client_id", "predictor-01"))
    # if "username" in mqtt_cfg and mqtt_cfg.get("username"):
    #     client.username_pw_set(mqtt_cfg["username"], mqtt_cfg.get("password"))
    # client.connect(mqtt_cfg.get("host", "localhost"), int(mqtt_cfg.get("port", 1883)), 60)
    # client.publish(
    #     topic=topic,
    #     payload=payload,
    #     qos=int(mqtt_cfg.get("qos", 1)),
    #     retain=bool(mqtt_cfg.get("retain", False)),
    # )
    # client.disconnect()
    return
|
||||
def replace_nan(sample, config_file_path: Path):
    """Validate NaN density of a sample and fill NaNs from config fallbacks.

    Returns ``(valid, sample)``: *valid* is False when the sample is
    empty or more than 50% of its values are NaN. For valid samples,
    NaNs are replaced by the per-feature fallback values listed under
    ``fallback`` in the YAML config (a list of one-entry dicts).
    """
    with config_file_path.open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    # Flatten the YAML list-of-dicts into one {column: fallback_value} map.
    fallback_map = {}
    for entry in cfg.get("fallback", []):
        if isinstance(entry, dict):
            fallback_map.update(entry)

    if sample.empty:
        return False, sample

    valid = sample.isna().mean() <= 0.5
    if valid and fallback_map:
        sample = sample.fillna(value=fallback_map)

    return valid, sample
|
||||
def sample_to_numpy(sample, drop_cols=("_Id", "start_time")):
    """Convert a pandas row or frame to a plain numpy array.

    Bookkeeping columns in *drop_cols* (row id, timestamp) are removed
    because they are not model features; missing columns are ignored.
    Non-pandas inputs are passed through ``np.asarray``.
    """
    if isinstance(sample, pd.Series):
        trimmed = sample.drop(labels=list(drop_cols), errors="ignore")
        return trimmed.to_numpy()
    if isinstance(sample, pd.DataFrame):
        trimmed = sample.drop(columns=list(drop_cols), errors="ignore")
        return trimmed.to_numpy()
    return np.asarray(sample)
|
||||
def scale_sample(sample, use_scaling=False, scaler_path=None):
    """Apply a persisted feature scaler to *sample*, best-effort.

    Parameters
    ----------
    sample : pd.Series | pd.DataFrame
        One feature row (Series) or a batch (DataFrame).
    use_scaling : bool
        When False (or *scaler_path* is None) the sample passes through
        unchanged.
    scaler_path : str | Path | None
        Path to a serialized normalizer dict or raw sklearn-style scaler.

    Returns the scaled sample in the same container type as the input.
    Note: every fallback path returns the sample UNSCALED rather than
    raising — callers cannot tell whether scaling happened.
    """
    if not use_scaling or scaler_path is None:
        return sample
    # Resolve relative paths against the current working directory.
    scaler_path = Path(scaler_path)
    if not scaler_path.is_absolute():
        scaler_path = Path.cwd() / scaler_path
    scaler_path = scaler_path.resolve()
    normalizer = _load_serialized(scaler_path)

    # normalizer format from model_training/tools/scaler.py:
    # {"scalers": {...}, "method": "...", "scope": "..."}
    scalers = normalizer.get("scalers", {}) if isinstance(normalizer, dict) else {}
    scope = normalizer.get("scope", "global") if isinstance(normalizer, dict) else "global"
    if scope == "global":
        scaler = scalers.get("global")
    else:
        # Per-subject scope: prefer a "global" entry, else fall back to
        # any stored scaler (None when the dict is empty).
        scaler = scalers.get("global", next(iter(scalers.values()), None))

    # Optional fallback if the stored object is already a raw scaler.
    if scaler is None and hasattr(normalizer, "transform"):
        scaler = normalizer
    if scaler is None or not hasattr(scaler, "transform"):
        # No usable scaler found: return the sample unscaled.
        return sample

    # Work on a single-row DataFrame so Series and DataFrame inputs share
    # one code path; copy so the caller's frame is never mutated.
    df = sample.to_frame().T if isinstance(sample, pd.Series) else sample.copy()
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is None:
        # Scaler was fitted without column names; we cannot align columns
        # safely, so skip scaling entirely.
        return sample

    # Keep columns not in the normalizer unchanged.
    cols_to_scale = [c for c in df.columns if c in set(feature_names)]
    if cols_to_scale:
        df.loc[:, cols_to_scale] = scaler.transform(df.loc[:, cols_to_scale])

    # Restore the input container type (Series in -> Series out).
    return df.iloc[0] if isinstance(sample, pd.Series) else df
|
||||
def main():
    """Run one end-to-end prediction for the newest database row.

    Pipeline: fetch latest row -> validate/fill NaNs -> optional scaling
    -> model prediction -> build and send the MQTT message.
    """
    pd.set_option('future.no_silent_downcasting', True)  # may be removable later

    config_file_path = Path("/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/predict_pipeline/config.yaml")
    with config_file_path.open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    db_cfg = cfg["database"]
    sample = getLastEntryFromSQLite(db_cfg["path"], db_cfg["table"], db_cfg["key"])

    valid, sample = replace_nan(sample, config_file_path=config_file_path)
    if not valid:
        # Too sparse to predict on: publish an invalid-sample message.
        print("Sample invalid: more than 50% NaN.")
        message = buildMessage(valid, None, config_file_path, sample=sample)
        sendMessage(config_file_path, message)
        return

    scaler_cfg = cfg["scaler"]
    sample = scale_sample(
        sample,
        use_scaling=scaler_cfg["use_scaling"],
        scaler_path=scaler_cfg["path"],
    )
    features = sample_to_numpy(sample)

    prediction = callModel(model_path=cfg["model"]["path"], sample=features)

    message = buildMessage(valid, prediction, config_file_path, sample=sample)
    sendMessage(config_file_path, message)


if __name__ == "__main__":
    main()
||||
|
||||
|
||||
|
||||
78
smallerenv.yaml
Normal file
78
smallerenv.yaml
Normal file
@ -0,0 +1,78 @@
|
||||
# ============================================================
|
||||
# SMALLER ENVIRONMENT - Korrigiert & Erweitert
|
||||
# Für Fahrsimulator-Projekt mit ML & IoT
|
||||
# ============================================================
|
||||
|
||||
name: smaller_env
|
||||
channels:
|
||||
- conda-forge
|
||||
- defaults
|
||||
|
||||
dependencies:
|
||||
# ====== PYTHON ======
|
||||
  - python=3.8 # Compatible with the Jetson Nano
|
||||
|
||||
# ====== CORE DATA SCIENCE ======
|
||||
- numpy=1.19.5
|
||||
- pandas=1.3.5
|
||||
- scipy=1.7.3
|
||||
  - scikit-learn=1.0.2 # imported as "sklearn"
|
||||
|
||||
# ====== VISUALIZATION ======
|
||||
|
||||
# ====== ML/DL SUPPORT ======
|
||||
- h5py=3.6.0
|
||||
- joblib=1.1.0
|
||||
|
||||
# ====== VIDEO PROCESSING ======
|
||||
- moviepy=1.0.3
|
||||
|
||||
# ====== MACHINE LEARNING ======
|
||||
- xgboost=1.5.2
|
||||
|
||||
# ====== FILE FORMATS ======
|
||||
- pyyaml # yaml Modul
|
||||
|
||||
# ====== IoT & COMMUNICATION (NEU) ======
|
||||
- paho-mqtt=1.6.1 # MQTT Client
|
||||
|
||||
# ====== DATABASE (NEU) ======
|
||||
# sqlite3 ist bereits in Python eingebaut!
|
||||
|
||||
# ====== UTILITIES ======
|
||||
- tqdm=4.64.1 # Progress bars
|
||||
- requests=2.28.1 # HTTP requests
|
||||
|
||||
# ====== PIP PACKAGES ======
|
||||
- pip
|
||||
- pip:
|
||||
# TensorFlow (wird separat für Jetson installiert)
|
||||
# - tensorflow==2.7.0 # Jetson: via NVIDIA repo installieren
|
||||
|
||||
# Eye-tracking Analysis
|
||||
- pygazeanalyser==0.2.0
|
||||
|
||||
|
||||
# ML Detection (falls vorhanden auf PyPI)
|
||||
# - detectors # Prüfen ob verfügbar
|
||||
# - feat # Prüfen ob verfügbar
|
||||
|
||||
# MQTT zusätzlich via pip falls conda Version Probleme macht
|
||||
# - paho-mqtt==1.6.1
|
||||
|
||||
# ============================================================
|
||||
# HINWEISE:
|
||||
# ============================================================
|
||||
|
||||
#
|
||||
# 3. TENSORFLOW FÜR JETSON:
|
||||
# Installiere nach Environment-Erstellung separat:
|
||||
# pip3 install --extra-index-url https://developer.download.nvidia.com/compute/redist/jp/v46 tensorflow==2.7.0+nv22.1
|
||||
#
|
||||
# 4. SQLITE3:
|
||||
# Ist bereits in Python eingebaut, keine Installation nötig!
|
||||
# Import: import sqlite3
|
||||
#
|
||||
# 5. MQTT:
|
||||
# paho-mqtt ist der Standard MQTT-Client für Python
|
||||
# Broker-Empfehlungen: Mosquitto, HiveMQ, EMQX
|
||||
Loading…
x
Reference in New Issue
Block a user