# Intermediate Fusion mit Deep SVDD

* Input: gemeinsames Dataset aus Eye-Tracking und Action Units mit derselben Abtastfrequenz
* Verarbeitung: Intermediate Fusion
* Modell: Deep SVDD --> ein neuronales Netz erlernt eine Kugel (Hypersphäre), die die Normaldaten einschließt

### Imports + GPU 

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import sys
import os
import time
base_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(base_dir)
print(base_dir)

from Fahrsimulator_MSY2526_AI.model_training.tools import evaluation_tools, scaler, mad_outlier_removal, performance_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import OneClassSVM
from sklearn.model_selection import GridSearchCV, KFold, ParameterGrid, train_test_split, GroupKFold
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models, regularizers
import pickle
from sklearn.metrics import (roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report, balanced_accuracy_score, ConfusionMatrixDisplay, auc, roc_curve) 

In [None]:
# Report the TensorFlow build and the GPUs it can see, then switch the
# visible GPUs to on-demand memory allocation.
gpus = tf.config.list_physical_devices('GPU')

print("TensorFlow version:", tf.__version__)
print("GPU Available:", gpus)
print("CUDA Available:", tf.test.is_built_with_cuda())

if not gpus:
    print("\nNo GPU found - running on CPU")
else:
    print(f"\nNumber of GPUs: {len(gpus)}")
    for gpu in gpus:
        print(f"GPU: {gpu}")

    # Memory growth stops TensorFlow from reserving all GPU memory up front.
    # A RuntimeError here is only reported, not re-raised.
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("\nGPU memory growth enabled")
    except RuntimeError as e:
        print(e)

### Data Preprocessing

Laden der Daten

In [None]:
# Location of the combined parquet dataset, relative to the working directory.
# Presumably sampled at 25 Hz (per the file name) - confirm against the data.
dataset_path = Path(r"data-paulusjafahrsimulator-gpu/new_datasets/combined_dataset_25hz.parquet")

In [None]:
# Load the full dataset; later cells expect the columns subjectID, STUDY,
# PHASE, LEVEL, the FACE_AU* columns and the eye-tracking feature columns.
df = pd.read_parquet(path=dataset_path)

In [None]:
# Per-subject performance table consumed by the performance-based split below.
# NOTE(review): this is an absolute path, while every other file in this
# notebook is addressed relative to the working directory - confirm it
# resolves in all environments.
performance_path = Path(r"/home/jovyan/data-paulusjafahrsimulator-gpu/subject_performance/3new_au_performance.csv")
performance_df = pd.read_csv(performance_path)

Performance based split

In [None]:
# Stage 1: split all subjects 60/40 into train and a temporary pool.
# The third return value is printed below; presumably it is the performance
# gap between the two groups - see tools.performance_split to confirm.
train_ids, temp_ids, diff1 = performance_split.performance_based_split(
    performance_df=performance_df,
    subject_ids=df["subjectID"].unique(),
    random_seed=42,
    split_ratio=0.6,
)

# Stage 2: split the temporary pool in half into validation and test.
val_ids, test_ids, diff2 = performance_split.performance_based_split(
    performance_df=performance_df,
    subject_ids=temp_ids,
    random_seed=43,
    split_ratio=0.5,
)

print(diff1, diff2)

Labeling

In [None]:
# Named boolean masks make the label rules below readable.
is_baseline = df["PHASE"] == "baseline"
not_baseline = df["PHASE"] != "baseline"
is_nback = df["STUDY"] == "n-back"
is_kdrive = df["STUDY"] == "k-drive"

# Low load: every baseline phase, plus n-back levels 1 and 4 outside baseline.
low_nback = is_nback & not_baseline & df["LEVEL"].isin([1, 4])
low_all = df[is_baseline | low_nback]
print(f"low all: {low_all.shape}")

# High load (n-back): levels 2, 3, 5, 6 during the train/test phases.
high_nback_mask = (
    is_nback
    & df["LEVEL"].isin([2, 3, 5, 6])
    & df["PHASE"].isin(["train", "test"])
)
high_nback = df[high_nback_mask]
print(f"high n-back: {high_nback.shape}")

# High load (k-drive): everything that is not a baseline phase.
high_kdrive = df[is_kdrive & not_baseline]
print(f"high k-drive: {high_kdrive.shape}")

high_all = pd.concat([high_nback, high_kdrive])
print(f"high all: {high_all.shape}")

In [None]:
# Attach the binary target: 0 = low load (normal), 1 = high load (anomaly).
low = low_all.assign(label=0)
high = high_all.assign(label=1)

# Stack both classes and drop exact duplicate rows.
# Note: `df` is rebound here to the labelled frame; the cells below depend on it.
data = pd.concat([low, high], ignore_index=True)
df = data.drop_duplicates()

print("Label distribution:")
print(df["label"].value_counts())

Split

In [None]:
# Training set: only low-load (label 0) rows of the training subjects, because
# the one-class model is fitted on normal data only.
train_df = df[df["subjectID"].isin(train_ids) & (df["label"] == 0)].copy()

# Validation set: class-balanced sample from the validation subjects.
val_df_full = df[df["subjectID"].isin(val_ids)].copy()
val_df_label0 = val_df_full[val_df_full["label"] == 0]
val_df_label1 = val_df_full[val_df_full["label"] == 1]

# Balance on the smaller class. Sampling more rows than exist raises a
# ValueError, so label 0 is downsampled when label 1 is the minority.
n_samples = min(len(val_df_label0), len(val_df_label1))
if len(val_df_label0) > n_samples:
    val_df_label0 = val_df_label0.sample(n=n_samples, random_state=42)
val_df_label1 = val_df_label1.sample(n=n_samples, random_state=42)

val_df = pd.concat([val_df_label0, val_df_label1], ignore_index=True)

# Test set: all rows of the test subjects (not balanced). Copied so it is an
# independent frame like train_df and val_df_full.
test_df = df[df["subjectID"].isin(test_ids)].copy()

print(train_df.shape, val_df.shape, test_df.shape)

In [None]:
# Sanity check: both classes should appear equally often in the validation set.
val_df['label'].value_counts()

Normalization

In [None]:
def fit_normalizer(train_data, au_columns, method='standard', scope='global'):
    """
    Fit normalization scalers on training data.

    Parameters:
    -----------
    train_data : pd.DataFrame
        Training dataframe containing the feature columns and 'subjectID'
    au_columns : list
        List of feature column names to normalize
    method : str, default='standard'
        Normalization method: 'standard' for StandardScaler or 'minmax' for MinMaxScaler
    scope : str, default='global'
        Normalization scope: 'subject' for one scaler per subject or 'global'
        for a single scaler across all subjects

    Returns:
    --------
    dict
        {'scalers': ..., 'method': method, 'scope': scope}. For scope='global'
        the scalers dict holds one entry under 'global'. For scope='subject'
        it holds one scaler per subject ID plus a '_fallback' scaler built
        from the averaged per-subject statistics, for subjects unseen during
        fitting.

    Raises:
    -------
    ValueError
        If method or scope is not supported, or if scope='subject' and
        train_data contains no subjects.
    """
    # Validate both options up front so a typo fails before any fitting.
    if method not in ('standard', 'minmax'):
        raise ValueError("method must be 'standard' or 'minmax'")
    if scope not in ('subject', 'global'):
        raise ValueError("scope must be 'subject' or 'global'")

    Scaler = StandardScaler if method == 'standard' else MinMaxScaler
    scalers = {}

    if scope == 'global':
        # One scaler fitted on all subjects together.
        global_scaler = Scaler()
        global_scaler.fit(train_data[au_columns].values)
        scalers['global'] = global_scaler
        return {'scalers': scalers, 'method': method, 'scope': scope}

    # scope == 'subject': one scaler per subject.
    subject_ids = train_data['subjectID'].unique()
    if len(subject_ids) == 0:
        # Averaging over zero subjects would silently yield NaN statistics.
        raise ValueError("train_data contains no subjects to fit scalers on")

    fitted = []
    for subject in subject_ids:
        subject_mask = train_data['subjectID'] == subject
        subject_scaler = Scaler()
        subject_scaler.fit(train_data.loc[subject_mask, au_columns].values)
        scalers[subject] = subject_scaler
        fitted.append(subject_scaler)

    # Fallback scaler for unseen subjects: average the per-subject statistics
    # and set the fitted attributes by hand.
    if method == 'standard':
        avg_mean = np.mean([s.mean_ for s in fitted], axis=0)
        avg_std = np.mean([s.scale_ for s in fitted], axis=0)
        fallback_scaler = StandardScaler()
        fallback_scaler.mean_ = avg_mean
        fallback_scaler.scale_ = avg_std
        fallback_scaler.var_ = avg_std ** 2
        fallback_scaler.n_features_in_ = len(au_columns)
    else:
        avg_min = np.mean([s.data_min_ for s in fitted], axis=0)
        avg_max = np.mean([s.data_max_ for s in fitted], axis=0)
        data_range = avg_max - avg_min
        # A feature that is constant for every subject has range 0. Dividing
        # by it would give inf/NaN after transform, so use 1 for those
        # features (the same idea scikit-learn applies when fitting).
        safe_range = np.where(data_range == 0.0, 1.0, data_range)
        fallback_scaler = MinMaxScaler()
        fallback_scaler.data_min_ = avg_min
        fallback_scaler.data_max_ = avg_max
        fallback_scaler.data_range_ = data_range
        fallback_scaler.scale_ = 1.0 / safe_range
        fallback_scaler.min_ = -avg_min * fallback_scaler.scale_
        fallback_scaler.n_features_in_ = len(au_columns)

    scalers['_fallback'] = fallback_scaler

    return {'scalers': scalers, 'method': method, 'scope': scope}

def apply_normalizer(data, columns, normalizer_dict):
    """
    Apply fitted normalization scalers to data.

    Parameters:
    -----------
    data : pd.DataFrame
        Dataframe containing the feature columns (and 'subjectID' when the
        normalizer was fitted with scope='subject'). It is not modified.
    columns : list
        List of column names to normalize
    normalizer_dict : dict
        Dictionary containing fitted scalers from fit_normalizer()

    Returns:
    --------
    pd.DataFrame
        Copy of `data` in which `columns` are float64 and normalized

    Raises:
    -------
    ValueError
        If normalizer_dict['scope'] is neither 'subject' nor 'global'.
    """
    scalers = normalizer_dict['scalers']
    scope = normalizer_dict['scope']
    if scope not in ('subject', 'global'):
        # Without this check an unknown scope would silently return
        # un-normalized data.
        raise ValueError("scope must be 'subject' or 'global'")

    normalized_data = data.copy()
    # Cast first so the scaled float values can be written into integer columns.
    normalized_data[columns] = normalized_data[columns].astype(np.float64)

    if scope == 'global':
        normalized_data[columns] = scalers['global'].transform(data[columns].values)
        return normalized_data

    # scope == 'subject': transform each subject's rows with its own scaler.
    for subject in data['subjectID'].unique():
        # Plain boolean array, so row selection does not rely on index alignment.
        subject_mask = (data['subjectID'] == subject).to_numpy()

        if subject in scalers:
            subject_scaler = scalers[subject]
        else:
            # Subject was not seen during fitting: use the averaged scaler.
            subject_scaler = scalers['_fallback']
            print(f"Info: Subject {subject} not in training data. Using averaged scaler from training subjects.")

        normalized_data.loc[subject_mask, columns] = subject_scaler.transform(
            data.loc[subject_mask, columns].values
        )

    return normalized_data


In [None]:
def save_normalizer(normalizer_dict, filepath):
    """
    Pickle a fitted normalizer to disk.

    Parameters:
    -----------
    normalizer_dict : dict
        Dictionary containing fitted scalers from fit_normalizer()
    filepath : str or path-like
        Destination file (e.g., 'normalizer.pkl'); missing parent
        directories are created.
    """
    parent_dir = os.path.dirname(filepath)
    if parent_dir:
        # An empty dirname means "current directory": nothing to create.
        os.makedirs(parent_dir, exist_ok=True)

    with open(filepath, 'wb') as handle:
        pickle.dump(normalizer_dict, handle)

    print(f"Normalizer saved to {filepath}")

def load_normalizer(filepath):
    """
    Read a pickled normalizer back from disk.

    Parameters:
    -----------
    filepath : str or path-like
        Path to the saved normalizer file

    Returns:
    --------
    dict
        Dictionary containing fitted scalers

    Note: unpickling executes code embedded in the file, so only load
    normalizer files from a trusted source.
    """
    with open(filepath, 'rb') as handle:
        restored = pickle.load(handle)

    print(f"Normalizer loaded from {filepath}")
    return restored

save Normalizer

In [None]:
# Target file for the pickled normalizer (written and re-read in the next cells).
normalizer_path=Path('data-paulusjafahrsimulator-gpu/saved_models/deepsvdd_save/normalizer.pkl')

In [None]:
# Modality 1: every facial action-unit column present in the training frame.
face_au_cols = [name for name in train_df.columns if name.startswith("FACE_AU")]

# Modality 2: fixed list of eye-tracking features.
eye_cols = [
    'Fix_count_short_66_150',
    'Fix_count_medium_300_500',
    'Fix_count_long_gt_1000',
    'Fix_count_100',
    'Fix_mean_duration',
    'Fix_median_duration',
    'Sac_count',
    'Sac_mean_amp',
    'Sac_mean_dur',
    'Sac_median_dur',
    'Blink_count',
    'Blink_mean_dur',
    'Blink_median_dur',
    'Pupil_mean',
    'Pupil_IPA',
]
print(len(eye_cols))

all_signal_columns = [*face_au_cols, *eye_cols]
print(len(all_signal_columns))

# Per-subject z-scoring fitted on the training rows only; subjects that are
# not part of train_df are later normalized with the averaged fallback scaler.
normalizer = fit_normalizer(
    train_df,
    all_signal_columns,
    scope='subject',
    method='standard',
)
save_normalizer(normalizer, normalizer_path)

In [None]:
# Reload the persisted normalizer and apply it to the three splits in order.
normalizer = load_normalizer(normalizer_path)

train_df_norm, val_df_norm, test_df_norm = [
    apply_normalizer(split_df, all_signal_columns, normalizer)
    for split_df in (train_df, val_df, test_df)
]

Outlier removal (later)

Convert the normalized pandas columns to float32 NumPy arrays for Keras

In [None]:
# One float32 matrix per modality, the input format of the two-branch network.
X_face, X_eye = (
    train_df_norm[modality_cols].to_numpy(dtype=np.float32)
    for modality_cols in (face_au_cols, eye_cols)
)

### Autoencoder Pre-Training

Vor-Training der Gewichte mit Autoencoder, Loss: MSE

In [None]:
def build_intermediate_fusion_autoencoder(
    input_dim_mod1=15,
    input_dim_mod2=20,
    encoder_hidden_dim_mod1=12,
    encoder_hidden_dim_mod2=20,
    latent_dim=6,
    dropout_rate=0.4,
    neg_slope=0.1,
    weight_decay=1e-4,
    decoder_hidden_dims=(16, 32),
):
    """
    Build the intermediate-fusion autoencoder used to pre-train the Deep SVDD encoder.

    Architecture:
    - Two modality-specific encoder branches: Dense(encoder_hidden_dim_modX)
      -> LeakyReLU -> Dropout -> Dense(16) -> LeakyReLU.
    - Fusion: concatenation of both 16-dimensional branch outputs (32 dims).
    - Joint encoder: Dense(latent_dim) -> LeakyReLU -> Dropout, followed by
      the linear Dense(latent_dim) layer named "latent" (the SVDD embedding).
    - Decoder: one Dense -> LeakyReLU stage per entry of `decoder_hidden_dims`,
      then one linear reconstruction head per modality.

    All Dense layers are bias-free. Every Dense layer except the two
    reconstruction heads carries an L2 kernel regularizer.

    NOTE(review): earlier notes on this function stated that dropout was
    removed in front of the bottleneck, but a Dropout layer is applied to the
    joint hidden layer that feeds "latent". Behaviour is kept as implemented;
    confirm which variant is intended.

    Parameters:
    -----------
    input_dim_mod1, input_dim_mod2 : int
        Number of input features of modality 1 / modality 2
    encoder_hidden_dim_mod1, encoder_hidden_dim_mod2 : int
        Width of the first hidden layer of each encoder branch
    latent_dim : int
        Width of the joint hidden layer and of the latent embedding
    dropout_rate : float
        Dropout rate used in the encoder
    neg_slope : float
        Negative slope of the shared LeakyReLU activation
    weight_decay : float
        L2 regularization factor
    decoder_hidden_dims : sequence of int, default=(16, 32)
        Widths of the decoder hidden layers, in order. Any length is
        accepted; an empty sequence connects the heads directly to "latent".

    Returns:
    --------
    tf.keras Model
        Inputs "modality_1" and "modality_2"; outputs "recon_modality_1" and
        "recon_modality_2".
    """
    l2 = regularizers.l2(weight_decay)
    # Single activation layer instance reused at every call site.
    act = layers.LeakyReLU(negative_slope=neg_slope)

    # -------- Inputs --------
    x1_in = layers.Input(shape=(input_dim_mod1,), name="modality_1")
    x2_in = layers.Input(shape=(input_dim_mod2,), name="modality_2")

    # -------- Encoder 1 --------
    e1 = layers.Dense(
        encoder_hidden_dim_mod1,
        use_bias=False,
        kernel_regularizer=l2
    )(x1_in)
    e1 = act(e1)
    e1 = layers.Dropout(dropout_rate)(e1)

    e1 = layers.Dense(
        16,
        use_bias=False,
        kernel_regularizer=l2
    )(e1)
    e1 = act(e1)

    # -------- Encoder 2 --------
    e2 = layers.Dense(
        encoder_hidden_dim_mod2,
        use_bias=False,
        kernel_regularizer=l2
    )(x2_in)
    e2 = act(e2)
    e2 = layers.Dropout(dropout_rate)(e2)

    e2 = layers.Dense(
        16,
        use_bias=False,
        kernel_regularizer=l2
    )(e2)
    e2 = act(e2)

    # -------- Intermediate fusion --------
    fused = layers.Concatenate(name="fusion")([e1, e2])  # 16 + 16 = 32 dims

    # -------- Joint encoder / bottleneck --------
    h = layers.Dense(
        latent_dim,
        use_bias=False,
        kernel_regularizer=l2
    )(fused)
    h = act(h)
    h = layers.Dropout(dropout_rate)(h)

    z = layers.Dense(
        latent_dim,
        activation=None,  # linear embedding used by Deep SVDD
        use_bias=False,
        kernel_regularizer=l2,
        name="latent"
    )(h)

    # -------- Decoder --------
    # One Dense + activation stage per requested width, instead of indexing
    # exactly two entries.
    d = z
    for units in decoder_hidden_dims:
        d = layers.Dense(
            units,
            use_bias=False,
            kernel_regularizer=l2
        )(d)
        d = act(d)

    x1_out = layers.Dense(
        input_dim_mod1,
        activation=None,
        use_bias=False,
        name="recon_modality_1"
    )(d)

    x2_out = layers.Dense(
        input_dim_mod2,
        activation=None,
        use_bias=False,
        name="recon_modality_2"
    )(d)

    model = models.Model(
        inputs=[x1_in, x2_in],
        outputs=[x1_out, x2_out],
        name="IntermediateFusionAE_Improved"
    )

    return model


In [None]:
# Autoencoder hyper-parameters for this run.
ae_config = dict(
    input_dim_mod1=len(face_au_cols),
    input_dim_mod2=len(eye_cols),
    encoder_hidden_dim_mod1=15,
    encoder_hidden_dim_mod2=10,
    latent_dim=8,
    dropout_rate=0.3,
    neg_slope=0.1,
    weight_decay=1e-3,
)
model = build_intermediate_fusion_autoencoder(**ae_config)

# Both reconstruction heads use MSE with equal weight; first stage uses a
# comparatively high learning rate.
recon_heads = ("recon_modality_1", "recon_modality_2")
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-2),
    loss=dict.fromkeys(recon_heads, "mse"),
    loss_weights=dict.fromkeys(recon_heads, 1.0),
)

batch_size_ae = 64
# model.summary()

In [None]:
# The autoencoder reconstructs its own inputs, so x and y are the same arrays.
ae_inputs = [X_face, X_eye]

# Stage 1: 150 epochs with the optimizer configured in the previous cell.
model.fit(
    x=ae_inputs,
    y=ae_inputs,
    epochs=150,
    batch_size=batch_size_ae,
    shuffle=True,
)

# Stage 2: re-compile with a much smaller learning rate and fine-tune for
# another 100 epochs (re-compiling keeps the weights but creates a new optimizer).
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss={"recon_modality_1": "mse", "recon_modality_2": "mse"},
    loss_weights={"recon_modality_1": 1.0, "recon_modality_2": 1.0},
)
model.fit(
    x=ae_inputs,
    y=ae_inputs,
    epochs=100,
    batch_size=batch_size_ae,
    shuffle=True,
)


In [None]:
# Cut the pre-trained autoencoder at the linear "latent" layer; the resulting
# sub-model shares its weights with `model`.
latent_output = model.get_layer("latent").output
encoder = tf.keras.Model(model.inputs, latent_output, name="SVDD_Encoder")

Speichern

In [None]:
# Persist the pre-trained encoder in the native Keras format.
# NOTE(review): the file name says "encoder_6" while the model above is built
# with latent_dim=8 - confirm the name is still accurate.
encoder_save_path =Path('data-paulusjafahrsimulator-gpu/saved_models/deepsvdd_save/encoder_6_deep.keras')
encoder.save(encoder_save_path)

Laden Encoder / Deepsvdd

In [None]:
# Reload the encoder from disk (replaces the in-memory `encoder` object), so
# the notebook can resume here without repeating the pre-training.
encoder_load_path = encoder_save_path
encoder = tf.keras.models.load_model(encoder_load_path)

Check whether the encoder works

In [None]:
# Smoke test: embed the training data and show the first six latent vectors.
ans = encoder.predict([X_face, X_eye])
print(ans[:6, :])

### Deep SVDD Training

In [None]:
# Load a second, independent copy of the pre-trained encoder; this copy is the
# network that the Deep SVDD training below updates.
encoder_load_path = encoder_save_path
deep_svdd_net = tf.keras.models.load_model(encoder_load_path) 

In [None]:
def get_center(model, dataset, eps=0.1):
    """
    Compute the hypersphere center as the mean embedding of `dataset`.

    Parameters:
    -----------
    model : object with a predict() method
        Network mapping `dataset` to a 2-D array of embeddings
    dataset : any input accepted by model.predict
        Data whose embeddings are averaged
    eps : float, default=0.1
        Minimum absolute value of each center coordinate

    Returns:
    --------
    np.ndarray
        1-D center vector with the dtype of the embeddings. Coordinates with
        |c| < eps are pushed to -eps (negative) or +eps (zero or positive), so
        no coordinate of the center lies close to zero.
    """
    center = model.predict(dataset).mean(axis=0)

    # In-place masked assignment keeps the dtype of the embeddings.
    too_small = np.abs(center) < eps
    negative = center < 0
    center[too_small & negative] = -eps
    center[too_small & ~negative] = eps

    return center
def dist_per_sample(output, center):
    """Return the squared Euclidean distance of each embedding to `center` (summed over the last axis)."""
    offset = output - center
    return tf.reduce_sum(tf.square(offset), axis=-1)

def score_per_sample(output, center, radius):
    """Return the anomaly score: squared distance to `center` minus radius**2 (positive means outside the sphere)."""
    squared_distance = dist_per_sample(output, center)
    return squared_distance - radius**2

def train_loss(output, center):
    """Return the batch loss: mean squared distance of the embeddings to `center`."""
    per_sample = dist_per_sample(output, center)
    return tf.reduce_mean(per_sample)

In [None]:
# Hypersphere center: mean embedding of the training data under the
# pre-trained network. It is computed once here and not updated by training.
center = get_center(deep_svdd_net, [X_face, X_eye])

In [None]:
# def get_radius(nu, dataset):
# x_face, x_eye = dataset # <-- zwingend entpacken

# dataset_tuple=[x_face, x_eye]

# dists = dist_per_sample(deep_svdd_net.predict(dataset_tuple), center)
# return np.quantile(np.sqrt(dists), 1-nu).astype(np.float32)

In [None]:
def get_radius_from_arrays(nu, X_face, X_eye):
    """
    Return the (1 - nu) quantile of the Euclidean distances to the center as float32.

    Uses the notebook-level `deep_svdd_net` and `center`. With this radius a
    fraction of about `nu` of the given samples lies outside the sphere.
    """
    embeddings = deep_svdd_net.predict([X_face, X_eye])
    squared_distances = dist_per_sample(embeddings, center)
    euclidean_distances = np.sqrt(squared_distances)
    radius_value = np.quantile(euclidean_distances, 1 - nu)
    return radius_value.astype(np.float32)

In [None]:
@tf.function
def train_step(batch):
    """
    Run one optimization step on `batch` and return the batch loss.

    Reads `deep_svdd_net`, `center` and `optimizer` from the notebook
    namespace. The network is called with training=True.
    """
    with tf.GradientTape() as tape:
        embeddings = deep_svdd_net(batch, training=True)
        loss_value = train_loss(embeddings, center)

    weights = deep_svdd_net.trainable_variables
    grads = tape.gradient(loss_value, weights)
    optimizer.apply_gradients(zip(grads, weights))

    return loss_value

In [None]:
def train(dataset, epochs, nu):
    """
    Train the Deep SVDD network for `epochs` passes over `dataset`.

    Prints the mean batch loss and wall-clock time per epoch, then returns
    the radius computed on the notebook-level training arrays X_face / X_eye
    for the given `nu`.
    """
    for epoch in range(epochs):
        start = time.time()
        losses = [train_step(batch) for batch in dataset]

        print(f'{epoch+1}/{epochs} epoch: Loss of {np.mean(losses)} ({time.time()-start} secs)')

    return get_radius_from_arrays(nu, X_face, X_eye)


# Fraction of training samples allowed to fall outside the learned sphere.
nu = 0.05

# The training rows come out of the dataframe in subject/phase order. A shuffle
# buffer of 64 only permutes neighbouring rows, so each batch would be
# dominated by a single subject. A buffer covering the whole dataset gives a
# uniform shuffle, re-drawn every epoch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_face, X_eye))
    .shuffle(buffer_size=len(X_face))
    .batch(64)
)

# Stage 1: 150 epochs at learning rate 1e-3 (the returned radius is discarded).
optimizer = tf.keras.optimizers.Adam(1e-3)
train(train_dataset, epochs=150, nu=nu)

# Stage 2: 100 fine-tuning epochs at 1e-4; the final radius is kept for scoring.
optimizer.learning_rate = 1e-4
radius = train(train_dataset, 100, nu=nu)

prepare valid & test set

In [None]:
# Test set
X_face_test = test_df_norm[face_au_cols].to_numpy(dtype=np.float32)
X_eye_test = test_df_norm[eye_cols].to_numpy(dtype=np.float32)
y_test = test_df_norm["label"].to_numpy(dtype=np.float32)

# Validation set
X_face_val = val_df_norm[face_au_cols].to_numpy(dtype=np.float32)
X_eye_val = val_df_norm[eye_cols].to_numpy(dtype=np.float32)
y_val = val_df_norm["label"].to_numpy(dtype=np.float32)

In [None]:
# Anomaly scores on the validation set (positive = outside the sphere).
valid_embeddings = deep_svdd_net.predict([X_face_val, X_eye_val])
valid_scores = score_per_sample(valid_embeddings, center, radius).numpy()

# Threshold-free quality: ROC curve and AUC with label 1 as the positive class.
valid_fpr, valid_tpr, _ = roc_curve(y_val, valid_scores, pos_label=1)
valid_auc = auc(valid_fpr, valid_tpr)

plt.figure()
plt.plot(valid_fpr, valid_tpr, 'b-')
plt.title('Deep SVDD')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.text(0.5, 0.5, f'AUC: {valid_auc:.4f}')
plt.show()

# Hard decisions: a sample is predicted anomalous when its score is positive.
valid_predictions = (valid_scores > 0).astype(int)

normal_rows = y_val == 0
anomaly_rows = y_val == 1
normal_acc = np.mean(valid_predictions[normal_rows] == 0)
anomaly_acc = np.mean(valid_predictions[anomaly_rows] == 1)

print(f'Accuracy on Validation set: {accuracy_score(y_val, valid_predictions)}')
print(f'Accuracy for normals: {normal_acc:.4f}')
print(f'Accuracy for anomalies: {anomaly_acc:.4f}')
print(f'F1 on Validation set: {f1_score(y_val, valid_predictions)}')

In [None]:
# Persist the trained Deep SVDD network.
deep_svdd_save_path = Path('data-paulusjafahrsimulator-gpu/saved_models/deepsvdd_save/deep_svdd_05.keras')
deep_svdd_net.save(deep_svdd_save_path)

# The network alone cannot score samples: score_per_sample() also needs the
# hypersphere center and radius. Store them (and nu) next to the model so the
# detector can be restored after a kernel restart.
deep_svdd_params_path = deep_svdd_save_path.with_name('deep_svdd_05_center_radius.npz')
np.savez(deep_svdd_params_path, center=center, radius=radius, nu=nu)

### Results

Validation set

In [None]:
# Recompute the hard decisions from the scores, then plot the confusion
# matrix for the validation set.
valid_predictions = (valid_scores > 0).astype(int)
evaluation_tools.plot_confusion_matrix(
    label_names=["low","high"],
    true_labels=y_val,
    predictions=valid_predictions,
)


Test set

In [None]:
# Score the held-out test subjects with the center and radius from training.
test_embeddings = deep_svdd_net.predict([X_face_test, X_eye_test])
test_scores = score_per_sample(test_embeddings, center, radius).numpy()

# Positive score = outside the sphere = predicted high load (1).
test_predictions = (test_scores > 0).astype(int)


In [None]:
# Confusion matrix for the test set. Unlike the validation set, the test set
# was not class-balanced when it was built.
evaluation_tools.plot_confusion_matrix(true_labels=y_test, predictions=test_predictions, label_names=["low","high"])
