- added notes for XGBoost without accuracy details
- deleted unused files for data_creation and modified the project_report file overview - translated the documentation for the pyfeat implementation
This commit is contained in:
parent
c439e35e39
commit
4df1187f84
@ -1,58 +0,0 @@
|
|||||||
from feat import Detector
|
|
||||||
from feat.utils.io import get_test_data_path
|
|
||||||
from moviepy.video.io.VideoFileClip import VideoFileClip
|
|
||||||
import os
|
|
||||||
|
|
||||||
def extract_aus(path, model, skip_frames):
    """Extract summed Action Units (AUs) from a video using py-feat.

    Args:
        path: Path to the video file to analyse.
        model: Name of the py-feat AU model to use (e.g. 'svm' or 'xgb').
        skip_frames: Number of frames to skip between analysed frames.

    Returns:
        The sum of the detected action-unit activations over all analysed
        frames (as returned by ``video_prediction.aus.sum()``).
    """
    detector = Detector(au_model=model)

    video_prediction = detector.detect(
        path, data_type="video", skip_frames=skip_frames, face_detection_threshold=0.95 # include every 5 seconds - 24 frames per second
    )

    return video_prediction.aus.sum()
|
|
||||||
|
|
||||||
def split_video(path, chunk_length=120):
    """Split a video into chunks of at most ``chunk_length`` seconds.

    The chunks are written (without audio, at the source frame rate) as
    MP4 files into a ``subclips`` directory next to the input video.

    Args:
        path: Path to the input video file.
        chunk_length: Maximum length of one chunk in seconds.

    Returns:
        List of file paths of all written subclips.
    """
    video = VideoFileClip(path)
    duration = int(video.duration)

    # BUG FIX: os.dirname does not exist -> os.path.dirname
    subclips_dir = os.path.join(os.path.dirname(path), "subclips")
    os.makedirs(subclips_dir, exist_ok=True)
    paths = []

    for start in range(0, duration, chunk_length):
        end = min(start + chunk_length, duration)

        subclip = (
            video
            .subclip(start, end)
            .without_audio()
            .set_fps(video.fps)
        )

        # BUG FIX: write the chunks INTO the subclips directory instead of
        # creating "subclips_part_N.mp4" siblings next to it (the created
        # directory was otherwise never used).
        output_path = os.path.join(subclips_dir, f"part_{start//chunk_length + 1}.mp4")
        subclip.write_videofile(
            output_path,
        )
        paths.append(output_path)

    # BUG FIX: return the full list of chunk paths, not only the last one.
    # Callers iterate over the result; returning a single string would make
    # them iterate over its characters.
    return paths
|
|
||||||
|
|
||||||
# def start(path):
|
|
||||||
# results = []
|
|
||||||
# clips = split_video(path)
|
|
||||||
|
|
||||||
# for clip in clips:
|
|
||||||
# results.append(extract_aus(clip, 'svm', 25*5))
|
|
||||||
# return results
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Manual test run: split one example video into chunks and extract the
    # AU sums for each chunk using the SVM model.
    results = []
    clips = []
    test_video_path = "AU_creation/YTDown.com_YouTube_Was-ist-los-bei-7-vs-Wild_Media_Gtj9zu_WikU_001_1080p.mp4"
    clips = split_video(test_video_path)

    for clippath in clips:
        # 25 fps * 5 -> analyse roughly one frame every five seconds
        results.append(extract_aus(clippath, 'svm', 25*5))

    print(results)
|
|
||||||
@ -5,27 +5,47 @@
|
|||||||
"id": "3b0c6c82",
|
"id": "3b0c6c82",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Hier entsteht die Dokumentation, wie die Action Units erzeugt wurden.\n",
|
"## Action Unit Documentation and Setup\n",
|
||||||
"Daraus wird dann letztendlich ein Skript erstellt, welches automatisch AUs aus Videodateien erstellen soll.\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"Py-Feat besitzt Dependencies, die ab Python 3.12 nicht mehr verfügbar sind.\n",
|
"This documentation outlines the process for generating **Action Units (AUs)** and the eventual creation of a script to automate AU extraction from video files.\n",
|
||||||
"Dazu muss ein Kernel mit Python 3.11 erstellt werden.\n",
|
|
||||||
"Folgendes Vorgehen:\n",
|
|
||||||
"1. Seite des Jupyter Labs öffnen\n",
|
|
||||||
"2. Terminal öffnen und folgende Befehle eingeben:\n",
|
|
||||||
" conda create -n py311 python=3.11\n",
|
|
||||||
" source ~/.bashrc\n",
|
|
||||||
" conda activate py311\n",
|
|
||||||
" conda install jupyter\n",
|
|
||||||
" python -m ipykernel install --user --name=py311 --display-name \"Python 3.11\"\n",
|
|
||||||
" pip install py-feat\n",
|
|
||||||
" pip install \"moviepy<2.0\" (falls benötigt)\n",
|
|
||||||
"3. den Kernel neustarten\n",
|
|
||||||
"4. in VSC den Kernel neu hinzufügen und dann den Kernel mit dem Namen \"Python 3.11\" auswählen.\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"Der Code unten zeigt eine beispielhafte Integration der py-feat Bibliothek.\n",
|
"### Python Environment Configuration\n",
|
||||||
"Die Klassifizierung zu 0,1 kommt durch die Wahl des AU-Modells zustande. Dabei wird SVM gewählt. (ADABase Paper)\n",
|
"\n",
|
||||||
"Gibt die Klassifizierung einen Gleitkommawert zwischen 0 & 1 aus, dann kommt XGB zum Einsatz. (REVELIO Paper)"
|
"**Py-Feat** relies on dependencies that are incompatible with Python 3.12 and later. To ensure functionality, you must set up a dedicated **Python 3.11** kernel.\n",
|
||||||
|
"\n",
|
||||||
|
"#### Setup Instructions:\n",
|
||||||
|
"\n",
|
||||||
|
"1. Open your **Jupyter Lab** interface.\n",
|
||||||
|
"2. Open a **Terminal** and execute the following commands:\n",
|
||||||
|
"```bash\n",
|
||||||
|
"conda create -n py311 python=3.11\n",
|
||||||
|
"source ~/.bashrc\n",
|
||||||
|
"conda activate py311\n",
|
||||||
|
"conda install jupyter\n",
|
||||||
|
"python -m ipykernel install --user --name=py311 --display-name \"Python 3.11\"\n",
|
||||||
|
"pip install py-feat\n",
|
||||||
|
"pip install \"moviepy<2.0\" # Only if required\n",
|
||||||
|
"\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"3. **Restart** the kernel.\n",
|
||||||
|
"4. In **VS Code**, refresh your kernel list and select the one labeled **\"Python 3.11\"**.\n",
|
||||||
|
"\n",
|
||||||
|
"---\n",
|
||||||
|
"\n",
|
||||||
|
"### Implementation Details\n",
|
||||||
|
"\n",
|
||||||
|
"The following code demonstrates a sample integration of the `py-feat` library. The classification output format is determined by the specific AU model selected:\n",
|
||||||
|
"\n",
|
||||||
|
"| Model | Output Type | Reference Paper |\n",
|
||||||
|
"| --- | --- | --- |\n",
|
||||||
|
"| **SVM** | Binary (0 or 1) | *ADABase* |\n",
|
||||||
|
"| **XGB** | Floating Point (0.0 - 1.0) | *REVELIO* |\n",
|
||||||
|
"\n",
|
||||||
|
"---\n",
|
||||||
|
"\n",
|
||||||
|
    "The code block below implements the detector using these libraries with the selected AU model."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@ -1,296 +0,0 @@
|
|||||||
import cv2
import time
import os
import threading
from datetime import datetime
from feat import Detector
import torch
import mediapipe as mp
import csv

# Configuration
CAMERA_INDEX = 0
OUTPUT_DIR = "recordings"
VIDEO_DURATION = 10 # seconds per recorded clip
START_INTERVAL = 5 # seconds until the next recording starts
FPS = 25.0 # fixed frame rate

if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# Global detector so it is not reloaded for every video (saves a lot of time/memory)
print("Initialisiere AU-Detector (bitte warten)...")
detector = Detector(au_model="xgb")

# ===== MediaPipe FaceMesh setup =====
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    refine_landmarks=True, # important for iris landmarks
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

# FaceMesh landmark indices of the four iris points per eye
LEFT_IRIS = [474, 475, 476, 477]
RIGHT_IRIS = [469, 470, 471, 472]

# (top lid, bottom lid) landmark indices consumed by eye_openness()
LEFT_EYE_LIDS = (159, 145)
RIGHT_EYE_LIDS = (386, 374)

# (idx1, idx2, top, bottom) landmark indices consumed by compute_gaze()
LEFT_EYE_GAZE_IDXS = (33, 133, 159, 145)
RIGHT_EYE_GAZE_IDXS = (263, 362, 386, 374)

# minimum lid distance in pixels for an eye to count as open
EYE_OPEN_THRESHOLD = 6

# Prepare the CSV output for the per-frame gaze samples
gaze_csv = open("gaze_data.csv", mode="w", newline="")
gaze_writer = csv.writer(gaze_csv)
gaze_writer.writerow([
    "timestamp",
    "left_gaze_x",
    "left_gaze_y",
    "right_gaze_x",
    "right_gaze_y",
    "left_valid",
    "right_valid",
    "left_diameter",
    "right_diameter"
])
|
|
||||||
|
|
||||||
def eye_openness(landmarks, top_idx, bottom_idx, img_height):
    """Return the vertical lid distance of one eye in pixels.

    Args:
        landmarks: Sequence of landmark points with normalized ``.y`` values.
        top_idx: Index of the upper-lid landmark.
        bottom_idx: Index of the lower-lid landmark.
        img_height: Image height in pixels used to de-normalize the distance.
    """
    lid_gap = landmarks[top_idx].y - landmarks[bottom_idx].y
    return abs(lid_gap) * img_height
|
|
||||||
|
|
||||||
|
|
||||||
def compute_gaze(landmarks, iris_center, indices, w, h):
    """Return the normalized (x, y) gaze position of the iris inside the eye.

    The iris center is mapped into the eye's bounding box (spanned by the two
    horizontal landmarks and the upper/lower lid landmarks) and clamped to
    the [0, 1] range on both axes.

    Args:
        landmarks: Sequence of landmark points with normalized ``.x``/``.y``.
        iris_center: (x, y) iris center in pixel coordinates.
        indices: Tuple (idx1, idx2, top_idx, bottom_idx) into ``landmarks``.
        w: Image width in pixels.
        h: Image height in pixels.

    Returns:
        (gaze_x, gaze_y), each in [0, 1]; (0.5, 0.5) for a degenerate eye box.
    """
    corner_a, corner_b, lid_top, lid_bottom = indices

    px_a = landmarks[corner_a].x * w
    px_b = landmarks[corner_b].x * w
    py_top = landmarks[lid_top].y * h
    py_bottom = landmarks[lid_bottom].y * h

    iris_x, iris_y = iris_center

    box_left, box_right = min(px_a, px_b), max(px_a, px_b)
    box_width = box_right - box_left
    box_height = abs(py_bottom - py_top)

    # Degenerate eye box: fall back to a centered gaze.
    if box_width == 0 or box_height == 0:
        return 0.5, 0.5

    raw_x = (iris_x - box_left) / box_width
    raw_y = (iris_y - min(py_top, py_bottom)) / box_height

    # Clamp both coordinates into [0, 1].
    return max(0, min(1, raw_x)), max(0, min(1, raw_y))
|
|
||||||
|
|
||||||
def extract_aus(path, skip_frames):
    """Run py-feat AU detection on a recorded video and return the AU sums.

    Uses the module-level ``detector`` (loaded once at startup).

    Args:
        path: Path to the video file to analyse.
        skip_frames: Number of frames to skip between analysed frames.

    Returns:
        Sum of the detected action-unit activations over all analysed
        frames, or 0 if the summation fails.
    """

    # torch.no_grad() disables gradient computation.
    # This fixes the "Can't call numpy() on Tensor that requires grad" error.
    with torch.no_grad():
        video_prediction = detector.detect_video(
            path,
            skip_frames=skip_frames,
            face_detection_threshold=0.95
        )

    # In case video_prediction or .aus are still tensors,
    # make sure they get summed correctly.
    try:
        # Take the sum of the action units over all detected frames
        res = video_prediction.aus.sum()
        return res
    except Exception as e:
        print(f"Fehler bei der Summenbildung: {e}")
        return 0
|
|
||||||
|
|
||||||
def startAU_creation(video_path):
    """Run AU extraction for one recorded clip; executed in its own thread."""
    try:
        print(f"\n[THREAD START] Analyse läuft für: {video_path}")
        # Compute skip_frames (e.g. every 5 seconds at 25 FPS = 125)
        output = extract_aus(video_path, skip_frames=int(FPS*5))

        print(f"\n--- Ergebnis für {os.path.basename(video_path)} ---")
        print(output)
        print("--------------------------------------------------\n")
    except Exception as e:
        # Never let a failed analysis kill the worker thread silently.
        print(f"Fehler bei der Analyse von {video_path}: {e}")
|
|
||||||
|
|
||||||
class VideoRecorder:
    """Writes a fixed-length video clip and triggers AU analysis on completion.

    Each instance records ``VIDEO_DURATION * FPS`` frames into ``filename``;
    when the quota is reached, the file is closed and analysed in a
    background thread.
    """

    def __init__(self, filename, width, height):
        # Output file and XVID writer for this clip
        self.filename = filename
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        self.out = cv2.VideoWriter(filename, fourcc, FPS, (width, height))
        # Total number of frames this clip should contain
        self.frames_to_record = int(VIDEO_DURATION * FPS)
        self.frames_count = 0
        self.is_finished = False

    def write_frame(self, frame):
        """Append one frame; finalize the clip once the frame quota is reached."""
        if self.frames_count < self.frames_to_record:
            self.out.write(frame)
            self.frames_count += 1
        else:
            self.finish()

    def finish(self):
        """Release the writer and start AU analysis in a background thread."""
        if not self.is_finished:
            self.out.release()
            self.is_finished = True
            abs_path = os.path.abspath(self.filename)
            print(f"Video fertig gespeichert: {self.filename}")

            # --- MULTITHREADING HERE ---
            # Start the analysis in a new thread so main() can keep reading frames
            analysis_thread = threading.Thread(target=startAU_creation, args=(abs_path,))
            analysis_thread.daemon = True  # terminates when the main program exits
            analysis_thread.start()
|
|
||||||
|
|
||||||
def main():
    """Capture the camera stream, log per-frame gaze data, and record
    overlapping fixed-length clips for asynchronous AU analysis.

    Every ``START_INTERVAL`` seconds a new ``VideoRecorder`` is started, so
    recordings overlap. Gaze/pupil measurements are written to the
    module-level ``gaze_writer`` CSV for every frame. Press 'q' to quit.
    """
    cap = cv2.VideoCapture(CAMERA_INDEX)
    if not cap.isOpened():
        print("Fehler: Kamera konnte nicht geöffnet werden.")
        return

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    active_recorders = []
    last_start_time = 0

    print("Aufnahme läuft. Drücke 'q' zum Beenden.")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # FaceMesh expects RGB input; OpenCV delivers BGR
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            h, w, _ = frame.shape
            results = face_mesh.process(rgb)

            # Per-frame defaults: no valid eyes, no measurements
            left_valid = 0
            right_valid = 0
            left_diameter = None
            right_diameter = None

            left_gaze_x = None
            left_gaze_y = None
            right_gaze_x = None
            right_gaze_y = None

            if results.multi_face_landmarks:
                # Only the first detected face is used (max_num_faces=1)
                face_landmarks = results.multi_face_landmarks[0]

                left_open = eye_openness(
                    face_landmarks.landmark,
                    LEFT_EYE_LIDS[0],
                    LEFT_EYE_LIDS[1],
                    h
                )

                right_open = eye_openness(
                    face_landmarks.landmark,
                    RIGHT_EYE_LIDS[0],
                    RIGHT_EYE_LIDS[1],
                    h
                )

                # An eye counts as open when the lid gap exceeds the threshold
                left_valid = 1 if left_open > EYE_OPEN_THRESHOLD else 0
                right_valid = 1 if right_open > EYE_OPEN_THRESHOLD else 0

                for eye_name, eye_indices in [("left", LEFT_IRIS), ("right", RIGHT_IRIS)]:
                    iris_points = []

                    # Collect the four iris landmarks in pixel coordinates
                    for idx in eye_indices:
                        lm = face_landmarks.landmark[idx]
                        x_i, y_i = int(lm.x * w), int(lm.y * h)
                        iris_points.append((x_i, y_i))

                    if len(iris_points) == 4:
                        # Iris center = mean of the four iris landmarks
                        cx = int(sum(p[0] for p in iris_points) / 4)
                        cy = int(sum(p[1] for p in iris_points) / 4)

                        # Radius = largest distance from center to any iris point
                        radius = max(
                            ((x - cx) ** 2 + (y - cy) ** 2) ** 0.5
                            for (x, y) in iris_points
                        )

                        diameter = 2 * radius

                        # Visual feedback: draw the iris circle on the preview
                        cv2.circle(frame, (cx, cy), int(radius), (0, 255, 0), 2)

                        if eye_name == "left" and left_valid:
                            left_diameter = diameter
                            left_gaze_x, left_gaze_y = compute_gaze(
                                face_landmarks.landmark,
                                (cx, cy),
                                LEFT_EYE_GAZE_IDXS,
                                w, h
                            )

                        elif eye_name == "right" and right_valid:
                            right_diameter = diameter
                            right_gaze_x, right_gaze_y = compute_gaze(
                                face_landmarks.landmark,
                                (cx, cy),
                                RIGHT_EYE_GAZE_IDXS,
                                w, h
                            )

            # Write the CSV row (None values for eyes that were not valid)
            gaze_writer.writerow([
                time.time(),
                left_gaze_x,
                left_gaze_y,
                right_gaze_x,
                right_gaze_y,
                left_valid,
                right_valid,
                left_diameter,
                right_diameter
            ])

            current_time = time.time()

            # Start a new overlapping recording every START_INTERVAL seconds
            if current_time - last_start_time >= START_INTERVAL:
                timestamp = datetime.now().strftime("%H%M%S")
                filename = os.path.join(OUTPUT_DIR, f"rec_{timestamp}.avi")
                new_recorder = VideoRecorder(filename, width, height)
                active_recorders.append(new_recorder)
                last_start_time = current_time

            # Feed the frame to all running recorders; drop finished ones
            # (iterate over a copy so removal is safe)
            for rec in active_recorders[:]:
                rec.write_frame(frame)
                if rec.is_finished:
                    active_recorders.remove(rec)

            cv2.imshow('Kamera Livestream', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

            # Roughly pace the loop to the target FPS
            time.sleep(1/FPS)

    finally:
        # Release all resources even when the loop exits via exception
        gaze_csv.close()
        face_mesh.close()
        cap.release()
        cv2.destroyAllWindows()
        print("Programm beendet. Warte ggf. auf laufende Analysen...")
|
|
||||||
|
|
||||||
# Entry point: start the camera/recording loop when run as a script.
if __name__ == "__main__":
    main()
|
|
||||||
@ -61,6 +61,7 @@ Runtime behavior:
|
|||||||
- Extracts gaze/iris-based signals via MediaPipe
|
- Extracts gaze/iris-based signals via MediaPipe
|
||||||
- Records overlapping windows (`VIDEO_DURATION=50s`, `START_INTERVAL=5s`, `FPS=25`)
|
- Records overlapping windows (`VIDEO_DURATION=50s`, `START_INTERVAL=5s`, `FPS=25`)
|
||||||
- Runs AU extraction (`py-feat`) from recorded video segments
|
- Runs AU extraction (`py-feat`) from recorded video segments
|
||||||
|
- Explanation of the py-feat functionality is located in `dataset_creation/AU_creation/pyfeat_docu.ipynb`
|
||||||
- Computes eye-feature summary from generated gaze parquet
|
- Computes eye-feature summary from generated gaze parquet
|
||||||
- Writes merged rows to SQLite table `feature_table`
|
- Writes merged rows to SQLite table `feature_table`
|
||||||
|
|
||||||
@ -103,6 +104,74 @@ Supporting utilities in ```model_training/tools```:
|
|||||||
|
|
||||||
### 4.1 CNNs
|
### 4.1 CNNs
|
||||||
### 4.2 XGBoost
|
### 4.2 XGBoost
|
||||||
|
This documentation outlines the evolution of the XGBoost classification pipeline for cognitive workload detection. The project transitioned from a basic unimodal setup to a sophisticated, multi-stage hybrid system incorporating advanced statistical filtering and deep feature extraction.
|
||||||
|
During model creation, several methods were used to improve accuracy. The biggest challenge during training was the model's strong tendency to overfit. Even in the final version, with explicit regularization parameters, the overall accuracy could not be improved beyond what the earlier methods achieved.
|
||||||
|
The model overall was not that good, as the highest accuracy we could achieve was around 65%, which is a bit higher than Fraunhofer achieved in the ADABase-Paper.
|
||||||
|
|
||||||
|
### 4.2.1 Classical XGBoost Baseline
|
||||||
|
|
||||||
|
To establish a performance baseline, a classical Extreme Gradient Boosting (XGBoost) model was implemented. XGBoost was selected for its ability to handle non-linear relationships and its inherent regularization, which helps prevent overfitting in high-dimensional feature spaces like Facial Action Units. XGBoost was picked because of its usage in the ADABase Paper. Initially, the model utilized raw Action Unit sums with global normalization to determine the basic predictability of workload from facial muscle activity alone.
|
||||||
|
|
||||||
|
| Metric / Model | Classical XGBoost |
|
||||||
|
| --- | --- |
|
||||||
|
| Accuracy | |
|
||||||
|
| AUC | |
|
||||||
|
| F1-Score | |
|
||||||
|
|
||||||
|
### 4.2.2 XGBoost with GroupKFold Validation
|
||||||
|
|
||||||
|
To address the challenge of inter-subject variability, the validation strategy was upgraded to `GroupKFold`. In behavioral data, samples from the same subject are highly correlated. Standard cross-validation often leads to data leakage, where the model memorizes individual facial characteristics. By ensuring that a subject's data is never shared between the training and validation sets, this iteration provides a scientifically rigorous measure of how the model generalizes to entirely unseen individuals.
|
||||||
|
|
||||||
|
| Metric / Model | XGBoost (GroupKFold) |
|
||||||
|
| --- | --- |
|
||||||
|
| Accuracy | |
|
||||||
|
| AUC | |
|
||||||
|
| F1-Score | |
|
||||||
|
|
||||||
|
### 4.2.3 Hybrid XGBoost with Autoencoder
|
||||||
|
|
||||||
|
To improve feature quality, a hybrid approach was introduced by pre-training a deep Autoencoder. The encoder branch was used to compress 20 raw Action Units into a 5-dimensional latent space. This non-linear dimensionality reduction aims to capture muscle synergies and filter out noise that decision trees might struggle with. The XGBoost classifier was then trained on these machine-learned representations rather than raw inputs.
|
||||||
|
|
||||||
|
| Metric / Model | XGBoost + Autoencoder |
|
||||||
|
| --- | --- |
|
||||||
|
| Accuracy | |
|
||||||
|
| AUC | |
|
||||||
|
| F1-Score | |
|
||||||
|
|
||||||
|
### 4.2.4 Robust XGBoost with MAD Outlier Removal
|
||||||
|
|
||||||
|
Recognizing that physiological and AU data often contain sensor artifacts, a robust preprocessing layer was added using Median Absolute Deviation (MAD). Unlike standard deviation, MAD is resilient to extreme outliers. By calculating a Robust Z-score and filtering signals in the training set, the model learned from a "clean" representation of cognitive states, significantly improving the stability of the gradient boosting process.
|
||||||
|
|
||||||
|
| Metric / Model | XGBoost + MAD |
|
||||||
|
| --- | --- |
|
||||||
|
| Accuracy | |
|
||||||
|
| AUC | |
|
||||||
|
| F1-Score | |
|
||||||
|
|
||||||
|
### 4.2.5 Combined Dataset of Action Units and EyeTracking
|
||||||
|
|
||||||
|
This training iteration refined the robust pipeline on a new, expanded dataset. The dataset integrated both high-frequency facial action units and advanced eye-tracking metrics (pupillometry and fixations).
|
||||||
|
Since the recreation of the EyeTracking data in the lab was in doubt, only Action Units were used in the first XGBoost models. Now the model also implemented the EyeTracking data as features.
|
||||||
|
By applying performance-based subject splitting, we ensured that the training and test sets were balanced not only by label but by the subjects' underlying skill levels, resulting in the most deployable version of the AI.
|
||||||
|
|
||||||
|
| Metric / Model | Final Combined Model |
|
||||||
|
| --- | --- |
|
||||||
|
| Accuracy | |
|
||||||
|
| AUC | |
|
||||||
|
| F1-Score | |
|
||||||
|
|
||||||
|
### 4.2.6 Regularized XGBoost with Complexity Control
|
||||||
|
|
||||||
|
Building upon the robust preprocessing of the previous steps, this iteration focuses on strict **complexity control** within the XGBoost architecture. To mitigate the 100% training accuracy observed in earlier unimodal tests—a clear indicator of overfitting—we introduced explicit **L1 (reg_alpha)** and **L2 (reg_lambda)** regularization parameters into the GridSearch space.
|
||||||
|
|
||||||
|
By penalizing large weights and promoting feature sparsity, the model is forced to prioritize the most globally relevant Action Units. Furthermore, the tree depth was intentionally restricted (`max_depth`: 2-4), and an **Early Stopping** callback with a 30-round patience window was implemented. This ensures that training terminates at the point of optimal generalization, capturing the essential physiological trends of cognitive load while ignoring subject-specific noise.
|
||||||
|
|
||||||
|
| Metric / Model | Regularized XGBoost |
|
||||||
|
| --- | --- |
|
||||||
|
| Accuracy | |
|
||||||
|
| AUC | |
|
||||||
|
| F1-Score | |
|
||||||
|
|
||||||
### 4.3 Isolation Forest
|
### 4.3 Isolation Forest
|
||||||
To start with unsupervised learning techniques, `IsolationForest.ipynb` was created to research how well a simple ensemble classifier performs on the created dataset.
|
To start with unsupervised learning techniques, `IsolationForest.ipynb` was created to research how well a simple ensemble classifier performs on the created dataset.
|
||||||
The notebook comes with one class grid search for hyperparameter tuning as well as a ROC curve that allows manual fine tuning.
|
The notebook comes with one class grid search for hyperparameter tuning as well as a ROC curve that allows manual fine tuning.
|
||||||
@ -214,7 +283,10 @@ To (re-)create the custom database for deployment, use `fill_db.ipynb`. Enter th
|
|||||||
## 6) Installation and Dependencies
|
## 6) Installation and Dependencies
|
||||||
Due to unsolvable dependency conflicts, several environments need to be used at the same time.
|
Due to unsolvable dependency conflicts, several environments need to be used at the same time.
|
||||||
### 6.1 Environment for camera handling
|
### 6.1 Environment for camera handling
|
||||||
TO DO
|
The setup of a virtual environment for the camera handling is difficult due to various dependency conflicts.
|
||||||
|
Therefore it is necessary to create the virtual environment with every package in the specific version and each package in the specific order.
|
||||||
|
Furthermore, the environment needs to be based on Python 3.10. The specific versions and order of the packages are described in the file:
|
||||||
|
`requirements.txt`
|
||||||
|
|
||||||
|
|
||||||
### 6.2 Environment for predictions
|
### 6.2 Environment for predictions
|
||||||
@ -238,14 +310,12 @@ Otherwise, as described in `readme.md: Setup`, you can use `prediction_env.yaml`
|
|||||||
- `dataset_creation/maxDist.py` - helper/statistical utility script for eye-tracking feature creation
|
- `dataset_creation/maxDist.py` - helper/statistical utility script for eye-tracking feature creation
|
||||||
|
|
||||||
#### AU Creation
|
#### AU Creation
|
||||||
- `dataset_creation/AU_creation/AU_creation_service.py` - AU extraction service workflow
|
|
||||||
- `dataset_creation/AU_creation/pyfeat_docu.ipynb` - py-feat exploratory notes
|
- `dataset_creation/AU_creation/pyfeat_docu.ipynb` - py-feat exploratory notes
|
||||||
|
|
||||||
#### Camera Handling
|
#### Camera Handling
|
||||||
- `dataset_creation/camera_handling/camera_stream_AU_and_ET_new.py` - current camera + AU + eye online pipeline
|
- `dataset_creation/camera_handling/camera_stream_AU_and_ET_new.py` - current camera + AU + eye online pipeline
|
||||||
- `dataset_creation/camera_handling/eyeFeature_new.py` - eye-feature extraction from gaze parquet
|
- `dataset_creation/camera_handling/eyeFeature_new.py` - eye-feature extraction from gaze parquet
|
||||||
- `dataset_creation/camera_handling/db_helper.py` - SQLite helper functions (camera pipeline)
|
- `dataset_creation/camera_handling/db_helper.py` - SQLite helper functions (camera pipeline)
|
||||||
- `dataset_creation/camera_handling/camera_stream_AU_and_ET.py` - older pipeline variant
|
|
||||||
- `dataset_creation/camera_handling/camera_stream.py` - baseline camera streaming script
|
- `dataset_creation/camera_handling/camera_stream.py` - baseline camera streaming script
|
||||||
- `dataset_creation/camera_handling/db_test.py` - DB test utility
|
- `dataset_creation/camera_handling/db_test.py` - DB test utility
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user