- added notes for XGBoost without accuracy details

- deleted unused files for data_creation and modified the project_report file overview
- translated the documentation for the pyfeat implementation
This commit is contained in:
TimoKurz 2026-03-14 14:33:35 +01:00
parent c439e35e39
commit 4df1187f84
4 changed files with 112 additions and 376 deletions

View File

@ -1,58 +0,0 @@
from feat import Detector
from feat.utils.io import get_test_data_path
from moviepy.video.io.VideoFileClip import VideoFileClip
import os
def extract_aus(path, model, skip_frames):
    """Run py-feat AU detection on a video and return the summed Action Units.

    Args:
        path: Path to the video file to analyze.
        model: Name of the py-feat AU model to use (e.g. "svm" or "xgb").
        skip_frames: Number of frames to skip between analyzed frames.

    Returns:
        The column-wise sum of the detected Action Unit values over all
        analyzed frames (presumably one entry per AU — TODO confirm against
        py-feat's Fex.aus accessor).
    """
    detector = Detector(au_model=model)
    video_prediction = detector.detect(
        # include every 5 seconds - 24 frames per second
        path, data_type="video", skip_frames=skip_frames, face_detection_threshold=0.95
    )
    return video_prediction.aus.sum()
def split_video(path, chunk_length=120):
    """Split a video into consecutive chunks and write them as .mp4 files.

    The chunks are written without audio, at the source frame rate, into a
    "subclips" directory created next to the input file.

    Args:
        path: Path to the input video file.
        chunk_length: Maximum length of each chunk in seconds (default 120).

    Returns:
        List of file paths of all written subclips, in chronological order.
    """
    video = VideoFileClip(path)
    try:
        duration = int(video.duration)
        # BUG FIX: os.dirname does not exist; the correct call is os.path.dirname.
        subclips_dir = os.path.join(os.path.dirname(path), "subclips")
        os.makedirs(subclips_dir, exist_ok=True)
        paths = []
        for start in range(0, duration, chunk_length):
            end = min(start + chunk_length, duration)
            subclip = (
                video
                .subclip(start, end)
                .without_audio()
                .set_fps(video.fps)
            )
            # BUG FIX: write the parts *into* the subclips directory; previously
            # the directory name was only used as a filename prefix, so the
            # created directory stayed empty.
            output_path = os.path.join(
                subclips_dir, f"part_{start // chunk_length + 1}.mp4"
            )
            subclip.write_videofile(output_path)
            paths.append(output_path)
        # BUG FIX: return the full list of subclip paths instead of only the
        # last one; callers iterate over the result (see __main__ below).
        return paths
    finally:
        # Release the reader/ffmpeg handles held by moviepy.
        video.close()
# def start(path):
# results = []
# clips = split_video(path)
# for clip in clips:
# results.append(extract_aus(clip, 'svm', 25*5))
# return results
if __name__ == "__main__":
    # Demo run: split the sample video and collect the AU sums per subclip.
    test_video_path = "AU_creation/YTDown.com_YouTube_Was-ist-los-bei-7-vs-Wild_Media_Gtj9zu_WikU_001_1080p.mp4"
    clips = split_video(test_video_path)
    results = [extract_aus(clippath, 'svm', 25*5) for clippath in clips]
    print(results)

View File

@ -5,27 +5,47 @@
"id": "3b0c6c82", "id": "3b0c6c82",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Hier entsteht die Dokumentation, wie die Action Units erzeugt wurden.\n", "## Action Unit Documentation and Setup\n",
"Daraus wird dann letztendlich ein Skript erstellt, welches automatisch AUs aus Videodateien erstellen soll.\n",
"\n", "\n",
"Py-Feat besitzt Dependencies, die ab Python 3.12 nicht mehr verfügbar sind.\n", "This documentation outlines the process for generating **Action Units (AUs)** and the eventual creation of a script to automate AU extraction from video files.\n",
"Dazu muss ein Kernel mit Python 3.11 erstellt werden.\n",
"Folgendes Vorgehen:\n",
"1. Seite des Jupyter Labs öffnen\n",
"2. Terminal öffnen und folgende Befehle eingeben:\n",
" conda create -n py311 python=3.11\n",
" source ~/.bashrc\n",
" conda activate py311\n",
" conda install jupyter\n",
" python -m ipykernel install --user --name=py311 --display-name \"Python 3.11\"\n",
" pip install py-feat\n",
" pip install \"moviepy<2.0\" (falls benötigt)\n",
"3. den Kernel neustarten\n",
"4. in VSC den Kernel neu hinzufügen und dann den Kernel mit dem Namen \"Python 3.11\" auswählen.\n",
"\n", "\n",
"Der Code unten zeigt eine beispielhafte Integration der py-feat Bibliothek.\n", "### Python Environment Configuration\n",
"Die Klassifizierung zu 0,1 kommt durch die Wahl des AU-Modells zustande. Dabei wird SVM gewählt. (ADABase Paper)\n", "\n",
"Gibt die Klassifizierung einen Gleitkommawert zwischen 0 & 1 aus, dann kommt XGB zum Einsatz. (REVELIO Paper)" "**Py-Feat** relies on dependencies that are incompatible with Python 3.12 and later. To ensure functionality, you must set up a dedicated **Python 3.11** kernel.\n",
"\n",
"#### Setup Instructions:\n",
"\n",
"1. Open your **Jupyter Lab** interface.\n",
"2. Open a **Terminal** and execute the following commands:\n",
"```bash\n",
"conda create -n py311 python=3.11\n",
"source ~/.bashrc\n",
"conda activate py311\n",
"conda install jupyter\n",
"python -m ipykernel install --user --name=py311 --display-name \"Python 3.11\"\n",
"pip install py-feat\n",
"pip install \"moviepy<2.0\" # Only if required\n",
"\n",
"```\n",
"\n",
"\n",
"3. **Restart** the kernel.\n",
"4. In **VS Code**, refresh your kernel list and select the one labeled **\"Python 3.11\"**.\n",
"\n",
"---\n",
"\n",
"### Implementation Details\n",
"\n",
"The following code demonstrates a sample integration of the `py-feat` library. The classification output format is determined by the specific AU model selected:\n",
"\n",
"| Model | Output Type | Reference Paper |\n",
"| --- | --- | --- |\n",
"| **SVM** | Binary (0 or 1) | *ADABase* |\n",
"| **XGB** | Floating Point (0.0 - 1.0) | *REVELIO* |\n",
"\n",
"---\n",
"\n",
"See the code cells below for the corresponding detector setup."
] ]
}, },
{ {

View File

@ -1,296 +0,0 @@
import cv2
import time
import os
import threading
from datetime import datetime
from feat import Detector
import torch
import mediapipe as mp
import csv
# Configuration
CAMERA_INDEX = 0
OUTPUT_DIR = "recordings"
VIDEO_DURATION = 10  # seconds per recorded window
START_INTERVAL = 5  # seconds until the next recording window starts
FPS = 25.0  # fixed frame rate
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)
# Global detector, so it does not have to be reloaded for every video
# (saves a lot of time/memory).
print("Initialisiere AU-Detector (bitte warten)...")
detector = Detector(au_model="xgb")
# ===== MediaPipe FaceMesh setup =====
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    refine_landmarks=True,  # important for iris landmarks
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)
# Landmark indices used below.
# NOTE(review): these look like the standard MediaPipe refined-landmark
# layout (iris contours, eyelids, eye corners) — confirm against the
# MediaPipe FaceMesh documentation.
LEFT_IRIS = [474, 475, 476, 477]
RIGHT_IRIS = [469, 470, 471, 472]
LEFT_EYE_LIDS = (159, 145)
RIGHT_EYE_LIDS = (386, 374)
LEFT_EYE_GAZE_IDXS = (33, 133, 159, 145)
RIGHT_EYE_GAZE_IDXS = (263, 362, 386, 374)
# Minimum eyelid distance (in pixels) for an eye to count as open.
EYE_OPEN_THRESHOLD = 6
# Prepare the CSV output for the per-frame gaze signals.
gaze_csv = open("gaze_data.csv", mode="w", newline="")
gaze_writer = csv.writer(gaze_csv)
gaze_writer.writerow([
    "timestamp",
    "left_gaze_x",
    "left_gaze_y",
    "right_gaze_x",
    "right_gaze_y",
    "left_valid",
    "right_valid",
    "left_diameter",
    "right_diameter"
])
def eye_openness(landmarks, top_idx, bottom_idx, img_height):
    """Return the vertical distance between two eyelid landmarks in pixels.

    Args:
        landmarks: Sequence of landmarks with normalized `.y` coordinates.
        top_idx: Index of the upper eyelid landmark.
        bottom_idx: Index of the lower eyelid landmark.
        img_height: Frame height in pixels, used to de-normalize.
    """
    lid_gap = landmarks[top_idx].y - landmarks[bottom_idx].y
    return abs(lid_gap) * img_height
def compute_gaze(landmarks, iris_center, indices, w, h):
    """Normalize the iris center into the eye's bounding box.

    Args:
        landmarks: Sequence of landmarks with normalized `.x`/`.y` coordinates.
        iris_center: (x, y) iris center in pixel coordinates.
        indices: Tuple of landmark indices (corner_a, corner_b, top, bottom).
        w: Frame width in pixels.
        h: Frame height in pixels.

    Returns:
        (gaze_x, gaze_y), each clamped to [0, 1]. Returns (0.5, 0.5) when the
        eye box is degenerate (zero width or height).
    """
    corner_a, corner_b, lid_top, lid_bottom = indices
    eye_left, eye_right = sorted((landmarks[corner_a].x * w, landmarks[corner_b].x * w))
    y_top = landmarks[lid_top].y * h
    y_bottom = landmarks[lid_bottom].y * h
    eye_width = eye_right - eye_left
    eye_height = abs(y_bottom - y_top)
    if not eye_width or not eye_height:
        return 0.5, 0.5
    iris_x, iris_y = iris_center
    raw_x = (iris_x - eye_left) / eye_width
    raw_y = (iris_y - min(y_top, y_bottom)) / eye_height
    # Clamp both coordinates into [0, 1].
    return min(max(raw_x, 0), 1), min(max(raw_y, 0), 1)
def extract_aus(path, skip_frames):
    """Extract the summed Action Units from a recording with the global detector.

    Args:
        path: Path to the video file to analyze.
        skip_frames: Number of frames to skip between analyzed frames.

    Returns:
        The column-wise sum of the detected AU values over all analyzed
        frames, or 0 if summing fails.
    """
    # torch.no_grad() disables gradient computation.
    # This resolves the "Can't call numpy() on Tensor that requires grad" error.
    with torch.no_grad():
        video_prediction = detector.detect_video(
            path,
            skip_frames=skip_frames,
            face_detection_threshold=0.95
        )
    # In case video_prediction or .aus are still tensors, make sure they are
    # summed correctly.
    try:
        # Take the sum of the Action Units over all detected frames.
        res = video_prediction.aus.sum()
        return res
    except Exception as e:
        print(f"Fehler bei der Summenbildung: {e}")
        return 0
def startAU_creation(video_path):
    """Run the AU analysis for one recording; executed in a worker thread."""
    try:
        print(f"\n[THREAD START] Analyse läuft für: {video_path}")
        # Analyze one frame every 5 seconds (e.g. 125 frames at 25 FPS).
        frames_to_skip = int(FPS * 5)
        output = extract_aus(video_path, skip_frames=frames_to_skip)
        clip_name = os.path.basename(video_path)
        print(f"\n--- Ergebnis für {clip_name} ---")
        print(output)
        print("--------------------------------------------------\n")
    except Exception as e:
        print(f"Fehler bei der Analyse von {video_path}: {e}")
class VideoRecorder:
    """Writes one fixed-length video window and triggers AU analysis when done."""

    def __init__(self, filename, width, height):
        """Open an XVID writer for *filename* at the global FPS setting."""
        self.filename = filename
        codec = cv2.VideoWriter_fourcc(*'XVID')
        self.out = cv2.VideoWriter(filename, codec, FPS, (width, height))
        self.frames_to_record = int(VIDEO_DURATION * FPS)
        self.frames_count = 0
        self.is_finished = False

    def write_frame(self, frame):
        """Append one frame; finalize the file once the window is full."""
        if self.frames_count >= self.frames_to_record:
            self.finish()
            return
        self.out.write(frame)
        self.frames_count += 1

    def finish(self):
        """Release the writer and start the AU analysis in a background thread."""
        if self.is_finished:
            return
        self.out.release()
        self.is_finished = True
        abs_path = os.path.abspath(self.filename)
        print(f"Video fertig gespeichert: {self.filename}")
        # Run the analysis in a daemon thread so main() can keep reading
        # frames immediately; the thread dies with the main program.
        analysis_thread = threading.Thread(
            target=startAU_creation,
            args=(abs_path,),
            daemon=True,
        )
        analysis_thread.start()
def main():
    """Capture loop: record overlapping windows, log gaze data, show a preview.

    Reads frames from the configured camera, runs MediaPipe FaceMesh on every
    frame to derive per-eye gaze direction and iris diameter (appended to the
    global gaze CSV), and starts a new overlapping VideoRecorder every
    START_INTERVAL seconds. Press 'q' in the preview window to stop.
    """
    cap = cv2.VideoCapture(CAMERA_INDEX)
    if not cap.isOpened():
        print("Fehler: Kamera konnte nicht geöffnet werden.")
        return
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    active_recorders = []
    last_start_time = 0
    print("Aufnahme läuft. Drücke 'q' zum Beenden.")
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # MediaPipe expects RGB input; OpenCV delivers BGR.
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            h, w, _ = frame.shape
            results = face_mesh.process(rgb)
            # Defaults for frames without a detected face.
            left_valid = 0
            right_valid = 0
            left_diameter = None
            right_diameter = None
            left_gaze_x = None
            left_gaze_y = None
            right_gaze_x = None
            right_gaze_y = None
            if results.multi_face_landmarks:
                face_landmarks = results.multi_face_landmarks[0]
                # Eyelid distance decides whether each eye counts as open.
                left_open = eye_openness(
                    face_landmarks.landmark,
                    LEFT_EYE_LIDS[0],
                    LEFT_EYE_LIDS[1],
                    h
                )
                right_open = eye_openness(
                    face_landmarks.landmark,
                    RIGHT_EYE_LIDS[0],
                    RIGHT_EYE_LIDS[1],
                    h
                )
                left_valid = 1 if left_open > EYE_OPEN_THRESHOLD else 0
                right_valid = 1 if right_open > EYE_OPEN_THRESHOLD else 0
                for eye_name, eye_indices in [("left", LEFT_IRIS), ("right", RIGHT_IRIS)]:
                    iris_points = []
                    for idx in eye_indices:
                        lm = face_landmarks.landmark[idx]
                        x_i, y_i = int(lm.x * w), int(lm.y * h)
                        iris_points.append((x_i, y_i))
                    if len(iris_points) == 4:
                        # Iris center = mean of the four contour points;
                        # radius = farthest contour point from the center.
                        cx = int(sum(p[0] for p in iris_points) / 4)
                        cy = int(sum(p[1] for p in iris_points) / 4)
                        radius = max(
                            ((x - cx) ** 2 + (y - cy) ** 2) ** 0.5
                            for (x, y) in iris_points
                        )
                        diameter = 2 * radius
                        cv2.circle(frame, (cx, cy), int(radius), (0, 255, 0), 2)
                        if eye_name == "left" and left_valid:
                            left_diameter = diameter
                            left_gaze_x, left_gaze_y = compute_gaze(
                                face_landmarks.landmark,
                                (cx, cy),
                                LEFT_EYE_GAZE_IDXS,
                                w, h
                            )
                        elif eye_name == "right" and right_valid:
                            right_diameter = diameter
                            right_gaze_x, right_gaze_y = compute_gaze(
                                face_landmarks.landmark,
                                (cx, cy),
                                RIGHT_EYE_GAZE_IDXS,
                                w, h
                            )
            # Write one gaze row per processed frame.
            gaze_writer.writerow([
                time.time(),
                left_gaze_x,
                left_gaze_y,
                right_gaze_x,
                right_gaze_y,
                left_valid,
                right_valid,
                left_diameter,
                right_diameter
            ])
            # Start a new (overlapping) recording window every START_INTERVAL seconds.
            current_time = time.time()
            if current_time - last_start_time >= START_INTERVAL:
                timestamp = datetime.now().strftime("%H%M%S")
                filename = os.path.join(OUTPUT_DIR, f"rec_{timestamp}.avi")
                new_recorder = VideoRecorder(filename, width, height)
                active_recorders.append(new_recorder)
                last_start_time = current_time
            # Feed the frame to every active window; drop finished recorders.
            # Iterating a copy because the list is mutated inside the loop.
            for rec in active_recorders[:]:
                rec.write_frame(frame)
                if rec.is_finished:
                    active_recorders.remove(rec)
            cv2.imshow('Kamera Livestream', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            # Crude frame pacing toward the target FPS.
            time.sleep(1/FPS)
    finally:
        gaze_csv.close()
        face_mesh.close()
        cap.release()
        cv2.destroyAllWindows()
        print("Programm beendet. Warte ggf. auf laufende Analysen...")
# Script entry point.
if __name__ == "__main__":
    main()

View File

@ -61,6 +61,7 @@ Runtime behavior:
- Extracts gaze/iris-based signals via MediaPipe - Extracts gaze/iris-based signals via MediaPipe
- Records overlapping windows (`VIDEO_DURATION=50s`, `START_INTERVAL=5s`, `FPS=25`) - Records overlapping windows (`VIDEO_DURATION=50s`, `START_INTERVAL=5s`, `FPS=25`)
- Runs AU extraction (`py-feat`) from recorded video segments - Runs AU extraction (`py-feat`) from recorded video segments
- Explanation of the py-feat functionality is located in `dataset_creation/AU_creation/pyfeat_docu.ipynb`
- Computes eye-feature summary from generated gaze parquet - Computes eye-feature summary from generated gaze parquet
- Writes merged rows to SQLite table `feature_table` - Writes merged rows to SQLite table `feature_table`
@ -103,6 +104,74 @@ Supporting utilities in ```model_training/tools```:
### 4.1 CNNs ### 4.1 CNNs
### 4.2 XGBoost ### 4.2 XGBoost
This documentation outlines the evolution of the XGBoost classification pipeline for cognitive workload detection. The project transitioned from a basic unimodal setup to a sophisticated, multi-stage hybrid system incorporating advanced statistical filtering and deep feature extraction.
During model creation, several methods were used to improve the model accuracy. Throughout training, the biggest challenge was always the strong overfitting of the model. Even in the last version, with explicit regularization parameters, the overall accuracy could not be improved beyond what the earlier methods achieved.
Overall, the model's performance remained modest: the highest accuracy we could achieve was around 65%, which is slightly higher than what Fraunhofer reported in the ADABase paper.
### 4.2.1 Classical XGBoost Baseline
To establish a performance baseline, a classical Extreme Gradient Boosting (XGBoost) model was implemented. XGBoost was selected for its ability to handle non-linear relationships and its inherent regularization, which helps prevent overfitting in high-dimensional feature spaces like Facial Action Units. XGBoost was picked because of its usage in the ADABase Paper. Initially, the model utilized raw Action Unit sums with global normalization to determine the basic predictability of workload from facial muscle activity alone.
| Metric / Model | Classical XGBoost |
| --- | --- |
| Accuracy | |
| AUC | |
| F1-Score | |
### 4.2.2 XGBoost with GroupKFold Validation
To address the challenge of inter-subject variability, the validation strategy was upgraded to `GroupKFold`. In behavioral data, samples from the same subject are highly correlated. Standard cross-validation often leads to data leakage, where the model memorizes individual facial characteristics. By ensuring that a subject's data is never shared between the training and validation sets, this iteration provides a scientifically rigorous measure of how the model generalizes to entirely unseen individuals.
| Metric / Model | XGBoost (GroupKFold) |
| --- | --- |
| Accuracy | |
| AUC | |
| F1-Score | |
### 4.2.3 Hybrid XGBoost with Autoencoder
To improve feature quality, a hybrid approach was introduced by pre-training a deep Autoencoder. The encoder branch was used to compress 20 raw Action Units into a 5-dimensional latent space. This non-linear dimensionality reduction aims to capture muscle synergies and filter out noise that decision trees might struggle with. The XGBoost classifier was then trained on these machine-learned representations rather than raw inputs.
| Metric / Model | XGBoost + Autoencoder |
| --- | --- |
| Accuracy | |
| AUC | |
| F1-Score | |
### 4.2.4 Robust XGBoost with MAD Outlier Removal
Recognizing that physiological and AU data often contain sensor artifacts, a robust preprocessing layer was added using Median Absolute Deviation (MAD). Unlike standard deviation, MAD is resilient to extreme outliers. By calculating a Robust Z-score and filtering signals in the training set, the model learned from a "clean" representation of cognitive states, significantly improving the stability of the gradient boosting process.
| Metric / Model | XGBoost + MAD |
| --- | --- |
| Accuracy | |
| AUC | |
| F1-Score | |
### 4.2.5 Combined Dataset of Action Units and EyeTracking
This iteration refined the robust pipeline by retraining it on a new, expanded dataset. This dataset integrated both high-frequency facial action units and advanced eye-tracking metrics (pupillometry and fixations).
Since the recreation of the EyeTracking data in the lab was in doubt, only Action Units were used in the first XGBoost models. Now the model also incorporates the eye-tracking data as features.
By applying performance-based subject splitting, we ensured that the training and test sets were balanced not only by label but by the subjects' underlying skill levels, resulting in the most deployable version of the AI.
| Metric / Model | Final Combined Model |
| --- | --- |
| Accuracy | |
| AUC | |
| F1-Score | |
### 4.2.6 Regularized XGBoost with Complexity Control
Building upon the robust preprocessing of the previous steps, this iteration focuses on strict **complexity control** within the XGBoost architecture. To mitigate the 100% training accuracy observed in earlier unimodal tests—a clear indicator of overfitting—we introduced explicit **L1 (reg_alpha)** and **L2 (reg_lambda)** regularization parameters into the GridSearch space.
By penalizing large weights and promoting feature sparsity, the model is forced to prioritize the most globally relevant Action Units. Furthermore, the tree depth was intentionally restricted (`max_depth`: 2-4), and an **Early Stopping** callback with a 30-round patience window was implemented. This ensures that training terminates at the point of optimal generalization, capturing the essential physiological trends of cognitive load while ignoring subject-specific noise.
| Metric / Model | Regularized XGBoost |
| --- | --- |
| Accuracy | |
| AUC | |
| F1-Score | |
### 4.3 Isolation Forest ### 4.3 Isolation Forest
To start with unsupervised learning techniques, `IsolationForest.ipynb` was created to research how well a simple ensemble classificator performs on the created dataset. To start with unsupervised learning techniques, `IsolationForest.ipynb` was created to research how well a simple ensemble classificator performs on the created dataset.
The notebook comes with one class grid search for hyperparameter tuning as well as a ROC curve that allows manual fine tuning. The notebook comes with one class grid search for hyperparameter tuning as well as a ROC curve that allows manual fine tuning.
@ -214,7 +283,10 @@ To (re-)create the custom database for deployment, use `fill_db.ipynb`. Enter th
## 6) Installation and Dependencies ## 6) Installation and Dependencies
Due to unsolvable dependency conflicts, several environments need to be used at the same time. Due to unsolvable dependency conflicts, several environments need to be used at the same time.
### 6.1 Environment for camera handling ### 6.1 Environment for camera handling
TO DO The setup of a virtual environment for the camera handling is difficult due to vary dependency conflicts.
Therefore it is necessary to create the virtual environment with every package in the specific version and each package in the specific order.
Furthermore the environment needs to be based on Python 3.10. The specific versions and order of the packages are described int the file:
`requirements.txt`
### 6.2 Environment for predictions ### 6.2 Environment for predictions
@ -238,14 +310,12 @@ Otherwise, as described in `readme.md: Setup`, you can use `prediction_env.yaml`
- `dataset_creation/maxDist.py` - helper/statistical utility script for eye-tracking feature creation - `dataset_creation/maxDist.py` - helper/statistical utility script for eye-tracking feature creation
#### AU Creation #### AU Creation
- `dataset_creation/AU_creation/AU_creation_service.py` - AU extraction service workflow
- `dataset_creation/AU_creation/pyfeat_docu.ipynb` - py-feat exploratory notes - `dataset_creation/AU_creation/pyfeat_docu.ipynb` - py-feat exploratory notes
#### Camera Handling #### Camera Handling
- `dataset_creation/camera_handling/camera_stream_AU_and_ET_new.py` - current camera + AU + eye online pipeline - `dataset_creation/camera_handling/camera_stream_AU_and_ET_new.py` - current camera + AU + eye online pipeline
- `dataset_creation/camera_handling/eyeFeature_new.py` - eye-feature extraction from gaze parquet - `dataset_creation/camera_handling/eyeFeature_new.py` - eye-feature extraction from gaze parquet
- `dataset_creation/camera_handling/db_helper.py` - SQLite helper functions (camera pipeline) - `dataset_creation/camera_handling/db_helper.py` - SQLite helper functions (camera pipeline)
- `dataset_creation/camera_handling/camera_stream_AU_and_ET.py` - older pipeline variant
- `dataset_creation/camera_handling/camera_stream.py` - baseline camera streaming script - `dataset_creation/camera_handling/camera_stream.py` - baseline camera streaming script
- `dataset_creation/camera_handling/db_test.py` - DB test utility - `dataset_creation/camera_handling/db_test.py` - DB test utility