- moved eyeTrackingFeatures to EDA

- updated eyeTrackingFeatures - added eyeAlt
2025-12-03 19:08:26 +01:00 · 2025-12-03 19:08:26 +01:00 · 080bdcfbe3
commit 080bdcfbe3
parent d7d0414b14
2 changed files with 385 additions and 32 deletions
--- a/EDA/eyeAlt.py
+++ b/EDA/eyeAlt.py
@ -0,0 +1,324 @@
+import numpy as np
+import pandas as pd
+import h5py
+import yaml
+import owncloud
+import os
+from sklearn.preprocessing import MinMaxScaler
+from scipy.signal import welch
+from pygazeanalyser.detectors import fixation_detection, saccade_detection
+
+
+##############################################################################
+# 1. HELFERFUNKTIONEN
+##############################################################################
+def clean_eye_df(df):
+    """
+    Reduce a multi-sensor frame to the rows that carry eye-tracking data.
+
+    Columns whose name contains "LEFT_" or "RIGHT_" are treated as
+    eye-tracking columns. Infinite values in them become NaN, and rows in
+    which every eye-tracking column is NaN (e.g. rows that only hold data
+    of other sensors) are dropped.
+
+    Returns a new DataFrame with only the eye-tracking columns and a fresh
+    0..n-1 index. The row counts before and after filtering are printed.
+    """
+    eye_cols = [name for name in df.columns if "LEFT_" in name or "RIGHT_" in name]
+
+    # Map +/-inf to NaN first so those cells count as missing for dropna.
+    cleaned = (
+        df[eye_cols]
+        .replace([np.inf, -np.inf], np.nan)
+        .dropna(subset=eye_cols, how="all")
+    )
+
+    print("Eyetracking-Zeilen vorher:", len(df))
+    print("Eyetracking-Zeilen nachher:", len(cleaned))
+
+    # Renumber the surviving rows so positional and label indexing agree.
+    return cleaned.reset_index(drop=True)
+
+
+def extract_gaze_signal(df):
+    """
+    Build one binocular 2D gaze trace from the per-eye display coordinates.
+
+    Reads the LEFT_/RIGHT_ "GAZE_POINT_ON_DISPLAY_AREA_X/Y" columns, blanks
+    samples that are infinite or whose "*_GAZE_POINT_VALIDITY" flag is not 1,
+    averages both eyes per sample and fills the resulting gaps.
+
+    Returns a NumPy array of shape (len(df), 2) with the columns (x, y).
+    """
+    print("→ extract_gaze_signal(): Eingabegröße:", df.shape)
+
+    # Raw per-eye coordinates as float series.
+    left_x = df["LEFT_GAZE_POINT_ON_DISPLAY_AREA_X"].astype(float)
+    left_y = df["LEFT_GAZE_POINT_ON_DISPLAY_AREA_Y"].astype(float)
+    right_x = df["RIGHT_GAZE_POINT_ON_DISPLAY_AREA_X"].astype(float)
+    right_y = df["RIGHT_GAZE_POINT_ON_DISPLAY_AREA_Y"].astype(float)
+
+    # A sample is usable only when its validity flag equals 1.
+    left_ok = df["LEFT_GAZE_POINT_VALIDITY"] == 1
+    right_ok = df["RIGHT_GAZE_POINT_VALIDITY"] == 1
+
+    def _masked(series, ok):
+        # +/-inf -> NaN, then NaN for every sample flagged as invalid.
+        return series.replace([np.inf, -np.inf], np.nan).where(ok)
+
+    def _binocular(left, right):
+        # Plain mean (not nanmean): a sample is NaN as soon as either eye is
+        # missing. Gaps are then interpolated (up to 50 samples from each
+        # side) and whatever is still NaN is back-/forward-filled, because
+        # the PyGaze detectors need a gap-free signal.
+        per_sample = np.mean(np.column_stack([left, right]), axis=1)
+        filled = pd.Series(per_sample).interpolate(limit=50, limit_direction="both")
+        return filled.bfill().ffill()
+
+    gaze_x = _binocular(_masked(left_x, left_ok), _masked(right_x, right_ok))
+    gaze_y = _binocular(_masked(left_y, left_ok), _masked(right_y, right_ok))
+
+    out = np.column_stack((gaze_x, gaze_y))
+
+    print("→ extract_gaze_signal(): Ausgabegröße:", out.shape)
+
+    return out
+
+
+def extract_pupil(df):
+    """
+    Extract a single pupil-diameter trace plus a per-sample validity flag.
+
+    Returns a tuple (pupil, validity):
+      pupil    - NumPy array, per-sample mean of LEFT_/RIGHT_PUPIL_DIAMETER
+                 with gaps filled by interpolation and back-/forward-fill.
+      validity - NumPy int array, 1 where at least one eye is valid, else 0.
+    """
+    left = df["LEFT_PUPIL_DIAMETER"].replace([np.inf, -np.inf], np.nan)
+    right = df["RIGHT_PUPIL_DIAMETER"].replace([np.inf, -np.inf], np.nan)
+
+    left_flag = df.get("LEFT_PUPIL_VALIDITY")
+    right_flag = df.get("RIGHT_PUPIL_VALIDITY")
+
+    if left_flag is not None and right_flag is not None:
+        # Validity columns present: valid when at least one eye reports 1.
+        any_valid = (left_flag == 1) | (right_flag == 1)
+    else:
+        # Fallback heuristic: valid when at least one diameter is not NaN.
+        any_valid = left.notna() | right.notna()
+    validity = any_valid.astype(int).to_numpy()
+
+    # Plain mean (not nanmean): NaN as soon as either eye is missing.
+    both_eyes = np.column_stack([left, right])
+    per_sample = pd.Series(np.mean(both_eyes, axis=1))
+
+    # Close the gaps: interpolate up to 50 samples from each side, then
+    # back-/forward-fill whatever is still NaN.
+    filled = per_sample.interpolate(limit=50, limit_direction="both").bfill().ffill()
+    p = filled.to_numpy()
+
+    print("→ extract_pupil(): Pupillensignal Länge:", len(p))
+    return p, validity
+
+
+def detect_blinks(pupil_validity, min_duration=5):
+    """
+    Find blinks as runs of invalid pupil samples.
+
+    A run starts at the first sample whose validity is 0 and ends at the
+    next sample whose validity is 1. A run that is still open when the
+    signal ends is closed at ``len(pupil_validity)``, so a blink touching
+    the end of a window is not silently dropped.
+
+    Args:
+        pupil_validity: Sequence of 0/1 flags (1 = valid sample).
+        min_duration: Minimum run length in samples to count as a blink.
+
+    Returns:
+        List of ``[start, end]`` index pairs; ``end`` is exclusive.
+    """
+    blinks = []
+    start = None
+
+    for i, v in enumerate(pupil_validity):
+        if v == 0 and start is None:
+            start = i
+        elif v == 1 and start is not None:
+            if i - start >= min_duration:
+                blinks.append([start, i])
+            start = None
+
+    # Close a run of invalid samples that lasts until the end of the input.
+    if start is not None:
+        end = len(pupil_validity)
+        if end - start >= min_duration:
+            blinks.append([start, end])
+
+    return blinks
+
+
+def compute_IPA(pupil, fs=250):
+    """
+    Return a spectral pupil-activity score for one pupil trace.
+
+    The value is the sum of the Welch power spectral density of ``pupil``
+    over the 0.6-2.0 Hz band (both limits inclusive), estimated with
+    segments of ``2 * fs`` samples.
+
+    NOTE(review): the original docstring called this the Index of Pupillary
+    Activity after Duchowski 2018; what is computed here is a band-power
+    sum - confirm that this approximation is intended.
+    """
+    segment_len = int(fs * 2)  # segment length of 2 * fs samples
+    freqs, power = welch(pupil, fs=fs, nperseg=segment_len)
+
+    in_band = np.logical_and(freqs >= 0.6, freqs <= 2.0)
+    return np.sum(power[in_band])
+
+
+##############################################################################
+# 2. FEATURE-EXTRAKTION (HAUPTFUNKTION)
+##############################################################################
+
+def extract_eye_features(df, window_length_sec=50, fs=250):
+    """
+    Compute fixation, saccade, blink and pupil features per time window.
+
+    The recording is cut into consecutive, non-overlapping windows of
+    ``int(window_length_sec * fs)`` samples; a trailing partial window is
+    skipped.
+
+    Args:
+        df: Eye-tracking DataFrame (Tobii, per the original note). It is
+            passed to extract_gaze_signal() and extract_pupil(), which read
+            the gaze and pupil columns.
+        window_length_sec: Window length in seconds (default 50).
+        fs: Sampling rate, used to convert seconds to samples and sample
+            indices to milliseconds (default 250).
+
+    Returns:
+        pandas.DataFrame with one row per complete window and the feature
+        columns assembled in the dict at the end of the loop body.
+    """
+
+    print("→ extract_eye_features(): Starte Feature-Berechnung...")
+    print("   Fensterlänge W =", window_length_sec, "s")
+
+    W = int(window_length_sec * fs)  # window size in samples
+
+    # Gaze: column 0 is x, column 1 is y. gx/gy are only used for the
+    # diagnostic prints below.
+    gaze = extract_gaze_signal(df)
+    gx, gy = gaze[:, 0], gaze[:, 1]
+    print("Gültige Werte (gx):", np.sum(~np.isnan(gx)), "von", len(gx))
+    print("Range:", np.nanmin(gx), np.nanmax(gx))
+    print("Gültige Werte (gy):", np.sum(~np.isnan(gy)), "von", len(gy))
+    print("Range:", np.nanmin(gy), np.nanmax(gy))
+
+    # Pupil trace and per-sample validity flags
+    pupil, pupil_validity = extract_pupil(df)
+
+    features = []
+
+    # Non-overlapping windows: the step equals the window size.
+    for start in range(0, len(df), W):
+        end = start + W
+        if end > len(df):
+            break  # the last, incomplete window is ignored
+
+
+        w_gaze = gaze[start:end]
+        w_pupil = pupil[start:end]
+        w_valid = pupil_validity[start:end]
+
+        # ----------------------------
+        # FIXATIONS (PyGaze)
+        # ----------------------------
+        # Per-sample timestamps in milliseconds, relative to the window start.
+        # NOTE(review): this array is identical for every window and is
+        # printed in full on each iteration (debug output).
+        time_ms = np.arange(W) * 1000.0 / fs
+
+        print("time_ms:", time_ms)
+
+        # NOTE(review): an earlier comment here read "mindur=100ms", but the
+        # value passed is 10 while timestamps are in ms - presumably mindur
+        # is in the same unit as `time`; confirm the intended threshold.
+        fix, efix = fixation_detection(
+            x=w_gaze[:, 0], y=w_gaze[:, 1], time=time_ms,
+            missing=0.0, maxdist=0.003, mindur=10
+        )
+
+        # Debug output for the first window only.
+        if start == 0:
+            print("DEBUG fix raw:", fix[:10])
+
+        # Collect fixation durations, keeping only finite, positive values.
+        # f[2] is used as the duration; presumably PyGaze's ended-fixation
+        # rows are [start, end, duration, x, y] - confirm against PyGaze.
+        fixation_durations = []
+        for f in efix:
+            print("Efix:", f[2])  # debug output, one line per fixation
+            if np.isfinite(f[2]) and f[2] > 0:
+                fixation_durations.append(f[2])
+
+        # Duration categories "according to the paper" (reference not given
+        # in this file); thresholds are compared against f[2] directly,
+        # i.e. they are meant as milliseconds if f[2] is a duration in ms.
+        F_short = sum(66 <= d <= 150 for d in fixation_durations)
+        F_medium = sum(300 <= d <= 500 for d in fixation_durations)
+        F_long = sum(d >= 1000 for d in fixation_durations)
+        F_hundred = sum(d > 100 for d in fixation_durations)
+        F_Cancel = sum(66 < d for d in fixation_durations)
+
+        # ----------------------------
+        # SACCADES
+        # ----------------------------
+        sac, esac = saccade_detection(
+            x=w_gaze[:, 0], y=w_gaze[:, 1], time=time_ms, missing=0, minlen=12, maxvel=0.2, maxacc=1
+        )
+
+        # s[2] is used as the saccade duration; the amplitude is the Euclidean
+        # distance between (s[3], s[4]) and (s[5], s[6]) - presumably the
+        # start and end positions; confirm against PyGaze's return format.
+        sac_durations = [s[2] for s in esac]
+        sac_amplitudes = [((s[5]-s[3])**2 + (s[6]-s[4])**2)**0.5 for s in esac]
+
+        # ----------------------------
+        # BLINKS
+        # ----------------------------
+        # Blink durations are converted from samples to seconds via fs.
+        blinks = detect_blinks(w_valid)
+        blink_durations = [(b[1] - b[0]) / fs for b in blinks]
+
+        # ----------------------------
+        # PUPIL
+        # ----------------------------
+        # An all-NaN window yields NaN for both pupil features.
+        if np.all(np.isnan(w_pupil)):
+            mean_pupil = np.nan
+            ipa = np.nan
+        else:
+            mean_pupil = np.nanmean(w_pupil)
+            ipa = compute_IPA(w_pupil, fs=fs)
+
+        # ----------------------------
+        # FILL THE FEATURE TABLE
+        # ----------------------------
+        # Mean/median features fall back to 0 when no event was found.
+        # NOTE(review): "Sac_count" counts `sac`, while the amplitude and
+        # duration features are computed from `esac` - confirm both lists
+        # are meant to describe the same saccades.
+        features.append({
+            "Fix_count_short_66_150": F_short,
+            "Fix_count_medium_300_500": F_medium,
+            "Fix_count_long_gt_1000": F_long,
+            "Fix_count_100": F_hundred,
+            "Fix_cancel": F_Cancel,
+            "Fix_mean_duration": np.mean(fixation_durations) if fixation_durations else 0,
+            "Fix_median_duration": np.median(fixation_durations) if fixation_durations else 0,
+
+            "Sac_count": len(sac),
+            "Sac_mean_amp": np.mean(sac_amplitudes) if sac_amplitudes else 0,
+            "Sac_mean_dur": np.mean(sac_durations) if sac_durations else 0,
+            "Sac_median_dur": np.median(sac_durations) if sac_durations else 0,
+
+            "Blink_count": len(blinks),
+            "Blink_mean_dur": np.mean(blink_durations) if blink_durations else 0,
+            "Blink_median_dur": np.median(blink_durations) if blink_durations else 0,
+
+            "Pupil_mean": mean_pupil,
+            "Pupil_IPA": ipa
+        })
+
+
+    result = pd.DataFrame(features)
+    print("→ extract_eye_features(): Fertig! Ergebnisgröße:", result.shape)
+
+    return result
+
+##############################################################################
+# 3. MAIN FUNKTION
+##############################################################################
+
+def main():
+    """
+    Run the eye-feature extraction on one recording and save the result.
+
+    Loads "cleaned_0001.parquet" from the current working directory, keeps
+    only the eye-tracking rows and columns, computes the windowed features
+    (window_length_sec=50, fs=250) and writes them to "features4.csv".
+    Progress is reported on stdout.
+    """
+    input_path = "cleaned_0001.parquet"
+    output_path = "features4.csv"
+
+    print("### STARTE FEATURE-EXTRAKTION ###")
+    print("Aktueller Arbeitsordner:", os.getcwd())
+
+    # Alternative input used earlier: pd.read_hdf("tmp22.h5", "SIGNALS", mode="r")
+    df = pd.read_parquet(input_path)
+    print("DataFrame geladen:", df.shape)
+
+    # Drop rows without eye-tracking data and all non-eye columns.
+    print("Reinige Eyetracking-Daten ...")
+    df_eye = clean_eye_df(df)
+
+    # Feature extraction
+    features = extract_eye_features(df_eye, window_length_sec=50, fs=250)
+
+    print("\n### FEATURE-MATRIX (HEAD) ###")
+    print(features.head())
+
+    # The message is built from output_path so it always names the file that
+    # is really written (it used to say "features.csv").
+    print("\nSpeichere Output in", output_path, "...")
+    features.to_csv(output_path, index=False)
+
+    print("FERTIG!")
+
+
+
+# Run the extraction only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()
--- a/EyeTracking/eyetrackingFeatures.py
+++ b/EyeTracking/eyetrackingFeatures.py
@ -4,6 +4,7 @@ import h5py
 import yaml
 import owncloud
 import os
+from sklearn.preprocessing import MinMaxScaler
 from scipy.signal import welch
 from pygazeanalyser.detectors import fixation_detection, saccade_detection

@ -28,6 +29,7 @@ def clean_eye_df(df):
    print("Eyetracking-Zeilen vorher:", len(df))
    print("Eyetracking-Zeilen nachher:", len(df_eye))

+    #Index zurücksetzen
    return df_eye.reset_index(drop=True)


@ -50,7 +52,7 @@ def extract_gaze_signal(df):
    val_L = (df["EYE_LEFT_GAZE_POINT_VALIDITY"] == 1)
    val_R = (df["EYE_RIGHT_GAZE_POINT_VALIDITY"] == 1)

-    # inf ersetzen (kommt bei Tobii bei Blinks vor)
+    # Inf ersetzen mit NaN (kommt bei Tobii bei Blinks vor)
    gx_L.replace([np.inf, -np.inf], np.nan, inplace=True)
    gy_L.replace([np.inf, -np.inf], np.nan, inplace=True)
    gx_R.replace([np.inf, -np.inf], np.nan, inplace=True)
@ -63,14 +65,22 @@ def extract_gaze_signal(df):
    gy_R[~val_R] = np.nan

    # Mittelwert der beiden Augen pro Sample (nanmean ist robust)
-    gx = np.nanmean(np.column_stack([gx_L, gx_R]), axis=1)
-    gy = np.nanmean(np.column_stack([gy_L, gy_R]), axis=1)
+    gx = np.mean(np.column_stack([gx_L, gx_R]), axis=1)
+    gy = np.mean(np.column_stack([gy_L, gy_R]), axis=1)

    # Interpolation (wichtig für PyGaze!)
    gx = pd.Series(gx).interpolate(limit=50, limit_direction="both").bfill().ffill()
    gy = pd.Series(gy).interpolate(limit=50, limit_direction="both").bfill().ffill()

-    out = np.column_stack((gx, gy))
+    xscaler = MinMaxScaler()
+    gxscale = xscaler.fit_transform(gx.values.reshape(-1, 1))
+
+    yscaler = MinMaxScaler()
+    gyscale = yscaler.fit_transform(gx.values.reshape(-1, 1))
+
+    print("xmax ymax", gxscale.max(), gyscale.max())
+
+    out = np.column_stack((gxscale, gyscale))

    print("→ extract_gaze_signal(): Ausgabegröße:", out.shape)

@ -95,7 +105,7 @@ def extract_pupil(df):
        validity = ( (vl == 1) | (vr == 1) ).astype(int).to_numpy()

    # Mittelwert der verfügbaren Pupillen
-    p = np.nanmean(np.column_stack([pl, pr]), axis=1)
+    p = np.mean(np.column_stack([pl, pr]), axis=1)

    # INF/NaN reparieren
    p = pd.Series(p).interpolate(limit=50, limit_direction="both").bfill().ffill()
@ -138,7 +148,7 @@ def compute_IPA(pupil, fs=250):
 # 2. FEATURE-EXTRAKTION (HAUPTFUNKTION)
 ##############################################################################

-def extract_eye_features(df, window_length_sec=2, fs=250):
+def extract_eye_features(df, window_length_sec=50, fs=250):
    """
    df = Tobii DataFrame
    window_length_sec = Fenstergröße (z.B. W=1s)
@ -147,10 +157,15 @@ def extract_eye_features(df, window_length_sec=2, fs=250):
    print("→ extract_eye_features(): Starte Feature-Berechnung...")
    print("   Fensterlänge W =", window_length_sec, "s")

-    W = int(window_length_sec * fs)
+    W = int(window_length_sec * fs) # Window größe in Samples

    # Gaze
    gaze = extract_gaze_signal(df)
+    gx, gy = gaze[:, 0], gaze[:, 1]
+    print("Gültige Werte (gx):", np.sum(~np.isnan(gx)), "von", len(gx))
+    print("Range:", np.nanmin(gx), np.nanmax(gx))
+    print("Gültige Werte (gy):", np.sum(~np.isnan(gy)), "von", len(gy))
+    print("Range:", np.nanmin(gy), np.nanmax(gy))

    # Pupille
    pupil, pupil_validity = extract_pupil(df)
@ -161,8 +176,8 @@ def extract_eye_features(df, window_length_sec=2, fs=250):
    for start in range(0, len(df), W):
        end = start + W
        if end > len(df):
-            break
-        #print(f"→ Fenster {start}:{end} wird verarbeitet...")
+            break          #das letzte Fenster wird ignoriert
+

        w_gaze = gaze[start:end]
        w_pupil = pupil[start:end]
@ -171,45 +186,56 @@ def extract_eye_features(df, window_length_sec=2, fs=250):
        # ----------------------------
        # FIXATIONS (PyGaze)
        # ----------------------------
-        fix, _ = fixation_detection(
-            x=w_gaze[:, 0], y=w_gaze[:, 1], time=np.arange(W)/fs,
-            missing=np.nan, maxdist=0.02, mindur=0.1 # mindur=100ms
+        time_ms = np.arange(W) * 1000.0 / fs
+
+        # print("gx im Fenster:", w_gaze[:,0][:20])
+        # print("gy im Fenster:", w_gaze[:,1][:20])
+        # print("gx diff:", np.mean(np.abs(np.diff(w_gaze[:,0]))))
+
+        # print("Werte X im Fenster:", w_gaze[:,0])
+        # print("Werte Y im Fenster:", w_gaze[:,1])
+        # print("X-Stats: min/max/diff", np.nanmin(w_gaze[:,0]), np.nanmax(w_gaze[:,0]), np.nanmean(np.abs(np.diff(w_gaze[:,0]))))
+        # print("Y-Stats: min/max/diff", np.nanmin(w_gaze[:,1]), np.nanmax(w_gaze[:,1]), np.nanmean(np.abs(np.diff(w_gaze[:,1]))))
+        print("time_ms:", time_ms)
+
+        fix, efix = fixation_detection(
+            x=w_gaze[:, 0], y=w_gaze[:, 1], time=time_ms,
+            missing=0.0, maxdist=0.001, mindur=65 # mindur=100ms
        )

+        #print("Raw Fixation Output:", efix[0])
+
        if start == 0:
            print("DEBUG fix raw:", fix[:10])

-        # nur gültige Fixationen
-        fix = [f for f in fix if isinstance(f, (list, tuple)) and len(f) >= 3]
-
        # Robust fixations: PyGaze may return malformed entries
        fixation_durations = []
-        for f in fix:
-            start_t = f[1]   # in ms
-            end_t   = f[2]   # in ms
-            duration = (end_t - start_t) / 1000.0  # in Sekunden
+        for f in efix:
+            print("Efix:", f[2])
+            # start_t = f[1]   # in ms
+            # end_t   = f[2]   # in ms
+            # duration = (end_t - start_t) / 1000.0  # in Sekunden

            #duration = f[2] / 1000.0
-            if np.isfinite(duration) and duration > 0:
-                fixation_durations.append(duration)
+            if np.isfinite(f[2]) and f[2] > 0:
+                fixation_durations.append(f[2])

        # Kategorien laut Paper
-        F_short = sum(0.066 <= d <= 0.150 for d in fixation_durations)
-        F_medium = sum(0.300 <= d <= 0.500 for d in fixation_durations)
-        F_long = sum(d >= 1.000 for d in fixation_durations)
+        F_short = sum(66 <= d <= 150 for d in fixation_durations)
+        F_medium = sum(300 <= d <= 500 for d in fixation_durations)
+        F_long = sum(d >= 1000 for d in fixation_durations)
+        F_hundred = sum(d > 100 for d in fixation_durations)
+        F_Cancel = sum(66 < d for d in fixation_durations)

        # ----------------------------
        # SACCADES
        # ----------------------------
-        sac, _ = saccade_detection(
-            x=w_gaze[:, 0], y=w_gaze[:, 1], time=np.arange(W)/fs, missing=np.nan
+        sac, esac = saccade_detection(
+            x=w_gaze[:, 0], y=w_gaze[:, 1], time=time_ms, missing=0, minlen=12, maxvel=0.2, maxacc=1
        )

-        # Korrektes Format: [start_index, end_index, duration_seconds, amplitude_deg]
-        sac = [s for s in sac if isinstance(s, (list, tuple)) and len(s) >= 4]
-
-        sac_durations = [(s[2] - s[1]) for s in sac]
-        sac_amplitudes = [s[3] for s in sac]
+        sac_durations = [s[2] for s in esac]
+        sac_amplitudes = [((s[5]-s[3])**2 + (s[6]-s[4])**2)**0.5 for s in esac]

        # ----------------------------
        # BLINKS
@ -234,6 +260,8 @@ def extract_eye_features(df, window_length_sec=2, fs=250):
            "Fix_count_short_66_150": F_short,
            "Fix_count_medium_300_500": F_medium,
            "Fix_count_long_gt_1000": F_long,
+            "Fix_count_100": F_hundred,
+            "Fix_cancel": F_Cancel,
            "Fix_mean_duration": np.mean(fixation_durations) if fixation_durations else 0,
            "Fix_median_duration": np.median(fixation_durations) if fixation_durations else 0,

@ -265,6 +293,7 @@ def main():
    print("Aktueller Arbeitsordner:", os.getcwd())

    df = pd.read_hdf("tmp22.h5", "SIGNALS", mode="r")
+    #df = pd.read_parquet("cleaned_0001.parquet")
    print("DataFrame geladen:", df.shape)

    # Nur Eye-Tracking auswählen
@ -278,7 +307,7 @@ def main():
    df_eye = clean_eye_df(df)

    # Feature Extraction
-    features = extract_eye_features(df_eye, window_length_sec=2, fs=250)
+    features = extract_eye_features(df_eye, window_length_sec=50, fs=250)

    print("\n### FEATURE-MATRIX (HEAD) ###")
    print(features.head())