diff --git a/dataset_creation/chunkwise_parquet_file_creation_EYE_TRACKING.py b/dataset_creation/chunkwise_parquet_file_creation_EYE_TRACKING.py
new file mode 100644
index 0000000..64b1ae6
--- /dev/null
+++ b/dataset_creation/chunkwise_parquet_file_creation_EYE_TRACKING.py
@@ -0,0 +1,91 @@
+import os
+import pandas as pd
+from pathlib import Path
+
+print(os.getcwd())
+num_files = 2  # number of files to process (min: 1, max: 30)
+
+print("connection established")
+
+data_dir = Path("/home/jovyan/Fahrsimulator_MSY2526_AI/EDA")
+# os.chdir(data_dir)
+# Get all .h5 files and sort them
+matching_files = sorted(data_dir.glob("*.h5"))
+
+# Chunk size for reading (adjust based on your RAM - 100k rows is ~50-100MB depending on columns)
+CHUNK_SIZE = 100_000
+
+for i, file_path in enumerate(matching_files):
+    print(f"Subject {i} started")
+    print(f"{file_path} opened")
+
+    # Step 1: Get total number of rows and column names
+    with pd.HDFStore(file_path, mode="r") as store:
+        cols = store.select("SIGNALS", start=0, stop=1).columns
+        nrows = store.get_storer("SIGNALS").nrows
+    print(f"Total columns: {len(cols)}, Total rows: {nrows}")
+
+    # Step 2: Filter columns that start with "EYE_"
+    eye_cols = [c for c in cols if c.startswith("EYE_")]
+    print(f"eye-tracking columns found: {eye_cols}")
+
+    if len(eye_cols) == 0:
+        print(f"no eye-tracking signals in Subject {i}")
+        continue
+
+    # Columns to read
+    columns_to_read = ["STUDY", "LEVEL", "PHASE"] + eye_cols
+
+    # Step 3: Process file in chunks
+    chunks_to_save = []
+
+    for start_row in range(0, nrows, CHUNK_SIZE):
+        stop_row = min(start_row + CHUNK_SIZE, nrows)
+        print(f"Processing rows {start_row} to {stop_row} ({stop_row/nrows*100:.1f}%)")
+
+        # Read chunk
+        df_chunk = pd.read_hdf(
+            file_path,
+            key="SIGNALS",
+            columns=columns_to_read,
+            start=start_row,
+            stop=stop_row
+        )
+
+        # Add metadata columns
+        df_chunk["subjectID"] = i
+        df_chunk["rowID"] = range(start_row, stop_row)
+
+        # Clean data
+        df_chunk = df_chunk[df_chunk["LEVEL"] != 0]
+        df_chunk = df_chunk.dropna()
+
+        # Only keep non-empty chunks
+        if len(df_chunk) > 0:
+            chunks_to_save.append(df_chunk)
+
+        # Free memory
+        del df_chunk
+
+    print("loading and cleaning done")
+
+    # Step 4: Combine all chunks and save
+    if chunks_to_save:
+        df_final = pd.concat(chunks_to_save, ignore_index=True)
+        print(f"Final dataframe shape: {df_final.shape}")
+
+        # Save to parquet
+        base_dir = Path(r"/home/jovyan/data-paulusjafahrsimulator-gpu/new_ET_Parquet_files")
+        os.makedirs(base_dir, exist_ok=True)
+
+        out_name = base_dir / f"ET_signals_extracted_{i:04d}.parquet"
+        df_final.to_parquet(out_name, index=False)
+        print(f"Saved to {out_name}")
+
+        # Free memory
+        del df_final
+        del chunks_to_save
+    else:
+        print(f"No valid data found for Subject {i}")
+
+print("All files processed!")
\ No newline at end of file
diff --git a/dataset_creation/chunkwise_parquet_file_creation.py b/dataset_creation/chunkwise_parquet_file_creation_FACE_AU.py
similarity index 100%
rename from dataset_creation/chunkwise_parquet_file_creation.py
rename to dataset_creation/chunkwise_parquet_file_creation_FACE_AU.py
diff --git a/dataset_creation/create_feature_table.py b/dataset_creation/create_feature_table.py
index 86ee7b8..54e7892 100644
--- a/dataset_creation/create_feature_table.py
+++ b/dataset_creation/create_feature_table.py
@@ -71,7 +71,8 @@ def process_parquet_files(input_dir, output_file, window_size=1250, step_size=12
 
             # Summiere alle AU-Spalten
             for au_col in au_columns:
-                result[f'{au_col}_sum'] = window_df[au_col].sum()
+                # result[f'{au_col}_sum'] = window_df[au_col].sum()
+                result[f'{au_col}_mean'] = window_df[au_col].mean()
 
             all_windows.append(result)
 
@@ -94,8 +95,8 @@ def process_parquet_files(input_dir, output_file, window_size=1250, step_size=12
 # Beispiel-Verwendung
 if __name__ == "__main__":
     # Anpassen an deine Pfade
-    input_directory = r"C:\Users\x\FAUbox\WS2526_Fahrsimulator_MSY (Celina Korzer)\new_AU_parquet_files"
-    output_file = r"C:\Users\x\FAUbox\WS2526_Fahrsimulator_MSY (Celina Korzer)\new_AU_dataset\AU_dataset.parquet"
+    input_directory = Path(r"/home/jovyan/data-paulusjafahrsimulator-gpu/new_AU_parquet_files")
+    output_file = Path(r"/home/jovyan/data-paulusjafahrsimulator-gpu/new_AU_dataset_mean/AU_dataset_mean.parquet")
 
 
 
diff --git a/dataset_creation/create_multimodal_dataset.py b/dataset_creation/create_multimodal_dataset.py
new file mode 100644
index 0000000..a81a242
--- /dev/null
+++ b/dataset_creation/create_multimodal_dataset.py
@@ -0,0 +1,56 @@
+from pathlib import Path
+import pandas as pd
+
+
+def main():
+    """
+    USER CONFIGURATION
+    ------------------
+    Specify input files and output directory here.
+    """
+
+    # Input parquet files (single-modality datasets)
+    file_modality_1 = Path("/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/AU_dataset_mean.parquet")
+    file_modality_2 = Path("/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/new_eye_dataset.parquet")
+
+    # Output directory and file name
+    output_dir = Path("/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/")
+    output_file = output_dir / "merged_dataset.parquet"
+
+    # Column names (adjust only if your schema differs)
+    subject_col = "subjectID"
+    time_col = "start_time"
+
+    # ------------------------------------------------------------------
+    # Load datasets
+    # ------------------------------------------------------------------
+    df1 = pd.read_parquet(file_modality_1)
+    df2 = pd.read_parquet(file_modality_2)
+
+    # ------------------------------------------------------------------
+    # Keep only subjects that appear in BOTH datasets
+    # ------------------------------------------------------------------
+    common_subjects = set(df1[subject_col]).intersection(df2[subject_col])
+
+    df1 = df1[df1[subject_col].isin(common_subjects)]
+    df2 = df2[df2[subject_col].isin(common_subjects)]
+
+    # ------------------------------------------------------------------
+    # Inner join on subject ID AND start_time
+    # ------------------------------------------------------------------
+    merged_df = pd.merge(
+        df1,
+        df2,
+        on=[subject_col, time_col],
+        how="inner",
+    )
+
+    # ------------------------------------------------------------------
+    # Save merged dataset
+    # ------------------------------------------------------------------
+    output_dir.mkdir(parents=True, exist_ok=True)
+    merged_df.to_parquet(output_file, index=False)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/dataset_creation/eyeAlt.py b/dataset_creation/eyeAlt.py
index 1f685a9..fef68ad 100644
--- a/dataset_creation/eyeAlt.py
+++ b/dataset_creation/eyeAlt.py
@@ -2,7 +2,6 @@ import numpy as np
 import pandas as pd
 import h5py
 import yaml
-import owncloud
 import os
 from sklearn.preprocessing import MinMaxScaler
 from scipy.signal import welch
diff --git a/dataset_creation/eye_batch_processor.py b/dataset_creation/eye_batch_processor.py
index 09b906d..8192147 100644
--- a/dataset_creation/eye_batch_processor.py
+++ b/dataset_creation/eye_batch_processor.py
@@ -12,8 +12,8 @@ from pygazeanalyser.detectors import fixation_detection, saccade_detection
 ##############################################################################
 # KONFIGURATION - HIER ANPASSEN!
 ##############################################################################
-INPUT_DIR = Path(r"/home/jovyan/data-paulusjafahrsimulator-gpu/parquet_Eye_features_old/")
-OUTPUT_FILE = Path(r"/home/jovyan/data-paulusjafahrsimulator-gpu/Eye_dataset_old/eye_dataset_old.parquet")
+INPUT_DIR = Path(r"/home/jovyan/data-paulusjafahrsimulator-gpu/new_ET_Parquet_files/")
+OUTPUT_FILE = Path(r"/home/jovyan/data-paulusjafahrsimulator-gpu/Eye_dataset_old/new_eye_dataset.parquet")
 
 WINDOW_SIZE_SAMPLES = 12500  # Anzahl Samples pro Window (z.B. 1250 = 50s bei 25Hz, oder 5s bei 250Hz)
 STEP_SIZE_SAMPLES = 1250  # Schrittweite (z.B. 125 = 5s bei 25Hz, oder 0.5s bei 250Hz)
@@ -28,7 +28,7 @@ def clean_eye_df(df):
     Entfernt alle Zeilen, die keine echten Eyetracking-Daten enthalten.
     Löst das Problem, dass das Haupt-DataFrame NaN-Zeilen für andere Sensoren enthält.
     """
-    eye_cols = [c for c in df.columns if ("LEFT_" in c or "RIGHT_" in c)]
+    eye_cols = [c for c in df.columns if c.startswith("EYE_")]
     df_eye = df[eye_cols]
 
     # INF → NaN
@@ -48,14 +48,14 @@ def extract_gaze_signal(df):
     maskiert ungültige Samples und interpoliert Lücken.
     """
     # Gaze-Spalten
-    gx_L = df["LEFT_GAZE_POINT_ON_DISPLAY_AREA_X"].astype(float).copy()
-    gy_L = df["LEFT_GAZE_POINT_ON_DISPLAY_AREA_Y"].astype(float).copy()
-    gx_R = df["RIGHT_GAZE_POINT_ON_DISPLAY_AREA_X"].astype(float).copy()
-    gy_R = df["RIGHT_GAZE_POINT_ON_DISPLAY_AREA_Y"].astype(float).copy()
+    gx_L = df["EYE_LEFT_GAZE_POINT_ON_DISPLAY_AREA_X"].astype(float).copy()
+    gy_L = df["EYE_LEFT_GAZE_POINT_ON_DISPLAY_AREA_Y"].astype(float).copy()
+    gx_R = df["EYE_RIGHT_GAZE_POINT_ON_DISPLAY_AREA_X"].astype(float).copy()
+    gy_R = df["EYE_RIGHT_GAZE_POINT_ON_DISPLAY_AREA_Y"].astype(float).copy()
 
     # Validity-Spalten (1 = gültig)
-    val_L = (df["LEFT_GAZE_POINT_VALIDITY"] == 1)
-    val_R = (df["RIGHT_GAZE_POINT_VALIDITY"] == 1)
+    val_L = (df["EYE_LEFT_GAZE_POINT_VALIDITY"] == 1)
+    val_R = (df["EYE_RIGHT_GAZE_POINT_VALIDITY"] == 1)
 
     # Inf ersetzen mit NaN (kommt bei Tobii bei Blinks vor)
     gx_L.replace([np.inf, -np.inf], np.nan, inplace=True)
@@ -76,18 +76,24 @@ def extract_gaze_signal(df):
     # Interpolation (wichtig für PyGaze!)
     gx = pd.Series(gx).interpolate(limit=50, limit_direction="both").bfill().ffill()
     gy = pd.Series(gy).interpolate(limit=50, limit_direction="both").bfill().ffill()
+
+    xscaler = MinMaxScaler()
+    gxscale = xscaler.fit_transform(gx.values.reshape(-1, 1))
 
-    out = np.column_stack((gx, gy))
+    yscaler = MinMaxScaler()
+    gyscale = yscaler.fit_transform(gy.values.reshape(-1, 1))
+
+    out = np.column_stack((gxscale, gyscale))
     return out
 
 
 def extract_pupil(df):
     """Extrahiert Pupillengröße (beide Augen gemittelt)."""
-    pl = df["LEFT_PUPIL_DIAMETER"].replace([np.inf, -np.inf], np.nan)
-    pr = df["RIGHT_PUPIL_DIAMETER"].replace([np.inf, -np.inf], np.nan)
+    pl = df["EYE_LEFT_PUPIL_DIAMETER"].replace([np.inf, -np.inf], np.nan)
+    pr = df["EYE_RIGHT_PUPIL_DIAMETER"].replace([np.inf, -np.inf], np.nan)
 
-    vl = df.get("LEFT_PUPIL_VALIDITY")
-    vr = df.get("RIGHT_PUPIL_VALIDITY")
+    vl = df.get("EYE_LEFT_PUPIL_VALIDITY")
+    vr = df.get("EYE_RIGHT_PUPIL_VALIDITY")
 
     if vl is None or vr is None:
         validity = (~pl.isna() | ~pr.isna()).astype(int).to_numpy()