diff --git a/dataset_creation/CPFC_both.py b/dataset_creation/CPFC_both.py index 3b37b9d..06e5a6c 100644 --- a/dataset_creation/CPFC_both.py +++ b/dataset_creation/CPFC_both.py @@ -2,7 +2,7 @@ import os import pandas as pd from pathlib import Path -data_dir = Path("/home/jovyan/Fahrsimulator_MSY2526_AI/EDA") +data_dir = Path("/home/jovyan/data-paulusjafahrsimulator-gpu/raw_data") # Get all .h5 files and sort them matching_files = sorted(data_dir.glob("*.h5")) @@ -56,16 +56,16 @@ for i, file_path in enumerate(matching_files): start=start_row, stop=stop_row ) - + # print(f"[DEBUG] Vor Dropna: {df_chunk["EYE_LEFT_PUPIL_VALIDITY"].value_counts()}") # Add metadata columns df_chunk["subjectID"] = i df_chunk["rowID"] = range(start_row, stop_row) # Clean data df_chunk = df_chunk[df_chunk["LEVEL"] != 0] - df_chunk = df_chunk.dropna() - # problematisch, weil die eye tracking auflösung kaputt geht - + df_chunk = df_chunk.dropna(subset=face_au_cols) + + # print(f"[DEBUG] Nach Dropna: {df_chunk["EYE_LEFT_PUPIL_VALIDITY"].value_counts()}") # Only keep non-empty chunks if len(df_chunk) > 0: chunks_to_save.append(df_chunk) diff --git a/dataset_creation/combined_feature_creation.py b/dataset_creation/combined_feature_creation.py index ad8fec4..9b6097e 100644 --- a/dataset_creation/combined_feature_creation.py +++ b/dataset_creation/combined_feature_creation.py @@ -11,9 +11,9 @@ from pygazeanalyser.detectors import fixation_detection, saccade_detection # KONFIGURATION ############################################################################## INPUT_DIR = Path(r"/home/jovyan/data-paulusjafahrsimulator-gpu/both_mod_parquet_files") -OUTPUT_FILE = Path(r"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/fix_blink_combined_dataset_25hz.parquet") +OUTPUT_FILE = Path(r"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/blink_fix_dataset.parquet") -WINDOW_SIZE_SAMPLES = 1250 # 50s bei 25Hz +WINDOW_SIZE_SAMPLES = 25*50 # 50s bei 25Hz STEP_SIZE_SAMPLES = 125 # 5s bei 25Hz SAMPLING_RATE = 25 # Hz MIN_DUR_BLINKS = 2 # x * 40ms @@ -368,9 +368,6 @@ def main(): print(result.head()) print("\nSpalten-Übersicht:") - print(result.columns.tolist()) - - print("\nDatentypen:") print(result.dtypes) print("\nStatistik:")