From ee648f9adc9eb66f623533226cf773a44cd8f16d Mon Sep 17 00:00:00 2001
From: Michael
Date: Sat, 24 Jan 2026 19:15:31 +0100
Subject: [PATCH] added subset filtering to notebook

---
 EDA/distribution_plots.ipynb | 44 ++++++++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/EDA/distribution_plots.ipynb b/EDA/distribution_plots.ipynb
index f43012f..a73877a 100644
--- a/EDA/distribution_plots.ipynb
+++ b/EDA/distribution_plots.ipynb
@@ -37,7 +37,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/combined_dataset_25hz.parquet\")"
+    "dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/combined_dataset_25hz.parquet\")\n",
+    "# dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/60s_combined_dataset_25hz.parquet\")\n",
+    "# dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/120s_combined_dataset_25hz.parquet\")"
    ]
   },
   {
@@ -50,7 +52,8 @@
     "FILTER_MAD = True\n",
     "THRESHOLD = 3.5\n",
     "METHOD = 'minmax'\n",
-    "SCOPE = 'subject'"
+    "SCOPE = 'subject'\n",
+    "FILTER_SUBSETS = True"
    ]
   },
   {
@@ -72,6 +75,43 @@
     "df.shape"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3ba4401c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if FILTER_SUBSETS:\n",
+    "    # Special filter: keep only the rows belonging to these subsets\n",
+    "    # - k-drive L1 baseline\n",
+    "    # - n-back L1 baseline\n",
+    "    # - k-drive test with levels 1, 2, 3\n",
+    "\n",
+    "    df = df[\n",
+    "        (\n",
+    "            # k-drive L1 baseline\n",
+    "            (df['STUDY'] == 'k-drive') &\n",
+    "            (df['LEVEL'] == 1) &\n",
+    "            (df['PHASE'] == 'baseline')\n",
+    "        ) |\n",
+    "        (\n",
+    "            # n-back L1 baseline\n",
+    "            (df['STUDY'] == 'n-back') &\n",
+    "            (df['LEVEL'] == 1) &\n",
+    "            (df['PHASE'] == 'baseline')\n",
+    "        ) |\n",
+    "        (\n",
+    "            # k-drive test with levels 1, 2, 3\n",
+    "            (df['STUDY'] == 'k-drive') &\n",
+    "            (df['LEVEL'].isin([1, 2, 3])) &\n",
+    "            (df['PHASE'] == 'test')\n",
+    "        )].copy()\n",
+    "\n",
+    "    print(f\"Filtered dataframe shape: {df.shape}\")\n",
+    "    print(f\"Remaining subsets: {df.groupby(['STUDY', 'LEVEL', 'PHASE']).size()}\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,