From ee648f9adc9eb66f623533226cf773a44cd8f16d Mon Sep 17 00:00:00 2001
From: Michael <weigmi87303@th-nuernberg.de>
Date: Sat, 24 Jan 2026 19:15:31 +0100
Subject: [PATCH] Add subset filtering to distribution_plots notebook

---
 EDA/distribution_plots.ipynb | 44 ++++++++++++++++++++++++++++++++++--
 1 file changed, 42 insertions(+), 2 deletions(-)

diff --git a/EDA/distribution_plots.ipynb b/EDA/distribution_plots.ipynb
index f43012f..a73877a 100644
--- a/EDA/distribution_plots.ipynb
+++ b/EDA/distribution_plots.ipynb
@@ -37,7 +37,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/combined_dataset_25hz.parquet\")"
+    "dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/combined_dataset_25hz.parquet\")\n",
+    "# dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/60s_combined_dataset_25hz.parquet\")\n",
+    "# dataset_path = Path(r\"/home/jovyan/data-paulusjafahrsimulator-gpu/new_datasets/120s_combined_dataset_25hz.parquet\")"
    ]
   },
   {
@@ -50,7 +52,8 @@
     "FILTER_MAD = True\n",
     "THRESHOLD = 3.5\n",
     "METHOD = 'minmax'\n",
-    "SCOPE = 'subject'"
+    "SCOPE = 'subject'\n",
+    "FILTER_SUBSETS = True"
    ]
   },
   {
@@ -72,6 +75,43 @@
     "df.shape"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3ba4401c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if(FILTER_SUBSETS):\n",
+    "    # Special filter: keep only the following subsets:\n",
+    "    #   - k-drive L1 baseline\n",
+    "    #   - n-back L1 baseline\n",
+    "    #   - k-drive test with levels 1, 2, 3\n",
+    "\n",
+    "    df = df[\n",
+    "        (\n",
+    "            # k-drive L1 baseline\n",
+    "            ((df['STUDY'] == 'k-drive') & \n",
+    "            (df['LEVEL'] == 1) & \n",
+    "            (df['PHASE'] == 'baseline'))\n",
+    "        ) | \n",
+    "        (\n",
+    "            # n-back L1 baseline\n",
+    "            ((df['STUDY'] == 'n-back') & \n",
+    "            (df['LEVEL'] == 1) & \n",
+    "            (df['PHASE'] == 'baseline'))\n",
+    "        ) | \n",
+    "        (\n",
+    "            # k-drive test with levels 1, 2, 3\n",
+    "            ((df['STUDY'] == 'k-drive') & \n",
+    "            (df['LEVEL'].isin([1, 2, 3])) & \n",
+    "            (df['PHASE'] == 'test'))\n",
+    "        )].copy()\n",
+    "\n",
+    "print(f\"Filtered dataframe shape: {df.shape}\")\n",
+    "print(f\"Remaining subsets: {df.groupby(['STUDY', 'LEVEL', 'PHASE']).size()}\")"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,