From 37ab7cb6204758b91c270cd9f316ba12317199c3 Mon Sep 17 00:00:00 2001
From: Michael
Date: Mon, 3 Nov 2025 10:24:02 +0100
Subject: [PATCH] added histogramms for AU EDA

---
 EDA/histogramms.ipynb | 175 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 175 insertions(+)
 create mode 100644 EDA/histogramms.ipynb

diff --git a/EDA/histogramms.ipynb b/EDA/histogramms.ipynb
new file mode 100644
index 0000000..e35055f
--- /dev/null
+++ b/EDA/histogramms.ipynb
@@ -0,0 +1,175 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "1014c5e0",
+   "metadata": {},
+   "source": [
+    "Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e42f3011",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0a834496",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE: absolute, machine-specific path; adjust it when running elsewhere.\n",
+    "path = r\"C:\\Users\\micha\\FAUbox\\WS2526_Fahrsimulator_MSY (Celina Korzer)\\AU_dataset\\output_windowed.parquet\"\n",
+    "df = pd.read_parquet(path=path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aa4759fa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# high_nback: n-back rows with LEVEL 2, 3, 5 or 6 in the train/test phases\n",
+    "high_nback = df[\n",
+    "    (df[\"STUDY\"]==\"n-back\") &\n",
+    "    (df[\"LEVEL\"].isin([2, 3, 5, 6])) &\n",
+    "    (df[\"PHASE\"].isin([\"train\", \"test\"]))\n",
+    "]\n",
+    "high_nback.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a2aa0596",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# low_all: every baseline row plus non-baseline n-back rows with LEVEL 1 or 4\n",
+    "low_all = df[\n",
+    "    ((df[\"PHASE\"] == \"baseline\") |\n",
+    "    ((df[\"STUDY\"] == \"n-back\") & (df[\"PHASE\"] != \"baseline\") & (df[\"LEVEL\"].isin([1,4]))))\n",
+    "]\n",
+    "print(low_all.shape)\n",
+    "# high_kdrive: every non-baseline k-drive row\n",
+    "high_kdrive = df[\n",
+    "    (df[\"STUDY\"] == \"k-drive\") & (df[\"PHASE\"] != \"baseline\")\n",
+    "]\n",
+    "print(high_kdrive.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f7d446a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sanity check: compare the total row count with the summed sizes of the three subsets\n",
+    "print((df.shape[0]==(high_kdrive.shape[0]+high_nback.shape[0]+low_all.shape[0])))\n",
+    "print(df.shape[0])\n",
+    "print((high_kdrive.shape[0]+high_nback.shape[0]+low_all.shape[0]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "474e144a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "high_all = pd.concat([high_nback, high_kdrive])\n",
+    "high_all.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5dd585c2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.dtypes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0bd39d9f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Get all columns that start with 'AU'\n",
+    "au_columns = [col for col in low_all.columns if col.startswith('AU')]\n",
+    "\n",
+    "# Size the grid from the number of AU columns so no column falls outside it\n",
+    "n_cols_subplot = 5\n",
+    "n_rows = max(1, -(-len(au_columns) // n_cols_subplot))  # ceiling division, at least one row\n",
+    "\n",
+    "# Create figure with subplots (squeeze=False always returns a 2-D axes array)\n",
+    "fig, axes = plt.subplots(n_rows, n_cols_subplot, figsize=(20, 4 * n_rows), squeeze=False)\n",
+    "axes = axes.flatten()\n",
+    "fig.suptitle('Action Unit (AU) Distributions: Low vs High', fontsize=20, fontweight='bold', y=0.995)\n",
+    "\n",
+    "# Create histogram for each AU column\n",
+    "for idx, col in enumerate(au_columns):\n",
+    "    ax = axes[idx]\n",
+    "    low_values = low_all[col].dropna()\n",
+    "    high_values = high_all[col].dropna()\n",
+    "\n",
+    "    # Shared bin edges keep the two overlaid histograms directly comparable\n",
+    "    bins = np.histogram_bin_edges(pd.concat([low_values, high_values]), bins=30)\n",
+    "\n",
+    "    # Plot overlapping histograms\n",
+    "    ax.hist(low_values, bins=bins, alpha=0.6, color='blue', label='low_all', edgecolor='black')\n",
+    "    ax.hist(high_values, bins=bins, alpha=0.6, color='red', label='high_all', edgecolor='black')\n",
+    "\n",
+    "    # Set title and labels\n",
+    "    ax.set_title(col, fontsize=10, fontweight='bold')\n",
+    "    ax.set_xlabel('Value', fontsize=8)\n",
+    "    ax.set_ylabel('Frequency', fontsize=8)\n",
+    "    ax.legend(fontsize=8)\n",
+    "    ax.grid(True, alpha=0.3)\n",
+    "\n",
+    "# Hide any unused subplots\n",
+    "for idx in range(len(au_columns), len(axes)):\n",
+    "    axes[idx].set_visible(False)\n",
+    "\n",
+    "# Adjust layout\n",
+    "plt.tight_layout()\n",
+    "plt.show()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}