{ "cells": [ { "cell_type": "markdown", "id": "1014c5e0", "metadata": {}, "source": [ "Imports" ] }, { "cell_type": "code", "execution_count": null, "id": "e42f3011", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": null, "id": "0a834496", "metadata": {}, "outputs": [], "source": [ "path =r\"C:\\Users\\micha\\FAUbox\\WS2526_Fahrsimulator_MSY (Celina Korzer)\\AU_dataset\\output_windowed.parquet\"\n", "df = pd.read_parquet(path=path)" ] }, { "cell_type": "code", "execution_count": null, "id": "aa4759fa", "metadata": {}, "outputs": [], "source": [ "high_nback = df[\n", " (df[\"STUDY\"]==\"n-back\") &\n", " (df[\"LEVEL\"].isin([2, 3, 5, 6])) &\n", " (df[\"PHASE\"].isin([\"train\", \"test\"]))\n", "]\n", "high_nback.shape" ] }, { "cell_type": "code", "execution_count": null, "id": "a2aa0596", "metadata": {}, "outputs": [], "source": [ "low_all = df[\n", " ((df[\"PHASE\"] == \"baseline\") |\n", " ((df[\"STUDY\"] == \"n-back\") & (df[\"PHASE\"] != \"baseline\") & (df[\"LEVEL\"].isin([1,4]))))\n", "]\n", "print(low_all.shape)\n", "high_kdrive = df[\n", " (df[\"STUDY\"] == \"k-drive\") & (df[\"PHASE\"] != \"baseline\")\n", "]\n", "print(high_kdrive.shape)" ] }, { "cell_type": "code", "execution_count": null, "id": "f7d446a1", "metadata": {}, "outputs": [], "source": [ "print((df.shape[0]==(high_kdrive.shape[0]+high_nback.shape[0]+low_all.shape[0])))\n", "print(df.shape[0])\n", "print((high_kdrive.shape[0]+high_nback.shape[0]+low_all.shape[0]))" ] }, { "cell_type": "code", "execution_count": null, "id": "474e144a", "metadata": {}, "outputs": [], "source": [ "high_all = pd.concat([high_nback, high_kdrive])\n", "high_all.shape" ] }, { "cell_type": "code", "execution_count": null, "id": "5dd585c2", "metadata": {}, "outputs": [], "source": [ "df.dtypes" ] }, { "cell_type": "code", "execution_count": null, "id": "0bd39d9f", "metadata": {}, "outputs": [], "source": [ "# Get all columns that start with 'AU'\n", "au_columns = [col for col in low_all.columns if col.startswith('AU')]\n", "\n", "# Calculate number of rows and columns for subplots\n", "n_cols = len(au_columns)\n", "n_rows = 4\n", "n_cols_subplot = 5\n", "\n", "# Create figure with subplots\n", "fig, axes = plt.subplots(n_rows, n_cols_subplot, figsize=(20, 16))\n", "axes = axes.flatten()\n", "fig.suptitle('Action Unit (AU) Distributions: Low vs High', fontsize=20, fontweight='bold', y=0.995)\n", "\n", "# Create histogram for each AU column\n", "for idx, col in enumerate(au_columns):\n", " ax = axes[idx]\n", " \n", " # Plot overlapping histograms\n", " ax.hist(low_all[col].dropna(), bins=30, alpha=0.6, color='blue', label='low_all', edgecolor='black')\n", " ax.hist(high_all[col].dropna(), bins=30, alpha=0.6, color='red', label='high_all', edgecolor='black')\n", " \n", " # Set title and labels\n", " ax.set_title(col, fontsize=10, fontweight='bold')\n", " ax.set_xlabel('Value', fontsize=8)\n", " ax.set_ylabel('Frequency', fontsize=8)\n", " ax.legend(fontsize=8)\n", " ax.grid(True, alpha=0.3)\n", "\n", "# Hide any unused subplots\n", "for idx in range(len(au_columns), len(axes)):\n", " axes[idx].set_visible(False)\n", "\n", "# Adjust layout\n", "plt.tight_layout()\n", "plt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 5 }