Fahrsimulator_MSY2526_AI/EDA/histogramms.ipynb

167 lines
4.2 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"id": "1014c5e0",
"metadata": {},
"source": [
"Imports"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e42f3011",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0a834496",
"metadata": {},
"outputs": [],
"source": [
"path =r\"C:\\Users\\micha\\FAUbox\\WS2526_Fahrsimulator_MSY (Celina Korzer)\\AU_dataset\\output_windowed.parquet\"\n",
"df = pd.read_parquet(path=path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa4759fa",
"metadata": {},
"outputs": [],
"source": [
"high_nback = df[\n",
" (df[\"STUDY\"]==\"n-back\") &\n",
" (df[\"LEVEL\"].isin([2, 3, 5, 6])) &\n",
" (df[\"PHASE\"].isin([\"train\", \"test\"]))\n",
"]\n",
"high_nback.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a2aa0596",
"metadata": {},
"outputs": [],
"source": [
"low_all = df[\n",
" ((df[\"PHASE\"] == \"baseline\") |\n",
" ((df[\"STUDY\"] == \"n-back\") & (df[\"PHASE\"] != \"baseline\") & (df[\"LEVEL\"].isin([1,4]))))\n",
"]\n",
"print(low_all.shape)\n",
"high_kdrive = df[\n",
" (df[\"STUDY\"] == \"k-drive\") & (df[\"PHASE\"] != \"baseline\")\n",
"]\n",
"print(high_kdrive.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f7d446a1",
"metadata": {},
"outputs": [],
"source": [
"print((df.shape[0]==(high_kdrive.shape[0]+high_nback.shape[0]+low_all.shape[0])))\n",
"print(df.shape[0])\n",
"print((high_kdrive.shape[0]+high_nback.shape[0]+low_all.shape[0]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "474e144a",
"metadata": {},
"outputs": [],
"source": [
"high_all = pd.concat([high_nback, high_kdrive])\n",
"high_all.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5dd585c2",
"metadata": {},
"outputs": [],
"source": [
"df.dtypes"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0bd39d9f",
"metadata": {},
"outputs": [],
"source": [
"# Get all columns that start with 'AU'\n",
"au_columns = [col for col in low_all.columns if col.startswith('AU')]\n",
"\n",
"# Calculate number of rows and columns for subplots\n",
"n_cols = len(au_columns)\n",
"n_rows = 4\n",
"n_cols_subplot = 5\n",
"\n",
"# Create figure with subplots\n",
"fig, axes = plt.subplots(n_rows, n_cols_subplot, figsize=(20, 16))\n",
"axes = axes.flatten()\n",
"fig.suptitle('Action Unit (AU) Distributions: Low vs High', fontsize=20, fontweight='bold', y=0.995)\n",
"\n",
"# Create histogram for each AU column\n",
"for idx, col in enumerate(au_columns):\n",
" ax = axes[idx]\n",
" \n",
" # Plot overlapping histograms\n",
" ax.hist(low_all[col].dropna(), bins=30, alpha=0.6, color='blue', label='low_all', edgecolor='black')\n",
" ax.hist(high_all[col].dropna(), bins=30, alpha=0.6, color='red', label='high_all', edgecolor='black')\n",
" \n",
" # Set title and labels\n",
" ax.set_title(col, fontsize=10, fontweight='bold')\n",
" ax.set_xlabel('Value', fontsize=8)\n",
" ax.set_ylabel('Frequency', fontsize=8)\n",
" ax.legend(fontsize=8)\n",
" ax.grid(True, alpha=0.3)\n",
"\n",
"# Hide any unused subplots\n",
"for idx in range(len(au_columns), len(axes)):\n",
" axes[idx].set_visible(False)\n",
"\n",
"# Adjust layout\n",
"plt.tight_layout()\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}