157 lines
3.3 KiB
Plaintext
157 lines
3.3 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "2b3fface",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
"\n",
"import pandas as pd"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "74f1f5ec",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Location of the windowed AU dataset. Set the AU_DATASET_PARQUET environment\n",
"# variable to run the notebook on another machine; when it is unset, the local\n",
"# path that was previously hard-coded here is used as the default.\n",
"DATASET_PATH = os.environ.get(\n",
"    \"AU_DATASET_PARQUET\",\n",
"    r\"C:\\Users\\micha\\FAUbox\\WS2526_Fahrsimulator_MSY (Celina Korzer)\\AU_dataset\\output_windowed.parquet\",\n",
")\n",
"\n",
"# Load the full dataset once; every later cell filters this frame without mutating it.\n",
"df = pd.read_parquet(DATASET_PATH)\n",
"print(df.shape)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "05775454",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Preview the first five rows of the loaded frame.\n",
"df.head(n=5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "99e17328",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Preview the last five rows of the loaded frame.\n",
"df.tail(n=5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "69e53731",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Print the column dtypes, non-null counts and memory footprint of the frame.\n",
"df.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "3754c664",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Count the rows for every (STUDY, PHASE, LEVEL) combination, rarest first.\n",
"df.value_counts(subset=[\"STUDY\", \"PHASE\", \"LEVEL\"], ascending=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f83b595c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# High-load n-back rows: levels 2, 3, 5 and 6, restricted to the train/test phases.\n",
"high_nback = df.loc[\n",
"    df[\"STUDY\"].eq(\"n-back\")\n",
"    & df[\"LEVEL\"].isin([2, 3, 5, 6])\n",
"    & df[\"PHASE\"].isin([\"train\", \"test\"])\n",
"]\n",
"high_nback.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c0940343",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Low-load rows: every baseline row, plus n-back levels 1 and 4 outside the baseline phase.\n",
"low_all = df.loc[\n",
"    df[\"PHASE\"].eq(\"baseline\")\n",
"    | (\n",
"        df[\"STUDY\"].eq(\"n-back\")\n",
"        & df[\"PHASE\"].ne(\"baseline\")\n",
"        & df[\"LEVEL\"].isin([1, 4])\n",
"    )\n",
"]\n",
"print(low_all.shape)\n",
"\n",
"# High-load k-drive rows: all k-drive rows outside the baseline phase.\n",
"high_kdrive = df.loc[df[\"STUDY\"].eq(\"k-drive\") & df[\"PHASE\"].ne(\"baseline\")]\n",
"print(high_kdrive.shape)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "f7ce38d3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Print whether the three subset sizes add up to the row count of df,\n",
"# followed by both numbers so a mismatch can be inspected.\n",
"print(len(df) == sum(len(part) for part in (high_kdrive, high_nback, low_all)))\n",
"print(len(df))\n",
"print(sum(len(part) for part in (high_kdrive, high_nback, low_all)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "48ba0379",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Stack the n-back and k-drive high-load subsets row-wise into a single frame.\n",
"high_all = pd.concat(objs=[high_nback, high_kdrive], axis=0)\n",
"high_all.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "77dda26c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Report the total row count next to the combined low/high split, then each split size.\n",
"print(f\"Gesamt: {len(df)}=={len(low_all) + len(high_all)}\")\n",
"print(f\"Anzahl an low load Samples: {len(low_all)}\")\n",
"print(f\"Anzahl an high load Samples: {len(high_all)}\")"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "base",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|