{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b7e4a9d1",
   "metadata": {},
   "source": [
    "# High/low load split of the windowed dataset\n",
    "\n",
    "Loads the windowed parquet dataset, inspects it, and splits its rows into low-load and high-load samples based on the `STUDY`, `PHASE` and `LEVEL` columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2b3fface",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c41d07e8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Default location of the windowed dataset (the path this notebook was\n",
    "# originally written against). Set the AU_DATASET_PATH environment variable\n",
    "# to point at the file on another machine instead of editing this cell.\n",
    "DEFAULT_DATA_PATH = r\"C:\\Users\\micha\\FAUbox\\WS2526_Fahrsimulator_MSY (Celina Korzer)\\AU_dataset\\output_windowed.parquet\"\n",
    "DATA_PATH = Path(os.environ.get(\"AU_DATASET_PATH\", DEFAULT_DATA_PATH))\n",
    "\n",
    "# LEVEL values that count as high / low load within the n-back study.\n",
    "HIGH_LOAD_NBACK_LEVELS = [2, 3, 5, 6]\n",
    "LOW_LOAD_NBACK_LEVELS = [1, 4]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d93a6f20",
   "metadata": {},
   "source": [
    "## Load and inspect the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "74f1f5ec",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fail early with an actionable message instead of an engine-specific error.\n",
    "if not DATA_PATH.exists():\n",
    "    raise FileNotFoundError(\n",
    "        f\"Dataset not found at {DATA_PATH}; set AU_DATASET_PATH to its location.\"\n",
    "    )\n",
    "\n",
    "df = pd.read_parquet(DATA_PATH)\n",
    "print(df.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "05775454",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "99e17328",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "69e53731",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3754c664",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show every (STUDY, PHASE, LEVEL) combination with its frequency, rarest first.\n",
    "df[['STUDY', 'PHASE', 'LEVEL']].value_counts(ascending=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e5b2c817",
   "metadata": {},
   "source": [
    "## Split into load groups\n",
    "\n",
    "Each selection is kept as a boolean mask so that the coverage check further down can report exactly which rows were not assigned to any group."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f83b595c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# High-load n-back rows: n-back study, a high-load LEVEL, train or test phase.\n",
    "is_high_nback = (\n",
    "    (df[\"STUDY\"] == \"n-back\")\n",
    "    & df[\"LEVEL\"].isin(HIGH_LOAD_NBACK_LEVELS)\n",
    "    & df[\"PHASE\"].isin([\"train\", \"test\"])\n",
    ")\n",
    "high_nback = df[is_high_nback]\n",
    "high_nback.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c0940343",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Low-load rows: every baseline-phase row (any study), plus non-baseline\n",
    "# n-back rows at a low-load LEVEL.\n",
    "# NOTE(review): this only excludes \"baseline\", whereas the high-load n-back\n",
    "# selection requires PHASE to be \"train\" or \"test\" - confirm this is intended.\n",
    "is_low_all = (df[\"PHASE\"] == \"baseline\") | (\n",
    "    (df[\"STUDY\"] == \"n-back\")\n",
    "    & (df[\"PHASE\"] != \"baseline\")\n",
    "    & df[\"LEVEL\"].isin(LOW_LOAD_NBACK_LEVELS)\n",
    ")\n",
    "low_all = df[is_low_all]\n",
    "print(low_all.shape)\n",
    "\n",
    "# High-load k-drive rows: every non-baseline k-drive row, regardless of LEVEL.\n",
    "is_high_kdrive = (df[\"STUDY\"] == \"k-drive\") & (df[\"PHASE\"] != \"baseline\")\n",
    "high_kdrive = df[is_high_kdrive]\n",
    "print(high_kdrive.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f08d4b3a",
   "metadata": {},
   "source": [
    "## Coverage check"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f7ce38d3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The three groups are expected to account for every row of df.\n",
    "n_assigned = high_kdrive.shape[0] + high_nback.shape[0] + low_all.shape[0]\n",
    "print(df.shape[0] == n_assigned)\n",
    "print(df.shape[0])\n",
    "print(n_assigned)\n",
    "\n",
    "# Comparing totals alone does not say which rows were missed, so list the\n",
    "# (STUDY, PHASE, LEVEL) combinations that no selection picked up.\n",
    "# fillna(False) treats an undecidable (missing-value) comparison as unassigned.\n",
    "is_assigned = (is_high_nback | is_low_all | is_high_kdrive).fillna(False).astype(bool)\n",
    "unassigned = df.loc[~is_assigned, [\"STUDY\", \"PHASE\", \"LEVEL\"]]\n",
    "if not unassigned.empty:\n",
    "    print(f\"{len(unassigned)} rows are not assigned to any group:\")\n",
    "    print(unassigned.value_counts(dropna=False))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a6c19e54",
   "metadata": {},
   "source": [
    "## Combine the high-load groups and summarise"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "48ba0379",
   "metadata": {},
   "outputs": [],
   "source": [
    "high_all = pd.concat([high_nback, high_kdrive])\n",
    "high_all.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77dda26c",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(f\"Gesamt: {df.shape[0]}=={low_all.shape[0]+high_all.shape[0]}\")\n",
    "print(f\"Anzahl an low load Samples: {low_all.shape[0]}\")\n",
    "print(f\"Anzahl an high load Samples: {high_all.shape[0]}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}