Fahrsimulator_MSY2526_AI/dataset_creation/open_parquet_test.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2b3fface",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "74f1f5ec",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the windowed AU dataset. The default is an absolute, user-specific\n",
    "# Windows path, so the AU_DATASET_PATH environment variable may be set to point\n",
    "# at another copy; when it is unset, the original path is used unchanged.\n",
    "DEFAULT_PARQUET_PATH = r\"C:\\Users\\micha\\FAUbox\\WS2526_Fahrsimulator_MSY (Celina Korzer)\\AU_dataset\\output_windowed.parquet\"\n",
    "parquet_path = os.environ.get(\"AU_DATASET_PATH\", DEFAULT_PARQUET_PATH)\n",
    "\n",
    "# Load the whole file into one DataFrame and report its (rows, columns) shape.\n",
    "df = pd.read_parquet(parquet_path)\n",
    "print(df.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "05775454",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview the first five rows of the loaded frame.\n",
    "df.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "99e17328",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview the last five rows of the loaded frame.\n",
    "df.tail(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "69e53731",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print pandas' built-in summary of the loaded frame.\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3754c664",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count how often each occurring (STUDY, PHASE, LEVEL) combination appears,\n",
    "# listed from the rarest to the most frequent.\n",
    "group_columns = [\"STUDY\", \"PHASE\", \"LEVEL\"]\n",
    "df[group_columns].value_counts(ascending=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f83b595c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One boolean row mask per selection criterion.\n",
    "nback_rows = df[\"STUDY\"].eq(\"n-back\")\n",
    "high_level_rows = df[\"LEVEL\"].isin([2, 3, 5, 6])\n",
    "train_test_rows = df[\"PHASE\"].isin([\"train\", \"test\"])\n",
    "\n",
    "# n-back rows at level 2, 3, 5 or 6 whose phase is \"train\" or \"test\".\n",
    "# NOTE(review): the phase is matched against an explicit list here; n-back rows\n",
    "# with any other non-baseline phase value would not be selected - confirm that\n",
    "# no such phase exists in the data.\n",
    "high_nback = df[nback_rows & high_level_rows & train_test_rows]\n",
    "high_nback.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c0940343",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Row masks shared by the two selections below.\n",
    "baseline_rows = df[\"PHASE\"].eq(\"baseline\")\n",
    "non_baseline_rows = df[\"PHASE\"].ne(\"baseline\")\n",
    "nback_low_rows = df[\"STUDY\"].eq(\"n-back\") & non_baseline_rows & df[\"LEVEL\"].isin([1, 4])\n",
    "\n",
    "# Every baseline row, plus the non-baseline n-back rows at level 1 or 4.\n",
    "low_all = df[baseline_rows | nback_low_rows]\n",
    "print(low_all.shape)\n",
    "\n",
    "# Every k-drive row outside the baseline phase.\n",
    "high_kdrive = df[df[\"STUDY\"].eq(\"k-drive\") & non_baseline_rows]\n",
    "print(high_kdrive.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f7ce38d3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compare the total row count with the summed sizes of the three subsets;\n",
    "# the first printed line is True only when the two numbers match.\n",
    "total_rows = len(df)\n",
    "subset_rows = sum(len(part) for part in (high_kdrive, high_nback, low_all))\n",
    "\n",
    "print(total_rows == subset_rows)\n",
    "print(total_rows)\n",
    "print(subset_rows)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "48ba0379",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack the n-back and k-drive selections row-wise, n-back rows first.\n",
    "high_parts = [high_nback, high_kdrive]\n",
    "high_all = pd.concat(high_parts, axis=0)\n",
    "high_all.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77dda26c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Row counts of the full frame and of the two combined groups.\n",
    "n_total = len(df)\n",
    "n_low = len(low_all)\n",
    "n_high = len(high_all)\n",
    "\n",
    "# Total next to the combined low + high count, followed by each group's size.\n",
    "print(f\"Gesamt: {n_total}=={n_low + n_high}\")\n",
    "print(f\"Anzahl an low load Samples: {n_low}\")\n",
    "print(f\"Anzahl an high load Samples: {n_high}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}