From 25aa03398a6b783c25d90ee810cad7977dd7f79c Mon Sep 17 00:00:00 2001
From: Michael <weigmi87303@th-nuernberg.de>
Date: Fri, 31 Oct 2025 17:40:20 +0100
Subject: [PATCH] added performance research

---
 EDA/researchOnSubjectPerformance.ipynb | 165 +++++++++++++++++++++++++
 1 file changed, 165 insertions(+)
 create mode 100644 EDA/researchOnSubjectPerformance.ipynb

diff --git a/EDA/researchOnSubjectPerformance.ipynb b/EDA/researchOnSubjectPerformance.ipynb
new file mode 100644
index 0000000..4895576
--- /dev/null
+++ b/EDA/researchOnSubjectPerformance.ipynb
@@ -0,0 +1,165 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "8fb02733",
+   "metadata": {},
+   "source": [
+    "Imports"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "96f3b128",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import yaml\n",
+    "import owncloud\n",
+    "import pandas as pd\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c20cee7c",
+   "metadata": {},
+   "source": [
+    "Connection to ownCloud"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c4c94558",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read the share URL and folder password from the local login file.\n",
+    "with open(\"../login.yaml\") as login_file:\n",
+    "    cfg = yaml.safe_load(login_file)\n",
+    "# cfg is indexed by position: entry 0 supplies url, entry 1 supplies password.\n",
+    "url = cfg[0][\"url\"]\n",
+    "password = cfg[1][\"password\"]\n",
+    "# One client session on the public link, reused by the download loop below.\n",
+    "oc = owncloud.Client.from_public_link(url, folder_password=password)\n",
+    "# Filename template; {num} is rendered as a zero-padded 4-digit number.\n",
+    "base = \"adabase-public-{num:04d}-v_0_0_2.h5py\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "07c03d07",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Collect per-subject F1 scores for every file whose SIGNALS table has AU columns.\n",
+    "num_files = 2  # number of files to process (min: 1, max: 30)\n",
+    "performance_data = []\n",
+    "\n",
+    "for i in range(num_files):\n",
+    "    file_name = base.format(num=i)\n",
+    "    local_tmp = f\"tmp_{i:04d}.h5\"\n",
+    "\n",
+    "    oc.get_file(file_name, local_tmp)\n",
+    "    print(f\"{file_name} geöffnet\")\n",
+    "\n",
+    "    # Open the file once: look for AU columns in SIGNALS and, only if some\n",
+    "    # exist, read the PERFORMANCE table through the same handle.\n",
+    "    with pd.HDFStore(local_tmp, mode=\"r\") as store:\n",
+    "        cols = store.select(\"SIGNALS\", start=0, stop=1).columns\n",
+    "        au_cols = [c for c in cols if c.startswith(\"AU\")]\n",
+    "        perf_df = store.select(\"PERFORMANCE\") if au_cols else None\n",
+    "    if perf_df is None:\n",
+    "        print(f\"Subject {i} enthält keine AUs\")\n",
+    "        continue\n",
+    "\n",
+    "    f1_cols = [c for c in [\"AUDITIVE F1\", \"VISUAL F1\", \"F1\"] if c in perf_df.columns]\n",
+    "    if not f1_cols:\n",
+    "        print(f\"Subject {i}: keine F1-Spalten gefunden\")\n",
+    "        continue\n",
+    "\n",
+    "    subject_entry = {\"subjectID\": i}\n",
+    "    valid_scores = []\n",
+    "\n",
+    "    # One column per row, named after its STUDY/LEVEL/PHASE (a repeated triple overwrites).\n",
+    "    for _, row in perf_df.iterrows():\n",
+    "        study, level, phase = row[\"STUDY\"], row[\"LEVEL\"], row[\"PHASE\"]\n",
+    "        col_name = f\"STUDY_{study}_LEVEL_{level}_PHASE_{phase}\"\n",
+    "\n",
+    "        # Average the F1 columns that are not NaN in this row.\n",
+    "        scores = [row[c] for c in f1_cols if pd.notna(row[c])]\n",
+    "        if scores:\n",
+    "            mean_score = float(np.mean(scores))\n",
+    "            subject_entry[col_name] = mean_score\n",
+    "            valid_scores.extend(scores)\n",
+    "\n",
+    "    # Overall score: mean of every individual valid F1 value collected above.\n",
+    "    if valid_scores:\n",
+    "        subject_entry[\"overall_score\"] = float(np.mean(valid_scores))\n",
+    "        performance_data.append(subject_entry)\n",
+    "        print(f\"Subject {i}: {len(valid_scores)} gültige Scores, Overall = {subject_entry['overall_score']:.3f}\")\n",
+    "    else:\n",
+    "        print(f\"Subject {i}: keine gültigen F1-Scores\")\n",
+    "\n",
+    "# Always rebind performance_df so later cells never hit a missing name or a\n",
+    "# stale frame from an earlier run when no subject produced data.\n",
+    "performance_df = pd.DataFrame(performance_data)\n",
+    "if performance_data:\n",
+    "    combination_cols = sorted([c for c in performance_df.columns if c.startswith(\"STUDY_\")])\n",
+    "    final_cols = [\"subjectID\", \"overall_score\"] + combination_cols\n",
+    "    performance_df = performance_df[final_cols]\n",
+    "    performance_df.to_csv(\"au_performance.csv\", index=False)\n",
+    "\n",
+    "    print(f\"\\nGesamt Subjects mit Action Units: {len(performance_df)}\")\n",
+    "else:\n",
+    "    print(\"Keine gültigen Daten gefunden.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0bcaf065",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "performance_df.head(n=5)  # preview the first rows of the per-subject score table"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "db95eea7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with pd.HDFStore(local_tmp, mode=\"r\") as h5_store:  # local_tmp: path left by the loop's last iteration\n",
+    "    performance = h5_store.select(key=\"PERFORMANCE\")\n",
+    "performance"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}