"""Collect per-subject F1 performance scores from HDF5 files on an ownCloud share.

For each downloaded subject file that has at least one ``AU*`` column in its
``SIGNALS`` table, the ``PERFORMANCE`` table is reduced to one mean F1 value per
(STUDY, LEVEL, PHASE) row plus an overall mean, and the result is written to
``au_performance.csv``.
"""

# %% [markdown]
# Imports

# %%
import yaml
import owncloud
import pandas as pd
import numpy as np

# %% [markdown]
# Connection to ownCloud

# %%
# Load credentials
with open("../login.yaml") as f:
    cfg = yaml.safe_load(f)

# The config is indexed as a sequence: entry 0 carries the share URL,
# entry 1 the folder password.
url, password = cfg[0]["url"], cfg[1]["password"]

# Connect once and reuse the client for every download
oc = owncloud.Client.from_public_link(url, folder_password=password)
# File pattern
base = "adabase-public-{num:04d}-v_0_0_2.h5py"

# %%
# Candidate F1 columns of the PERFORMANCE table; only those present are used.
_F1_COLUMNS = ("AUDITIVE F1", "VISUAL F1", "F1")


def _has_action_units(store):
    """Return True if the SIGNALS table of *store* has a column starting with "AU"."""
    # Reading a single row is enough to obtain the column index.
    cols = store.select("SIGNALS", start=0, stop=1).columns
    return any(c.startswith("AU") for c in cols)


def _summarize_subject(perf_df, subject_id):
    """Reduce one PERFORMANCE table to a flat dict of mean F1 scores.

    Returns a dict with ``subjectID``, one ``STUDY_<s>_LEVEL_<l>_PHASE_<p>``
    entry per row that has at least one non-NaN F1 value, and
    ``overall_score`` (mean over every individual non-NaN F1 value).
    Returns ``None`` (after printing the reason) when the table has no F1
    columns or no valid F1 values.
    """
    f1_cols = [c for c in _F1_COLUMNS if c in perf_df.columns]
    if not f1_cols:
        print(f"Subject {subject_id}: keine F1-Spalten gefunden")
        return None

    subject_entry = {"subjectID": subject_id}
    valid_scores = []

    # iterate rows: each (study, level, phase)
    for _, row in perf_df.iterrows():
        study, level, phase = row["STUDY"], row["LEVEL"], row["PHASE"]
        col_name = f"STUDY_{study}_LEVEL_{level}_PHASE_{phase}"

        # collect valid F1 values among the available columns
        scores = [row[c] for c in f1_cols if pd.notna(row[c])]
        if scores:
            subject_entry[col_name] = float(np.mean(scores))
            valid_scores.extend(scores)

    if not valid_scores:
        print(f"Subject {subject_id}: keine gültigen F1-Scores")
        return None

    # overall average over all individual valid scores
    subject_entry["overall_score"] = float(np.mean(valid_scores))
    print(f"Subject {subject_id}: {len(valid_scores)} gültige Scores, Overall = {subject_entry['overall_score']:.3f}")
    return subject_entry


num_files = 2  # number of files to process (min: 1, max: 30)
performance_data = []

for i in range(num_files):
    file_name = base.format(num=i)
    local_tmp = f"tmp_{i:04d}.h5"

    # pyocclient documents get_file as returning False when the download did
    # not succeed; skip the subject instead of reading a missing/stale file.
    if not oc.get_file(file_name, local_tmp):
        print(f"{file_name} konnte nicht heruntergeladen werden")
        continue
    print(f"{file_name} geöffnet")

    # Open the store once: check SIGNALS for AUs, then load PERFORMANCE.
    with pd.HDFStore(local_tmp, mode="r") as store:
        if not _has_action_units(store):
            print(f"Subject {i} enthält keine AUs")
            continue
        perf_df = store.select("PERFORMANCE")

    subject_entry = _summarize_subject(perf_df, i)
    if subject_entry is not None:
        performance_data.append(subject_entry)

# build dataframe
if performance_data:
    performance_df = pd.DataFrame(performance_data)
    combination_cols = sorted(c for c in performance_df.columns if c.startswith("STUDY_"))
    final_cols = ["subjectID", "overall_score"] + combination_cols
    performance_df = performance_df[final_cols]
    performance_df.to_csv("au_performance.csv", index=False)

    print(f"\nGesamt Subjects mit Action Units: {len(performance_df)}")
else:
    # Keep performance_df defined so the cell below does not raise NameError.
    performance_df = pd.DataFrame(columns=["subjectID", "overall_score"])
    print("Keine gültigen Daten gefunden.")

# %%
performance_df.head()

# %%
# Inspect the raw PERFORMANCE table of the last file handled by the loop above.
with pd.HDFStore(local_tmp, mode="r") as store:
    performance = store.select("PERFORMANCE")
performance