From ab3ba28a7175016150d0b6c35cd8b8431b5a15b1 Mon Sep 17 00:00:00 2001 From: Michael Weig Date: Sun, 2 Nov 2025 19:59:39 +0000 Subject: [PATCH] revert c5a61c9e55c9a92869db3e7108be64864c09cc23 revert Merge branch 'main' of https://git.efi.th-nuernberg.de/gitea/kurzti88066/Fahrsimulator_MSY2526_AI --- EDA/EDA.ipynb | 4 +- ...check_if_performance_tables_are_same.ipynb | 110 ------------------ EDA/researchOnSubjectPerformance.ipynb | 95 +++++---------- 3 files changed, 30 insertions(+), 179 deletions(-) delete mode 100644 EDA/check_if_performance_tables_are_same.ipynb diff --git a/EDA/EDA.ipynb b/EDA/EDA.ipynb index f54d03b..76844a5 100644 --- a/EDA/EDA.ipynb +++ b/EDA/EDA.ipynb @@ -237,7 +237,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "base", "language": "python", "name": "python3" }, @@ -251,7 +251,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.10" + "version": "3.11.5" } }, "nbformat": 4, diff --git a/EDA/check_if_performance_tables_are_same.ipynb b/EDA/check_if_performance_tables_are_same.ipynb deleted file mode 100644 index 1b00bc4..0000000 --- a/EDA/check_if_performance_tables_are_same.ipynb +++ /dev/null @@ -1,110 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "8fb02733", - "metadata": {}, - "source": [ - "Imports" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ebd32616-1e11-4b15-805e-481e010e03fc", - "metadata": {}, - "outputs": [], - "source": [ - "%pip install pyocclient\n", - "import yaml\n", - "import owncloud\n", - "import pandas as pd\n", - "import numpy as np\n", - "import hashlib" - ] - }, - { - "cell_type": "markdown", - "id": "c20cee7c", - "metadata": {}, - "source": [ - "Connection to Owncloud" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c4c94558", - "metadata": {}, - "outputs": [], - "source": [ - "# Load credentials\n", - "with open(\"../login.yaml\") as f:\n", - " cfg = yaml.safe_load(f)\n", - " \n", - "url, password = cfg[0][\"url\"], cfg[1][\"password\"]\n", - "\n", - "# Connect once\n", - "oc = owncloud.Client.from_public_link(url, folder_password=password)\n", - "# File pattern\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "485149e6-f26c-4aac-a742-c0111f0272cb", - "metadata": {}, - "outputs": [], - "source": [ - "num_files = 30\n", - "base = \"adabase-public-{num:04d}-v_0_0_2.h5py\"\n", - "\n", - "for i in range(num_files):\n", - " file_name = base.format(num=i)\n", - " local_tmp = f\"tmp_{i:04d}.h5\"\n", - "\n", - " oc.get_file(file_name, local_tmp)\n", - "\n", - " print(f\"\\n===== FILE {i}: {file_name} =====\")\n", - "\n", - " try:\n", - " with pd.HDFStore(local_tmp, mode=\"r\") as store:\n", - " perf_df = store.select(\"PERFORMANCE\", columns=[\"AUDITIVE F1\", \"VISUAL F1\", \"F1\"])\n", - " print(perf_df)\n", - " except Exception as e:\n", - " print(f\"Error reading PERFORMANCE table: {e}\")\n", - "\n", - " print(\"===========================================\\n\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1ff28917-860a-4900-906c-f38ac2fe9d64", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/EDA/researchOnSubjectPerformance.ipynb b/EDA/researchOnSubjectPerformance.ipynb index 8b77b7e..4895576 100644 --- a/EDA/researchOnSubjectPerformance.ipynb +++ b/EDA/researchOnSubjectPerformance.ipynb @@ -11,16 +11,14 @@ { "cell_type": "code", "execution_count": null, - "id": "ebd32616-1e11-4b15-805e-481e010e03fc", + "id": "96f3b128", "metadata": {}, "outputs": [], "source": [ - "%pip install pyocclient\n", "import yaml\n", "import owncloud\n", "import pandas as pd\n", - "import numpy as np\n", - "import hashlib" + "import numpy as np" ] }, { @@ -46,7 +44,8 @@ "\n", "# Connect once\n", "oc = owncloud.Client.from_public_link(url, folder_password=password)\n", - "# File pattern\n" + "# File pattern\n", + "base = \"adabase-public-{num:04d}-v_0_0_2.h5py\"" ] }, { @@ -56,19 +55,15 @@ "metadata": {}, "outputs": [], "source": [ - "num_files = 30\n", + "num_files = 2 # number of files to process (min: 1, max: 30)\n", "performance_data = []\n", - "base = \"adabase-public-{num:04d}-v_0_0_2.h5py\" # remote name pattern\n", + "\n", "for i in range(num_files):\n", " file_name = base.format(num=i)\n", " local_tmp = f\"tmp_{i:04d}.h5\"\n", "\n", - " #oc.get_file(file_name, local_tmp)\n", - "\n", - " # quick checksum to detect identical downloads\n", - " with open(local_tmp, \"rb\") as fh:\n", - " file_hash = hashlib.sha1(fh.read()).hexdigest()\n", - " print(f\"File {i}: {file_name} checksum={file_hash}\")\n", + " oc.get_file(file_name, local_tmp)\n", + " print(f\"{file_name} geöffnet\")\n", "\n", " # check SIGNALS table for AUs\n", " with pd.HDFStore(local_tmp, mode=\"r\") as store:\n", @@ -78,57 +73,49 @@ " print(f\"Subject {i} enthält keine AUs\")\n", " continue\n", "\n", - " # load performance table (make a copy)\n", + " # load performance table\n", " with pd.HDFStore(local_tmp, mode=\"r\") as store:\n", - " perf_df = store.select(\"PERFORMANCE\").copy()\n", - "\n", - " # print(f\"Subject {i}: PERFORMANCE rows={len(perf_df)}\")\n", - " # print(perf_df.head(3).to_string(index=False))\n", + " perf_df = store.select(\"PERFORMANCE\")\n", "\n", " f1_cols = [c for c in [\"AUDITIVE F1\", \"VISUAL F1\", \"F1\"] if c in perf_df.columns]\n", " if not f1_cols:\n", " print(f\"Subject {i}: keine F1-Spalten gefunden\")\n", " continue\n", "\n", - " # drop rows that have all F1s NaN (no valid score for that combo)\n", - " perf_df = perf_df.dropna(subset=f1_cols, how=\"all\")\n", - " if perf_df.empty:\n", - " print(f\"Subject {i}: keine gültigen F1-Daten nach Filter\")\n", - " continue\n", - "\n", " subject_entry = {\"subjectID\": i}\n", - " combo_means = []\n", + " valid_scores = []\n", "\n", + " # iterate rows: each (study, level, phase)\n", " for _, row in perf_df.iterrows():\n", " study, level, phase = row[\"STUDY\"], row[\"LEVEL\"], row[\"PHASE\"]\n", " col_name = f\"STUDY_{study}_LEVEL_{level}_PHASE_{phase}\"\n", "\n", - " # mean of available F1 cols for this single combination\n", - " vals = [float(row[c]) for c in f1_cols if pd.notna(row[c])]\n", - " if not vals:\n", - " continue\n", - " mean_for_combo = float(np.mean(vals))\n", - " subject_entry[col_name] = mean_for_combo\n", - " combo_means.append(mean_for_combo)\n", + " # collect valid F1 values among the three columns\n", + " scores = [row[c] for c in f1_cols if pd.notna(row[c])]\n", + " if scores:\n", + " mean_score = float(np.mean(scores))\n", + " subject_entry[col_name] = mean_score\n", + " valid_scores.extend(scores)\n", "\n", - " # overall: mean of per-combination means (not flattened raw F1s)\n", - " if combo_means:\n", - " subject_entry[\"overall_score\"] = float(np.mean(combo_means))\n", + " # compute overall average across all valid combinations\n", + " if valid_scores:\n", + " subject_entry[\"overall_score\"] = float(np.mean(valid_scores))\n", " performance_data.append(subject_entry)\n", - " print(f\"Subject {i}: combos={len(combo_means)} overall={subject_entry['overall_score']:.4f}\")\n", + " print(f\"Subject {i}: {len(valid_scores)} gültige Scores, Overall = {subject_entry['overall_score']:.3f}\")\n", " else:\n", - " print(f\"Subject {i}: keine gültigen Kombinationen\")\n", + " print(f\"Subject {i}: keine gültigen F1-Scores\")\n", "\n", "# build dataframe\n", "if performance_data:\n", " performance_df = pd.DataFrame(performance_data)\n", " combination_cols = sorted([c for c in performance_df.columns if c.startswith(\"STUDY_\")])\n", " final_cols = [\"subjectID\", \"overall_score\"] + combination_cols\n", - " performance_df = performance_df.reindex(columns=final_cols) # keeps missing combo cols as NaN\n", + " performance_df = performance_df[final_cols]\n", " performance_df.to_csv(\"au_performance.csv\", index=False)\n", + "\n", " print(f\"\\nGesamt Subjects mit Action Units: {len(performance_df)}\")\n", "else:\n", - " print(\"Keine gültigen Daten gefunden.\")\n" + " print(\"Keine gültigen Daten gefunden.\")" ] }, { @@ -152,37 +139,11 @@ " performance = store.select(\"PERFORMANCE\")\n", "performance" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e17fd7eb-8600-4c31-9212-d1eeb9e74736", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n", - "def hash_perf(local_tmp):\n", - " with pd.HDFStore(local_tmp, mode=\"r\") as s:\n", - " df = s.select(\"PERFORMANCE\")\n", - " # hash based on data values only\n", - " return hashlib.sha1(pd.util.hash_pandas_object(df, index=True).values).hexdigest()\n", - "\n", - "hashes = []\n", - "for i in range(5):\n", - " local_tmp = f\"tmp_{i:04d}.h5\"\n", - " try:\n", - " hashes.append((i, hash_perf(local_tmp)))\n", - " except Exception as e:\n", - " hashes.append((i, str(e)))\n", - "\n", - "print(hashes)\n" - ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "base", "language": "python", "name": "python3" }, @@ -196,7 +157,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.10" + "version": "3.11.5" } }, "nbformat": 4,