{ "cells": [ { "cell_type": "markdown", "id": "8fb02733", "metadata": {}, "source": [ "Imports" ] }, { "cell_type": "code", "execution_count": null, "id": "ebd32616-1e11-4b15-805e-481e010e03fc", "metadata": {}, "outputs": [], "source": [ "%pip install pyocclient\n", "import os\n", "import hashlib\n", "import yaml\n", "import owncloud\n", "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "markdown", "id": "c20cee7c", "metadata": {}, "source": [ "Connection to Owncloud" ] }, { "cell_type": "code", "execution_count": null, "id": "c4c94558", "metadata": {}, "outputs": [], "source": [ "# Load credentials kept outside the repo\n", "# (expected layout: a list whose first item holds 'url' and second holds 'password' -- TODO confirm against login.yaml)\n", "with open(\"../login.yaml\") as f:\n", "    cfg = yaml.safe_load(f)\n", "\n", "url, password = cfg[0][\"url\"], cfg[1][\"password\"]\n", "\n", "# Connect once and reuse the client for every download below\n", "oc = owncloud.Client.from_public_link(url, folder_password=password)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "07c03d07", "metadata": {}, "outputs": [], "source": [ "num_files = 30\n", "performance_data = []  # one dict per subject -> assembled into a DataFrame at the end\n", "base = \"adabase-public-{num:04d}-v_0_0_2.h5py\" # remote name pattern\n", "for i in range(num_files):\n", "    file_name = base.format(num=i)\n", "    local_tmp = f\"tmp_{i:04d}.h5\"\n", "\n", "    # Download only when the local cache is missing, so re-runs stay cheap\n", "    # (previously the download was commented out and the open below crashed on a fresh machine)\n", "    if not os.path.exists(local_tmp):\n", "        oc.get_file(file_name, local_tmp)\n", "\n", "    # quick checksum to detect identical downloads\n", "    with open(local_tmp, \"rb\") as fh:\n", "        file_hash = hashlib.sha1(fh.read()).hexdigest()\n", "    print(f\"File {i}: {file_name} checksum={file_hash}\")\n", "\n", "    # Open the store once for both the AU presence check and the performance table\n", "    with pd.HDFStore(local_tmp, mode=\"r\") as store:\n", "        cols = store.select(\"SIGNALS\", start=0, stop=1).columns\n", "        au_cols = [c for c in cols if c.startswith(\"AU\")]\n", "        if not au_cols:\n", "            print(f\"Subject {i} enthält keine AUs\")\n", "            continue\n", "        # copy so the frame stays usable after the store closes\n", "        perf_df = store.select(\"PERFORMANCE\").copy()\n", "\n", "    # print(f\"Subject {i}: PERFORMANCE 
rows={len(perf_df)}\")\n", "    # print(perf_df.head(3).to_string(index=False))\n", "\n", "    # which of the known F1 score columns this subject's table actually provides\n", "    f1_cols = [c for c in [\"AUDITIVE F1\", \"VISUAL F1\", \"F1\"] if c in perf_df.columns]\n", "    if not f1_cols:\n", "        print(f\"Subject {i}: keine F1-Spalten gefunden\")\n", "        continue\n", "\n", "    # drop rows that have all F1s NaN (no valid score for that combo)\n", "    perf_df = perf_df.dropna(subset=f1_cols, how=\"all\")\n", "    if perf_df.empty:\n", "        print(f\"Subject {i}: keine gültigen F1-Daten nach Filter\")\n", "        continue\n", "\n", "    # one wide record per subject: subjectID + one column per STUDY/LEVEL/PHASE combination\n", "    subject_entry = {\"subjectID\": i}\n", "    combo_means = []\n", "\n", "    for _, row in perf_df.iterrows():\n", "        study, level, phase = row[\"STUDY\"], row[\"LEVEL\"], row[\"PHASE\"]\n", "        col_name = f\"STUDY_{study}_LEVEL_{level}_PHASE_{phase}\"\n", "\n", "        # mean of available F1 cols for this single combination\n", "        vals = [float(row[c]) for c in f1_cols if pd.notna(row[c])]\n", "        if not vals:\n", "            continue\n", "        mean_for_combo = float(np.mean(vals))\n", "        subject_entry[col_name] = mean_for_combo\n", "        combo_means.append(mean_for_combo)\n", "\n", "    # overall: mean of per-combination means (not flattened raw F1s)\n", "    if combo_means:\n", "        subject_entry[\"overall_score\"] = float(np.mean(combo_means))\n", "        performance_data.append(subject_entry)\n", "        print(f\"Subject {i}: combos={len(combo_means)} overall={subject_entry['overall_score']:.4f}\")\n", "    else:\n", "        print(f\"Subject {i}: keine gültigen Kombinationen\")\n", "\n", "# build the wide dataframe; combination columns sorted by name, subjects missing a combo get NaN\n", "if performance_data:\n", "    performance_df = pd.DataFrame(performance_data)\n", "    combination_cols = sorted([c for c in performance_df.columns if c.startswith(\"STUDY_\")])\n", "    final_cols = [\"subjectID\", \"overall_score\"] + combination_cols\n", "    performance_df = performance_df.reindex(columns=final_cols) # keeps missing combo cols as NaN\n", "    performance_df.to_csv(\"au_performance.csv\", index=False)\n", "    print(f\"\\nGesamt Subjects mit Action Units: {len(performance_df)}\")\n", "else:\n", "    
print(\"Keine gültigen Daten gefunden.\")\n" ] }, { "cell_type": "code", "execution_count": null, "id": "0bcaf065", "metadata": {}, "outputs": [], "source": [ "performance_df.head()" ] }, { "cell_type": "code", "execution_count": null, "id": "db95eea7", "metadata": {}, "outputs": [], "source": [ "with pd.HDFStore(local_tmp, mode=\"r\") as store:\n", " performance = store.select(\"PERFORMANCE\")\n", "performance" ] }, { "cell_type": "code", "execution_count": null, "id": "e17fd7eb-8600-4c31-9212-d1eeb9e74736", "metadata": {}, "outputs": [], "source": [ "\n", "\n", "def hash_perf(local_tmp):\n", " with pd.HDFStore(local_tmp, mode=\"r\") as s:\n", " df = s.select(\"PERFORMANCE\")\n", " # hash based on data values only\n", " return hashlib.sha1(pd.util.hash_pandas_object(df, index=True).values).hexdigest()\n", "\n", "hashes = []\n", "for i in range(5):\n", " local_tmp = f\"tmp_{i:04d}.h5\"\n", " try:\n", " hashes.append((i, hash_perf(local_tmp)))\n", " except Exception as e:\n", " hashes.append((i, str(e)))\n", "\n", "print(hashes)\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.10" } }, "nbformat": 4, "nbformat_minor": 5 }