updated subject performance notebook

This commit is contained in:
Michael Weig 2026-03-09 20:10:55 +01:00
parent 182fc102de
commit ef785283f0

View File

@ -15,6 +15,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"%pip install pyocclient\n",
"import yaml\n", "import yaml\n",
"import owncloud\n", "import owncloud\n",
"import pandas as pd\n", "import pandas as pd\n",
@ -36,101 +37,109 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Load credentials\n", "# Load credentials from YAML\n",
"with open(\"../login.yaml\") as f:\n", "with open(\"login.yaml\", \"r\") as f:\n",
" cfg = yaml.safe_load(f)\n", " cfg = yaml.safe_load(f)\n",
" \n",
"url, password = cfg[0][\"url\"], cfg[1][\"password\"]\n",
"\n", "\n",
"# Connect once\n", "url = cfg[0][\"url\"]\n",
"password = cfg[1][\"password\"]\n",
"\n",
"# Connect once to the public OwnCloud link\n",
"oc = owncloud.Client.from_public_link(url, folder_password=password)\n", "oc = owncloud.Client.from_public_link(url, folder_password=password)\n",
"# File pattern\n", "\n",
"# base = \"adabase-public-{num:04d}-v_0_0_2.h5py\"\n", "num_files = 1 # number of subject IDs to process (min: 1, max: 30)\n",
"base = \"{num:04d}-*.h5py\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "07c03d07",
"metadata": {},
"outputs": [],
"source": [
"num_files = 2 # number of files to process (min: 1, max: 30)\n",
"performance_data = []\n", "performance_data = []\n",
"\n", "\n",
"# Read remote file list once\n",
"remote_files = oc.list(\".\")\n",
"remote_names = [f.get_name() for f in remote_files]\n",
"\n",
"for i in range(num_files):\n", "for i in range(num_files):\n",
" file_pattern = f\"{i:04d}-*\"\n", " prefix = f\"{i:04d}-\"\n",
" \n", " matching_files = [name for name in remote_names if name.startswith(prefix) and name.endswith(\".hdf5\")]\n",
" # Get list of files matching the pattern\n",
" files = oc.list('.')\n",
" matching_files = [f.get_name() for f in files if f.get_name().startswith(f\"{i:04d}-\")]\n",
" \n",
" if matching_files:\n",
" file_name = matching_files[0] # Take the first matching file\n",
" local_tmp = f\"tmp_{i:04d}.h5\"\n",
" \n",
" oc.get_file(file_name, local_tmp)\n",
" print(f\"{file_name} geöffnet\")\n",
" else:\n",
" print(f\"Keine Datei gefunden für Muster: {file_pattern}\")\n",
" # file_name = base.format(num=i)\n",
" # local_tmp = f\"tmp_{i:04d}.h5\"\n",
"\n", "\n",
" # oc.get_file(file_name, local_tmp)\n", " if not matching_files:\n",
" # print(f\"{file_name} geöffnet\")\n", " print(f\"No file found for pattern: {prefix}*.hdf5\")\n",
"\n",
" # check SIGNALS table for AUs\n",
" with pd.HDFStore(local_tmp, mode=\"r\") as store:\n",
" cols = store.select(\"SIGNALS\", start=0, stop=1).columns\n",
" au_cols = [c for c in cols if c.startswith(\"AU\")]\n",
" if not au_cols:\n",
" print(f\"Subject {i} enthält keine AUs\")\n",
" continue\n", " continue\n",
"\n", "\n",
" # load performance table\n", " # Take the first matching file, e.g. 0000-AACA.hdf5\n",
" with pd.HDFStore(local_tmp, mode=\"r\") as store:\n", " file_name = matching_files[0]\n",
" perf_df = store.select(\"PERFORMANCE\")\n", " local_tmp = f\"tmp_{i:04d}.hdf5\"\n",
"\n",
" try:\n",
" # Download the file locally\n",
" oc.get_file(file_name, local_tmp)\n",
" print(f\"Downloaded and opened file: {file_name} -> {local_tmp}\")\n",
" except Exception as e:\n",
" print(f\"Failed to download file {file_name}: {e}\")\n",
" continue\n",
"\n",
" # Check SIGNALS table for AU columns\n",
" try:\n",
" with pd.HDFStore(local_tmp, mode=\"r\") as store:\n",
" cols = store.select(\"SIGNALS\", start=0, stop=1).columns\n",
" except Exception as e:\n",
" print(f\"Failed to read SIGNALS from {local_tmp}: {e}\")\n",
" continue\n",
"\n",
" au_cols = [c for c in cols if c.startswith(\"AU\")]\n",
" if not au_cols:\n",
" print(f\"Subject {i:04d} contains no AU columns\")\n",
" continue\n",
"\n",
" # Load PERFORMANCE table\n",
" try:\n",
" with pd.HDFStore(local_tmp, mode=\"r\") as store:\n",
" perf_df = store.select(\"PERFORMANCE\")\n",
" except Exception as e:\n",
" print(f\"Failed to read PERFORMANCE from {local_tmp}: {e}\")\n",
" continue\n",
"\n", "\n",
" f1_cols = [c for c in [\"AUDITIVE F1\", \"VISUAL F1\", \"F1\"] if c in perf_df.columns]\n", " f1_cols = [c for c in [\"AUDITIVE F1\", \"VISUAL F1\", \"F1\"] if c in perf_df.columns]\n",
" if not f1_cols:\n", " if not f1_cols:\n",
" print(f\"Subject {i}: keine F1-Spalten gefunden\")\n", " print(f\"Subject {i:04d}: no F1 columns found\")\n",
" continue\n", " continue\n",
"\n", "\n",
" subject_entry = {\"subjectID\": i}\n", " subject_entry = {\"subjectID\": i}\n",
" valid_scores = []\n", " valid_scores = []\n",
"\n", "\n",
" # iterate rows: each (study, level, phase)\n", " # Iterate through PERFORMANCE rows: each row is one (study, level, phase) combination\n",
" for _, row in perf_df.iterrows():\n", " for _, row in perf_df.iterrows():\n",
" study, level, phase = row[\"STUDY\"], row[\"LEVEL\"], row[\"PHASE\"]\n", " study = row[\"STUDY\"]\n",
" level = row[\"LEVEL\"]\n",
" phase = row[\"PHASE\"]\n",
" col_name = f\"STUDY_{study}_LEVEL_{level}_PHASE_{phase}\"\n", " col_name = f\"STUDY_{study}_LEVEL_{level}_PHASE_{phase}\"\n",
"\n", "\n",
" # collect valid F1 values among the three columns\n", " # Collect non-NaN F1 values from the available F1 columns\n",
" scores = [row[c] for c in f1_cols if pd.notna(row[c])]\n", " scores = [row[c] for c in f1_cols if pd.notna(row[c])]\n",
" if scores:\n", " if scores:\n",
" mean_score = float(np.mean(scores))\n", " mean_score = float(np.mean(scores))\n",
" subject_entry[col_name] = mean_score\n", " subject_entry[col_name] = mean_score\n",
" valid_scores.extend(scores)\n", " valid_scores.extend(scores)\n",
"\n", "\n",
" # compute overall average across all valid combinations\n", " # Compute overall average across all valid F1 values\n",
" if valid_scores:\n", " if valid_scores:\n",
" subject_entry[\"overall_score\"] = float(np.mean(valid_scores))\n", " subject_entry[\"overall_score\"] = float(np.mean(valid_scores))\n",
" performance_data.append(subject_entry)\n", " performance_data.append(subject_entry)\n",
" print(f\"Subject {i}: {len(valid_scores)} gültige Scores, Overall = {subject_entry['overall_score']:.3f}\")\n", " print(\n",
" f\"Subject {i:04d}: {len(valid_scores)} valid scores, \"\n",
" f\"overall = {subject_entry['overall_score']:.3f}\"\n",
" )\n",
" else:\n", " else:\n",
" print(f\"Subject {i}: keine gültigen F1-Scores\")\n", " print(f\"Subject {i:04d}: no valid F1 scores found\")\n",
"\n", "\n",
"# build dataframe\n", "# Build final DataFrame and save CSV\n",
"if performance_data:\n", "if performance_data:\n",
" performance_df = pd.DataFrame(performance_data)\n", " performance_df = pd.DataFrame(performance_data)\n",
" combination_cols = sorted([c for c in performance_df.columns if c.startswith(\"STUDY_\")])\n", " combination_cols = sorted([c for c in performance_df.columns if c.startswith(\"STUDY_\")])\n",
" final_cols = [\"subjectID\", \"overall_score\"] + combination_cols\n", " final_cols = [\"subjectID\", \"overall_score\"] + combination_cols\n",
" performance_df = performance_df[final_cols]\n", " performance_df = performance_df[final_cols]\n",
" performance_df.to_csv(\"n_au_performance.csv\", index=False)\n", " performance_df.to_csv(\"performance.csv\", index=False)\n",
"\n", "\n",
" print(f\"\\nGesamt Subjects mit Action Units: {len(performance_df)}\")\n", " print(f\"\\nTotal subjects with Action Units: {len(performance_df)}\")\n",
" print(\"Saved results to performance.csv\")\n",
"else:\n", "else:\n",
" print(\"Keine gültigen Daten gefunden.\")" " print(\"No valid data found.\")"
] ]
}, },
{ {
@ -142,56 +151,11 @@
"source": [ "source": [
"performance_df.head()" "performance_df.head()"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"id": "db95eea7",
"metadata": {},
"outputs": [],
"source": [
"with pd.HDFStore(\"tmp_0000.h5\", mode=\"r\") as store:\n",
" md = store.select(\"META\")\n",
"print(\"File 0:\")\n",
"print(md)\n",
"with pd.HDFStore(\"tmp_0001.h5\", mode=\"r\") as store:\n",
" md = store.select(\"META\")\n",
"print(\"File 1\")\n",
"print(md)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8067036b",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_columns', None)\n",
"pd.set_option('display.max_rows', None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f18e7385",
"metadata": {},
"outputs": [],
"source": [
"with pd.HDFStore(\"tmp_0000.h5\", mode=\"r\") as store:\n",
" md = store.select(\"SIGNALS\", start=0, stop=1)\n",
"print(\"File 0:\")\n",
"md.head()\n",
"# with pd.HDFStore(\"tmp_0001.h5\", mode=\"r\",start=0, stop=1) as store:\n",
"# md = store.select(\"SIGNALS\")\n",
"# print(\"File 1\")\n",
"# print(md.columns)"
]
} }
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "base", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@ -205,7 +169,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.11.5" "version": "3.12.10"
} }
}, },
"nbformat": 4, "nbformat": 4,