178 lines
5.7 KiB
Plaintext
178 lines
5.7 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "8fb02733",
|
|
"metadata": {},
|
|
"source": [
|
|
"Imports"
|
|
]
|
|
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "96f3b128",
 "metadata": {},
 "outputs": [],
 "source": [
  "%pip install pyocclient\n",
  "import os\n",
  "\n",
  "import numpy as np\n",
  "import owncloud\n",
  "import pandas as pd\n",
  "import yaml"
 ]
},
|
|
{
 "cell_type": "markdown",
 "id": "c20cee7c",
 "metadata": {},
 "source": [
  "Connect to ownCloud, download each subject's HDF5 file, and export the F1 performance scores to `performance.csv`"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "c4c94558",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Load credentials from YAML\n",
  "with open(\"login.yaml\", \"r\") as f:\n",
  "    cfg = yaml.safe_load(f)\n",
  "\n",
  "# login.yaml is indexed as a list: entry 0 holds the URL, entry 1 the password\n",
  "url = cfg[0][\"url\"]\n",
  "password = cfg[1][\"password\"]\n",
  "\n",
  "# Connect once to the public OwnCloud link\n",
  "oc = owncloud.Client.from_public_link(url, folder_password=password)\n",
  "\n",
  "num_files = 1  # number of subject IDs to process (min: 1, max: 30)\n",
  "performance_data = []\n",
  "\n",
  "\n",
  "def extract_subject_scores(subject_id, hdf_path):\n",
  "    \"\"\"Summarise the F1 scores stored in one subject's HDF5 file.\n",
  "\n",
  "    Args:\n",
  "        subject_id: Integer subject index, used in messages and as \"subjectID\".\n",
  "        hdf_path: Path of the locally downloaded HDF5 file.\n",
  "\n",
  "    Returns:\n",
  "        A dict with \"subjectID\", one \"STUDY_<s>_LEVEL_<l>_PHASE_<p>\" entry per\n",
  "        PERFORMANCE row that has a non-NaN F1 value (the mean of that row's\n",
  "        available F1 columns), and \"overall_score\" (the mean of every non-NaN\n",
  "        F1 value). None, after printing the reason, when the subject is skipped.\n",
  "    \"\"\"\n",
  "    # Check SIGNALS table for AU columns; one row is enough to get the header\n",
  "    try:\n",
  "        with pd.HDFStore(hdf_path, mode=\"r\") as store:\n",
  "            cols = store.select(\"SIGNALS\", start=0, stop=1).columns\n",
  "    except Exception as e:\n",
  "        print(f\"Failed to read SIGNALS from {hdf_path}: {e}\")\n",
  "        return None\n",
  "\n",
  "    # str() keeps non-string column labels from raising AttributeError\n",
  "    au_cols = [c for c in cols if str(c).startswith(\"AU\")]\n",
  "    if not au_cols:\n",
  "        print(f\"Subject {subject_id:04d} contains no AU columns\")\n",
  "        return None\n",
  "\n",
  "    # Load PERFORMANCE table\n",
  "    try:\n",
  "        with pd.HDFStore(hdf_path, mode=\"r\") as store:\n",
  "            perf_df = store.select(\"PERFORMANCE\")\n",
  "    except Exception as e:\n",
  "        print(f\"Failed to read PERFORMANCE from {hdf_path}: {e}\")\n",
  "        return None\n",
  "\n",
  "    f1_cols = [c for c in [\"AUDITIVE F1\", \"VISUAL F1\", \"F1\"] if c in perf_df.columns]\n",
  "    if not f1_cols:\n",
  "        print(f\"Subject {subject_id:04d}: no F1 columns found\")\n",
  "        return None\n",
  "\n",
  "    # Skip the subject instead of aborting the whole run on a KeyError below\n",
  "    missing_keys = [c for c in [\"STUDY\", \"LEVEL\", \"PHASE\"] if c not in perf_df.columns]\n",
  "    if missing_keys:\n",
  "        print(f\"Subject {subject_id:04d}: PERFORMANCE is missing columns {missing_keys}\")\n",
  "        return None\n",
  "\n",
  "    subject_entry = {\"subjectID\": subject_id}\n",
  "    valid_scores = []\n",
  "\n",
  "    # Iterate through PERFORMANCE rows: each row is one (study, level, phase) combination\n",
  "    for _, row in perf_df.iterrows():\n",
  "        study = row[\"STUDY\"]\n",
  "        level = row[\"LEVEL\"]\n",
  "        phase = row[\"PHASE\"]\n",
  "        col_name = f\"STUDY_{study}_LEVEL_{level}_PHASE_{phase}\"\n",
  "\n",
  "        # Collect non-NaN F1 values from the available F1 columns\n",
  "        scores = [row[c] for c in f1_cols if pd.notna(row[c])]\n",
  "        if scores:\n",
  "            subject_entry[col_name] = float(np.mean(scores))\n",
  "            valid_scores.extend(scores)\n",
  "\n",
  "    if not valid_scores:\n",
  "        print(f\"Subject {subject_id:04d}: no valid F1 scores found\")\n",
  "        return None\n",
  "\n",
  "    # Compute overall average across all valid F1 values\n",
  "    subject_entry[\"overall_score\"] = float(np.mean(valid_scores))\n",
  "    print(\n",
  "        f\"Subject {subject_id:04d}: {len(valid_scores)} valid scores, \"\n",
  "        f\"overall = {subject_entry['overall_score']:.3f}\"\n",
  "    )\n",
  "    return subject_entry\n",
  "\n",
  "\n",
  "# Read remote file list once\n",
  "remote_files = oc.list(\".\")\n",
  "remote_names = [f.get_name() for f in remote_files]\n",
  "\n",
  "for i in range(num_files):\n",
  "    prefix = f\"{i:04d}-\"\n",
  "    matching_files = [name for name in remote_names if name.startswith(prefix) and name.endswith(\".hdf5\")]\n",
  "\n",
  "    if not matching_files:\n",
  "        print(f\"No file found for pattern: {prefix}*.hdf5\")\n",
  "        continue\n",
  "\n",
  "    # Take the first matching file, e.g. 0000-AACA.hdf5\n",
  "    file_name = matching_files[0]\n",
  "    local_tmp = f\"tmp_{i:04d}.hdf5\"\n",
  "\n",
  "    try:\n",
  "        # Download the file locally\n",
  "        try:\n",
  "            downloaded = oc.get_file(file_name, local_tmp)\n",
  "        except Exception as e:\n",
  "            print(f\"Failed to download file {file_name}: {e}\")\n",
  "            continue\n",
  "\n",
  "        # get_file can report a failure by returning False instead of raising\n",
  "        if downloaded is False:\n",
  "            print(f\"Failed to download file {file_name}: server did not return the file\")\n",
  "            continue\n",
  "        print(f\"Downloaded and opened file: {file_name} -> {local_tmp}\")\n",
  "\n",
  "        subject_entry = extract_subject_scores(i, local_tmp)\n",
  "        if subject_entry is not None:\n",
  "            performance_data.append(subject_entry)\n",
  "    finally:\n",
  "        # Remove the temporary download so repeated runs do not pile up HDF5 files\n",
  "        if os.path.exists(local_tmp):\n",
  "            os.remove(local_tmp)\n",
  "\n",
  "# Build final DataFrame and save CSV\n",
  "if performance_data:\n",
  "    performance_df = pd.DataFrame(performance_data)\n",
  "    combination_cols = sorted([c for c in performance_df.columns if c.startswith(\"STUDY_\")])\n",
  "    final_cols = [\"subjectID\", \"overall_score\"] + combination_cols\n",
  "    performance_df = performance_df[final_cols]\n",
  "    performance_df.to_csv(\"performance.csv\", index=False)\n",
  "\n",
  "    print(f\"\\nTotal subjects with Action Units: {len(performance_df)}\")\n",
  "    print(\"Saved results to performance.csv\")\n",
  "else:\n",
  "    # Keep performance_df defined so the next cell still runs on a fresh kernel\n",
  "    performance_df = pd.DataFrame(columns=[\"subjectID\", \"overall_score\"])\n",
  "    print(\"No valid data found.\")"
 ]
},
|
|
{
 "cell_type": "code",
 "execution_count": null,
 "id": "0bcaf065",
 "metadata": {},
 "outputs": [],
 "source": [
  "# Preview the first five rows of the per-subject score table\n",
  "performance_df.head(n=5)"
 ]
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.10"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|