From 7c4fadb951870ce5eb085b7e65846c58a9cd31b3 Mon Sep 17 00:00:00 2001 From: Michael Date: Mon, 10 Nov 2025 11:31:42 +0100 Subject: [PATCH] new files --- EDA/researchOnSubjectPerformance.ipynb | 66 ++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 9 deletions(-) diff --git a/EDA/researchOnSubjectPerformance.ipynb b/EDA/researchOnSubjectPerformance.ipynb index 4895576..5a53635 100644 --- a/EDA/researchOnSubjectPerformance.ipynb +++ b/EDA/researchOnSubjectPerformance.ipynb @@ -45,7 +45,8 @@ "# Connect once\n", "oc = owncloud.Client.from_public_link(url, folder_password=password)\n", "# File pattern\n", - "base = \"adabase-public-{num:04d}-v_0_0_2.h5py\"" + "# base = \"adabase-public-{num:04d}-v_0_0_2.h5py\"\n", + "base = \"{num:04d}-*.h5py\"" ] }, { @@ -59,11 +60,25 @@ "performance_data = []\n", "\n", "for i in range(num_files):\n", - " file_name = base.format(num=i)\n", - " local_tmp = f\"tmp_{i:04d}.h5\"\n", + " file_pattern = f\"{i:04d}-*\"\n", + " \n", + " # Get list of files matching the pattern\n", + " files = oc.list('.')\n", + " matching_files = [f.get_name() for f in files if f.get_name().startswith(f\"{i:04d}-\")]\n", + " \n", + " if matching_files:\n", + " file_name = matching_files[0] # Take the first matching file\n", + " local_tmp = f\"tmp_{i:04d}.h5\"\n", + " \n", + " oc.get_file(file_name, local_tmp)\n", + " print(f\"{file_name} geöffnet\")\n", + " else:\n", + " print(f\"Keine Datei gefunden für Muster: {file_pattern}\")\n", + " # file_name = base.format(num=i)\n", + " # local_tmp = f\"tmp_{i:04d}.h5\"\n", "\n", - " oc.get_file(file_name, local_tmp)\n", - " print(f\"{file_name} geöffnet\")\n", + " # oc.get_file(file_name, local_tmp)\n", + " # print(f\"{file_name} geöffnet\")\n", "\n", " # check SIGNALS table for AUs\n", " with pd.HDFStore(local_tmp, mode=\"r\") as store:\n", @@ -111,7 +126,7 @@ " combination_cols = sorted([c for c in performance_df.columns if c.startswith(\"STUDY_\")])\n", " final_cols = [\"subjectID\", \"overall_score\"] + combination_cols\n", " performance_df = performance_df[final_cols]\n", - " performance_df.to_csv(\"au_performance.csv\", index=False)\n", + " performance_df.to_csv(\"n_au_performance.csv\", index=False)\n", "\n", " print(f\"\\nGesamt Subjects mit Action Units: {len(performance_df)}\")\n", "else:\n", @@ -135,9 +150,42 @@ "metadata": {}, "outputs": [], "source": [ - "with pd.HDFStore(local_tmp, mode=\"r\") as store:\n", - " performance = store.select(\"PERFORMANCE\")\n", - "performance" + "with pd.HDFStore(\"tmp_0000.h5\", mode=\"r\") as store:\n", + " md = store.select(\"META\")\n", + "print(\"File 0:\")\n", + "print(md)\n", + "with pd.HDFStore(\"tmp_0001.h5\", mode=\"r\") as store:\n", + " md = store.select(\"META\")\n", + "print(\"File 1\")\n", + "print(md)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8067036b", + "metadata": {}, + "outputs": [], + "source": [ + "pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.max_rows', None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f18e7385", + "metadata": {}, + "outputs": [], + "source": [ + "with pd.HDFStore(\"tmp_0000.h5\", mode=\"r\") as store:\n", + " md = store.select(\"SIGNALS\", start=0, stop=1)\n", + "print(\"File 0:\")\n", + "md.head()\n", + "# with pd.HDFStore(\"tmp_0001.h5\", mode=\"r\",start=0, stop=1) as store:\n", + "# md = store.select(\"SIGNALS\")\n", + "# print(\"File 1\")\n", + "# print(md.columns)" ] } ],