Imports

In [None]:
import yaml
import owncloud
import pandas as pd
import numpy as np

Connection to ownCloud

In [None]:
# Read the login configuration (share URL and folder password) from YAML
with open("../login.yaml") as f:
    cfg = yaml.safe_load(f)

# The parsed config is indexed as a sequence: entry 0 carries the URL,
# entry 1 carries the password
url = cfg[0]["url"]
password = cfg[1]["password"]

# Naming template for the remote files; `num` is zero-padded to four digits
base = "adabase-public-{num:04d}-v_0_0_2.h5py"

# Create one client for the public share link; it is reused for every download
oc = owncloud.Client.from_public_link(url, folder_password=password)

In [None]:
# Download the first `num_files` subject files, keep only subjects whose
# SIGNALS table has action-unit ("AU*") columns, and collect their F1 scores
# from the PERFORMANCE table into one row per subject.
num_files = 2  # number of files to process (min: 1, max: 30)
performance_data = []  # one dict per subject with at least one valid F1 score

for i in range(num_files):
    file_name = base.format(num=i)
    local_tmp = f"tmp_{i:04d}.h5"

    # Fetch the remote file into a local copy; the copy is left on disk
    oc.get_file(file_name, local_tmp)
    print(f"{file_name} geöffnet")

    # Open the file once for both the AU check and the PERFORMANCE read
    with pd.HDFStore(local_tmp, mode="r") as store:
        # A single row is enough to obtain the column labels
        cols = store.select("SIGNALS", start=0, stop=1).columns
        au_cols = [c for c in cols if c.startswith("AU")]
        if not au_cols:
            print(f"Subject {i} enthält keine AUs")
            continue

        # load performance table
        perf_df = store.select("PERFORMANCE")

    # Only F1 columns that actually exist in this subject's table are used
    f1_cols = [c for c in ["AUDITIVE F1", "VISUAL F1", "F1"] if c in perf_df.columns]
    if not f1_cols:
        print(f"Subject {i}: keine F1-Spalten gefunden")
        continue

    subject_entry = {"subjectID": i}
    valid_scores = []

    # iterate rows: each (study, level, phase)
    for _, row in perf_df.iterrows():
        study, level, phase = row["STUDY"], row["LEVEL"], row["PHASE"]
        col_name = f"STUDY_{study}_LEVEL_{level}_PHASE_{phase}"

        # collect the non-missing F1 values among the available columns
        scores = [row[c] for c in f1_cols if pd.notna(row[c])]
        if scores:
            # Per-combination value is the mean of that row's valid F1 scores.
            # A repeated (study, level, phase) combination overwrites the
            # earlier entry for the same column name.
            mean_score = float(np.mean(scores))
            subject_entry[col_name] = mean_score
            valid_scores.extend(scores)

    # The overall score is the mean over every individual valid F1 value
    # (not the mean of the per-combination means).
    if valid_scores:
        subject_entry["overall_score"] = float(np.mean(valid_scores))
        performance_data.append(subject_entry)
        print(f"Subject {i}: {len(valid_scores)} gültige Scores, Overall = {subject_entry['overall_score']:.3f}")
    else:
        print(f"Subject {i}: keine gültigen F1-Scores")

# build dataframe
if performance_data:
    performance_df = pd.DataFrame(performance_data)
    # Identifier and overall score first, then the combination columns sorted by name
    combination_cols = sorted([c for c in performance_df.columns if c.startswith("STUDY_")])
    final_cols = ["subjectID", "overall_score"] + combination_cols
    performance_df = performance_df[final_cols]
    performance_df.to_csv("au_performance.csv", index=False)

    print(f"\nGesamt Subjects mit Action Units: {len(performance_df)}")
else:
    # Always bind `performance_df`, so code that uses it afterwards gets an
    # empty frame instead of a NameError (fresh kernel) or a stale frame
    # left over from an earlier run. No CSV is written in this case.
    performance_df = pd.DataFrame(columns=["subjectID", "overall_score"])
    print("Keine gültigen Daten gefunden.")

In [None]:
# Preview the first five rows of the per-subject summary
performance_df.head(n=5)

In [None]:
# Show the raw PERFORMANCE table of the file `local_tmp` currently points to
# (the variable is set by the download loop, so this is the last file it fetched)
with pd.HDFStore(local_tmp, mode="r") as hdf:
    performance = hdf.select(key="PERFORMANCE")

performance