{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e3be057e-8d2a-4d05-bd42-6b1dc75df5ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from pathlib import Path\n",
    "from sklearn.preprocessing import StandardScaler, MinMaxScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "13ad96f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Path to the parquet input file. The leading \"~\" is stored literally in the\n",
    "# Path object; nothing is expanded at this point.\n",
    "data_path = Path(\n",
    "    r\"~/Fahrsimulator_MSY2526_AI/model_training/xgboost/output_windowed.parquet\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "95e1a351",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the whole parquet file at data_path into a DataFrame.\n",
    "df = pd.read_parquet(data_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "68afd83e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Distinct subject identifiers present in the loaded table.\n",
    "subjects = df['subjectID'].unique()\n",
    "n_subjects = len(subjects)\n",
    "print(subjects)\n",
    "print(n_subjects)\n",
    "\n",
    "# Rough subject counts for a 66 % / 33 % partition of the subjects.\n",
    "for fraction in (0.66, 0.33):\n",
    "    print(n_subjects * fraction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "52dfd885",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Boolean row masks over the full table, reused by the selections below.\n",
    "is_baseline = df[\"PHASE\"] == \"baseline\"\n",
    "not_baseline = df[\"PHASE\"] != \"baseline\"\n",
    "is_nback = df[\"STUDY\"] == \"n-back\"\n",
    "is_kdrive = df[\"STUDY\"] == \"k-drive\"\n",
    "\n",
    "# Low group: every baseline row of any study, plus non-baseline n-back rows\n",
    "# recorded at LEVEL 1 or 4.\n",
    "easy_nback = is_nback & not_baseline & df[\"LEVEL\"].isin([1, 4])\n",
    "low_all = df[is_baseline | easy_nback]\n",
    "\n",
    "# High group, part 1: n-back rows at LEVEL 2, 3, 5 or 6 from the train/test phases.\n",
    "hard_nback = is_nback & df[\"LEVEL\"].isin([2, 3, 5, 6]) & df[\"PHASE\"].isin([\"train\", \"test\"])\n",
    "high_nback = df[hard_nback]\n",
    "\n",
    "# High group, part 2: every non-baseline k-drive row.\n",
    "high_kdrive = df[is_kdrive & not_baseline]\n",
    "\n",
    "# Both high parts stacked; the original row index is kept.\n",
    "high_all = pd.concat([high_nback, high_kdrive])\n",
    "\n",
    "print(f\"low all: {low_all.shape}\")\n",
    "print(f\"high n-back: {high_nback.shape}\")\n",
    "print(f\"high k-drive: {high_kdrive.shape}\")\n",
    "print(f\"high all: {high_all.shape}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8fba6edf",
   "metadata": {},
   "outputs": [],
   "source": [
    "def fit_normalizer(train_data, au_columns, method='standard', scope='global'):\n",
    "    \"\"\"Fit scalers for the AU feature columns on the training rows.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    train_data : pandas.DataFrame\n",
    "        Rows to fit on. Must contain ``au_columns`` and, for\n",
    "        ``scope='subject'``, a ``subjectID`` column.\n",
    "    au_columns : list of str\n",
    "        Names of the columns the scalers are fitted on.\n",
    "    method : {'standard', 'minmax'}\n",
    "        ``'standard'`` uses ``StandardScaler``, ``'minmax'`` uses ``MinMaxScaler``.\n",
    "    scope : {'global', 'subject'}\n",
    "        ``'global'`` fits a single scaler on all rows. ``'subject'`` fits one\n",
    "        scaler per ``subjectID`` value and additionally a pooled scaler stored\n",
    "        under the key ``'global'``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    dict\n",
    "        ``{'scalers': dict, 'method': method, 'scope': scope}``. ``scalers``\n",
    "        always holds a ``'global'`` entry; with ``scope='subject'`` it also maps\n",
    "        each training ``subjectID`` to its own fitted scaler.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``method`` or ``scope`` is not one of the supported values.\n",
    "    \"\"\"\n",
    "    if method == 'standard':\n",
    "        Scaler = StandardScaler\n",
    "    elif method == 'minmax':\n",
    "        Scaler = MinMaxScaler\n",
    "    else:\n",
    "        raise ValueError(\"method must be 'standard' or 'minmax'\")\n",
    "\n",
    "    # Validate before fitting anything so a typo fails fast.\n",
    "    if scope not in ('subject', 'global'):\n",
    "        raise ValueError(\"scope must be 'subject' or 'global'\")\n",
    "\n",
    "    # The pooled scaler is fitted for both scopes. In 'subject' scope it is the\n",
    "    # fallback for subjects that were not part of the training data; without it\n",
    "    # such subjects would be passed through unscaled by apply_normalizer.\n",
    "    scalers = {'global': Scaler().fit(train_data[au_columns])}\n",
    "\n",
    "    if scope == 'subject':\n",
    "        # groupby yields the same keys apply_normalizer looks up later.\n",
    "        for subject, subject_rows in train_data.groupby('subjectID'):\n",
    "            scalers[subject] = Scaler().fit(subject_rows[au_columns])\n",
    "\n",
    "    return {'scalers': scalers, 'method': method, 'scope': scope}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24e3a77b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Installs xgboost into the kernel's environment. No version is pinned here,\n",
    "# so the installed release depends on when this cell is run.\n",
    "%pip install xgboost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8e7fa0fa",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split, StratifiedKFold, StratifiedGroupKFold, GridSearchCV\n",
    "from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, classification_report, confusion_matrix\n",
    "import xgboost as xgb\n",
    "import joblib\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "325ef71c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach the binary target to fresh copies of both groups: 0 for the low\n",
    "# group, 1 for the high group. The source frames are left untouched.\n",
    "low = low_all.assign(label=0)\n",
    "high = high_all.assign(label=1)\n",
    "\n",
    "# Stack both groups with a fresh 0..n-1 index, then drop rows that are exact\n",
    "# duplicates across all columns (label included).\n",
    "data = pd.concat([low, high], ignore_index=True).drop_duplicates()\n",
    "\n",
    "print(\"Label distribution:\")\n",
    "print(data[\"label\"].value_counts())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "67d70e84",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Feature columns are selected purely by name: every column whose name starts\n",
    "# with \"au\" in any letter case.\n",
    "# NOTE(review): presumably these are the action-unit features; confirm that no\n",
    "# unrelated column shares the prefix.\n",
    "au_columns = list(filter(lambda name: name.lower().startswith(\"au\"), data.columns))\n",
    "print(\"Gefundene AU-Spalten:\", au_columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "960bb8c7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Subject-wise split: all rows of one subject end up in exactly one of\n",
    "# train / val / test.\n",
    "#\n",
    "# The shuffle uses a seeded generator so that the same subjects land in the\n",
    "# same partition on every run (the train/val split below was already seeded;\n",
    "# the initial permutation was not, which made the whole split irreproducible).\n",
    "SPLIT_SEED = 42\n",
    "subjects = np.random.default_rng(SPLIT_SEED).permutation(data[\"subjectID\"].unique())\n",
    "\n",
    "# First 66 % of the shuffled subjects (rounded down) form the training pool,\n",
    "# the remainder is held out for testing.\n",
    "n = len(subjects)\n",
    "n_train = int(n * 0.66)\n",
    "\n",
    "train_subjects = subjects[:n_train]\n",
    "test_subjects  = subjects[n_train:]\n",
    "\n",
    "# 20 % of the training-pool subjects are set aside for validation.\n",
    "train_subs, val_subs = train_test_split(train_subjects, test_size=0.2, random_state=SPLIT_SEED)\n",
    "\n",
    "train_df = data[data.subjectID.isin(train_subs)]\n",
    "val_df   = data[data.subjectID.isin(val_subs)]\n",
    "test_df  = data[data.subjectID.isin(test_subjects)]\n",
    "\n",
    "print(train_df.shape, val_df.shape, test_df.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "802a45c9",
   "metadata": {},
   "outputs": [],
   "source": [
    "def apply_normalizer(df_to_transform, normalizer_dict, au_columns):\n",
    "    \"\"\"Scale the AU columns of a frame with previously fitted scalers.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df_to_transform : pandas.DataFrame\n",
    "        Frame to scale. It is not modified; a copy is returned. For\n",
    "        ``scope='subject'`` it must contain a ``subjectID`` column.\n",
    "    normalizer_dict : dict\n",
    "        Mapping with the keys ``'scalers'`` (dict of fitted objects exposing\n",
    "        ``transform``) and ``'scope'`` (``'global'`` or ``'subject'``), as\n",
    "        returned by ``fit_normalizer``.\n",
    "    au_columns : list of str\n",
    "        Columns to replace with their scaled values.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        Copy of ``df_to_transform`` with ``au_columns`` scaled. In subject\n",
    "        scope, a subject without its own scaler is scaled with the ``'global'``\n",
    "        scaler when one exists; otherwise its rows are left unchanged and a\n",
    "        warning is issued.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If the stored scope is neither ``'global'`` nor ``'subject'``.\n",
    "    \"\"\"\n",
    "    import warnings  # local import: only needed for the unscaled-subject notice\n",
    "\n",
    "    scalers = normalizer_dict[\"scalers\"]\n",
    "    scope = normalizer_dict[\"scope\"]\n",
    "\n",
    "    # An unknown scope used to return the data unscaled without any signal.\n",
    "    if scope not in (\"global\", \"subject\"):\n",
    "        raise ValueError(\"scope must be 'subject' or 'global'\")\n",
    "\n",
    "    df_out = df_to_transform.copy()\n",
    "    if df_out.empty:\n",
    "        # Nothing to scale; avoids handing a zero-row array to the scaler.\n",
    "        return df_out\n",
    "\n",
    "    if scope == \"global\":\n",
    "        df_out[au_columns] = scalers[\"global\"].transform(df_out[au_columns])\n",
    "        return df_out\n",
    "\n",
    "    fallback = scalers.get(\"global\")\n",
    "    unscaled_subjects = []\n",
    "    for subj, subdf in df_out.groupby(\"subjectID\"):\n",
    "        scaler = scalers.get(subj, fallback)\n",
    "        if scaler is None:\n",
    "            unscaled_subjects.append(subj)\n",
    "            continue\n",
    "        df_out.loc[subdf.index, au_columns] = scaler.transform(subdf[au_columns])\n",
    "\n",
    "    if unscaled_subjects:\n",
    "        # Keep the best-effort behaviour (rows stay as they are) but make it visible.\n",
    "        warnings.warn(\n",
    "            f\"No scaler available for subject(s) {unscaled_subjects}; \"\n",
    "            \"their AU columns were left unscaled.\",\n",
    "            stacklevel=2,\n",
    "        )\n",
    "\n",
    "    return df_out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "289f6b89",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit one standard scaler on the training rows only, then apply that same\n",
    "# fitted scaler to every partition so val/test statistics never influence it.\n",
    "normalizer = fit_normalizer(train_df, au_columns, method=\"standard\", scope=\"global\")\n",
    "\n",
    "train_scaled, val_scaled, test_scaled = (\n",
    "    apply_normalizer(partition, normalizer, au_columns)\n",
    "    for partition in (train_df, val_df, test_df)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5df30e8d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Turn each scaled partition into a (feature matrix, label vector) pair of\n",
    "# NumPy arrays; rows keep the order of the underlying frame.\n",
    "(X_train, y_train), (X_val, y_val), (X_test, y_test) = (\n",
    "    (partition[au_columns].values, partition[\"label\"].values)\n",
    "    for partition in (train_scaled, val_scaled, test_scaled)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6fb7c86a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Base model. `use_label_encoder` is no longer passed: current XGBoost\n",
    "# releases ignore the option and emit a warning for it on every fit.\n",
    "xgb_clf = xgb.XGBClassifier(\n",
    "    objective=\"binary:logistic\",\n",
    "    eval_metric=\"auc\",\n",
    "    random_state=42\n",
    ")\n",
    "\n",
    "# Hyper-parameter grid (3 * 3 * 3 * 2 * 2 = 108 combinations).\n",
    "param_grid = {\n",
    "    \"learning_rate\": [0.01, 0.05, 0.1],\n",
    "    \"max_depth\": [4, 6, 8],\n",
    "    \"n_estimators\": [200, 500, 800],\n",
    "    \"subsample\": [0.8, 1.0],\n",
    "    \"colsample_bytree\": [0.8, 1.0]\n",
    "}\n",
    "\n",
    "# Subject-aware cross-validation. The train/val/test partitions are split by\n",
    "# subject, so the inner folds must be as well: a row-wise StratifiedKFold puts\n",
    "# rows of the same subject into both the fitting and the validation fold, which\n",
    "# inflates the CV AUC and biases the parameter choice.\n",
    "# X_train was built from train_scaled, so the group labels are row-aligned.\n",
    "cv_groups = train_scaled[\"subjectID\"].values\n",
    "n_cv_splits = min(5, len(np.unique(cv_groups)))  # cannot exceed the number of subjects\n",
    "cv = StratifiedGroupKFold(n_splits=n_cv_splits, shuffle=True, random_state=42)\n",
    "\n",
    "# Grid search setup\n",
    "grid_search = GridSearchCV(\n",
    "    estimator=xgb_clf,\n",
    "    param_grid=param_grid,\n",
    "    scoring=\"roc_auc\",\n",
    "    n_jobs=-1,\n",
    "    cv=cv,\n",
    "    verbose=2\n",
    ")\n",
    "\n",
    "# Fit with cross-validation; `groups` is forwarded to the splitter.\n",
    "grid_search.fit(X_train, y_train, groups=cv_groups)\n",
    "\n",
    "print(\"Beste Parameter:\", grid_search.best_params_)\n",
    "print(\"Bestes AUC:\", grid_search.best_score_)\n",
    "\n",
    "# Keep the estimator refitted on all training rows with the best parameters.\n",
    "model = grid_search.best_estimator_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "09a8cd21",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=500, subsample=0.8; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=0.8; total time=   0.8s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=0.8; total time=   0.8s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=1.0; total time=   0.8s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=0.8; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=1.0; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.8; total time=   1.0s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=1.0; total time=   1.0s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=1.0; total time=   1.1s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=0.8; total time=   0.8s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=0.8; total time=   0.9s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=1.0; total time=   0.8s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=0.8; total time=   1.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=1.0; total time=   1.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=0.8; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=0.8; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=0.8; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=1.0; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=0.8; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=1.0; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=0.8; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=0.8; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=1.0; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=0.8; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=1.0; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=0.8; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=0.8; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=1.0; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=0.8; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=1.0; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=1.0; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=1.0; total time=   0.5s\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Exception ignored in: <function ResourceTracker.__del__ at 0x7f8a96043d80>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 77, in __del__\n",
      "  File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 86, in _stop\n",
      "  File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 111, in _stop_locked\n",
      "ChildProcessError: [Errno 10] No child processes\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=500, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=500, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=0.8; total time=   0.8s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=1.0; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=1.0; total time=   0.8s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=0.8; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=1.0; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=1.0; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.8; total time=   1.0s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=1.0; total time=   1.0s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=0.8; total time=   0.8s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=1.0; total time=   0.9s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=0.8; total time=   1.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=1.0; total time=   1.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=1.0; total time=   1.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=1.0; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=0.8; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=0.8; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=1.0; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=0.8; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=1.0; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=0.8; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=1.0; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=1.0; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=0.8; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=1.0; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=0.8; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=0.8; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=1.0; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=1.0; total time=   0.5s\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Exception ignored in: <function ResourceTracker.__del__ at 0x7f40af477d80>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 77, in __del__\n",
      "  File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 86, in _stop\n",
      "  File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 111, in _stop_locked\n",
      "ChildProcessError: [Errno 10] No child processes\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=500, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=500, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=0.8; total time=   0.8s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=1.0; total time=   0.8s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=0.8; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=0.8; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=1.0; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.8; total time=   1.0s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=1.0; total time=   1.0s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=0.8; total time=   0.8s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=1.0; total time=   0.8s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=1.0; total time=   0.9s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=0.8; total time=   1.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=1.0; total time=   1.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=0.8; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=0.8; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=1.0; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=1.0; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=0.8; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=1.0; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=1.0; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=0.8; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=1.0; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=1.0; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=0.8; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=1.0; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=0.8; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=1.0; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=1.0; total time=   0.8s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=0.8; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=1.0; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=1.0; total time=   0.5s\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Exception ignored in: <function ResourceTracker.__del__ at 0x7fd2171ffd80>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 77, in __del__\n",
      "  File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 86, in _stop\n",
      "  File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 111, in _stop_locked\n",
      "ChildProcessError: [Errno 10] No child processes\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=500, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=0.8; total time=   0.8s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=1.0; total time=   0.8s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=0.8; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=1.0; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.8; total time=   1.0s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.8; total time=   1.1s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=1.0; total time=   1.0s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=0.8; total time=   0.9s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=1.0; total time=   0.8s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=0.8; total time=   1.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=0.8; total time=   1.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=1.0; total time=   1.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=1.0; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=0.8; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=1.0; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=0.8; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=1.0; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=0.8; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=0.8; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=1.0; total time=   0.6s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=0.8; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=1.0; total time=   0.7s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=0.8; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=0.8; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=1.0; total time=   0.2s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=0.8; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=1.0; total time=   0.3s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=0.8; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=1.0; total time=   0.4s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=0.8; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=1.0; total time=   0.5s\n",
      "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=1.0; total time=   0.5s\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Exception ignored in: <function ResourceTracker.__del__ at 0x7ff5daf5fd80>\n",
      "Traceback (most recent call last):\n",
      "  File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 77, in __del__\n",
      "  File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 86, in _stop\n",
      "  File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 111, in _stop_locked\n",
      "ChildProcessError: [Errno 10] No child processes\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, roc_auc_score, classification_report, ConfusionMatrixDisplay\n",
    "\n",
    "def evaluate(model, X, y, title=\"Evaluation\"):\n",
    "    \"\"\"Print classification metrics for a fitted model and plot its confusion matrices.\n",
    "\n",
    "    Args:\n",
    "        model: Fitted classifier exposing `predict_proba`; column 1 is used as the\n",
    "            positive-class probability.\n",
    "        X: Feature matrix passed to `model.predict_proba`.\n",
    "        y: True labels (assumes binary 0 = Low, 1 = High -- TODO confirm).\n",
    "        title: Name of the evaluated split; appended to each plot title.\n",
    "    \"\"\"\n",
    "    # Predictions: positive-class probability, thresholded at 0.5 for hard labels\n",
    "    preds_proba = model.predict_proba(X)[:, 1]\n",
    "    preds = (preds_proba > 0.5).astype(int)\n",
    "\n",
    "    # Print metrics\n",
    "    print(\"Accuracy:\", accuracy_score(y, preds))\n",
    "    print(\"F1:\", f1_score(y, preds))\n",
    "    # ROC AUC is a ranking metric: score the probabilities, not the thresholded\n",
    "    # labels, which would collapse the curve to a single operating point.\n",
    "    print(\"AUC:\", roc_auc_score(y, preds_proba))\n",
    "    print(\"Confusion:\\n\", confusion_matrix(y, preds))\n",
    "    print(classification_report(y, preds))\n",
    "\n",
    "    # Plot the confusion matrix twice: raw counts, then normalized over the true labels\n",
    "    for normalize in [None, 'true']:\n",
    "        cm = confusion_matrix(y, preds, normalize=normalize)\n",
    "        cm_disp = ConfusionMatrixDisplay(cm, display_labels=['Low', 'High'])\n",
    "        cm_disp.plot(cmap=\"Blues\")\n",
    "        cm_disp.ax_.set_title(f\"Confusion Matrix - {title}\")\n",
    "\n",
    "# Run the evaluation for the train / validation / test splits\n",
    "for header, features, labels, split_title in [\n",
    "    (\"TRAIN:\", X_train, y_train, \"Train\"),\n",
    "    (\"VAL:\", X_val, y_val, \"Validation\"),\n",
    "    (\"TEST:\", X_test, y_test, \"Test\"),\n",
    "]:\n",
    "    print(header)\n",
    "    evaluate(model, features, labels, title=split_title)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c43b0c80",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the fitted model and the normalizer as joblib artifacts\n",
    "for artifact, target in ((model, \"xgb_model.joblib\"), (normalizer, \"normalizer.joblib\")):\n",
    "    joblib.dump(artifact, target)\n",
    "print(\"Model gespeichert.\")\n",
    "\n",
    "# Additionally export through the model's own save_model: once as JSON\n",
    "# (readable, portable) and once under a .bin name (intended as a compact binary).\n",
    "# NOTE(review): the on-disk format of the .bin export depends on the installed\n",
    "# XGBoost version -- confirm it is still the intended format.\n",
    "for target in (\"xgb_model.json\", \"xgb_model.bin\"):\n",
    "    model.save_model(target)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}