From e7aac9dabe6d332db4dff3c11399ded11dd50b04 Mon Sep 17 00:00:00 2001 From: Celina Date: Sun, 23 Nov 2025 12:22:47 +0100 Subject: [PATCH] xgboost: 2nd model with grid search and k-fold cross --- model_training/xgboost/xgboost.ipynb | 387 +++++++++++++++++++++++++-- 1 file changed, 372 insertions(+), 15 deletions(-) diff --git a/model_training/xgboost/xgboost.ipynb b/model_training/xgboost/xgboost.ipynb index d6b186f..6966d68 100644 --- a/model_training/xgboost/xgboost.ipynb +++ b/model_training/xgboost/xgboost.ipynb @@ -128,7 +128,7 @@ "outputs": [], "source": [ "import numpy as np\n", - "from sklearn.model_selection import train_test_split\n", + "from sklearn.model_selection import train_test_split,StratifiedKFold, GridSearchCV\n", "from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, classification_report, confusion_matrix\n", "import xgboost as xgb\n", "import joblib\n", @@ -248,23 +248,44 @@ "metadata": {}, "outputs": [], "source": [ - "model = xgb.XGBClassifier(\n", + "# Basis-Modell\n", + "xgb_clf = xgb.XGBClassifier(\n", " objective=\"binary:logistic\",\n", " eval_metric=\"auc\",\n", - " learning_rate=0.05,\n", - " max_depth=6,\n", - " n_estimators=500,\n", - " subsample=0.8,\n", - " colsample_bytree=0.8,\n", + " use_label_encoder=False,\n", " random_state=42\n", ")\n", "\n", - "model.fit(\n", - " X_train, y_train,\n", - " eval_set=[(X_val, y_val)],\n", - " #early_stopping_rounds=30,\n", - " verbose=True\n", - ")" + "# Parameter-Raster\n", + "param_grid = {\n", + " \"learning_rate\": [0.01, 0.05, 0.1],\n", + " \"max_depth\": [4, 6, 8],\n", + " \"n_estimators\": [200, 500, 800],\n", + " \"subsample\": [0.8, 1.0],\n", + " \"colsample_bytree\": [0.8, 1.0]\n", + "}\n", + "\n", + "# K-Fold Cross Validation\n", + "cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)\n", + "\n", + "# Grid Search Setup\n", + "grid_search = GridSearchCV(\n", + " estimator=xgb_clf,\n", + " param_grid=param_grid,\n", + " scoring=\"roc_auc\",\n", + " n_jobs=-1,\n", + " cv=cv,\n", + " verbose=2\n", + ")\n", + "\n", + "# Training mit Cross Validation\n", + "grid_search.fit(X_train, y_train)\n", + "\n", + "print(\"Beste Parameter:\", grid_search.best_params_)\n", + "print(\"Bestes AUC:\", grid_search.best_score_)\n", + "\n", + "# Bestes Modell extrahieren\n", + "model = grid_search.best_estimator_" ] }, { @@ -272,7 +293,340 @@ "execution_count": null, "id": "09a8cd21", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=500, subsample=0.8; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=0.8; total time= 0.8s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=0.8; total time= 0.8s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=1.0; total time= 0.8s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=0.8; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=1.0; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.8; total time= 1.0s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=1.0; total time= 1.0s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=1.0; total time= 1.1s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=0.8; total time= 0.8s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=0.8; total time= 0.9s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=1.0; total time= 0.8s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=0.8; total time= 1.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=1.0; total time= 1.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=0.8; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=0.8; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=0.8; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=1.0; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=0.8; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=1.0; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=0.8; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=0.8; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=1.0; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=0.8; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=1.0; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=0.8; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=0.8; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=1.0; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=0.8; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=1.0; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=1.0; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=1.0; total time= 0.5s\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Exception ignored in: \n", + "Traceback (most recent call last):\n", + " File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 77, in __del__\n", + " File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 86, in _stop\n", + " File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 111, in _stop_locked\n", + "ChildProcessError: [Errno 10] No child processes\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=500, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=500, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=0.8; total time= 0.8s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=1.0; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=1.0; total time= 0.8s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=0.8; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=1.0; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=1.0; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.8; total time= 1.0s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=1.0; total time= 1.0s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=0.8; total time= 0.8s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=1.0; total time= 0.9s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=0.8; total time= 1.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=1.0; total time= 1.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=1.0; total time= 1.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=1.0; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=0.8; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=0.8; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=1.0; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=0.8; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=1.0; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=0.8; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=1.0; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=1.0; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=0.8; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=1.0; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=0.8; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=0.8; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=1.0; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=0.8; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=1.0; total time= 0.5s\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Exception ignored in: \n", + "Traceback (most recent call last):\n", + " File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 77, in __del__\n", + " File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 86, in _stop\n", + " File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 111, in _stop_locked\n", + "ChildProcessError: [Errno 10] No child processes\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=500, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=500, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=0.8; total time= 0.8s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=1.0; total time= 0.8s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=0.8; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=0.8; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=1.0; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.8; total time= 1.0s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=1.0; total time= 1.0s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=0.8; total time= 0.8s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=1.0; total time= 0.8s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=1.0; total time= 0.9s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=0.8; total time= 1.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=1.0; total time= 1.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=0.8; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=0.8; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=1.0; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=1.0; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=0.8; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=1.0; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=1.0; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=0.8; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=1.0; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=1.0; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=0.8; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=1.0; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=0.8; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=1.0; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=1.0; total time= 0.8s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=0.8; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=1.0; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=1.0; total time= 0.5s\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Exception ignored in: \n", + "Traceback (most recent call last):\n", + " File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 77, in __del__\n", + " File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 86, in _stop\n", + " File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 111, in _stop_locked\n", + "ChildProcessError: [Errno 10] No child processes\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=500, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=0.8; total time= 0.8s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=5, n_estimators=800, subsample=1.0; total time= 0.8s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=0.8; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=500, subsample=1.0; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.8; total time= 1.0s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=0.8; total time= 1.1s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=6, n_estimators=800, subsample=1.0; total time= 1.0s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=200, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=0.8; total time= 0.9s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=500, subsample=1.0; total time= 0.8s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=0.8; total time= 1.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=0.8; total time= 1.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.01, max_depth=7, n_estimators=800, subsample=1.0; total time= 1.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=200, subsample=1.0; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=500, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=0.8; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=5, n_estimators=800, subsample=1.0; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=500, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=0.8; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=6, n_estimators=800, subsample=1.0; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=0.8; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=0.8; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=500, subsample=1.0; total time= 0.6s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=0.8; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.05, max_depth=7, n_estimators=800, subsample=1.0; total time= 0.7s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=0.8; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=200, subsample=1.0; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=500, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=5, n_estimators=800, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=0.8; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=0.8; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=200, subsample=1.0; total time= 0.2s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=500, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=6, n_estimators=800, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=0.8; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=200, subsample=1.0; total time= 0.3s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=0.8; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=500, subsample=1.0; total time= 0.4s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=0.8; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=1.0; total time= 0.5s\n", + "[CV] END colsample_bytree=1.0, learning_rate=0.1, max_depth=7, n_estimators=800, subsample=1.0; total time= 0.5s\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Exception ignored in: \n", + "Traceback (most recent call last):\n", + " File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 77, in __del__\n", + " File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 86, in _stop\n", + " File \"/opt/conda/lib/python3.12/multiprocessing/resource_tracker.py\", line 111, in _stop_locked\n", + "ChildProcessError: [Errno 10] No child processes\n" + ] + } + ], "source": [ "from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, roc_auc_score, classification_report, ConfusionMatrixDisplay\n", "\n", @@ -325,7 +679,10 @@ "source": [ "joblib.dump(model, \"xgb_model.joblib\")\n", "joblib.dump(normalizer, \"normalizer.joblib\")\n", - "print(\"Model gespeichert.\")" + "print(\"Model gespeichert.\")\n", + "\n", + "model.save_model(\"xgb_model.json\") # als JSON (lesbar, portabel)\n", + "model.save_model(\"xgb_model.bin\") # als Binärdatei (kompakt)" ] } ],