From 36bae270a1833148a7baa3236f4efb27253f38ec Mon Sep 17 00:00:00 2001
From: Michael
Date: Wed, 4 Mar 2026 10:55:56 +0100
Subject: [PATCH] notebook to calculate replacement values and minor changes

---
 EDA/calculate_replacement_values.ipynb        | 98 +++++++++++++++++++
 EDA/distribution_plots.ipynb                  | 12 ---
 .../CNN/CNN_crossVal_EarlyFusion_Filter.ipynb | 14 +--
 3 files changed, 99 insertions(+), 25 deletions(-)
 create mode 100644 EDA/calculate_replacement_values.ipynb

diff --git a/EDA/calculate_replacement_values.ipynb b/EDA/calculate_replacement_values.ipynb
new file mode 100644
index 0000000..563e40f
--- /dev/null
+++ b/EDA/calculate_replacement_values.ipynb
@@ -0,0 +1,98 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "b9144326",
+   "metadata": {},
+   "source": [
+    "### Calculate replacement values for live deployment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2a7b60d6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from pathlib import Path\n",
+    "import yaml"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "197cb8a6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# TODO: insert path to the Parquet dataset\n",
+    "dataset_path = Path(r\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6cf26eb2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df=pd.read_parquet(dataset_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f2c5679e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pd.set_option(\"display.max_rows\", None)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e88981b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Column medians are written out as the replacement ('fallback') values.\n",
+    "# optional: your dataframe filtering code ...\n",
+    "# numeric_only=True skips non-numeric (e.g. string) columns, which make median() raise a TypeError in pandas >= 2.0.\n",
+    "medians = df.median(numeric_only=True)\n",
+    "median_dict = medians.to_dict()\n",
+    "\n",
+    "# Wrap in fallback key\n",
+    "output = {'fallback': median_dict}\n",
+    "\n",
+    "# Save to YAML\n",
+    "with open('config.yaml', 'w') as f:\n",
+    "    yaml.dump(output, f, default_flow_style=False, sort_keys=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "310",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.19"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/EDA/distribution_plots.ipynb b/EDA/distribution_plots.ipynb
index a73877a..47af5de 100644
--- a/EDA/distribution_plots.ipynb
+++ b/EDA/distribution_plots.ipynb
@@ -606,18 +606,6 @@
    "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.10"
   }
  },
  "nbformat": 4,
diff --git a/model_training/CNN/CNN_crossVal_EarlyFusion_Filter.ipynb b/model_training/CNN/CNN_crossVal_EarlyFusion_Filter.ipynb
index 9124951..1788035 100644
--- a/model_training/CNN/CNN_crossVal_EarlyFusion_Filter.ipynb
+++ b/model_training/CNN/CNN_crossVal_EarlyFusion_Filter.ipynb
@@ -123,7 +123,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "id": "3e542dfd",
    "metadata": {},
    "outputs": [
@@ -1954,18 +1954,6 @@
    "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.10"
   }
  },
  "nbformat": 4,