# Origin: commit c7295f310c6a265b2a993479c7cb524ae595e04a
#   "renamed directory, created mad method python file in tools"
#   - created model_training/tools/mad_outlier_removal.py (this module)
#   - renamed model_training/"MAD outlier removal"/mad_outlier_removal.ipynb
#     to model_training/MAD_outlier_removal/mad_outlier_removal.ipynb
import numpy as np
import pandas as pd


def mad_outlier_removal(df: pd.DataFrame, columns, threshold: float = 3.5,
                        c: float = 1.4826) -> pd.DataFrame:
    """Drop rows whose value in any of ``columns`` is a MAD-based outlier.

    For each column the score ``|x - median| / (c * MAD)`` is computed, where
    ``MAD`` is the median absolute deviation from the column median.  A row is
    removed when its score exceeds ``threshold`` in at least one column.

    Args:
        df: Input frame.  It is never modified; the function works on a copy.
        columns: Iterable of column labels to screen.  Each column must be
            convertible to float.
        threshold: Largest score that is still kept (a score equal to
            ``threshold`` is kept).
        c: Scale factor applied to the MAD before dividing.

    Returns:
        A new DataFrame holding the rows that were not flagged, with the
        original index labels preserved.

    Raises:
        KeyError: If a label in ``columns`` is not a column of ``df``.

    Notes:
        * Missing values (NaN) are ignored when computing the median and MAD
          and are never flagged as outliers themselves, so a single NaN no
          longer causes every row to be dropped.
        * A column is skipped (it removes nothing) when it has no observed
          values, or when its MAD is zero or not finite, because the score is
          undefined in those cases.
    """
    df_clean = df.copy()
    keep = np.ones(len(df_clean), dtype=bool)

    for col in columns:
        # Plain float ndarray, with pandas missing markers mapped to NaN.
        values = df_clean[col].to_numpy(dtype=float, na_value=np.nan)

        observed = values[~np.isnan(values)]
        if observed.size == 0:
            # Empty or all-missing column: no statistics to compute.
            continue

        # Infinite entries can yield inf - inf; silence the warning and let
        # the finiteness check below decide whether the column is usable.
        with np.errstate(invalid="ignore"):
            median = np.median(observed)
            mad = np.median(np.abs(observed - median))

            if mad == 0 or not np.isfinite(mad):
                continue

            score = np.abs(values - median) / (c * mad)
            # NaN > threshold is False, so missing values are retained.
            keep &= ~(score > threshold)

    if keep.all():
        return df_clean
    return df_clean[keep]