renamed directory, created mad method python file in tools
This commit is contained in:
parent
2ee8b96b22
commit
c7295f310c
23
model_training/tools/mad_outlier_removal.py
Normal file
23
model_training/tools/mad_outlier_removal.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
def mad_outlier_removal(df, columns, threshold=3.5, c=1.4826):
    """Drop rows that are outliers according to the MAD-based modified z-score.

    For each requested column the modified z-score
    ``|x - median| / (c * MAD)`` is computed, where ``MAD`` is the median
    absolute deviation from the column median. A row is kept only if its
    score is ``<= threshold`` in every evaluated column.

    Args:
        df: Input DataFrame. It is never modified.
        columns: Column name, or iterable of column names, to check.
        threshold: Largest modified z-score that still counts as an inlier.
        c: Scale factor applied to the MAD (the default 1.4826 is the usual
            consistency constant for normally distributed data).

    Returns:
        A new DataFrame holding only the rows that are inliers in every
        evaluated column. Columns whose MAD is zero or undefined (constant,
        empty, or entirely missing) are skipped. Missing values are ignored
        when computing the statistics and are never treated as outliers.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(columns, str):
        columns = [columns]

    keep = np.ones(len(df), dtype=bool)

    for col in columns:
        # Coerce to float so nullable/integer dtypes expose missing values
        # as NaN.
        values = df[col].to_numpy(dtype=float, na_value=np.nan)
        present = ~np.isnan(values)
        if not present.any():
            # Empty or all-missing column: nothing to estimate from.
            continue

        # Statistics use only the observed values; a single NaN would
        # otherwise poison np.median and flag every row as an outlier.
        observed = values[present]
        median = np.median(observed)
        mad = np.median(np.abs(observed - median))

        # Zero MAD (more than half the values identical) or a non-finite
        # MAD (e.g. infinities in the data) gives no usable scale.
        if not np.isfinite(mad) or mad == 0:
            continue

        col_keep = np.ones(len(values), dtype=bool)
        col_keep[present] = np.abs((observed - median) / (c * mad)) <= threshold
        keep &= col_keep

    # Boolean indexing already yields new data; the explicit copy guarantees
    # the result is independent of ``df`` even when no row is dropped.
    return df[keep].copy()
|
||||||
Loading…
x
Reference in New Issue
Block a user