Adjusted paths for the deployment environment (Linux `sys.path` entry active; the Windows development path is commented out)

This commit is contained in:
Michael Weig 2026-02-16 20:11:07 +00:00
parent 2b01085a9e
commit 4eab3c9876

View File

@ -1,235 +1,253 @@
# Imports # Imports
import pandas as pd import pandas as pd
import json import json
from pathlib import Path from pathlib import Path
import numpy as np import numpy as np
import sys import sys
import yaml import yaml
import pickle import pickle
#sys.path.append('/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/tools') sys.path.append('/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/tools')
sys.path.append(r"c:\\repo\\Fahrsimulator_MSY2526_AI\\tools") # sys.path.append(r"c:\\repo\\Fahrsimulator_MSY2526_AI\\tools")
import db_helpers import db_helpers
import joblib import joblib
def _load_serialized(path: Path): def _load_serialized(path: Path):
suffix = path.suffix.lower() suffix = path.suffix.lower()
if suffix == ".pkl": if suffix == ".pkl":
with path.open("rb") as f: with path.open("rb") as f:
return pickle.load(f) return pickle.load(f)
if suffix == ".joblib": if suffix == ".joblib":
return joblib.load(path) return joblib.load(path)
raise ValueError(f"Unsupported file format: {suffix}. Use .pkl or .joblib.") raise ValueError(f"Unsupported file format: {suffix}. Use .pkl or .joblib.")
def getLastEntryFromSQLite(path, table_name, key="_Id"):
    """Fetch the most recent row of *table_name* from the SQLite DB at *path*.

    Rows are ordered descending by *key* and limited to one. Returns that row
    as a pandas Series, or an empty object-dtype Series when the table is empty.
    The connection is always closed, even if the query raises.
    """
    conn, cursor = db_helpers.connect_db(path)
    try:
        latest = db_helpers.get_data_from_table(
            conn=conn,
            table_name=table_name,
            order_by={key: "DESC"},
            limit=1,
        )
    finally:
        # No writes happen here, so commit=False on disconnect.
        db_helpers.disconnect_db(conn, cursor, commit=False)

    return pd.Series(dtype="object") if latest.empty else latest.iloc[0]
def callModel(sample, model_path, n_features=20):
    """Load a serialized model and run a prediction on a single sample.

    Parameters
    ----------
    sample : array-like
        Feature vector (1-D is reshaped to a single-row 2-D array). Only the
        first *n_features* columns are fed to the model.
    model_path : str or Path
        Path to a ``.pkl`` or ``.joblib`` model artifact; relative paths are
        resolved against the current working directory.
    n_features : int, optional
        Number of leading feature columns passed to the model (default 20;
        raise to 35 once the future model is active).

    Returns
    -------
    A Python scalar for single-value predictions, otherwise a squeezed ndarray.

    Raises
    ------
    TypeError
        If *sample* is callable, or the loaded model cannot predict.
    ValueError
        For unsupported model file extensions.
    """
    if callable(sample):
        raise TypeError(
            f"Invalid sample type: got callable `{getattr(sample, '__name__', type(sample).__name__)}`. "
            "Expected numpy array / pandas row."
        )

    model_path = Path(model_path)
    if not model_path.is_absolute():
        model_path = Path.cwd() / model_path
    model_path = model_path.resolve()

    suffix = model_path.suffix.lower()
    if suffix in {".pkl", ".joblib"}:
        model = _load_serialized(model_path)
    else:
        # Keras support is currently disabled (loading branch was commented
        # out); fail fast here instead of hitting an unbound `model`
        # (NameError) at predict time below.
        raise ValueError(f"Unsupported model format: {suffix}. Use .pkl or .joblib.")

    x = np.asarray(sample, dtype=np.float32)
    if x.ndim == 1:
        x = x.reshape(1, -1)
    # Current models were trained on the first n_features columns only.
    x = x[:, :n_features]

    if hasattr(model, "predict"):
        prediction = model.predict(x)
    elif callable(model):
        prediction = model(x)
    else:
        raise TypeError("Loaded model has no .predict(...) and is not callable.")

    prediction = np.asarray(prediction)
    if prediction.size == 1:
        return prediction.item()
    return prediction.squeeze()
def buildMessage(valid, result: np.int32, config_file_path, sample=None):
    """Assemble the publish payload for one prediction.

    The key under which the prediction is stored comes from the YAML config
    (``mqtt.publish_format.result_key``, default ``"prediction"``). The sample
    id is read from ``_Id``/``_id`` when *sample* is a Series or dict; ndarray
    results are converted to plain lists.
    """
    with Path(config_file_path).open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    mqtt_cfg = cfg.get("mqtt", {})
    result_key = mqtt_cfg.get("publish_format", {}).get("result_key", "prediction")

    sample_id = None
    if isinstance(sample, (pd.Series, dict)):
        sample_id = sample.get("_Id", sample.get("_id"))

    payload_value = np.asarray(result).tolist() if isinstance(result, np.ndarray) else result
    return {
        "valid": bool(valid),
        "_id": sample_id,
        result_key: payload_value,
    }
def convert_int64(obj):
    """Recursively replace NumPy values with JSON-serializable Python types.

    ``json.dumps`` raises ``TypeError`` on NumPy scalars and arrays. The
    original implementation only caught ``np.int64``, but predictions can be
    ``np.int32`` (see ``buildMessage``'s annotation) or floats/arrays — so all
    integer and float widths plus ndarrays are handled here. Dicts, lists and
    tuples are walked recursively; anything else is returned unchanged.
    """
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        # tolist() already yields nested native Python types.
        return obj.tolist()
    if isinstance(obj, dict):
        return {key: convert_int64(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [convert_int64(item) for item in obj]
    return obj
def sendMessage(config_file_path, message):
    """Serialize *message* to JSON and emit it (stdout for now, MQTT later).

    Reads the MQTT section of the YAML config so the topic is ready once real
    publishing is enabled; currently the payload is only printed.
    """
    with Path(config_file_path).open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    mqtt_cfg = cfg.get("mqtt", {})
    topic = mqtt_cfg.get("topic", "ml/predictions")  # consumed once publishing is enabled

    # Strip NumPy scalar types so json.dumps does not raise.
    message = convert_int64(message)

    payload = json.dumps(message, ensure_ascii=False)
    print(payload)

    # Later: publish via MQTT using config parameters above.
    # Example (kept commented intentionally):
    # import paho.mqtt.client as mqtt
    # client = mqtt.Client(client_id=mqtt_cfg.get("client_id", "predictor-01"))
    # if "username" in mqtt_cfg and mqtt_cfg.get("username"):
    #     client.username_pw_set(mqtt_cfg["username"], mqtt_cfg.get("password"))
    # client.connect(mqtt_cfg.get("host", "localhost"), int(mqtt_cfg.get("port", 1883)), 60)
    # client.publish(
    #     topic=topic,
    #     payload=payload,
    #     qos=int(mqtt_cfg.get("qos", 1)),
    #     retain=bool(mqtt_cfg.get("retain", False)),
    # )
    # client.disconnect()
    return
def replace_nan(sample, config_file_path: Path):
    """Validate the NaN ratio of *sample* and patch gaps with configured fallbacks.

    Returns ``(valid, sample)``: *valid* is ``False`` for an empty sample or
    when more than half of the fields are NaN; otherwise remaining NaNs are
    filled from the config's ``fallback`` list of ``{column: value}`` mappings.
    """
    with config_file_path.open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    # The YAML `fallback` section is a list of single-key dicts; merge them.
    fallback_map = {}
    for entry in cfg.get("fallback", []):
        if isinstance(entry, dict):
            fallback_map.update(entry)

    if sample.empty:
        return False, sample

    # More than 50% NaN marks the sample invalid.
    valid = sample.isna().mean() <= 0.5
    if valid and fallback_map:
        sample = sample.fillna(value=fallback_map)

    return valid, sample
if not scaler_path.is_absolute():
def sample_to_numpy(sample, drop_cols=("_Id", "start_time")):
    """Convert *sample* to a NumPy array, dropping bookkeeping columns.

    Handles pandas Series (drop by label) and DataFrame (drop by column);
    anything else goes through ``np.asarray`` unchanged. Missing drop
    targets are ignored.
    """
    ignored = list(drop_cols)
    if isinstance(sample, pd.Series):
        return sample.drop(labels=ignored, errors="ignore").to_numpy()
    if isinstance(sample, pd.DataFrame):
        return sample.drop(columns=ignored, errors="ignore").to_numpy()
    return np.asarray(sample)
else:
def scale_sample(sample, use_scaling=False, scaler_path=None):
    """Apply a persisted scaler to *sample*; pass it through when scaling is off.

    The artifact at *scaler_path* is expected in the format produced by
    model_training/tools/scaler.py: ``{"scalers": {...}, "method": "...",
    "scope": "..."}``; a raw scaler object with ``.transform`` is accepted as
    a fallback. Columns unknown to the scaler are left unchanged. Returns the
    same type that was passed in (Series in → Series out).
    """
    if not use_scaling or scaler_path is None:
        return sample

    resolved = Path(scaler_path)
    if not resolved.is_absolute():
        resolved = Path.cwd() / resolved
    normalizer = _load_serialized(resolved.resolve())

    if isinstance(normalizer, dict):
        scalers = normalizer.get("scalers", {})
        scope = normalizer.get("scope", "global")
    else:
        scalers, scope = {}, "global"

    if scope == "global":
        scaler = scalers.get("global")
    else:
        # Per-scope dicts: prefer a global entry, else take the first scaler.
        scaler = scalers.get("global", next(iter(scalers.values()), None))

    # Fallback if the stored object is already a raw scaler.
    if scaler is None and hasattr(normalizer, "transform"):
        scaler = normalizer
    if scaler is None or not hasattr(scaler, "transform"):
        return sample

    frame = sample.to_frame().T if isinstance(sample, pd.Series) else sample.copy()
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is None:
        return sample

    # Only scale columns the scaler was fitted on; keep the rest untouched.
    known = set(feature_names)
    cols_to_scale = [col for col in frame.columns if col in known]
    if cols_to_scale:
        frame.loc[:, cols_to_scale] = scaler.transform(frame.loc[:, cols_to_scale])

    return frame.iloc[0] if isinstance(sample, pd.Series) else frame
message = buildMessage(valid, None, config_file_path, sample=sample)
def main():
    """Run one prediction cycle: fetch latest DB row, validate, scale, predict, publish."""
    # Opt in to pandas' future fillna behavior (original note: "kann ggf raus"
    # — can possibly be removed later).
    pd.set_option('future.no_silent_downcasting', True)

    config_file_path = Path("/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/predict_pipeline/config.yaml")
    with config_file_path.open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    db_cfg = cfg["database"]
    sample = getLastEntryFromSQLite(db_cfg["path"], db_cfg["table"], db_cfg["key"])
    valid, sample = replace_nan(sample, config_file_path=config_file_path)

    if not valid:
        # Too many NaNs: still publish, but with an empty result.
        print("Sample invalid: more than 50% NaN.")
        sendMessage(config_file_path, buildMessage(valid, None, config_file_path, sample=sample))
        return

    scaled = scale_sample(
        sample,
        use_scaling=cfg["scaler"]["use_scaling"],
        scaler_path=cfg["scaler"]["path"],
    )
    prediction = callModel(sample=sample_to_numpy(scaled), model_path=cfg["model"]["path"])
    sendMessage(config_file_path, buildMessage(valid, prediction, config_file_path, sample=scaled))


if __name__ == "__main__":
    main()