Adjusted paths for the deployment environment (Linux `sys.path` entry active; the Windows development path is commented out)

This commit is contained in:
Michael Weig 2026-02-16 20:11:07 +00:00
parent 2b01085a9e
commit 4eab3c9876

View File

@ -1,235 +1,253 @@
# Imports # Imports
import pandas as pd import pandas as pd
import json import json
from pathlib import Path from pathlib import Path
import numpy as np import numpy as np
import sys import sys
import yaml import yaml
import pickle import pickle
#sys.path.append('/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/tools') sys.path.append('/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/tools')
sys.path.append(r"c:\\repo\\Fahrsimulator_MSY2526_AI\\tools") # sys.path.append(r"c:\\repo\\Fahrsimulator_MSY2526_AI\\tools")
import db_helpers import db_helpers
import joblib import joblib
def _load_serialized(path: Path): def _load_serialized(path: Path):
suffix = path.suffix.lower() suffix = path.suffix.lower()
if suffix == ".pkl": if suffix == ".pkl":
with path.open("rb") as f: with path.open("rb") as f:
return pickle.load(f) return pickle.load(f)
if suffix == ".joblib": if suffix == ".joblib":
return joblib.load(path) return joblib.load(path)
raise ValueError(f"Unsupported file format: {suffix}. Use .pkl or .joblib.") raise ValueError(f"Unsupported file format: {suffix}. Use .pkl or .joblib.")
def getLastEntryFromSQLite(path, table_name, key="_Id"):
    """Fetch the most recent row of *table_name* from the SQLite DB at *path*.

    Rows are ordered descending by *key* and limited to one. Returns that row
    as a pandas Series, or an empty object-dtype Series when the table is empty.
    The connection is always closed, even if the query raises.
    """
    conn, cursor = db_helpers.connect_db(path)
    try:
        latest = db_helpers.get_data_from_table(
            conn=conn,
            table_name=table_name,
            order_by={key: "DESC"},
            limit=1,
        )
    finally:
        # No writes happen here, so commit=False on disconnect.
        db_helpers.disconnect_db(conn, cursor, commit=False)

    return pd.Series(dtype="object") if latest.empty else latest.iloc[0]
def callModel(sample, model_path, n_features=20):
    """Load a serialized model and run a prediction on a single sample.

    Parameters
    ----------
    sample : array-like
        Feature vector (1-D is reshaped to a single-row 2-D array). Only the
        first *n_features* columns are fed to the model.
    model_path : str or Path
        Path to a ``.pkl`` or ``.joblib`` model artifact; relative paths are
        resolved against the current working directory.
    n_features : int, optional
        Number of leading feature columns passed to the model (default 20;
        raise to 35 once the future model is active).

    Returns
    -------
    A Python scalar for single-value predictions, otherwise a squeezed ndarray.

    Raises
    ------
    TypeError
        If *sample* is callable, or the loaded model cannot predict.
    ValueError
        For unsupported model file extensions.
    """
    if callable(sample):
        raise TypeError(
            f"Invalid sample type: got callable `{getattr(sample, '__name__', type(sample).__name__)}`. "
            "Expected numpy array / pandas row."
        )

    model_path = Path(model_path)
    if not model_path.is_absolute():
        model_path = Path.cwd() / model_path
    model_path = model_path.resolve()

    suffix = model_path.suffix.lower()
    if suffix in {".pkl", ".joblib"}:
        model = _load_serialized(model_path)
    else:
        # Keras support is currently disabled (loading branch was commented
        # out); fail fast here instead of hitting an unbound `model`
        # (NameError) at predict time below.
        raise ValueError(f"Unsupported model format: {suffix}. Use .pkl or .joblib.")

    x = np.asarray(sample, dtype=np.float32)
    if x.ndim == 1:
        x = x.reshape(1, -1)
    # Current models were trained on the first n_features columns only.
    x = x[:, :n_features]

    if hasattr(model, "predict"):
        prediction = model.predict(x)
    elif callable(model):
        prediction = model(x)
    else:
        raise TypeError("Loaded model has no .predict(...) and is not callable.")

    prediction = np.asarray(prediction)
    if prediction.size == 1:
        return prediction.item()
    return prediction.squeeze()
def buildMessage(valid, result: np.int32, config_file_path, sample=None):
    """Assemble the publish payload for one prediction.

    The key under which the prediction is stored comes from the YAML config
    (``mqtt.publish_format.result_key``, default ``"prediction"``). The sample
    id is read from ``_Id``/``_id`` when *sample* is a Series or dict; ndarray
    results are converted to plain lists.
    """
    with Path(config_file_path).open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    mqtt_cfg = cfg.get("mqtt", {})
    result_key = mqtt_cfg.get("publish_format", {}).get("result_key", "prediction")

    sample_id = None
    if isinstance(sample, (pd.Series, dict)):
        sample_id = sample.get("_Id", sample.get("_id"))

    payload_value = np.asarray(result).tolist() if isinstance(result, np.ndarray) else result
    return {
        "valid": bool(valid),
        "_id": sample_id,
        result_key: payload_value,
    }
def convert_int64(obj):
    """Recursively replace NumPy values with JSON-serializable Python types.

    ``json.dumps`` raises ``TypeError`` on NumPy scalars and arrays. The
    original implementation only caught ``np.int64``, but predictions can be
    ``np.int32`` (see ``buildMessage``'s annotation) or floats/arrays — so all
    integer and float widths plus ndarrays are handled here. Dicts, lists and
    tuples are walked recursively; anything else is returned unchanged.
    """
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        # tolist() already yields nested native Python types.
        return obj.tolist()
    if isinstance(obj, dict):
        return {key: convert_int64(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [convert_int64(item) for item in obj]
    return obj
def sendMessage(config_file_path, message):
    """Serialize *message* to JSON and emit it (stdout for now, MQTT later).

    Reads the MQTT section of the YAML config so the topic is ready once real
    publishing is enabled; currently the payload is only printed.
    """
    with Path(config_file_path).open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    mqtt_cfg = cfg.get("mqtt", {})
    topic = mqtt_cfg.get("topic", "ml/predictions")  # consumed once publishing is enabled

    # Strip NumPy scalar types so json.dumps does not raise.
    message = convert_int64(message)

    payload = json.dumps(message, ensure_ascii=False)
    print(payload)

    # Later: publish via MQTT using config parameters above.
    # Example (kept commented intentionally):
    # import paho.mqtt.client as mqtt
    # client = mqtt.Client(client_id=mqtt_cfg.get("client_id", "predictor-01"))
    # if "username" in mqtt_cfg and mqtt_cfg.get("username"):
    #     client.username_pw_set(mqtt_cfg["username"], mqtt_cfg.get("password"))
    # client.connect(mqtt_cfg.get("host", "localhost"), int(mqtt_cfg.get("port", 1883)), 60)
    # client.publish(
    #     topic=topic,
    #     payload=payload,
    #     qos=int(mqtt_cfg.get("qos", 1)),
    #     retain=bool(mqtt_cfg.get("retain", False)),
    # )
    # client.disconnect()
    return
def replace_nan(sample, config_file_path: Path):
    """Validate the NaN ratio of *sample* and patch gaps with configured fallbacks.

    Returns ``(valid, sample)``: *valid* is ``False`` for an empty sample or
    when more than half of the fields are NaN; otherwise remaining NaNs are
    filled from the config's ``fallback`` list of ``{column: value}`` mappings.
    """
    with config_file_path.open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    # The YAML `fallback` section is a list of single-key dicts; merge them.
    fallback_map = {}
    for entry in cfg.get("fallback", []):
        if isinstance(entry, dict):
            fallback_map.update(entry)

    if sample.empty:
        return False, sample

    # More than 50% NaN marks the sample invalid.
    valid = sample.isna().mean() <= 0.5
    if valid and fallback_map:
        sample = sample.fillna(value=fallback_map)

    return valid, sample
if not scaler_path.is_absolute():
def sample_to_numpy(sample, drop_cols=("_Id", "start_time")):
    """Convert *sample* to a NumPy array, dropping bookkeeping columns.

    Handles pandas Series (drop by label) and DataFrame (drop by column);
    anything else goes through ``np.asarray`` unchanged. Missing drop
    targets are ignored.
    """
    ignored = list(drop_cols)
    if isinstance(sample, pd.Series):
        return sample.drop(labels=ignored, errors="ignore").to_numpy()
    if isinstance(sample, pd.DataFrame):
        return sample.drop(columns=ignored, errors="ignore").to_numpy()
    return np.asarray(sample)
else:
def scale_sample(sample, use_scaling=False, scaler_path=None):
    """Apply a persisted scaler to *sample*; pass it through when scaling is off.

    The artifact at *scaler_path* is expected in the format produced by
    model_training/tools/scaler.py: ``{"scalers": {...}, "method": "...",
    "scope": "..."}``; a raw scaler object with ``.transform`` is accepted as
    a fallback. Columns unknown to the scaler are left unchanged. Returns the
    same type that was passed in (Series in → Series out).
    """
    if not use_scaling or scaler_path is None:
        return sample

    resolved = Path(scaler_path)
    if not resolved.is_absolute():
        resolved = Path.cwd() / resolved
    normalizer = _load_serialized(resolved.resolve())

    if isinstance(normalizer, dict):
        scalers = normalizer.get("scalers", {})
        scope = normalizer.get("scope", "global")
    else:
        scalers, scope = {}, "global"

    if scope == "global":
        scaler = scalers.get("global")
    else:
        # Per-scope dicts: prefer a global entry, else take the first scaler.
        scaler = scalers.get("global", next(iter(scalers.values()), None))

    # Fallback if the stored object is already a raw scaler.
    if scaler is None and hasattr(normalizer, "transform"):
        scaler = normalizer
    if scaler is None or not hasattr(scaler, "transform"):
        return sample

    frame = sample.to_frame().T if isinstance(sample, pd.Series) else sample.copy()
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is None:
        return sample

    # Only scale columns the scaler was fitted on; keep the rest untouched.
    known = set(feature_names)
    cols_to_scale = [col for col in frame.columns if col in known]
    if cols_to_scale:
        frame.loc[:, cols_to_scale] = scaler.transform(frame.loc[:, cols_to_scale])

    return frame.iloc[0] if isinstance(sample, pd.Series) else frame
message = buildMessage(valid, None, config_file_path, sample=sample)
def main():
    """Run one prediction cycle: fetch latest DB row, validate, scale, predict, publish."""
    # Opt in to pandas' future fillna behavior (original note: "kann ggf raus"
    # — can possibly be removed later).
    pd.set_option('future.no_silent_downcasting', True)

    config_file_path = Path("/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/predict_pipeline/config.yaml")
    with config_file_path.open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    db_cfg = cfg["database"]
    sample = getLastEntryFromSQLite(db_cfg["path"], db_cfg["table"], db_cfg["key"])
    valid, sample = replace_nan(sample, config_file_path=config_file_path)

    if not valid:
        # Too many NaNs: still publish, but with an empty result.
        print("Sample invalid: more than 50% NaN.")
        sendMessage(config_file_path, buildMessage(valid, None, config_file_path, sample=sample))
        return

    scaled = scale_sample(
        sample,
        use_scaling=cfg["scaler"]["use_scaling"],
        scaler_path=cfg["scaler"]["path"],
    )
    prediction = callModel(sample=sample_to_numpy(scaled), model_path=cfg["model"]["path"])
    sendMessage(config_file_path, buildMessage(valid, prediction, config_file_path, sample=scaled))


if __name__ == "__main__":
    main()