adjusted paths (this is the deployment setting)
This commit is contained in:
parent
2b01085a9e
commit
4eab3c9876
@ -1,235 +1,253 @@
|
|||||||
# Imports
|
# Imports
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import json
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import sys
|
import sys
|
||||||
import yaml
|
import yaml
|
||||||
import pickle
|
import pickle
|
||||||
#sys.path.append('/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/tools')
|
sys.path.append('/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/tools')
|
||||||
sys.path.append(r"c:\\repo\\Fahrsimulator_MSY2526_AI\\tools")
|
# sys.path.append(r"c:\\repo\\Fahrsimulator_MSY2526_AI\\tools")
|
||||||
import db_helpers
|
import db_helpers
|
||||||
import joblib
|
import joblib
|
||||||
|
|
||||||
def _load_serialized(path: Path):
    """Deserialize a model/scaler object from *path*.

    Supports ``.pkl`` (stdlib pickle) and ``.joblib`` (joblib) files.

    Raises:
        ValueError: for any other file extension.
    """
    ext = path.suffix.lower()
    if ext == ".pkl":
        # NOTE(review): pickle.load on untrusted files is unsafe — assumes the
        # model artifacts are trusted; confirm.
        with path.open("rb") as handle:
            return pickle.load(handle)
    if ext == ".joblib":
        return joblib.load(path)
    raise ValueError(f"Unsupported file format: {ext}. Use .pkl or .joblib.")
||||||
def getLastEntryFromSQLite(path, table_name, key="_Id"):
    """Fetch the most recent row of *table_name* as a pandas Series.

    "Most recent" means the first row when ordering by *key* descending.
    Returns an empty object-dtype Series when the table has no rows.
    """
    conn, cursor = db_helpers.connect_db(path)
    try:
        latest = db_helpers.get_data_from_table(
            conn=conn,
            table_name=table_name,
            order_by={key: "DESC"},
            limit=1,
        )
    finally:
        # Always release the connection, even when the query raises.
        db_helpers.disconnect_db(conn, cursor, commit=False)

    return pd.Series(dtype="object") if latest.empty else latest.iloc[0]
||||||
def callModel(sample, model_path):
    """Load a model from *model_path* and return its prediction for *sample*.

    Parameters
    ----------
    sample : array-like
        1-D feature vector (reshaped to a single-row batch) or 2-D batch.
    model_path : str | Path
        ``.pkl`` / ``.joblib`` (pickled estimator) or ``.keras`` (TensorFlow).

    Returns
    -------
    A Python scalar when the prediction has a single element, otherwise a
    squeezed numpy array.

    Raises
    ------
    TypeError: when *sample* is callable or the loaded model cannot predict.
    ValueError: for unsupported model file formats.
    """
    if callable(sample):
        raise TypeError(
            f"Invalid sample type: got callable `{getattr(sample, '__name__', type(sample).__name__)}`. "
            "Expected numpy array / pandas row."
        )

    model_path = Path(model_path)
    if not model_path.is_absolute():
        model_path = Path.cwd() / model_path
    model_path = model_path.resolve()

    suffix = model_path.suffix.lower()
    if suffix in {".pkl", ".joblib"}:
        model = _load_serialized(model_path)
    elif suffix == ".keras":
        # Lazy import: TensorFlow may be absent on deployments that only
        # use .pkl/.joblib models, so only import when actually needed.
        import tensorflow as tf
        model = tf.keras.models.load_model(model_path)
    else:
        # BUGFIX: this branch had been commented out, leaving `model` unbound
        # for unknown suffixes (NameError later instead of a clear error).
        raise ValueError(f"Unsupported model format: {suffix}. Use .pkl, .joblib, or .keras.")

    x = np.asarray(sample, dtype=np.float32)
    if x.ndim == 1:
        x = x.reshape(1, -1)

    if suffix == ".keras":
        x_full = x
        # Future model (35 features): keep this call when your new model is active.
        # prediction = model.predict(x_full[:, :35], verbose=0)
        prediction = model.predict(x_full[:, :20], verbose=0)
    else:
        # Only the first 20 columns are fed to the model — assumes the current
        # feature count is 20; TODO confirm against training pipeline.
        if hasattr(model, "predict"):
            prediction = model.predict(x[:, :20])
        elif callable(model):
            prediction = model(x[:, :20])
        else:
            raise TypeError("Loaded model has no .predict(...) and is not callable.")

    prediction = np.asarray(prediction)
    if prediction.size == 1:
        return prediction.item()
    return prediction.squeeze()
||||||
def buildMessage(valid, result: np.int32, config_file_path, sample=None):
    """Assemble the publish payload: validity flag, sample id and prediction.

    The key under which the prediction is stored comes from the YAML config
    (mqtt.publish_format.result_key, default "prediction").
    """
    with Path(config_file_path).open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    result_key = (
        cfg.get("mqtt", {}).get("publish_format", {}).get("result_key", "prediction")
    )

    # Both pandas rows and plain dicts expose .get with a default, so one
    # expression covers either sample shape; anything else yields None.
    sample_id = (
        sample.get("_Id", sample.get("_id"))
        if isinstance(sample, (pd.Series, dict))
        else None
    )

    payload_value = (
        np.asarray(result).tolist() if isinstance(result, np.ndarray) else result
    )
    return {"valid": bool(valid), "_id": sample_id, result_key: payload_value}
||||||
def convert_int64(obj):
    """Recursively convert numpy scalars inside *obj* to native Python types.

    Needed because json.dumps cannot serialize numpy scalar types. Dicts and
    lists are rebuilt with converted values; any other object is returned
    unchanged.
    """
    # Generalized beyond np.int64: np.integer covers all numpy int widths and
    # np.floating covers float32/float64, which json.dumps also rejects.
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, dict):
        return {key: convert_int64(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [convert_int64(item) for item in obj]
    return obj
||||||
|
|
||||||
def sendMessage(config_file_path, message):
    """Serialize *message* to JSON and emit it (printed for now; MQTT publish is TODO)."""
    # Load the configuration to resolve MQTT settings up front.
    with Path(config_file_path).open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    mqtt_cfg = cfg.get("mqtt", {})
    topic = mqtt_cfg.get("topic", "ml/predictions")

    # numpy scalars are not JSON serializable — normalize them first.
    payload = json.dumps(convert_int64(message), ensure_ascii=False)
    print(payload)

    # TODO: publish via paho-mqtt using mqtt_cfg (client_id, host/port,
    # username/password, qos, retain), e.g.
    #   client.publish(topic=topic, payload=payload, qos=..., retain=...)
    return
||||||
|
|
||||||
if valid and fallback_map:
|
def replace_nan(sample, config_file_path: Path):
    """Validate the NaN ratio of *sample* and fill NaNs from config fallbacks.

    Returns:
        (valid, sample): valid is False for an empty sample or when more than
        half of the values are NaN; otherwise NaNs are replaced using the
        "fallback" mapping from the YAML config.
    """
    with config_file_path.open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    # "fallback" is a list of single-key dicts in the config; flatten to one map.
    fallback_map = {}
    for entry in cfg.get("fallback", []):
        if isinstance(entry, dict):
            fallback_map.update(entry)

    if sample.empty:
        return False, sample

    # Reject samples where more than 50% of the features are missing.
    valid = sample.isna().mean() <= 0.5
    if valid and fallback_map:
        sample = sample.fillna(value=fallback_map)

    return valid, sample
||||||
if not scaler_path.is_absolute():
|
|
||||||
scaler_path = Path.cwd() / scaler_path
|
def sample_to_numpy(sample, drop_cols=("_Id", "start_time")):
    """Convert a pandas row/frame (or array-like) to numpy, dropping metadata columns.

    Columns/labels in *drop_cols* that are absent are silently ignored.
    """
    if isinstance(sample, pd.Series):
        return sample.drop(labels=list(drop_cols), errors="ignore").to_numpy()
    if isinstance(sample, pd.DataFrame):
        return sample.drop(columns=list(drop_cols), errors="ignore").to_numpy()
    # Anything else is assumed to already be array-like.
    return np.asarray(sample)
||||||
else:
|
|
||||||
scaler = scalers.get("global", next(iter(scalers.values()), None))
|
def scale_sample(sample, use_scaling=False, scaler_path=None):
    """Apply the persisted scaler to *sample*.

    Returns the sample unchanged when scaling is disabled, no usable scaler
    object is found, or the scaler has no recorded feature names.
    """
    if not use_scaling or scaler_path is None:
        return sample

    path = Path(scaler_path)
    if not path.is_absolute():
        path = Path.cwd() / path
    normalizer = _load_serialized(path.resolve())

    # normalizer format from model_training/tools/scaler.py:
    # {"scalers": {...}, "method": "...", "scope": "..."}
    if isinstance(normalizer, dict):
        scalers = normalizer.get("scalers", {})
        scope = normalizer.get("scope", "global")
    else:
        scalers, scope = {}, "global"

    if scope == "global":
        scaler = scalers.get("global")
    else:
        # Non-global scopes fall back to the first available scaler entry.
        scaler = scalers.get("global", next(iter(scalers.values()), None))

    # The stored object may already be a raw scaler instead of the dict wrapper.
    if scaler is None and hasattr(normalizer, "transform"):
        scaler = normalizer
    if scaler is None or not hasattr(scaler, "transform"):
        return sample

    frame = sample.to_frame().T if isinstance(sample, pd.Series) else sample.copy()
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is None:
        return sample

    # Columns the scaler was not fitted on are passed through unchanged.
    known = set(feature_names)
    cols = [c for c in frame.columns if c in known]
    if cols:
        frame.loc[:, cols] = scaler.transform(frame.loc[:, cols])

    return frame.iloc[0] if isinstance(sample, pd.Series) else frame
||||||
message = buildMessage(valid, None, config_file_path, sample=sample)
|
|
||||||
sendMessage(config_file_path, message)
|
def main():
    """Pipeline entry point: fetch the latest DB row, validate, scale, predict, publish."""
    pd.set_option('future.no_silent_downcasting', True)  # may possibly be removed — TODO confirm

    config_file_path = Path("/home/edgekit/MSY_FS/fahrsimulator_msy2526_ai/predict_pipeline/config.yaml")
    with config_file_path.open("r", encoding="utf-8") as f:
        cfg = yaml.safe_load(f)

    db_cfg = cfg["database"]
    sample = getLastEntryFromSQLite(db_cfg["path"], db_cfg["table"], db_cfg["key"])
    valid, sample = replace_nan(sample, config_file_path=config_file_path)

    # Invalid samples are still reported so downstream consumers see the gap.
    if not valid:
        print("Sample invalid: more than 50% NaN.")
        message = buildMessage(valid, None, config_file_path, sample=sample)
        sendMessage(config_file_path, message)
        return

    sample = scale_sample(
        sample,
        use_scaling=cfg["scaler"]["use_scaling"],
        scaler_path=cfg["scaler"]["path"],
    )
    prediction = callModel(model_path=cfg["model"]["path"], sample=sample_to_numpy(sample))

    message = buildMessage(valid, prediction, config_file_path, sample=sample)
    sendMessage(config_file_path, message)


if __name__ == "__main__":
    main()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user