Process Healthcheck added

This commit is contained in:
Niklas Aumueller 2026-03-06 18:39:59 +01:00
parent c7e9bce1d3
commit 1c1709f018
5 changed files with 387 additions and 2 deletions

View File

@ -7,6 +7,7 @@ import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.Objects; import java.util.Objects;
import java.util.Properties; import java.util.Properties;
import java.util.function.BooleanSupplier;
/** /**
* Starts and stops optional external helper processes used by the application. * Starts and stops optional external helper processes used by the application.
@ -223,10 +224,17 @@ public class ProcessManagerService {
private static final String DEFAULT_SIGNALLING_PID_FILE = private static final String DEFAULT_SIGNALLING_PID_FILE =
"C:\\Users\\Student\\Documents\\Dannick\\avatar\\signalling.pid"; "C:\\Users\\Student\\Documents\\Dannick\\avatar\\signalling.pid";
private static final boolean DEFAULT_STARTUP_HEALTHCHECK_ENABLED = true;
private static final long DEFAULT_STARTUP_HEALTHCHECK_TIMEOUT_MILLIS = 10_000L;
private static final long DEFAULT_STARTUP_HEALTHCHECK_POLL_MILLIS = 250L;
private final Properties config; private final Properties config;
private final ProcessLauncher processLauncher; private final ProcessLauncher processLauncher;
private final String unrealPidFile; private final String unrealPidFile;
private final String signallingPidFile; private final String signallingPidFile;
private final boolean startupHealthcheckEnabled;
private final long startupHealthcheckTimeoutMillis;
private final long startupHealthcheckPollMillis;
private Process pythonProcess; private Process pythonProcess;
private Process unrealProcess; private Process unrealProcess;
@ -273,6 +281,20 @@ public class ProcessManagerService {
this.processLauncher = Objects.requireNonNull(processLauncher); this.processLauncher = Objects.requireNonNull(processLauncher);
this.unrealPidFile = Objects.requireNonNull(unrealPidFile); this.unrealPidFile = Objects.requireNonNull(unrealPidFile);
this.signallingPidFile = Objects.requireNonNull(signallingPidFile); this.signallingPidFile = Objects.requireNonNull(signallingPidFile);
this.startupHealthcheckEnabled = Boolean.parseBoolean(
config.getProperty(
"process.startup.healthcheck.enabled",
String.valueOf(DEFAULT_STARTUP_HEALTHCHECK_ENABLED)
)
);
this.startupHealthcheckTimeoutMillis = readPositiveLong(
config.getProperty("process.startup.healthcheck.timeout.millis"),
DEFAULT_STARTUP_HEALTHCHECK_TIMEOUT_MILLIS
);
this.startupHealthcheckPollMillis = readPositiveLong(
config.getProperty("process.startup.healthcheck.poll.millis"),
DEFAULT_STARTUP_HEALTHCHECK_POLL_MILLIS
);
} }
/** /**
@ -312,10 +334,14 @@ public class ProcessManagerService {
configureMqttSimulatorEnvironment(pb); configureMqttSimulatorEnvironment(pb);
pythonProcess = processLauncher.launch(pb); pythonProcess = processLauncher.launch(pb);
verifyPythonStartupHealth();
Logger.info("PROCESS", "Mqtt Simulator gestartet"); Logger.info("PROCESS", "Mqtt Simulator gestartet");
return ProcessStartStatus.started("mqtt_sim"); return ProcessStartStatus.started("mqtt_sim");
} catch (Exception e) { } catch (Exception e) {
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
Logger.error("PROCESS", "Mqtt Simulator Start fehlgeschlagen", e); Logger.error("PROCESS", "Mqtt Simulator Start fehlgeschlagen", e);
return ProcessStartStatus.failed( return ProcessStartStatus.failed(
"mqtt_sim", "mqtt_sim",
@ -339,9 +365,13 @@ public class ProcessManagerService {
try { try {
startUnrealEngine(); startUnrealEngine();
verifyUnrealStartupHealth();
return ProcessStartStatus.started("unreal"); return ProcessStartStatus.started("unreal");
} catch (Exception e) { } catch (Exception e) {
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
Logger.error("PROCESS", "Unreal Start fehlgeschlagen", e); Logger.error("PROCESS", "Unreal Start fehlgeschlagen", e);
return ProcessStartStatus.failed( return ProcessStartStatus.failed(
"unreal", "unreal",
@ -367,17 +397,54 @@ public class ProcessManagerService {
); );
configureUnrealScriptEnvironment(pb); configureUnrealScriptEnvironment(pb);
pb.redirectErrorStream(true);
unrealProcess = processLauncher.launch(pb); unrealProcess = processLauncher.launch(pb);
//pb.directory(new File(exe).getParentFile()); //pb.directory(new File(exe).getParentFile());
pb.redirectErrorStream(true);
Logger.info("PROCESS", Logger.info("PROCESS",
"Unreal Engine gestartet" + pb.command()); "Unreal Engine gestartet" + pb.command());
} }
/**
 * Checks that the launched MQTT simulator process is running right after launch.
 *
 * <p>This is a single liveness probe rather than a poll. A {@link Process} that has
 * terminated never reports {@code isAlive()} again, so polling a dead process until the
 * health-check timeout expires cannot change the outcome; it only delays the failure
 * report by the full timeout.
 *
 * <p>NOTE(review): the probe catches a simulator that is missing or has already exited.
 * It does not catch one that crashes a moment after launch; if that matters, a short
 * observation window would be needed here — confirm the intended contract.
 *
 * @throws InterruptedException not thrown by the current implementation; kept in the
 *     signature so existing callers keep compiling unchanged
 * @throws IllegalStateException if no simulator process is tracked or it is not alive
 */
private void verifyPythonStartupHealth() throws InterruptedException {
    if (!startupHealthcheckEnabled) {
        return;
    }
    // Read the field once so the null check and the liveness probe see the same handle.
    Process simulator = pythonProcess;
    if (simulator == null || !simulator.isAlive()) {
        throw new IllegalStateException(
            "MQTT simulator startup health check failed: process is not running."
        );
    }
}
/**
 * Confirms Unreal startup through the two configured PID files.
 *
 * <p>Polls via {@code waitUntil} until the PID read from the Unreal PID file and the PID
 * read from the signalling PID file both belong to live processes. Does nothing when
 * startup health checks are disabled.
 *
 * <p>NOTE(review): PID files left over from an earlier run would be read here as well —
 * confirm the launcher script removes or rewrites them before this check runs.
 *
 * @throws InterruptedException if health-check waiting is interrupted
 * @throws IllegalStateException if both PIDs are not alive once waiting has finished
 */
private void verifyUnrealStartupHealth() throws InterruptedException {
    if (startupHealthcheckEnabled) {
        BooleanSupplier bothComponentsAlive = () -> {
            // Signalling is only inspected once the Unreal PID is known to be alive.
            if (!isPidAlive(readPidFromFile(unrealPidFile))) {
                return false;
            }
            return isPidAlive(readPidFromFile(signallingPidFile));
        };
        if (!waitUntil(bothComponentsAlive)) {
            throw new IllegalStateException(buildUnrealHealthCheckFailureMessage());
        }
    }
}
/** /**
* Stops all managed processes and attempts cleanup based on known PID files. * Stops all managed processes and attempts cleanup based on known PID files.
*/ */
@ -473,6 +540,119 @@ public class ProcessManagerService {
return unrealProcess; return unrealProcess;
} }
/**
 * Waits until a startup condition is satisfied or health-check timeout is reached.
 *
 * <p>Elapsed time is measured with {@link System#nanoTime()} so that wall-clock
 * adjustments (clock sync, manual changes) can neither shorten nor extend the timeout.
 * The condition is evaluated one final time after the deadline has passed.
 *
 * @param condition startup condition to evaluate
 * @return {@code true} when condition becomes true before timeout or on the final check
 * @throws InterruptedException if waiting is interrupted
 */
private boolean waitUntil(BooleanSupplier condition) throws InterruptedException {
    // toNanos saturates at Long.MAX_VALUE instead of overflowing for very large timeouts.
    long timeoutNanos =
        java.util.concurrent.TimeUnit.MILLISECONDS.toNanos(startupHealthcheckTimeoutMillis);
    // The sum may wrap numerically; that is fine because only differences against
    // System.nanoTime() are ever compared, never the absolute value.
    long deadlineNanos = System.nanoTime() + timeoutNanos;
    while (deadlineNanos - System.nanoTime() >= 0) {
        if (condition.getAsBoolean()) {
            return true;
        }
        sleepUntilNextPoll(deadlineNanos);
    }
    return condition.getAsBoolean();
}
/**
 * Sleeps until next health-check poll or until timeout deadline, whichever is sooner.
 *
 * @param deadlineNanos timeout deadline on the {@link System#nanoTime()} time line
 * @throws InterruptedException if sleep is interrupted
 */
private void sleepUntilNextPoll(long deadlineNanos) throws InterruptedException {
    long remainingNanos = deadlineNanos - System.nanoTime();
    if (remainingNanos <= 0) {
        return;
    }
    long pollNanos =
        java.util.concurrent.TimeUnit.MILLISECONDS.toNanos(startupHealthcheckPollMillis);
    // Never sleep past the deadline, even when the poll interval exceeds the time left.
    java.util.concurrent.TimeUnit.NANOSECONDS.sleep(Math.min(pollNanos, remainingNanos));
}
/**
 * Loads a process id from the given PID file, if one can be obtained.
 *
 * <p>The file content is trimmed and parsed as a decimal {@code long}. Any failure along
 * the way (invalid path, unreadable file, non-numeric content) is deliberately treated
 * as "no PID available" rather than as an error.
 *
 * @param pidFile path of the PID file; may be {@code null} or blank
 * @return the parsed PID, or {@code null} when the path is null/blank, is not a regular
 *     file, or cannot be read and parsed
 */
private static Long readPidFromFile(String pidFile) {
    if (pidFile == null || pidFile.isBlank()) {
        return null;
    }
    try {
        Path candidate = Path.of(pidFile);
        return Files.isRegularFile(candidate)
            ? Long.valueOf(Files.readString(candidate).trim())
            : null;
    } catch (Exception ignored) {
        // Best effort: callers only need to know that no usable PID exists.
        return null;
    }
}
/**
 * Tells whether the given PID currently belongs to a live process.
 *
 * @param pid process identifier; {@code null} and non-positive values are rejected
 * @return {@code true} only when a process handle exists for the PID and reports alive
 */
private static boolean isPidAlive(Long pid) {
    if (pid != null && pid > 0) {
        return ProcessHandle.of(pid)
            .filter(ProcessHandle::isAlive)
            .isPresent();
    }
    return false;
}
/**
 * Assembles the diagnostic text reported when the Unreal startup health check fails.
 *
 * <p>Both PID files are re-read at call time, so the message reflects their state at
 * the moment the message is built.
 *
 * @return failure message listing the state of the "unreal" and "signalling" components
 */
private String buildUnrealHealthCheckFailureMessage() {
    Long unrealPid = readPidFromFile(unrealPidFile);
    Long signallingPid = readPidFromFile(signallingPidFile);
    StringBuilder message = new StringBuilder("Unreal startup health check failed: ");
    message.append(describePidState("unreal", unrealPidFile, unrealPid));
    message.append("; ");
    message.append(describePidState("signalling", signallingPidFile, signallingPid));
    return message.toString();
}
/**
 * Summarizes what is known about one component's PID file and process.
 *
 * @param componentName label used at the start of the summary
 * @param pidFile configured PID file path, echoed when no PID could be read
 * @param pid PID read from the file, or {@code null} when none was available
 * @return one of: PID file missing or invalid, PID not running, or PID running
 */
private static String describePidState(
    String componentName,
    String pidFile,
    Long pid
) {
    if (pid == null) {
        return componentName + " PID file missing or invalid (" + pidFile + ")";
    }
    String liveness = isPidAlive(pid) ? " is running" : " is not running";
    return componentName + " PID " + pid + liveness;
}
/** /**
* Normalizes string values loaded from properties and strips optional quotes. * Normalizes string values loaded from properties and strips optional quotes.
* *
@ -511,6 +691,27 @@ public class ProcessManagerService {
return type + ": " + message; return type + ": " + message;
} }
/**
 * Reads an optional, strictly positive {@code long} setting.
 *
 * <p>The raw value is first passed through {@code cleanConfigValue}. The fallback is
 * used when the cleaned value is {@code null}, blank, not a valid {@code long}, or not
 * greater than zero.
 *
 * @param rawValue raw config value, possibly {@code null}
 * @param fallback value returned when no usable positive number is configured
 * @return the configured positive value, otherwise {@code fallback}
 */
private static long readPositiveLong(String rawValue, long fallback) {
    String cleaned = cleanConfigValue(rawValue);
    if (cleaned != null && !cleaned.isBlank()) {
        try {
            long candidate = Long.parseLong(cleaned);
            if (candidate > 0) {
                return candidate;
            }
        } catch (NumberFormatException ignored) {
            // Unparseable input is handled like an absent setting.
        }
    }
    return fallback;
}
/** /**
* Passes optional simulator settings from app config to the Python process environment. * Passes optional simulator settings from app config to the Python process environment.
* *

View File

@ -36,6 +36,7 @@ public final class AppConfigValidator {
validateRequiredMqttSettings(config, errors); validateRequiredMqttSettings(config, errors);
validateCoreUiAndOutputSettings(config, errors); validateCoreUiAndOutputSettings(config, errors);
validateProcessStartupSettings(config, errors);
boolean mqttSimulatorEnabled = boolean mqttSimulatorEnabled =
readBoolean(config, errors, "mqtt_sim.enabled", false); readBoolean(config, errors, "mqtt_sim.enabled", false);
@ -55,6 +56,33 @@ public final class AppConfigValidator {
} }
} }
/**
 * Checks the optional settings that control the process startup health check.
 *
 * <p>The enable flag goes through {@code readBoolean}; its return value is not needed
 * here, the call is made so that problems are recorded in {@code errors}. The timeout
 * and poll interval are each checked as optional integers in the range
 * {@code 1..Integer.MAX_VALUE}.
 *
 * @param config loaded properties
 * @param errors collected validation errors
 */
private static void validateProcessStartupSettings(
    Properties config,
    List<String> errors
) {
    readBoolean(config, errors, "process.startup.healthcheck.enabled", true);
    String[] millisKeys = {
        "process.startup.healthcheck.timeout.millis",
        "process.startup.healthcheck.poll.millis"
    };
    for (String millisKey : millisKeys) {
        validateOptionalIntegerInRange(config, errors, millisKey, 1, Integer.MAX_VALUE);
    }
}
/** /**
* Validates mandatory MQTT runtime keys. * Validates mandatory MQTT runtime keys.
* *

View File

@ -29,6 +29,11 @@ mqtt_sim.interval_seconds=5
# ===== ANIMATION ===== # ===== ANIMATION =====
animation.output.path=data/animation.json animation.output.path=data/animation.json
# ===== PROCESS STARTUP HEALTHCHECK =====
process.startup.healthcheck.enabled=true
process.startup.healthcheck.timeout.millis=10000
process.startup.healthcheck.poll.millis=250
# ===== UNREAL ENGINE ===== # ===== UNREAL ENGINE =====
unreal.enabled=true unreal.enabled=true
# unreal.executable=C:\\Users\\Student\\Documents\\Dannick\\avatar\\start_avatar.ps1 # unreal.executable=C:\\Users\\Student\\Documents\\Dannick\\avatar\\start_avatar.ps1

View File

@ -44,6 +44,7 @@ class ProcessManagerServiceTest {
void setUp() throws Exception { void setUp() throws Exception {
config = new Properties(); config = new Properties();
launchedCommands = new ArrayList<>(); launchedCommands = new ArrayList<>();
config.setProperty("process.startup.healthcheck.enabled", "false");
} }
/** /**
@ -84,6 +85,15 @@ class ProcessManagerServiceTest {
when(process.pid()).thenReturn(123L); when(process.pid()).thenReturn(123L);
} }
/**
 * Switches the startup health check on for a test, with a 20 ms timeout and a 1 ms
 * poll interval so that failing checks finish quickly.
 */
private void enableFastStartupHealthcheck() {
    config.setProperty("process.startup.healthcheck.poll.millis", "1");
    config.setProperty("process.startup.healthcheck.timeout.millis", "20");
    config.setProperty("process.startup.healthcheck.enabled", "true");
}
/** /**
* Verifies that no processes are launched when all features are disabled. * Verifies that no processes are launched when all features are disabled.
*/ */
@ -152,6 +162,109 @@ class ProcessManagerServiceTest {
assertFalse(report.getMqttSimulator().isEnabled()); assertFalse(report.getMqttSimulator().isEnabled());
} }
/**
 * Checks that the process builder already has error-stream merging enabled at the
 * moment the launcher is invoked for Unreal.
 */
@Test
void startProcesses_unrealEnabled_redirectsErrorBeforeLaunch() {
    config.setProperty("unreal.executable", "avatar.ps1");
    config.setProperty("unreal.enabled", "true");
    config.setProperty("mqtt_sim.enabled", "false");
    // Captures the builder flag as seen by the launcher, i.e. at launch time.
    java.util.concurrent.atomic.AtomicBoolean mergedAtLaunch =
        new java.util.concurrent.atomic.AtomicBoolean(false);
    ProcessManagerService.ProcessLauncher recordingLauncher = builder -> {
        mergedAtLaunch.set(builder.redirectErrorStream());
        return process;
    };
    String unrealPidPath = tempDir.resolve("unreal.pid").toString();
    String signallingPidPath = tempDir.resolve("signalling.pid").toString();
    ProcessManagerService service = new ProcessManagerService(
        config,
        recordingLauncher,
        unrealPidPath,
        signallingPidPath
    );
    ProcessManagerService.StartupReport report = service.startProcesses();
    assertFalse(report.hasFailures());
    assertTrue(mergedAtLaunch.get());
}
/**
 * Checks that a simulator process reporting "not alive" makes the MQTT startup status
 * fail with a health-check error message.
 */
@Test
void startProcesses_mqttHealthcheckFails_reportsFailure() {
    enableFastStartupHealthcheck();
    config.setProperty("unreal.enabled", "false");
    config.setProperty("python.path", "python");
    config.setProperty("mqtt_sim.script", "sim.py");
    config.setProperty("mqtt_sim.enabled", "true");
    when(process.isAlive()).thenReturn(false);
    ProcessManagerService.StartupReport report = createService().startProcesses();
    assertTrue(report.hasFailures());
    assertTrue(report.getMqttSimulator().hasFailed());
    String errorMessage = report.getMqttSimulator().getErrorMessage();
    assertTrue(errorMessage.contains("health check"));
}
/**
 * Checks that Unreal startup is reported as failed with a health-check error message
 * when the test does not create any PID files.
 */
@Test
void startProcesses_unrealHealthcheckMissingPidFiles_reportsFailure() {
    enableFastStartupHealthcheck();
    config.setProperty("unreal.executable", "avatar.ps1");
    config.setProperty("unreal.enabled", "true");
    config.setProperty("mqtt_sim.enabled", "false");
    ProcessManagerService.StartupReport report = createService().startProcesses();
    assertTrue(report.hasFailures());
    assertTrue(report.getUnreal().hasFailed());
    String errorMessage = report.getUnreal().getErrorMessage();
    assertTrue(errorMessage.contains("health check"));
}
/**
 * Checks that Unreal startup succeeds when both PID files name a live process.
 *
 * <p>The fake launcher writes the PID of the running test JVM into both files, which
 * gives the health check a PID that is alive for the duration of the test.
 *
 * @throws Exception if PID files cannot be created
 */
@Test
void startProcesses_unrealHealthcheckWithAlivePidFiles_startsSuccessfully() throws Exception {
    enableFastStartupHealthcheck();
    config.setProperty("unreal.executable", "avatar.ps1");
    config.setProperty("unreal.enabled", "true");
    config.setProperty("mqtt_sim.enabled", "false");
    Path unrealPidPath = tempDir.resolve("unreal.pid");
    Path signallingPidPath = tempDir.resolve("signalling.pid");
    long ownPid = ProcessHandle.current().pid();
    ProcessManagerService.ProcessLauncher pidWritingLauncher = builder -> {
        launchedCommands.add(new ArrayList<>(builder.command()));
        Files.writeString(unrealPidPath, String.valueOf(ownPid));
        Files.writeString(signallingPidPath, String.valueOf(ownPid));
        return process;
    };
    ProcessManagerService service = new ProcessManagerService(
        config,
        pidWritingLauncher,
        unrealPidPath.toString(),
        signallingPidPath.toString()
    );
    ProcessManagerService.StartupReport report = service.startProcesses();
    assertFalse(report.hasFailures());
    assertTrue(report.getUnreal().isStarted());
}
/** /**
* Verifies shutdown issues taskkill commands for tracked process handles. * Verifies shutdown issues taskkill commands for tracked process handles.
*/ */

View File

@ -46,6 +46,44 @@ class AppConfigValidatorTest {
assertTrue(exception.getMessage().contains("mqtt_sim.enabled")); assertTrue(exception.getMessage().contains("mqtt_sim.enabled"));
} }
/**
 * Checks that a non-boolean value for the startup health-check toggle is rejected and
 * that the error message names the offending key.
 */
@Test
void validate_withInvalidStartupHealthcheckBoolean_throws() {
    String key = "process.startup.healthcheck.enabled";
    Properties config = baseConfig();
    config.setProperty(key, "enabled");
    IllegalStateException exception = assertThrows(
        IllegalStateException.class,
        () -> AppConfigValidator.validate(config)
    );
    String message = exception.getMessage();
    assertTrue(message.contains(key));
}
/**
 * Checks that a startup health-check timeout of zero is rejected and that the error
 * message names the offending key.
 */
@Test
void validate_withNonPositiveStartupHealthcheckTimeout_throws() {
    String key = "process.startup.healthcheck.timeout.millis";
    Properties config = baseConfig();
    config.setProperty(key, "0");
    IllegalStateException exception = assertThrows(
        IllegalStateException.class,
        () -> AppConfigValidator.validate(config)
    );
    String message = exception.getMessage();
    assertTrue(message.contains(key));
}
/** /**
* Verifies simulator-specific numeric validation when simulator is enabled. * Verifies simulator-specific numeric validation when simulator is enabled.
* *