Process Healthcheck added
This commit is contained in:
parent
c7e9bce1d3
commit
1c1709f018
@ -7,6 +7,7 @@ import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Objects;
|
||||
import java.util.Properties;
|
||||
import java.util.function.BooleanSupplier;
|
||||
|
||||
/**
|
||||
* Starts and stops optional external helper processes used by the application.
|
||||
@ -223,10 +224,17 @@ public class ProcessManagerService {
|
||||
private static final String DEFAULT_SIGNALLING_PID_FILE =
|
||||
"C:\\Users\\Student\\Documents\\Dannick\\avatar\\signalling.pid";
|
||||
|
||||
private static final boolean DEFAULT_STARTUP_HEALTHCHECK_ENABLED = true;
|
||||
private static final long DEFAULT_STARTUP_HEALTHCHECK_TIMEOUT_MILLIS = 10_000L;
|
||||
private static final long DEFAULT_STARTUP_HEALTHCHECK_POLL_MILLIS = 250L;
|
||||
|
||||
private final Properties config;
|
||||
private final ProcessLauncher processLauncher;
|
||||
private final String unrealPidFile;
|
||||
private final String signallingPidFile;
|
||||
private final boolean startupHealthcheckEnabled;
|
||||
private final long startupHealthcheckTimeoutMillis;
|
||||
private final long startupHealthcheckPollMillis;
|
||||
|
||||
private Process pythonProcess;
|
||||
private Process unrealProcess;
|
||||
@ -273,6 +281,20 @@ public class ProcessManagerService {
|
||||
this.processLauncher = Objects.requireNonNull(processLauncher);
|
||||
this.unrealPidFile = Objects.requireNonNull(unrealPidFile);
|
||||
this.signallingPidFile = Objects.requireNonNull(signallingPidFile);
|
||||
this.startupHealthcheckEnabled = Boolean.parseBoolean(
|
||||
config.getProperty(
|
||||
"process.startup.healthcheck.enabled",
|
||||
String.valueOf(DEFAULT_STARTUP_HEALTHCHECK_ENABLED)
|
||||
)
|
||||
);
|
||||
this.startupHealthcheckTimeoutMillis = readPositiveLong(
|
||||
config.getProperty("process.startup.healthcheck.timeout.millis"),
|
||||
DEFAULT_STARTUP_HEALTHCHECK_TIMEOUT_MILLIS
|
||||
);
|
||||
this.startupHealthcheckPollMillis = readPositiveLong(
|
||||
config.getProperty("process.startup.healthcheck.poll.millis"),
|
||||
DEFAULT_STARTUP_HEALTHCHECK_POLL_MILLIS
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -312,10 +334,14 @@ public class ProcessManagerService {
|
||||
configureMqttSimulatorEnvironment(pb);
|
||||
|
||||
pythonProcess = processLauncher.launch(pb);
|
||||
verifyPythonStartupHealth();
|
||||
|
||||
Logger.info("PROCESS", "Mqtt Simulator gestartet");
|
||||
return ProcessStartStatus.started("mqtt_sim");
|
||||
} catch (Exception e) {
|
||||
if (e instanceof InterruptedException) {
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
Logger.error("PROCESS", "Mqtt Simulator Start fehlgeschlagen", e);
|
||||
return ProcessStartStatus.failed(
|
||||
"mqtt_sim",
|
||||
@ -339,9 +365,13 @@ public class ProcessManagerService {
|
||||
|
||||
try {
|
||||
startUnrealEngine();
|
||||
verifyUnrealStartupHealth();
|
||||
return ProcessStartStatus.started("unreal");
|
||||
|
||||
} catch (Exception e) {
|
||||
if (e instanceof InterruptedException) {
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
Logger.error("PROCESS", "Unreal Start fehlgeschlagen", e);
|
||||
return ProcessStartStatus.failed(
|
||||
"unreal",
|
||||
@ -367,17 +397,54 @@ public class ProcessManagerService {
|
||||
);
|
||||
|
||||
configureUnrealScriptEnvironment(pb);
|
||||
pb.redirectErrorStream(true);
|
||||
|
||||
unrealProcess = processLauncher.launch(pb);
|
||||
|
||||
//pb.directory(new File(exe).getParentFile());
|
||||
|
||||
pb.redirectErrorStream(true);
|
||||
|
||||
Logger.info("PROCESS",
|
||||
"Unreal Engine gestartet" + pb.command());
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies that the launched MQTT simulator process stays alive shortly after startup.
|
||||
*
|
||||
* @throws InterruptedException if health-check waiting is interrupted
|
||||
*/
|
||||
private void verifyPythonStartupHealth() throws InterruptedException {
|
||||
if (!startupHealthcheckEnabled) {
|
||||
return;
|
||||
}
|
||||
|
||||
boolean started = waitUntil(() -> pythonProcess != null && pythonProcess.isAlive());
|
||||
if (!started) {
|
||||
throw new IllegalStateException(
|
||||
"MQTT simulator startup health check failed: process is not running."
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies Unreal startup by waiting for both PID files and checking whether those PIDs are alive.
|
||||
*
|
||||
* @throws InterruptedException if health-check waiting is interrupted
|
||||
*/
|
||||
private void verifyUnrealStartupHealth() throws InterruptedException {
|
||||
if (!startupHealthcheckEnabled) {
|
||||
return;
|
||||
}
|
||||
|
||||
boolean healthy = waitUntil(() ->
|
||||
isPidAlive(readPidFromFile(unrealPidFile))
|
||||
&& isPidAlive(readPidFromFile(signallingPidFile))
|
||||
);
|
||||
|
||||
if (!healthy) {
|
||||
throw new IllegalStateException(buildUnrealHealthCheckFailureMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Stops all managed processes and attempts cleanup based on known PID files.
|
||||
*/
|
||||
@ -473,6 +540,119 @@ public class ProcessManagerService {
|
||||
return unrealProcess;
|
||||
}
|
||||
|
||||
/**
|
||||
* Waits until a startup condition is satisfied or health-check timeout is reached.
|
||||
*
|
||||
* @param condition startup condition to evaluate
|
||||
* @return {@code true} when condition becomes true before timeout
|
||||
* @throws InterruptedException if waiting is interrupted
|
||||
*/
|
||||
private boolean waitUntil(BooleanSupplier condition) throws InterruptedException {
|
||||
long deadline = System.currentTimeMillis() + startupHealthcheckTimeoutMillis;
|
||||
|
||||
while (System.currentTimeMillis() <= deadline) {
|
||||
if (condition.getAsBoolean()) {
|
||||
return true;
|
||||
}
|
||||
sleepUntilNextPoll(deadline);
|
||||
}
|
||||
|
||||
return condition.getAsBoolean();
|
||||
}
|
||||
|
||||
/**
|
||||
* Sleeps until next health-check poll or until timeout deadline, whichever is sooner.
|
||||
*
|
||||
* @param deadlineMillis absolute timeout deadline in milliseconds
|
||||
* @throws InterruptedException if sleep is interrupted
|
||||
*/
|
||||
private void sleepUntilNextPoll(long deadlineMillis) throws InterruptedException {
|
||||
long remaining = deadlineMillis - System.currentTimeMillis();
|
||||
if (remaining <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
long sleepMillis = Math.min(startupHealthcheckPollMillis, remaining);
|
||||
if (sleepMillis > 0) {
|
||||
Thread.sleep(sleepMillis);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a PID from a file path when present and parseable.
|
||||
*
|
||||
* @param pidFile absolute PID file path
|
||||
* @return parsed PID value or {@code null} when unavailable
|
||||
*/
|
||||
private static Long readPidFromFile(String pidFile) {
|
||||
try {
|
||||
if (pidFile == null || pidFile.isBlank()) {
|
||||
return null;
|
||||
}
|
||||
Path path = Path.of(pidFile);
|
||||
if (!Files.isRegularFile(path)) {
|
||||
return null;
|
||||
}
|
||||
return Long.parseLong(Files.readString(path).trim());
|
||||
} catch (Exception ignored) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether a PID maps to a currently alive process.
|
||||
*
|
||||
* @param pid process identifier
|
||||
* @return {@code true} when the process exists and is alive
|
||||
*/
|
||||
private static boolean isPidAlive(Long pid) {
|
||||
if (pid == null || pid <= 0) {
|
||||
return false;
|
||||
}
|
||||
return ProcessHandle.of(pid)
|
||||
.map(ProcessHandle::isAlive)
|
||||
.orElse(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a detailed health-check failure message for Unreal startup diagnostics.
|
||||
*
|
||||
* @return failure message with PID file and liveness details
|
||||
*/
|
||||
private String buildUnrealHealthCheckFailureMessage() {
|
||||
Long unrealPid = readPidFromFile(unrealPidFile);
|
||||
Long signallingPid = readPidFromFile(signallingPidFile);
|
||||
|
||||
return "Unreal startup health check failed: "
|
||||
+ describePidState("unreal", unrealPidFile, unrealPid)
|
||||
+ "; "
|
||||
+ describePidState("signalling", signallingPidFile, signallingPid);
|
||||
}
|
||||
|
||||
/**
|
||||
* Describes PID-file and liveness state for one process component.
|
||||
*
|
||||
* @param componentName component identifier
|
||||
* @param pidFile configured PID file path
|
||||
* @param pid parsed PID value
|
||||
* @return human-readable state summary
|
||||
*/
|
||||
private static String describePidState(
|
||||
String componentName,
|
||||
String pidFile,
|
||||
Long pid
|
||||
) {
|
||||
if (pid == null) {
|
||||
return componentName + " PID file missing or invalid (" + pidFile + ")";
|
||||
}
|
||||
|
||||
if (!isPidAlive(pid)) {
|
||||
return componentName + " PID " + pid + " is not running";
|
||||
}
|
||||
|
||||
return componentName + " PID " + pid + " is running";
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalizes string values loaded from properties and strips optional quotes.
|
||||
*
|
||||
@ -511,6 +691,27 @@ public class ProcessManagerService {
|
||||
return type + ": " + message;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses an optional positive long value and falls back when absent or invalid.
|
||||
*
|
||||
* @param rawValue raw config value
|
||||
* @param fallback fallback value
|
||||
* @return parsed positive value or fallback
|
||||
*/
|
||||
private static long readPositiveLong(String rawValue, long fallback) {
|
||||
String value = cleanConfigValue(rawValue);
|
||||
if (value == null || value.isBlank()) {
|
||||
return fallback;
|
||||
}
|
||||
|
||||
try {
|
||||
long parsed = Long.parseLong(value);
|
||||
return parsed > 0 ? parsed : fallback;
|
||||
} catch (NumberFormatException e) {
|
||||
return fallback;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Passes optional simulator settings from app config to the Python process environment.
|
||||
*
|
||||
|
||||
@ -36,6 +36,7 @@ public final class AppConfigValidator {
|
||||
|
||||
validateRequiredMqttSettings(config, errors);
|
||||
validateCoreUiAndOutputSettings(config, errors);
|
||||
validateProcessStartupSettings(config, errors);
|
||||
|
||||
boolean mqttSimulatorEnabled =
|
||||
readBoolean(config, errors, "mqtt_sim.enabled", false);
|
||||
@ -55,6 +56,33 @@ public final class AppConfigValidator {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates optional process startup health-check settings.
|
||||
*
|
||||
* @param config loaded properties
|
||||
* @param errors collected validation errors
|
||||
*/
|
||||
private static void validateProcessStartupSettings(
|
||||
Properties config,
|
||||
List<String> errors
|
||||
) {
|
||||
readBoolean(config, errors, "process.startup.healthcheck.enabled", true);
|
||||
validateOptionalIntegerInRange(
|
||||
config,
|
||||
errors,
|
||||
"process.startup.healthcheck.timeout.millis",
|
||||
1,
|
||||
Integer.MAX_VALUE
|
||||
);
|
||||
validateOptionalIntegerInRange(
|
||||
config,
|
||||
errors,
|
||||
"process.startup.healthcheck.poll.millis",
|
||||
1,
|
||||
Integer.MAX_VALUE
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates mandatory MQTT runtime keys.
|
||||
*
|
||||
|
||||
@ -29,6 +29,11 @@ mqtt_sim.interval_seconds=5
|
||||
# ===== ANIMATION =====
|
||||
animation.output.path=data/animation.json
|
||||
|
||||
# ===== PROCESS STARTUP HEALTHCHECK =====
|
||||
process.startup.healthcheck.enabled=true
|
||||
process.startup.healthcheck.timeout.millis=10000
|
||||
process.startup.healthcheck.poll.millis=250
|
||||
|
||||
# ===== UNREAL ENGINE =====
|
||||
unreal.enabled=true
|
||||
# unreal.executable=C:\\Users\\Student\\Documents\\Dannick\\avatar\\start_avatar.ps1
|
||||
|
||||
@ -44,6 +44,7 @@ class ProcessManagerServiceTest {
|
||||
void setUp() throws Exception {
|
||||
config = new Properties();
|
||||
launchedCommands = new ArrayList<>();
|
||||
config.setProperty("process.startup.healthcheck.enabled", "false");
|
||||
}
|
||||
|
||||
/**
|
||||
@ -84,6 +85,15 @@ class ProcessManagerServiceTest {
|
||||
when(process.pid()).thenReturn(123L);
|
||||
}
|
||||
|
||||
/**
|
||||
* Enables startup health checks with short polling and timeout for unit tests.
|
||||
*/
|
||||
private void enableFastStartupHealthcheck() {
|
||||
config.setProperty("process.startup.healthcheck.enabled", "true");
|
||||
config.setProperty("process.startup.healthcheck.timeout.millis", "20");
|
||||
config.setProperty("process.startup.healthcheck.poll.millis", "1");
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies that no processes are launched when all features are disabled.
|
||||
*/
|
||||
@ -152,6 +162,109 @@ class ProcessManagerServiceTest {
|
||||
assertFalse(report.getMqttSimulator().isEnabled());
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies Unreal startup enables merged error stream before process launch.
|
||||
*/
|
||||
@Test
|
||||
void startProcesses_unrealEnabled_redirectsErrorBeforeLaunch() {
|
||||
config.setProperty("mqtt_sim.enabled", "false");
|
||||
config.setProperty("unreal.enabled", "true");
|
||||
config.setProperty("unreal.executable", "avatar.ps1");
|
||||
|
||||
final boolean[] redirectEnabledAtLaunch = new boolean[1];
|
||||
|
||||
ProcessManagerService.ProcessLauncher launcher = processBuilder -> {
|
||||
redirectEnabledAtLaunch[0] = processBuilder.redirectErrorStream();
|
||||
return process;
|
||||
};
|
||||
|
||||
ProcessManagerService service = new ProcessManagerService(
|
||||
config,
|
||||
launcher,
|
||||
tempDir.resolve("unreal.pid").toString(),
|
||||
tempDir.resolve("signalling.pid").toString()
|
||||
);
|
||||
|
||||
ProcessManagerService.StartupReport report = service.startProcesses();
|
||||
|
||||
assertFalse(report.hasFailures());
|
||||
assertTrue(redirectEnabledAtLaunch[0]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies MQTT startup health check reports failure when process is not alive.
|
||||
*/
|
||||
@Test
|
||||
void startProcesses_mqttHealthcheckFails_reportsFailure() {
|
||||
enableFastStartupHealthcheck();
|
||||
config.setProperty("mqtt_sim.enabled", "true");
|
||||
config.setProperty("mqtt_sim.script", "sim.py");
|
||||
config.setProperty("python.path", "python");
|
||||
config.setProperty("unreal.enabled", "false");
|
||||
when(process.isAlive()).thenReturn(false);
|
||||
|
||||
ProcessManagerService service = createService();
|
||||
ProcessManagerService.StartupReport report = service.startProcesses();
|
||||
|
||||
assertTrue(report.hasFailures());
|
||||
assertTrue(report.getMqttSimulator().hasFailed());
|
||||
assertTrue(report.getMqttSimulator().getErrorMessage().contains("health check"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies Unreal startup health check fails when PID files are missing.
|
||||
*/
|
||||
@Test
|
||||
void startProcesses_unrealHealthcheckMissingPidFiles_reportsFailure() {
|
||||
enableFastStartupHealthcheck();
|
||||
config.setProperty("mqtt_sim.enabled", "false");
|
||||
config.setProperty("unreal.enabled", "true");
|
||||
config.setProperty("unreal.executable", "avatar.ps1");
|
||||
|
||||
ProcessManagerService service = createService();
|
||||
ProcessManagerService.StartupReport report = service.startProcesses();
|
||||
|
||||
assertTrue(report.hasFailures());
|
||||
assertTrue(report.getUnreal().hasFailed());
|
||||
assertTrue(report.getUnreal().getErrorMessage().contains("health check"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies Unreal startup health check succeeds when PID files contain alive process IDs.
|
||||
*
|
||||
* @throws Exception if PID files cannot be created
|
||||
*/
|
||||
@Test
|
||||
void startProcesses_unrealHealthcheckWithAlivePidFiles_startsSuccessfully() throws Exception {
|
||||
enableFastStartupHealthcheck();
|
||||
config.setProperty("mqtt_sim.enabled", "false");
|
||||
config.setProperty("unreal.enabled", "true");
|
||||
config.setProperty("unreal.executable", "avatar.ps1");
|
||||
|
||||
Path unrealPid = tempDir.resolve("unreal.pid");
|
||||
Path signallingPid = tempDir.resolve("signalling.pid");
|
||||
long currentPid = ProcessHandle.current().pid();
|
||||
|
||||
ProcessManagerService.ProcessLauncher launcher = processBuilder -> {
|
||||
launchedCommands.add(new ArrayList<>(processBuilder.command()));
|
||||
Files.writeString(unrealPid, String.valueOf(currentPid));
|
||||
Files.writeString(signallingPid, String.valueOf(currentPid));
|
||||
return process;
|
||||
};
|
||||
|
||||
ProcessManagerService service = new ProcessManagerService(
|
||||
config,
|
||||
launcher,
|
||||
unrealPid.toString(),
|
||||
signallingPid.toString()
|
||||
);
|
||||
|
||||
ProcessManagerService.StartupReport report = service.startProcesses();
|
||||
|
||||
assertFalse(report.hasFailures());
|
||||
assertTrue(report.getUnreal().isStarted());
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies shutdown issues taskkill commands for tracked process handles.
|
||||
*/
|
||||
|
||||
@ -46,6 +46,44 @@ class AppConfigValidatorTest {
|
||||
assertTrue(exception.getMessage().contains("mqtt_sim.enabled"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies strict boolean validation for startup health-check toggles.
|
||||
*/
|
||||
@Test
|
||||
void validate_withInvalidStartupHealthcheckBoolean_throws() {
|
||||
Properties config = baseConfig();
|
||||
config.setProperty("process.startup.healthcheck.enabled", "enabled");
|
||||
|
||||
IllegalStateException exception =
|
||||
assertThrows(
|
||||
IllegalStateException.class,
|
||||
() -> AppConfigValidator.validate(config)
|
||||
);
|
||||
|
||||
assertTrue(
|
||||
exception.getMessage().contains("process.startup.healthcheck.enabled")
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies startup health-check timeout must be a positive integer.
|
||||
*/
|
||||
@Test
|
||||
void validate_withNonPositiveStartupHealthcheckTimeout_throws() {
|
||||
Properties config = baseConfig();
|
||||
config.setProperty("process.startup.healthcheck.timeout.millis", "0");
|
||||
|
||||
IllegalStateException exception =
|
||||
assertThrows(
|
||||
IllegalStateException.class,
|
||||
() -> AppConfigValidator.validate(config)
|
||||
);
|
||||
|
||||
assertTrue(
|
||||
exception.getMessage().contains("process.startup.healthcheck.timeout.millis")
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies simulator-specific numeric validation when simulator is enabled.
|
||||
*
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user