java/com.sap.sailing.landscape/src/com/sap/sailing/landscape/impl/ArchiveCandidateMonitoringBackgroundTask.java
... ...
@@ -101,6 +101,7 @@ public class ArchiveCandidateMonitoringBackgroundTask implements Runnable {
101 101
// Three-day timeout (Duration.ONE_DAY times 3).
private final static Duration LONG_TIMEOUT = Duration.ONE_DAY.times(3);
102 102
// Exclusive upper bound: the load check passes only while the one-minute system load average is strictly below this value.
private final static double MAXIMUM_ONE_MINUTE_SYSTEM_LOAD_AVERAGE = 2.0;
103 103
// Exclusive upper bound applied to the default background and foreground thread pool executor queue sizes.
private final static int MAXIMUM_THREAD_POOL_QUEUE_SIZE = 10;
104
// Timeout handed to the "is ready" check: Landscape's process wait timeout plus its host wait timeout.
// NOTE(review): this constant is wrapped in Optional only to be unwrapped again with get() where it is used;
// a plain Duration would be simpler. Also, Landscape.WAIT_FOR_PROCESS_TIMEOUT / WAIT_FOR_HOST_TIMEOUT are
// presumably Optional<Duration>; if either were empty, get() would throw during class initialization - confirm.
+ private final static Optional<Duration> TIMEOUT_FIRST_CONTACT = Optional.of(Landscape.WAIT_FOR_PROCESS_TIMEOUT.get().plus(Landscape.WAIT_FOR_HOST_TIMEOUT.get()));
104 105
// Ten-minute timeout; going by its name it bounds the server comparison check (usage not visible in this excerpt).
private final static Duration SERVER_COMPARISON_TIMEOUT = Duration.ONE_MINUTE.times(10); // good for two or three attempts, usually
105 106
// One minute; presumably the pause between two comparison check attempts (usage not visible in this excerpt).
private final static Duration DELAY_BETWEEN_COMPARISON_CHECKS = Duration.ONE_MINUTE;
106 107
... ...
@@ -183,7 +184,9 @@ public class ArchiveCandidateMonitoringBackgroundTask implements Runnable {
183 184
logger.severe("Check "+currentCheck+" failed and has timed out; giving up on candidate "+replicaSet.getMaster().getHost().getHostname());
184 185
notifyProcessOwnerCandidateFailedToBecomeReadyForProduction(); // this ends the re-scheduling loop
185 186
} else {
186
- logger.info("Check "+currentCheck+" failed but has not yet timed out; re-scheduling to check again after "+currentCheck.getDelayAfterFailure());
187
+ logger.info("Check " + currentCheck + " failed with message \"" + currentCheck.getLastFailureMessage()
188
+ + "\" but has not yet timed out; re-scheduling to check again after "
189
+ + currentCheck.getDelayAfterFailure());
187 190
executor.schedule(this, currentCheck.getDelayAfterFailure().asMillis(), TimeUnit.MILLISECONDS);
188 191
}
189 192
}
... ...
@@ -192,14 +195,14 @@ public class ArchiveCandidateMonitoringBackgroundTask implements Runnable {
192 195
// Serialization version identifier of this check class.
private static final long serialVersionUID = -4265303532881568290L;
193 196
194 197
// Creates the check under the name "is ready". The change shown here swaps the timeout argument from
// LONG_TIMEOUT (three days) to TIMEOUT_FIRST_CONTACT (process wait timeout plus host wait timeout).
// NOTE(review): the second and third super arguments are presumably the check's overall timeout and the
// delay before re-checking after a failure - confirm against the superclass constructor.
private IsReady() {
195
- super("is ready", LONG_TIMEOUT, DELAY_BETWEEN_CHECKS);
198
+ super("is ready", TIMEOUT_FIRST_CONTACT.get(), DELAY_BETWEEN_CHECKS);
196 199
}
197 200
198 201
// Asks the replica set's master whether it is ready, passing Landscape.WAIT_FOR_PROCESS_TIMEOUT to that call.
// Returns the master's answer; when the answer is false, a failure message is recorded first.
// The change shown here adds the master host's private address to that failure message.
@Override
199 202
public boolean runCheck() throws Exception {
200 203
final boolean result = replicaSet.getMaster().isReady(Landscape.WAIT_FOR_PROCESS_TIMEOUT);
201 204
if (!result) {
202
- setLastFailureMessage("Candidate is not ready yet");
205
// Include the candidate's private address so the failure can be attributed to a specific host.
+ setLastFailureMessage("Candidate at "+replicaSet.getMaster().getHost().getPrivateAddress()+" not ready yet");
203 206
}
204 207
return result;
205 208
}
... ...
@@ -217,8 +220,9 @@ public class ArchiveCandidateMonitoringBackgroundTask implements Runnable {
217 220
final double lastMinuteSystemLoadAverage = replicaSet.getMaster().getLastMinuteSystemLoadAverage(Landscape.WAIT_FOR_PROCESS_TIMEOUT);
218 221
final boolean result = lastMinuteSystemLoadAverage < MAXIMUM_ONE_MINUTE_SYSTEM_LOAD_AVERAGE;
219 222
if (!result) {
220
- setLastFailureMessage("Candidate has too high system load average of "+lastMinuteSystemLoadAverage+
221
- " which is still above the maximum of "+MAXIMUM_ONE_MINUTE_SYSTEM_LOAD_AVERAGE);
223
+ setLastFailureMessage("Candidate at " + replicaSet.getMaster().getHost().getPrivateAddress()
224
+ + " has too high system load average of " + lastMinuteSystemLoadAverage
225
+ + " which is still above the maximum of " + MAXIMUM_ONE_MINUTE_SYSTEM_LOAD_AVERAGE);
222 226
}
223 227
return result;
224 228
}
... ...
@@ -236,7 +240,8 @@ public class ArchiveCandidateMonitoringBackgroundTask implements Runnable {
236 240
final int defaultBackgroundThreadPoolExecutorQueueSize = replicaSet.getMaster().getDefaultBackgroundThreadPoolExecutorQueueSize(Landscape.WAIT_FOR_PROCESS_TIMEOUT);
237 241
final boolean result = defaultBackgroundThreadPoolExecutorQueueSize < MAXIMUM_THREAD_POOL_QUEUE_SIZE;
238 242
if (!result) {
239
- setLastFailureMessage("Candidate has too many tasks in default background thread pool executor queue: "+defaultBackgroundThreadPoolExecutorQueueSize+
243
+ setLastFailureMessage("Candidate at " + replicaSet.getMaster().getHost().getPrivateAddress()
244
+ + " has too many tasks in default background thread pool executor queue: "+defaultBackgroundThreadPoolExecutorQueueSize+
240 245
" which is still above the maximum of "+MAXIMUM_THREAD_POOL_QUEUE_SIZE);
241 246
}
242 247
return result;
... ...
@@ -255,7 +260,8 @@ public class ArchiveCandidateMonitoringBackgroundTask implements Runnable {
255 260
final int defaultForegroundThreadPoolExecutorQueueSize = replicaSet.getMaster().getDefaultForegroundThreadPoolExecutorQueueSize(Landscape.WAIT_FOR_PROCESS_TIMEOUT);
256 261
final boolean result = defaultForegroundThreadPoolExecutorQueueSize < MAXIMUM_THREAD_POOL_QUEUE_SIZE;
257 262
if (!result) {
258
- setLastFailureMessage("Candidate has too many tasks in default foreground thread pool executor queue: "+defaultForegroundThreadPoolExecutorQueueSize+
263
+ setLastFailureMessage("Candidate at "+replicaSet.getMaster().getHost().getPrivateAddress()
264
+ + " has too many tasks in default foreground thread pool executor queue: "+defaultForegroundThreadPoolExecutorQueueSize+
259 265
" which is still above the maximum of "+MAXIMUM_THREAD_POOL_QUEUE_SIZE);
260 266
}
261 267
return result;