1214bcfe4ec087ac7386f5673fcbc2376f9b6e02
java/com.sap.sailing.landscape/src/com/sap/sailing/landscape/impl/ArchiveCandidateMonitoringBackgroundTask.java
| ... | ... | @@ -1,6 +1,5 @@ |
| 1 | 1 | package com.sap.sailing.landscape.impl; |
| 2 | 2 | |
| 3 | -import java.io.IOException; |
|
| 4 | 3 | import java.util.Arrays; |
| 5 | 4 | import java.util.Iterator; |
| 6 | 5 | import java.util.LinkedList; |
| ... | ... | @@ -8,7 +7,6 @@ import java.util.List; |
| 8 | 7 | import java.util.Optional; |
| 9 | 8 | import java.util.concurrent.ScheduledExecutorService; |
| 10 | 9 | import java.util.concurrent.TimeUnit; |
| 11 | -import java.util.concurrent.TimeoutException; |
|
| 12 | 10 | import java.util.logging.Logger; |
| 13 | 11 | |
| 14 | 12 | import org.apache.shiro.subject.Subject; |
| ... | ... | @@ -16,7 +14,9 @@ import org.apache.shiro.subject.Subject; |
| 16 | 14 | import com.sap.sailing.landscape.SailingAnalyticsMetrics; |
| 17 | 15 | import com.sap.sailing.landscape.SailingAnalyticsProcess; |
| 18 | 16 | import com.sap.sse.common.Duration; |
| 17 | +import com.sap.sse.common.Named; |
|
| 19 | 18 | import com.sap.sse.common.TimePoint; |
| 19 | +import com.sap.sse.common.impl.NamedImpl; |
|
| 20 | 20 | import com.sap.sse.landscape.Landscape; |
| 21 | 21 | import com.sap.sse.landscape.RotatingFileBasedLog; |
| 22 | 22 | import com.sap.sse.landscape.aws.AwsApplicationReplicaSet; |
| ... | ... | @@ -50,14 +50,40 @@ import com.sap.sse.landscape.aws.ReverseProxy; |
| 50 | 50 | * |
| 51 | 51 | */ |
| 52 | 52 | public class ArchiveCandidateMonitoringBackgroundTask implements Runnable { |
| 53 | - @FunctionalInterface |
|
| 54 | - private static interface BooleanSupplierWithException { |
|
| 55 | - boolean getAsBoolean() throws Exception; |
|
| 53 | + private interface Check extends Named { |
|
| 54 | + boolean runCheck() throws Exception; |
|
| 55 | + boolean hasTimedOut(); |
|
| 56 | + Duration getDelayAfterFailure(); |
|
| 57 | + } |
|
| 58 | + |
|
| 59 | + private abstract class AbstractCheck extends NamedImpl implements Check { |
|
| 60 | + private static final long serialVersionUID = -8809199091635882129L; |
|
| 61 | + private final TimePoint creationTime; |
|
| 62 | + private final Duration timeout; |
|
| 63 | + private final Duration delayAfterFailure; |
|
| 64 | + |
|
| 65 | + public AbstractCheck(String name, Duration timeout, Duration delayAfterFailure) { |
|
| 66 | + super(name); |
|
| 67 | + this.creationTime = TimePoint.now(); |
|
| 68 | + this.timeout = timeout; |
|
| 69 | + this.delayAfterFailure = delayAfterFailure; |
|
| 70 | + } |
|
| 71 | + |
|
| 72 | + @Override |
|
| 73 | + public boolean hasTimedOut() { |
|
| 74 | + return creationTime.until(TimePoint.now()).compareTo(timeout) > 0; |
|
| 75 | + } |
|
| 76 | + |
|
| 77 | + @Override |
|
| 78 | + public Duration getDelayAfterFailure() { |
|
| 79 | + return delayAfterFailure; |
|
| 80 | + } |
|
| 56 | 81 | } |
| 57 | 82 | |
| 58 | 83 | private static final Logger logger = Logger.getLogger(ArchiveCandidateMonitoringBackgroundTask.class.getName()); |
| 59 | 84 | |
| 60 | 85 | private final static Duration DELAY_BETWEEN_CHECKS = Duration.ONE_MINUTE.times(5); |
| 86 | + private final static Duration LONG_TIMEOUT = Duration.ONE_DAY.times(3); |
|
| 61 | 87 | private final static double MAXIMUM_ONE_MINUTE_SYSTEM_LOAD_AVERAGE = 2.0; |
| 62 | 88 | private final static int MAXIMUM_THREAD_POOL_QUEUE_SIZE = 10; |
| 63 | 89 | private final static Optional<Duration> TIMEOUT_FIRST_CONTACT = Optional.of(Landscape.WAIT_FOR_PROCESS_TIMEOUT.get().plus(Landscape.WAIT_FOR_HOST_TIMEOUT.get())); |
| ... | ... | @@ -70,10 +96,9 @@ public class ArchiveCandidateMonitoringBackgroundTask implements Runnable { |
| 70 | 96 | private final ScheduledExecutorService executor; |
| 71 | 97 | private final TimePoint firstRun; |
| 72 | 98 | private final List<String> messagesToSendToProcessOwner; |
| 73 | - private Iterable<BooleanSupplierWithException> checks; |
|
| 74 | - private Iterator<BooleanSupplierWithException> checksIterator; |
|
| 75 | - private BooleanSupplierWithException currentCheck; |
|
| 76 | - private boolean candidateSeenServingStatusRequest; |
|
| 99 | + private Iterable<Check> checks; |
|
| 100 | + private Iterator<Check> checksIterator; |
|
| 101 | + private Check currentCheck; |
|
| 77 | 102 | |
| 78 | 103 | public ArchiveCandidateMonitoringBackgroundTask(Subject subject, AwsLandscape<String> landscape, |
| 79 | 104 | AwsApplicationReplicaSet<String, SailingAnalyticsMetrics, SailingAnalyticsProcess<String>> replicaSet, |
| ... | ... | @@ -89,14 +114,13 @@ public class ArchiveCandidateMonitoringBackgroundTask implements Runnable { |
| 89 | 114 | this.executor = executor; |
| 90 | 115 | this.firstRun = TimePoint.now(); |
| 91 | 116 | this.messagesToSendToProcessOwner = new LinkedList<>(); |
| 92 | - this.candidateSeenServingStatusRequest = false; |
|
| 93 | 117 | this.checks = Arrays.asList( |
| 94 | - this::isReady, |
|
| 95 | - this::hasLowEnoughSystemLoad, |
|
| 96 | - this::hasShortEnoughDefaultBackgroundThreadPoolExecutorQueue, |
|
| 97 | - this::hasShortEnoughDefaultForegroundThreadPoolExecutorQueue, |
|
| 98 | - this::compareServersWithRestAPI, |
|
| 99 | - this::compareServersByLeaderboardGroups); |
|
| 118 | + new IsReady(), |
|
| 119 | + new HasLowEnoughSystemLoad(), |
|
| 120 | + new HasShortEnoughDefaultBackgroundThreadPoolExecutorQueue(), |
|
| 121 | + new HasShortEnoughDefaultForegroundThreadPoolExecutorQueue(), |
|
| 122 | + new CompareServersWithRestAPI(), |
|
| 123 | + new CompareServersByLeaderboardGroups()); |
|
| 100 | 124 | this.checksIterator = this.checks.iterator(); |
| 101 | 125 | this.currentCheck = checksIterator.next(); |
| 102 | 126 | } |
| ... | ... | @@ -104,48 +128,109 @@ public class ArchiveCandidateMonitoringBackgroundTask implements Runnable { |
| 104 | 128 | @Override |
| 105 | 129 | public void run() { |
| 106 | 130 | try { |
| 107 | - if (currentCheck.getAsBoolean()) { |
|
| 108 | - logger.info("Another check passed."); |
|
| 131 | + if (currentCheck.runCheck()) { |
|
| 132 | + logger.info("Check "+currentCheck+" passed."); |
|
| 109 | 133 | // the check passed; proceed to next check, if any |
| 110 | 134 | currentCheck = checksIterator.hasNext() ? checksIterator.next() : null; |
| 111 | - } |
|
| 112 | - if (currentCheck != null) { |
|
| 113 | - logger.info("More checks to do; re-scheduling."); |
|
| 114 | - // re-schedule this task to run next check in a while |
|
| 115 | - executor.schedule(this, DELAY_BETWEEN_CHECKS.asMillis(), TimeUnit.MILLISECONDS); |
|
| 135 | + if (currentCheck != null) { |
|
| 136 | + logger.info("More checks to do; re-scheduling to run next check "+currentCheck); |
|
| 137 | + // re-schedule this task to run next check immediately |
|
| 138 | + executor.submit(this); |
|
| 139 | + } else { |
|
| 140 | + logger.info("Done with all checks; candidate is ready for production."); |
|
| 141 | + // all checks passed; candidate is ready for production; nothing more to do here |
|
| 142 | + } |
|
| 116 | 143 | } else { |
| 117 | - logger.info("Done with all checks; candidate is ready for comparison."); |
|
| 118 | - // all checks passed; candidate is ready for comparison; nothing more to do here |
|
| 144 | + rescheduleCurrentCheckAfterFailureOrTimeout(); |
|
| 119 | 145 | } |
| 120 | 146 | } catch (Exception e) { |
| 121 | 147 | logger.warning("Exception while running check " + currentCheck + " for candidate " + replicaSet.getMaster().getHost().getHostname() + ": " + e.getMessage()); |
| 122 | 148 | } |
| 123 | - |
|
| 124 | 149 | } |
| 125 | - |
|
| 126 | - private Boolean isReady() throws IOException { |
|
| 127 | - return replicaSet.getMaster().isReady(Landscape.WAIT_FOR_PROCESS_TIMEOUT); |
|
| 150 | + |
|
| 151 | + private void rescheduleCurrentCheckAfterFailureOrTimeout() { |
|
| 152 | + executor.schedule(this, currentCheck.getDelayAfterFailure().asMillis(), TimeUnit.MILLISECONDS); |
|
| 128 | 153 | } |
| 129 | - |
|
| 130 | - private boolean hasLowEnoughSystemLoad() throws TimeoutException, Exception { |
|
| 131 | - return replicaSet.getMaster().getLastMinuteSystemLoadAverage(Landscape.WAIT_FOR_PROCESS_TIMEOUT) < MAXIMUM_ONE_MINUTE_SYSTEM_LOAD_AVERAGE; |
|
| 154 | + |
|
| 155 | + private class IsReady extends AbstractCheck { |
|
| 156 | + private static final long serialVersionUID = -4265303532881568290L; |
|
| 157 | + |
|
| 158 | + private IsReady() { |
|
| 159 | + super("is ready", TIMEOUT_FIRST_CONTACT.get(), DELAY_BETWEEN_CHECKS); |
|
| 160 | + } |
|
| 161 | + |
|
| 162 | + @Override |
|
| 163 | + public boolean runCheck() throws Exception { |
|
| 164 | + return replicaSet.getMaster().isReady(Landscape.WAIT_FOR_PROCESS_TIMEOUT); |
|
| 165 | + } |
|
| 166 | + } |
|
| 167 | + |
|
| 168 | + private class HasLowEnoughSystemLoad extends AbstractCheck { |
|
| 169 | + private static final long serialVersionUID = -7931266212387969287L; |
|
| 170 | + |
|
| 171 | + public HasLowEnoughSystemLoad() { |
|
| 172 | + super("has low enough system load", LONG_TIMEOUT, DELAY_BETWEEN_CHECKS); |
|
| 173 | + } |
|
| 174 | + |
|
| 175 | + @Override |
|
| 176 | + public boolean runCheck() throws Exception { |
|
| 177 | + return replicaSet.getMaster().getLastMinuteSystemLoadAverage(Landscape.WAIT_FOR_PROCESS_TIMEOUT) < MAXIMUM_ONE_MINUTE_SYSTEM_LOAD_AVERAGE; |
|
| 178 | + } |
|
| 179 | + |
|
| 132 | 180 | } |
| 133 | 181 | |
| 134 | - private boolean hasShortEnoughDefaultBackgroundThreadPoolExecutorQueue() throws TimeoutException, Exception { |
|
| 135 | - return replicaSet.getMaster().getDefaultBackgroundThreadPoolExecutorQueueSize(Landscape.WAIT_FOR_PROCESS_TIMEOUT) < MAXIMUM_THREAD_POOL_QUEUE_SIZE; |
|
| 182 | + private class HasShortEnoughDefaultBackgroundThreadPoolExecutorQueue extends AbstractCheck { |
|
| 183 | + private static final long serialVersionUID = 3482148861663152178L; |
|
| 184 | + |
|
| 185 | + public HasShortEnoughDefaultBackgroundThreadPoolExecutorQueue() { |
|
| 186 | + super("has short enough default background thread pool executor queue", LONG_TIMEOUT, DELAY_BETWEEN_CHECKS); |
|
| 187 | + } |
|
| 188 | + |
|
| 189 | + @Override |
|
| 190 | + public boolean runCheck() throws Exception { |
|
| 191 | + return replicaSet.getMaster().getDefaultBackgroundThreadPoolExecutorQueueSize(Landscape.WAIT_FOR_PROCESS_TIMEOUT) < MAXIMUM_THREAD_POOL_QUEUE_SIZE; |
|
| 192 | + } |
|
| 136 | 193 | } |
| 137 | 194 | |
| 138 | - private boolean hasShortEnoughDefaultForegroundThreadPoolExecutorQueue() throws TimeoutException, Exception { |
|
| 139 | - return replicaSet.getMaster().getDefaultForegroundThreadPoolExecutorQueueSize(Landscape.WAIT_FOR_PROCESS_TIMEOUT) < MAXIMUM_THREAD_POOL_QUEUE_SIZE; |
|
| 195 | + private class HasShortEnoughDefaultForegroundThreadPoolExecutorQueue extends AbstractCheck { |
|
| 196 | + private static final long serialVersionUID = 5194383164577435150L; |
|
| 197 | + |
|
| 198 | + public HasShortEnoughDefaultForegroundThreadPoolExecutorQueue() { |
|
| 199 | + super("has short enough default foreground thread pool executor queue", LONG_TIMEOUT, DELAY_BETWEEN_CHECKS); |
|
| 200 | + } |
|
| 201 | + |
|
| 202 | + @Override |
|
| 203 | + public boolean runCheck() throws Exception { |
|
| 204 | + return replicaSet.getMaster().getDefaultForegroundThreadPoolExecutorQueueSize(Landscape.WAIT_FOR_PROCESS_TIMEOUT) < MAXIMUM_THREAD_POOL_QUEUE_SIZE; |
|
| 205 | + } |
|
| 140 | 206 | } |
| 141 | 207 | |
| 142 | - private boolean compareServersWithRestAPI() throws Exception { |
|
| 143 | - // TODO |
|
| 144 | - return false; |
|
| 208 | + private class CompareServersWithRestAPI extends AbstractCheck { |
|
| 209 | + private static final long serialVersionUID = -5271988056894947109L; |
|
| 210 | + |
|
| 211 | + public CompareServersWithRestAPI() { |
|
| 212 | + super("compare servers with REST API", LONG_TIMEOUT, DELAY_BETWEEN_CHECKS); |
|
| 213 | + } |
|
| 214 | + |
|
| 215 | + |
|
| 216 | + @Override |
|
| 217 | + public boolean runCheck() throws Exception { |
|
| 218 | + // TODO Auto-generated method stub |
|
| 219 | + return false; |
|
| 220 | + } |
|
| 145 | 221 | } |
| 146 | 222 | |
| 147 | - private boolean compareServersByLeaderboardGroups() throws Exception { |
|
| 148 | - // TODO |
|
| 149 | - return false; |
|
| 223 | + private class CompareServersByLeaderboardGroups extends AbstractCheck { |
|
| 224 | + private static final long serialVersionUID = -5271988056894947109L; |
|
| 225 | + |
|
| 226 | + public CompareServersByLeaderboardGroups() { |
|
| 227 | + super("compare servers with Leaderboard Groups", LONG_TIMEOUT, DELAY_BETWEEN_CHECKS); |
|
| 228 | + } |
|
| 229 | + |
|
| 230 | + @Override |
|
| 231 | + public boolean runCheck() throws Exception { |
|
| 232 | + // TODO Auto-generated method stub |
|
| 233 | + return false; |
|
| 234 | + } |
|
| 150 | 235 | } |
| 151 | 236 | } |