java/com.sap.sailing.windestimation.lab/src/com/sap/sailing/windestimation/evaluation/WindEstimatorManeuverNumberDependentEvaluationRunner.java
... ...
@@ -110,6 +110,7 @@ public class WindEstimatorManeuverNumberDependentEvaluationRunner {
110 110
out.write(line);
111 111
}
112 112
}
113
+ LoggingUtil.logInfo("CSV with evaluation results have been stored in: " + csvFile.getAbsolutePath());
113 114
}
114 115
115 116
}
java/com.sap.sailing.windestimation.lab/src/com/sap/sailing/windestimation/model/SimpleModelsTraining.java
... ...
@@ -20,7 +20,11 @@ import com.sap.sailing.windestimation.datavisualization.AggregatedDistanceDimens
20 20
import com.sap.sailing.windestimation.datavisualization.AggregatedDurationDimensionPlot;
21 21
import com.sap.sailing.windestimation.model.classifier.maneuver.ManeuverClassifierTrainer;
22 22
import com.sap.sailing.windestimation.model.classifier.maneuver.PersistedManeuverClassifiersScorePrinter;
23
+import com.sap.sailing.windestimation.model.regressor.twdtransition.DistanceBasedTwdTransitionRegressorModelContext;
24
+import com.sap.sailing.windestimation.model.regressor.twdtransition.DistanceBasedTwdTransitionRegressorModelContext.DistanceValueRange;
23 25
import com.sap.sailing.windestimation.model.regressor.twdtransition.DistanceBasedTwdTransitionStdRegressorTrainer;
26
+import com.sap.sailing.windestimation.model.regressor.twdtransition.DurationBasedTwdTransitionRegressorModelContext;
27
+import com.sap.sailing.windestimation.model.regressor.twdtransition.DurationBasedTwdTransitionRegressorModelContext.DurationValueRange;
24 28
import com.sap.sailing.windestimation.model.regressor.twdtransition.DurationBasedTwdTransitionStdRegressorTrainer;
25 29
import com.sap.sailing.windestimation.util.LoggingUtil;
26 30
... ...
@@ -32,9 +36,7 @@ import com.sap.sailing.windestimation.util.LoggingUtil;
32 36
public class SimpleModelsTraining {
33 37
34 38
private static final int NUMBER_OF_THREADS = 3;
35
- private static final ExecutorService executorService = new ThreadPoolExecutor(NUMBER_OF_THREADS, NUMBER_OF_THREADS,
36
- 0, TimeUnit.MILLISECONDS, new ArrayBlockingQueue<>(NUMBER_OF_THREADS),
37
- new ThreadPoolExecutor.CallerRunsPolicy());
39
+ private static ExecutorService executorService;
38 40
39 41
public static void main(String[] args) throws Exception {
40 42
new ManeuverForEstimationPersistenceManager().dropCollection();
... ...
@@ -63,11 +65,15 @@ public class SimpleModelsTraining {
63 65
showInfoAboutDataCleaning(AggregatedSingleDimensionType.DURATION);
64 66
AggregatedDurationDimensionPlot.awaitWindowClosed();
65 67
} while (JOptionPane.YES_OPTION != askDataCleaningFinished(AggregatedSingleDimensionType.DURATION));
68
+ showInfoAboutIntervalAdjustments(DurationBasedTwdTransitionRegressorModelContext.class,
69
+ DurationValueRange.class);
66 70
do {
67 71
AggregatedDistanceDimensionPlot.main(args);
68 72
showInfoAboutDataCleaning(AggregatedSingleDimensionType.DISTANCE);
69 73
AggregatedDistanceDimensionPlot.awaitWindowClosed();
70 74
} while (JOptionPane.YES_OPTION != askDataCleaningFinished(AggregatedSingleDimensionType.DISTANCE));
75
+ showInfoAboutIntervalAdjustments(DistanceBasedTwdTransitionRegressorModelContext.class,
76
+ DistanceValueRange.class);
71 77
DurationBasedTwdTransitionStdRegressorTrainer.main(args);
72 78
DistanceBasedTwdTransitionStdRegressorTrainer.main(args);
73 79
Thread.sleep(1000);
... ...
@@ -92,15 +98,25 @@ public class SimpleModelsTraining {
92 98
private static void showInfoAboutDataCleaning(AggregatedSingleDimensionType dimension) {
93 99
JOptionPane.showMessageDialog(null, "Now, clean the data for " + dimension
94 100
+ " dimension. Remove instances from MongoDB collection \"" + dimension.getCollectioName()
95
- + "\" which do not make sense. E.g. values which are represented by a small number of supporting instances (see histogram), values which cause implausible zig zag sections within zero-mean standard deviation curve and etc. Close the graphical tool, when you are done to resume model training.");
101
+ + "\" which do not make sense. E.g. values which are represented by a small number of supporting instances (see histogram), values which cause implausible zig zag sections within zero-mean standard deviation curve and etc. Close the graphical tool, when you are done to resume the model training.");
102
+ }
103
+
104
+ private static void showInfoAboutIntervalAdjustments(Class<?> classToAdjust, Class<?> valueRangeEnum) {
105
+ JOptionPane.showMessageDialog(null, "Now, open the source code of the class \"" + classToAdjust.getName()
106
+ + "\". Scroll down to the definition of the inner enum \"" + valueRangeEnum.getSimpleName()
107
+ + "\", read its JavaDoc and adjust its interval definitions so that each interval can be learned by the adjusted regressor model configuration with minimal error. Press OK ONLY after you are done.");
96 108
}
97 109
98 110
private static void awaitThreadPoolCompletion() throws InterruptedException {
111
+ executorService.shutdown();
99 112
executorService.awaitTermination(24, TimeUnit.HOURS);
100 113
Thread.sleep(1000L);
101 114
}
102 115
103 116
private static void executeInThreadPool(RunnableWithExceptionsCatch runnable) {
117
+ if (executorService == null || executorService.isShutdown()) {
118
+ createNewThreadPool();
119
+ }
104 120
executorService.execute(() -> {
105 121
try {
106 122
runnable.run();
... ...
@@ -112,6 +128,11 @@ public class SimpleModelsTraining {
112 128
});
113 129
}
114 130
131
+ private static void createNewThreadPool() {
132
+ executorService = new ThreadPoolExecutor(NUMBER_OF_THREADS, NUMBER_OF_THREADS, 0, TimeUnit.MILLISECONDS,
133
+ new ArrayBlockingQueue<>(NUMBER_OF_THREADS), new ThreadPoolExecutor.CallerRunsPolicy());
134
+ }
135
+
115 136
/**
116 137
*
117 138
* @author Vladislav Chumak (D069712)
wiki/howto/windestimation.md
... ...
@@ -2,66 +2,28 @@
2 2
3 3
This document describes the generation process of Machine Learning (ML) models which are used internally by wind estimation. It is highly recommended to work through this howto step by step, following the order of the sections.
4 4
5
-## Overview
6
-In total, there are the following three categories of ML models used by wind estimation:
7
-1. **Maneuver Classifiers**
8
-2. **Regressors** of TWD delta standard deviation for the dimension **duration**
9
-3. **Regressors** of TWD delta standard deviation for the dimension **distance**
10
-
11
-Each of the model categories are composed of multiple models where each model targets a specific context. A context for a maneuver classifier is determined by the following attributes:
12
-* Maneuver features
13
- * Polar features enabled: yes/no
14
- * Mark features enabled: yes/no
15
- * Scaled speed features enabled: yes/no
16
-* Boat class filtering for the data on which the classifier is trained, such as a specific boat class, or with all boat classes included
17
-
18
-The context of regressor models is represented by its assigned input interval responsibility, e.g. [0 seconds; 62 seconds) for duration, or [80 meters; 1368 meters) for distance.
19
-
20
-Each of the ML model categories must be trained individually. The common workflow looks as follows:
21
-1. Get the training data from REST API of sapsailing.com
22
-2. Preprocess data
23
-3. Train the model category
24
-
25
-For each of the steps, appropriate Java classes must be executed per *Run with...->Java Application*. All referenced classes are located in *com.sap.sailing.windestimation.lab* Java project. Each class execution must finish without uncaught exceptions before proceeding to next instructions. After model training, all trained models can be collected in *./trained_wind_estimation_models*, which is normally */path/to/workspace/com.sap.sailing.windestimation/trained_wind_estimation_models* if you start the training classes in Eclipse per *Run with...->Java Application*.
26
-
27
-The details of the training process for each model category are described in the following sections.
28
-
29 5
## Prerequisites
30 6
To complete the training process successfully, you need to make sure that you have the following stuff:
31 7
* A complete onboarding setup for SAP Sailing Analytics development
32 8
* MongoDB (**3.4 or higher!**) is up and running (same MongoDB instance as required in onboarding howto)
33 9
* At least 100 GB free space on the partition, where MongoDB is operating
34 10
* Installed graphical MongoDB client such as MongoDB Compass (Community version)
35
-
36
-## Get the training data from sapsailing.com
37
-The following steps import all the data required from sapsailing.com into the local MongoDB. These steps constitute a preprequisite for training of all ML model categories:
38
-1. Run *com.sap.sailing.windestimation.data.importer.ManeuverAndWindImporter*
39
-2. Run *com.sap.sailing.windestimation.data.importer.PolarDataImporter*
40
-
41
-## Maneuver classifiers training
42
-1. Run *com.sap.sailing.windestimation.model.classifier.maneuver.ManeuverClassifierTrainer*. Within the this step, the maneuver data is preprocessed and all maneuver classifiers are trained for each supported context.
43
-2. Optionally run *com.sap.sailing.windestimation.model.classifier.maneuver.ManeuverClassifierScoring* to print the performance of the trained classifiers. After this step, a list with macro-averaged F2-score of each trained classifier will be stored in *./maneuverClassifierScores.csv*
44
-
45
-## Duration-based TWD delta standard deviation regressor
46
-
47
-1. Run *com.sap.sailing.windestimation.data.importer.DurationBasedTwdTransitionImporter*
48
-2. Run *com.sap.sailing.windestimation.data.importer.AggregatedDurationBasedTwdTransitionImporter*
49
-3. Run *com.sap.sailing.windestimation.datavisualization.AggregatedDurationDimensionPlot* to visualize the wind data. A Swing-based GUI-Window must open with two charts, one XY-chart where the x-axis represents **seconds**, and the y-axis represents TWD delta-based series measures (e.g. standard deviation or mean). Below the chart, a histogram for data points of the XY-Chart is provided. You can zoom-in and zoom-out in each of the chart by mouse dragging. Be aware that currently the zoom level of both charts is not synchronized
50
-4. Open your graphical MongoDB client and connect to *windEstimation* database hosted by your local MongoDB. Open the collection with name *aggregatedDurationTwdTransition*. Within the collection you will see all the instances/data points visualized in the previous step. The total number of the points must not exceed 100.
51
-5. Delete all the instances within the collection which do not make sense. For this, use the data visualization tool from step 3 to identify such instances. Pay a special attention to the instances in the beginnning and end. Some of the instances are not representative due to small number of supporting instances which is visualized in the histogram. Restart the data visualization tool as often as need to visualize the changed data.
52
-6. Open the source code of the class *com.sap.sailing.windestimation.model.regressor.twdtransition.DurationBasedTwdTransitionRegressorModelContext*. Scroll down to the definition of the inner class/enum *DurationValueRange*. The enum defines the intervals for which a separate regressor model will be trained. Adjust the intervals accordingly in order to allow the regressor model to learn the data curve with minimal error. Make sure that there are at least 2 data points available within each interval. Datapoint with x = 0, y = 0 will be created automatically.
53
-7. Run *com.sap.sailing.windestimation.model.regressor.twdtransition.DurationBasedTwdTransitionStdRegressorTrainer*
54
-8. Verify the trained regressor functions. They are printed in the console output of the previous step. For instance, you can visualize the polynoms by means of https://www.wolframalpha.com/
55
-
56
-## Distance-based TWD delta standard deviation regressor
57
-
58
-The steps of this sections are similar to the steps of the previous section. It is recommended to traverse through the previous section before starting with this one, because due to similarity of the steps, the similar steps in this section are described with less details and hints.
59
-
60
-1. Run *com.sap.sailing.windestimation.data.importer.DistanceBasedTwdTransitionImporter*
61
-2. Run *com.sap.sailing.windestimation.data.importer.AggregatedDistanceBasedTwdTransitionImporter* with at least 10 GB JVM memory.
62
-3. Run *com.sap.sailing.windestimation.datavisualization.AggregatedDistanceDimensionPlot* to visualize the wind data. Here, the x-axis of the XY-chart represents **meters**
63
-4. Open your graphical MongoDB client and connect to *windEstimation* database hosted by your local MongoDB. Open collection *aggregatedDistanceTwdTransition* collection. Within the collection you will see all the instances/data points visualized in the previous step. The total number of the points must not exceed 100.
64
-5. Delete all the instances within the collection which do not make sense.
65
-6. Open the source code of the class *com.sap.sailing.windestimation.model.regressor.twdtransition.DistanceBasedTwdTransitionRegressorModelContext*. Scroll down to the definition of the inner class/enum *DistanceValueRange*. The enum defines the intervals for which a separate regressor model will be trained. Adjust the intervals accordingly in order to allow the regressor model to learn the data curve with minimal error.
66
-7. Run *com.sap.sailing.windestimation.model.regressor.twdtransition.DistanceBasedTwdTransitionStdRegressorTrainer*
67
-8. Verify the trained regressor functions. They are printed in the console output of the previous step.
... ...
\ No newline at end of file
0
+* 16 GB RAM
1
+* ~24 operation hours of your computer
2
+
3
+## Model training process
4
+1. Run ``com.sap.sailing.windestimation.model.SimpleModelsTraining`` as a normal Java Application. This program downloads all the necessary maneuver and wind data, pre-processes them and initiates training of maneuver classifiers.
5
+2. Make sure that the launched program does not get terminated by an uncaught exception. Wait until a graphical info dialog shows up which requests you to perform data cleansing for the duration dimension, and press OK.
6
+ ![Screenshot of graphical info dialog requesting to perform data cleansing for duration dimension](https://github.com/adam-p/markdown-here/raw/master/src/common/images/icon48.png "Screenshot of graphical info dialog requesting to perform data cleansing for duration dimension")
7
+ A Swing-based GUI-Window must open with two charts, one XY-chart where the x-axis represents **seconds**, and the y-axis represents TWD delta-based series measures (e.g. standard deviation or mean). Below the chart, a histogram for the data points of the XY-Chart is provided. You can zoom-in and zoom-out in each of the charts by mouse dragging. Be aware that currently, the zoom level of both charts is not synchronized.
8
+ ![Screenshot of graphical wind data visualization tool for duration dimension](https://github.com/adam-p/markdown-here/raw/master/src/common/images/icon48.png "Screenshot of graphical wind data visualization tool for duration dimension")
9
+3. Open your graphical MongoDB client and connect to ``windEstimation`` database hosted by your local MongoDB. Open the collection with name ``aggregatedDurationTwdTransition``. Within the collection you will see all the instances/data points visualized in the previous step. The attribute used for the x-axis is represented by ``value``.
10
+ ![Screenshot of MongoDB Compass with opened aggregatedDurationTwdTransition collection](https://github.com/adam-p/markdown-here/raw/master/src/common/images/icon48.png "Screenshot of MongoDB Compass with opened aggregatedDurationTwdTransition collection")
11
+4. Delete all the instances within the collection which do not make sense. For this, use the data visualization tool from step 2 to identify such instances. Some of the instances are not representative due to the small number of supporting instances which is visualized in the histogram. Such instances can produce unreasonable bumps in the XY-chart. The desired output of this step is that the series curve ``Zero mean sigma`` looks smooth and always growing, as depicted below:
12
+ ![Screenshot of graphical visualization tool of duration dimension after data cleansing](https://github.com/adam-p/markdown-here/raw/master/src/common/images/icon48.png "Screenshot of graphical visualization tool of duration dimension after data cleansing")
13
+ Use the ``Refresh charts`` button as often as needed to update the charts with the modified data in MongoDB. Close the graphical visualization tool window after you are done with data cleansing to resume the training process. Confirm the confirmation dialog after you have finished the data cleansing of duration dimension:
14
+ ![Screenshot of confirmation dialog for finishing the data cleansing](https://github.com/adam-p/markdown-here/raw/master/src/common/images/icon48.png "Screenshot of confirmation dialog for finishing the data cleansing")
15
+5. A new information dialog shows up (do not press OK yet!) requesting you to open the source code of the class ``com.sap.sailing.windestimation.model.regressor.twdtransition.DurationBasedTwdTransitionRegressorModelContext``. Open it and scroll down to the definition of the inner enum ``DurationValueRange``. The enum defines the intervals for which a separate regressor model will be trained. Read the Javadoc of ``DurationValueRange`` and adjust the intervals accordingly in order to allow the regressor model to learn the ``Zero mean sigma`` curve with minimal error. You can also configure the polynomial which will be used for regressor training. Make sure that there are at least 2 data points available within each interval. The datapoint with x = 0, y = 0 will be created automatically. Press OK in the information dialog after you are done.
16
+6. A graphical info dialog shows up which requests you to perform data cleansing for the *distance* dimension. Press OK. All steps for data cleansing for the distance dimension are very similar to steps 2 through 5 for the duration dimension. Thus, consult these steps to complete the data cleansing and model configuration for the distance dimension. The unit used for distance representation is **meters**. The collection name required in step 3 is ``aggregatedDistanceTwdTransition``. The class required in step 5 is ``com.sap.sailing.windestimation.model.regressor.twdtransition.DistanceBasedTwdTransitionRegressorModelContext`` and its inner enum is ``DistanceValueRange``.
17
+7. Wait until model training finishes and the program terminates normally. A new file with serialized representation of internal wind estimation models should be located in ``./windEstimationModels.dat``. The absolute path of the file must be printed in the console output of the program. You can upload the file via HTTP POST to http://sapsailing.com/windestimation/api/windestimation_data (see ``com.sap.sailing.windestimation.jaxrs.api.WindEstimationDataResource``) to update the wind estimation of a server instance.
18
+8. Optionally, run ``com.sap.sailing.windestimation.evaluation.WindEstimatorManeuverNumberDependentEvaluationRunner`` as a normal Java Application to evaluate the wind estimation with the newly trained models. The evaluation score will be stored as CSV in ``./maneuverNumberDependentEvaluation.csv``.
... ...
\ No newline at end of file