java/com.sap.sse.landscape/src/com/sap/sse/landscape/impl/GithubReleasesRepository.java
... ...
@@ -8,8 +8,7 @@ import java.net.URL;
8 8
import java.net.URLConnection;
9 9
import java.text.SimpleDateFormat;
10 10
import java.util.Iterator;
11
-import java.util.LinkedList;
12
-import java.util.List;
11
+import java.util.NoSuchElementException;
13 12
import java.util.TreeMap;
14 13
import java.util.logging.Logger;
15 14
import java.util.regex.Matcher;
... ...
@@ -31,7 +30,11 @@ import com.sap.sse.util.HttpUrlConnectionHelper;
31 30
* <code>https://github.com/{owner}/{repo}/releases/download/{release-name}</code>. The GitHub
32 31
* {@code /releases} end point delivers the releases in descending chronological order, so
33 32
* newest releases first. With this, we can cache old results and try to get along with the
34
- * harsh rate limit of only 60 requests per hour when used without authentication.
33
+ * harsh rate limit of only 60 requests per hour when used without authentication.<p>
34
+ *
35
+ * TODO Concurrency Control! What if multiple requests or iterations are run on this repository object concurrently?<p>
36
+ *
37
+ * TODO implement a cool-down period, e.g., one minute, during which the first releases page is loaded only once<p>
35 38
*
36 39
* @author Axel Uhl (d043530)
37 40
*/
... ...
@@ -42,13 +45,35 @@ public class GithubReleasesRepository extends AbstractReleaseRepository implemen
42 45
private final static String GITHUB_BASE_URL = "https://github.com";
43 46
private final String owner;
44 47
private final String repositoryName;
48
+
49
+ /**
50
+ * The cache of releases as loaded from the GitHub web site. The cache is filled when iterating using a
51
+ * {@link ReleaseIterator}, by loading paginated release records, converting them to {@link GithubRelease} objects
52
+ * and storing them in this cache.
53
+ * <p>
54
+ *
55
+ * The cache does not guarantee to contain the newest releases, nor does it guarantee to go back all the way to the
56
+ * oldest release. Its contents are contiguous in the sense of how the releases are returned by the GitHub API in
57
+ * descending order of publication, from new to old. In other words, if there is a release cached that was published
58
+ * at time point {@code t1} and another at a later time point {@code t2}, then the cache is guaranteed to contain
59
+ * all releases published in the time range {@code [t1:t2]} (inclusive).
60
+ * <p>
61
+ *
62
+ * Should a {@link ReleaseIterator} have enumerated all releases back to the oldest one, the
63
+ * {@link #cacheContainsOldestRelease} flag will be set to {@code true} which means that when an iteration has
64
+ * reached the oldest release in the cache, iteration is complete, and no further page loading is necessary
65
+ * to complete the iteration.
66
+ */
45 67
private final TreeMap<TimePoint, Release> releasesByPublishingTimePoint;
46 68
69
+ private boolean cacheContainsOldestRelease;
70
+
47 71
/**
 * Creates a repository view for the GitHub project identified by {@code owner} and {@code repositoryName}.
 * The release cache starts out empty, and the flag telling whether the cache reaches back to the oldest
 * release starts out as {@code false}.
 *
 * @param owner
 *            the owner part of the GitHub repository coordinates
 * @param repositoryName
 *            the repository part of the GitHub repository coordinates
 * @param defaultReleaseNamePrefix
 *            handed on unchanged to the super class constructor
 */
public GithubReleasesRepository(String owner, String repositoryName, String defaultReleaseNamePrefix) {
    super(defaultReleaseNamePrefix);
    // cache state first: nothing loaded yet, hence the oldest release cannot be known
    this.releasesByPublishingTimePoint = new TreeMap<>();
    this.cacheContainsOldestRelease = false;
    // repository coordinates
    this.repositoryName = repositoryName;
    this.owner = owner;
}
53 78
54 79
private String getRepositoryPath() {
... ...
@@ -66,57 +91,154 @@ public class GithubReleasesRepository extends AbstractReleaseRepository implemen
66 91
}
67 92
68 93
/**
69
- * Always fetches the first page from the {@code /releases} end point and starts constructing releases, until a
70
- * publishing time point overlap with {@link GithubReleasesRepository#releasesByPublishingTimePoint} is found. Then
71
- * we know we can continue to enumerate the remaining releases from that cache.
94
+ * Always fetches the first page from the {@code /releases} end point and starts constructing and
95
+ * {@link GithubReleasesRepository#releasesByPublishingTimePoint caching} releases, until a publishing time point
96
+ * overlap with {@link GithubReleasesRepository#releasesByPublishingTimePoint} is found. Iteration then starts from
97
+ * that cache. If the iterator has returned all elements from the cache going backwards in publishing history, and
98
+ * {@link GithubReleasesRepository#cacheContainsOldestRelease} is {@code false}, indicating that the cache does not
99
+ * go back to the "beginning of time," and still more elements are requested from this iterator, paginated release
100
+ * documents need to get loaded again until we find even older releases than the oldest one from the cache. The
101
+ * loaded elements will be added to the cache, and a new internal iterator is launched on the cache starting from
102
+ * the then loaded element.
72 103
* <p>
73 104
*
74 105
* All releases found by loading a page are added to the
75
- * {@link GithubReleasesRepository#releasesByPublishingTimePoint} cache.
106
+ * {@link GithubReleasesRepository#releasesByPublishingTimePoint} cache. If the page with the oldest sequence of
107
+ * releases has been loaded (there is no next page then anymore), the
108
+ * {@link GithubReleasesRepository#cacheContainsOldestRelease} flag is set to {@code true}.
76 109
*
77 110
* @author Axel Uhl (d043530)
78 111
*
79 112
*/
80 113
private class ReleaseIterator implements Iterator<Release> {
114
+ /**
115
+ * Initialized to the URL for loading the first page of releases; each call to
116
+ * {@link #loadNextPage(TimePoint)} changes this to the next page, or {@code null}
117
+ * if the last page was loaded.
118
+ */
81 119
private String nextPageURL;
82
- private Iterator<Pair<TimePoint, GithubRelease>> publishingTimePointsAndReleasesFromCurrentPageIterator;
120
+
121
+ /**
122
+ * Takes precedence if not {@code null} and still having elements; enumerates the cached releases, starting from
123
+ * the newest (last in the cache) to the oldest (first in the cache). When fully consumed, page loading has to
124
+ * continue until releases published earlier than the oldest one from the
125
+ * {@link GithubReleasesRepository#releasesByPublishingTimePoint cache} are found.
126
+ */
127
+ private Iterator<Release> cachedReleasesIterator;
83 128
84 129
/**
 * Positions this iterator at the newest release. Starting with the first releases page, pages are loaded
 * (with no {@code olderThan} restriction) until either {@link #loadNextPage(TimePoint)} has established
 * a {@link #cachedReleasesIterator} or there is no further page to load.
 */
private ReleaseIterator() throws MalformedURLException, IOException, ParseException {
    this.cachedReleasesIterator = null;
    this.nextPageURL = getReleasesURL();
    // stop as soon as a cache iterator exists or the pages are exhausted
    while (!(nextPageURL == null || cachedReleasesIterator != null)) {
        loadNextPage(/* olderThan */ null);
    }
}
88 136
89
- private void loadNextPage() throws MalformedURLException, IOException, ParseException {
90
- final List<Pair<TimePoint, GithubRelease>> result = new LinkedList<>();
137
+ /**
138
+ * Loads the page of releases referenced by {@link #nextPageURL}.
139
+ * <p>
140
+ *
141
+ * If {@code olderThan} is {@code null}, only the releases newer than the newest entry in the cache are loaded
142
+ * into the cache, and {@link #cachedReleasesIterator} is set to the newest element in the cache if and only if
143
+ * the cache was empty when this method was called, or the page contained a release not newer than the newest
144
+ * release in the cache. This also means that if with {@code olderThan==null} the
145
+ * {@link #cachedReleasesIterator} is {@code null} after this method returns, one or more calls will be required
146
+ * to create an "overlap" with the cache before starting the iteration. This is required because we guarantee
147
+ * the cache to be "contiguous" in terms of the releases that exist.
148
+ * <p>
149
+ *
150
+ * If {@code olderThan} is not {@code null}, only releases published before {@code olderThan} are added to the
151
+ * cache, and {@link #cachedReleasesIterator} is set to the newest element added to the cache, or set to
152
+ * {@code null} if no release was added to the cache by this call.
153
+ * <p>
154
+ * Precondition: {@link #nextPageURL} is not {@code null}.
155
+ * <p>
156
+ * Postcondition: {@link GithubReleasesRepository#cacheContainsOldestRelease} is {@code true} if and only if
157
+ * this invocation has loaded the last page of releases that exist
158
+ *
159
+ * @param olderThan
160
+ * if {@code null}, releases newer than the newest release from the cache will be added to the cache,
161
+ * and the {@link #cachedReleasesIterator} will be set to the then newest cache element; if not
162
+ * {@code null}, only releases published before {@code olderThan} will be loaded, and
163
+ * {@link #cachedReleasesIterator} is then set to the newest of the older releases loaded, if any, or
164
+ * to {@code null} if no releases older than {@code olderThan} were found during this invocation.
165
+ */
166
+ private void loadNextPage(TimePoint olderThan) throws MalformedURLException, IOException, ParseException {
167
+ cachedReleasesIterator = null;
91 168
final URLConnection connection = HttpUrlConnectionHelper.redirectConnection(new URL(nextPageURL));
92 169
final InputStream index = (InputStream) connection.getContent();
93 170
final String linkHeader = connection.getHeaderField("link");
94 171
nextPageURL = getNextPageURL(linkHeader);
172
+ cacheContainsOldestRelease = cacheContainsOldestRelease || nextPageURL == null; // in this case we have seen and cached the last (oldest) page of releases
95 173
final JSONArray releasesJson = (JSONArray) new JSONParser().parse(new InputStreamReader(index));
174
+ boolean addedAtLeastOneReleaseToCache = false;
175
+ final boolean cacheWasEmpty = releasesByPublishingTimePoint.isEmpty();
96 176
for (final Object releaseObject : releasesJson) {
97 177
final Pair<TimePoint, GithubRelease> publishedAtAndRelease = getPublishedAtAndReleaseFromJson((JSONObject) releaseObject);
98
- releasesByPublishingTimePoint.put(publishedAtAndRelease.getA(), publishedAtAndRelease.getB());
99
- result.add(publishedAtAndRelease);
178
+ if (olderThan == null) { // looking for releases published after the newest cache entry
179
+ if (cacheWasEmpty || publishedAtAndRelease.getA().after(releasesByPublishingTimePoint.lastKey())) {
180
+ addedAtLeastOneReleaseToCache = true;
181
+ releasesByPublishingTimePoint.put(publishedAtAndRelease.getA(), publishedAtAndRelease.getB());
182
+ } else {
183
+ cachedReleasesIterator = releasesByPublishingTimePoint.descendingMap().values().iterator();
184
+ }
185
+ } else { // looking for releases published before olderThan
186
+ if (publishedAtAndRelease.getA().before(olderThan)) {
187
+ addedAtLeastOneReleaseToCache = true;
188
+ releasesByPublishingTimePoint.put(publishedAtAndRelease.getA(), publishedAtAndRelease.getB());
189
+ }
190
+ }
191
+ }
192
+ if (olderThan == null) {
193
+ if (cacheWasEmpty) {
194
+ cachedReleasesIterator = releasesByPublishingTimePoint.descendingMap().values().iterator();
195
+ }
196
+ } else {
197
+ if (addedAtLeastOneReleaseToCache) {
198
+ cachedReleasesIterator = releasesByPublishingTimePoint.descendingMap().tailMap(olderThan, /* inclusive */ false).values().iterator();
199
+ }
100 200
}
101
- publishingTimePointsAndReleasesFromCurrentPageIterator = result.iterator();
102 201
}
103 202
104 203
@Override
105 204
public boolean hasNext() {
106
- return publishingTimePointsAndReleasesFromCurrentPageIterator.hasNext() || nextPageURL != null;
205
+ // - we're delivering from the cache and the cache has more elements, or
206
+ // - we've reached the end of the cache but the cache doesn't contain the oldest release and we can load more pages
207
+ return cachedReleasesIterator != null && cachedReleasesIterator.hasNext()
208
+ || !cacheContainsOldestRelease && nextPageURL != null;
107 209
}
108 210
109 211
@Override
110 212
public Release next() {
111
- if (!publishingTimePointsAndReleasesFromCurrentPageIterator.hasNext()) {
112
- try {
113
- // FIXME bug6173: only load next page if we have to... we may already have created an overlap with the cache from releasesByPublishingTimePoint
114
- loadNextPage();
115
- } catch (IOException | ParseException e) {
116
- throw new RuntimeException(e);
213
+ final Release result;
214
+ if (cachedReleasesIterator != null && cachedReleasesIterator.hasNext()) {
215
+ result = getNextElementFromCacheIterator();
216
+ } else if (cacheContainsOldestRelease) {
217
+ throw new NoSuchElementException();
218
+ } else {
219
+ while (nextPageURL != null && cachedReleasesIterator != null) {
220
+ try {
221
+ loadNextPage(/* olderThan */ releasesByPublishingTimePoint.firstKey());
222
+ } catch (IOException | ParseException e) {
223
+ throw new RuntimeException(e);
224
+ }
225
+ }
226
+ if (cachedReleasesIterator == null || !cachedReleasesIterator.hasNext()) {
227
+ throw new NoSuchElementException();
228
+ } else {
229
+ result = getNextElementFromCacheIterator();
117 230
}
118 231
}
119
- return publishingTimePointsAndReleasesFromCurrentPageIterator.next().getB();
232
+ return result;
233
+ }
234
+
235
+ private Release getNextElementFromCacheIterator() {
236
+ final Release result;
237
+ result = cachedReleasesIterator.next();
238
+ if (!cachedReleasesIterator.hasNext()) {
239
+ cachedReleasesIterator = null;
240
+ }
241
+ return result;
120 242
}
121 243
}
122 244
... ...
@@ -129,44 +251,6 @@ public class GithubReleasesRepository extends AbstractReleaseRepository implemen
129 251
}
130 252
}
131 253
132
- @Override
133
- public Release getLatestRelease(String releaseNamePrefix) {
134
- // TODO Auto-generated method stub
135
- return super.getLatestRelease(releaseNamePrefix);
136
- }
137
-
138
- /**
139
- * Enumerating all releases of the GitHub repo is possible but goes against the harsh rate limit when used without
140
- * an access token (currently only 60 requests per hour), so should ideally be avoided altogether. And if it is ever called,
141
- * we will cache the results, so that for later requests we typically need to query only a single page, delivering the latest
142
- * additions, if any.
143
- */
144
- private Iterable<Release> getAvailableReleases() {
145
- final List<Release> result = new LinkedList<>();
146
- try {
147
- String nextPageURL = getReleasesURL();
148
- do {
149
- final URLConnection connection = HttpUrlConnectionHelper.redirectConnection(new URL(nextPageURL));
150
- final InputStream index = (InputStream) connection.getContent();
151
- final String linkHeader = connection.getHeaderField("link");
152
- final JSONArray releasesJson = (JSONArray) new JSONParser().parse(new InputStreamReader(index));
153
- addAllReleasesTo(releasesJson, result);
154
- nextPageURL = getNextPageURL(linkHeader);
155
- } while (nextPageURL != null);
156
- } catch (IOException | ParseException e) {
157
- logger.warning("Exception trying to find releases: "+e.getMessage());
158
- }
159
- return result;
160
- }
161
-
162
- private void addAllReleasesTo(JSONArray releasesJson, List<Release> result) {
163
- for (final Object releaseObject : releasesJson) {
164
- final Pair<TimePoint, GithubRelease> publishedAtAndRelease = getPublishedAtAndReleaseFromJson((JSONObject) releaseObject);
165
- result.add(publishedAtAndRelease.getB());
166
- releasesByPublishingTimePoint.put(publishedAtAndRelease.getA(), publishedAtAndRelease.getB());
167
- }
168
- }
169
-
170 254
private Pair<TimePoint, GithubRelease> getPublishedAtAndReleaseFromJson(JSONObject releaseJson) {
171 255
final String name = releaseJson.get("name").toString();
172 256
final String publishedAtISO = releaseJson.get("published_at").toString();