fabb77680a8956d8b8a9753d7679ecd67ed85c38
java/com.sap.sse.landscape/src/com/sap/sse/landscape/impl/GithubReleasesRepository.java
| ... | ... | @@ -8,8 +8,7 @@ import java.net.URL; |
| 8 | 8 | import java.net.URLConnection; |
| 9 | 9 | import java.text.SimpleDateFormat; |
| 10 | 10 | import java.util.Iterator; |
| 11 | -import java.util.LinkedList; |
|
| 12 | -import java.util.List; |
|
| 11 | +import java.util.NoSuchElementException; |
|
| 13 | 12 | import java.util.TreeMap; |
| 14 | 13 | import java.util.logging.Logger; |
| 15 | 14 | import java.util.regex.Matcher; |
| ... | ... | @@ -31,7 +30,11 @@ import com.sap.sse.util.HttpUrlConnectionHelper; |
| 31 | 30 | * <code>https://github.com/{owner}/{repo}/releases/download/{release-name}</code>. The GitHub |
| 32 | 31 | * {@code /releases} end point delivers the releases in descending chronological order, so |
| 33 | 32 | * newest releases first. With this, we can cache old results and try to get along with the |
| 34 | - * harsh rate limit of only 60 requests per hour when used without authentication. |
|
| 33 | + * harsh rate limit of only 60 requests per hour when used without authentication.<p> |
|
| 34 | + * |
|
| 35 | + * TODO Concurrency control: what if multiple requests or iterations are run on this repository object concurrently?<p> |
|
| 36 | + * |
|
| 37 | + * TODO implement a cool-down period, e.g., one minute, during which the first releases page is loaded only once<p> |
|
| 35 | 38 | * |
| 36 | 39 | * @author Axel Uhl (d043530) |
| 37 | 40 | */ |
| ... | ... | @@ -42,13 +45,35 @@ public class GithubReleasesRepository extends AbstractReleaseRepository implemen |
| 42 | 45 | private final static String GITHUB_BASE_URL = "https://github.com"; |
| 43 | 46 | private final String owner; |
| 44 | 47 | private final String repositoryName; |
    /**
     * The cache of releases as loaded from the GitHub web site, keyed and sorted by publishing time point. The cache
     * is filled when iterating using a {@link ReleaseIterator}, by loading paginated release records, converting them
     * to {@link GithubRelease} objects and storing them in this cache.
     * <p>
     *
     * The cache does not guarantee to contain the newest releases, nor does it guarantee to go back all the way to the
     * oldest release. Its contents are contiguous in the sense of how the releases are returned by the GitHub API in
     * descending order of publication, from new to old. In other words, if there is a release cached that was published
     * at time point {@code t1} and another at a later time point {@code t2}, then the cache is guaranteed to contain
     * all releases published in the time range {@code [t1:t2]} (inclusive).
     * <p>
     *
     * Should a {@link ReleaseIterator} have enumerated all releases back to the oldest one, the
     * {@link #cacheContainsOldestRelease} flag will be set to {@code true} which means that when an iteration has
     * reached the oldest release in the cache, iteration is complete, and no further page loading is necessary
     * to complete the iteration.
     */
    private final TreeMap<TimePoint, Release> releasesByPublishingTimePoint;

    /**
     * {@code true} once a page load has reached the last (oldest) page of releases, meaning the
     * {@link #releasesByPublishingTimePoint cache} reaches back to the oldest release that exists; iterations can
     * then finish at the cache's oldest entry without any further page loading.
     */
    private boolean cacheContainsOldestRelease;

|
| 47 | 71 | public GithubReleasesRepository(String owner, String repositoryName, String defaultReleaseNamePrefix) { |
| 48 | 72 | super(defaultReleaseNamePrefix); |
| 49 | 73 | this.owner = owner; |
| 50 | 74 | this.repositoryName = repositoryName; |
| 51 | 75 | this.releasesByPublishingTimePoint = new TreeMap<>(); |
| 76 | + this.cacheContainsOldestRelease = false; |
|
| 52 | 77 | } |
| 53 | 78 | |
| 54 | 79 | private String getRepositoryPath() { |
| ... | ... | @@ -66,57 +91,154 @@ public class GithubReleasesRepository extends AbstractReleaseRepository implemen |
| 66 | 91 | } |
| 67 | 92 | |
| 68 | 93 | /** |
| 69 | - * Always fetches the first page from the {@code /releases} end point and starts constructing releases, until a |
|
| 70 | - * publishing time point overlap with {@link GithubReleasesRepository#releasesByPublishingTimePoint} is found. Then |
|
| 71 | - * we know we can continue to enumerate the remaining releases from that cache. |
|
| 94 | + * Always fetches the first page from the {@code /releases} end point and starts constructing and |
|
| 95 | + * {@link GithubReleasesRepository#releasesByPublishingTimePoint caching} releases, until a publishing time point |
|
| 96 | + * overlap with {@link GithubReleasesRepository#releasesByPublishingTimePoint} is found. Iteration then starts from |
|
| 97 | + * that cache. If the iterator has returned all elements from the cache going backwards in publishing history, and |
|
| 98 | + * {@link GithubReleasesRepository#cacheContainsOldestRelease} is {@code false}, indicating that the cache does not |
|
| 99 | + * go back to the "beginning of time," and still more elements are requested from this iterator, paginated release |
|
| 100 | + * documents need to get loaded again until we find even older releases than the oldest one from the cache. The |
|
| 101 | + * loaded elements will be added to the cache, and a new internal iterator is launched on the cache starting from |
|
| 102 | + * the then loaded element. |
|
| 72 | 103 | * <p> |
| 73 | 104 | * |
| 74 | 105 | * All releases found by loading a page are added to the |
| 75 | - * {@link GithubReleasesRepository#releasesByPublishingTimePoint} cache. |
|
| 106 | + * {@link GithubReleasesRepository#releasesByPublishingTimePoint} cache. If the page with the oldest sequence of |
|
| 107 | + * releases has been loaded (there is no next page then anymore), the |
|
| 108 | + * {@link GithubReleasesRepository#cacheContainsOldestRelease} flag is set to {@code true}. |
|
| 76 | 109 | * |
| 77 | 110 | * @author Axel Uhl (d043530) |
| 78 | 111 | * |
| 79 | 112 | */ |
| 80 | 113 | private class ReleaseIterator implements Iterator<Release> { |
| 114 | + /** |
|
| 115 | + * Initialized to the URL for loading the first page of releases; each call to |
|
| 116 | + * {@link #loadNextPage(TimePoint)} changes this to the next page, or {@code null} |
|
| 117 | + * if the last page was loaded. |
|
| 118 | + */ |
|
| 81 | 119 | private String nextPageURL; |
| 82 | - private Iterator<Pair<TimePoint, GithubRelease>> publishingTimePointsAndReleasesFromCurrentPageIterator; |
|
| 120 | + |
|
| 121 | + /** |
|
| 122 | + * Takes precedence if not {@code null} and still having elements; enumerates the cached releases, starting from |
|
| 123 | + * the newest (last in the cache) to the oldest (first in the cache). When fully consumed, page loading has to |
|
| 124 | + * continue until releases published earlier than the oldest one from the |
|
| 125 | + * {@link GithubReleasesRepository#releasesByPublishingTimePoint cache} are found. |
|
| 126 | + */ |
|
| 127 | + private Iterator<Release> cachedReleasesIterator; |
|
| 83 | 128 | |
| 84 | 129 | private ReleaseIterator() throws MalformedURLException, IOException, ParseException { |
| 85 | 130 | nextPageURL = getReleasesURL(); |
| 86 | - loadNextPage(); |
|
| 131 | + cachedReleasesIterator = null; |
|
| 132 | + while (nextPageURL != null && cachedReleasesIterator == null) { |
|
| 133 | + loadNextPage(/* olderThan */ null); |
|
| 134 | + } |
|
| 87 | 135 | } |
| 88 | 136 | |
| 89 | - private void loadNextPage() throws MalformedURLException, IOException, ParseException { |
|
| 90 | - final List<Pair<TimePoint, GithubRelease>> result = new LinkedList<>(); |
|
| 137 | + /** |
|
| 138 | + * Loads the page of releases referenced by {@link #nextPageURL}. |
|
| 139 | + * <p> |
|
| 140 | + * |
|
| 141 | + * If {@code olderThan} is {@code null}, only the releases newer than the newest entry in the cache are loaded |
|
| 142 | + * into the cache, and {@link #cachedReleasesIterator} is set to the newest element in the cache if and only if |
|
| 143 | + * the cache was empty when this method was called, or the page contained a release not newer than the newest |
|
| 144 | + * release in the cache. This also means that if with {@code olderThan==null} the |
|
| 145 | + * {@link #cachedReleasesIterator} is {@code null} after this method returns, one or more calls will be required |
|
| 146 | + * to create an "overlap" with the cache before starting the iteration. This is required because we guarantee |
|
| 147 | + * the cache to be "contiguous" in terms of the releases that exist. |
|
| 148 | + * <p> |
|
| 149 | + * |
|
| 150 | + * If {@code olderThan} is not {@code null}, only releases published before {@code olderThan} are added to the |
|
| 151 | + * cache, and {@link #cachedReleasesIterator} is set to the newest element added to the cache, or set to |
|
| 152 | + * {@code null} if no release was added to the cache by this call. |
|
| 153 | + * <p> |
|
| 154 | + * Precondition: {@link #nextPageURL} is not {@code null}. |
|
| 155 | + * <p> |
|
| 156 | + * Postcondition: {@link GithubReleasesRepository#cacheContainsOldestRelease} is {@code true} if and only if |
|
| 157 | + * this invocation has loaded the last page of releases that exist |
|
| 158 | + * |
|
| 159 | + * @param olderThan |
|
| 160 | + * if {@code null}, releases newer than the newest release from the cache will be added to the cache, |
|
| 161 | + * and the {@link #cachedReleasesIterator} will be set to the then newest cache element; if not |
|
| 162 | + * {@code null}, only releases published before {@code olderThan} will be loaded, and |
|
| 163 | + * {@link #cachedReleasesIterator} is then set to the newest of the older releases loaded, if any, or |
|
| 164 | + * to {@code null} if no releases older than {@code olderThan} were found during this invocation. |
|
| 165 | + */ |
|
| 166 | + private void loadNextPage(TimePoint olderThan) throws MalformedURLException, IOException, ParseException { |
|
| 167 | + cachedReleasesIterator = null; |
|
| 91 | 168 | final URLConnection connection = HttpUrlConnectionHelper.redirectConnection(new URL(nextPageURL)); |
| 92 | 169 | final InputStream index = (InputStream) connection.getContent(); |
| 93 | 170 | final String linkHeader = connection.getHeaderField("link"); |
| 94 | 171 | nextPageURL = getNextPageURL(linkHeader); |
| 172 | + cacheContainsOldestRelease = cacheContainsOldestRelease || nextPageURL == null; // in this case we have seen and cached the last (oldest) page of releases |
|
| 95 | 173 | final JSONArray releasesJson = (JSONArray) new JSONParser().parse(new InputStreamReader(index)); |
| 174 | + boolean addedAtLeastOneReleaseToCache = false; |
|
| 175 | + final boolean cacheWasEmpty = releasesByPublishingTimePoint.isEmpty(); |
|
| 96 | 176 | for (final Object releaseObject : releasesJson) { |
| 97 | 177 | final Pair<TimePoint, GithubRelease> publishedAtAndRelease = getPublishedAtAndReleaseFromJson((JSONObject) releaseObject); |
| 98 | - releasesByPublishingTimePoint.put(publishedAtAndRelease.getA(), publishedAtAndRelease.getB()); |
|
| 99 | - result.add(publishedAtAndRelease); |
|
| 178 | + if (olderThan == null) { // looking for releases published after the newest cache entry |
|
| 179 | + if (cacheWasEmpty || publishedAtAndRelease.getA().after(releasesByPublishingTimePoint.lastKey())) { |
|
| 180 | + addedAtLeastOneReleaseToCache = true; |
|
| 181 | + releasesByPublishingTimePoint.put(publishedAtAndRelease.getA(), publishedAtAndRelease.getB()); |
|
| 182 | + } else { |
|
| 183 | + cachedReleasesIterator = releasesByPublishingTimePoint.descendingMap().values().iterator(); |
|
| 184 | + } |
|
| 185 | + } else { // looking for releases published before olderThan |
|
| 186 | + if (publishedAtAndRelease.getA().before(olderThan)) { |
|
| 187 | + addedAtLeastOneReleaseToCache = true; |
|
| 188 | + releasesByPublishingTimePoint.put(publishedAtAndRelease.getA(), publishedAtAndRelease.getB()); |
|
| 189 | + } |
|
| 190 | + } |
|
| 191 | + } |
|
| 192 | + if (olderThan == null) { |
|
| 193 | + if (cacheWasEmpty) { |
|
| 194 | + cachedReleasesIterator = releasesByPublishingTimePoint.descendingMap().values().iterator(); |
|
| 195 | + } |
|
| 196 | + } else { |
|
| 197 | + if (addedAtLeastOneReleaseToCache) { |
|
| 198 | + cachedReleasesIterator = releasesByPublishingTimePoint.descendingMap().tailMap(olderThan, /* inclusive */ false).values().iterator(); |
|
| 199 | + } |
|
| 100 | 200 | } |
| 101 | - publishingTimePointsAndReleasesFromCurrentPageIterator = result.iterator(); |
|
| 102 | 201 | } |
| 103 | 202 | |
| 104 | 203 | @Override |
| 105 | 204 | public boolean hasNext() { |
| 106 | - return publishingTimePointsAndReleasesFromCurrentPageIterator.hasNext() || nextPageURL != null; |
|
| 205 | + // - we're delivering from the cache and the cache has more elements, or |
|
| 206 | + // - we've reached the end of the cache but the cache doesn't contain the oldest release and we can load more pages |
|
| 207 | + return cachedReleasesIterator != null && cachedReleasesIterator.hasNext() |
|
| 208 | + || !cacheContainsOldestRelease && nextPageURL != null; |
|
| 107 | 209 | } |
| 108 | 210 | |
| 109 | 211 | @Override |
| 110 | 212 | public Release next() { |
| 111 | - if (!publishingTimePointsAndReleasesFromCurrentPageIterator.hasNext()) { |
|
| 112 | - try { |
|
| 113 | - // FIXME bug6173: only load next page if we have to... we may already have created an overlap with the cache from releasesByPublishingTimePoint |
|
| 114 | - loadNextPage(); |
|
| 115 | - } catch (IOException | ParseException e) { |
|
| 116 | - throw new RuntimeException(e); |
|
| 213 | + final Release result; |
|
| 214 | + if (cachedReleasesIterator != null && cachedReleasesIterator.hasNext()) { |
|
| 215 | + result = getNextElementFromCacheIterator(); |
|
| 216 | + } else if (cacheContainsOldestRelease) { |
|
| 217 | + throw new NoSuchElementException(); |
|
| 218 | + } else { |
|
| 219 | + while (nextPageURL != null && cachedReleasesIterator != null) { |
|
| 220 | + try { |
|
| 221 | + loadNextPage(/* olderThan */ releasesByPublishingTimePoint.firstKey()); |
|
| 222 | + } catch (IOException | ParseException e) { |
|
| 223 | + throw new RuntimeException(e); |
|
| 224 | + } |
|
| 225 | + } |
|
| 226 | + if (cachedReleasesIterator == null || !cachedReleasesIterator.hasNext()) { |
|
| 227 | + throw new NoSuchElementException(); |
|
| 228 | + } else { |
|
| 229 | + result = getNextElementFromCacheIterator(); |
|
| 117 | 230 | } |
| 118 | 231 | } |
| 119 | - return publishingTimePointsAndReleasesFromCurrentPageIterator.next().getB(); |
|
| 232 | + return result; |
|
| 233 | + } |
|
| 234 | + |
|
| 235 | + private Release getNextElementFromCacheIterator() { |
|
| 236 | + final Release result; |
|
| 237 | + result = cachedReleasesIterator.next(); |
|
| 238 | + if (!cachedReleasesIterator.hasNext()) { |
|
| 239 | + cachedReleasesIterator = null; |
|
| 240 | + } |
|
| 241 | + return result; |
|
| 120 | 242 | } |
| 121 | 243 | } |
| 122 | 244 | |
| ... | ... | @@ -129,44 +251,6 @@ public class GithubReleasesRepository extends AbstractReleaseRepository implemen |
| 129 | 251 | } |
| 130 | 252 | } |
| 131 | 253 | |
| 132 | - @Override |
|
| 133 | - public Release getLatestRelease(String releaseNamePrefix) { |
|
| 134 | - // TODO Auto-generated method stub |
|
| 135 | - return super.getLatestRelease(releaseNamePrefix); |
|
| 136 | - } |
|
| 137 | - |
|
| 138 | - /** |
|
| 139 | - * Enumerating all releases of the GitHub repo is possible but goes against the harsh rate limit when used without |
|
| 140 | - * an access token (currently only 60 requests per hour), so should ideally be avoided altogether. And if it is ever called, |
|
| 141 | - * we will cache the results, so that for later requests we typically need to query only a single page, delivering the latest |
|
| 142 | - * additions, if any. |
|
| 143 | - */ |
|
| 144 | - private Iterable<Release> getAvailableReleases() { |
|
| 145 | - final List<Release> result = new LinkedList<>(); |
|
| 146 | - try { |
|
| 147 | - String nextPageURL = getReleasesURL(); |
|
| 148 | - do { |
|
| 149 | - final URLConnection connection = HttpUrlConnectionHelper.redirectConnection(new URL(nextPageURL)); |
|
| 150 | - final InputStream index = (InputStream) connection.getContent(); |
|
| 151 | - final String linkHeader = connection.getHeaderField("link"); |
|
| 152 | - final JSONArray releasesJson = (JSONArray) new JSONParser().parse(new InputStreamReader(index)); |
|
| 153 | - addAllReleasesTo(releasesJson, result); |
|
| 154 | - nextPageURL = getNextPageURL(linkHeader); |
|
| 155 | - } while (nextPageURL != null); |
|
| 156 | - } catch (IOException | ParseException e) { |
|
| 157 | - logger.warning("Exception trying to find releases: "+e.getMessage()); |
|
| 158 | - } |
|
| 159 | - return result; |
|
| 160 | - } |
|
| 161 | - |
|
| 162 | - private void addAllReleasesTo(JSONArray releasesJson, List<Release> result) { |
|
| 163 | - for (final Object releaseObject : releasesJson) { |
|
| 164 | - final Pair<TimePoint, GithubRelease> publishedAtAndRelease = getPublishedAtAndReleaseFromJson((JSONObject) releaseObject); |
|
| 165 | - result.add(publishedAtAndRelease.getB()); |
|
| 166 | - releasesByPublishingTimePoint.put(publishedAtAndRelease.getA(), publishedAtAndRelease.getB()); |
|
| 167 | - } |
|
| 168 | - } |
|
| 169 | - |
|
| 170 | 254 | private Pair<TimePoint, GithubRelease> getPublishedAtAndReleaseFromJson(JSONObject releaseJson) { |
| 171 | 255 | final String name = releaseJson.get("name").toString(); |
| 172 | 256 | final String publishedAtISO = releaseJson.get("published_at").toString(); |