Skip to content

Commit

Permalink
Handle responses with no Content-Length header
Browse files: browse the repository at this point in the history
  • Loading branch information
msbarry committed Jul 2, 2024
1 parent 07afc77 commit b6a4db3
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.OptionalLong;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
Expand Down Expand Up @@ -165,7 +166,7 @@ public void run() {

for (var toDownload : toDownloadList) {
try {
long size = toDownload.metadata.get(10, TimeUnit.SECONDS).size;
long size = toDownload.metadata.get(10, TimeUnit.SECONDS).size.orElse(0);
loggers.addStorageRatePercentCounter(toDownload.id, size, toDownload::bytesDownloaded, true);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
Expand All @@ -183,22 +184,29 @@ CompletableFuture<Void> downloadIfNecessary(ResourceToDownload resourceToDownloa
return CompletableFuture.runAsync(RunnableThatThrows.wrap(() -> {
LogUtil.setStage("download", resourceToDownload.id);
long existingSize = FileUtils.size(resourceToDownload.output);
var metadata = httpHeadFollowRedirects(resourceToDownload.url, 0);
Path tmpPath = resourceToDownload.tmpPath();
resourceToDownload.metadata.complete(metadata);
if (metadata.size == existingSize) {
LOGGER.info("Skipping {}: {} already up-to-date", resourceToDownload.id, resourceToDownload.output);
return;
try {
resourceToDownload.metadata.complete(httpHeadFollowRedirects(resourceToDownload.url, 0));
} catch (Exception e) {
resourceToDownload.metadata.completeExceptionally(e);
throw e;
}
Path tmpPath = resourceToDownload.tmpPath();
try {
var metadata = resourceToDownload.metadata.get();
if (metadata.size.orElse(-1) == existingSize) {
LOGGER.info("Skipping {}: {} already up-to-date", resourceToDownload.id, resourceToDownload.output);
return;
}
String redirectInfo = metadata.canonicalUrl.equals(resourceToDownload.url) ? "" :
" (redirected to " + metadata.canonicalUrl + ")";
LOGGER.info("Downloading {}{} to {}", resourceToDownload.url, redirectInfo, resourceToDownload.output);
FileUtils.delete(resourceToDownload.output);
FileUtils.createParentDirectories(resourceToDownload.output);
FileUtils.delete(tmpPath);
FileUtils.deleteOnExit(tmpPath);
diskSpaceCheck.addDisk(tmpPath, metadata.size, resourceToDownload.id);
if (metadata.size.isPresent()) {
diskSpaceCheck.addDisk(tmpPath, metadata.size.getAsLong(), resourceToDownload.id);
}
diskSpaceCheck.checkAgainstLimits(config.force(), false);
httpDownload(resourceToDownload, tmpPath);
Files.move(tmpPath, resourceToDownload.output);
Expand All @@ -225,7 +233,7 @@ ResourceMetadata httpHead(String url) throws IOException, InterruptedException {
responseInfo -> {
int status = responseInfo.statusCode();
Optional<String> location = Optional.empty();
long contentLength = 0;
OptionalLong contentLength = OptionalLong.empty();
HttpHeaders headers = responseInfo.headers();
if (status >= 300 && status < 400) {
location = responseInfo.headers().firstValue(LOCATION);
Expand All @@ -235,7 +243,7 @@ ResourceMetadata httpHead(String url) throws IOException, InterruptedException {
} else if (responseInfo.statusCode() != 200) {
throw new IllegalStateException("Bad response: " + responseInfo.statusCode());
} else {
contentLength = headers.firstValueAsLong(CONTENT_LENGTH).orElseThrow();
contentLength = headers.firstValueAsLong(CONTENT_LENGTH);
}
boolean supportsRangeRequest = headers.allValues(ACCEPT_RANGES).contains("bytes");
ResourceMetadata metadata = new ResourceMetadata(location, url, contentLength, supportsRangeRequest);
Expand All @@ -250,12 +258,14 @@ private void httpDownload(ResourceToDownload resource, Path tmpPath)
record Range(long start, long end) {}
List<Range> chunks = new ArrayList<>();
boolean ranges = metadata.acceptRange && config.downloadThreads() > 1;
long chunkSize = ranges ? chunkSizeBytes : metadata.size;
for (long start = 0; start < metadata.size; start += chunkSize) {
long end = Math.min(start + chunkSize, metadata.size);
boolean supportsRange = ranges && metadata.size.isPresent();

Check warning on line 261 in planetiler-core/src/main/java/com/onthegomap/planetiler/util/Downloader.java

View workflow job for this annotation

GitHub Actions / Analyze with Sonar

MINOR CODE_SMELL

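Remove this unused "supportsRange" local variable. rule: java:S1481 (https://sonarcloud.io/organizations/onthegomap/rules?open=java%3AS1481&rule_key=java%3AS1481) issue url: https://sonarcloud.io/project/issues?pullRequest=944&open=AZBy18OGMWrXEO7YSwNW&id=onthegomap_planetiler [Review note: rather than deleting the variable, it looks like "supportsRange" was meant to replace "ranges" when choosing the chunk size. As written, a server that accepts range requests but sends no Content-Length yields a file size of Long.MAX_VALUE with a fixed chunk size, so the chunk-building loop would effectively never finish. Confirm against the full method before changing it.]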
long fileSize = metadata.size.orElse(Long.MAX_VALUE);
long chunkSize = ranges ? chunkSizeBytes : fileSize;
for (long start = 0; start < fileSize; start += chunkSize) {
long end = Math.min(start + chunkSize, fileSize);
chunks.add(new Range(start, end));
}
FileUtils.setLength(tmpPath, metadata.size);
FileUtils.setLength(tmpPath, metadata.size.orElse(1));
Semaphore perFileLimiter = new Semaphore(config.downloadThreads());
Worker.joinFutures(chunks.stream().map(range -> CompletableFuture.runAsync(RunnableThatThrows.wrap(() -> {
LogUtil.setStage("download", resource.id);
Expand Down Expand Up @@ -299,7 +309,7 @@ private HttpRequest.Builder newHttpRequest(String url) {
.header(USER_AGENT, config.httpUserAgent());
}

record ResourceMetadata(Optional<String> redirect, String canonicalUrl, long size, boolean acceptRange) {}
record ResourceMetadata(Optional<String> redirect, String canonicalUrl, OptionalLong size, boolean acceptRange) {}

record ResourceToDownload(
String id, String url, Path output, CompletableFuture<ResourceMetadata> metadata,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import java.nio.file.Path;
import java.util.Map;
import java.util.Optional;
import java.util.OptionalLong;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicLong;
Expand All @@ -25,7 +26,8 @@ class DownloaderTest {
private final PlanetilerConfig config = PlanetilerConfig.defaults();
private AtomicLong downloads = new AtomicLong(0);

private Downloader mockDownloader(Map<String, byte[]> resources, boolean supportsRange) {
private Downloader mockDownloader(Map<String, byte[]> resources, boolean supportsRange,
boolean supportsContentLength) {
return new Downloader(config, 2L) {

@Override
Expand Down Expand Up @@ -55,31 +57,38 @@ ResourceMetadata httpHead(String url) {
if (parts.length > 1) {
int redirectNum = Integer.parseInt(parts[1]);
String next = redirectNum <= 1 ? parts[0] : (parts[0] + "#" + (redirectNum - 1));
return new ResourceMetadata(Optional.of(next), url, 0, supportsRange);
return new ResourceMetadata(Optional.of(next), url,
supportsContentLength ? OptionalLong.of(0) : OptionalLong.empty(), supportsRange);
}
byte[] bytes = resources.get(url);
return new ResourceMetadata(Optional.empty(), url, bytes.length, supportsRange);
return new ResourceMetadata(Optional.empty(), url,
supportsContentLength ? OptionalLong.of(bytes.length) : OptionalLong.empty(), supportsRange);
}
};
}

@ParameterizedTest
@CsvSource({
"false,0",
"true,0",
"false,1",
"false,2",
"true,4",
"false,0,true",
"true,0,true",
"false,1,true",
"false,2,true",
"true,4,true",

"false,0,false",
"true,0,false",
"false,1,false",
"true,1,false",
})
void testDownload(boolean range, int redirects) throws Exception {
void testDownload(boolean range, int redirects, boolean supportsContentLength) throws Exception {
Path dest = path.resolve("out");
String string = "0123456789";
String url = "http://url";
String initialUrl = url + (redirects > 0 ? "#" + redirects : "");
Map<String, byte[]> resources = new ConcurrentHashMap<>();

byte[] bytes = string.getBytes(StandardCharsets.UTF_8);
Downloader downloader = mockDownloader(resources, range);
Downloader downloader = mockDownloader(resources, range, supportsContentLength);

// fails if no data
var resource1 = new Downloader.ResourceToDownload("resource", initialUrl, dest);
Expand All @@ -96,13 +105,15 @@ void testDownload(boolean range, int redirects) throws Exception {
assertEquals(10, resource2.bytesDownloaded());

// does not re-request if size is the same
downloads.set(0);
var resource3 = new Downloader.ResourceToDownload("resource", initialUrl, dest);
downloader.downloadIfNecessary(resource3).get();
assertEquals(0, downloads.get());
assertEquals(string, Files.readString(dest));
assertEquals(FileUtils.size(path), FileUtils.size(dest));
assertEquals(0, resource3.bytesDownloaded());
if (supportsContentLength) {
downloads.set(0);
var resource3 = new Downloader.ResourceToDownload("resource", initialUrl, dest);
downloader.downloadIfNecessary(resource3).get();
assertEquals(0, downloads.get());
assertEquals(string, Files.readString(dest));
assertEquals(FileUtils.size(path), FileUtils.size(dest));
assertEquals(0, resource3.bytesDownloaded());
}

// does re-download if size changes
var resource4 = new Downloader.ResourceToDownload("resource", initialUrl, dest);
Expand Down Expand Up @@ -131,7 +142,7 @@ InputStream openStreamRange(String url, long start, long end) {

@Override
ResourceMetadata httpHead(String url) {
return new ResourceMetadata(Optional.empty(), url, Long.MAX_VALUE, true);
return new ResourceMetadata(Optional.empty(), url, OptionalLong.of(Long.MAX_VALUE), true);
}
};

Expand Down

0 comments on commit b6a4db3

Please sign in to comment.