diff --git a/lib/meadow/pipeline/actions/extract_exif_metadata.ex b/lib/meadow/pipeline/actions/extract_exif_metadata.ex index 63202ea6d..073804629 100644 --- a/lib/meadow/pipeline/actions/extract_exif_metadata.ex +++ b/lib/meadow/pipeline/actions/extract_exif_metadata.ex @@ -17,7 +17,7 @@ defmodule Meadow.Pipeline.Actions.ExtractExifMetadata do require Logger @actiondoc "Extract EXIF metadata from FileSet" - @timeout 240_000 + @timeout 10_000 defp already_complete?(file_set, _) do with existing_exif <- diff --git a/priv/nodejs/exif/exif.js b/priv/nodejs/exif/exif.js index 963c09af7..881855e73 100644 --- a/priv/nodejs/exif/exif.js +++ b/priv/nodejs/exif/exif.js @@ -1,14 +1,32 @@ const AWS = require("aws-sdk"); const exifr = require("exifr"); const URI = require("uri-js"); +const s3 = new AWS.S3(); AWS.config.update({httpOptions: {timeout: 600000}}); +const chunkReader = (input, offset, length) => { + return new Promise((resolve, _reject) => { + let params = {...input}; + + if (typeof offset === 'number') { + let end = length ? offset + length - 1 : undefined; + params.Range = `bytes=${[offset, end].join('-')}`; + } + + s3.getObject(params, (err, data) => { + if (err) { + console.error(err); + resolve(undefined); + } else { + resolve(data.Body); + } + }); + }); +} + const extractExif = (source, options) => { return new Promise((resolve, reject) => { - const s3 = new AWS.S3(); - const uri = URI.parse(source); - console.log(`Retrieving ${source}`); const defaultOptions = { @@ -58,24 +76,20 @@ const extractExif = (source, options) => { xmp: true, interop: true, chunkSize: 1024 * 1024, + externalReader: chunkReader }; - s3.getObject({ Bucket: uri.host, Key: getS3Key(uri) }, (error, response) => { - if(error) { - reject(error) - } else { - console.log(`Extracting EXIF metadata from ${source}`); - options = Object.assign(options || defaultOptions, forcedOptions); - exifr.parse(response.Body, options) - .then(exif => resolve(exif)) - .catch(err => reject(err)); - } - }) + const uri = URI.parse(source); + const s3Location = { + Bucket: uri.host, + Key: uri.path.replace(/^\/+/, "") + }; + + options = Object.assign(options || defaultOptions, forcedOptions); + exifr.parse(s3Location, options) + .then(exif => resolve(exif)) + .catch(err => reject(err)); }); } -const getS3Key = (uri) => { - return uri.path.replace(/^\/+/, ""); -}; - module.exports = { extractExif }; diff --git a/priv/nodejs/exif/package.json b/priv/nodejs/exif/package.json index 6c12c89a0..10f0a48f9 100644 --- a/priv/nodejs/exif/package.json +++ b/priv/nodejs/exif/package.json @@ -6,7 +6,7 @@ "author": "bmquinn", "license": "Apache-2.0", "dependencies": { - "exifr": "^6.0.0", + "exifr": "nulib/exifr#external-reader-dist", "uri-js": "^4.4.1" }, "devDependencies": { diff --git a/priv/nodejs/exif/yarn.lock b/priv/nodejs/exif/yarn.lock index 51df39be1..68a9b5502 100644 --- a/priv/nodejs/exif/yarn.lock +++ b/priv/nodejs/exif/yarn.lock @@ -36,10 +36,9 @@ events@1.1.1: resolved "https://registry.yarnpkg.com/events/-/events-1.1.1.tgz#9ebdb7635ad099c70dcc4c2a1f5004288e8bd924" integrity sha1-nr23Y1rQmccNzEwqH1AEKI6L2SQ= -exifr@^6.0.0: - version "6.0.0" - resolved "https://registry.yarnpkg.com/exifr/-/exifr-6.0.0.tgz#e82af10e158852a1c7e19aea45bceb4cdd486727" - integrity sha512-a8n3SVIyuI5NP5VJCb/rJHsqXnofgYL1ZXcJdKBXOmCNIrj+pSExaBFHcbdEF5xp5GQrK4kpOabLJ+wBfUGYuA== +exifr@nulib/exifr#external-reader-dist: + version "6.1.1" + resolved "https://codeload.github.com/nulib/exifr/tar.gz/3d926474bc2e629f6352d668c0e5596dc8d3f874" ieee754@1.1.13: version "1.1.13" diff --git a/terraform/lambdas.tf b/terraform/lambdas.tf index c47d822d3..d4816e3a2 100644 --- a/terraform/lambdas.tf +++ b/terraform/lambdas.tf @@ -75,12 +75,8 @@ module "exif_function" { description = "Function to extract EXIF metadata from an S3 object" role = aws_iam_role.lambda_role.arn stack_name = var.stack_name - memory_size = 8192 - timeout = 240 - - environment = { - NODE_OPTIONS = "--max-old-space-size=8192" - } + memory_size = 512 + timeout = 10 tags = merge( var.tags,