Skip to content

Commit

Permalink
Fix the logic for proxying S3 requests addressed to `*.s3.amazonaws.com` hosts
Browse files (browse the repository at this point in the history)
  • Loading branch information
whummer committed Oct 14, 2024
1 parent c11b6a9 commit f8a6f76
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 7 deletions.
2 changes: 1 addition & 1 deletion aws-replicator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ install: venv
$(VENV_RUN); $(PIP_CMD) install -e ".[test]"

test: venv
$(VENV_RUN); python -m pytest $(TEST_PATH)
$(VENV_RUN); python -m pytest $(PYTEST_ARGS) $(TEST_PATH)

dist: venv
$(VENV_RUN); python setup.py sdist bdist_wheel
Expand Down
3 changes: 2 additions & 1 deletion aws-replicator/aws_replicator/client/auth_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,9 +275,10 @@ def _fix_headers(self, request: Request, service_name: str):
def _fix_host_and_path(self, request: Request, service_name: str):
if service_name == "s3":
# fix the path and prepend the bucket name, to avoid bucket addressing issues
regex_base_domain = rf"((amazonaws\.com)|({LOCALHOST_HOSTNAME}))"
host = request.headers.pop(HEADER_HOST_ORIGINAL, None)
host = host or request.headers.get("Host") or ""
match = re.match(rf"(.+)\.s3\.{LOCALHOST_HOSTNAME}", host)
match = re.match(rf"(.+)\.s3\.{regex_base_domain}", host)
if match:
# prepend the bucket name (extracted from the host) to the path of the request (path-based addressing)
request.path = f"/{match.group(1)}{request.path}"
Expand Down
25 changes: 20 additions & 5 deletions aws-replicator/tests/test_proxy_requests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Note: these tests require the extension to be installed and real AWS credentials to be configured, so
# that the proxy can be started from within the tests. For now, they are intended to run mainly in CI.
import gzip
import re
from urllib.parse import urlparse

import boto3
import pytest
Expand Down Expand Up @@ -41,20 +43,33 @@ def _start(config: dict = None):


@pytest.mark.parametrize("metadata_gzip", [True, False])
@pytest.mark.parametrize("host_addressing", [True, False])
def test_s3_requests(start_aws_proxy, s3_create_bucket, metadata_gzip, host_addressing):
@pytest.mark.parametrize("target_endpoint", ["local_domain", "aws_domain", "default"])
def test_s3_requests(start_aws_proxy, s3_create_bucket, metadata_gzip, target_endpoint):
# start proxy
config = ProxyConfig(services={"s3": {"resources": ".*"}}, bind_host=PROXY_BIND_HOST)
start_aws_proxy(config)

# create clients
if host_addressing:
if target_endpoint == "default":
s3_client = connect_to().s3
else:
s3_client = connect_to(
endpoint_url="http://s3.localhost.localstack.cloud:4566",
config=Config(s3={"addressing_style": "virtual"}),
).s3
else:
s3_client = connect_to().s3

if target_endpoint == "aws_domain":

def _add_header(request, **kwargs):
# instrument boto3 client to add custom `Host` header, mimicking a `*.s3.amazonaws.com` request
url = urlparse(request.url)
match = re.match(r"(.+)\.s3\.localhost\.localstack\.cloud", url.netloc)
if match:
request.headers.add_header("host", f"{match.group(1)}.s3.amazonaws.com")

s3_client.meta.events.register_first("before-sign.*.*", _add_header)

# define S3 client pointing to real AWS
s3_client_aws = boto3.client("s3")

# list buckets to assert that proxy is up and running
Expand Down

0 comments on commit f8a6f76

Please sign in to comment.