diff --git a/aws-replicator/Makefile b/aws-replicator/Makefile index e0b920c..6b60fd2 100644 --- a/aws-replicator/Makefile +++ b/aws-replicator/Makefile @@ -49,7 +49,7 @@ enable: $(wildcard ./build/dist/localstack_extension_aws_replicator-*.tar.gz) # localstack extensions -v install file://$? publish: clean-dist venv dist - $(VENV_RUN); cd build; pip install --upgrade twine; twine upload dist/* + $(VENV_RUN); pip install --upgrade twine; twine upload dist/* clean-dist: clean rm -rf dist/ diff --git a/aws-replicator/README.md b/aws-replicator/README.md index 2ce6107..9d05512 100644 --- a/aws-replicator/README.md +++ b/aws-replicator/README.md @@ -152,6 +152,7 @@ localstack extensions install "git+https://github.com/localstack/localstack-exte ## Change Log +* `0.1.19`: Print human-readable message for invalid regexes in resource configs; fix logic for proxying S3 requests with host-based addressing * `0.1.18`: Update environment check to use SDK Docker client and enable starting the proxy from within Docker (e.g., from the LS main container as part of an init script) * `0.1.17`: Add basic support for ARN-based pattern-matching for `secretsmanager` resources * `0.1.16`: Update imports for localstack >=3.6 compatibility diff --git a/aws-replicator/aws_replicator/client/auth_proxy.py b/aws-replicator/aws_replicator/client/auth_proxy.py index e79fa2f..895b6e1 100644 --- a/aws-replicator/aws_replicator/client/auth_proxy.py +++ b/aws-replicator/aws_replicator/client/auth_proxy.py @@ -15,7 +15,7 @@ from localstack import config as localstack_config from localstack.aws.spec import load_service from localstack.config import external_service_url -from localstack.constants import AWS_REGION_US_EAST_1, DOCKER_IMAGE_NAME_PRO +from localstack.constants import AWS_REGION_US_EAST_1, DOCKER_IMAGE_NAME_PRO, LOCALHOST_HOSTNAME from localstack.http import Request from localstack.utils.aws.aws_responses import requests_response from localstack.utils.bootstrap import setup_logging @@ 
-32,6 +32,7 @@
 from aws_replicator import config as repl_config
 from aws_replicator.client.utils import truncate_content
 from aws_replicator.config import HANDLER_PATH_PROXIES
+from aws_replicator.shared.constants import HEADER_HOST_ORIGINAL
 from aws_replicator.shared.models import AddProxyRequest, ProxyConfig
 
 from .http2_server import run_server
@@ -106,6 +107,7 @@ def proxy_request(self, request: Request, data: bytes) -> Response:
 
         # fix headers (e.g., "Host") and create client
         self._fix_headers(request, service_name)
+        self._fix_host_and_path(request, service_name)
 
         # create request and request dict
         operation_model, aws_request, request_dict = self._parse_aws_request(
@@ -262,7 +264,7 @@ def _fix_headers(self, request: Request, service_name: str):
             host = request.headers.get("Host") or ""
             regex = r"^(https?://)?([0-9.]+|localhost)(:[0-9]+)?"
             if re.match(regex, host):
-                request.headers["Host"] = re.sub(regex, r"\1s3.localhost.localstack.cloud", host)
+                request.headers["Host"] = re.sub(regex, rf"\1s3.{LOCALHOST_HOSTNAME}", host)
         request.headers.pop("Content-Length", None)
         request.headers.pop("x-localstack-request-url", None)
         request.headers.pop("X-Forwarded-For", None)
@@ -270,6 +272,24 @@ def _fix_headers(self, request: Request, service_name: str):
         request.headers.pop("X-Moto-Account-Id", None)
         request.headers.pop("Remote-Addr", None)
 
+    def _fix_host_and_path(self, request: Request, service_name: str):
+        """Rewrite host-based (virtual-hosted style) S3 requests to path-based addressing.
+
+        For S3 requests, the original host is taken from the `HEADER_HOST_ORIGINAL` header
+        (which is removed from the request), falling back to the `Host` header. If the host
+        has the form `<bucket>.s3.<LOCALHOST_HOSTNAME>`, the bucket name is prepended to the
+        request path. Requests for services other than S3 are left untouched.
+        """
+        if service_name == "s3":
+            # fix the path and prepend the bucket name, to avoid bucket addressing issues
+            host = request.headers.pop(HEADER_HOST_ORIGINAL, None)
+            host = host or request.headers.get("Host") or ""
+            # escape the hostname, so that its dots only match literal dots in the host
+            match = re.match(rf"(.+)\.s3\.{re.escape(LOCALHOST_HOSTNAME)}", host)
+            if match:
+                # prepend the bucket name (extracted from the host) to the path of the request (path-based addressing)
+                request.path = f"/{match.group(1)}{request.path}"
+
     def _extract_region_and_service(self, headers) -> Optional[Tuple[str, str]]:
         auth_header = headers.pop("Authorization", "")
parts = auth_header.split("Credential=", maxsplit=1)
diff --git a/aws-replicator/aws_replicator/server/aws_request_forwarder.py b/aws-replicator/aws_replicator/server/aws_request_forwarder.py
index a9d1021..d6b1d08 100644
--- a/aws-replicator/aws_replicator/server/aws_request_forwarder.py
+++ b/aws-replicator/aws_replicator/server/aws_request_forwarder.py
@@ -22,6 +22,7 @@
 except ImportError:
     from localstack.constants import TEST_AWS_ACCESS_KEY_ID
 
+from aws_replicator.shared.constants import HEADER_HOST_ORIGINAL
 from aws_replicator.shared.models import ProxyInstance, ProxyServiceConfig
 
 LOG = logging.getLogger(__name__)
@@ -98,33 +99,47 @@ def select_proxy(self, context: RequestContext) -> Optional[ProxyInstance]:
     def _request_matches_resource(
         self, context: RequestContext, resource_name_pattern: str
     ) -> bool:
-        service_name = self._get_canonical_service_name(context.service.service_name)
-        if service_name == "s3":
-            bucket_name = context.service_request.get("Bucket") or ""
-            s3_bucket_arn = arns.s3_bucket_arn(bucket_name)
-            return bool(re.match(resource_name_pattern, s3_bucket_arn))
-        if service_name == "sqs":
-            queue_name = context.service_request.get("QueueName") or ""
-            queue_url = context.service_request.get("QueueUrl") or ""
-            queue_name = queue_name or queue_url.split("/")[-1]
-            candidates = (
-                queue_name,
-                queue_url,
-                sqs_queue_arn(
-                    queue_name, account_id=context.account_id, region_name=context.region
-                ),
-            )
-            for candidate in candidates:
-                if re.match(resource_name_pattern, candidate):
-                    return True
-            return False
-        if service_name == "secretsmanager":
-            secret_id = context.service_request.get("SecretId") or ""
-            secret_arn = secretsmanager_secret_arn(
-                secret_id, account_id=context.account_id, region_name=context.region
-            )
-            return bool(re.match(resource_name_pattern, secret_arn))
-        # TODO: add more resource patterns
+        """Check whether the resource targeted by the request matches the given regex pattern.
+
+        The pattern is matched against the bucket ARN (S3), against the queue name, URL, or ARN
+        (SQS), or against the secret ARN (Secrets Manager). Requests to any other service are
+        always considered a match.
+
+        Raises an Exception (chained to the `re.error`) if the pattern is not a valid regex.
+        """
+        try:
+            service_name = self._get_canonical_service_name(context.service.service_name)
+            if service_name == "s3":
+                bucket_name = context.service_request.get("Bucket") or ""
+                s3_bucket_arn = arns.s3_bucket_arn(bucket_name)
+                return bool(re.match(resource_name_pattern, s3_bucket_arn))
+            if service_name == "sqs":
+                queue_name = context.service_request.get("QueueName") or ""
+                queue_url = context.service_request.get("QueueUrl") or ""
+                queue_name = queue_name or queue_url.split("/")[-1]
+                candidates = (
+                    queue_name,
+                    queue_url,
+                    sqs_queue_arn(
+                        queue_name, account_id=context.account_id, region_name=context.region
+                    ),
+                )
+                for candidate in candidates:
+                    if re.match(resource_name_pattern, candidate):
+                        return True
+                return False
+            if service_name == "secretsmanager":
+                secret_id = context.service_request.get("SecretId") or ""
+                secret_arn = secretsmanager_secret_arn(
+                    secret_id, account_id=context.account_id, region_name=context.region
+                )
+                return bool(re.match(resource_name_pattern, secret_arn))
+            # TODO: add more resource patterns
+        except re.error as e:
+            raise Exception(
+                f"Error evaluating regular expression {resource_name_pattern!r} "
+                "- please verify proxy configuration"
+            ) from e
         return True
 
     def forward_request(self, context: RequestContext, proxy: ProxyInstance) -> requests.Response:
@@ -140,7 +155,7 @@ def forward_request(self, context: RequestContext, proxy: ProxyInstance) -> requ
 
         result = None
         try:
-            headers.pop("Host", None)
+            headers[HEADER_HOST_ORIGINAL] = headers.pop("Host", None)
             headers.pop("Content-Length", None)
             ctype = headers.get("Content-Type")
             data = b""
diff --git a/aws-replicator/aws_replicator/shared/constants.py b/aws-replicator/aws_replicator/shared/constants.py
new file mode 100644
index 0000000..6270a66
--- /dev/null
+++ b/aws-replicator/aws_replicator/shared/constants.py
@@ -0,0 +1,2 @@
+# header name for the original request host name forwarded in the request to the target proxy handler
+HEADER_HOST_ORIGINAL = "x-ls-host-original"
diff --git a/aws-replicator/setup.cfg b/aws-replicator/setup.cfg
index fd4d9ee..d7a0546 100644
--- a/aws-replicator/setup.cfg
+++ 
b/aws-replicator/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = localstack-extension-aws-replicator -version = 0.1.18 +version = 0.1.19 summary = LocalStack Extension: AWS replicator description = Replicate AWS resources into your LocalStack instance long_description = file: README.md diff --git a/aws-replicator/tests/test_proxy_requests.py b/aws-replicator/tests/test_proxy_requests.py index f96e9ee..8fdfe34 100644 --- a/aws-replicator/tests/test_proxy_requests.py +++ b/aws-replicator/tests/test_proxy_requests.py @@ -4,6 +4,7 @@ import boto3 import pytest +from botocore.client import Config from botocore.exceptions import ClientError from localstack.aws.connect import connect_to from localstack.utils.aws.arns import sqs_queue_arn, sqs_queue_url_for_arn @@ -40,13 +41,20 @@ def _start(config: dict = None): @pytest.mark.parametrize("metadata_gzip", [True, False]) -def test_s3_requests(start_aws_proxy, s3_create_bucket, metadata_gzip): +@pytest.mark.parametrize("host_addressing", [True, False]) +def test_s3_requests(start_aws_proxy, s3_create_bucket, metadata_gzip, host_addressing): # start proxy config = ProxyConfig(services={"s3": {"resources": ".*"}}, bind_host=PROXY_BIND_HOST) start_aws_proxy(config) # create clients - s3_client = connect_to().s3 + if host_addressing: + s3_client = connect_to( + endpoint_url="http://s3.localhost.localstack.cloud:4566", + config=Config(s3={"addressing_style": "virtual"}), + ).s3 + else: + s3_client = connect_to().s3 s3_client_aws = boto3.client("s3") # list buckets to assert that proxy is up and running