Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Catch regex config errors in AWS proxy; fix logic for proxying S3 requests with host-based addressing #80

Merged
merged 5 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion aws-replicator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ enable: $(wildcard ./build/dist/localstack_extension_aws_replicator-*.tar.gz) #
localstack extensions -v install file://$?

publish: clean-dist venv dist
$(VENV_RUN); cd build; pip install --upgrade twine; twine upload dist/*
$(VENV_RUN); pip install --upgrade twine; twine upload dist/*

clean-dist: clean
rm -rf dist/
Expand Down
1 change: 1 addition & 0 deletions aws-replicator/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ localstack extensions install "git+https://github.com/localstack/localstack-exte

## Change Log

* `0.1.19`: Print human-readable message for invalid regexes in resource configs; fix logic for proxying S3 requests with host-based addressing
* `0.1.18`: Update environment check to use SDK Docker client and enable starting the proxy from within Docker (e.g., from the LS main container as part of an init script)
* `0.1.17`: Add basic support for ARN-based pattern-matching for `secretsmanager` resources
* `0.1.16`: Update imports for localstack >=3.6 compatibility
Expand Down
16 changes: 14 additions & 2 deletions aws-replicator/aws_replicator/client/auth_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from localstack import config as localstack_config
from localstack.aws.spec import load_service
from localstack.config import external_service_url
from localstack.constants import AWS_REGION_US_EAST_1, DOCKER_IMAGE_NAME_PRO
from localstack.constants import AWS_REGION_US_EAST_1, DOCKER_IMAGE_NAME_PRO, LOCALHOST_HOSTNAME
from localstack.http import Request
from localstack.utils.aws.aws_responses import requests_response
from localstack.utils.bootstrap import setup_logging
Expand All @@ -32,6 +32,7 @@
from aws_replicator import config as repl_config
from aws_replicator.client.utils import truncate_content
from aws_replicator.config import HANDLER_PATH_PROXIES
from aws_replicator.shared.constants import HEADER_HOST_ORIGINAL
from aws_replicator.shared.models import AddProxyRequest, ProxyConfig

from .http2_server import run_server
Expand Down Expand Up @@ -106,6 +107,7 @@ def proxy_request(self, request: Request, data: bytes) -> Response:

# fix headers (e.g., "Host") and create client
self._fix_headers(request, service_name)
self._fix_host_and_path(request, service_name)

# create request and request dict
operation_model, aws_request, request_dict = self._parse_aws_request(
Expand Down Expand Up @@ -262,14 +264,24 @@ def _fix_headers(self, request: Request, service_name: str):
host = request.headers.get("Host") or ""
regex = r"^(https?://)?([0-9.]+|localhost)(:[0-9]+)?"
if re.match(regex, host):
request.headers["Host"] = re.sub(regex, r"\1s3.localhost.localstack.cloud", host)
request.headers["Host"] = re.sub(regex, rf"\1s3.{LOCALHOST_HOSTNAME}", host)
request.headers.pop("Content-Length", None)
request.headers.pop("x-localstack-request-url", None)
request.headers.pop("X-Forwarded-For", None)
request.headers.pop("X-Localstack-Tgt-Api", None)
request.headers.pop("X-Moto-Account-Id", None)
request.headers.pop("Remote-Addr", None)

def _fix_host_and_path(self, request: Request, service_name: str):
    """Convert a host-addressed S3 request into a path-addressed one.

    For S3 requests whose host has the form ``<bucket>.s3.<LOCALHOST_HOSTNAME>``,
    the bucket name is extracted from the host and prepended to the request
    path. Requests for other services, and S3 requests whose host does not
    match that form, are left untouched.

    The host is taken from the ``HEADER_HOST_ORIGINAL`` header if present
    (that header is removed from the request in the process), otherwise from
    the regular ``Host`` header.

    :param request: the incoming request; its headers and path may be modified in place
    :param service_name: name of the target service of the request
    """
    if service_name != "s3":
        return

    # prefer the original host name forwarded with the request, fall back to "Host"
    host = request.headers.pop(HEADER_HOST_ORIGINAL, None)
    host = host or request.headers.get("Host") or ""

    # escape the hostname, so that its dots are matched literally (not as regex wildcards)
    match = re.match(rf"(.+)\.s3\.{re.escape(LOCALHOST_HOSTNAME)}", host)
    if match:
        # prepend the bucket name (extracted from the host) to the path of the request
        # (path-based addressing), to avoid bucket addressing issues
        request.path = f"/{match.group(1)}{request.path}"

def _extract_region_and_service(self, headers) -> Optional[Tuple[str, str]]:
auth_header = headers.pop("Authorization", "")
parts = auth_header.split("Credential=", maxsplit=1)
Expand Down
62 changes: 34 additions & 28 deletions aws-replicator/aws_replicator/server/aws_request_forwarder.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
except ImportError:
from localstack.constants import TEST_AWS_ACCESS_KEY_ID

from aws_replicator.shared.constants import HEADER_HOST_ORIGINAL
from aws_replicator.shared.models import ProxyInstance, ProxyServiceConfig

LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -98,33 +99,38 @@ def select_proxy(self, context: RequestContext) -> Optional[ProxyInstance]:
def _request_matches_resource(
self, context: RequestContext, resource_name_pattern: str
) -> bool:
service_name = self._get_canonical_service_name(context.service.service_name)
if service_name == "s3":
bucket_name = context.service_request.get("Bucket") or ""
s3_bucket_arn = arns.s3_bucket_arn(bucket_name)
return bool(re.match(resource_name_pattern, s3_bucket_arn))
if service_name == "sqs":
queue_name = context.service_request.get("QueueName") or ""
queue_url = context.service_request.get("QueueUrl") or ""
queue_name = queue_name or queue_url.split("/")[-1]
candidates = (
queue_name,
queue_url,
sqs_queue_arn(
queue_name, account_id=context.account_id, region_name=context.region
),
)
for candidate in candidates:
if re.match(resource_name_pattern, candidate):
return True
return False
if service_name == "secretsmanager":
secret_id = context.service_request.get("SecretId") or ""
secret_arn = secretsmanager_secret_arn(
secret_id, account_id=context.account_id, region_name=context.region
)
return bool(re.match(resource_name_pattern, secret_arn))
# TODO: add more resource patterns
try:
service_name = self._get_canonical_service_name(context.service.service_name)
if service_name == "s3":
bucket_name = context.service_request.get("Bucket") or ""
s3_bucket_arn = arns.s3_bucket_arn(bucket_name)
return bool(re.match(resource_name_pattern, s3_bucket_arn))
if service_name == "sqs":
queue_name = context.service_request.get("QueueName") or ""
queue_url = context.service_request.get("QueueUrl") or ""
queue_name = queue_name or queue_url.split("/")[-1]
candidates = (
queue_name,
queue_url,
sqs_queue_arn(
queue_name, account_id=context.account_id, region_name=context.region
),
)
for candidate in candidates:
if re.match(resource_name_pattern, candidate):
return True
return False
if service_name == "secretsmanager":
secret_id = context.service_request.get("SecretId") or ""
secret_arn = secretsmanager_secret_arn(
secret_id, account_id=context.account_id, region_name=context.region
)
return bool(re.match(resource_name_pattern, secret_arn))
# TODO: add more resource patterns
except re.error as e:
raise Exception(
"Error evaluating regular expression - please verify proxy configuration"
) from e
return True

def forward_request(self, context: RequestContext, proxy: ProxyInstance) -> requests.Response:
Expand All @@ -140,7 +146,7 @@ def forward_request(self, context: RequestContext, proxy: ProxyInstance) -> requ

result = None
try:
headers.pop("Host", None)
headers[HEADER_HOST_ORIGINAL] = headers.pop("Host", None)
headers.pop("Content-Length", None)
ctype = headers.get("Content-Type")
data = b""
Expand Down
2 changes: 2 additions & 0 deletions aws-replicator/aws_replicator/shared/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# header name for the original request host name forwarded in the request to the target proxy handler
HEADER_HOST_ORIGINAL = "x-ls-host-original"
2 changes: 1 addition & 1 deletion aws-replicator/setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = localstack-extension-aws-replicator
version = 0.1.18
version = 0.1.19
summary = LocalStack Extension: AWS replicator
description = Replicate AWS resources into your LocalStack instance
long_description = file: README.md
Expand Down
12 changes: 10 additions & 2 deletions aws-replicator/tests/test_proxy_requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import boto3
import pytest
from botocore.client import Config
from botocore.exceptions import ClientError
from localstack.aws.connect import connect_to
from localstack.utils.aws.arns import sqs_queue_arn, sqs_queue_url_for_arn
Expand Down Expand Up @@ -40,13 +41,20 @@ def _start(config: dict = None):


@pytest.mark.parametrize("metadata_gzip", [True, False])
def test_s3_requests(start_aws_proxy, s3_create_bucket, metadata_gzip):
@pytest.mark.parametrize("host_addressing", [True, False])
def test_s3_requests(start_aws_proxy, s3_create_bucket, metadata_gzip, host_addressing):
# start proxy
config = ProxyConfig(services={"s3": {"resources": ".*"}}, bind_host=PROXY_BIND_HOST)
start_aws_proxy(config)

# create clients
s3_client = connect_to().s3
if host_addressing:
s3_client = connect_to(
endpoint_url="http://s3.localhost.localstack.cloud:4566",
config=Config(s3={"addressing_style": "virtual"}),
).s3
else:
s3_client = connect_to().s3
s3_client_aws = boto3.client("s3")

# list buckets to assert that proxy is up and running
Expand Down
Loading