Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scylla-manager: make --scylla-manager-package option more robust #464

Merged
merged 2 commits into from
Jul 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 14 additions & 15 deletions ccmlib/scylla_repository.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@


import logging
import random
import time
Expand All @@ -16,17 +14,14 @@
import glob
import urllib

import hashlib
import requests
import yaml


import packaging.version

from ccmlib.common import (
ArgumentError, CCMError, get_default_path, rmdirs, validate_install_dir, get_scylla_version, aws_bucket_ls,
DOWNLOAD_IN_PROGRESS_FILE, print_if_standalone, LockFile)
from ccmlib.utils.download import download_file, download_version_from_s3
from ccmlib.utils.download import download_file, download_version_from_s3, get_url_hash
from ccmlib.utils.version import parse_version

GIT_REPO = "http://github.com/scylladb/scylla.git"
Expand Down Expand Up @@ -407,25 +402,29 @@ def download_packages(version_dir, packages, s3_url, scylla_product, version, ve
return package_version, packages


def setup_scylla_manager(scylla_manager_package=None):
def setup_scylla_manager(scylla_manager_package=None, verbose=False):

"""
download and cache scylla-manager RPMs,
:return:
"""

if scylla_manager_package and '--scylla-manager':
m = hashlib.md5()
m.update(scylla_manager_package.encode('utf-8'))
dir_hash = get_url_hash(scylla_manager_package)

# select a dir to change this version of scylla-manager based on the md5 of the path
manager_install_dir = directory_name(os.path.join('manager', m.hexdigest()))
# select a dir to change this version of scylla-manager based on the md5 of the path or the etag of the s3 object
manager_install_dir = directory_name(os.path.join('manager', dir_hash))
if not os.path.exists(manager_install_dir):
os.makedirs(manager_install_dir)
tar_data = requests.get(scylla_manager_package, stream=True)
destination_file = os.path.join(manager_install_dir, "manager.tar.gz")
with open(destination_file, mode="wb") as f:
f.write(tar_data.raw.read())
_, destination_file = tempfile.mkstemp(suffix=".tar.gz", prefix="ccm-manager-")

if os.path.exists(scylla_manager_package) and scylla_manager_package.endswith('.tar.gz'):
destination_file = scylla_manager_package
elif is_valid(scylla_manager_package):
res = download_version_from_s3(url=scylla_manager_package, target_path=destination_file, verbose=verbose)
if not res:
download_file(url=scylla_manager_package, target_path=destination_file, verbose=verbose)
fruch marked this conversation as resolved.
Show resolved Hide resolved

run(f"""
tar -xvf {destination_file} -C {manager_install_dir}
rm -f {destination_file}
Expand Down
29 changes: 28 additions & 1 deletion ccmlib/utils/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import shutil
import urllib.parse
import hashlib

import tqdm
import requests
Expand Down Expand Up @@ -143,10 +144,15 @@ def download_version_from_s3(url: str, target_path: str, verbose=False):
try:
metadata = s3_client.head_object(Bucket=bucket_name, Key=download_path)
except botocore.client.ClientError as ex:
if 'Not Found' in str(ex):
error_message = ex.response['Error']['Message']
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please revisit my previous comment and review the document? For a sample of how the error is examined, see https://github.com/aws/aws-cli/blob/855cd0ddb3c7fd47cab02a390281f29cc9bc744c/awscli/customizations/s3uploader.py#L123-L127

I think the "Message" field is supposed to be consumed by human beings, not checked by an application.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the code was 404, which is Not Found
and 403 which is Forbidden.

From my POV it's the same, and less clear or readable.

if error_message in ('NoSuchBucket', 'NoSuchKey', 'Not Found'):
logging.warning(f"url: '{url}' wasn't found on S3")
logging.warning(f"download might be very slow")
return None
elif error_message == "Forbidden":
logging.warning(f"url: '{url}' Forbidden (403) on S3")
logging.warning(f"download might be very slow")
return None
else:
raise

Expand All @@ -158,3 +164,24 @@ def download_version_from_s3(url: str, target_path: str, verbose=False):
transfer.download_file(bucket_name, download_path, target_path, callback=progress.update)

return target_path


def get_url_hash(url: str) -> str:
    """
    Get a hash-like identifier of a blob in s3/http/local file/dir.

    Resolution order:
      1. local file/dir  -> md5 hex digest of the path string (not of the content)
      2. S3 object       -> ETag reported by an unsigned ``head_object`` request
      3. plain HTTP      -> ETag header of a ``HEAD`` request
      4. no ETag at all  -> md5 hex digest of the url string

    :param url: local path or URL pointing at the blob
    :return: the identifier, without the quotes that surround a raw ETag value
    """

    def _md5_of_url() -> str:
        # hashlib only accepts bytes, so the text has to be encoded first
        return hashlib.md5(url.encode('utf-8')).hexdigest()

    if os.path.exists(url):  # if file/dir is local, hash based on the path
        return _md5_of_url()

    etag = None

    # first try is on s3; the url path is expected to look like /<bucket>/<key>
    parts = urllib.parse.urlparse(url)
    try:
        _, bucket_name, download_path = parts.path.split('/', maxsplit=2)
    except ValueError:
        # path has fewer than two segments, so it can't be a bucket/key pair
        bucket_name = download_path = None

    if bucket_name and download_path:
        s3_client = Session().client('s3', config=botocore.client.Config(signature_version=botocore.UNSIGNED))
        try:
            etag = s3_client.head_object(Bucket=bucket_name, Key=download_path).get('ETag')
        except botocore.client.ClientError:
            etag = None

    if not etag:
        # fallback to http
        etag = requests.head(url).headers.get('ETag')

    if not etag:
        # the server gave us nothing to identify the content with; use the url itself
        return _md5_of_url()

    # weak validators are prefixed with 'W/'; drop it so the result has no path separator
    if etag.startswith('W/'):
        etag = etag[2:]
    return etag.strip('"')