Skip to content

Commit

Permalink
scylla-manager: make --scylla-manager-package option more robust
Browse files Browse the repository at this point in the history
* it now first tries to download from s3 via the boto api (faster)
* a local tar.gz file of the manager can be used (no need to have it on s3/http)
* the hash identifying the specific version is now based on the ETag and
  not on the url path itself.

Fixes: #463
  • Loading branch information
fruch committed Jun 8, 2023
1 parent e5cda68 commit a52db55
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 14 deletions.
28 changes: 14 additions & 14 deletions ccmlib/scylla_repository.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@


import logging
import random
import time
Expand All @@ -16,17 +14,14 @@
import glob
import urllib

import hashlib
import requests
import yaml


import packaging.version

from ccmlib.common import (
ArgumentError, CCMError, get_default_path, rmdirs, validate_install_dir, get_scylla_version, aws_bucket_ls,
DOWNLOAD_IN_PROGRESS_FILE, wait_for_parallel_download_finish, print_if_standalone)
from ccmlib.utils.download import download_file, download_version_from_s3
from ccmlib.utils.download import download_file, download_version_from_s3, get_url_hash
from ccmlib.utils.version import parse_version

GIT_REPO = "http://github.com/scylladb/scylla.git"
Expand Down Expand Up @@ -414,25 +409,30 @@ def download_packages(version_dir, packages, s3_url, scylla_product, version, ve
return package_version, packages


def setup_scylla_manager(scylla_manager_package=None):
def setup_scylla_manager(scylla_manager_package=None, verbose=False):

"""
download and cache scylla-manager RPMs,
:return:
"""

if scylla_manager_package and '--scylla-manager':
m = hashlib.md5()
m.update(scylla_manager_package.encode('utf-8'))
dir_hash = get_url_hash(scylla_manager_package)

# select a dir to change this version of scylla-manager based on the md5 of the path
manager_install_dir = directory_name(os.path.join('manager', m.hexdigest()))
# select a dir to change this version of scylla-manager based on the md5 of the path or the etag of the s3 object
manager_install_dir = directory_name(os.path.join('manager', dir_hash))
if not os.path.exists(manager_install_dir):
os.makedirs(manager_install_dir)
tar_data = requests.get(scylla_manager_package, stream=True)
destination_file = os.path.join(manager_install_dir, "manager.tar.gz")
with open(destination_file, mode="wb") as f:
f.write(tar_data.raw.read())

if os.path.exists(scylla_manager_package) and scylla_manager_package.endswith('.tar.gz'):
destination_file = scylla_manager_package
elif is_valid(scylla_manager_package):
_, target = tempfile.mkstemp(suffix=".tar.gz", prefix="ccm-")
res = download_version_from_s3(url=scylla_manager_package, target_path=destination_file, verbose=verbose)
if not res:
download_file(url=scylla_manager_package, target_path=destination_file, verbose=verbose)

run(f"""
tar -xvf {destination_file} -C {manager_install_dir}
rm -f {destination_file}
Expand Down
22 changes: 22 additions & 0 deletions ccmlib/utils/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import shutil
import urllib.parse
import hashlib

import tqdm
import requests
Expand Down Expand Up @@ -158,3 +159,24 @@ def download_version_from_s3(url: str, target_path: str, verbose=False):
transfer.download_file(bucket_name, download_path, target_path, callback=progress.update)

return target_path


def get_url_hash(url: str) -> str:
    """
    Get a hash identifying the blob behind `url` (s3 / http / local file or dir).

    - local path: md5 hex digest of the path string itself
    - remote url: the ETag of the object (surrounding quotes stripped), looked up
      on s3 first and over plain http as a fallback
    - remote url without any ETag: md5 hex digest of the url string

    :param url: a local file/dir path, or the url of the object
    :return: the hash, always as `str`
    """

    if os.path.exists(url):  # if file/dir is local, hash based on the path
        # md5 only accepts bytes; hexdigest() already returns the `str` callers expect
        return hashlib.md5(url.encode()).hexdigest()

    etag = None

    # first try is on s3, only possible when the path looks like /<bucket>/<key>
    parts = urllib.parse.urlparse(url)
    path_parts = parts.path.split('/', maxsplit=2)
    if len(path_parts) == 3:
        _, bucket_name, download_path = path_parts
        s3_client = Session().client('s3', config=botocore.client.Config(signature_version=botocore.UNSIGNED))
        try:
            etag = s3_client.head_object(Bucket=bucket_name, Key=download_path).get('ETag')
        except botocore.client.ClientError:
            etag = None

    if not etag:
        # fallback to http
        etag = requests.head(url).headers.get('ETag')

    if not etag:
        # no ETag available at all, identify the blob by its url instead of crashing
        return hashlib.md5(url.encode()).hexdigest()

    # ETag values come wrapped in double quotes, drop them
    return etag[1:-1]

0 comments on commit a52db55

Please sign in to comment.