From ad16596119ff63a4badcb8baf0aa85c771a27df5 Mon Sep 17 00:00:00 2001 From: Israel Fruchter Date: Thu, 6 Jul 2023 01:00:00 +0300 Subject: [PATCH] ccmlib/common: improve `aws_bucket_ls` Automatically sort the returned file list by date, since the users of this function only get the key names and lose the modification date, and parsing the date out of the name isn't trivial --- ccmlib/common.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/ccmlib/common.py b/ccmlib/common.py index 61f3e24e..7f0047de 100644 --- a/ccmlib/common.py +++ b/ccmlib/common.py @@ -943,21 +943,16 @@ def assert_jdk_valid_for_cassandra_version(cassandra_version): exit(1) -def aws_bucket_ls(s3_url): +def aws_bucket_ls(s3_url: str) -> list[str]: bucket_object = s3_url.replace('https://s3.amazonaws.com/', '').split('/') prefix = '/'.join(bucket_object[1:]) - s3_conn = Session().client(service_name='s3', config=Config(signature_version=UNSIGNED)) - paginator = s3_conn.get_paginator('list_objects_v2') - pages = paginator.paginate(Bucket=bucket_object[0], Prefix=prefix) - files_in_bucket = [] - for page in pages: - if 'Contents' not in page: - break + s3_resource = Session().resource(service_name='s3', config=Config(signature_version=UNSIGNED)) + bucket = s3_resource.Bucket(bucket_object[0]) + + files_in_bucket = bucket.objects.filter(Prefix=prefix) - for obj in page['Contents']: - files_in_bucket.append(obj['Key'].replace(prefix + "/", '')) - return files_in_bucket + return [f.key.replace(prefix + "/", '') for f in sorted(files_in_bucket, key=lambda x: x.last_modified)] def grouper(n, iterable, padvalue=None):