From 388eccd941abc13c4cc30e9b1c8595eb501399fc Mon Sep 17 00:00:00 2001 From: jtyoung84 <104453205+jtyoung84@users.noreply.github.com> Date: Sat, 14 Sep 2024 11:08:35 -0700 Subject: [PATCH 1/3] feat: trims down configs --- .../check_directories_job.py | 75 +++++++++++-------- tests/test_check_directories_job.py | 20 ++--- 2 files changed, 48 insertions(+), 47 deletions(-) diff --git a/src/aind_data_upload_utils/check_directories_job.py b/src/aind_data_upload_utils/check_directories_job.py index 7c3ba63..9b5a7c5 100644 --- a/src/aind_data_upload_utils/check_directories_job.py +++ b/src/aind_data_upload_utils/check_directories_job.py @@ -12,13 +12,16 @@ from glob import glob from pathlib import Path from time import time -from typing import List, Union +from typing import List, Optional, Union from aind_data_schema_models.modalities import Modality from aind_data_schema_models.platforms import Platform -from aind_data_transfer_models.core import BasicUploadJobConfigs +from aind_data_transfer_models.core import ( + BasicUploadJobConfigs, + ModalityConfigs, +) from dask import bag as dask_bag -from pydantic import Field, field_validator +from pydantic import BaseModel, Field, field_validator from pydantic_settings import BaseSettings # Set log level from env var @@ -26,44 +29,48 @@ logging.basicConfig(level=LOG_LEVEL) -class JobSettings(BaseSettings): - """Job settings for CheckDirectoriesJob""" +class DirectoriesToCheckConfigs(BaseModel): + """Basic model needed from BasicUploadConfigs""" - upload_configs: BasicUploadJobConfigs - n_partitions: int = Field(default=20) - num_of_smart_spim_levels: int = Field(default=3) + platform: Platform.ONE_OF + modalities: List[ModalityConfigs] = [] + metadata_dir: Optional[Path] = None - @field_validator("upload_configs", mode="before") + @field_validator("modalities", mode="before") def parse_json_str( - cls, upload_conf: Union[BasicUploadJobConfigs, dict] + cls, mod_configs: Union[List[ModalityConfigs], List[dict]] ) -> BasicUploadJobConfigs: """ Method to ignore computed fields in serialized model, which might raise validation errors. Parameters ---------- - upload_conf : Union[BasicUploadJobConfigs, dict] + mod_configs : Union[List[ModalityConfigs], List[dict]] Returns ------- - BasicUploadJobConfigs + List[ModalityConfigs] """ - # TODO: This should be moved to the BasicUploadJobConfigs class itself - if isinstance(upload_conf, dict): - json_obj = deepcopy(upload_conf) - # Remove s3_prefix computed field - if json_obj.get("s3_prefix") is not None: - del json_obj["s3_prefix"] - # Remove output_folder_name from modalities - if json_obj.get("modalities") is not None: - for modality in json_obj["modalities"]: - if "output_folder_name" in modality: - del modality["output_folder_name"] - return BasicUploadJobConfigs.model_validate_json( - json.dumps(json_obj) - ) - else: - return upload_conf + parsed_configs = [] + for mod_conf in mod_configs: + if isinstance(mod_conf, dict): + json_obj = deepcopy(mod_conf) + if "output_folder_name" in json_obj: + del json_obj["output_folder_name"] + parsed_configs.append( + ModalityConfigs.model_validate_json(json.dumps(json_obj)) + ) + else: + parsed_configs.append(mod_conf) + return parsed_configs + + +class JobSettings(BaseSettings, extra="allow"): + """Job settings for CheckDirectoriesJob""" + + directories_to_check_configs: DirectoriesToCheckConfigs + n_partitions: int = Field(default=20) + num_of_smart_spim_levels: int = Field(default=3) class CheckDirectoriesJob: @@ -113,16 +120,18 @@ def _get_list_of_directories_to_check(self) -> List[Union[Path, str]]: List[Union[Path, str]] """ - upload_configs = self.job_settings.upload_configs + dirs_to_check_configs = self.job_settings.directories_to_check_configs directories_to_check = [] - platform = upload_configs.platform + platform = dirs_to_check_configs.platform # First, check all the json files in the metadata dir - if upload_configs.metadata_dir is not None: - metadata_dir_path = str(upload_configs.metadata_dir).rstrip("/") + if dirs_to_check_configs.metadata_dir is not None: + metadata_dir_path = str(dirs_to_check_configs.metadata_dir).rstrip( + "/" + ) for json_file in glob(f"{metadata_dir_path}/*.json"): self._check_path(Path(json_file).as_posix()) # Next add modality directories - for modality_config in upload_configs.modalities: + for modality_config in dirs_to_check_configs.modalities: modality = modality_config.modality source_dir = modality_config.source # We'll handle SmartSPIM differently and partition 3 levels deep diff --git a/tests/test_check_directories_job.py b/tests/test_check_directories_job.py index 22975fb..94efd5b 100644 --- a/tests/test_check_directories_job.py +++ b/tests/test_check_directories_job.py @@ -7,13 +7,11 @@ from aind_data_schema_models.modalities import Modality from aind_data_schema_models.platforms import Platform -from aind_data_transfer_models.core import ( - BasicUploadJobConfigs, - ModalityConfigs, -) +from aind_data_transfer_models.core import ModalityConfigs from aind_data_upload_utils.check_directories_job import ( CheckDirectoriesJob, + DirectoriesToCheckConfigs, JobSettings, ) @@ -33,8 +31,7 @@ class TestJobSettings(unittest.TestCase): @classmethod def setUpClass(cls) -> None: """Sets up class with example upload configs""" - example_upload_configs = BasicUploadJobConfigs( - project_name="SmartSPIM", + example_upload_configs = DirectoriesToCheckConfigs( platform=Platform.SMARTSPIM, modalities=[ ModalityConfigs( @@ -52,8 +49,6 @@ def setUpClass(cls) -> None: modality=Modality.SPIM, ), ], - subject_id="12345", - acq_datetime="2020-10-10T01:01:01", metadata_dir=(RESOURCES_DIR / "metadata_dir").as_posix(), ) cls.example_upload_configs = example_upload_configs @@ -61,7 +56,7 @@ def setUpClass(cls) -> None: def test_class_constructor(self): """Tests that job settings can be constructed from serialized json.""" upload_configs = self.example_upload_configs - job_settings = JobSettings(upload_configs=upload_configs) + job_settings = JobSettings(directories_to_check_configs=upload_configs) deserialized_settings = job_settings.model_validate_json( job_settings.model_dump_json() ) @@ -74,8 +69,7 @@ class TestCheckDirectoriesJob(unittest.TestCase): @classmethod def setUpClass(cls) -> None: """Sets up class with example settings""" - example_upload_configs = BasicUploadJobConfigs( - project_name="SmartSPIM", + example_upload_configs = DirectoriesToCheckConfigs( platform=Platform.SMARTSPIM, modalities=[ ModalityConfigs( @@ -93,13 +87,11 @@ def setUpClass(cls) -> None: modality=Modality.SPIM, ), ], - subject_id="12345", - acq_datetime="2020-10-10T01:01:01", metadata_dir=(RESOURCES_DIR / "metadata_dir").as_posix(), ) cls.example_job = CheckDirectoriesJob( job_settings=JobSettings( - upload_configs=example_upload_configs, + directories_to_check_configs=example_upload_configs, num_of_smart_spim_levels=2, ) ) From 04532ba77c675e4237703cdfb973db349cea788b Mon Sep 17 00:00:00 2001 From: jtyoung84 <104453205+jtyoung84@users.noreply.github.com> Date: Sat, 14 Sep 2024 11:12:39 -0700 Subject: [PATCH 2/3] feat: remove unused import --- src/aind_data_upload_utils/check_directories_job.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/aind_data_upload_utils/check_directories_job.py b/src/aind_data_upload_utils/check_directories_job.py index 9b5a7c5..d9cebc5 100644 --- a/src/aind_data_upload_utils/check_directories_job.py +++ b/src/aind_data_upload_utils/check_directories_job.py @@ -16,10 +16,7 @@ from aind_data_schema_models.modalities import Modality from aind_data_schema_models.platforms import Platform -from aind_data_transfer_models.core import ( - BasicUploadJobConfigs, - ModalityConfigs, -) +from aind_data_transfer_models.core import ModalityConfigs from dask import bag as dask_bag from pydantic import BaseModel, Field, field_validator from pydantic_settings import BaseSettings @@ -39,7 +36,7 @@ class DirectoriesToCheckConfigs(BaseModel): @field_validator("modalities", mode="before") def parse_json_str( cls, mod_configs: Union[List[ModalityConfigs], List[dict]] - ) -> BasicUploadJobConfigs: + ) -> List[ModalityConfigs]: """ Method to ignore computed fields in serialized model, which might raise validation errors. From 3e95321fdbc0f3e39496691df61ae510bbc1f84e Mon Sep 17 00:00:00 2001 From: jtyoung84 <104453205+jtyoung84@users.noreply.github.com> Date: Sat, 14 Sep 2024 11:54:10 -0700 Subject: [PATCH 3/3] build: updates aind-data-transfer-models --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index cdafef6..c6906ad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ dynamic = ["version"] dependencies = [ 'dask', - 'aind-data-transfer-models==0.8.2' + 'aind-data-transfer-models==0.8.4' ] [project.optional-dependencies]