From 3e056e5288d2685993a28f2da882861e5d7964c6 Mon Sep 17 00:00:00 2001
From: Christophe Bornet
Date: Fri, 23 Aug 2024 14:21:42 +0200
Subject: [PATCH] Add ruff rule for Error Messages (EM)

---
 examples/evaluation/tru_shared.py             | 21 +++++-----
 examples/notebooks/advancedRAG.ipynb          |  3 +-
 examples/notebooks/conftest.py                |  6 ++-
 examples/notebooks/langchain_evaluation.ipynb |  3 +-
 .../ragstack_colbert/cassandra_database.py    | 20 +++++----
 .../ragstack_colbert/colbert_vector_store.py  |  8 ++--
 libs/e2e-tests/e2e_tests/conftest.py          |  6 ++-
 .../langchain/test_compatibility_rag.py       |  3 +-
 .../ragstack_knowledge_store/_mmr_helper.py   |  5 ++-
 .../ragstack_knowledge_store/_utils.py        |  3 +-
 .../ragstack_knowledge_store/graph_store.py   | 41 +++++++++----------
 .../ragstack_knowledge_store/math.py          |  3 +-
 .../ragstack_langchain/colbert/__init__.py    |  5 ++-
 .../colbert/colbert_vector_store.py           | 19 +++++----
 .../ragstack_llamaindex/colbert/__init__.py   |  5 ++-
 .../tests/unit_tests/test_import.py           |  3 +-
 libs/ragulate/colbert_chunk_size_and_k.py     |  5 ++-
 libs/ragulate/ragstack_ragulate/analysis.py   |  3 +-
 .../ragstack_ragulate/cli_commands/query.py   |  8 ++--
 .../ragstack_ragulate/config/config_parser.py |  3 +-
 .../config/config_schema_0_1.py               | 28 +++++++------
 .../pipelines/base_pipeline.py                | 11 ++---
 .../pipelines/query_pipeline.py               |  3 +-
 .../ragstack_tests_utils/test_store.py        |  5 +--
 pyproject.toml                                |  1 -
 scripts/format-example-notebooks.py           |  3 +-
 scripts/generate-changelog.py                 |  3 +-
 27 files changed, 126 insertions(+), 101 deletions(-)

diff --git a/examples/evaluation/tru_shared.py b/examples/evaluation/tru_shared.py
index f7674ccea..680bf9534 100644
--- a/examples/evaluation/tru_shared.py
+++ b/examples/evaluation/tru_shared.py
@@ -129,7 +129,8 @@ def get_recorder(
         feedbacks=feedbacks,
         feedback_mode=feedback_mode,
     )
-    raise ValueError(f"Unknown framework: {framework} specified for get_recorder()")
+    msg = f"Unknown framework: {framework} specified for get_recorder()"
+    raise ValueError(msg)
 
 
 def get_azure_chat_model(
@@ -151,7 +152,8 @@ def get_azure_chat_model(
         model_version=model_version,
         temperature=temperature,
     )
-    raise ValueError(f"Unknown framework: {framework} specified for getChatModel()")
+    msg = f"Unknown framework: {framework} specified for getChatModel()"
+    raise ValueError(msg)
 
 
 def get_azure_embeddings_model(framework: Framework):
@@ -167,9 +169,8 @@ def get_azure_embeddings_model(framework: Framework):
         api_version="2023-05-15",
         temperature=temperature,
     )
-    raise ValueError(
-        f"Unknown framework: {framework} specified for getEmbeddingsModel()"
-    )
+    msg = f"Unknown framework: {framework} specified for getEmbeddingsModel()"
+    raise ValueError(msg)
 
 
 def get_astra_vector_store(framework: Framework, collection_name: str):
@@ -187,9 +188,8 @@ def get_astra_vector_store(framework: Framework, collection_name: str):
         token=os.getenv("ASTRA_DB_APPLICATION_TOKEN"),
         embedding_dimension=1536,
     )
-    raise ValueError(
-        f"Unknown framework: {framework} specified for get_astra_vector_store()"
-    )
+    msg = f"Unknown framework: {framework} specified for get_astra_vector_store()"
+    raise ValueError(msg)
 
 
 def execute_query(framework: Framework, pipeline, query) -> None:
@@ -198,9 +198,8 @@ def execute_query(framework: Framework, pipeline, query) -> None:
     elif framework == Framework.LLAMA_INDEX:
         pipeline.query(query)
     else:
-        raise ValueError(
-            f"Unknown framework: {framework} specified for execute_query()"
-        )
+        msg = f"Unknown framework: {framework} specified for execute_query()"
+        raise ValueError(msg)
 
 
 # runs the pipeline across all queries in all known datasets
diff --git a/examples/notebooks/advancedRAG.ipynb b/examples/notebooks/advancedRAG.ipynb
index fc21b6486..0d6dcd736 100644
--- a/examples/notebooks/advancedRAG.ipynb
+++ b/examples/notebooks/advancedRAG.ipynb
@@ -162,7 +162,8 @@
     "if uploaded:\n",
     "    SAMPLEDATA = uploaded\n",
     "else:\n",
-    "    raise ValueError(\"Cannot proceed without Sample Data. Please re-run the cell.\")\n",
+    "    msg = \"Cannot proceed without Sample Data. Please re-run the cell.\"\n",
+    "    raise ValueError(msg)\n",
     "\n",
     "print(\"Please make sure to change your queries to match the contents of your file!\")"
    ]
diff --git a/examples/notebooks/conftest.py b/examples/notebooks/conftest.py
index c3fd09f10..4311e2e42 100644
--- a/examples/notebooks/conftest.py
+++ b/examples/notebooks/conftest.py
@@ -8,10 +8,12 @@
 
 def get_required_env(name) -> str:
     if name not in os.environ:
-        raise ValueError(f"Missing required environment variable: {name}")
+        msg = f"Missing required environment variable: {name}"
+        raise ValueError(msg)
     value = os.environ[name]
     if not value:
-        raise ValueError(f"Empty required environment variable: {name}")
+        msg = f"Empty required environment variable: {name}"
+        raise ValueError(msg)
     return value
 
 
diff --git a/examples/notebooks/langchain_evaluation.ipynb b/examples/notebooks/langchain_evaluation.ipynb
index e18656a37..41b08c351 100644
--- a/examples/notebooks/langchain_evaluation.ipynb
+++ b/examples/notebooks/langchain_evaluation.ipynb
@@ -193,7 +193,8 @@
     "if uploaded:\n",
     "    SAMPLEDATA = uploaded\n",
     "else:\n",
-    "    raise ValueError(\"Cannot proceed without Sample Data. Please re-run the cell.\")\n",
+    "    msg = \"Cannot proceed without Sample Data. Please re-run the cell.\"\n",
+    "    raise ValueError(msg)\n",
     "\n",
     "print(\"Please make sure to change your queries to match the contents of your file!\")"
    ]
diff --git a/libs/colbert/ragstack_colbert/cassandra_database.py b/libs/colbert/ragstack_colbert/cassandra_database.py
index 67b53f7fc..562badc4e 100644
--- a/libs/colbert/ragstack_colbert/cassandra_database.py
+++ b/libs/colbert/ragstack_colbert/cassandra_database.py
@@ -45,10 +45,11 @@ class CassandraDatabase(BaseDatabase):
     _table: ClusteredMetadataVectorCassandraTable
 
     def __new__(cls) -> Self:  # noqa: D102
-        raise ValueError(
+        msg = (
             "This class cannot be instantiated directly. "
             "Please use the `from_astra()` or `from_session()` class methods."
         )
+        raise ValueError(msg)
 
     @classmethod
     def from_astra(
@@ -173,10 +174,11 @@ def add_chunks(self, chunks: list[Chunk]) -> list[tuple[str, int]]:
                 success_chunks.append((doc_id, chunk_id))
 
         if len(failed_chunks) > 0:
-            raise CassandraDatabaseError(
+            msg = (
                 f"add failed for these chunks: {failed_chunks}. "
                 f"See error logs for more info."
             )
+            raise CassandraDatabaseError(msg)
 
         return success_chunks
 
@@ -273,10 +275,11 @@ async def aadd_chunks(
                 failed_chunks.append((doc_id, chunk_id))
 
         if len(failed_chunks) > 0:
-            raise CassandraDatabaseError(
+            msg = (
                 f"add failed for these chunks: {failed_chunks}. "
                 f"See error logs for more info."
            )
+            raise CassandraDatabaseError(msg)
 
         return outputs
 
@@ -292,8 +295,9 @@ def delete_chunks(self, doc_ids: list[str]) -> bool:
                 failed_docs.append(doc_id)
 
         if len(failed_docs) > 0:
-            raise CassandraDatabaseError(
-                "delete failed for these docs: %s. See error logs for more info.",
-                failed_docs,
-            )
+            msg = (
+                f"delete failed for these docs: {failed_docs}. "
+                f"See error logs for more info."
+            )
+            raise CassandraDatabaseError(msg)
@@ -340,10 +344,11 @@ async def adelete_chunks(
                 failed_docs.append(doc_id)
 
         if len(failed_docs) > 0:
-            raise CassandraDatabaseError(
+            msg = (
                 f"delete failed for these docs: {failed_docs}. "
                 f"See error logs for more info."
             )
+            raise CassandraDatabaseError(msg)
 
         return success
 
@@ -379,9 +384,8 @@ async def get_chunk_data(
         row = await self._table.aget(partition_id=doc_id, row_id=row_id)
 
         if row is None:
-            raise CassandraDatabaseError(
-                f"no chunk found for doc_id: {doc_id} chunk_id: {chunk_id}"
-            )
+            msg = f"no chunk found for doc_id: {doc_id} chunk_id: {chunk_id}"
+            raise CassandraDatabaseError(msg)
 
         if include_embedding is True:
             embedded_chunk = await self.get_chunk_embedding(
diff --git a/libs/colbert/ragstack_colbert/colbert_vector_store.py b/libs/colbert/ragstack_colbert/colbert_vector_store.py
index 490098f16..f4b53c365 100644
--- a/libs/colbert/ragstack_colbert/colbert_vector_store.py
+++ b/libs/colbert/ragstack_colbert/colbert_vector_store.py
@@ -46,9 +46,8 @@ def __init__(
 
     def _validate_embedding_model(self) -> BaseEmbeddingModel:
         if self._embedding_model is None:
-            raise AttributeError(
-                "To use this method, `embedding_model` must be set on class creation."
-            )
+            msg = "To use this method, `embedding_model` must be set on class creation."
+            raise AttributeError(msg)
         return self._embedding_model
 
     def _build_chunks(
@@ -60,7 +59,8 @@ def _build_chunks(
         embedding_model = self._validate_embedding_model()
 
         if metadatas is not None and len(texts) != len(metadatas):
-            raise ValueError("Length of texts and metadatas must match.")
+            msg = "Length of texts and metadatas must match."
+            raise ValueError(msg)
 
         if doc_id is None:
             doc_id = str(uuid.uuid4())
diff --git a/libs/e2e-tests/e2e_tests/conftest.py b/libs/e2e-tests/e2e_tests/conftest.py
index e946eba9f..988ce672b 100644
--- a/libs/e2e-tests/e2e_tests/conftest.py
+++ b/libs/e2e-tests/e2e_tests/conftest.py
@@ -55,7 +55,8 @@ def get_required_env(name) -> str:
 
 vector_database_type = os.environ.get("VECTOR_DATABASE_TYPE", "astradb")
 if vector_database_type not in ["astradb", "local-cassandra"]:
-    raise ValueError(f"Invalid VECTOR_DATABASE_TYPE: {vector_database_type}")
+    msg = f"Invalid VECTOR_DATABASE_TYPE: {vector_database_type}"
+    raise ValueError(msg)
 
 is_astra = vector_database_type == "astradb"
 
@@ -67,7 +68,8 @@ def get_vector_store_handler(
         return AstraDBVectorStoreHandler(implementation)
     if vector_database_type == "local-cassandra":
         return CassandraVectorStoreHandler(implementation)
-    raise ValueError("Invalid vector store implementation")
+    msg = "Invalid vector store implementation"
+    raise ValueError(msg)
 
 
 failed_report_lines = []
diff --git a/libs/e2e-tests/e2e_tests/langchain/test_compatibility_rag.py b/libs/e2e-tests/e2e_tests/langchain/test_compatibility_rag.py
index 6c8ff3856..b3cafccfd 100644
--- a/libs/e2e-tests/e2e_tests/langchain/test_compatibility_rag.py
+++ b/libs/e2e-tests/e2e_tests/langchain/test_compatibility_rag.py
@@ -363,7 +363,8 @@ def _run_test(
             vector_store=vector_store, config=resolved_llm["nemo_config"]
         )
     else:
-        raise ValueError(f"Unknown test case: {test_case}")
+        msg = f"Unknown test case: {test_case}"
+        raise ValueError(msg)
 
 
 @pytest.fixture()
diff --git a/libs/knowledge-store/ragstack_knowledge_store/_mmr_helper.py b/libs/knowledge-store/ragstack_knowledge_store/_mmr_helper.py
index c6b72d58c..d63f86a75 100644
--- a/libs/knowledge-store/ragstack_knowledge_store/_mmr_helper.py
+++ b/libs/knowledge-store/ragstack_knowledge_store/_mmr_helper.py
@@ -131,11 +131,12 @@ def _pop_candidate(self, candidate_id: str) -> NDArray[np.float32]:
         """
         # Get the embedding for the id.
         index = self.candidate_id_to_index.pop(candidate_id)
-        if not self.candidates[index].id == candidate_id:
-            raise ValueError(
+        if self.candidates[index].id != candidate_id:
+            msg = (
                 "ID in self.candidate_id_to_index doesn't match the ID of the "
                 "corresponding index in self.candidates"
             )
+            raise ValueError(msg)
         embedding: NDArray[np.float32] = self.candidate_embeddings[index].copy()
 
         # Swap that index with the last index in the candidates and
diff --git a/libs/knowledge-store/ragstack_knowledge_store/_utils.py b/libs/knowledge-store/ragstack_knowledge_store/_utils.py
index 959edb456..37bf7d20d 100644
--- a/libs/knowledge-store/ragstack_knowledge_store/_utils.py
+++ b/libs/knowledge-store/ragstack_knowledge_store/_utils.py
@@ -16,7 +16,8 @@
 # This is equivalent to `itertools.batched`, but that is only available in 3.12
 def batched(iterable: Iterable[T], n: int) -> Iterator[tuple[T, ...]]:
     if n < 1:
-        raise ValueError("n must be at least one")
+        msg = "n must be at least one"
+        raise ValueError(msg)
     it = iter(iterable)
     while batch := tuple(islice(it, n)):
         yield batch
diff --git a/libs/knowledge-store/ragstack_knowledge_store/graph_store.py b/libs/knowledge-store/ragstack_knowledge_store/graph_store.py
index 02110a62e..2a02209b5 100644
--- a/libs/knowledge-store/ragstack_knowledge_store/graph_store.py
+++ b/libs/knowledge-store/ragstack_knowledge_store/graph_store.py
@@ -17,6 +17,7 @@
 
 from cassandra.cluster import ConsistencyLevel, PreparedStatement, Session
 from cassio.config import check_resolve_keyspace, check_resolve_session
+from typing_extensions import assert_never
 
 from ._mmr_helper import MmrHelper
 from .concurrency import ConcurrentQueries
@@ -76,7 +77,7 @@ def _is_metadata_field_indexed(field_name: str, policy: MetadataIndexingPolicy)
         return field_name in p_fields
     if p_mode == MetadataIndexingMode.DEFAULT_TO_SEARCHABLE:
         return field_name not in p_fields
-    raise ValueError(f"Unexpected metadata indexing mode {p_mode}")
+    assert_never(p_mode)
 
 
 def _serialize_metadata(md: dict[str, Any]) -> str:
@@ -170,10 +171,12 @@ def __init__(
             keyspace = check_resolve_keyspace(keyspace)
 
         if not _CQL_IDENTIFIER_PATTERN.fullmatch(keyspace):
-            raise ValueError(f"Invalid keyspace: {keyspace}")
+            msg = f"Invalid keyspace: {keyspace}"
+            raise ValueError(msg)
 
         if not _CQL_IDENTIFIER_PATTERN.fullmatch(node_table):
-            raise ValueError(f"Invalid node table name: {node_table}")
+            msg = f"Invalid node table name: {node_table}"
+            raise ValueError(msg)
 
         self._embedding = embedding
         self._node_table = node_table
@@ -188,10 +191,11 @@ def __init__(
         if setup_mode == SetupMode.SYNC:
             self._apply_schema()
         elif setup_mode != SetupMode.OFF:
-            raise ValueError(
+            msg = (
                 f"Invalid setup mode {setup_mode.name}. "
                 "Only SYNC and OFF are supported at the moment"
             )
+            raise ValueError(msg)
 
         # TODO: Parent ID / source ID / etc.
         self._insert_passage = session.prepare(
@@ -350,7 +354,8 @@ def node_callback(rows: Iterable[Any]) -> None:
 
         def get_result(node_id: str) -> Node:
             if (result := results[node_id]) is None:
-                raise ValueError(f"No node with ID '{node_id}'")
+                msg = f"No node with ID '{node_id}'"
+                raise ValueError(msg)
             return result
 
         return [get_result(node_id) for node_id in ids]
@@ -800,14 +805,11 @@ def _normalize_metadata_indexing_policy(
             elif metadata_indexing.lower() == "none":
                 mode, fields = (MetadataIndexingMode.DEFAULT_TO_UNSEARCHABLE, set())
             else:
-                raise ValueError(
-                    f"Unsupported metadata_indexing value '{metadata_indexing}'"
-                )
+                msg = f"Unsupported metadata_indexing value '{metadata_indexing}'"
+                raise ValueError(msg)
         else:
             if len(metadata_indexing) != 2:  # noqa: PLR2004
-                raise ValueError(
-                    f"Unsupported metadata_indexing value '{metadata_indexing}'."
-                )
+                assert_never(metadata_indexing)
             # it's a 2-tuple (mode, fields) still to normalize
             _mode, _field_spec = metadata_indexing
             fields = {_field_spec} if isinstance(_field_spec, str) else set(_field_spec)
@@ -826,10 +828,9 @@ def _normalize_metadata_indexing_policy(
             }:
                 mode = MetadataIndexingMode.DEFAULT_TO_SEARCHABLE
             else:
-                raise ValueError(
-                    f"Unsupported metadata indexing mode specification '{_mode}'"
-                )
-        return (mode, fields)
+                msg = f"Unsupported metadata indexing mode specification '{_mode}'"
+                raise ValueError(msg)
+        return mode, fields
 
     @staticmethod
     def _coerce_string(value: Any) -> str:
@@ -865,9 +866,8 @@ def _extract_where_clause_cql(
             if _is_metadata_field_indexed(key, self._metadata_indexing_policy):
                 wc_blocks.append(f"metadata_s['{key}'] = ?")
             else:
-                raise ValueError(
-                    "Non-indexed metadata fields cannot be used in queries."
-                )
+                msg = "Non-indexed metadata fields cannot be used in queries."
+                raise ValueError(msg)
 
         if len(wc_blocks) == 0:
             return ""
@@ -889,9 +889,8 @@ def _extract_where_clause_params(
             if _is_metadata_field_indexed(key, self._metadata_indexing_policy):
                 params.append(self._coerce_string(value=value))
             else:
-                raise ValueError(
-                    "Non-indexed metadata fields cannot be used in queries."
-                )
+                msg = "Non-indexed metadata fields cannot be used in queries."
+                raise ValueError(msg)
 
         return params
 
diff --git a/libs/knowledge-store/ragstack_knowledge_store/math.py b/libs/knowledge-store/ragstack_knowledge_store/math.py
index bb2231fdf..cbcc20593 100644
--- a/libs/knowledge-store/ragstack_knowledge_store/math.py
+++ b/libs/knowledge-store/ragstack_knowledge_store/math.py
@@ -22,10 +22,11 @@ def cosine_similarity(x: Matrix, y: Matrix) -> NDArray[np.float32]:
     x = np.array(x)
     y = np.array(y)
     if x.shape[1] != y.shape[1]:
-        raise ValueError(
+        msg = (
             f"Number of columns in X and Y must be the same. X has shape {x.shape} "
             f"and Y has shape {y.shape}."
         )
+        raise ValueError(msg)
     try:
         import simsimd as simd
     except ImportError:
diff --git a/libs/langchain/ragstack_langchain/colbert/__init__.py b/libs/langchain/ragstack_langchain/colbert/__init__.py
index efe14e895..19da0762b 100644
--- a/libs/langchain/ragstack_langchain/colbert/__init__.py
+++ b/libs/langchain/ragstack_langchain/colbert/__init__.py
@@ -1,10 +1,11 @@
 try:
     from ragstack_colbert.base_retriever import BaseRetriever  # noqa: F401
 except (ImportError, ModuleNotFoundError) as e:
-    raise ImportError(
+    msg = (
         "Could not import ragstack-ai-colbert. "
         "Please install it with `pip install ragstack-ai-langchain[colbert]`."
-    ) from e
+    )
+    raise ImportError(msg) from e
 
 from .colbert_retriever import ColbertRetriever
 from .colbert_vector_store import ColbertVectorStore
diff --git a/libs/langchain/ragstack_langchain/colbert/colbert_vector_store.py b/libs/langchain/ragstack_langchain/colbert/colbert_vector_store.py
index 5b2f153d1..8021dd9ec 100644
--- a/libs/langchain/ragstack_langchain/colbert/colbert_vector_store.py
+++ b/libs/langchain/ragstack_langchain/colbert/colbert_vector_store.py
@@ -246,11 +246,11 @@ def from_texts(
         **kwargs: Any,
     ) -> Self:
         if not isinstance(embedding, TokensEmbeddings):
-            raise TypeError("ColbertVectorStore requires a TokensEmbeddings embedding.")
+            msg = "ColbertVectorStore requires a TokensEmbeddings embedding."
+            raise TypeError(msg)
         if database is None:
-            raise ValueError(
-                "ColbertVectorStore requires a ColbertBaseDatabase database."
-            )
+            msg = "ColbertVectorStore requires a ColbertBaseDatabase database."
+            raise ValueError(msg)
         instance = cls(
             database=database, embedding_model=embedding.get_embedding_model(), **kwargs
         )
@@ -270,11 +270,11 @@ async def afrom_texts(
         **kwargs: Any,
     ) -> Self:
         if not isinstance(embedding, TokensEmbeddings):
-            raise TypeError("ColbertVectorStore requires a TokensEmbeddings embedding.")
+            msg = "ColbertVectorStore requires a TokensEmbeddings embedding."
+            raise TypeError(msg)
         if database is None:
-            raise ValueError(
-                "ColbertVectorStore requires a ColbertBaseDatabase database."
-            )
+            msg = "ColbertVectorStore requires a ColbertBaseDatabase database."
+            raise ValueError(msg)
         instance = cls(
             database=database, embedding_model=embedding.get_embedding_model(), **kwargs
         )
@@ -290,5 +290,6 @@ def as_retriever(self, k: Optional[int] = 5, **kwargs: Any) -> VectorStoreRetriever:
         search_kwargs["k"] = k
         search_type = kwargs.get("search_type", "similarity")
         if search_type != "similarity":
-            raise ValueError(f"Unsupported search type: {search_type}")
+            msg = f"Unsupported search type: {search_type}"
+            raise ValueError(msg)
         return super().as_retriever(search_kwargs=search_kwargs, **kwargs)
diff --git a/libs/llamaindex/ragstack_llamaindex/colbert/__init__.py b/libs/llamaindex/ragstack_llamaindex/colbert/__init__.py
index 7d7de5cca..46a0443f0 100644
--- a/libs/llamaindex/ragstack_llamaindex/colbert/__init__.py
+++ b/libs/llamaindex/ragstack_llamaindex/colbert/__init__.py
@@ -1,10 +1,11 @@
 try:
     from ragstack_colbert.base_retriever import BaseRetriever  # noqa: F401
 except (ImportError, ModuleNotFoundError) as e:
-    raise ImportError(
+    msg = (
         "Could not import ragstack-ai-colbert. "
         "Please install it with `pip install ragstack-ai-llamaindex[colbert]`."
-    ) from e
+    )
+    raise ImportError(msg) from e
 
 from .colbert_retriever import ColbertRetriever
 
diff --git a/libs/llamaindex/tests/unit_tests/test_import.py b/libs/llamaindex/tests/unit_tests/test_import.py
index c9200bdb4..529b4a053 100644
--- a/libs/llamaindex/tests/unit_tests/test_import.py
+++ b/libs/llamaindex/tests/unit_tests/test_import.py
@@ -15,7 +15,8 @@ def test_import() -> None:
 
 def check_no_import(fn: Callable[[], Any]) -> None:
     try:
         fn()
-        raise RuntimeError("Should have failed to import")
+        msg = "Should have failed to import"
+        raise RuntimeError(msg)
     except ImportError:
         pass
diff --git a/libs/ragulate/colbert_chunk_size_and_k.py b/libs/ragulate/colbert_chunk_size_and_k.py
index a7956cb59..af82eed55 100644
--- a/libs/ragulate/colbert_chunk_size_and_k.py
+++ b/libs/ragulate/colbert_chunk_size_and_k.py
@@ -88,8 +88,9 @@ async def ingest(file_path: str, chunk_size: int, **_: Any) -> None:
     print(f"It took {duration} seconds to load and parse the document")
 
     # confirm only one document returned per file
-    if not len(docs) == 1:
-        raise ValueError("Only one document must be returned per file")
+    if len(docs) != 1:
+        msg = "Only one document must be returned per file"
+        raise ValueError(msg)
 
     text_splitter = RecursiveCharacterTextSplitter(
         chunk_size=chunk_size,
diff --git a/libs/ragulate/ragstack_ragulate/analysis.py b/libs/ragulate/ragstack_ragulate/analysis.py
index e2ea76cf0..69356569d 100644
--- a/libs/ragulate/ragstack_ragulate/analysis.py
+++ b/libs/ragulate/ragstack_ragulate/analysis.py
@@ -263,4 +263,5 @@ def compare(self, recipes: list[str], output: str = "box-plots") -> None:
         elif output == "histogram-grid":
             self.output_histograms_by_dataset(df=df, metrics=metrics)
         else:
-            raise ValueError(f"Invalid output type: {output}")
+            msg = f"Invalid output type: {output}"
+            raise ValueError(msg)
diff --git a/libs/ragulate/ragstack_ragulate/cli_commands/query.py b/libs/ragulate/ragstack_ragulate/cli_commands/query.py
index 27bdf98de..540840b2c 100644
--- a/libs/ragulate/ragstack_ragulate/cli_commands/query.py
+++ b/libs/ragulate/ragstack_ragulate/cli_commands/query.py
@@ -116,15 +116,15 @@ def call_query(
 ) -> None:
     """Run a query pipeline."""
     if sample <= 0.0 or sample > 1.0:
-        raise ValueError("Sample percent must be between 0 and 1")
+        msg = "Sample percent must be between 0 and 1"
+        raise ValueError(msg)
 
     datasets = [find_dataset(name=name) for name in dataset]
 
     if subset is not None and len(subset) > 0:
         if len(datasets) > 1:
-            raise ValueError(
-                "Only can set `subset` param when there is one dataset"
-            )
+            msg = "Only can set `subset` param when there is one dataset"
+            raise ValueError(msg)
         datasets[0].subsets = subset
 
     ingredients = convert_vars_to_ingredients(
diff --git a/libs/ragulate/ragstack_ragulate/config/config_parser.py b/libs/ragulate/ragstack_ragulate/config/config_parser.py
index 527fac8fe..f3e08fe18 100644
--- a/libs/ragulate/ragstack_ragulate/config/config_parser.py
+++ b/libs/ragulate/ragstack_ragulate/config/config_parser.py
@@ -42,4 +42,5 @@ def from_file(cls, file_path: str) -> ConfigParser:
         version = config.get("version", _VERSION_0_1)
         if version == _VERSION_0_1:
             return cls(config_schema=ConfigSchema0Dot1(), config=config)
-        raise ValueError(f"config file version {version} is not supported")
+        msg = f"config file version {version} is not supported"
+        raise ValueError(msg)
diff --git a/libs/ragulate/ragstack_ragulate/config/config_schema_0_1.py b/libs/ragulate/ragstack_ragulate/config/config_schema_0_1.py
index d07edd525..7a31f2997 100644
--- a/libs/ragulate/ragstack_ragulate/config/config_schema_0_1.py
+++ b/libs/ragulate/ragstack_ragulate/config/config_schema_0_1.py
@@ -162,10 +162,11 @@ def parse_document(self, document: dict[str, Any]) -> Config:
                 doc_script = doc_step.get("script", None)
                 doc_method = doc_step.get("method", None)
                 if doc_name in steps:
-                    raise ValueError(
+                    msg = (
                         f"{step_kind} step names must be unique. Found {doc_name} more "
                         f"than once."
                     )
+                    raise ValueError(msg)
                 steps[doc_name] = Step(
                     name=doc_name, script=doc_script, method=doc_method
                 )
@@ -180,19 +181,19 @@ def parse_document(self, document: dict[str, Any]) -> Config:
             for doc_ingredient in doc_ingredients:
                 for key, value in doc_ingredient.items():
                     if key in ingredients:
-                        raise ValueError(
-                            f"ingredient {key} appears in recipe more than once."
-                        )
+                        msg = f"ingredient {key} appears in recipe more than once."
+                        raise ValueError(msg)
                     ingredients[key] = value
 
             doc_name = doc_recipe.get("name", None)
 
             if doc_name is None:
                 if len(doc_ingredients) == 0:
-                    raise ValueError(
+                    msg = (
                         "recipe must either have a `name` defined or contain at least "
                         "one ingredient."
                     )
+                    raise ValueError(msg)
                 recipe_name = dict_to_string(ingredients)
             else:
                 recipe_name = doc_name
@@ -203,20 +204,24 @@ def parse_document(self, document: dict[str, Any]) -> Config:
                 doc_recipe_step = doc_recipe.get(step_kind, None)
                 step = step_map[step_kind].get(doc_recipe_step, None)
                 if doc_recipe_step is not None and step is None:
-                    raise ValueError(
+                    msg = (
                         f"{step_kind} step {doc_recipe_step} for recipe {recipe_name} "
                         f"is not defined in the `steps` section"
                     )
+                    raise ValueError(msg)
                 if step:
                     recipe_steps[step_kind] = step
 
             if "query" not in recipe_steps:
-                raise ValueError(f"query step is missing for recipe {recipe_name}")
+                msg = f"query step is missing for recipe {recipe_name}"
+                raise ValueError(msg)
 
             if recipe_name in recipes:
-                raise ValueError(
-                    f"recipe names must be unique. Found {recipe_name} more than once."
+                msg = (
+                    "recipe names must be unique. "
+                    f"Found {recipe_name} more than once."
                 )
+                raise ValueError(msg)
 
             recipes[recipe_name] = Recipe(
                 name=recipe_name,
@@ -235,9 +240,8 @@ def parse_document(self, document: dict[str, Any]) -> Config:
             doc_dataset_name = doc_dataset.get("name", None)
             doc_dataset_kind = doc_dataset.get("kind", None)
             if doc_dataset_name is None or doc_dataset_kind is None:
-                raise ValueError(
-                    "datasets must be specified with `name` and `kind`"
-                )
+                msg = "datasets must be specified with `name` and `kind`"
+                raise ValueError(msg)
             datasets[doc_dataset_name] = get_dataset(
                 name=doc_dataset_name, kind=doc_dataset_kind
             )
diff --git a/libs/ragulate/ragstack_ragulate/pipelines/base_pipeline.py b/libs/ragulate/ragstack_ragulate/pipelines/base_pipeline.py
index d93a4a6ed..ab847412d 100644
--- a/libs/ragulate/ragstack_ragulate/pipelines/base_pipeline.py
+++ b/libs/ragulate/ragstack_ragulate/pipelines/base_pipeline.py
@@ -17,10 +17,12 @@ def load_module(file_path: str, name: str) -> ModuleType:
     """Load a module from a file path dynamically."""
     spec = importlib.util.spec_from_file_location(name, file_path)
     if spec is None:
-        raise ValueError(f"Could not load module from {file_path}")
+        msg = f"Could not load module from {file_path}"
+        raise ValueError(msg)
     module = importlib.util.module_from_spec(spec)
     if spec.loader is None:
-        raise ValueError(f"No Module loader found for {file_path}")
+        msg = f"No Module loader found for {file_path}"
+        raise ValueError(msg)
     spec.loader.exec_module(module)
     return module
 
@@ -48,9 +50,8 @@ def get_ingredients(
         if method_param in reserved_params or method_param in ["kwargs", "_"]:
             continue
         if method_param not in passed_ingredients:
-            raise ValueError(
-                f"method param '{method_param}' doesn't exist in the ingredients"
-            )
+            msg = f"method param '{method_param}' doesn't exist in the ingredients"
+            raise ValueError(msg)
         ingredients[method_param] = passed_ingredients[method_param]
 
     return ingredients
diff --git a/libs/ragulate/ragstack_ragulate/pipelines/query_pipeline.py b/libs/ragulate/ragstack_ragulate/pipelines/query_pipeline.py
index 5d61db958..a61dd67f1 100644
--- a/libs/ragulate/ragstack_ragulate/pipelines/query_pipeline.py
+++ b/libs/ragulate/ragstack_ragulate/pipelines/query_pipeline.py
@@ -191,7 +191,8 @@ def get_provider(self) -> LLMProvider:
             return AzureOpenAI(deployment_name=model_name)
         if llm_provider == "huggingface":
             return Huggingface(name=model_name)
-        raise ValueError(f"Unsupported provider: {llm_provider}")
+        msg = f"Unsupported provider: {llm_provider}"
+        raise ValueError(msg)
 
     def query(self) -> None:
         """Run the query pipeline."""
diff --git a/libs/tests-utils/ragstack_tests_utils/test_store.py b/libs/tests-utils/ragstack_tests_utils/test_store.py
index 1abff93fa..4cfcd5f88 100644
--- a/libs/tests-utils/ragstack_tests_utils/test_store.py
+++ b/libs/tests-utils/ragstack_tests_utils/test_store.py
@@ -54,9 +54,8 @@ class AstraDBTestStore(TestStore):
     def __init__(self) -> None:
         super().__init__()
         if not os.getenv("ASTRA_DB_ID") or not os.getenv("ASTRA_DB_TOKEN"):
-            raise ValueError(
-                "ASTRA_DB_ID and ASTRA_DB_TOKEN environment variables must be set"
-            )
+            msg = "ASTRA_DB_ID and ASTRA_DB_TOKEN environment variables must be set"
+            raise ValueError(msg)
         self.token = os.getenv("ASTRA_DB_TOKEN")
         self.database_id = os.getenv("ASTRA_DB_ID")
         self.env = os.getenv("ASTRA_DB_ENV", "prod").lower()
diff --git a/pyproject.toml b/pyproject.toml
index 67c7c58a8..dcdb776f2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -68,7 +68,6 @@ ignore = [
     "D104", # Do we want to activate (docstring in package) ?
"D105", # Do we want to activate (docstring in magic method) ? "D107", # Do we want to activate (docstring in __init__) ? - "EM", # Do we want to activate (error messages) ? "ERA", # Do we want to activate (no commented code) ? "FBT", # Do we want to activate (boolean trap) ? "FIX", # Do we want to activate (no fix-me) ? diff --git a/scripts/format-example-notebooks.py b/scripts/format-example-notebooks.py index 0ec3c857b..765cd7589 100755 --- a/scripts/format-example-notebooks.py +++ b/scripts/format-example-notebooks.py @@ -31,7 +31,8 @@ def main() -> None: found = True break if not found: - raise ValueError("No code cells found in file: ", file) + msg = "No code cells found in file: " + raise ValueError(msg, file) with open(file, "w") as f: f.write(json.dumps(as_json, indent=1, sort_keys=True)) diff --git a/scripts/generate-changelog.py b/scripts/generate-changelog.py index 518d79c1e..b800f9a09 100755 --- a/scripts/generate-changelog.py +++ b/scripts/generate-changelog.py @@ -56,7 +56,8 @@ def main() -> None: version_range = require[i:] break if not version_range: - raise ValueError(f"Could not parse version range from {require}") + msg = f"Could not parse version range from {require}" + raise ValueError(msg) for important_dependency in IMPORTANT_DEPENDENCIES: if package_name.startswith(important_dependency + "["): package_name = important_dependency