From 3e056e5288d2685993a28f2da882861e5d7964c6 Mon Sep 17 00:00:00 2001
From: Christophe Bornet
Date: Fri, 23 Aug 2024 14:21:42 +0200
Subject: [PATCH] Add ruff rule for Error Messages (EM)

---
 examples/evaluation/tru_shared.py             | 21 +++++-----
 examples/notebooks/advancedRAG.ipynb          |  3 +-
 examples/notebooks/conftest.py                |  6 ++-
 examples/notebooks/langchain_evaluation.ipynb |  3 +-
 .../ragstack_colbert/cassandra_database.py    | 20 +++++----
 .../ragstack_colbert/colbert_vector_store.py  |  8 ++--
 libs/e2e-tests/e2e_tests/conftest.py          |  6 ++-
 .../langchain/test_compatibility_rag.py       |  3 +-
 .../ragstack_knowledge_store/_mmr_helper.py   |  5 ++-
 .../ragstack_knowledge_store/_utils.py        |  3 +-
 .../ragstack_knowledge_store/graph_store.py   | 41 +++++++++----------
 .../ragstack_knowledge_store/math.py          |  3 +-
 .../ragstack_langchain/colbert/__init__.py    |  5 ++-
 .../colbert/colbert_vector_store.py           | 19 +++++----
 .../ragstack_llamaindex/colbert/__init__.py   |  5 ++-
 .../tests/unit_tests/test_import.py           |  3 +-
 libs/ragulate/colbert_chunk_size_and_k.py     |  5 ++-
 libs/ragulate/ragstack_ragulate/analysis.py   |  3 +-
 .../ragstack_ragulate/cli_commands/query.py   |  8 ++--
 .../ragstack_ragulate/config/config_parser.py |  3 +-
 .../config/config_schema_0_1.py               | 28 +++++++------
 .../pipelines/base_pipeline.py                | 11 ++---
 .../pipelines/query_pipeline.py               |  3 +-
 .../ragstack_tests_utils/test_store.py        |  5 +--
 pyproject.toml                                |  1 -
 scripts/format-example-notebooks.py           |  3 +-
 scripts/generate-changelog.py                 |  3 +-
 27 files changed, 126 insertions(+), 101 deletions(-)

diff --git a/examples/evaluation/tru_shared.py b/examples/evaluation/tru_shared.py
index f7674ccea..680bf9534 100644
--- a/examples/evaluation/tru_shared.py
+++ b/examples/evaluation/tru_shared.py
@@ -129,7 +129,8 @@ def get_recorder(
         feedbacks=feedbacks,
         feedback_mode=feedback_mode,
     )
-    raise ValueError(f"Unknown framework: {framework} specified for get_recorder()")
+    msg = f"Unknown framework: {framework} specified for get_recorder()"
+    raise ValueError(msg)
 
 
 def get_azure_chat_model(
@@ -151,7 +152,8 @@ def get_azure_chat_model(
         model_version=model_version,
         temperature=temperature,
     )
-    raise ValueError(f"Unknown framework: {framework} specified for getChatModel()")
+    msg = f"Unknown framework: {framework} specified for getChatModel()"
+    raise ValueError(msg)
 
 
 def get_azure_embeddings_model(framework: Framework):
@@ -167,9 +169,8 @@ def get_azure_embeddings_model(framework: Framework):
         api_version="2023-05-15",
         temperature=temperature,
     )
-    raise ValueError(
-        f"Unknown framework: {framework} specified for getEmbeddingsModel()"
-    )
+    msg = f"Unknown framework: {framework} specified for getEmbeddingsModel()"
+    raise ValueError(msg)
 
 
 def get_astra_vector_store(framework: Framework, collection_name: str):
@@ -187,9 +188,8 @@ def get_astra_vector_store(framework: Framework, collection_name: str):
         token=os.getenv("ASTRA_DB_APPLICATION_TOKEN"),
         embedding_dimension=1536,
     )
-    raise ValueError(
-        f"Unknown framework: {framework} specified for get_astra_vector_store()"
-    )
+    msg = f"Unknown framework: {framework} specified for get_astra_vector_store()"
+    raise ValueError(msg)
 
 
 def execute_query(framework: Framework, pipeline, query) -> None:
@@ -198,9 +198,8 @@ def execute_query(framework: Framework, pipeline, query) -> None:
     elif framework == Framework.LLAMA_INDEX:
         pipeline.query(query)
     else:
-        raise ValueError(
-            f"Unknown framework: {framework} specified for execute_query()"
-        )
+        msg = f"Unknown framework: {framework} specified for execute_query()"
+        raise ValueError(msg)
 
 
 # runs the pipeline across all queries in all known datasets
diff --git a/examples/notebooks/advancedRAG.ipynb b/examples/notebooks/advancedRAG.ipynb
index fc21b6486..0d6dcd736 100644
--- a/examples/notebooks/advancedRAG.ipynb
+++ b/examples/notebooks/advancedRAG.ipynb
@@ -162,7 +162,8 @@
     "if uploaded:\n",
     "    SAMPLEDATA = uploaded\n",
     "else:\n",
-    "    raise ValueError(\"Cannot proceed without Sample Data. Please re-run the cell.\")\n",
+    "    msg = \"Cannot proceed without Sample Data. Please re-run the cell.\"\n",
+    "    raise ValueError(msg)\n",
     "\n",
     "print(\"Please make sure to change your queries to match the contents of your file!\")"
    ]
diff --git a/examples/notebooks/conftest.py b/examples/notebooks/conftest.py
index c3fd09f10..4311e2e42 100644
--- a/examples/notebooks/conftest.py
+++ b/examples/notebooks/conftest.py
@@ -8,10 +8,12 @@
 
 def get_required_env(name) -> str:
     if name not in os.environ:
-        raise ValueError(f"Missing required environment variable: {name}")
+        msg = f"Missing required environment variable: {name}"
+        raise ValueError(msg)
     value = os.environ[name]
     if not value:
-        raise ValueError(f"Empty required environment variable: {name}")
+        msg = f"Empty required environment variable: {name}"
+        raise ValueError(msg)
     return value
 
 
diff --git a/examples/notebooks/langchain_evaluation.ipynb b/examples/notebooks/langchain_evaluation.ipynb
index e18656a37..41b08c351 100644
--- a/examples/notebooks/langchain_evaluation.ipynb
+++ b/examples/notebooks/langchain_evaluation.ipynb
@@ -193,7 +193,8 @@
     "if uploaded:\n",
     "    SAMPLEDATA = uploaded\n",
     "else:\n",
-    "    raise ValueError(\"Cannot proceed without Sample Data. Please re-run the cell.\")\n",
+    "    msg = \"Cannot proceed without Sample Data. Please re-run the cell.\"\n",
+    "    raise ValueError(msg)\n",
     "\n",
     "print(\"Please make sure to change your queries to match the contents of your file!\")"
    ]
diff --git a/libs/colbert/ragstack_colbert/cassandra_database.py b/libs/colbert/ragstack_colbert/cassandra_database.py
index 67b53f7fc..562badc4e 100644
--- a/libs/colbert/ragstack_colbert/cassandra_database.py
+++ b/libs/colbert/ragstack_colbert/cassandra_database.py
@@ -45,10 +45,11 @@ class CassandraDatabase(BaseDatabase):
     _table: ClusteredMetadataVectorCassandraTable
 
     def __new__(cls) -> Self:  # noqa: D102
-        raise ValueError(
+        msg = (
             "This class cannot be instantiated directly. "
             "Please use the `from_astra()` or `from_session()` class methods."
         )
+        raise ValueError(msg)
 
     @classmethod
     def from_astra(
@@ -173,10 +174,11 @@ def add_chunks(self, chunks: list[Chunk]) -> list[tuple[str, int]]:
                 success_chunks.append((doc_id, chunk_id))
 
         if len(failed_chunks) > 0:
-            raise CassandraDatabaseError(
+            msg = (
                 f"add failed for these chunks: {failed_chunks}. "
                 f"See error logs for more info."
             )
+            raise CassandraDatabaseError(msg)
 
         return success_chunks
 
@@ -273,10 +275,11 @@ async def aadd_chunks(
                 failed_chunks.append((doc_id, chunk_id))
 
         if len(failed_chunks) > 0:
-            raise CassandraDatabaseError(
+            msg = (
                 f"add failed for these chunks: {failed_chunks}. "
                 f"See error logs for more info."
            )
+            raise CassandraDatabaseError(msg)
 
         return outputs
 
@@ -292,8 +295,9 @@ def delete_chunks(self, doc_ids: list[str]) -> bool:
                 failed_docs.append(doc_id)
 
         if len(failed_docs) > 0:
-            raise CassandraDatabaseError(
-                "delete failed for these docs: %s. See error logs for more info.",
-                failed_docs,
-            )
+            msg = (
+                f"delete failed for these docs: {failed_docs}. "
+                f"See error logs for more info."
+            )
+            raise CassandraDatabaseError(msg)
@@ -340,10 +344,11 @@ async def adelete_chunks(
                 failed_docs.append(doc_id)
 
         if len(failed_docs) > 0:
-            raise CassandraDatabaseError(
+            msg = (
                 f"delete failed for these docs: {failed_docs}. "
                 f"See error logs for more info."
             )
+            raise CassandraDatabaseError(msg)
 
         return success
 
@@ -379,9 +384,8 @@ async def get_chunk_data(
         row = await self._table.aget(partition_id=doc_id, row_id=row_id)
 
         if row is None:
-            raise CassandraDatabaseError(
-                f"no chunk found for doc_id: {doc_id} chunk_id: {chunk_id}"
-            )
+            msg = f"no chunk found for doc_id: {doc_id} chunk_id: {chunk_id}"
+            raise CassandraDatabaseError(msg)
 
         if include_embedding is True:
             embedded_chunk = await self.get_chunk_embedding(
diff --git a/libs/colbert/ragstack_colbert/colbert_vector_store.py b/libs/colbert/ragstack_colbert/colbert_vector_store.py
index 490098f16..f4b53c365 100644
--- a/libs/colbert/ragstack_colbert/colbert_vector_store.py
+++ b/libs/colbert/ragstack_colbert/colbert_vector_store.py
@@ -46,9 +46,8 @@ def __init__(
 
     def _validate_embedding_model(self) -> BaseEmbeddingModel:
         if self._embedding_model is None:
-            raise AttributeError(
-                "To use this method, `embedding_model` must be set on class creation."
-            )
+            msg = "To use this method, `embedding_model` must be set on class creation."
+            raise AttributeError(msg)
         return self._embedding_model
 
     def _build_chunks(
@@ -60,7 +59,8 @@ def _build_chunks(
         embedding_model = self._validate_embedding_model()
 
         if metadatas is not None and len(texts) != len(metadatas):
-            raise ValueError("Length of texts and metadatas must match.")
+            msg = "Length of texts and metadatas must match."
+            raise ValueError(msg)
 
         if doc_id is None:
             doc_id = str(uuid.uuid4())
diff --git a/libs/e2e-tests/e2e_tests/conftest.py b/libs/e2e-tests/e2e_tests/conftest.py
index e946eba9f..988ce672b 100644
--- a/libs/e2e-tests/e2e_tests/conftest.py
+++ b/libs/e2e-tests/e2e_tests/conftest.py
@@ -55,7 +55,8 @@ def get_required_env(name) -> str:
 
 vector_database_type = os.environ.get("VECTOR_DATABASE_TYPE", "astradb")
 if vector_database_type not in ["astradb", "local-cassandra"]:
-    raise ValueError(f"Invalid VECTOR_DATABASE_TYPE: {vector_database_type}")
+    msg = f"Invalid VECTOR_DATABASE_TYPE: {vector_database_type}"
+    raise ValueError(msg)
 
 is_astra = vector_database_type == "astradb"
 
@@ -67,7 +68,8 @@ def get_vector_store_handler(
         return AstraDBVectorStoreHandler(implementation)
     if vector_database_type == "local-cassandra":
         return CassandraVectorStoreHandler(implementation)
-    raise ValueError("Invalid vector store implementation")
+    msg = "Invalid vector store implementation"
+    raise ValueError(msg)
 
 
 failed_report_lines = []
diff --git a/libs/e2e-tests/e2e_tests/langchain/test_compatibility_rag.py b/libs/e2e-tests/e2e_tests/langchain/test_compatibility_rag.py
index 6c8ff3856..b3cafccfd 100644
--- a/libs/e2e-tests/e2e_tests/langchain/test_compatibility_rag.py
+++ b/libs/e2e-tests/e2e_tests/langchain/test_compatibility_rag.py
@@ -363,7 +363,8 @@ def _run_test(
             vector_store=vector_store, config=resolved_llm["nemo_config"]
         )
     else:
-        raise ValueError(f"Unknown test case: {test_case}")
+        msg = f"Unknown test case: {test_case}"
+        raise ValueError(msg)
 
 
 @pytest.fixture()
diff --git a/libs/knowledge-store/ragstack_knowledge_store/_mmr_helper.py b/libs/knowledge-store/ragstack_knowledge_store/_mmr_helper.py
index c6b72d58c..d63f86a75 100644
--- a/libs/knowledge-store/ragstack_knowledge_store/_mmr_helper.py
+++ b/libs/knowledge-store/ragstack_knowledge_store/_mmr_helper.py
@@ -131,11 +131,12 @@ def _pop_candidate(self, candidate_id: str) -> NDArray[np.float32]:
         """
         # Get the embedding for the id.
         index = self.candidate_id_to_index.pop(candidate_id)
-        if not self.candidates[index].id == candidate_id:
-            raise ValueError(
+        if self.candidates[index].id != candidate_id:
+            msg = (
                 "ID in self.candidate_id_to_index doesn't match the ID of the "
                 "corresponding index in self.candidates"
             )
+            raise ValueError(msg)
         embedding: NDArray[np.float32] = self.candidate_embeddings[index].copy()
 
         # Swap that index with the last index in the candidates and
diff --git a/libs/knowledge-store/ragstack_knowledge_store/_utils.py b/libs/knowledge-store/ragstack_knowledge_store/_utils.py
index 959edb456..37bf7d20d 100644
--- a/libs/knowledge-store/ragstack_knowledge_store/_utils.py
+++ b/libs/knowledge-store/ragstack_knowledge_store/_utils.py
@@ -16,7 +16,8 @@
 # This is equivalent to `itertools.batched`, but that is only available in 3.12
 def batched(iterable: Iterable[T], n: int) -> Iterator[tuple[T, ...]]:
     if n < 1:
-        raise ValueError("n must be at least one")
+        msg = "n must be at least one"
+        raise ValueError(msg)
     it = iter(iterable)
     while batch := tuple(islice(it, n)):
         yield batch
diff --git a/libs/knowledge-store/ragstack_knowledge_store/graph_store.py b/libs/knowledge-store/ragstack_knowledge_store/graph_store.py
index 02110a62e..2a02209b5 100644
--- a/libs/knowledge-store/ragstack_knowledge_store/graph_store.py
+++ b/libs/knowledge-store/ragstack_knowledge_store/graph_store.py
@@ -17,6 +17,7 @@
 
 from cassandra.cluster import ConsistencyLevel, PreparedStatement, Session
 from cassio.config import check_resolve_keyspace, check_resolve_session
+from typing_extensions import assert_never
 
 from ._mmr_helper import MmrHelper
 from .concurrency import ConcurrentQueries
@@ -76,7 +77,7 @@ def _is_metadata_field_indexed(field_name: str, policy: MetadataIndexingPolicy)
         return field_name in p_fields
     if p_mode == MetadataIndexingMode.DEFAULT_TO_SEARCHABLE:
         return field_name not in p_fields
-    raise ValueError(f"Unexpected metadata indexing mode {p_mode}")
+    assert_never(p_mode)
 
 
 def _serialize_metadata(md: dict[str, Any]) -> str:
@@ -170,10 +171,12 @@ def __init__(
             keyspace = check_resolve_keyspace(keyspace)
 
         if not _CQL_IDENTIFIER_PATTERN.fullmatch(keyspace):
-            raise ValueError(f"Invalid keyspace: {keyspace}")
+            msg = f"Invalid keyspace: {keyspace}"
+            raise ValueError(msg)
 
         if not _CQL_IDENTIFIER_PATTERN.fullmatch(node_table):
-            raise ValueError(f"Invalid node table name: {node_table}")
+            msg = f"Invalid node table name: {node_table}"
+            raise ValueError(msg)
 
         self._embedding = embedding
         self._node_table = node_table
@@ -188,10 +191,11 @@ def __init__(
         if setup_mode == SetupMode.SYNC:
             self._apply_schema()
         elif setup_mode != SetupMode.OFF:
-            raise ValueError(
+            msg = (
                 f"Invalid setup mode {setup_mode.name}. "
                 "Only SYNC and OFF are supported at the moment"
             )
+            raise ValueError(msg)
 
         # TODO: Parent ID / source ID / etc.
         self._insert_passage = session.prepare(
@@ -350,7 +354,8 @@ def node_callback(rows: Iterable[Any]) -> None:
 
         def get_result(node_id: str) -> Node:
             if (result := results[node_id]) is None:
-                raise ValueError(f"No node with ID '{node_id}'")
+                msg = f"No node with ID '{node_id}'"
+                raise ValueError(msg)
             return result
 
         return [get_result(node_id) for node_id in ids]
@@ -800,14 +805,11 @@ def _normalize_metadata_indexing_policy(
             elif metadata_indexing.lower() == "none":
                 mode, fields = (MetadataIndexingMode.DEFAULT_TO_UNSEARCHABLE, set())
             else:
-                raise ValueError(
-                    f"Unsupported metadata_indexing value '{metadata_indexing}'"
-                )
+                msg = f"Unsupported metadata_indexing value '{metadata_indexing}'"
+                raise ValueError(msg)
         else:
             if len(metadata_indexing) != 2:  # noqa: PLR2004
-                raise ValueError(
-                    f"Unsupported metadata_indexing value '{metadata_indexing}'."
-                )
+                assert_never(metadata_indexing)
             # it's a 2-tuple (mode, fields) still to normalize
             _mode, _field_spec = metadata_indexing
             fields = {_field_spec} if isinstance(_field_spec, str) else set(_field_spec)
@@ -826,10 +828,9 @@ def _normalize_metadata_indexing_policy(
             }:
                 mode = MetadataIndexingMode.DEFAULT_TO_SEARCHABLE
             else:
-                raise ValueError(
-                    f"Unsupported metadata indexing mode specification '{_mode}'"
-                )
-        return (mode, fields)
+                msg = f"Unsupported metadata indexing mode specification '{_mode}'"
+                raise ValueError(msg)
+        return mode, fields
 
     @staticmethod
     def _coerce_string(value: Any) -> str:
@@ -865,9 +866,8 @@ def _extract_where_clause_cql(
             if _is_metadata_field_indexed(key, self._metadata_indexing_policy):
                 wc_blocks.append(f"metadata_s['{key}'] = ?")
             else:
-                raise ValueError(
-                    "Non-indexed metadata fields cannot be used in queries."
-                )
+                msg = "Non-indexed metadata fields cannot be used in queries."
+                raise ValueError(msg)
 
         if len(wc_blocks) == 0:
             return ""
@@ -889,9 +889,8 @@ def _extract_where_clause_params(
             if _is_metadata_field_indexed(key, self._metadata_indexing_policy):
                 params.append(self._coerce_string(value=value))
             else:
-                raise ValueError(
-                    "Non-indexed metadata fields cannot be used in queries."
-                )
+                msg = "Non-indexed metadata fields cannot be used in queries."
+                raise ValueError(msg)
 
         return params
 
diff --git a/libs/knowledge-store/ragstack_knowledge_store/math.py b/libs/knowledge-store/ragstack_knowledge_store/math.py
index bb2231fdf..cbcc20593 100644
--- a/libs/knowledge-store/ragstack_knowledge_store/math.py
+++ b/libs/knowledge-store/ragstack_knowledge_store/math.py
@@ -22,10 +22,11 @@ def cosine_similarity(x: Matrix, y: Matrix) -> NDArray[np.float32]:
     x = np.array(x)
     y = np.array(y)
     if x.shape[1] != y.shape[1]:
-        raise ValueError(
+        msg = (
             f"Number of columns in X and Y must be the same. X has shape {x.shape} "
             f"and Y has shape {y.shape}."
         )
+        raise ValueError(msg)
     try:
         import simsimd as simd
     except ImportError:
diff --git a/libs/langchain/ragstack_langchain/colbert/__init__.py b/libs/langchain/ragstack_langchain/colbert/__init__.py
index efe14e895..19da0762b 100644
--- a/libs/langchain/ragstack_langchain/colbert/__init__.py
+++ b/libs/langchain/ragstack_langchain/colbert/__init__.py
@@ -1,10 +1,11 @@
 try:
     from ragstack_colbert.base_retriever import BaseRetriever  # noqa: F401
 except (ImportError, ModuleNotFoundError) as e:
-    raise ImportError(
+    msg = (
         "Could not import ragstack-ai-colbert. "
         "Please install it with `pip install ragstack-ai-langchain[colbert]`."
-    ) from e
+    )
+    raise ImportError(msg) from e
 
 from .colbert_retriever import ColbertRetriever
 from .colbert_vector_store import ColbertVectorStore
diff --git a/libs/langchain/ragstack_langchain/colbert/colbert_vector_store.py b/libs/langchain/ragstack_langchain/colbert/colbert_vector_store.py
index 5b2f153d1..8021dd9ec 100644
--- a/libs/langchain/ragstack_langchain/colbert/colbert_vector_store.py
+++ b/libs/langchain/ragstack_langchain/colbert/colbert_vector_store.py
@@ -246,11 +246,11 @@ def from_texts(
         **kwargs: Any,
     ) -> Self:
         if not isinstance(embedding, TokensEmbeddings):
-            raise TypeError("ColbertVectorStore requires a TokensEmbeddings embedding.")
+            msg = "ColbertVectorStore requires a TokensEmbeddings embedding."
+            raise TypeError(msg)
         if database is None:
-            raise ValueError(
-                "ColbertVectorStore requires a ColbertBaseDatabase database."
-            )
+            msg = "ColbertVectorStore requires a ColbertBaseDatabase database."
+            raise ValueError(msg)
         instance = cls(
             database=database, embedding_model=embedding.get_embedding_model(), **kwargs
         )
@@ -270,11 +270,11 @@ async def afrom_texts(
         **kwargs: Any,
     ) -> Self:
         if not isinstance(embedding, TokensEmbeddings):
-            raise TypeError("ColbertVectorStore requires a TokensEmbeddings embedding.")
+            msg = "ColbertVectorStore requires a TokensEmbeddings embedding."
+            raise TypeError(msg)
         if database is None:
-            raise ValueError(
-                "ColbertVectorStore requires a ColbertBaseDatabase database."
-            )
+            msg = "ColbertVectorStore requires a ColbertBaseDatabase database."
+            raise ValueError(msg)
         instance = cls(
             database=database, embedding_model=embedding.get_embedding_model(), **kwargs
         )
@@ -290,5 +290,6 @@ def as_retriever(self, k: Optional[int] = 5, **kwargs: Any) -> VectorStoreRetriever:
         search_kwargs["k"] = k
         search_type = kwargs.get("search_type", "similarity")
         if search_type != "similarity":
-            raise ValueError(f"Unsupported search type: {search_type}")
+            msg = f"Unsupported search type: {search_type}"
+            raise ValueError(msg)
         return super().as_retriever(search_kwargs=search_kwargs, **kwargs)
diff --git a/libs/llamaindex/ragstack_llamaindex/colbert/__init__.py b/libs/llamaindex/ragstack_llamaindex/colbert/__init__.py
index 7d7de5cca..46a0443f0 100644
--- a/libs/llamaindex/ragstack_llamaindex/colbert/__init__.py
+++ b/libs/llamaindex/ragstack_llamaindex/colbert/__init__.py
@@ -1,10 +1,11 @@
 try:
     from ragstack_colbert.base_retriever import BaseRetriever  # noqa: F401
 except (ImportError, ModuleNotFoundError) as e:
-    raise ImportError(
+    msg = (
         "Could not import ragstack-ai-colbert. "
         "Please install it with `pip install ragstack-ai-llamaindex[colbert]`."
-    ) from e
+    )
+    raise ImportError(msg) from e
 
 from .colbert_retriever import ColbertRetriever
 
diff --git a/libs/llamaindex/tests/unit_tests/test_import.py b/libs/llamaindex/tests/unit_tests/test_import.py
index c9200bdb4..529b4a053 100644
--- a/libs/llamaindex/tests/unit_tests/test_import.py
+++ b/libs/llamaindex/tests/unit_tests/test_import.py
@@ -15,7 +15,8 @@ def test_import() -> None:
 
 def check_no_import(fn: Callable[[], Any]) -> None:
     try:
         fn()
-        raise RuntimeError("Should have failed to import")
+        msg = "Should have failed to import"
+        raise RuntimeError(msg)
     except ImportError:
         pass
diff --git a/libs/ragulate/colbert_chunk_size_and_k.py b/libs/ragulate/colbert_chunk_size_and_k.py
index a7956cb59..af82eed55 100644
--- a/libs/ragulate/colbert_chunk_size_and_k.py
+++ b/libs/ragulate/colbert_chunk_size_and_k.py
@@ -88,8 +88,9 @@ async def ingest(file_path: str, chunk_size: int, **_: Any) -> None:
     print(f"It took {duration} seconds to load and parse the document")
 
     # confirm only one document returned per file
-    if not len(docs) == 1:
-        raise ValueError("Only one document must be returned per file")
+    if len(docs) != 1:
+        msg = "Only one document must be returned per file"
+        raise ValueError(msg)
 
     text_splitter = RecursiveCharacterTextSplitter(
         chunk_size=chunk_size,
diff --git a/libs/ragulate/ragstack_ragulate/analysis.py b/libs/ragulate/ragstack_ragulate/analysis.py
index e2ea76cf0..69356569d 100644
--- a/libs/ragulate/ragstack_ragulate/analysis.py
+++ b/libs/ragulate/ragstack_ragulate/analysis.py
@@ -263,4 +263,5 @@ def compare(self, recipes: list[str], output: str = "box-plots") -> None:
         elif output == "histogram-grid":
             self.output_histograms_by_dataset(df=df, metrics=metrics)
         else:
-            raise ValueError(f"Invalid output type: {output}")
+            msg = f"Invalid output type: {output}"
+            raise ValueError(msg)
diff --git a/libs/ragulate/ragstack_ragulate/cli_commands/query.py b/libs/ragulate/ragstack_ragulate/cli_commands/query.py
index 27bdf98de..540840b2c 100644
--- a/libs/ragulate/ragstack_ragulate/cli_commands/query.py
+++ b/libs/ragulate/ragstack_ragulate/cli_commands/query.py
@@ -116,15 +116,15 @@ def call_query(
 ) -> None:
     """Run a query pipeline."""
     if sample <= 0.0 or sample > 1.0:
-        raise ValueError("Sample percent must be between 0 and 1")
+        msg = "Sample percent must be between 0 and 1"
+        raise ValueError(msg)
 
     datasets = [find_dataset(name=name) for name in dataset]
 
     if subset is not None and len(subset) > 0:
         if len(datasets) > 1:
-            raise ValueError(
-                "Only can set `subset` param when there is one dataset"
-            )
+            msg = "Only can set `subset` param when there is one dataset"
+            raise ValueError(msg)
         datasets[0].subsets = subset
 
     ingredients = convert_vars_to_ingredients(
diff --git a/libs/ragulate/ragstack_ragulate/config/config_parser.py b/libs/ragulate/ragstack_ragulate/config/config_parser.py
index 527fac8fe..f3e08fe18 100644
--- a/libs/ragulate/ragstack_ragulate/config/config_parser.py
+++ b/libs/ragulate/ragstack_ragulate/config/config_parser.py
@@ -42,4 +42,5 @@ def from_file(cls, file_path: str) -> ConfigParser:
         version = config.get("version", _VERSION_0_1)
         if version == _VERSION_0_1:
             return cls(config_schema=ConfigSchema0Dot1(), config=config)
-        raise ValueError(f"config file version {version} is not supported")
+        msg = f"config file version {version} is not supported"
+        raise ValueError(msg)
diff --git a/libs/ragulate/ragstack_ragulate/config/config_schema_0_1.py b/libs/ragulate/ragstack_ragulate/config/config_schema_0_1.py
index d07edd525..7a31f2997 100644
--- a/libs/ragulate/ragstack_ragulate/config/config_schema_0_1.py
+++ b/libs/ragulate/ragstack_ragulate/config/config_schema_0_1.py
@@ -162,10 +162,11 @@ def parse_document(self, document: dict[str, Any]) -> Config:
                 doc_script = doc_step.get("script", None)
                 doc_method = doc_step.get("method", None)
                 if doc_name in steps:
-                    raise ValueError(
+                    msg = (
                         f"{step_kind} step names must be unique. Found {doc_name} more "
                         f"than once."
                     )
+                    raise ValueError(msg)
                 steps[doc_name] = Step(
                     name=doc_name, script=doc_script, method=doc_method
                 )
@@ -180,19 +181,19 @@ def parse_document(self, document: dict[str, Any]) -> Config:
             for doc_ingredient in doc_ingredients:
                 for key, value in doc_ingredient.items():
                     if key in ingredients:
-                        raise ValueError(
-                            f"ingredient {key} appears in recipe more than once."
-                        )
+                        msg = f"ingredient {key} appears in recipe more than once."
+                        raise ValueError(msg)
                     ingredients[key] = value
 
             doc_name = doc_recipe.get("name", None)
 
             if doc_name is None:
                 if len(doc_ingredients) == 0:
-                    raise ValueError(
+                    msg = (
                         "recipe must either have a `name` defined or contain at least "
                         "one ingredient."
                     )
+                    raise ValueError(msg)
                 recipe_name = dict_to_string(ingredients)
             else:
                 recipe_name = doc_name
@@ -203,20 +204,24 @@ def parse_document(self, document: dict[str, Any]) -> Config:
                 doc_recipe_step = doc_recipe.get(step_kind, None)
                 step = step_map[step_kind].get(doc_recipe_step, None)
                 if doc_recipe_step is not None and step is None:
-                    raise ValueError(
+                    msg = (
                         f"{step_kind} step {doc_recipe_step} for recipe {recipe_name} "
                         f"is not defined in the `steps` section"
                     )
+                    raise ValueError(msg)
                 if step:
                     recipe_steps[step_kind] = step
 
             if "query" not in recipe_steps:
-                raise ValueError(f"query step is missing for recipe {recipe_name}")
+                msg = f"query step is missing for recipe {recipe_name}"
+                raise ValueError(msg)
 
             if recipe_name in recipes:
-                raise ValueError(
-                    f"recipe names must be unique. Found {recipe_name} more than once."
+                msg = (
+                    "recipe names must be unique. "
+                    f"Found {recipe_name} more than once."
                 )
+                raise ValueError(msg)
 
             recipes[recipe_name] = Recipe(
                 name=recipe_name,
@@ -235,9 +240,8 @@ def parse_document(self, document: dict[str, Any]) -> Config:
             doc_dataset_name = doc_dataset.get("name", None)
             doc_dataset_kind = doc_dataset.get("kind", None)
             if doc_dataset_name is None or doc_dataset_kind is None:
-                raise ValueError(
-                    "datasets must be specified with `name` and `kind`"
-                )
+                msg = "datasets must be specified with `name` and `kind`"
+                raise ValueError(msg)
             datasets[doc_dataset_name] = get_dataset(
                 name=doc_dataset_name, kind=doc_dataset_kind
             )
diff --git a/libs/ragulate/ragstack_ragulate/pipelines/base_pipeline.py b/libs/ragulate/ragstack_ragulate/pipelines/base_pipeline.py
index d93a4a6ed..ab847412d 100644
--- a/libs/ragulate/ragstack_ragulate/pipelines/base_pipeline.py
+++ b/libs/ragulate/ragstack_ragulate/pipelines/base_pipeline.py
@@ -17,10 +17,12 @@ def load_module(file_path: str, name: str) -> ModuleType:
     """Load a module from a file path dynamically."""
     spec = importlib.util.spec_from_file_location(name, file_path)
     if spec is None:
-        raise ValueError(f"Could not load module from {file_path}")
+        msg = f"Could not load module from {file_path}"
+        raise ValueError(msg)
     module = importlib.util.module_from_spec(spec)
     if spec.loader is None:
-        raise ValueError(f"No Module loader found for {file_path}")
+        msg = f"No Module loader found for {file_path}"
+        raise ValueError(msg)
     spec.loader.exec_module(module)
     return module
 
@@ -48,9 +50,8 @@ def get_ingredients(
         if method_param in reserved_params or method_param in ["kwargs", "_"]:
             continue
         if method_param not in passed_ingredients:
-            raise ValueError(
-                f"method param '{method_param}' doesn't exist in the ingredients"
-            )
+            msg = f"method param '{method_param}' doesn't exist in the ingredients"
+            raise ValueError(msg)
         ingredients[method_param] = passed_ingredients[method_param]
 
     return ingredients
diff --git a/libs/ragulate/ragstack_ragulate/pipelines/query_pipeline.py b/libs/ragulate/ragstack_ragulate/pipelines/query_pipeline.py
index 5d61db958..a61dd67f1 100644
--- a/libs/ragulate/ragstack_ragulate/pipelines/query_pipeline.py
+++ b/libs/ragulate/ragstack_ragulate/pipelines/query_pipeline.py
@@ -191,7 +191,8 @@ def get_provider(self) -> LLMProvider:
             return AzureOpenAI(deployment_name=model_name)
         if llm_provider == "huggingface":
             return Huggingface(name=model_name)
-        raise ValueError(f"Unsupported provider: {llm_provider}")
+        msg = f"Unsupported provider: {llm_provider}"
+        raise ValueError(msg)
 
     def query(self) -> None:
         """Run the query pipeline."""
diff --git a/libs/tests-utils/ragstack_tests_utils/test_store.py b/libs/tests-utils/ragstack_tests_utils/test_store.py
index 1abff93fa..4cfcd5f88 100644
--- a/libs/tests-utils/ragstack_tests_utils/test_store.py
+++ b/libs/tests-utils/ragstack_tests_utils/test_store.py
@@ -54,9 +54,8 @@ class AstraDBTestStore(TestStore):
     def __init__(self) -> None:
         super().__init__()
         if not os.getenv("ASTRA_DB_ID") or not os.getenv("ASTRA_DB_TOKEN"):
-            raise ValueError(
-                "ASTRA_DB_ID and ASTRA_DB_TOKEN environment variables must be set"
-            )
+            msg = "ASTRA_DB_ID and ASTRA_DB_TOKEN environment variables must be set"
+            raise ValueError(msg)
         self.token = os.getenv("ASTRA_DB_TOKEN")
         self.database_id = os.getenv("ASTRA_DB_ID")
         self.env = os.getenv("ASTRA_DB_ENV", "prod").lower()
diff --git a/pyproject.toml b/pyproject.toml
index 67c7c58a8..dcdb776f2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -68,7 +68,6 @@ ignore = [
     "D104", # Do we want to activate (docstring in package) ?
"D105", # Do we want to activate (docstring in magic method) ? "D107", # Do we want to activate (docstring in __init__) ? - "EM", # Do we want to activate (error messages) ? "ERA", # Do we want to activate (no commented code) ? "FBT", # Do we want to activate (boolean trap) ? "FIX", # Do we want to activate (no fix-me) ? diff --git a/scripts/format-example-notebooks.py b/scripts/format-example-notebooks.py index 0ec3c857b..765cd7589 100755 --- a/scripts/format-example-notebooks.py +++ b/scripts/format-example-notebooks.py @@ -31,7 +31,8 @@ def main() -> None: found = True break if not found: - raise ValueError("No code cells found in file: ", file) + msg = "No code cells found in file: " + raise ValueError(msg, file) with open(file, "w") as f: f.write(json.dumps(as_json, indent=1, sort_keys=True)) diff --git a/scripts/generate-changelog.py b/scripts/generate-changelog.py index 518d79c1e..b800f9a09 100755 --- a/scripts/generate-changelog.py +++ b/scripts/generate-changelog.py @@ -56,7 +56,8 @@ def main() -> None: version_range = require[i:] break if not version_range: - raise ValueError(f"Could not parse version range from {require}") + msg = f"Could not parse version range from {require}" + raise ValueError(msg) for important_dependency in IMPORTANT_DEPENDENCIES: if package_name.startswith(important_dependency + "["): package_name = important_dependency