Add ruff rules for future annotations (#621)

datastax · Jul 29, 2024 · cc8f761
1 parent efa4b1b
commit cc8f761
Show file tree

Hide file tree

Showing 54 changed files with 554 additions and 401 deletions.
diff --git a/libs/colbert/ragstack_colbert/base_database.py b/libs/colbert/ragstack_colbert/base_database.py
@@ -5,10 +5,13 @@
 models.
 """
 
+from __future__ import annotations
+
 from abc import ABC, abstractmethod
-from typing import List, Tuple
+from typing import TYPE_CHECKING
 
-from .objects import Chunk, Vector
+if TYPE_CHECKING:
+    from .objects import Chunk, Vector
 
 
 class BaseDatabase(ABC):
@@ -24,7 +27,7 @@ class BaseDatabase(ABC):
     """
 
     @abstractmethod
-    def add_chunks(self, chunks: List[Chunk]) -> List[Tuple[str, int]]:
+    def add_chunks(self, chunks: list[Chunk]) -> list[tuple[str, int]]:
         """Stores a list of embedded text chunks in the vector store.
 
         Args:
@@ -35,7 +38,7 @@ def add_chunks(self, chunks: List[Chunk]) -> List[Tuple[str, int]]:
         """
 
     @abstractmethod
-    def delete_chunks(self, doc_ids: List[str]) -> bool:
+    def delete_chunks(self, doc_ids: list[str]) -> bool:
         """Deletes chunks from the vector store based on their document id.
 
         Args:
@@ -48,8 +51,8 @@ def delete_chunks(self, doc_ids: List[str]) -> bool:
 
     @abstractmethod
     async def aadd_chunks(
-        self, chunks: List[Chunk], concurrent_inserts: int = 100
-    ) -> List[Tuple[str, int]]:
+        self, chunks: list[Chunk], concurrent_inserts: int = 100
+    ) -> list[tuple[str, int]]:
         """Stores a list of embedded text chunks in the vector store.
 
         Args:
@@ -63,7 +66,7 @@ async def aadd_chunks(
 
     @abstractmethod
     async def adelete_chunks(
-        self, doc_ids: List[str], concurrent_deletes: int = 100
+        self, doc_ids: list[str], concurrent_deletes: int = 100
     ) -> bool:
         """Deletes chunks from the vector store based on their document id.
 
@@ -78,7 +81,7 @@ async def adelete_chunks(
         """
 
     @abstractmethod
-    async def search_relevant_chunks(self, vector: Vector, n: int) -> List[Chunk]:
+    async def search_relevant_chunks(self, vector: Vector, n: int) -> list[Chunk]:
         """Retrieves 'n' ANN results for an embedded token vector.
 
         Returns:

diff --git a/libs/colbert/ragstack_colbert/base_embedding_model.py b/libs/colbert/ragstack_colbert/base_embedding_model.py
@@ -4,10 +4,13 @@
 embeddings for text.
 """
 
+from __future__ import annotations
+
 from abc import ABC, abstractmethod
-from typing import List, Optional
+from typing import TYPE_CHECKING
 
-from .objects import Embedding
+if TYPE_CHECKING:
+    from .objects import Embedding
 
 
 class BaseEmbeddingModel(ABC):
@@ -21,7 +24,7 @@ class BaseEmbeddingModel(ABC):
     """
 
     @abstractmethod
-    def embed_texts(self, texts: List[str]) -> List[Embedding]:
+    def embed_texts(self, texts: list[str]) -> list[Embedding]:
         """Embeds a list of texts into their vector embedding representations.
 
         Args:
@@ -36,7 +39,7 @@ def embed_query(
         self,
         query: str,
         full_length_search: bool = False,
-        query_maxlen: Optional[int] = None,
+        query_maxlen: int | None = None,
     ) -> Embedding:
         """Embeds a single query text into its vector representation.
 

diff --git a/libs/colbert/ragstack_colbert/base_retriever.py b/libs/colbert/ragstack_colbert/base_retriever.py
@@ -5,10 +5,13 @@
 models.
 """
 
+from __future__ import annotations
+
 from abc import ABC, abstractmethod
-from typing import Any, List, Optional, Tuple
+from typing import TYPE_CHECKING, Any
 
-from .objects import Chunk, Embedding
+if TYPE_CHECKING:
+    from .objects import Chunk, Embedding
 
 
 class BaseRetriever(ABC):
@@ -24,10 +27,10 @@ class BaseRetriever(ABC):
     def embedding_search(
         self,
         query_embedding: Embedding,
-        k: Optional[int] = None,
+        k: int | None = None,
         include_embedding: bool = False,
         **kwargs: Any,
-    ) -> List[Tuple[Chunk, float]]:
+    ) -> list[tuple[Chunk, float]]:
         """Search for relevant text chunks based on a query embedding.
 
         Retrieves a list of text chunks relevant to a given query from the vector
@@ -53,10 +56,10 @@ def embedding_search(
     async def aembedding_search(
         self,
         query_embedding: Embedding,
-        k: Optional[int] = None,
+        k: int | None = None,
         include_embedding: bool = False,
         **kwargs: Any,
-    ) -> List[Tuple[Chunk, float]]:
+    ) -> list[tuple[Chunk, float]]:
         """Search for relevant text chunks based on a query embedding.
 
         Retrieves a list of text chunks relevant to a given query from the vector
@@ -82,11 +85,11 @@ async def aembedding_search(
     def text_search(
         self,
         query_text: str,
-        k: Optional[int] = None,
-        query_maxlen: Optional[int] = None,
+        k: int | None = None,
+        query_maxlen: int | None = None,
         include_embedding: bool = False,
         **kwargs: Any,
-    ) -> List[Tuple[Chunk, float]]:
+    ) -> list[tuple[Chunk, float]]:
         """Search for relevant text chunks based on a query text.
 
         Retrieves a list of text chunks relevant to a given query from the vector
@@ -113,11 +116,11 @@ def text_search(
     async def atext_search(
         self,
         query_text: str,
-        k: Optional[int] = None,
-        query_maxlen: Optional[int] = None,
+        k: int | None = None,
+        query_maxlen: int | None = None,
         include_embedding: bool = False,
         **kwargs: Any,
-    ) -> List[Tuple[Chunk, float]]:
+    ) -> list[tuple[Chunk, float]]:
         """Search for relevant text chunks based on a query text.
 
         Retrieves a list of text chunks relevant to a given query from the vector

diff --git a/libs/colbert/ragstack_colbert/base_vector_store.py b/libs/colbert/ragstack_colbert/base_vector_store.py
@@ -5,11 +5,14 @@
 and can be used to create a LangChain or LlamaIndex ColBERT vector store.
 """
 
+from __future__ import annotations
+
 from abc import ABC, abstractmethod
-from typing import List, Optional, Tuple
+from typing import TYPE_CHECKING
 
-from .base_retriever import BaseRetriever
-from .objects import Chunk, Metadata
+if TYPE_CHECKING:
+    from .base_retriever import BaseRetriever
+    from .objects import Chunk, Metadata
 
 # LlamaIndex Node (chunk) has ids, text, embedding, metadata
 #            VectorStore.add(nodes: List[Node]) -> List[str](ids): embeds texts OUTside add  # noqa: E501
@@ -37,7 +40,7 @@ class BaseVectorStore(ABC):
 
     # handles LlamaIndex add
     @abstractmethod
-    def add_chunks(self, chunks: List[Chunk]) -> List[Tuple[str, int]]:
+    def add_chunks(self, chunks: list[Chunk]) -> list[tuple[str, int]]:
         """Stores a list of embedded text chunks in the vector store.
 
         Args:
@@ -51,10 +54,10 @@ def add_chunks(self, chunks: List[Chunk]) -> List[Tuple[str, int]]:
     @abstractmethod
     def add_texts(
         self,
-        texts: List[str],
-        metadatas: Optional[List[Metadata]],
-        doc_id: Optional[str] = None,
-    ) -> List[Tuple[str, int]]:
+        texts: list[str],
+        metadatas: list[Metadata] | None,
+        doc_id: str | None = None,
+    ) -> list[tuple[str, int]]:
         """Adds text chunks to the vector store.
 
         Embeds and stores a list of text chunks and optional metadata into the vector
@@ -73,7 +76,7 @@ def add_texts(
 
     # handles LangChain and LlamaIndex delete
     @abstractmethod
-    def delete_chunks(self, doc_ids: List[str]) -> bool:
+    def delete_chunks(self, doc_ids: list[str]) -> bool:
         """Deletes chunks from the vector store based on their document id.
 
         Args:
@@ -87,8 +90,8 @@ def delete_chunks(self, doc_ids: List[str]) -> bool:
     # handles LlamaIndex add
     @abstractmethod
     async def aadd_chunks(
-        self, chunks: List[Chunk], concurrent_inserts: int = 100
-    ) -> List[Tuple[str, int]]:
+        self, chunks: list[Chunk], concurrent_inserts: int = 100
+    ) -> list[tuple[str, int]]:
         """Stores a list of embedded text chunks in the vector store.
 
         Args:
@@ -104,11 +107,11 @@ async def aadd_chunks(
     @abstractmethod
     async def aadd_texts(
         self,
-        texts: List[str],
-        metadatas: Optional[List[Metadata]],
-        doc_id: Optional[str] = None,
+        texts: list[str],
+        metadatas: list[Metadata] | None,
+        doc_id: str | None = None,
         concurrent_inserts: int = 100,
-    ) -> List[Tuple[str, int]]:
+    ) -> list[tuple[str, int]]:
         """Adds text chunks to the vector store.
 
         Embeds and stores a list of text chunks and optional metadata into the vector
@@ -130,7 +133,7 @@ async def aadd_texts(
     # handles LangChain and LlamaIndex delete
     @abstractmethod
     async def adelete_chunks(
-        self, doc_ids: List[str], concurrent_deletes: int = 100
+        self, doc_ids: list[str], concurrent_deletes: int = 100
     ) -> bool:
         """Deletes chunks from the vector store based on their document id.