Skip to content

Commit

Permalink
Add ruff rules for future annotations (#621)
Browse files (browse the repository at this point in the history)
  • Loading branch information
cbornet authored Jul 29, 2024
1 parent efa4b1b commit cc8f761
Show file tree
Hide file tree
Showing 54 changed files with 554 additions and 401 deletions.
19 changes: 11 additions & 8 deletions libs/colbert/ragstack_colbert/base_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
models.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import List, Tuple
from typing import TYPE_CHECKING

from .objects import Chunk, Vector
if TYPE_CHECKING:
from .objects import Chunk, Vector


class BaseDatabase(ABC):
Expand All @@ -24,7 +27,7 @@ class BaseDatabase(ABC):
"""

@abstractmethod
def add_chunks(self, chunks: List[Chunk]) -> List[Tuple[str, int]]:
def add_chunks(self, chunks: list[Chunk]) -> list[tuple[str, int]]:
"""Stores a list of embedded text chunks in the vector store.
Args:
Expand All @@ -35,7 +38,7 @@ def add_chunks(self, chunks: List[Chunk]) -> List[Tuple[str, int]]:
"""

@abstractmethod
def delete_chunks(self, doc_ids: List[str]) -> bool:
def delete_chunks(self, doc_ids: list[str]) -> bool:
"""Deletes chunks from the vector store based on their document id.
Args:
Expand All @@ -48,8 +51,8 @@ def delete_chunks(self, doc_ids: List[str]) -> bool:

@abstractmethod
async def aadd_chunks(
self, chunks: List[Chunk], concurrent_inserts: int = 100
) -> List[Tuple[str, int]]:
self, chunks: list[Chunk], concurrent_inserts: int = 100
) -> list[tuple[str, int]]:
"""Stores a list of embedded text chunks in the vector store.
Args:
Expand All @@ -63,7 +66,7 @@ async def aadd_chunks(

@abstractmethod
async def adelete_chunks(
self, doc_ids: List[str], concurrent_deletes: int = 100
self, doc_ids: list[str], concurrent_deletes: int = 100
) -> bool:
"""Deletes chunks from the vector store based on their document id.
Expand All @@ -78,7 +81,7 @@ async def adelete_chunks(
"""

@abstractmethod
async def search_relevant_chunks(self, vector: Vector, n: int) -> List[Chunk]:
async def search_relevant_chunks(self, vector: Vector, n: int) -> list[Chunk]:
"""Retrieves 'n' ANN results for an embedded token vector.
Returns:
Expand Down
11 changes: 7 additions & 4 deletions libs/colbert/ragstack_colbert/base_embedding_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@
embeddings for text.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import List, Optional
from typing import TYPE_CHECKING

from .objects import Embedding
if TYPE_CHECKING:
from .objects import Embedding


class BaseEmbeddingModel(ABC):
Expand All @@ -21,7 +24,7 @@ class BaseEmbeddingModel(ABC):
"""

@abstractmethod
def embed_texts(self, texts: List[str]) -> List[Embedding]:
def embed_texts(self, texts: list[str]) -> list[Embedding]:
"""Embeds a list of texts into their vector embedding representations.
Args:
Expand All @@ -36,7 +39,7 @@ def embed_query(
self,
query: str,
full_length_search: bool = False,
query_maxlen: Optional[int] = None,
query_maxlen: int | None = None,
) -> Embedding:
"""Embeds a single query text into its vector representation.
Expand Down
27 changes: 15 additions & 12 deletions libs/colbert/ragstack_colbert/base_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@
models.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any, List, Optional, Tuple
from typing import TYPE_CHECKING, Any

from .objects import Chunk, Embedding
if TYPE_CHECKING:
from .objects import Chunk, Embedding


class BaseRetriever(ABC):
Expand All @@ -24,10 +27,10 @@ class BaseRetriever(ABC):
def embedding_search(
self,
query_embedding: Embedding,
k: Optional[int] = None,
k: int | None = None,
include_embedding: bool = False,
**kwargs: Any,
) -> List[Tuple[Chunk, float]]:
) -> list[tuple[Chunk, float]]:
"""Search for relevant text chunks based on a query embedding.
Retrieves a list of text chunks relevant to a given query from the vector
Expand All @@ -53,10 +56,10 @@ def embedding_search(
async def aembedding_search(
self,
query_embedding: Embedding,
k: Optional[int] = None,
k: int | None = None,
include_embedding: bool = False,
**kwargs: Any,
) -> List[Tuple[Chunk, float]]:
) -> list[tuple[Chunk, float]]:
"""Search for relevant text chunks based on a query embedding.
Retrieves a list of text chunks relevant to a given query from the vector
Expand All @@ -82,11 +85,11 @@ async def aembedding_search(
def text_search(
self,
query_text: str,
k: Optional[int] = None,
query_maxlen: Optional[int] = None,
k: int | None = None,
query_maxlen: int | None = None,
include_embedding: bool = False,
**kwargs: Any,
) -> List[Tuple[Chunk, float]]:
) -> list[tuple[Chunk, float]]:
"""Search for relevant text chunks based on a query text.
Retrieves a list of text chunks relevant to a given query from the vector
Expand All @@ -113,11 +116,11 @@ def text_search(
async def atext_search(
self,
query_text: str,
k: Optional[int] = None,
query_maxlen: Optional[int] = None,
k: int | None = None,
query_maxlen: int | None = None,
include_embedding: bool = False,
**kwargs: Any,
) -> List[Tuple[Chunk, float]]:
) -> list[tuple[Chunk, float]]:
"""Search for relevant text chunks based on a query text.
Retrieves a list of text chunks relevant to a given query from the vector
Expand Down
35 changes: 19 additions & 16 deletions libs/colbert/ragstack_colbert/base_vector_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@
and can be used to create a LangChain or LlamaIndex ColBERT vector store.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import List, Optional, Tuple
from typing import TYPE_CHECKING

from .base_retriever import BaseRetriever
from .objects import Chunk, Metadata
if TYPE_CHECKING:
from .base_retriever import BaseRetriever
from .objects import Chunk, Metadata

# LlamaIndex Node (chunk) has ids, text, embedding, metadata
# VectorStore.add(nodes: List[Node]) -> List[str](ids): embeds texts OUTside add # noqa: E501
Expand Down Expand Up @@ -37,7 +40,7 @@ class BaseVectorStore(ABC):

# handles LlamaIndex add
@abstractmethod
def add_chunks(self, chunks: List[Chunk]) -> List[Tuple[str, int]]:
def add_chunks(self, chunks: list[Chunk]) -> list[tuple[str, int]]:
"""Stores a list of embedded text chunks in the vector store.
Args:
Expand All @@ -51,10 +54,10 @@ def add_chunks(self, chunks: List[Chunk]) -> List[Tuple[str, int]]:
@abstractmethod
def add_texts(
self,
texts: List[str],
metadatas: Optional[List[Metadata]],
doc_id: Optional[str] = None,
) -> List[Tuple[str, int]]:
texts: list[str],
metadatas: list[Metadata] | None,
doc_id: str | None = None,
) -> list[tuple[str, int]]:
"""Adds text chunks to the vector store.
Embeds and stores a list of text chunks and optional metadata into the vector
Expand All @@ -73,7 +76,7 @@ def add_texts(

# handles LangChain and LlamaIndex delete
@abstractmethod
def delete_chunks(self, doc_ids: List[str]) -> bool:
def delete_chunks(self, doc_ids: list[str]) -> bool:
"""Deletes chunks from the vector store based on their document id.
Args:
Expand All @@ -87,8 +90,8 @@ def delete_chunks(self, doc_ids: List[str]) -> bool:
# handles LlamaIndex add
@abstractmethod
async def aadd_chunks(
self, chunks: List[Chunk], concurrent_inserts: int = 100
) -> List[Tuple[str, int]]:
self, chunks: list[Chunk], concurrent_inserts: int = 100
) -> list[tuple[str, int]]:
"""Stores a list of embedded text chunks in the vector store.
Args:
Expand All @@ -104,11 +107,11 @@ async def aadd_chunks(
@abstractmethod
async def aadd_texts(
self,
texts: List[str],
metadatas: Optional[List[Metadata]],
doc_id: Optional[str] = None,
texts: list[str],
metadatas: list[Metadata] | None,
doc_id: str | None = None,
concurrent_inserts: int = 100,
) -> List[Tuple[str, int]]:
) -> list[tuple[str, int]]:
"""Adds text chunks to the vector store.
Embeds and stores a list of text chunks and optional metadata into the vector
Expand All @@ -130,7 +133,7 @@ async def aadd_texts(
# handles LangChain and LlamaIndex delete
@abstractmethod
async def adelete_chunks(
self, doc_ids: List[str], concurrent_deletes: int = 100
self, doc_ids: list[str], concurrent_deletes: int = 100
) -> bool:
"""Deletes chunks from the vector store based on their document id.
Expand Down
Loading

0 comments on commit cc8f761

Please sign in to comment.