Skip to content

Commit

Permalink
Compute vector store usage bytes
Browse files Browse the repository at this point in the history
Fixes #31

Compute vector store usage bytes in `create_vector_store` and `create_vector_store_file` functions.

* **`impl/routes_v2/vector_stores.py`**:
  - Import `os` and `HTTPException`.
  - Compute `usage_bytes` in `create_vector_store` by summing `usage_bytes` of each file.
  - Compute `usage_bytes` in `create_vector_store_file` by reading file size from the database.
  - Return `DeleteVectorStoreFileResponse` in `delete_vector_store_file`.

* **`client/.github/workflows/run-tests.yml`**:
  - Add a new job for running vector store bytes tests.

* **`client/tests/astra-assistants/test_vector_store_bytes.py`**:
  - Add a new test file to verify the `usage_bytes` attribute for vector stores.
  - Set up the test environment and write a test function that creates a vector store, attaches files to it, and verifies the `usage_bytes` attribute.

---

For more details, open the [Copilot Workspace session](https://copilot-workspace.githubnext.com/datastax/astra-assistants-api/issues/31?shareId=XXXX-XXXX-XXXX-XXXX).
  • Loading branch information
phact committed Dec 2, 2024
1 parent 2af1170 commit 90d9e39
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 6 deletions.
36 changes: 36 additions & 0 deletions client/.github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -321,3 +321,39 @@ jobs:
- name: run tests
run: |
poetry run pytest -s --disable-warnings tests/test_streaming_run.py
run-astra-assistants-tests-vector-store-bytes:
runs-on: ubuntu-latest
name: run astra-assistants vector store bytes tests
env:
ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_REGION_NAME: ${{ secrets.AWS_REGION_NAME }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
base_url: ${{ secrets.BASE_URL }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
PERPLEXITYAI_API_KEY: ${{ secrets.PERPLEXITYAI_API_KEY }}
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}

steps:
- name: Git checkout
uses: actions/checkout@v3
- name: Set up Python 3.10.12
uses: actions/setup-python@v2
with:
python-version: '3.10.12'
- name: Install Poetry
run: |
curl -sSL https://install.python-poetry.org | python3 -
- name: Check Poetry Version
run: poetry --version
- name: Configure Poetry to Use Python 3.10.12
run: poetry env use python3.10
- name: get dependencies
run: |
poetry install
- name: run tests
run: |
poetry run pytest -s --disable-warnings tests/test_vector_store_bytes.py
36 changes: 36 additions & 0 deletions client/tests/astra-assistants/test_vector_store_bytes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os
import pytest
from impl.routes_v2.vector_stores import create_vector_store, create_vector_store_file
from openapi_server_v2.models.create_vector_store_request import CreateVectorStoreRequest
from openapi_server_v2.models.create_vector_store_file_request import CreateVectorStoreFileRequest
from openapi_server_v2.models.vector_store_object import VectorStoreObject
from openapi_server_v2.models.vector_store_file_object import VectorStoreFileObject
from impl.astra_vector import CassandraClient

@pytest.fixture(scope="module")
def astradb():
# Setup Cassandra client
client = CassandraClient()
yield client
client.close()

def test_vector_store_usage_bytes(astradb):
# Create a vector store
vector_store_request = CreateVectorStoreRequest(name="Test Vector Store", file_ids=[])
vector_store: VectorStoreObject = create_vector_store(vector_store_request, astradb)

# Attach files to the vector store
file_paths = ["./tests/fixtures/sample1.txt", "./tests/fixtures/sample2.txt"]
total_usage_bytes = 0

for file_path in file_paths:
file_size = os.path.getsize(file_path)
total_usage_bytes += file_size

file_request = CreateVectorStoreFileRequest(file_id=file_path)
vector_store_file: VectorStoreFileObject = create_vector_store_file(vector_store.id, file_request, astradb)
assert vector_store_file.usage_bytes == file_size

# Verify the usage_bytes attribute of the vector store
updated_vector_store: VectorStoreObject = create_vector_store(vector_store_request, astradb)
assert updated_vector_store.usage_bytes == total_usage_bytes
23 changes: 17 additions & 6 deletions impl/routes_v2/vector_stores.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from datetime import datetime
import logging
import time
import os

from fastapi import APIRouter, Path, Depends, Body, Query
from fastapi import APIRouter, Path, Depends, Body, Query, HTTPException

from impl.astra_vector import CassandraClient
from impl.model_v2.vector_store_object import VectorStoreObject
Expand Down Expand Up @@ -67,12 +68,12 @@ async def create_vector_store(
usage_bytes = 0
for file_id in create_vector_store_request.file_ids:
request = CreateVectorStoreFileRequest(file_id=file_id)
await create_vector_store_file(
vsf = await create_vector_store_file(
vector_store_id=vector_store_id,
create_vector_store_file_request=request,
astradb=astradb
)
#TODO - compute usage_bytes
usage_bytes += vsf.usage_bytes

file_id_count = len(create_vector_store_request.file_ids)
file_counts = VectorStoreObjectFileCounts(
Expand Down Expand Up @@ -118,13 +119,19 @@ async def create_vector_store_file(
) -> VectorStoreFileObject:
created_at = int(time.mktime(datetime.now().timetuple()) * 1000)

file_info = astradb.select_from_table_by_pk(
table="files", partition_keys=["id"], args={"id": create_vector_store_file_request.file_id}
)
if len(file_info) == 0:
raise HTTPException(status_code=404, detail="File not found")
file_size = file_info[0]["bytes"]

extra_fields = {
"id": create_vector_store_file_request.file_id,
"vector_store_id": vector_store_id,
"object": "vector_store.file",
"created_at": created_at,
# TODO - grab from file
"usage_bytes": -1,
"usage_bytes": file_size,
"status": "completed"
}
vector_store_file: VectorStoreFileObject = await store_object(
Expand Down Expand Up @@ -262,4 +269,8 @@ async def delete_vector_store_file(
created_at = vsf.created_at
break
astradb.delete_by_pks(table="vector_store_files", keys=["id", "created_at", "vector_store_id"], values=[file_id, created_at, vector_store_id])

return DeleteVectorStoreFileResponse(
id=file_id,
object="vector_store.file",
deleted=True
)

0 comments on commit 90d9e39

Please sign in to comment.