Commit

updated
jieguangzhou committed Aug 12, 2024
1 parent cc70a2e commit 3316c3c
Showing 51 changed files with 232 additions and 286 deletions.
42 changes: 2 additions & 40 deletions .github/workflows/ci_code.yml
@@ -62,9 +62,9 @@ jobs:
run: |
# Install core and testsuite dependencies on the cached python environment.
python -m pip install .
# TODO: We currently need a default plugin to run tests using MongoDB.
# Once the local file database is complete, we may need to update this section.
python -m pip install plugins/mongodb
python -m pip install -r deploy/installations/testenv_requirements.txt
- name: Install DevKit (docs, testing, etc)
run: |
@@ -78,44 +78,6 @@ jobs:
run: |
make unit_testing pytest_arguments="--cov=superduper --cov-report=xml"
- name: Ext Testing
run: |
make ext_testing
- name: Usecase Testing
run: |
make usecase_testing
# # ---------------------------------
# # Integration Testing
# # ---------------------------------
# integration-testing:
# needs: [ 'unit_testing' ]
# runs-on: ubuntu-latest
# steps:
# - name: Checkout repository
# uses: actions/checkout@v4
#
# - name: Build Docker Image
# run: |
# make build_sandbox
#
# - name: Start Testing Environment
# run: |
# # Update hostnames
# echo 127.0.0.1 mongodb | sudo tee -a /etc/hosts
#
# # Run the integrated testing environment
# make testenv_init
# # To make sure that pytest etc. are installed in the container
# docker exec testenv-sandbox-1 sh -c 'make install_devkit'
#
# - name: Data-backend Testing
# run: |
# # Run the test-suite within the sandbox image from the test environment
# docker exec testenv-sandbox-1 sh -c 'make databackend_testing'
#
# - name: Shutdown Testing Environment
# run: |
# # Run the integrated testing environment
# make testenv_shutdown
27 changes: 23 additions & 4 deletions .github/workflows/ci_plugins.yaml
@@ -54,6 +54,13 @@ jobs:
python-version: '3.10'
cache: 'pip' # caching pip dependencies

- name: Cache Python Installation
uses: actions/cache@v4
with:
path: ${{ env.pythonLocation }} # Cache the whole python installation dir.
key: ${{ matrix.plugin }}_${{ hashFiles('pyproject.toml', '*/pyproject.toml') }}


- name: Install SuperDuperDB Project
run: |
# Install core and testsuite dependencies on the cached python environment.
@@ -62,6 +69,11 @@
- name: Install DevKit (docs, testing, etc)
run: |
make install_devkit
- name: Lint and type-check
run: |
make lint-and-type-check DIRECTORIES="plugins/${{ matrix.plugin }}"
- name: Install Plugin
run: |
python -m pip install 'plugins/${{ matrix.plugin }}[test]'
@@ -75,11 +87,18 @@
echo "No custom CI script found, skipping..."
fi
# - name: Lint and type-check
# run: |
# make lint-and-type-check

- name: Plugin Testing
run: |
export PYTHONPATH=./
pytest --cov=superduper --cov-report=xml plugins/${{ matrix.plugin }}/plugin_test
- name: Optionally run the base testing
run: |
SUPERDUPER_CONFIG="plugins/${{ matrix.plugin }}/plugin_test/config.yaml"
if [ -f "$SUPERDUPER_CONFIG" ]; then
echo "Running the base testing..."
make unit_testing SUPERDUPER_CONFIG=$SUPERDUPER_CONFIG
make usecase_testing SUPERDUPER_CONFIG=$SUPERDUPER_CONFIG
else
echo "No config file found, skipping..."
fi
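Note: the new Cache Python Installation step keys the cache on the plugin name plus a hash of every pyproject.toml (repo root and one level down), so any dependency change produces a fresh key and skips the stale cache. A rough Python sketch of that keying scheme — the real hashing is done by GitHub's hashFiles expression, and the exact digest construction below is an assumption:

    import hashlib
    from pathlib import Path

    def cache_key(plugin: str) -> str:
        # Roughly mimics: ${{ matrix.plugin }}_${{ hashFiles('pyproject.toml', '*/pyproject.toml') }}
        digest = hashlib.sha256()
        paths = sorted(Path(".").glob("pyproject.toml")) + sorted(Path(".").glob("*/pyproject.toml"))
        for path in paths:
            digest.update(path.read_bytes())
        return f"{plugin}_{digest.hexdigest()}"

    # Any edit to any pyproject.toml changes the key, invalidating the cached environment.
    print(cache_key("mongodb"))
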
30 changes: 5 additions & 25 deletions Makefile
@@ -1,4 +1,5 @@
DIRECTORIES = superduper test
DIRECTORIES ?= superduper test
SUPERDUPER_CONFIG ?= test/configs/default.yaml
PYTEST_ARGUMENTS ?=
BACKENDS ?= mongodb_community sqlite duckdb pandas

@@ -26,8 +27,7 @@ CURRENT_RELEASE=$(shell git describe --abbrev=0 --tags)
CURRENT_COMMIT=$(shell git rev-parse --short HEAD)

new_release: ## Release a new version of superduper.io
@ if [[ -z "${RELEASE_VERSION}" ]]; then echo "VERSION is not set"; exit 1; fi
@ if [[ "$(RELEASE_VERSION)" == "v$(CURRENT_RELEASE)" ]]; then echo "No new release version. Please update VERSION file."; exit 1; fi
@ if [[ -z "${RELEASE_VERSION}" ]]; then echo "VERSION is not set"; exit 1; fi @ if [[ "$(RELEASE_VERSION)" == "v$(CURRENT_RELEASE)" ]]; then echo "No new release version. Please update VERSION file."; exit 1; fi
# Switch to release branch
@echo "** Switching to branch release-$(RELEASE_VERSION)"
@git checkout -b release-$(RELEASE_VERSION)
@@ -238,27 +238,7 @@ testdb_shutdown: check_db_variable ## Shutdown Databases Containers (DB=<mongodb
##@ CI Testing Functions

unit_testing: ## Execute unit testing
# TODO After we have completed separating the plugins, we can run the tests only on default.yaml.
# SUPERDUPER_CONFIG=test/configs/mongodb.yaml pytest $(PYTEST_ARGUMENTS) ./test/unittest
# SUPERDUPER_CONFIG=test/configs/ibis.yaml pytest $(PYTEST_ARGUMENTS) ./test/unittest
SUPERDUPER_CONFIG=test/configs/default.yaml pytest $(PYTEST_ARGUMENTS) ./test/unittest

# databackend_testing: ## Execute integration testing
# @echo "TESTING BACKENDS"
# @for backend in $(BACKENDS); do \
# echo "TESTING $$backend"; \
# SUPERDUPER_CONFIG=deploy/testenv/env/integration/backends/$$backend.yaml pytest $(PYTEST_ARGUMENTS) ./test/integration/backends; \
# done
# @echo "TODO -- implement more backends integration testing..."

# ext_testing: ## Execute integration testing
# find ./test -type d -name __pycache__ -exec rm -r {} +
# find ./test -type f -name "*.pyc" -delete
# pytest $(PYTEST_ARGUMENTS) ./test/integration/ext

SUPERDUPER_CONFIG=$(SUPERDUPER_CONFIG) pytest $(PYTEST_ARGUMENTS) ./test/unittest

usecase_testing: ## Execute usecase testing
# TODO After we have completed separating the plugins, we can run the tests only on default.yaml.
# SUPERDUPER_CONFIG=test/configs/mongodb.yaml pytest $(PYTEST_ARGUMENTS) ./test/integration/usecase
# SUPERDUPER_CONFIG=test/configs/ibis.yaml pytest $(PYTEST_ARGUMENTS) ./test/integration/usecase
SUPERDUPER_CONFIG=test/configs/default.yaml pytest $(PYTEST_ARGUMENTS) ./test/integration/usecase
SUPERDUPER_CONFIG=$(SUPERDUPER_CONFIG) pytest $(PYTEST_ARGUMENTS) ./test/integration/usecase
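Note: changing `DIRECTORIES =` to `DIRECTORIES ?=` (and introducing `SUPERDUPER_CONFIG ?=`) turns these into overridable defaults: `?=` only assigns when the variable has no value yet, which is what lets the plugin workflow above call `make unit_testing SUPERDUPER_CONFIG=$SUPERDUPER_CONFIG`. A rough Python analogue of that defaulting behaviour:

    import os

    # Make's "?=": keep an externally supplied value, otherwise fall back to the default.
    config = os.environ.get("SUPERDUPER_CONFIG", "test/configs/default.yaml")
    directories = os.environ.get("DIRECTORIES", "superduper test")
    print(config, directories)
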
1 change: 1 addition & 0 deletions plugins/anthropic/pyproject.toml
@@ -77,3 +77,4 @@ combine-as-imports = true

[tool.ruff.lint.per-file-ignores]
"test/**" = ["D"]
"plugin_test/**" = ["D"]
1 change: 0 additions & 1 deletion plugins/anthropic/superduper_anthropic/__init__.py
@@ -4,7 +4,6 @@

from .model import AnthropicCompletions


__version__ = '0.3.0'

__all__ = ('AnthropicCompletions',)
1 change: 1 addition & 0 deletions plugins/cohere/pyproject.toml
@@ -77,3 +77,4 @@ combine-as-imports = true

[tool.ruff.lint.per-file-ignores]
"test/**" = ["D"]
"plugin_test/**" = ["D"]
88 changes: 44 additions & 44 deletions plugins/ibis/plugin_test/test_end_2_end.py
@@ -1,5 +1,3 @@
import os

import PIL.Image
import pytest
from superduper import CFG, superduper
@@ -11,65 +9,67 @@
@pytest.mark.skip
def test_end_2_end():
memory_table = False
if CFG.data_backend.endswith('csv'):
if CFG.data_backend.endswith("csv"):
memory_table = True
_end_2_end(superduper(), memory_table=memory_table)

#TODO: Fix the test without torch

# TODO: Fix the test without torch
def _end_2_end(db, memory_table=False):
import torch.nn
import torchvision
from superduper.ext.torch.encoder import tensor
from superduper.ext.torch.model import TorchModel
from superduper_pillow import pil_image

schema = Schema(
identifier='my_table',
identifier="my_table",
fields={
'id': FieldType(identifier='str'),
'health': FieldType(identifier='int32'),
'age': FieldType(identifier='int32'),
'image': pil_image,
"id": FieldType(identifier="str"),
"health": FieldType(identifier="int32"),
"age": FieldType(identifier="int32"),
"image": pil_image,
},
)
im = PIL.Image.open('test/material/data/test-image.jpeg')
im = PIL.Image.open("test/material/data/test-image.jpeg")

data_to_insert = [
{'id': '1', 'health': 0, 'age': 25, 'image': im},
{'id': '2', 'health': 1, 'age': 26, 'image': im},
{'id': '3', 'health': 0, 'age': 27, 'image': im},
{'id': '4', 'health': 1, 'age': 28, 'image': im},
{"id": "1", "health": 0, "age": 25, "image": im},
{"id": "2", "health": 1, "age": 26, "image": im},
{"id": "3", "health": 0, "age": 27, "image": im},
{"id": "4", "health": 1, "age": 28, "image": im},
]

from superduper.components.table import Table

t = Table(identifier='my_table', schema=schema, db=db)
t = Table(identifier="my_table", schema=schema, db=db)

db.add(t)
t = db['my_table']
t = db["my_table"]

insert = t.insert(
[
D(
{
'id': d['id'],
'health': d['health'],
'age': d['age'],
'image': d['image'],
"id": d["id"],
"health": d["health"],
"age": d["age"],
"image": d["image"],
}
)
for d in data_to_insert
]
)
db.execute(insert)

q = t.select('image', 'age', 'health')
q = t.select("image", "age", "health")

result = db.execute(q)
for img in result:
img = img.unpack()
assert isinstance(img['image'], PIL.Image.Image)
assert isinstance(img['age'], int)
assert isinstance(img['health'], int)
assert isinstance(img["image"], PIL.Image.Image)
assert isinstance(img["age"], int)
assert isinstance(img["health"], int)

# preprocessing function
preprocess = torchvision.transforms.Compose(
@@ -88,29 +88,29 @@ def postprocess(x):

# create a torchvision model
resnet = TorchModel(
identifier='resnet18',
identifier="resnet18",
preprocess=preprocess,
postprocess=postprocess,
object=torchvision.models.resnet18(pretrained=False),
datatype=FieldType('int32'),
datatype=FieldType("int32"),
)

# Apply the torchvision model
listener1 = Listener(
model=resnet,
key='image',
select=t.select('id', 'image'),
predict_kwargs={'max_chunk_size': 3000},
identifier='listener1',
key="image",
select=t.select("id", "image"),
predict_kwargs={"max_chunk_size": 3000},
identifier="listener1",
)
db.add(listener1)

# also add a vectorizing model
vectorize = TorchModel(
preprocess=lambda x: torch.randn(32),
object=torch.nn.Linear(32, 16),
identifier='model_linear_a',
datatype=tensor(dtype='float', shape=(16,)),
identifier="model_linear_a",
datatype=tensor(dtype="float", shape=(16,)),
)

# create outputs query
@@ -121,22 +121,22 @@ def postprocess(x):
model=vectorize,
key=listener1.outputs,
select=q,
predict_kwargs={'max_chunk_size': 3000},
identifier='listener2',
predict_kwargs={"max_chunk_size": 3000},
identifier="listener2",
)
db.add(listener2)

# Build query to get the results back
q = t.outputs(listener2.outputs).select('id', 'image', 'age').filter(t.age > 25)
q = t.outputs(listener2.outputs).select("id", "image", "age").filter(t.age > 25)

# Get the results
result = list(db.execute(q))
assert result
assert 'image' in result[0].unpack()
assert "image" in result[0].unpack()

# TODO: Make this work

q = t.select('id', 'image', 'age').filter(t.age > 25).outputs(listener2.outputs)
q = t.select("id", "image", "age").filter(t.age > 25).outputs(listener2.outputs)

# Get the results
result = list(db.execute(q))
@@ -147,24 +147,24 @@ def test_nested_query():
db = superduper()

memory_table = False
if CFG.data_backend.endswith('csv'):
if CFG.data_backend.endswith("csv"):
memory_table = True
schema = Schema(
identifier='my_table',
identifier="my_table",
fields={
'id': FieldType(identifier='int64'),
'health': FieldType(identifier='int32'),
'age': FieldType(identifier='int32'),
"id": FieldType(identifier="int64"),
"health": FieldType(identifier="int32"),
"age": FieldType(identifier="int32"),
},
)

from superduper.components.table import Table

t = Table(identifier='my_table', schema=schema)
t = Table(identifier="my_table", schema=schema)

db.add(t)

t = db['my_table']
t = db["my_table"]
q = t.filter(t.age >= 10)

expr_ = q.compile(db)
(Diff truncated: the remaining 44 changed files are not shown.)
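Note: for context on what `test_nested_query` exercises, here is a minimal standalone sketch of the same pattern, assuming a local superduper install with the ibis plugin; the Schema/FieldType import paths are guesses, since the test's import block is collapsed in the diff above:

    from superduper import superduper
    from superduper.components.schema import Schema, FieldType  # assumed import path
    from superduper.components.table import Table

    db = superduper()

    schema = Schema(
        identifier="my_table",
        fields={
            "id": FieldType(identifier="int64"),
            "age": FieldType(identifier="int32"),
        },
    )
    db.add(Table(identifier="my_table", schema=schema))

    t = db["my_table"]         # look the table back up through the db
    q = t.filter(t.age >= 10)  # build the lazy, nested query
    expr = q.compile(db)       # compile it down to an ibis expression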
