-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
.Net: Create a getting started project for vector stores. (#9489)
### Motivation and Context #7606 Only three steps so far, more to be added in a subsequent PR. ### Description - Add a getting started project for vector stores - Add three initial steps, more to follow - Update README files ### Contribution Checklist <!-- Before submitting this PR, please make sure: --> - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone 😄
- Loading branch information
Showing
9 changed files
with
457 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
53 changes: 53 additions & 0 deletions
53
dotnet/samples/GettingStartedWithVectorStores/GettingStartedWithVectorStores.csproj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Build settings for the vector store getting-started samples, which are compiled as an xUnit test library. -->
  <PropertyGroup>
    <AssemblyName>GettingStartedWithVectorStores</AssemblyName>
    <RootNamespace></RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <TargetFramework>net8.0</TargetFramework>
    <IsTestProject>true</IsTestProject>
    <IsPackable>false</IsPackable>
    <!-- Suppress: "Declare types in namespaces", "Require ConfigureAwait", "Experimental" -->
    <NoWarn>$(NoWarn);CS8618,IDE0009,CA1051,CA1050,CA1707,CA1054,CA2007,VSTHRD111,CS1591,RCS1110,RCS1243,CA5394,SKEXP0001,SKEXP0010,SKEXP0020,SKEXP0040,SKEXP0050,SKEXP0060,SKEXP0070,SKEXP0101</NoWarn>
    <OutputType>Library</OutputType>
    <!-- Identifier used by "dotnet user-secrets" to locate this project's secret store. -->
    <UserSecretsId>5ee045b0-aea3-4f08-8d31-32d1a6f8fed0</UserSecretsId>
  </PropertyGroup>

  <!-- NuGet packages. No versions are given on these references, so they must be supplied outside this file. -->
  <ItemGroup>
    <!-- Test SDK, xUnit and retry support. -->
    <PackageReference Include="Microsoft.NET.Test.Sdk" />
    <PackageReference Include="xRetry" />
    <PackageReference Include="xunit" />
    <PackageReference Include="xunit.abstractions" />
    <PackageReference Include="xunit.runner.visualstudio">
      <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
      <PrivateAssets>all</PrivateAssets>
    </PackageReference>
    <!-- Identity, configuration, dependency injection, HTTP and logging. -->
    <PackageReference Include="Azure.Identity" />
    <PackageReference Include="Microsoft.Extensions.Configuration" />
    <PackageReference Include="Microsoft.Extensions.Configuration.Binder" />
    <PackageReference Include="Microsoft.Extensions.Configuration.EnvironmentVariables" />
    <PackageReference Include="Microsoft.Extensions.Configuration.Json" />
    <PackageReference Include="Microsoft.Extensions.Configuration.UserSecrets" />
    <PackageReference Include="Microsoft.Extensions.DependencyInjection" />
    <PackageReference Include="Microsoft.Extensions.Http" />
    <PackageReference Include="Microsoft.Extensions.Http.Resilience" />
    <PackageReference Include="Microsoft.Extensions.Logging" />
    <PackageReference Include="Microsoft.Extensions.Logging.Abstractions" />
    <PackageReference Include="Microsoft.Extensions.Logging.Console" />
    <!-- General-purpose helpers. -->
    <PackageReference Include="System.Linq.Async" />
    <PackageReference Include="System.Text.Json" />
  </ItemGroup>

  <!-- Shared sample utilities imported from the repository's InternalUtilities folder. -->
  <Import Project="$(RepoRoot)/dotnet/src/InternalUtilities/samples/SamplesInternalUtilities.props" />

  <!-- Semantic Kernel projects referenced directly from source. -->
  <ItemGroup>
    <ProjectReference Include="..\..\src\Connectors\Connectors.AzureOpenAI\Connectors.AzureOpenAI.csproj" />
    <ProjectReference Include="..\..\src\Connectors\Connectors.Memory.AzureAISearch\Connectors.Memory.AzureAISearch.csproj" />
    <ProjectReference Include="..\..\src\Connectors\Connectors.Memory.InMemory\Connectors.Memory.InMemory.csproj" />
    <ProjectReference Include="..\..\src\Connectors\Connectors.Memory.Redis\Connectors.Memory.Redis.csproj" />
    <ProjectReference Include="..\..\src\SemanticKernel.Abstractions\SemanticKernel.Abstractions.csproj" />
    <ProjectReference Include="..\..\src\SemanticKernel.Core\SemanticKernel.Core.csproj" />
  </ItemGroup>

  <!-- Global usings so that sample files do not need to import the xUnit namespaces themselves. -->
  <ItemGroup>
    <Using Include="Xunit" />
    <Using Include="Xunit.Abstractions" />
  </ItemGroup>

</Project>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using Microsoft.Extensions.VectorData; | ||
|
||
namespace GettingStartedWithVectorStores; | ||
|
||
/// <summary>
/// Data model for a single glossary entry stored in a vector store.
/// </summary>
/// <remarks>
/// Every property carries a vector store attribute describing its role (key, data or vector).
/// With these attributes in place, a collection can be created for this type, and instances
/// can be upserted and retrieved, without supplying any additional configuration.
/// </remarks>
internal sealed class Glossary
{
    /// <summary>The unique key that identifies the record in the collection.</summary>
    [VectorStoreRecordKey]
    public string Key { get; set; }

    /// <summary>The category the term belongs to. Marked as filterable so that searches can filter on it.</summary>
    [VectorStoreRecordData(IsFilterable = true)]
    public string Category { get; set; }

    /// <summary>The glossary term being defined.</summary>
    [VectorStoreRecordData]
    public string Term { get; set; }

    /// <summary>The definition of the term.</summary>
    [VectorStoreRecordData]
    public string Definition { get; set; }

    /// <summary>The embedding generated from <see cref="Definition"/>, declared with 1536 dimensions.</summary>
    [VectorStoreRecordVector(Dimensions: 1536)]
    public ReadOnlyMemory<float> DefinitionEmbedding { get; set; }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# Starting With Semantic Kernel Vector Stores | ||
|
||
This project contains a step-by-step guide to get started using Vector Stores with the Semantic Kernel. | ||
|
||
The examples can be run as integration tests but their code can also be copied to stand-alone programs. | ||
|
||
## Configuring Secrets | ||
|
||
Most of the examples require secrets and credentials to access OpenAI, Azure OpenAI, | ||
Vector Stores and other resources. We suggest using .NET | ||
[Secret Manager](https://learn.microsoft.com/aspnet/core/security/app-secrets) | ||
to avoid the risk of leaking secrets into the repository, branches and pull requests. | ||
You can also use environment variables if you prefer. | ||
|
||
To set your secrets with Secret Manager: | ||
|
||
``` | ||
cd dotnet/samples/GettingStartedWithVectorStores | ||
dotnet user-secrets init | ||
dotnet user-secrets set "AzureOpenAIEmbeddings:DeploymentName" "..." | ||
dotnet user-secrets set "AzureOpenAIEmbeddings:Endpoint" "..." | ||
dotnet user-secrets set "AzureAISearch:Endpoint" "..." | ||
dotnet user-secrets set "AzureAISearch:ApiKey" "..." | ||
``` | ||
|
||
To set your secrets with environment variables, use these names: | ||
|
||
``` | ||
AzureOpenAIEmbeddings__DeploymentName | ||
AzureOpenAIEmbeddings__Endpoint | ||
AzureAISearch__Endpoint | ||
AzureAISearch__ApiKey | ||
``` |
112 changes: 112 additions & 0 deletions
112
dotnet/samples/GettingStartedWithVectorStores/Step1_Ingest_Data.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using Microsoft.Extensions.VectorData; | ||
using Microsoft.SemanticKernel.Connectors.InMemory; | ||
using Microsoft.SemanticKernel.Embeddings; | ||
|
||
namespace GettingStartedWithVectorStores; | ||
|
||
/// <summary>
/// Example showing how to generate embeddings and ingest data into an in-memory vector store.
/// </summary>
public class Step1_Ingest_Data(ITestOutputHelper output, VectorStoresFixture fixture) : BaseTest(output), IClassFixture<VectorStoresFixture>
{
    /// <summary>
    /// Example showing how to ingest data into an in-memory vector store.
    /// </summary>
    [Fact]
    public async Task IngestDataIntoInMemoryVectorStoreAsync()
    {
        // Construct the vector store and get the collection.
        var vectorStore = new InMemoryVectorStore();
        var collection = vectorStore.GetCollection<string, Glossary>("skglossary");

        // Ingest data into the collection.
        await IngestDataIntoVectorStoreAsync(collection, fixture.TextEmbeddingGenerationService);

        // Retrieve an item from the collection and write it to the console.
        // Key "4" is one of the sample entries that was just ingested.
        var record = await collection.GetAsync("4");
        Console.WriteLine(record!.Definition);
    }

    /// <summary>
    /// Ingest data into the given collection.
    /// </summary>
    /// <remarks>
    /// The collection is created if it does not exist yet. An embedding is generated for the
    /// definition of each sample glossary entry, and the entries are then upserted into the collection.
    /// </remarks>
    /// <param name="collection">The collection to ingest data into.</param>
    /// <param name="textEmbeddingGenerationService">The service to use for generating embeddings.</param>
    /// <returns>The keys of the upserted records.</returns>
    internal static async Task<IEnumerable<string>> IngestDataIntoVectorStoreAsync(
        IVectorStoreRecordCollection<string, Glossary> collection,
        ITextEmbeddingGenerationService textEmbeddingGenerationService)
    {
        // Create the collection if it doesn't exist.
        await collection.CreateCollectionIfNotExistsAsync();

        // Create glossary entries and generate embeddings for them.
        // The entries are materialized into a list first so that the embedding step and the
        // upsert step below operate on the same instances.
        var glossaryEntries = CreateGlossaryEntries().ToList();

        // Embedding generation is already asynchronous, so the calls are started directly and
        // awaited together instead of being pushed onto thread pool threads with Task.Run.
        var embeddingTasks = glossaryEntries.Select(async entry =>
        {
            entry.DefinitionEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(entry.Definition);
        });
        await Task.WhenAll(embeddingTasks);

        // Upsert the glossary entries into the collection and return their keys.
        var upsertedKeysTasks = glossaryEntries.Select(x => collection.UpsertAsync(x));
        return await Task.WhenAll(upsertedKeysTasks);
    }

    /// <summary>
    /// Create some sample glossary entries.
    /// </summary>
    /// <remarks>
    /// The embedding of each entry is left unset here; it is populated by the caller.
    /// </remarks>
    /// <returns>A lazily evaluated sequence of six sample glossary entries with keys "1" to "6".</returns>
    private static IEnumerable<Glossary> CreateGlossaryEntries()
    {
        yield return new Glossary
        {
            Key = "1",
            Category = "Software",
            Term = "API",
            Definition = "Application Programming Interface. A set of rules and specifications that allow software components to communicate and exchange data."
        };

        yield return new Glossary
        {
            Key = "2",
            Category = "Software",
            Term = "SDK",
            Definition = "Software development kit. A set of libraries and tools that allow software developers to build software more easily."
        };

        yield return new Glossary
        {
            Key = "3",
            Category = "SK",
            Term = "Connectors",
            Definition = "Semantic Kernel Connectors allow software developers to integrate with various services providing AI capabilities, including LLM, AudioToText, TextToAudio, Embedding generation, etc."
        };

        yield return new Glossary
        {
            Key = "4",
            Category = "SK",
            Term = "Semantic Kernel",
            Definition = "Semantic Kernel is a set of libraries that allow software developers to more easily develop applications that make use of AI experiences."
        };

        yield return new Glossary
        {
            Key = "5",
            Category = "AI",
            Term = "RAG",
            Definition = "Retrieval Augmented Generation - a term that refers to the process of retrieving additional data to provide as context to an LLM to use when generating a response (completion) to a user’s question (prompt)."
        };

        yield return new Glossary
        {
            Key = "6",
            Category = "AI",
            Term = "LLM",
            Definition = "Large language model. A type of artificial intelligence algorithm that is designed to understand and generate human language."
        };
    }
}
94 changes: 94 additions & 0 deletions
94
dotnet/samples/GettingStartedWithVectorStores/Step2_Vector_Search.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using Microsoft.Extensions.VectorData; | ||
using Microsoft.SemanticKernel.Connectors.InMemory; | ||
using Microsoft.SemanticKernel.Embeddings; | ||
|
||
namespace GettingStartedWithVectorStores; | ||
|
||
/// <summary>
/// Example showing how to run vector searches against an in-memory vector store.
/// </summary>
public class Step2_Vector_Search(ITestOutputHelper output, VectorStoresFixture fixture) : BaseTest(output), IClassFixture<VectorStoresFixture>
{
    /// <summary>
    /// Run a basic vector search that retrieves only the single most relevant result.
    /// </summary>
    [Fact]
    public async Task SearchAnInMemoryVectorStoreAsync()
    {
        var glossaryCollection = await GetVectorStoreCollectionWithDataAsync();

        // Search the vector store for the best match to the question.
        var topMatch = await SearchVectorStoreAsync(
            glossaryCollection,
            "What is an Application Programming Interface?",
            fixture.TextEmbeddingGenerationService);

        // Print the matched definition followed by its score.
        Console.WriteLine(topMatch.Record.Definition);
        Console.WriteLine(topMatch.Score);
    }

    /// <summary>
    /// Search the given collection for the result most relevant to the given search string.
    /// </summary>
    /// <param name="collection">The collection to search.</param>
    /// <param name="searchString">The text to find the closest match for.</param>
    /// <param name="textEmbeddingGenerationService">The service used to turn the search string into an embedding.</param>
    /// <returns>The top search result.</returns>
    internal static async Task<VectorSearchResult<Glossary>> SearchVectorStoreAsync(IVectorStoreRecordCollection<string, Glossary> collection, string searchString, ITextEmbeddingGenerationService textEmbeddingGenerationService)
    {
        // Turn the search string into an embedding.
        var queryEmbedding = await textEmbeddingGenerationService.GenerateEmbeddingAsync(searchString);

        // Ask the collection for only the single best match.
        var response = await collection.VectorizedSearchAsync(
            queryEmbedding,
            new()
            {
                Top = 1
            });

        // Materialize the results and hand back the first one.
        return (await response.Results.ToListAsync()).First();
    }

    /// <summary>
    /// Run a vector search that is restricted by a filter on the glossary category.
    /// </summary>
    [Fact]
    public async Task SearchAnInMemoryVectorStoreWithFilteringAsync()
    {
        var glossaryCollection = await GetVectorStoreCollectionWithDataAsync();

        // Turn the question into an embedding.
        var question = "How do I provide additional context to an LLM?";
        var queryEmbedding = await fixture.TextEmbeddingGenerationService.GenerateEmbeddingAsync(question);

        // Only consider records whose Category equals "AI".
        var categoryFilter = new VectorSearchFilter().EqualTo(nameof(Glossary.Category), "AI");

        // Search with the filter applied, asking for only the single best match.
        var response = await glossaryCollection.VectorizedSearchAsync(
            queryEmbedding,
            new()
            {
                Top = 1,
                Filter = categoryFilter
            });
        var matches = await response.Results.ToListAsync();

        // Print the matched definition followed by its score.
        var topMatch = matches.First();
        Console.WriteLine(topMatch.Record.Definition);
        Console.WriteLine(topMatch.Score);
    }

    /// <summary>
    /// Build a fresh in-memory collection and fill it with the sample glossary data from step 1.
    /// </summary>
    /// <returns>The populated glossary collection.</returns>
    private async Task<IVectorStoreRecordCollection<string, Glossary>> GetVectorStoreCollectionWithDataAsync()
    {
        // Each call constructs its own in-memory store and collection.
        var glossaryCollection = new InMemoryVectorStore().GetCollection<string, Glossary>("skglossary");

        // Reuse the ingestion code from step 1 to populate the collection.
        await Step1_Ingest_Data.IngestDataIntoVectorStoreAsync(glossaryCollection, fixture.TextEmbeddingGenerationService);

        return glossaryCollection;
    }
}
Oops, something went wrong.