diff --git a/cmd/dataset_tokenizer/dataset_tokenizer.go b/cmd/dataset_tokenizer/dataset_tokenizer.go index f44d1e8..0c20555 100644 --- a/cmd/dataset_tokenizer/dataset_tokenizer.go +++ b/cmd/dataset_tokenizer/dataset_tokenizer.go @@ -16,6 +16,10 @@ import ( "sync" "time" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/endpoints" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3" "github.com/wbrown/gpt_bpe" "github.com/wbrown/gpt_bpe/resources" "github.com/yargevad/filepathx" @@ -28,6 +32,11 @@ type PathInfo struct { Dir bool } +type S3Client interface { + GetObject(input *s3.GetObjectInput) (*s3.GetObjectOutput, error) + ListObjectsV2(input *s3.ListObjectsV2Input) (*s3.ListObjectsV2Output, error) +} + // GlobTexts // Given a directory path, recursively finds all `.txt` and `.jsonl` files, // returning a slice of PathInfo. @@ -195,6 +204,212 @@ func resolveSortSpec(matches []PathInfo, sortSpec string) (err error) { return nil } +// getObjectsS3Recursively retrieves objects recursively from an S3 bucket and sends them to the objects channel. +func getObjectsS3Recursively(svc S3Client, bucketName, prefix string, objects chan<- *s3.Object) { + var continuationToken *string + for { + params := &s3.ListObjectsV2Input{ + Bucket: aws.String(bucketName), + Prefix: aws.String(prefix), + ContinuationToken: continuationToken, + } + + resp, err := svc.ListObjectsV2(params) + if err != nil { + log.Printf("Error listing objects: %v", err) + return + } + + for _, obj := range resp.Contents { + key := *obj.Key + if strings.HasSuffix(key, ".txt") || strings.HasSuffix(key, ".jsonl") { + objects <- obj + } + } + + if !*resp.IsTruncated { + break + } + + continuationToken = resp.NextContinuationToken + } +} + +// fetchJSONLFileS3 reads a JSONL file from S3, extracts the "text" key, and return it as a string with spaces. +func fetchJSONLFileS3(svc S3Client, bucketName, objectKey string) (string, error) { + params := &s3.GetObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + } + + resp, err := svc.GetObject(params) + if err != nil { + return "", err + } + defer resp.Body.Close() + + var text strings.Builder + jsonlReader := bufio.NewReader(resp.Body) + + firstLine := true // Flag to track the first line + for { + line, err := jsonlReader.ReadString('\n') + if err != nil { + if err == io.EOF { + break + } + return "", err + } + + // Parse the JSONL line into a map + var jsonObjectMap map[string]interface{} + if err := json.Unmarshal([]byte(line), &jsonObjectMap); err != nil { + return "", err + } + + // Extract the "text" field + textValue, ok := jsonObjectMap["text"].(string) + if !ok { + return "", fmt.Errorf("JSONL object has no 'text' field or it's not a string") + } + + // Append the text to the result + if firstLine { + firstLine = false + } else { + text.WriteString(" ") // Append a space for all lines except the first + } + text.WriteString(textValue) + } + + return text.String(), nil +} + +// fetchTextFileS3 reads a text file from S3 and return its content as a string. +func fetchTextFileS3(svc S3Client, bucketName, objectKey string) (string, error) { + params := &s3.GetObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(objectKey), + } + + resp, err := svc.GetObject(params) + if err != nil { + return "", err + } + defer resp.Body.Close() + + var text strings.Builder + textReader := bufio.NewReaderSize(resp.Body, 8*1024*1024) + for { + buf := make([]byte, 4096) + n, err := textReader.Read(buf) + if err != nil { + if err == io.EOF { + break + } + return "", err + } + text.Write(buf[:n]) + } + + return text.String(), nil +} + +// removeS3Prefix splits the input into the bucket and to ensure that s3:// is present +func removeS3Prefix(input string) (hasS3Prefix bool, remainder string) { + prefix := "s3://" + if strings.HasPrefix(input, prefix) { + return true, input[len(prefix):] + } + return false, input +} + +// ReadTextsFromS3 reads text files recursively from all prefixes in an S3 bucket. +func ReadTextsFromS3( + svc S3Client, + bucketName string, + sanitize bool, + numReaderThreads int, +) (chan namedRuneReader, error) { + runeReaders := make(chan namedRuneReader, 64) + objects := make(chan *s3.Object, 64) + wg := sync.WaitGroup{} + + // Start reader goroutines. + startReader := func() { + for { + object, ok := <-objects + if !ok { + break + } + + if strings.HasSuffix(*object.Key, ".jsonl") { + // Handle JSONL files. + jsonObject, err := fetchJSONLFileS3(svc, bucketName, *object.Key) + + if err != nil { + log.Printf("Error reading JSONL file %s: %v", *object.Key, err) + continue + } + + // Create our rune reader. + if sanitize { + runeReaders <- namedRuneReader{ + *object.Key, + CreateTextSanitizer(strings.NewReader(jsonObject)), + } + } else { + runeReaders <- namedRuneReader{ + *object.Key, + strings.NewReader(jsonObject), + } + } + } else { + // Handle regular text files. + text, err := fetchTextFileS3(svc, bucketName, *object.Key) + if err != nil { + log.Printf("Error reading text file %s: %v", *object.Key, err) + continue + } + + // Create our rune reader. + if sanitize { + runeReaders <- namedRuneReader{ + *object.Key, + CreateTextSanitizer(strings.NewReader(text)), + } + } else { + runeReaders <- namedRuneReader{ + *object.Key, + strings.NewReader(text), + } + } + } + } + wg.Done() + } + + // Start multiple reader goroutines. + for i := 0; i < numReaderThreads; i++ { + wg.Add(1) + go startReader() + } + + // List objects recursively. + getObjectsS3Recursively(svc, bucketName, "", objects) + + // Close the objects channel when done. + close(objects) + + // Wait for all reader goroutines to finish. + wg.Wait() + + // Close the runeReaders channel. + close(runeReaders) + + return runeReaders, nil +} + // ReadTexts // Consumes a directory path and recursively scans for `.txt` files, producing // a TextsIterator function that yields the text file as an io.Reader type. @@ -938,11 +1153,15 @@ func main() { "comma separated list of tokens to exclude from the vocabulary") sanitizeEncodingBool := flag.Bool("disable_sanitize_encoding", false, "disable sanitizing of misencoding") + s3Endpoint := flag.String("object_storage_endpoint", "https://object.las1.coreweave.com", + "CW S3 Endpoint to use for fetching data") + flag.Parse() if *inputDir == "" { flag.Usage() log.Fatal("Must provide -input for directory source") } + sampling, err := strconv.Atoi(*sampling_str) if err != nil { log.Fatal("Sampling parameter must be an integer") @@ -1018,60 +1237,100 @@ func main() { log.Fatal(tokErr) } - if textReaders, err := ReadTexts(*inputDir, *sanitizeBool, - *reorderPaths, *numReaderThreads); err != nil { - log.Fatal(err) - } else { - numTokens := 0 - begin := time.Now() - if *streaming_encode { - wg := sync.WaitGroup{} - for threadIdx := 0; threadIdx < *numThreads; threadIdx++ { - wg.Add(1) - go func(threadId int) { - var contexts chan gpt_bpe.Tokens - var tokErr error - indexFilePath := fmt.Sprintf("%s.%d.index", - *outputFile, threadId) - outputFilePath := fmt.Sprintf("%s.%d.tokens", - *outputFile, threadId) - contexts, tokErr = textsTokenizer.TokenizeTexts(textReaders, - indexFilePath) - if tokErr != nil { - log.Fatal(tokErr) - } - total, writeErr := WriteContexts(outputFilePath, contexts, - nil, sampling, false) - if writeErr != nil { - log.Fatal(writeErr) - } - numTokens += total - wg.Done() - }(threadIdx) - } - wg.Wait() - } else { - var contexts chan gpt_bpe.Tokens - var tokErr error - contexts, tokErr = textsTokenizer.TokenizeTextsToContexts( - textReaders) - if tokErr != nil { - log.Fatal(tokErr) - } - var enc *gpt_bpe.GPTEncoder - if *showContexts { - enc, _ = textsTokenizer.InitTokenizer() - } - var writeErr error - numTokens, writeErr = WriteContexts(*outputFile, contexts, enc, - sampling, - *reorderPaths == "shuffle") - if writeErr != nil { - log.Fatal(writeErr) + hasS3Prefix, s3Bucket := removeS3Prefix(*inputDir) + + if hasS3Prefix && *s3Endpoint == "" { + flag.Usage() + log.Fatal("Must provide S3 Endpoint if fetching data from CW object storage") + } + + // Declare textReaders + var textReaders chan namedRuneReader + + if hasS3Prefix && *s3Endpoint != "" { + defaultResolver := endpoints.DefaultResolver() + s3CustResolverFn := func(service, region string, optFns ...func(*endpoints.Options)) (endpoints.ResolvedEndpoint, error) { + if service == "s3" { + return endpoints.ResolvedEndpoint{ + URL: *s3Endpoint, + }, nil } + + return defaultResolver.EndpointFor(service, region, optFns...) + } + + sess := session.Must(session.NewSessionWithOptions(session.Options{ + Config: aws.Config{ + EndpointResolver: endpoints.ResolverFunc(s3CustResolverFn), + Region: aws.String("coreweave-object-storage"), + }, + })) + + svc := s3.New(sess) + textReaders, err = ReadTextsFromS3(svc, s3Bucket, *sanitizeBool, *numReaderThreads) + + if err != nil { + log.Fatal(err) + } + } else { + textReaders, err = ReadTexts(*inputDir, *sanitizeBool, *reorderPaths, *numReaderThreads) + + if err != nil { + log.Fatal(err) + } + } + + numTokens := 0 + begin := time.Now() + if *streaming_encode { + wg := sync.WaitGroup{} + for threadIdx := 0; threadIdx < *numThreads; threadIdx++ { + wg.Add(1) + go func(threadId int) { + var contexts chan gpt_bpe.Tokens + var tokErr error + indexFilePath := fmt.Sprintf("%s.%d.index", + *outputFile, threadId) + outputFilePath := fmt.Sprintf("%s.%d.tokens", + *outputFile, threadId) + contexts, tokErr = textsTokenizer.TokenizeTexts(textReaders, + indexFilePath) + if tokErr != nil { + log.Fatal(tokErr) + } + total, writeErr := WriteContexts(outputFilePath, contexts, + nil, sampling, false) + if writeErr != nil { + log.Fatal(writeErr) + } + numTokens += total + wg.Done() + }(threadIdx) + } + wg.Wait() + } else { + var contexts chan gpt_bpe.Tokens + var tokErr error + contexts, tokErr = textsTokenizer.TokenizeTextsToContexts( + textReaders) + if tokErr != nil { + log.Fatal(tokErr) + } + var enc *gpt_bpe.GPTEncoder + if *showContexts { + enc, _ = textsTokenizer.InitTokenizer() + } + var writeErr error + numTokens, writeErr = WriteContexts(*outputFile, contexts, enc, + sampling, + *reorderPaths == "shuffle") + if writeErr != nil { + log.Fatal(writeErr) } - duration := time.Now().Sub(begin).Seconds() - log.Printf("%d tokens in %0.2fs, %0.2f tokens/s", numTokens, - duration, float64(numTokens)/duration) } + + duration := time.Now().Sub(begin).Seconds() + + log.Printf("%d tokens in %0.2fs, %0.2f tokens/s", numTokens, + duration, float64(numTokens)/duration) } diff --git a/cmd/dataset_tokenizer/dataset_tokenizer_test.go b/cmd/dataset_tokenizer/dataset_tokenizer_test.go index 3602156..57855c2 100644 --- a/cmd/dataset_tokenizer/dataset_tokenizer_test.go +++ b/cmd/dataset_tokenizer/dataset_tokenizer_test.go @@ -6,14 +6,21 @@ import ( "crypto/sha256" "encoding/binary" "encoding/hex" + "errors" "fmt" - "github.com/stretchr/testify/assert" - "github.com/wbrown/gpt_bpe" "io" "log" "os" + "strings" + "sync" "testing" "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" + "github.com/aws/aws-sdk-go/service/s3" + "github.com/stretchr/testify/assert" + "github.com/wbrown/gpt_bpe" ) type SanitizerTest struct { @@ -22,6 +29,16 @@ type SanitizerTest struct { Expected string } +// S3MockClient is a mock implementation of S3Client. +type S3MockClient struct { + GetObjectOutput *s3.GetObjectOutput + GetObjectError error + GetObjectOutputs map[string]*s3.GetObjectOutput // Map object keys to GetObjectOutput + GetObjectErrors map[string]error + ListObjectsV2Output *s3.ListObjectsV2Output + ListObjectsV2Error error +} + type SanitizerTests []SanitizerTest var sanitizerTests = SanitizerTests{ @@ -444,3 +461,151 @@ func TestShuffle(t *testing.T) { fmt.Printf("Using Chunk by chunk hashing, shuffle found to be working as intended!! \n") } + +func (m *S3MockClient) ListObjectsV2(input *s3.ListObjectsV2Input) (*s3.ListObjectsV2Output, error) { + return m.ListObjectsV2Output, m.ListObjectsV2Error +} + +func (m *S3MockClient) GetObject(input *s3.GetObjectInput) (*s3.GetObjectOutput, error) { + return m.GetObjectOutput, m.GetObjectError +} + +func (m *S3MockClient) GetObjects(bucketName, prefix string) ([]*s3.Object, error) { + // Simulate listing objects with the specified prefix + var matchingObjects []*s3.Object + + for key := range m.GetObjectOutputs { + if strings.HasPrefix(key, prefix) { + matchingObjects = append(matchingObjects, &s3.Object{Key: aws.String(key)}) + } + } + + if len(matchingObjects) == 0 { + // Simulate the case where no objects match the prefix + return nil, awserr.New("NoSuchKey", "The specified key does not exist", nil) + } + + return matchingObjects, nil +} + +func TestFetchJSONLFileS3(t *testing.T) { + // Define a JSONL file content for testing + jsonlContent := `{"text": "Hello, World!"} +{"text": "Testing JSONL"} +{"text": "This line should be valid"} +` + + // Create a mock S3 client + mockSvc := &S3MockClient{ + GetObjectOutput: &s3.GetObjectOutput{ + Body: io.NopCloser(strings.NewReader(jsonlContent)), + ContentLength: aws.Int64(int64(len(jsonlContent))), + }, + GetObjectError: nil, // No error for this test + } + + // Call readJSONLFileS3 with the mock S3 client + text, err := fetchJSONLFileS3(mockSvc, "test-bucket", "test-object.jsonl") + + if err != nil { + t.Errorf("Expected no error, but got %v", err) + } + + // Case 1: Verify that the extracted text matches the expected result + expectedText := "Hello, World! Testing JSONL This line should be valid" + if text != expectedText { + t.Errorf("Expected text: %s but got: %s", expectedText, text) + } + + // Case 2: Test case with an error returned by GetObject + mockSvc.GetObjectError = errors.New("Simulated error") + _, err = fetchJSONLFileS3(mockSvc, "test-bucket", "error-object.jsonl") + + if err == nil { + t.Error("Expected an error, but got none") + } +} + +func TestFetchTextFileS3(t *testing.T) { + // Define test data + textContent := "This is a test. This is great. Have fun in life." + + // Create a mock S3 client + mockSvc := &S3MockClient{ + GetObjectOutput: &s3.GetObjectOutput{ + Body: io.NopCloser(strings.NewReader(textContent)), + ContentLength: aws.Int64(int64(len(textContent))), + }, + GetObjectError: nil, // No error for this test + } + + // Call readTextFileS3 with the mock S3 client + text, err := fetchTextFileS3(mockSvc, "test-bucket", "test-object.txt") + + if err != nil { + t.Errorf("Expected no error, but got %v", err) + } + + // Case 1: Verify that the extracted text matches the expected result + if text != textContent { + t.Errorf("Expected text: %s, but got: %s", textContent, text) + } + + // Cas 2: Test case with an error returned by GetObject + mockSvc.GetObjectError = errors.New("Simulated error") + _, err = fetchTextFileS3(mockSvc, "test-bucket", "error-object.txt") + + if err == nil { + t.Error("Expected an error, but got none") + } +} + +func TestListObjectsRecursively(t *testing.T) { + // Create a mock S3 client + mockSvc := &S3MockClient{ + // Define the expected behavior for ListObjectsV2 + ListObjectsV2Output: &s3.ListObjectsV2Output{ + Contents: []*s3.Object{ + {Key: aws.String("a/b/c/d/object1.txt")}, + {Key: aws.String("b/c/d/object2.jsonl")}, + }, + IsTruncated: aws.Bool(false), + NextContinuationToken: nil, + }, + } + + // Create a channel for objects + objects := make(chan *s3.Object, 10) + + // Create a WaitGroup + var wg sync.WaitGroup + wg.Add(1) + + // Call listObjectsRecursively with the mock S3 client + go func() { + defer wg.Done() + getObjectsS3Recursively(mockSvc, "test-bucket", "prefix/", objects) + close(objects) // Close the channel when finished + }() + + // Receive objects from the channel + var receivedObjects []*s3.Object + for obj := range objects { + receivedObjects = append(receivedObjects, obj) + } + + // Verify the number of received objects + if len(receivedObjects) != 2 { + t.Errorf("Expected 2 objects, but got %d", len(receivedObjects)) + } + + // Verify the keys of received objects + expectedKeys := []string{"a/b/c/d/object1.txt", "b/c/d/object2.jsonl"} + for i, obj := range receivedObjects { + if *obj.Key != expectedKeys[i] { + t.Errorf("Expected key %s, but got %s", expectedKeys[i], *obj.Key) + } + } + + wg.Wait() // Wait for all goroutines to finish +} diff --git a/cmd/dataset_tokenizer/go.mod b/cmd/dataset_tokenizer/go.mod index 3021641..eda70a7 100644 --- a/cmd/dataset_tokenizer/go.mod +++ b/cmd/dataset_tokenizer/go.mod @@ -5,6 +5,7 @@ go 1.18 replace github.com/wbrown/gpt_bpe => ../../ require ( + github.com/aws/aws-sdk-go v1.45.4 github.com/stretchr/testify v1.7.1 github.com/wbrown/gpt_bpe v0.0.0-00010101000000-000000000000 github.com/yargevad/filepathx v1.0.0 @@ -17,6 +18,7 @@ require ( github.com/edsrzf/mmap-go v1.1.0 // indirect github.com/hashicorp/golang-lru v0.5.4 // indirect github.com/jdkato/prose/v2 v2.0.0 // indirect + github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/kr/text v0.2.0 // indirect github.com/mingrammer/commonregex v1.0.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect diff --git a/cmd/dataset_tokenizer/go.sum b/cmd/dataset_tokenizer/go.sum index 2ae90a6..27c72b5 100644 --- a/cmd/dataset_tokenizer/go.sum +++ b/cmd/dataset_tokenizer/go.sum @@ -49,6 +49,10 @@ github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kd github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= +github.com/aws/aws-sdk-go v1.45.2 h1:hTong9YUklQKqzrGk3WnKABReb5R8GjbG4Y6dEQfjnk= +github.com/aws/aws-sdk-go v1.45.2/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= +github.com/aws/aws-sdk-go v1.45.4 h1:6B8oTYNEncxga8EV1C6Q4iJNnpDIqLEigy0v0oh2qYw= +github.com/aws/aws-sdk-go v1.45.4/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/bketelsen/crypt v0.0.4/go.mod h1:aI6NrJ0pMGgvZKL1iVgXLnfIFJtfV+bKCoqOes/6LfM= github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= @@ -198,6 +202,10 @@ github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANyt github.com/jdkato/prose v1.1.1/go.mod h1:jkF0lkxaX5PFSlk9l4Gh9Y+T57TqUZziWT7uZbW5ADg= github.com/jdkato/prose/v2 v2.0.0 h1:XRwsTM2AJPilvW5T4t/H6Lv702Qy49efHaWfn3YjWbI= github.com/jdkato/prose/v2 v2.0.0/go.mod h1:7LVecNLWSO0OyTMOscbwtZaY7+4YV2TPzlv5g5XLl5c= +github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg= +github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= +github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= +github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= @@ -288,6 +296,7 @@ github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= go.etcd.io/etcd/client/pkg/v3 v3.5.0/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= go.etcd.io/etcd/client/v2 v2.305.0/go.mod h1:h9puh54ZTgAKtEbut2oe9P4L/oqKCVB6xsXlzd7alYQ= @@ -364,6 +373,7 @@ golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -401,6 +411,9 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4/go.mod h1:RBQZq4jEuRlivfhVLdyRGr576XBO4/greRjx4P4O3yc= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.1.0 h1:hZ/3BUoy5aId7sCpA/Tc5lt8DkFgdVS2onTpJsZ/fl0= +golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -424,6 +437,7 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181026203630-95b1ffbd15a5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -472,10 +486,15 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220727055044-e65921a090b8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.3.0 h1:w8ZOecv6NaNa/zC8944JTU3vz4u6Lagfk4RPQxv92NQ= golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -485,6 +504,8 @@ golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.4.0 h1:BrVqGRd7+k1DiOgtnFvAkoQEWQvBc25ouMJM6429SFg= +golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -545,6 +566,7 @@ golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.9/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= golang.org/x/tools v0.1.10/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -673,6 +695,7 @@ gopkg.in/neurosnap/sentences.v1 v1.0.7/go.mod h1:YlK+SN+fLQZj+kY3r8DkGDhDr91+S3J gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo=