Skip to content

Commit

Permalink
Merge pull request #1049 from Cali0707/improved-like-matching
Browse files Browse the repository at this point in the history
feat: improved performance of LIKE matching
  • Loading branch information
duglin authored May 14, 2024
2 parents b3a8729 + 7bc8a63 commit 0988325
Showing 1 changed file with 44 additions and 52 deletions.
96 changes: 44 additions & 52 deletions sql/v2/expression/like_expression.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,14 @@
package expression

import (
"regexp"
"strings"

cesql "github.com/cloudevents/sdk-go/sql/v2"
"github.com/cloudevents/sdk-go/sql/v2/utils"
cloudevents "github.com/cloudevents/sdk-go/v2"
)

// likeExpression is a unary expression that matches the string value of its
// child expression against a LIKE pattern.
type likeExpression struct {
	baseUnaryExpression
	// pattern is the raw LIKE pattern; it is interpreted by matchString each
	// time the expression is evaluated.
	pattern string
}

func (l likeExpression) Evaluate(event cloudevents.Event) (interface{}, error) {
Expand All @@ -30,70 +27,65 @@ func (l likeExpression) Evaluate(event cloudevents.Event) (interface{}, error) {
return nil, err
}

return l.pattern.MatchString(val.(string)), nil
return matchString(val.(string), l.pattern), nil

}

// NewLikeExpression builds a LIKE expression over child using pattern.
//
// The pattern is stored verbatim and interpreted by matchString when the
// expression is evaluated, so construction cannot fail. The error result is
// always nil and is kept only so the signature stays compatible with callers.
func NewLikeExpression(child cesql.Expression, pattern string) (cesql.Expression, error) {
	return likeExpression{
		baseUnaryExpression: baseUnaryExpression{
			child: child,
		},
		pattern: pattern,
	}, nil
}

// matchString reports whether text matches the LIKE pattern.
//
// Pattern syntax:
//   - '%' matches any sequence of zero or more characters.
//   - '_' matches exactly one character (one UTF-8 encoded code point).
//   - `\%` and `\_` match a literal '%' and '_' respectively.
//   - Every other byte, including a backslash that is not followed by '%' or
//     '_', matches itself.
//
// The whole of text must be matched. The matcher is iterative: it remembers
// the most recent '%' and, on a mismatch, lets that wildcard absorb one more
// character and retries, so no allocation or recursion is needed.
func matchString(text, pattern string) bool {
	textLen := len(text)
	patternLen := len(pattern)
	textIdx := 0
	patternIdx := 0
	lastWildcardIdx := -1 // pattern index of the most recent unescaped '%'
	lastMatchIdx := 0     // text index at which that '%' currently stops matching

	for textIdx < textLen {
		switch {
		case patternIdx < patternLen-1 && pattern[patternIdx] == '\\' &&
			(pattern[patternIdx+1] == '_' || pattern[patternIdx+1] == '%'):
			// Escaped metacharacter: it can only match that exact literal. On
			// failure this must be treated as a mismatch; falling through to
			// the literal branch would let the backslash match a literal
			// backslash and turn the escaped character into a live wildcard.
			if pattern[patternIdx+1] == text[textIdx] {
				patternIdx += 2
				textIdx++
				continue
			}
		case patternIdx < patternLen && pattern[patternIdx] == '%':
			// Wildcard: checked before literal equality so that a '%' in the
			// text is never mistaken for a literal match of the wildcard.
			lastWildcardIdx = patternIdx
			lastMatchIdx = textIdx
			patternIdx++
			continue
		case patternIdx < patternLen && pattern[patternIdx] == '_':
			// Single-character wildcard: consume one whole character.
			textIdx = nextCharIdx(text, textIdx)
			patternIdx++
			continue
		case patternIdx < patternLen && pattern[patternIdx] == text[textIdx]:
			// Plain literal byte.
			textIdx++
			patternIdx++
			continue
		}

		// Mismatch (or pattern exhausted while text remains).
		if lastWildcardIdx == -1 {
			return false
		}
		// Let the last '%' absorb one more character and retry the remainder
		// of the pattern from just after it.
		patternIdx = lastWildcardIdx + 1
		lastMatchIdx = nextCharIdx(text, lastMatchIdx)
		textIdx = lastMatchIdx
	}

	// Text is exhausted: whatever is left of the pattern may only be
	// wildcards, each of which matches the empty string.
	for patternIdx < patternLen {
		if pattern[patternIdx] != '%' {
			return false
		}
		patternIdx++
	}

	return true
}

// nextCharIdx returns the index of the character following the one that
// starts at s[i]. It steps over the lead byte and any UTF-8 continuation
// bytes (bit pattern 10xxxxxx) after it. i must be a valid index into s.
func nextCharIdx(s string, i int) int {
	i++
	for i < len(s) && s[i]&0xC0 == 0x80 {
		i++
	}
	return i
}

0 comments on commit 0988325

Please sign in to comment.