-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.go
70 lines (65 loc) · 1.17 KB
/
utils.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
package search
import (
"unicode"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
// tokenize breaks input into lowercase word tokens.
//
// The input is first passed through normalize. Every rune for which
// unicode.IsSpace or unicode.IsPunct reports true acts as a separator and is
// dropped; every other rune is lowercased with unicode.ToLower and appended to
// the word currently being built. Runs of separators never yield empty tokens.
// The result is nil when the input contains no token runes.
func tokenize(input string) [][]rune {
	var (
		words   [][]rune
		current []rune
	)

	// flush moves the word under construction (if any) into words and starts
	// a fresh one, so each token owns its own backing array.
	flush := func() {
		if len(current) == 0 {
			return
		}
		words = append(words, current)
		current = nil
	}

	for _, ch := range normalize(input) {
		switch {
		case unicode.IsSpace(ch), unicode.IsPunct(ch):
			flush()
		default:
			current = append(current, unicode.ToLower(ch))
		}
	}
	// The input may end mid-word; emit whatever is still pending.
	flush()

	return words
}
// normalize returns s with combining marks removed.
//
// The string is decomposed to NFD, every rune in the Unicode Mn (nonspacing
// mark) category is removed, and the remainder is recomposed to NFC. A fresh
// transformer chain is built on each call. If the transform reports an error,
// the input is returned unchanged as a best-effort fallback.
func normalize(s string) string {
	stripMarks := transform.Chain(
		norm.NFD,
		runes.Remove(runes.In(unicode.Mn)),
		norm.NFC,
	)

	folded, _, err := transform.String(stripMarks, s)
	if err == nil {
		return folded
	}
	// Transformation failed: hand back the original text rather than a
	// partially transformed result.
	return s
}
// levenshteinDistance returns the edit distance between a and b: the minimum
// number of single-rune insertions, deletions and substitutions needed to turn
// one slice into the other.
//
// If either slice is empty the result is the length of the other. Otherwise a
// single row of the dynamic-programming table is kept and updated in place;
// the row is sized by the shorter input, so extra memory is
// min(len(a), len(b)) + 1 ints. Neither input is modified.
func levenshteinDistance(a, b []rune) int {
	switch {
	case len(a) == 0:
		return len(b)
	case len(b) == 0:
		return len(a)
	}

	// Index the row by the shorter input to keep the scratch slice small.
	short, long := a, b
	if len(short) > len(long) {
		short, long = long, short
	}
	width := len(short)

	// dist[j] starts as the distance from an empty prefix of long to the
	// first j runes of short, i.e. j insertions.
	dist := make([]int, width+1)
	for j := range dist {
		dist[j] = j
	}

	for i, lr := range long {
		// diag carries the previous row's value one column to the left,
		// which would otherwise be lost when dist[j] is overwritten.
		diag := dist[0]
		dist[0] = i + 1 // deleting the first i+1 runes of long

		for j, sr := range short {
			above := dist[j+1] // previous row, same column
			cost := diag       // runes match: no extra edit
			if lr != sr {
				// substitution, insertion, deletion — whichever is cheapest.
				cost = min(diag, dist[j], above) + 1
			}
			dist[j+1] = cost
			diag = above
		}
	}

	return dist[width]
}