From d58cd5db871b53942cf44614c6cf6e06a54e6d3b Mon Sep 17 00:00:00 2001 From: "Sergienko.V5" Date: Sat, 19 Feb 2022 18:24:11 +0300 Subject: [PATCH] fix ValidUTF8 --- cpd.go | 8 ++++---- go.mod | 14 ++++++++++++++ go.sum | 14 ++++++++++++++ utf8.go | 7 ++++++- 4 files changed, 38 insertions(+), 5 deletions(-) create mode 100644 go.mod create mode 100644 go.sum diff --git a/cpd.go b/cpd.go index 21af8ae..f33d149 100644 --- a/cpd.go +++ b/cpd.go @@ -1,7 +1,7 @@ //Package cpd - code page detect // (c) 2020 softlandia@gmail.com package cpd - + import ( "bufio" "errors" @@ -61,7 +61,7 @@ func CodepageAutoDetect(b []byte) IDCodePage { // support convert only from/to Windows1251/IBM866 func FileConvertCodepage(fileName string, fromCP, toCP IDCodePage) error { switch { - case (fromCP == toCP): + case fromCP == toCP: return nil case (fromCP != CP1251) && (fromCP != CP866): return nil @@ -74,7 +74,7 @@ func FileConvertCodepage(fileName string, fromCP, toCP IDCodePage) error { } defer iFile.Close() - //TODO need using sytem tmp folder + //TODO need using system tmp folder tmpFileName := fileName + "~" oFile, err := os.Create(tmpFileName) if err != nil { @@ -150,7 +150,7 @@ var ( errUnsupportedOutputCodepage = errors.New("cpd: output codepage not support encode") ) -// NewReader - convertion to UTF-8 +// NewReader - conversion to UTF-8 // return input reader if input contain less 4 bytes // return input reader if input contain ASCII data // if cpn[0] exist, then using it as input codepage name diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..1ec7935 --- /dev/null +++ b/go.mod @@ -0,0 +1,14 @@ +module github.com/softlandia/cpd + +go 1.17 + +require ( + github.com/stretchr/testify v1.7.0 + golang.org/x/text v0.3.7 +) + +require ( + github.com/davecgh/go-spew v1.1.0 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..aaea189 --- /dev/null +++ b/go.sum @@ -0,0 +1,14 @@ +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/utf8.go b/utf8.go index 3fdbfd0..c2eea4f 100644 --- a/utf8.go +++ b/utf8.go @@ -50,7 +50,7 @@ func testUTF8bitPattern(b byte) (int, cp int32) { return 0, 0 } -//ValidUTF8 - return true if imput slice contain true UTF-8 +//ValidUTF8 - return true if input slice contain true UTF-8 func ValidUTF8(data []byte) bool { m := len(data) if m <= 1 { @@ -67,6 +67,11 @@ func ValidUTF8(data []byte) bool { if n == 0 { return false } + + if i+int(n) >= m { + break + } + i++ var j int32 for j = 1; j < n; j++ {