
Commit 6ccf0b6

Merge pull request #227 from bzz/ci-fix-cgo-tests
CI: fix cgo profiles
2 parents ae43e1a + f3ceaa6 commit 6ccf0b6

6 files changed: +53, -14 lines changed


.travis.yml

Lines changed: 9 additions & 9 deletions
@@ -1,7 +1,5 @@
 dist: trusty
-
 language: go
-
 go:
   - '1.12.x'
   - '1.11.x'
@@ -10,17 +8,13 @@ env:
     - GO_VERSION_FOR_JVM='1.11.x'
    - CGO_ENABLED=0
     - GO111MODULE=on
+    - ONIGURUMA_VERSION='6.9.1'
   matrix:
     - ONIGURUMA=0
     - ONIGURUMA=1
 matrix:
   fast_finish: true
 
-addons:
-  apt:
-    packages:
-      - libonig-dev
-
 stages:
   - name: test
   - name: release
@@ -32,8 +26,14 @@ stage: test
 install:
   - >
     if [[ "${ONIGURUMA}" -gt 0 ]]; then
-      export CGO_ENABLED=1;
-      export GO_TAGS='oniguruma';
+      export CGO_ENABLED=1
+      export GO_TAGS='oniguruma'
+      # install oniguruma manually as trusty has only ancient 5.x
+      sudo apt-get install -y dpkg # dpkg >= 1.17.5ubuntu5.8 fixes https://bugs.launchpad.net/ubuntu/+source/dpkg/+bug/1730627
+      wget "http://archive.ubuntu.com/ubuntu/pool/universe/libo/libonig/libonig5_${ONIGURUMA_VERSION}-1_amd64.deb"
+      sudo dpkg -i "libonig5_${ONIGURUMA_VERSION}-1_amd64.deb"
+      wget "http://archive.ubuntu.com/ubuntu/pool/universe/libo/libonig/libonig-dev_${ONIGURUMA_VERSION}-1_amd64.deb"
+      sudo dpkg -i "libonig-dev_${ONIGURUMA_VERSION}-1_amd64.deb"
     fi;
 script:
   - make test-coverage

go.mod

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@ module github.com/src-d/enry/v2
 go 1.12
 
 require (
-	github.com/src-d/go-oniguruma v1.0.0
+	github.com/src-d/go-oniguruma v1.1.0
 	github.com/stretchr/testify v1.3.0
 	github.com/toqueteos/trie v1.0.0 // indirect
 	gopkg.in/toqueteos/substring.v1 v1.0.2

go.sum

Lines changed: 2 additions & 2 deletions
@@ -2,8 +2,8 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8
 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/src-d/go-oniguruma v1.0.0 h1:JDk5PUAjreGsGAKLsoDLNmrsaryjJ5RqT3h+Si6aw/E=
-github.com/src-d/go-oniguruma v1.0.0/go.mod h1:chVbff8kcVtmrhxtZ3yBVLLquXbzCS6DrxQaAK/CeqM=
+github.com/src-d/go-oniguruma v1.1.0 h1:EG+Nm5n2JqWUaCjtM0NtutPxU7ZN5Tp50GWrrV8bTww=
+github.com/src-d/go-oniguruma v1.1.0/go.mod h1:chVbff8kcVtmrhxtZ3yBVLLquXbzCS6DrxQaAK/CeqM=
 github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
 github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
 github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=

internal/tokenizer/tokenize_c.go

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 
 package tokenizer
 
-import "gopkg.in/src-d/enry.v1/internal/tokenizer/flex"
+import "github.com/src-d/enry/v2/internal/tokenizer/flex"
 
 // Tokenize returns lexical tokens from content. The tokens returned match what
 // the Linguist library returns. At most the first ByteLimit bytes of content are tokenized.

internal/tokenizer/tokenize_test.go

Lines changed: 39 additions & 0 deletions
@@ -115,6 +115,45 @@ func TestTokenize(t *testing.T) {
 	}
 }
 
+func TestTokenizerLatin1AsUtf8(t *testing.T) {
+	content := []byte("th\xe5 filling") // `th� filling`
+	t.Logf("%v - %q", content, string(content))
+	tokens := Tokenize(content)
+	for i, token := range tokens {
+		t.Logf("token %d, %s", i+1, token)
+	}
+	require.Equal(t, 3, len(tokens))
+}
+
+func TestRegexpOnInvalidUtf8(t *testing.T) {
+	origContent := []struct {
+		text   string
+		tokens []string
+	}{
+		{"th\xe0 filling", []string{"th", "filling"}},   // `th� filling`
+		{"th\u0100 filling", []string{"th", "filling"}}, // `thĀ filling`
+		{"привет, как дела?", []string{}},               // empty, no ASCII tokens
+	}
+	re := reRegularToken
+
+	for _, content := range origContent {
+		t.Run("", func(t *testing.T) {
+			t.Logf("%v - %q", content, content.text)
+			input := []byte(content.text)
+			tokens := re.FindAll(input, -1)
+			require.Equal(t, len(content.tokens), len(tokens))
+
+			newContent := re.ReplaceAll(input, []byte(` `))
+			t.Logf("content:%q, tokens:[", newContent)
+			for i, token := range tokens {
+				t.Logf("\t%q,", string(token))
+				require.Equal(t, content.tokens[i], string(token))
+			}
+			t.Logf(" ]\n")
+		})
+	}
+}
+
 func BenchmarkTokenizer_BaselineCopy(b *testing.B) {
 	b.ReportAllocs()
 	for i := 0; i < b.N; i++ {

regex/oniguruma.go

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@ import (
 type EnryRegexp = *rubex.Regexp
 
 func MustCompile(str string) EnryRegexp {
-	return rubex.MustCompile(str)
+	return rubex.MustCompileASCII(str)
 }
 
 func QuoteMeta(s string) string {
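
The go-oniguruma bump to v1.1.0 and the switch to rubex.MustCompileASCII above are what let the new tokenizer tests pass on non-UTF-8 input: patterns are compiled with ASCII encoding, so stray Latin-1 bytes no longer break matching. Below is a minimal standalone sketch of that behaviour (not part of this commit); it assumes the go-oniguruma v1.1.0 package is imported as rubex, exposes a regexp-compatible FindAll, and is built with cgo and a system liboniguruma. The pattern is a simplified stand-in for the tokenizer's reRegularToken.

// asciimatch.go: illustrative sketch only, not included in this change.
package main

import (
	"fmt"

	rubex "github.com/src-d/go-oniguruma" // same package that regex/oniguruma.go wraps
)

func main() {
	// 0xE5 is valid Latin-1 ("å") but not a valid UTF-8 sequence; this is the
	// kind of input exercised by TestTokenizerLatin1AsUtf8 above.
	content := []byte("th\xe5 filling")

	// MustCompileASCII, added in go-oniguruma v1.1.0, compiles the pattern with
	// ASCII encoding, so matching works byte-wise instead of failing on the
	// invalid UTF-8 byte.
	re := rubex.MustCompileASCII(`[0-9A-Za-z_]+`)
	fmt.Printf("%q\n", re.FindAll(content, -1)) // expected: ["th" "filling"]
}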

0 commit comments
