Skip to content

Commit 358dbb2

Browse files
authored
feat: fix unicode var bug by handling multi-byte characters before single byte characters in input (#17)
1 parent 4697815 commit 358dbb2

File tree

4 files changed

+49
-4
lines changed

4 files changed

+49
-4
lines changed

evaluation_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1453,6 +1453,12 @@ func TestParameterizedEvaluation(test *testing.T) {
14531453
Parameters: []EvaluationParameter{fooParameter},
14541454
Expected: false,
14551455
},
1456+
{
1457+
Name: "Test with cyrilic parameter",
1458+
Input: "переменная * 5",
1459+
Parameters: []EvaluationParameter{{Name: "переменная", Value: 2}},
1460+
Expected: float64(10),
1461+
},
14561462
}
14571463

14581464
runEvaluationTests(evaluationTests, test)

lexerStream.go

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
package govaluate
22

3-
import "sync"
3+
import (
4+
"sync"
5+
"unicode/utf8"
6+
)
47

58
// lexerStream is a cursor over an expression string that is tracked in two
// coordinate systems at once: rune index and byte offset.
type lexerStream struct {
	// sourceString is the original input; substrings of it are returned by
	// slicing with byte offsets taken from strPosition.
	sourceString string
	// source is the input as runes; position indexes into this slice.
	source []rune
	// strPosition is the byte offset into sourceString that corresponds to
	// position. It advances by the UTF-8 width of every rune read.
	strPosition int
	// position is the index in source of the next rune to be read.
	position int
	// length is the number of runes in source.
	length int
}
@@ -25,18 +29,34 @@ func newLexerStream(source string) *lexerStream {
2529
}
2630
ret.sourceString = source
2731
ret.position = 0
32+
ret.strPosition = 0
2833
ret.length = len(ret.source)
2934
return ret
3035
}
3136

3237
// readCharacter returns the rune at the current position and advances the
// stream past it. position moves forward by one rune, while strPosition moves
// forward by that rune's UTF-8 encoded width so it keeps pointing at the
// matching byte offset in sourceString.
//
// Indexing panics if position is outside source, so callers must make sure
// there is something left to read first.
//
// NOTE(review): utf8.RuneLen reports the width of the rune's own encoding; if
// source can be built from a string containing invalid UTF-8 the byte widths
// may not line up with sourceString - confirm against newLexerStream.
func (this *lexerStream) readCharacter() rune {
	character := this.source[this.position]
	this.position += 1
	this.strPosition += utf8.RuneLen(character)
	return character
}
3743

3844
func (this *lexerStream) rewind(amount int) {
39-
this.position -= amount
45+
if amount < 0 {
46+
this.position -= amount
47+
this.strPosition -= amount
48+
}
49+
strAmount := 0
50+
for i := 0; i < amount; i++ {
51+
if this.position >= this.length {
52+
strAmount += 1
53+
this.position -= 1
54+
continue
55+
}
56+
strAmount += utf8.RuneLen(this.source[this.position])
57+
this.position -= 1
58+
}
59+
this.strPosition -= strAmount
4060
}
4161

4262
func (this lexerStream) canRead() bool {

parsing.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -308,9 +308,10 @@ Returns false if the stream ended before whitespace was broken or condition was
308308
func readUntilFalse(stream *lexerStream, includeWhitespace bool, breakWhitespace bool, allowEscaping bool, condition func(rune) bool) (string, bool) {
309309

310310
tokenBuffer := tokenBufferPool.Get().(*bytes.Buffer)
311+
tokenBuffer.Reset()
311312
var character rune
312313

313-
startPosition := stream.position
314+
startPosition := stream.strPosition
314315
reuseString := true
315316
trimString := false
316317
conditioned := false
@@ -356,7 +357,7 @@ func readUntilFalse(stream *lexerStream, includeWhitespace bool, breakWhitespace
356357
if reuseString {
357358
tokenBuffer.Reset()
358359
tokenBufferPool.Put(tokenBuffer)
359-
ret := stream.sourceString[startPosition:stream.position]
360+
ret := stream.sourceString[startPosition:stream.strPosition]
360361
if trimString {
361362
ret = ret[:len(ret)-1]
362363
}

parsing_test.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1141,6 +1141,24 @@ func TestModifierParsing(test *testing.T) {
11411141
},
11421142
},
11431143
},
1144+
{
1145+
Name: "Single cyrilic parameter with op",
1146+
Input: "переменная * 1",
1147+
Expected: []ExpressionToken{
1148+
{
1149+
Kind: VARIABLE,
1150+
Value: "переменная",
1151+
},
1152+
{
1153+
Kind: MODIFIER,
1154+
Value: "*",
1155+
},
1156+
{
1157+
Kind: NUMERIC,
1158+
Value: 1.0,
1159+
},
1160+
},
1161+
},
11441162
}
11451163

11461164
tokenParsingTests = combineWhitespaceExpressions(tokenParsingTests)

0 commit comments

Comments
 (0)