From e4be29f1dda1e6e9c18a9d8fe2ac9858edbb1e28 Mon Sep 17 00:00:00 2001
From: Patrick Boyd
Date: Tue, 20 May 2025 09:39:46 -0500
Subject: [PATCH] Fix 16: Handle multi-byte characters before single byte characters in input

lexerStream now tracks a byte offset (strPosition) next to the rune index
(position), and readUntilFalse slices sourceString with the byte offset
instead of the rune index.

rewind steps the rune index back *before* looking up the rune width, so the
width subtracted from strPosition is that of the rune actually being un-read.
This keeps the byte offset correct when a multi-byte rune is directly
followed by a single-byte one ("я+1") and when the input ends with a
multi-byte rune ("1+я"). A negative amount still skips forward, now by the
byte width of each skipped rune.

---
Review notes:
- Line breaks and indentation in this patch were reconstructed from a
  whitespace-collapsed copy. The first parsing.go hunk needs one blank
  context line between the tokenBuffer line and startPosition whose exact
  position could not be recovered; confirm against the tree before applying.
- The post-image blob hashes on the index lines of evaluation_test.go and
  lexerStream.go were not regenerated; they only matter for `git am -3`.
- NOTE(review): readUntilFalse trims with `ret[:len(ret)-1]`, i.e. exactly
  one byte. If trimString can be set after a multi-byte whitespace rune this
  would split that rune; confirm against the full function.

 evaluation_test.go | 12 ++++++++++++
 lexerStream.go     | 25 +++++++++++++++++++++++--
 parsing.go         |  5 +++--
 parsing_test.go    | 18 ++++++++++++++++++
 4 files changed, 56 insertions(+), 4 deletions(-)

diff --git a/evaluation_test.go b/evaluation_test.go
index bb92321..714c24e 100644
--- a/evaluation_test.go
+++ b/evaluation_test.go
@@ -1453,6 +1453,18 @@ func TestParameterizedEvaluation(test *testing.T) {
 			Parameters: []EvaluationParameter{fooParameter},
 			Expected:   false,
 		},
+		{
+			Name:       "Test with cyrilic parameter",
+			Input:      "переменная * 5",
+			Parameters: []EvaluationParameter{{Name: "переменная", Value: 2}},
+			Expected:   float64(10),
+		},
+		{
+			Name:       "Multi-byte parameter next to single-byte operators and at end of input",
+			Input:      "я+1+я",
+			Parameters: []EvaluationParameter{{Name: "я", Value: 2}},
+			Expected:   float64(5),
+		},
 	}
 
 	runEvaluationTests(evaluationTests, test)
diff --git a/lexerStream.go b/lexerStream.go
index b0be027..d5b01b9 100644
--- a/lexerStream.go
+++ b/lexerStream.go
@@ -1,10 +1,14 @@
 package govaluate
 
-import "sync"
+import (
+	"sync"
+	"unicode/utf8"
+)
 
 type lexerStream struct {
 	sourceString string
 	source       []rune
+	strPosition  int
 	position     int
 	length       int
 }
@@ -25,6 +29,7 @@ func newLexerStream(source string) *lexerStream {
 	}
 	ret.sourceString = source
 	ret.position = 0
+	ret.strPosition = 0
 	ret.length = len(ret.source)
 	return ret
 }
@@ -32,11 +37,27 @@ func newLexerStream(source string) *lexerStream {
 func (this *lexerStream) readCharacter() rune {
 	character := this.source[this.position]
 	this.position += 1
+	this.strPosition += utf8.RuneLen(character)
 	return character
 }
 
 func (this *lexerStream) rewind(amount int) {
-	this.position -= amount
+	// Un-read amount runes. The rune being un-read sits just before the
+	// cursor, so step back first and then subtract that rune's byte width.
+	for ; amount > 0; amount-- {
+		this.position -= 1
+		if this.position >= 0 && this.position < this.length {
+			this.strPosition -= utf8.RuneLen(this.source[this.position])
+		}
+	}
+	// A negative amount skips forward, as the old "position -= amount" did;
+	// advance the byte offset by the width of each rune skipped.
+	for ; amount < 0; amount++ {
+		if this.position >= 0 && this.position < this.length {
+			this.strPosition += utf8.RuneLen(this.source[this.position])
+		}
+		this.position += 1
+	}
 }
 
 func (this lexerStream) canRead() bool {
diff --git a/parsing.go b/parsing.go
index a37f259..588b1fd 100644
--- a/parsing.go
+++ b/parsing.go
@@ -308,9 +308,10 @@ Returns false if the stream ended before whitespace was broken or condition was
 func readUntilFalse(stream *lexerStream, includeWhitespace bool, breakWhitespace bool, allowEscaping bool, condition func(rune) bool) (string, bool) {
 
 	tokenBuffer := tokenBufferPool.Get().(*bytes.Buffer)
+	tokenBuffer.Reset()
 	var character rune
 
-	startPosition := stream.position
+	startPosition := stream.strPosition
 	reuseString := true
 	trimString := false
 	conditioned := false
@@ -356,7 +357,7 @@ func readUntilFalse(stream *lexerStream, includeWhitespace bool, breakWhitespace
 	if reuseString {
 		tokenBuffer.Reset()
 		tokenBufferPool.Put(tokenBuffer)
-		ret := stream.sourceString[startPosition:stream.position]
+		ret := stream.sourceString[startPosition:stream.strPosition]
 		if trimString {
 			ret = ret[:len(ret)-1]
 		}
diff --git a/parsing_test.go b/parsing_test.go
index 04bcbc6..009e3f5 100644
--- a/parsing_test.go
+++ b/parsing_test.go
@@ -1141,6 +1141,24 @@ func TestModifierParsing(test *testing.T) {
 				},
 			},
 		},
+		{
+			Name:  "Single cyrilic parameter with op",
+			Input: "переменная * 1",
+			Expected: []ExpressionToken{
+				{
+					Kind:  VARIABLE,
+					Value: "переменная",
+				},
+				{
+					Kind:  MODIFIER,
+					Value: "*",
+				},
+				{
+					Kind:  NUMERIC,
+					Value: 1.0,
+				},
+			},
+		},
 	}
 
 	tokenParsingTests = combineWhitespaceExpressions(tokenParsingTests)