From e4be29f1dda1e6e9c18a9d8fe2ac9858edbb1e28 Mon Sep 17 00:00:00 2001
From: Patrick Boyd <Patrick_Boyd@Dell.com>
Date: Tue, 20 May 2025 09:39:46 -0500
Subject: [PATCH] Fix 16: Handle multi-byte characters before single-byte
 characters in input

---
 evaluation_test.go |  6 ++++++
 lexerStream.go     | 24 ++++++++++++++++++++++--
 parsing.go         |  5 +++--
 parsing_test.go    | 18 ++++++++++++++++++
 4 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/evaluation_test.go b/evaluation_test.go
index bb92321..714c24e 100644
--- a/evaluation_test.go
+++ b/evaluation_test.go
@@ -1453,6 +1453,12 @@ func TestParameterizedEvaluation(test *testing.T) {
 			Parameters: []EvaluationParameter{fooParameter},
 			Expected:   false,
 		},
+		{
+			Name:       "Test with cyrilic parameter",
+			Input:      "переменная * 5",
+			Parameters: []EvaluationParameter{{Name: "переменная", Value: 2}},
+			Expected:   float64(10),
+		},
 	}
 
 	runEvaluationTests(evaluationTests, test)
diff --git a/lexerStream.go b/lexerStream.go
index b0be027..d5b01b9 100644
--- a/lexerStream.go
+++ b/lexerStream.go
@@ -1,10 +1,14 @@
 package govaluate
 
-import "sync"
+import (
+	"sync"
+	"unicode/utf8"
+)
 
 type lexerStream struct {
 	sourceString string
 	source       []rune
+	strPosition  int // byte offset into sourceString that corresponds to position
 	position     int
 	length       int
 }
@@ -25,6 +29,7 @@ func newLexerStream(source string) *lexerStream {
 	}
 	ret.sourceString = source
 	ret.position = 0
+	ret.strPosition = 0
 	ret.length = len(ret.source)
 	return ret
 }
@@ -32,11 +37,26 @@ func newLexerStream(source string) *lexerStream {
 func (this *lexerStream) readCharacter() rune {
 	character := this.source[this.position]
 	this.position += 1
+	this.strPosition += utf8.RuneLen(character) // advance the byte offset by this rune's UTF-8 encoded length
 	return character
 }
 
 func (this *lexerStream) rewind(amount int) {
-	this.position -= amount
+	// Un-reads `amount` runes, keeping strPosition (byte offset into
+	// sourceString) in step with position (index into source). A negative
+	// amount skips forward instead. Both directions stop at the stream bounds.
+	//
+	// Step back before measuring: the rune being un-read is the one at
+	// position-1, not the one at position.
+	for ; amount > 0 && this.position > 0; amount-- {
+		this.position--
+		this.strPosition -= utf8.RuneLen(this.source[this.position])
+	}
+	for ; amount < 0 && this.position < this.length; amount++ {
+		this.strPosition += utf8.RuneLen(this.source[this.position])
+		this.position++
+	}
+	// RuneLen mirrors readCharacter, so a read followed by a rewind is a no-op.
 }
 
 func (this lexerStream) canRead() bool {
diff --git a/parsing.go b/parsing.go
index a37f259..588b1fd 100644
--- a/parsing.go
+++ b/parsing.go
@@ -308,9 +308,10 @@ Returns false if the stream ended before whitespace was broken or condition was
 func readUntilFalse(stream *lexerStream, includeWhitespace bool, breakWhitespace bool, allowEscaping bool, condition func(rune) bool) (string, bool) {
 
 	tokenBuffer := tokenBufferPool.Get().(*bytes.Buffer)
+	tokenBuffer.Reset()
 	var character rune
 
-	startPosition := stream.position
+	startPosition := stream.strPosition
 	reuseString := true
 	trimString := false
 	conditioned := false
@@ -356,7 +357,7 @@ func readUntilFalse(stream *lexerStream, includeWhitespace bool, breakWhitespace
 	if reuseString {
 		tokenBuffer.Reset()
 		tokenBufferPool.Put(tokenBuffer)
-		ret := stream.sourceString[startPosition:stream.position]
+		ret := stream.sourceString[startPosition:stream.strPosition]
 		if trimString {
 			ret = ret[:len(ret)-1]
 		}
diff --git a/parsing_test.go b/parsing_test.go
index 04bcbc6..009e3f5 100644
--- a/parsing_test.go
+++ b/parsing_test.go
@@ -1141,6 +1141,24 @@ func TestModifierParsing(test *testing.T) {
 				},
 			},
 		},
+		{
+			Name:  "Single cyrilic parameter with op",
+			Input: "переменная * 1",
+			Expected: []ExpressionToken{
+				{
+					Kind:  VARIABLE,
+					Value: "переменная",
+				},
+				{
+					Kind:  MODIFIER,
+					Value: "*",
+				},
+				{
+					Kind:  NUMERIC,
+					Value: 1.0,
+				},
+			},
+		},
 	}
 
 	tokenParsingTests = combineWhitespaceExpressions(tokenParsingTests)