Skip to content

Commit ecad15b

Browse files
committed
fix: fix token prediction accuracy and response header issues
1 parent 6ab0495 commit ecad15b

File tree

2 files changed

+4
-7
lines changed

2 files changed

+4
-7
lines changed

core/llm_token_ratelimit/ratelimit_checker.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,13 +177,15 @@ func (c *PETAChecker) checkLimitKey(ctx *Context, rule *MatchedRule) bool {
 		)
 		return false
 	}
+	defer func() {
+		ctx.Set(KeyResponseHeaders, responseHeader)
+	}()
 	responseHeader.Set(KeyRequestID, ctx.Get(KeyRequestID).(string))
 	if waitingTime != PETANoWaiting {
 		responseHeader.Set(ResponseHeaderRemainingTokens, fmt.Sprintf("%d", result[0]))
 		responseHeader.Set(ResponseHeaderWaitingTime, (time.Duration(waitingTime) * time.Millisecond).String())
 		return false
 	}
-	ctx.Set(KeyResponseHeaders, responseHeader)
 	c.cacheEstimatedToken(rule, result[2])
 	return true
 }

core/llm_token_ratelimit/script/peta/correct.lua

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -90,12 +90,7 @@ if released_tokens > 0 then -- Expired tokens exist, attempt to replenish new to
 end
 -- Update the difference from the token encoder
 local difference = actual - estimated
-local ttl = redis.call('PTTL', token_encoder_key)
-if ttl < 0 then
-    redis.call('SET', token_encoder_key, difference, 'PX', window_size + 5000)
-else
-    redis.call('INCRBY', token_encoder_key, difference)
-end
+redis.call('SET', token_encoder_key, difference, 'PX', window_size + 5000)
 -- Correction result for reservation
 local correct_result = 0
 if estimated < 0 or actual < 0 then

0 commit comments

Comments
 (0)