1 file changed: +5 -7 lines changed
Original file line number | Diff line number | Diff line change
@@ -531,18 +531,16 @@ def _compute_for_prefix_cache_hit(
531531 inter_data .query_lens [
532532 seq_idx ] = inter_data .seq_lens [seq_idx ] - context_len
533533 elif seq_len <= prefix_cache_len :
534- # Full hit. Only compute the last block to avoid
534+ # Full hit. Only compute the last token to avoid
535535 # erroneous behavior. FIXME: Ideally we should directly
536536 # mark all tokens as computed in the scheduler and do not
537537 # schedule this sequence, so this case should not happen.
538- block_size = self .block_size
539538 inter_data .input_tokens [seq_idx ] = inter_data .input_tokens [
540- seq_idx ][- block_size :]
539+ seq_idx ][- 1 :]
541540 inter_data .input_positions [seq_idx ] = inter_data .input_positions [
542- seq_idx ][- block_size :]
543- inter_data .query_lens [seq_idx ] = block_size
544- inter_data .context_lens [seq_idx ] = inter_data .seq_lens [
545- seq_idx ] - inter_data .query_lens [seq_idx ]
541+ seq_idx ][- 1 :]
542+ inter_data .query_lens [seq_idx ] = 1
543+ inter_data .context_lens [seq_idx ] = inter_data .seq_lens [seq_idx ] - 1
546544
547545 def _compute_for_sliding_window (self , inter_data : InterDataForSeqGroup ,
548546 seq_idx : int ,
You can’t perform that action at this time.
0 commit comments