Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions mistralrs-core/src/paged_attention/block_engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ impl LogicalTokenBlock {
self.tokens.pop();
self.num_tokens -= 1;
}

pub fn toks(&self) -> &[usize] {
&self.tokens
}
}

impl Hash for LogicalTokenBlock {
Expand Down
62 changes: 29 additions & 33 deletions mistralrs-core/src/prefix_cacher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

use crate::{
get_mut_arcmutex,
paged_attention::{BlockEngine, BlockEngineSequence, LogicalTokenBlock, PhysicalTokenBlock},
paged_attention::{BlockEngine, LogicalTokenBlock, PhysicalTokenBlock},
pipeline::KvCache,
sequence::{self, Sequence},
};
Expand Down Expand Up @@ -116,36 +116,27 @@
return;
}

if let Some(block_engine) = &self.block_engine {
let logical_token_blocks = seq.logical_token_blocks();
let block_engine = get_mut_arcmutex!(block_engine);
let block_table = &block_engine.block_tables[seq.id()];
let hashed_logical_blocks = hash_logical_blocks(logical_token_blocks);

if !self.block_caches.contains_key(&hashed_logical_blocks) {
for block in block_table {
let id = block.deref_mut().block_id;
for BlockCacheElement {
physical_blocks, ..
} in self.block_caches.values()
{
if physical_blocks.iter().any(|x| x.deref_mut().block_id == id) {
return;
}
}

block.deref_mut().increment_refcount();
}

self.block_caches.insert(
hashed_logical_blocks,
BlockCacheElement {
logical_blocks: logical_token_blocks.to_vec(),
physical_blocks: block_table.clone(),
image_hashes: seq.image_hashes().map(|x| x.to_vec()),
},
);
}
if let Some(_block_engine) = &self.block_engine {

Check failure on line 119 in mistralrs-core/src/prefix_cacher.rs

View workflow job for this annotation

GitHub Actions / Clippy

unneeded `return` statement
return;
// let logical_token_blocks = seq.logical_token_blocks();
// let block_engine = get_mut_arcmutex!(block_engine);
// let block_table = &block_engine.block_tables[seq.id()];
// let hashed_logical_blocks = hash_logical_blocks(logical_token_blocks);

// if !self.block_caches.contains_key(&hashed_logical_blocks) {
// for block in block_table {
// block.deref_mut().increment_refcount();
// }

// self.block_caches.insert(
// hashed_logical_blocks,
// BlockCacheElement {
// logical_blocks: logical_token_blocks.to_vec(),
// physical_blocks: block_table.clone(),
// image_hashes: seq.image_hashes().map(|x| x.to_vec()),
// },
// );
// }
} else {
let cache = seq.normal_cache().to_vec();

Expand Down Expand Up @@ -338,8 +329,13 @@

// If the last reused block is full, reserve an extra empty block for new tokens
let new_toks = toks[match_len..].to_vec();
for _ in 0..new_toks.len().div_ceil(block_size) {
logical_prefix.push(LogicalTokenBlock::new(block_size));
logical_prefix.push(LogicalTokenBlock::new(block_size));
for tok in &new_toks {
sequence::util_append_token_to_blocks(
*tok as usize,
&mut logical_prefix,
block_size,
);
}
if logical_prefix.last().is_some_and(|last| last.is_full()) {
logical_prefix.push(LogicalTokenBlock::new(block_size));
Expand Down
Loading