Skip to content

Commit abe58f3

Browse files
committed
fix(engine): add cache size limits to prevent unbounded memory growth
1 parent d201070 commit abe58f3

File tree

2 files changed

+41
-9
lines changed

2 files changed

+41
-9
lines changed

src/cortex-engine/src/config/config_discovery.rs

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,36 @@
44
//! with caching support for performance in monorepo environments.
55
66
use std::collections::HashMap;
7+
use std::hash::Hash;
78
use std::path::{Path, PathBuf};
89
use std::sync::{LazyLock, RwLock};
910

1011
use tracing::{debug, trace};
1112

13+
/// Maximum number of entries in each cache to prevent unbounded memory growth.
const MAX_CACHE_SIZE: usize = 1000;

/// Cache for discovered config paths.
/// Key is the start directory, value is the found config path (or None).
static CONFIG_CACHE: LazyLock<RwLock<HashMap<PathBuf, Option<PathBuf>>>> =
    LazyLock::new(|| RwLock::new(HashMap::with_capacity(MAX_CACHE_SIZE)));

/// Cache for project roots.
/// Key is the start directory, value is the project root path (or None).
static PROJECT_ROOT_CACHE: LazyLock<RwLock<HashMap<PathBuf, Option<PathBuf>>>> =
    LazyLock::new(|| RwLock::new(HashMap::with_capacity(MAX_CACHE_SIZE)));

/// Insert a key-value pair into the cache, evicting one entry first when the
/// cache is full and `key` is not already present.
///
/// Overwriting an existing key never grows the map, so no eviction is needed
/// in that case — evicting anyway would needlessly shrink the cache below its
/// limit (and could even evict the very entry being refreshed).
///
/// Eviction picks an arbitrary victim: `HashMap` iteration order is
/// unspecified, so this is a deliberately simple (non-LRU) strategy.
fn insert_with_eviction<K: Eq + Hash + Clone, V>(cache: &mut HashMap<K, V>, key: K, value: V) {
    if cache.len() >= MAX_CACHE_SIZE && !cache.contains_key(&key) {
        // Clone the victim key so the immutable borrow from `keys()` ends
        // before the mutable borrow needed by `remove()`.
        if let Some(victim) = cache.keys().next().cloned() {
            cache.remove(&victim);
        }
    }
    cache.insert(key, value);
}
2137

2238
/// Markers that indicate a project root directory.
2339
const PROJECT_ROOT_MARKERS: &[&str] = &[
@@ -57,9 +73,9 @@ pub fn find_up(start_dir: &Path, filename: &str) -> Option<PathBuf> {
5773

5874
let result = find_up_uncached(start_dir, filename);
5975

60-
// Store in cache
76+
// Store in cache with eviction when full
6177
if let Ok(mut cache) = CONFIG_CACHE.write() {
62-
cache.insert(cache_key, result.clone());
78+
insert_with_eviction(&mut cache, cache_key, result.clone());
6379
}
6480

6581
result
@@ -169,9 +185,9 @@ pub fn find_project_root(start_dir: &Path) -> Option<PathBuf> {
169185

170186
let result = find_project_root_uncached(start_dir);
171187

172-
// Store in cache
188+
// Store in cache with eviction when full
173189
if let Ok(mut cache) = PROJECT_ROOT_CACHE.write() {
174-
cache.insert(start_dir.to_path_buf(), result.clone());
190+
insert_with_eviction(&mut cache, start_dir.to_path_buf(), result.clone());
175191
}
176192

177193
result

src/cortex-engine/src/tokenizer.rs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,25 @@
33
//! Provides token counting and text tokenization for various models.
44
55
use std::collections::HashMap;
6+
use std::hash::Hash;
67

78
use serde::{Deserialize, Serialize};
89

10+
/// Maximum number of entries in the token cache to prevent unbounded memory growth.
const MAX_CACHE_SIZE: usize = 1000;

/// Insert a key-value pair into the cache, evicting one entry first when the
/// cache is full and `key` is not already present.
///
/// Overwriting an existing key never grows the map, so no eviction is needed
/// in that case — evicting anyway would needlessly shrink the cache below its
/// limit (and could even evict the very entry being refreshed).
///
/// Eviction picks an arbitrary victim: `HashMap` iteration order is
/// unspecified, so this is a deliberately simple (non-LRU) strategy.
fn insert_with_eviction<K: Eq + Hash + Clone, V>(cache: &mut HashMap<K, V>, key: K, value: V) {
    if cache.len() >= MAX_CACHE_SIZE && !cache.contains_key(&key) {
        // Clone the victim key so the immutable borrow from `keys()` ends
        // before the mutable borrow needed by `remove()`.
        if let Some(victim) = cache.keys().next().cloned() {
            cache.remove(&victim);
        }
    }
    cache.insert(key, value);
}
24+
925
/// Tokenizer type.
1026
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
1127
#[serde(rename_all = "snake_case")]
@@ -58,7 +74,7 @@ impl TokenizerType {
5874
pub struct TokenCounter {
5975
/// Tokenizer type.
6076
tokenizer: TokenizerType,
61-
/// Cache.
77+
/// Cache with bounded size to prevent unbounded memory growth.
6278
cache: HashMap<u64, u32>,
6379
}
6480

@@ -67,7 +83,7 @@ impl TokenCounter {
6783
pub fn new(tokenizer: TokenizerType) -> Self {
6884
Self {
6985
tokenizer,
70-
cache: HashMap::new(),
86+
cache: HashMap::with_capacity(MAX_CACHE_SIZE),
7187
}
7288
}
7389

@@ -85,7 +101,7 @@ impl TokenCounter {
85101
}
86102

87103
let count = self.count_uncached(text);
88-
self.cache.insert(hash, count);
104+
insert_with_eviction(&mut self.cache, hash, count);
89105
count
90106
}
91107

0 commit comments

Comments
 (0)