-
Notifications
You must be signed in to change notification settings - Fork 6
fs-cache: Add Cache Struct
#95
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 5 commits
b056880
8797e8f
56597ac
e5b41bc
33091bf
60225f4
39d9bb0
a4b201a
de954cd
9136354
b469c6b
4741ab6
d6a002b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,24 @@ | ||
| [package] | ||
| name = "fs-cache" | ||
| version = "0.1.0" | ||
| edition = "2021" | ||
|
|
||
| [lib] | ||
| name = "fs_cache" | ||
| crate-type = ["rlib", "cdylib"] | ||
| bench = false | ||
|
|
||
| [dependencies] | ||
| log = { version = "0.4.17", features = ["release_max_level_off"] } | ||
| serde_json = "1.0.82" | ||
| serde = { version = "1.0.138", features = ["derive"] } | ||
| data-error = { path = "../data-error" } | ||
| data-resource = { path = "../data-resource" } | ||
pushkarm029 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| fs-storage = { path = "../fs-storage"} | ||
| linked-hash-map = "0.5.6" | ||
|
|
||
| [dev-dependencies] | ||
| anyhow = "1.0.81" | ||
| quickcheck = { version = "1.0.3", features = ["use_logging"] } | ||
| quickcheck_macros = "1.0.0" | ||
| tempdir = "0.3.7" | ||
pushkarm029 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
tareknaser marked this conversation as resolved.
Show resolved
Hide resolved
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,60 @@ | ||
| use crate::memory_limited_storage::MemoryLimitedStorage; | ||
| use data_error::Result; | ||
| use std::path::Path; | ||
|
|
||
| /// Logging front-end over a [`MemoryLimitedStorage`]. | ||
| pub struct Cache<K, V> { | ||
| /// Backing storage that `new`, `get` and `set` delegate to. | ||
| storage: MemoryLimitedStorage<K, V>, | ||
| } | ||
pushkarm029 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| impl<K, V> Cache<K, V> | ||
| where | ||
| K: Ord | ||
| + Clone | ||
| + serde::Serialize | ||
| + serde::de::DeserializeOwned | ||
| + std::fmt::Display | ||
| + std::hash::Hash | ||
| + std::str::FromStr, | ||
| V: Clone + serde::Serialize + serde::de::DeserializeOwned, | ||
| { | ||
| /// Creates a cache labelled `label` whose values are persisted under `path`. | ||
| /// | ||
| /// `max_memory_bytes` is handed to the backing storage as its in-memory limit. | ||
| /// | ||
| /// # Errors | ||
| /// Returns the error produced by `MemoryLimitedStorage::new`. | ||
| pub fn new(label: String, path: &Path, max_memory_bytes: usize) -> Result<Self> { | ||
| // Logged while `label` is still owned here; the message is emitted even if | ||
| // constructing the storage fails afterwards. | ||
| log::debug!( | ||
| "{} cache initialized with {} bytes limit", | ||
| label, | ||
| max_memory_bytes | ||
| ); | ||
| let storage = MemoryLimitedStorage::new(label, path, max_memory_bytes)?; | ||
| Ok(Self { storage }) | ||
| } | ||
|
|
||
| /// Looks `key` up in the backing storage and logs whether a value was found. | ||
| /// | ||
| /// Returns the value handed back by the storage, or `None` when it has none. | ||
| pub fn get(&mut self, key: &K) -> Option<V> { | ||
| let found = self.storage.get(key); | ||
| let hit = found.is_some(); | ||
| log::debug!( | ||
| "{} cache: get key={} -> found={}", | ||
| self.storage.label(), | ||
| key, | ||
| hit | ||
| ); | ||
| found | ||
| } | ||
|
|
||
| /// Stores `value` under `key` unless the storage already has a value for that key. | ||
| /// | ||
| /// An existing key is left untouched: the call logs that it was skipped and | ||
| /// returns `Ok(())` without writing anything. Otherwise the pair is forwarded | ||
| /// to the storage's `set`, whose result is returned. | ||
| pub fn set(&mut self, key: K, value: V) -> Result<()> { | ||
| // Check if value already exists | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What happens if a value already exists? This behavior isn’t documented.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch. It can be indicator of redundant computation. Since the cache uses I suggest the following:
Query |
||
| if self.storage.get(&key).is_some() { | ||
| log::debug!( | ||
| "{} cache: skip setting existing key={}", | ||
| self.storage.label(), | ||
| key | ||
| ); | ||
| return Ok(()); | ||
| } | ||
|
|
||
| log::debug!("{} cache: set key={}", self.storage.label(), key); | ||
pushkarm029 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| self.storage.set(key, value) | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
| pub mod cache; | ||
pushkarm029 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| pub mod memory_limited_storage; | ||
tareknaser marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
pushkarm029 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,273 @@ | ||
| use std::fs::{self, File}; | ||
| use std::io::Write; | ||
| use std::path::{Path, PathBuf}; | ||
| use std::time::SystemTime; | ||
|
|
||
| use data_error::{ArklibError, Result}; | ||
| use linked_hash_map::LinkedHashMap; | ||
|
|
||
| /// Key-value storage that persists every value as a `<key>.json` file under | ||
| /// `path` and keeps a size-bounded subset of the values in memory. | ||
| pub struct MemoryLimitedStorage<K, V> { | ||
pushkarm029 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| /// Label for logging | ||
| label: String, | ||
| /// Path to the underlying folder where data is persisted | ||
| path: PathBuf, | ||
tareknaser marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| /// In-memory LRU cache combining map and queue functionality | ||
| memory_cache: LinkedHashMap<K, V>, | ||
tareknaser marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| /// Estimated bytes held in `memory_cache`, measured as the serialized JSON | ||
| /// length of each value (see `estimate_size`) | ||
pushkarm029 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| current_memory_bytes: usize, | ||
| /// Maximum bytes to keep in memory | ||
| max_memory_bytes: usize, | ||
| } | ||
|
|
||
| impl<K, V> MemoryLimitedStorage<K, V> | ||
| where | ||
| K: Ord | ||
| + Clone | ||
| + serde::Serialize | ||
| + serde::de::DeserializeOwned | ||
| + std::fmt::Display | ||
| + std::hash::Hash | ||
| + std::str::FromStr, | ||
| V: Clone + serde::Serialize + serde::de::DeserializeOwned, | ||
| { | ||
| /// Creates a storage rooted at `path` and immediately populates the memory | ||
| /// cache from the files already present there via `load_fs`. | ||
| /// | ||
| /// # Errors | ||
| /// Returns the error from `load_fs`, e.g. when `path` does not exist or is | ||
| /// not a directory. | ||
| pub fn new(label: String, path: &Path, max_memory_bytes: usize) -> Result<Self> { | ||
| let mut this = Self { | ||
| label, | ||
| path: path.to_path_buf(), | ||
| memory_cache: LinkedHashMap::new(), | ||
| current_memory_bytes: 0, | ||
| max_memory_bytes, | ||
| }; | ||
| this.load_fs()?; | ||
| Ok(this) | ||
| } | ||
|
|
||
| /// Returns an owned copy of the label used in log messages. | ||
| pub fn label(&self) -> String { | ||
| self.label.to_owned() | ||
| } | ||
|
|
||
| /// Returns a clone of the value stored for `key`, looking in memory first and | ||
| /// then in the `<key>.json` file on disk. | ||
| /// | ||
| /// A value found on disk is also added to the memory cache. Returns `None` | ||
| /// when no file exists for `key` or when loading it fails (the failure is | ||
| /// logged at error level). | ||
| pub fn get(&mut self, key: &K) -> Option<V> { | ||
| // Memory hit: `get_refresh` also updates the LRU order. | ||
| if let Some(cached) = self.memory_cache.get_refresh(key) { | ||
| return Some(cached.clone()); | ||
| } | ||
| // Memory miss: nothing to return unless a file exists for this key. | ||
| if !self.path.join(format!("{}.json", key)).exists() { | ||
| return None; | ||
| } | ||
| // Open question: update the file's modified time (on disk) on read to | ||
| // preserve LRU order across app restarts? | ||
| match self.load_value_from_disk(key) { | ||
| Ok(loaded) => { | ||
| self.add_to_memory_cache(key.clone(), loaded.clone()); | ||
| Some(loaded) | ||
| } | ||
| Err(err) => { | ||
| log::error!( | ||
| "{} cache: failed to load key={}: {}", | ||
| self.label, | ||
| key, | ||
| err | ||
| ); | ||
| None | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// Persists `value` for `key` on disk and then mirrors it into the memory cache. | ||
| /// | ||
| /// # Errors | ||
| /// Returns the error from the disk write; the memory cache is not touched in | ||
| /// that case. | ||
| pub fn set(&mut self, key: K, value: V) -> Result<()> { | ||
| // Disk goes first so a failed write never leaves a memory-only entry. | ||
| match self.write_value_to_disk(&key, &value) { | ||
| Ok(()) => { | ||
| self.add_to_memory_cache(key, value); | ||
| Ok(()) | ||
| } | ||
| Err(err) => Err(err), | ||
| } | ||
| } | ||
|
|
||
| /// Rebuilds the memory cache from the `*.json` files found in the storage folder. | ||
| /// | ||
| /// Files are considered from most to least recently modified and loaded while | ||
| /// they fit into `max_memory_bytes`. The selected values are then inserted | ||
| /// oldest-first, so the most recently modified entry is the last one that | ||
| /// `pop_front` eviction will reach. | ||
| /// | ||
| /// # Errors | ||
| /// Fails when the folder does not exist, is not a directory, cannot be listed, | ||
| /// or contains a `.json` file whose name cannot be parsed into a key. Values | ||
| /// that fail to load are logged and skipped. | ||
| pub fn load_fs(&mut self) -> Result<()> { | ||
| if !self.path.exists() { | ||
| return Err(ArklibError::Storage( | ||
| self.label.clone(), | ||
| "Folder does not exist".to_owned(), | ||
| )); | ||
| } | ||
| if !self.path.is_dir() { | ||
| return Err(ArklibError::Storage( | ||
| self.label.clone(), | ||
| "Path is not a directory".to_owned(), | ||
| )); | ||
| } | ||
| // First pass: collect (key, size on disk, modification time) only | ||
| let mut file_metadata = Vec::new(); | ||
| for entry in fs::read_dir(&self.path)? { | ||
| let entry = entry?; | ||
| let path = entry.path(); | ||
| if !path.is_file() || !path.extension().map_or(false, |ext| ext == "json") { | ||
| continue; | ||
| } | ||
| match fs::metadata(&path) { | ||
| Ok(metadata) => { | ||
| let key: K = extract_key_from_file_path(&self.label, &path)?; | ||
| // A file whose mtime is unavailable is treated as the oldest. | ||
| let modified = metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH); | ||
| // Saturate rather than truncate on targets where usize is narrower than u64. | ||
| let size = usize::try_from(metadata.len()).unwrap_or(usize::MAX); | ||
| file_metadata.push((key, size, modified)); | ||
| } | ||
| Err(err) => { | ||
| log::warn!( | ||
| "{} cache: failed to read metadata of {}: {}", | ||
| self.label, | ||
| path.display(), | ||
| err | ||
| ); | ||
| } | ||
| } | ||
| } | ||
| // Sort by modification time (newest first) before loading any values | ||
| file_metadata.sort_by(|a, b| b.2.cmp(&a.2)); | ||
| // Clear existing cache | ||
| self.memory_cache.clear(); | ||
| self.current_memory_bytes = 0; | ||
| // TODO: Need some work here | ||
| // Second pass: load only the values that will fit in memory | ||
| let mut loaded_bytes: usize = 0; | ||
| let mut total_bytes: usize = 0; | ||
| let mut selected = Vec::new(); | ||
| for (key, approx_size, _) in file_metadata { | ||
| total_bytes = total_bytes.saturating_add(approx_size); | ||
| // Only load value if it will likely fit in memory | ||
| if loaded_bytes.saturating_add(approx_size) > self.max_memory_bytes { | ||
| continue; | ||
| } | ||
| match self.load_value_from_disk(&key) { | ||
| Ok(value) => { | ||
| let actual_size = Self::estimate_size(&value); | ||
| if loaded_bytes.saturating_add(actual_size) <= self.max_memory_bytes { | ||
| selected.push((key, value)); | ||
| loaded_bytes += actual_size; | ||
| } | ||
| } | ||
| Err(err) => { | ||
| log::warn!( | ||
| "{} cache: failed to load key={}: {}", | ||
| self.label, | ||
| key, | ||
| err | ||
| ); | ||
| } | ||
| } | ||
| } | ||
| // Insert oldest-first: eviction pops from the front, so the newest entries | ||
| // must end up at the back. | ||
| for (key, value) in selected.into_iter().rev() { | ||
| self.memory_cache.insert(key, value); | ||
| } | ||
| self.current_memory_bytes = loaded_bytes; | ||
| log::debug!( | ||
| "{} loaded {}/{} bytes in memory", | ||
| self.label, | ||
| self.current_memory_bytes, | ||
| total_bytes | ||
| ); | ||
| Ok(()) | ||
| } | ||
|
|
||
| /// Estimates the memory footprint of `value` as the length in bytes of its | ||
| /// compact JSON serialization; a value that fails to serialize counts as 0. | ||
| fn estimate_size(value: &V) -> usize { | ||
| match serde_json::to_vec(value) { | ||
| Ok(bytes) => bytes.len(), | ||
| Err(_) => 0, | ||
| } | ||
| } | ||
pushkarm029 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| /// Writes `value` as pretty-printed JSON to the `<key>.json` file in the | ||
| /// storage folder, stamps the file's modification time and syncs it. | ||
| /// | ||
| /// The value is serialized before the file is created, so a serialization | ||
| /// failure cannot truncate a previously stored file. | ||
| /// | ||
| /// # Errors | ||
| /// Returns serialization errors and any I/O error from creating, writing, | ||
| /// re-stamping or syncing the file. | ||
| fn write_value_to_disk(&mut self, key: &K, value: &V) -> Result<()> { | ||
| // Serialize first: `File::create` truncates an existing file immediately. | ||
| let serialized = serde_json::to_string_pretty(value)?; | ||
| let file_path = self.path.join(format!("{}.json", key)); | ||
| let mut file = File::create(&file_path)?; | ||
| file.write_all(serialized.as_bytes())?; | ||
| file.flush()?; | ||
| let new_timestamp = SystemTime::now(); | ||
| file.set_modified(new_timestamp)?; | ||
| file.sync_all()?; | ||
| Ok(()) | ||
| } | ||
tareknaser marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| /// Reads and deserializes the `<key>.json` file belonging to `key`. | ||
| /// | ||
| /// # Errors | ||
| /// Returns the I/O error from opening the file, or an `ArklibError::Storage` | ||
| /// carrying the storage label when the contents cannot be deserialized. | ||
| fn load_value_from_disk(&self, key: &K) -> Result<V> { | ||
| let json_path = self.path.join(format!("{}.json", key)); | ||
| let reader = File::open(&json_path)?; | ||
| match serde_json::from_reader(reader) { | ||
| Ok(value) => Ok(value), | ||
| Err(err) => Err(ArklibError::Storage( | ||
| self.label.clone(), | ||
| format!("Failed to read value for key {}: {}", key, err), | ||
| )), | ||
| } | ||
| } | ||
|
|
||
| /// Places `value` in the memory cache under `key`, evicting entries from the | ||
| /// front of the cache until the new value fits into `max_memory_bytes`. | ||
| /// | ||
| /// Any entry already cached for `key` is removed first and its bytes are | ||
| /// released, so replacing a value neither double-counts memory nor leaves a | ||
| /// stale value behind. A value larger than the whole limit is not cached. | ||
| fn add_to_memory_cache(&mut self, key: K, value: V) { | ||
| let value_size = Self::estimate_size(&value); | ||
| // Drop the previous entry for this key before any accounting: `insert` | ||
| // would otherwise replace it while its bytes stayed counted, and the | ||
| // oversize early return below would keep serving the outdated value. | ||
| if let Some(previous) = self.memory_cache.remove(&key) { | ||
| self.current_memory_bytes = self | ||
| .current_memory_bytes | ||
| .saturating_sub(Self::estimate_size(&previous)); | ||
| } | ||
| // If single value is larger than total limit, just skip memory caching | ||
| if value_size > self.max_memory_bytes { | ||
| log::debug!( | ||
| "{} cache: value size {} exceeds limit {}", | ||
| self.label, | ||
| value_size, | ||
| self.max_memory_bytes | ||
| ); | ||
| return; | ||
| } | ||
| // Remove oldest entries until we have space for new value | ||
| while self.current_memory_bytes.saturating_add(value_size) > self.max_memory_bytes { | ||
| match self.memory_cache.pop_front() { | ||
| Some((_, old_value)) => { | ||
| self.current_memory_bytes = self | ||
| .current_memory_bytes | ||
| .saturating_sub(Self::estimate_size(&old_value)); | ||
| } | ||
| // Nothing left to evict | ||
| None => break, | ||
| } | ||
| } | ||
| // Add new value and update size | ||
| self.memory_cache.insert(key, value); | ||
| self.current_memory_bytes += value_size; | ||
| log::debug!( | ||
| "{} cache: added {} bytes, total {}/{}", | ||
| self.label, | ||
| value_size, | ||
| self.current_memory_bytes, | ||
| self.max_memory_bytes | ||
| ); | ||
| } | ||
| } | ||
|
|
||
| /// Parses the file stem of `path` (the file name without its extension) into a key. | ||
| /// | ||
| /// # Errors | ||
| /// Returns an `ArklibError::Storage` tagged with `label` when the path has no | ||
| /// file stem, the stem is not valid UTF-8, or the stem does not parse as `K`. | ||
| fn extract_key_from_file_path<K>(label: &str, path: &Path) -> Result<K> | ||
| where | ||
| K: std::str::FromStr, | ||
| { | ||
| let storage_error = | ||
| |message: &str| ArklibError::Storage(label.to_owned(), message.to_owned()); | ||
| let stem = path | ||
| .file_stem() | ||
| .ok_or_else(|| storage_error("Failed to extract file stem from filename"))?; | ||
| let stem = stem | ||
| .to_str() | ||
| .ok_or_else(|| storage_error("Failed to convert file stem to string"))?; | ||
| stem.parse::<K>() | ||
| .map_err(|_| storage_error("Failed to parse key from filename")) | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.