EricLBuehler · EricLBuehler · Jun 28, 2025 · Jun 28, 2025
diff --git a/mistralrs-core/src/paged_attention/cache_engine.rs b/mistralrs-core/src/paged_attention/cache_engine.rs
@@ -4,9 +4,7 @@ use std::{
     sync::{Arc, Mutex, MutexGuard},
 };
 
-use candle_core::{
-    from_storage_no_op, DType, Device, MetalStorage, Result, Shape, Storage, Tensor,
-};
+use candle_core::{DType, Device, Result, Tensor};
 use mistralrs_paged_attn::{copy_blocks, swap_blocks};
 use serde::{Deserialize, Serialize};
 
@@ -106,9 +104,12 @@ impl CacheEngine {
             .take(model_config.num_layers())
             .map(|x| x.as_ref().unwrap_or(device))
         {
+            #[allow(unused)]
             let key_blocks = if let Device::Metal(dev) = &device {
                 #[cfg(feature = "metal")]
                 {
+                    use candle_core::{from_storage_no_op, MetalStorage, Shape, Storage};
+
                     let elem_count = cache_config.num_gpu_blocks
                         * key_block_shape.0
                         * key_block_shape.1
@@ -149,9 +150,12 @@ impl CacheEngine {
                     )?
                 }
             };
+            #[allow(unused)]
             let value_blocks = if let Device::Metal(dev) = &device {
                 #[cfg(feature = "metal")]
                 {
+                    use candle_core::{from_storage_no_op, MetalStorage, Shape, Storage};
+
                     let elem_count = cache_config.num_gpu_blocks
                         * value_block_shape.0
                         * value_block_shape.1

diff --git a/mistralrs-quant/src/gptq/gptq_cuda.rs b/mistralrs-quant/src/gptq/gptq_cuda.rs
@@ -1,7 +1,7 @@
 use std::{
     cell::RefCell,
     collections::HashMap,
-    sync::{atomic::AtomicUsize, Arc, Mutex},
+    sync::{atomic::AtomicUsize, Arc},
 };
 
 use candle_core::{
@@ -15,7 +15,6 @@ use candle_core::{
     from_storage_no_op, Context, CudaStorage, DType, Device, Result, Shape, Storage, Tensor, D,
 };
 use half::f16;
-use lazy_static::lazy_static;
 
 use crate::{
     gptq::marlin_backend::{marlin_matmul, marlin_weight_repack},