Skip to content

Commit 99a035c

Browse files
committed
No warnings
1 parent aca6733 commit 99a035c

File tree

35 files changed

+58
-248
lines changed

35 files changed

+58
-248
lines changed

mistralrs-bench/src/main.rs

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use mistralrs_core::{
1111
use std::sync::Arc;
1212
use std::{fmt::Display, num::NonZeroUsize};
1313
use tokio::sync::mpsc::channel;
14-
use tracing::{info, warn};
14+
use tracing::info;
1515

1616
enum TestName {
1717
Prompt(usize),
@@ -346,8 +346,6 @@ fn main() -> anyhow::Result<()> {
346346

347347
args.concurrency = Some(args.concurrency.unwrap_or(vec![1]));
348348

349-
let use_flash_attn = mistralrs_core::using_flash_attn();
350-
351349
let prompt_chunksize = match args.prompt_chunksize {
352350
Some(0) => {
353351
anyhow::bail!("`prompt_chunksize` must be a strictly positive integer, got 0.",)
@@ -362,7 +360,6 @@ fn main() -> anyhow::Result<()> {
362360
let max_seq_len = auto_device_map_params.max_seq_len();
363361

364362
let loader: Box<dyn Loader> = LoaderBuilder::new(args.model)
365-
.with_use_flash_attn(use_flash_attn)
366363
.with_prompt_chunksize(prompt_chunksize)
367364
.build()?;
368365
let model_name = loader.get_id();
@@ -389,12 +386,6 @@ fn main() -> anyhow::Result<()> {
389386
candle_core::utils::with_f16c()
390387
);
391388
info!("Sampling method: penalties -> temperature -> topk -> topp -> minp -> multinomial");
392-
if use_flash_attn {
393-
info!("Using flash attention.");
394-
}
395-
if use_flash_attn && loader.get_kind().is_quantized() {
396-
warn!("Using flash attention with a quantized model has no effect!")
397-
}
398389
info!("Model kind is: {}", loader.get_kind().to_string());
399390

400391
// Parse device mapper

mistralrs-core/src/attention.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ impl Sdpa {
282282
/// - v: (b_sz, n_kv_heads, q_len, head_dim)
283283
///
284284
/// The attention implementation is dispatched as follows:
285-
/// 1) If `use_flash_attn == true` (CUDA), use a flash attention V2 kernel
285+
/// 1) If using flash attn (CUDA), use a flash attention V2/V3 kernel
286286
/// 2) If decoding and using a Metal device, use a fused kernel
287287
/// 3) Otherwise, use the "naive" SDPA implementation (with optimized mask+softmax+scale application)
288288
#[allow(unused_variables, clippy::too_many_arguments)]

mistralrs-core/src/lib.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -86,14 +86,14 @@ pub use paged_attention::{MemoryGpuConfig, PagedAttentionConfig};
8686
pub use pipeline::{
8787
chat_template::ChatTemplate, parse_isq_value, AdapterPaths, AnyMoeLoader, AnyMoePipeline,
8888
AutoDeviceMapParams, DiffusionGenerationParams, DiffusionLoader, DiffusionLoaderBuilder,
89-
DiffusionLoaderType, DiffusionSpecificConfig, GGMLLoader, GGMLLoaderBuilder,
90-
GGMLSpecificConfig, GGUFLoader, GGUFLoaderBuilder, GGUFSpecificConfig, GemmaLoader,
91-
Idefics2Loader, IsqOrganization, LLaVALoader, LLaVANextLoader, LlamaLoader, Loader,
92-
LocalModelPaths, LoraAdapterPaths, MistralLoader, MixtralLoader, ModelKind, ModelPaths,
93-
NormalLoader, NormalLoaderBuilder, NormalLoaderType, NormalSpecificConfig, Phi2Loader,
94-
Phi3Loader, Phi3VLoader, Qwen2Loader, SpeculativeConfig, SpeculativeLoader,
95-
SpeculativePipeline, Starcoder2Loader, TokenSource, VisionLoader, VisionLoaderBuilder,
96-
VisionLoaderType, VisionPromptPrefixer, VisionSpecificConfig, UQFF_MULTI_FILE_DELIMITER,
89+
DiffusionLoaderType, GGMLLoader, GGMLLoaderBuilder, GGMLSpecificConfig, GGUFLoader,
90+
GGUFLoaderBuilder, GGUFSpecificConfig, GemmaLoader, Idefics2Loader, IsqOrganization,
91+
LLaVALoader, LLaVANextLoader, LlamaLoader, Loader, LocalModelPaths, LoraAdapterPaths,
92+
MistralLoader, MixtralLoader, ModelKind, ModelPaths, NormalLoader, NormalLoaderBuilder,
93+
NormalLoaderType, NormalSpecificConfig, Phi2Loader, Phi3Loader, Phi3VLoader, Qwen2Loader,
94+
SpeculativeConfig, SpeculativeLoader, SpeculativePipeline, Starcoder2Loader, TokenSource,
95+
VisionLoader, VisionLoaderBuilder, VisionLoaderType, VisionPromptPrefixer,
96+
VisionSpecificConfig, UQFF_MULTI_FILE_DELIMITER,
9797
};
9898
pub use request::{
9999
ApproximateUserLocation, Constraint, DetokenizationRequest, ImageGenerationResponseFormat,

mistralrs-core/src/model_loader.rs

Lines changed: 3 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ use crate::{
1111
get_toml_selected_model_dtype,
1212
pipeline::{GGMLLoaderBuilder, GGMLSpecificConfig, GGUFLoaderBuilder, NormalSpecificConfig},
1313
toml_selector::get_toml_selected_model_device_map_params,
14-
AutoDeviceMapParams, DiffusionLoaderBuilder, DiffusionSpecificConfig, GGUFSpecificConfig,
15-
Loader, ModelDType, ModelSelected, NormalLoaderBuilder, TomlLoaderArgs, TomlSelector, Topology,
14+
AutoDeviceMapParams, DiffusionLoaderBuilder, GGUFSpecificConfig, Loader, ModelDType,
15+
ModelSelected, NormalLoaderBuilder, TomlLoaderArgs, TomlSelector, Topology,
1616
VisionLoaderBuilder, VisionSpecificConfig, GGUF_MULTI_FILE_DELIMITER,
1717
UQFF_MULTI_FILE_DELIMITER,
1818
};
@@ -23,7 +23,6 @@ pub struct LoaderBuilder {
2323
no_kv_cache: bool,
2424
chat_template: Option<String>,
2525
jinja_explicit: Option<String>,
26-
use_flash_attn: bool,
2726
prompt_chunksize: Option<NonZeroUsize>,
2827
}
2928

@@ -33,7 +32,6 @@ impl LoaderBuilder {
3332
model,
3433
no_kv_cache: false,
3534
chat_template: None,
36-
use_flash_attn: false,
3735
prompt_chunksize: None,
3836
jinja_explicit: None,
3937
}
@@ -51,10 +49,6 @@ impl LoaderBuilder {
5149
self.jinja_explicit = jinja_explicit;
5250
self
5351
}
54-
pub fn with_use_flash_attn(mut self, use_flash_attn: bool) -> Self {
55-
self.use_flash_attn = use_flash_attn;
56-
self
57-
}
5852
pub fn with_prompt_chunksize(mut self, prompt_chunksize: Option<NonZeroUsize>) -> Self {
5953
self.prompt_chunksize = prompt_chunksize;
6054
self
@@ -188,15 +182,13 @@ pub fn get_auto_device_map_params(model: &ModelSelected) -> anyhow::Result<AutoD
188182
}
189183

190184
fn loader_from_model_selected(args: LoaderBuilder) -> anyhow::Result<Box<dyn Loader>> {
191-
let use_flash_attn = args.use_flash_attn;
192185
let loader: Box<dyn Loader> = match args.model {
193186
ModelSelected::Toml { file } => {
194187
let selector: TomlSelector = toml::from_str(
195188
&fs::read_to_string(file.clone())
196189
.unwrap_or_else(|_| panic!("Could not load toml selector file at {file}")),
197190
)?;
198191
let args = TomlLoaderArgs {
199-
use_flash_attn,
200192
chat_template: args.chat_template,
201193
no_kv_cache: args.no_kv_cache,
202194
prompt_chunksize: args.prompt_chunksize,
@@ -220,7 +212,6 @@ fn loader_from_model_selected(args: LoaderBuilder) -> anyhow::Result<Box<dyn Loa
220212
hf_cache_path,
221213
} => NormalLoaderBuilder::new(
222214
NormalSpecificConfig {
223-
use_flash_attn,
224215
prompt_chunksize: args.prompt_chunksize,
225216
topology: Topology::from_option_path(topology)?,
226217
organization: organization.unwrap_or_default(),
@@ -258,7 +249,6 @@ fn loader_from_model_selected(args: LoaderBuilder) -> anyhow::Result<Box<dyn Loa
258249
hf_cache_path,
259250
} => NormalLoaderBuilder::new(
260251
NormalSpecificConfig {
261-
use_flash_attn,
262252
prompt_chunksize: args.prompt_chunksize,
263253
topology: Topology::from_option_path(topology)?,
264254
organization: Default::default(),
@@ -303,7 +293,6 @@ fn loader_from_model_selected(args: LoaderBuilder) -> anyhow::Result<Box<dyn Loa
303293
hf_cache_path,
304294
} => NormalLoaderBuilder::new(
305295
NormalSpecificConfig {
306-
use_flash_attn,
307296
prompt_chunksize: args.prompt_chunksize,
308297
topology: Topology::from_option_path(topology)?,
309298
organization: Default::default(),
@@ -526,7 +515,6 @@ fn loader_from_model_selected(args: LoaderBuilder) -> anyhow::Result<Box<dyn Loa
526515
imatrix,
527516
} => VisionLoaderBuilder::new(
528517
VisionSpecificConfig {
529-
use_flash_attn,
530518
prompt_chunksize: args.prompt_chunksize,
531519
topology: Topology::from_option_path(topology)?,
532520
write_uqff,
@@ -551,10 +539,7 @@ fn loader_from_model_selected(args: LoaderBuilder) -> anyhow::Result<Box<dyn Loa
551539
model_id,
552540
arch,
553541
dtype: _,
554-
} => {
555-
DiffusionLoaderBuilder::new(DiffusionSpecificConfig { use_flash_attn }, Some(model_id))
556-
.build(arch)
557-
}
542+
} => DiffusionLoaderBuilder::new(Some(model_id)).build(arch),
558543
};
559544
Ok(loader)
560545
}

mistralrs-core/src/models/deepseek2.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ serde_default_fn!(bool, norm_topk_prob, false);
3939
serde_default_fn!(ScoringFunc, scoring_func, ScoringFunc::Softmax);
4040
serde_default_fn!(Activation, hidden_act, Activation::Silu);
4141
serde_default_fn!(bool, tie_word_embeddings, false);
42-
serde_default_fn!(bool, use_flash_attn_default, false);
4342

4443
#[derive(Deserialize, Clone, Debug)]
4544
enum TopkMethod {

mistralrs-core/src/models/deepseek3.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ serde_default_fn!(usize, first_k_dense_replace, 0);
3838
serde_default_fn!(ScoringFunc, scoring_func, ScoringFunc::Softmax);
3939
serde_default_fn!(Activation, hidden_act, Activation::Silu);
4040
serde_default_fn!(bool, tie_word_embeddings, false);
41-
serde_default_fn!(bool, use_flash_attn_default, false);
4241

4342
#[derive(Deserialize, Clone, Debug)]
4443
enum TopkMethod {

mistralrs-core/src/models/llama.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ use crate::{
3030
};
3131

3232
serde_default_fn!(bool, word_emb_default, false);
33-
serde_default_fn!(bool, use_flash_attn_default, false);
3433

3534
#[derive(Debug, Clone, Deserialize, Serialize, Default)]
3635
pub struct Config {

mistralrs-core/src/models/mistral.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ use crate::{
2626
utils::{progress::NiceProgressBar, unvarbuilder::UnVarBuilder},
2727
};
2828

29-
serde_default_fn!(bool, use_flash_attn, false);
3029
serde_default_fn!(bool, tie_word_embeddings, false);
3130

3231
#[derive(Debug, Clone, Default, Serialize, Deserialize)]

mistralrs-core/src/models/qwen2.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ use crate::{
2525
};
2626

2727
serde_default_fn!(bool, word_emb_default, false);
28-
serde_default_fn!(bool, use_flash_attn, false);
2928

3029
#[derive(Debug, Clone, serde::Deserialize, Default, serde::Serialize)]
3130
pub struct Config {

mistralrs-core/src/models/qwen3.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@ macro_rules! sliding_window {
4040
};
4141
}
4242

43-
serde_default_fn!(bool, use_flash_attn, false);
4443
serde_default_fn!(bool, tie_word_embeddings, false);
4544

4645
#[derive(Debug, Clone, Default, Serialize, Deserialize)]

0 commit comments

Comments
 (0)