Skip to content

Commit c5c64f7

Browse files
authored
llama : disable Direct IO by default (#19109)
* llama : disable Direct IO by default
* cont : override mmap if supported
1 parent eef375c commit c5c64f7

File tree

6 files changed

+10
-13
lines changed

6 files changed

+10
-13
lines changed

common/arg.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2198,18 +2198,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
21982198
add_opt(common_arg(
21992199
{"--mmap"},
22002200
{"--no-mmap"},
2201-
string_format("whether to memory-map model. Explicitly enabling mmap disables direct-io. (if mmap disabled, slower load but may reduce pageouts if not using mlock) (default: %s)", params.use_mmap ? "enabled" : "disabled"),
2201+
string_format("whether to memory-map model. (if mmap disabled, slower load but may reduce pageouts if not using mlock) (default: %s)", params.use_mmap ? "enabled" : "disabled"),
22022202
[](common_params & params, bool value) {
22032203
params.use_mmap = value;
2204-
if (value) {
2205-
params.use_direct_io = false; // disable direct io when mmap is explicitly enabled
2206-
}
22072204
}
22082205
).set_env("LLAMA_ARG_MMAP"));
22092206
add_opt(common_arg(
22102207
{"-dio", "--direct-io"},
22112208
{"-ndio", "--no-direct-io"},
2212-
string_format("use DirectIO if available. Takes precedence over --mmap (default: %s)", params.use_direct_io ? "enabled" : "disabled"),
2209+
string_format("use DirectIO if available. (default: %s)", params.use_direct_io ? "enabled" : "disabled"),
22132210
[](common_params & params, bool value) {
22142211
params.use_direct_io = value;
22152212
}

common/common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,7 @@ struct common_params {
438438

439439
bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
440440
bool use_mmap = true; // enable mmap to use filesystem cache
441-
bool use_direct_io = true; // read from disk without buffering for faster model loading
441+
bool use_direct_io = false; // read from disk without buffering
442442
bool use_mlock = false; // use mlock to keep model in memory
443443
bool verbose_prompt = false; // print prompt tokens before generation
444444
bool display_prompt = true; // print prompt before generation

include/llama.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ extern "C" {
309309
// Keep the booleans together to avoid misalignment during copy-by-value.
310310
bool vocab_only; // only load the vocabulary, no weights
311311
bool use_mmap; // use mmap if possible
312-
bool use_direct_io; // use direct io, takes precedence over use_mmap
312+
bool use_direct_io; // use direct io, takes precedence over use_mmap when supported
313313
bool use_mlock; // force system to keep model in RAM
314314
bool check_tensors; // validate model tensor data
315315
bool use_extra_bufts; // use extra buffer types (used for weight repacking)

src/llama-model-loader.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -541,15 +541,15 @@ llama_model_loader::llama_model_loader(
541541

542542
if (use_mmap && use_direct_io) {
543543
if (files.back()->has_direct_io()) {
544-
// Disable mmap, as DirectIO is available
545-
use_mmap = false;
546544
LLAMA_LOG_WARN("%s: direct I/O is enabled, disabling mmap\n", __func__);
545+
use_mmap = false;
547546
} else {
548-
// Disable DirectIO and reopen file using std::fopen for mmap
547+
LLAMA_LOG_WARN("%s: direct I/O is not available, using mmap\n", __func__);
549548
use_direct_io = false;
549+
550+
// reopen file using std::fopen for mmap
550551
files.pop_back();
551552
files.emplace_back(new llama_file(fname.c_str(), "rb", false));
552-
LLAMA_LOG_WARN("%s: direct I/O is not available, using mmap\n", __func__);
553553
}
554554
}
555555

src/llama-model.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8125,7 +8125,7 @@ llama_model_params llama_model_default_params() {
81258125
/*.kv_overrides =*/ nullptr,
81268126
/*.vocab_only =*/ false,
81278127
/*.use_mmap =*/ true,
8128-
/*.use_direct_io =*/ true,
8128+
/*.use_direct_io =*/ false,
81298129
/*.use_mlock =*/ false,
81308130
/*.check_tensors =*/ false,
81318131
/*.use_extra_bufts =*/ true,

src/llama-quant.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
545545
}
546546

547547
std::vector<std::string> splits = {};
548-
llama_model_loader ml(fname_inp, splits, use_mmap, /*use_direct_io*/ true, /*check_tensors*/ true, /*no_alloc*/ false, kv_overrides, nullptr);
548+
llama_model_loader ml(fname_inp, splits, use_mmap, /*use_direct_io*/ false, /*check_tensors*/ true, /*no_alloc*/ false, kv_overrides, nullptr);
549549
ml.init_mappings(false); // no prefetching
550550

551551
llama_model model(llama_model_default_params());

0 commit comments

Comments (0)