Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2198,18 +2198,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
add_opt(common_arg(
{"--mmap"},
{"--no-mmap"},
string_format("whether to memory-map model. Explicitly enabling mmap disables direct-io. (if mmap disabled, slower load but may reduce pageouts if not using mlock) (default: %s)", params.use_mmap ? "enabled" : "disabled"),
string_format("whether to memory-map model. (if mmap disabled, slower load but may reduce pageouts if not using mlock) (default: %s)", params.use_mmap ? "enabled" : "disabled"),
[](common_params & params, bool value) {
params.use_mmap = value;
if (value) {
params.use_direct_io = false; // disable direct io when mmap is explicitly enabled
}
}
).set_env("LLAMA_ARG_MMAP"));
add_opt(common_arg(
{"-dio", "--direct-io"},
{"-ndio", "--no-direct-io"},
string_format("use DirectIO if available. Takes precedence over --mmap (default: %s)", params.use_direct_io ? "enabled" : "disabled"),
string_format("use DirectIO if available. (default: %s)", params.use_direct_io ? "enabled" : "disabled"),
[](common_params & params, bool value) {
params.use_direct_io = value;
}
Expand Down
2 changes: 1 addition & 1 deletion common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ struct common_params {

bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
bool use_mmap = true; // enable mmap to use filesystem cache
bool use_direct_io = true; // read from disk without buffering for faster model loading
bool use_direct_io = false; // read from disk without buffering
bool use_mlock = false; // use mlock to keep model in memory
bool verbose_prompt = false; // print prompt tokens before generation
bool display_prompt = true; // print prompt before generation
Expand Down
2 changes: 1 addition & 1 deletion include/llama.h
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ extern "C" {
// Keep the booleans together to avoid misalignment during copy-by-value.
bool vocab_only; // only load the vocabulary, no weights
bool use_mmap; // use mmap if possible
bool use_direct_io; // use direct io, takes precedence over use_mmap
bool use_direct_io; // use direct io, only works if use_mmap is false
bool use_mlock; // force system to keep model in RAM
bool check_tensors; // validate model tensor data
bool use_extra_bufts; // use extra buffer types (used for weight repacking)
Expand Down
14 changes: 6 additions & 8 deletions src/llama-model-loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -536,20 +536,18 @@ llama_model_loader::llama_model_loader(
get_key(llm_kv(LLM_KV_GENERAL_ARCHITECTURE), arch_name, false);
llm_kv = LLM_KV(llm_arch_from_string(arch_name));

use_direct_io = use_direct_io && !use_mmap;

files.emplace_back(new llama_file(fname.c_str(), "rb", use_direct_io));
contexts.emplace_back(ctx);

if (use_mmap && use_direct_io) {
if (files.back()->has_direct_io()) {
// Disable mmap, as DirectIO is available
use_mmap = false;
LLAMA_LOG_WARN("%s: direct I/O is enabled, disabling mmap\n", __func__);
} else {
// Disable DirectIO and reopen file using std::fopen for mmap
if (use_direct_io) {
if (!files.back()->has_direct_io()) {
// Disable DirectIO and reopen file using std::fopen
use_direct_io = false;
files.pop_back();
files.emplace_back(new llama_file(fname.c_str(), "rb", false));
LLAMA_LOG_WARN("%s: direct I/O is not available, using mmap\n", __func__);
LLAMA_LOG_WARN("%s: direct I/O is not available\n", __func__);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/llama-model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8125,7 +8125,7 @@ llama_model_params llama_model_default_params() {
/*.kv_overrides =*/ nullptr,
/*.vocab_only =*/ false,
/*.use_mmap =*/ true,
/*.use_direct_io =*/ true,
/*.use_direct_io =*/ false,
/*.use_mlock =*/ false,
/*.check_tensors =*/ false,
/*.use_extra_bufts =*/ true,
Expand Down