7 changes: 2 additions & 5 deletions common/arg.cpp
@@ -2198,18 +2198,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     add_opt(common_arg(
         {"--mmap"},
         {"--no-mmap"},
-        string_format("whether to memory-map model. Explicitly enabling mmap disables direct-io. (if mmap disabled, slower load but may reduce pageouts if not using mlock) (default: %s)", params.use_mmap ? "enabled" : "disabled"),
+        string_format("whether to memory-map model. (if mmap disabled, slower load but may reduce pageouts if not using mlock) (default: %s)", params.use_mmap ? "enabled" : "disabled"),
         [](common_params & params, bool value) {
             params.use_mmap = value;
-            if (value) {
-                params.use_direct_io = false; // disable direct io when mmap is explicitly enabled
-            }
         }
     ).set_env("LLAMA_ARG_MMAP"));
     add_opt(common_arg(
         {"-dio", "--direct-io"},
         {"-ndio", "--no-direct-io"},
-        string_format("use DirectIO if available. Takes precedence over --mmap (default: %s)", params.use_direct_io ? "enabled" : "disabled"),
+        string_format("use DirectIO if available. (default: %s)", params.use_direct_io ? "enabled" : "disabled"),
         [](common_params & params, bool value) {
             params.use_direct_io = value;
         }
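With the if (value) block removed, --mmap no longer clears use_direct_io at parse time; the two flags now travel independently and the model loader resolves precedence later. A minimal stand-alone sketch of the resulting flag semantics (a hypothetical reduction of common/arg.cpp; the real common_params has many more fields):

#include <cassert>

// Simplified stand-in for common_params; defaults match common/common.h
// after this change.
struct common_params {
    bool use_mmap      = true;
    bool use_direct_io = false;
};

int main() {
    common_params params;

    // user passes: -dio --mmap
    params.use_direct_io = true; // -dio handler
    params.use_mmap      = true; // --mmap handler no longer resets use_direct_io

    // both stay set; llama_model_loader later picks direct I/O if the
    // file supports it, and falls back to mmap otherwise
    assert(params.use_mmap && params.use_direct_io);
    return 0;
}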
2 changes: 1 addition & 1 deletion common/common.h
@@ -438,7 +438,7 @@ struct common_params {
 
     bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
     bool use_mmap = true; // enable mmap to use filesystem cache
-    bool use_direct_io = true; // read from disk without buffering for faster model loading
+    bool use_direct_io = false; // read from disk without buffering
     bool use_mlock = false; // use mlock to keep model in memory
     bool verbose_prompt = false; // print prompt tokens before generation
     bool display_prompt = true; // print prompt before generation
2 changes: 1 addition & 1 deletion include/llama.h
@@ -309,7 +309,7 @@ extern "C" {
         // Keep the booleans together to avoid misalignment during copy-by-value.
         bool vocab_only; // only load the vocabulary, no weights
         bool use_mmap; // use mmap if possible
-        bool use_direct_io; // use direct io, takes precedence over use_mmap
+        bool use_direct_io; // use direct io, takes precedence over use_mmap when supported
         bool use_mlock; // force system to keep model in RAM
         bool check_tensors; // validate model tensor data
         bool use_extra_bufts; // use extra buffer types (used for weight repacking)
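Since the default flips to false elsewhere in this PR, API consumers who want direct I/O must now request it explicitly. A sketch of the opt-in, assuming the current llama_model_load_from_file / llama_model_free entry points and a placeholder model path:

#include "llama.h"

int main() {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    mparams.use_direct_io = true; // opt in; wins over use_mmap when supported
    mparams.use_mmap      = true; // kept as fallback if direct I/O is unavailable

    llama_model * model = llama_model_load_from_file("model.gguf", mparams);
    if (model != nullptr) {
        llama_model_free(model);
    }

    llama_backend_free();
    return 0;
}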
8 changes: 4 additions & 4 deletions src/llama-model-loader.cpp
@@ -541,15 +541,15 @@ llama_model_loader::llama_model_loader(
 
     if (use_mmap && use_direct_io) {
         if (files.back()->has_direct_io()) {
-            // Disable mmap, as DirectIO is available
-            use_mmap = false;
+            LLAMA_LOG_WARN("%s: direct I/O is enabled, disabling mmap\n", __func__);
+            use_mmap = false;
         } else {
-            // Disable DirectIO and reopen file using std::fopen for mmap
+            LLAMA_LOG_WARN("%s: direct I/O is not available, using mmap\n", __func__);
             use_direct_io = false;
 
+            // reopen file using std::fopen for mmap
             files.pop_back();
             files.emplace_back(new llama_file(fname.c_str(), "rb", false));
-            LLAMA_LOG_WARN("%s: direct I/O is not available, using mmap\n", __func__);
         }
     }
 
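Read as a whole, the branch implements a simple precedence rule: when both mechanisms are requested, direct I/O wins only if the opened file actually supports it; otherwise the loader warns and reopens the file buffered for mmap. A stand-alone sketch of that rule (file probing and reopening stubbed out; the function name is illustrative, not the loader's API):

// Illustrative reduction of the decision in llama_model_loader's
// constructor; returns true when direct I/O will be used.
bool resolve_io_mode(bool & use_mmap, bool & use_direct_io,
                     bool file_has_direct_io) {
    if (use_mmap && use_direct_io) {
        if (file_has_direct_io) {
            use_mmap = false;      // direct I/O available: it takes precedence
        } else {
            use_direct_io = false; // fall back: reopen with std::fopen and mmap
        }
    }
    return use_direct_io;
}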
2 changes: 1 addition & 1 deletion src/llama-model.cpp
@@ -8125,7 +8125,7 @@ llama_model_params llama_model_default_params() {
         /*.kv_overrides =*/ nullptr,
         /*.vocab_only =*/ false,
         /*.use_mmap =*/ true,
-        /*.use_direct_io =*/ true,
+        /*.use_direct_io =*/ false,
         /*.use_mlock =*/ false,
         /*.check_tensors =*/ false,
         /*.use_extra_bufts =*/ true,
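With this hunk, llama_model_default_params() leaves direct I/O off, so the default load path is mmap alone. A quick runnable check of the new defaults (field names taken from llama.h):

#include <cassert>
#include "llama.h"

int main() {
    const llama_model_params p = llama_model_default_params();
    assert(p.use_mmap == true);       // mmap stays on by default
    assert(p.use_direct_io == false); // direct I/O is now opt-in
    return 0;
}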
2 changes: 1 addition & 1 deletion src/llama-quant.cpp
@@ -545,7 +545,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
     }
 
     std::vector<std::string> splits = {};
-    llama_model_loader ml(fname_inp, splits, use_mmap, /*use_direct_io*/ true, /*check_tensors*/ true, /*no_alloc*/ false, kv_overrides, nullptr);
+    llama_model_loader ml(fname_inp, splits, use_mmap, /*use_direct_io*/ false, /*check_tensors*/ true, /*no_alloc*/ false, kv_overrides, nullptr);
     ml.init_mappings(false); // no prefetching
 
     llama_model model(llama_model_default_params());
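Quantization now always opens the input with buffered I/O, independent of any runtime flags; nothing changes at the public API surface. A hedged usage sketch, assuming the llama_model_quantize entry point from llama.h and placeholder file names (the target ftype here is just an example choice):

#include "llama.h"

int main() {
    llama_model_quantize_params qparams = llama_model_quantize_default_params();
    qparams.ftype = LLAMA_FTYPE_MOSTLY_Q4_K_M; // example target quantization type

    // the input file is read with std::fopen internally after this PR;
    // direct I/O is not used on the quantization path
    return (int) llama_model_quantize("model-f16.gguf", "model-q4_k_m.gguf", &qparams);
}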