Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions tools/server/server-models.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ server_models::server_models(
/* path */ model.manifest_path,
/* path_mmproj */ "", // auto-detected when loading
/* in_cache */ true,
/* hostname */ "",
/* port */ 0,
/* status */ SERVER_MODEL_STATUS_UNLOADED,
/* last_used */ 0,
Expand All @@ -189,6 +190,7 @@ server_models::server_models(
/* path */ model.path,
/* path_mmproj */ model.path_mmproj,
/* in_cache */ false,
/* hostname */ "",
/* port */ 0,
/* status */ SERVER_MODEL_STATUS_UNLOADED,
/* last_used */ 0,
Expand Down Expand Up @@ -364,6 +366,7 @@ void server_models::load(const std::string & name, bool auto_load) {
// prepare new instance info
instance_t inst;
inst.meta = meta;
inst.meta.hostname = "127.0.0.1";
inst.meta.port = get_free_port();
inst.meta.status = SERVER_MODEL_STATUS_LOADING;
inst.meta.last_used = ggml_time_ms();
Expand Down Expand Up @@ -392,6 +395,7 @@ void server_models::load(const std::string & name, bool auto_load) {
}

// set model args
add_or_replace_arg(child_args, "--host", inst.meta.hostname);
add_or_replace_arg(child_args, "--port", std::to_string(inst.meta.port));
add_or_replace_arg(child_args, "--alias", inst.meta.name);

Expand Down Expand Up @@ -571,7 +575,7 @@ server_http_res_ptr server_models::proxy_request(const server_http_req & req, co
SRV_INF("proxying request to model %s on port %d\n", name.c_str(), meta->port);
auto proxy = std::make_unique<server_http_proxy>(
method,
base_params.hostname,
meta->hostname,
meta->port,
req.path,
req.headers,
Expand Down Expand Up @@ -599,7 +603,7 @@ std::thread server_models::setup_child_server(const common_params & base_params,
body["value"] = server_model_status_to_string(SERVER_MODEL_STATUS_LOADED);
req.body = body.dump();

SRV_INF("notifying router server (port=%d) that model %s is ready\n", router_port, name.c_str());
SRV_INF("notifying router server (host=%s port=%d) that model %s is ready\n", base_params.hostname.c_str(), router_port, name.c_str());
auto result = cli.send(std::move(req));
if (result.error() != httplib::Error::Success) {
auto err_str = httplib::to_string(result.error());
Expand Down
1 change: 1 addition & 0 deletions tools/server/server-models.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ struct server_model_meta {
std::string path;
std::string path_mmproj; // only available if in_cache=false
bool in_cache = false; // if true, use -hf; use -m otherwise
std::string hostname;
int port = 0;
server_model_status status = SERVER_MODEL_STATUS_UNLOADED;
int64_t last_used = 0; // for LRU unloading
Expand Down