Skip to content

Commit 1acc30b

Browse files
committed
docs: add comprehensive timeout configuration documentation
1 parent ec4e4f4 commit 1acc30b

File tree

3 files changed

+66
-0
lines changed

3 files changed

+66
-0
lines changed

src/cortex-app-server/src/config.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,18 @@ pub struct ServerConfig {
4949
pub max_body_size: usize,
5050

5151
/// Request timeout in seconds (applies to the full request lifecycle).
52+
///
53+
/// See `cortex_common::http_client` module documentation for the complete
54+
/// timeout hierarchy across Cortex services.
5255
#[serde(default = "default_request_timeout")]
5356
pub request_timeout: u64,
5457

5558
/// Read timeout for individual chunks in seconds.
5659
/// Applies to chunked transfer encoding to prevent indefinite hangs
5760
/// when clients disconnect without sending the terminal chunk.
61+
///
62+
/// See `cortex_common::http_client` module documentation for the complete
63+
/// timeout hierarchy across Cortex services.
5864
#[serde(default = "default_read_timeout")]
5965
pub read_timeout: u64,
6066

@@ -71,12 +77,16 @@ pub struct ServerConfig {
7177
pub cors_origins: Vec<String>,
7278

7379
/// Graceful shutdown timeout in seconds.
80+
///
81+
/// See `cortex_common::http_client` module documentation for the complete
82+
/// timeout hierarchy across Cortex services.
7483
#[serde(default = "default_shutdown_timeout")]
7584
pub shutdown_timeout: u64,
7685
}
7786

7887
fn default_shutdown_timeout() -> u64 {
7988
30 // 30 seconds for graceful shutdown
89+
// See the `cortex_common::http_client` module documentation for the complete timeout hierarchy.
8090
}
8191

8292
fn default_listen_addr() -> String {

src/cortex-common/src/http_client.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,54 @@
99
//!
1010
//! DNS caching is configured with reasonable TTL to allow failover and load
1111
//! balancer updates (#2177).
12+
//!
13+
//! # Timeout Configuration Guide
14+
//!
15+
//! This section documents the timeout hierarchy across the Cortex codebase. Use this
16+
//! as a reference when configuring timeouts for new features or debugging timeout issues.
17+
//!
18+
//! ## Timeout Hierarchy
19+
//!
20+
//! | Use Case | Timeout | Constant/Location | Rationale |
21+
//! |-----------------------------|---------|--------------------------------------------|-----------------------------------------|
22+
//! | Health checks | 5s | `HEALTH_CHECK_TIMEOUT` (this module) | Quick validation of service status |
23+
//! | Standard HTTP requests | 30s | `DEFAULT_TIMEOUT` (this module) | Normal API calls with reasonable margin |
24+
//! | Per-chunk read (streaming) | 30s | `read_timeout` (cortex-app-server/config) | Individual chunk timeout during stream |
25+
//! | Pool idle timeout | 60s | `POOL_IDLE_TIMEOUT` (this module) | DNS re-resolution for failover |
26+
//! | LLM Request (non-streaming) | 120s | `DEFAULT_REQUEST_TIMEOUT_SECS` (cortex-exec/runner) | Model inference takes time |
27+
//! | LLM Streaming total | 300s | `STREAMING_TIMEOUT` (this module) | Long-running streaming responses |
28+
//! | Server request lifecycle | 300s | `request_timeout` (cortex-app-server/config) | Full HTTP request/response cycle |
29+
//! | Entire exec session | 600s | `DEFAULT_TIMEOUT_SECS` (cortex-exec/runner) | Multi-turn conversation limit |
30+
//! | Graceful shutdown | 30s | `shutdown_timeout` (cortex-app-server/config) | Time for cleanup on shutdown |
31+
//!
32+
//! ## Module-Specific Timeouts
33+
//!
34+
//! ### cortex-common (this module)
35+
//! - `DEFAULT_TIMEOUT` (30s): Use for standard API calls.
36+
//! - `STREAMING_TIMEOUT` (300s): Use for LLM streaming endpoints.
37+
//! - `HEALTH_CHECK_TIMEOUT` (5s): Use for health/readiness checks.
38+
//! - `POOL_IDLE_TIMEOUT` (60s): Idle-connection cleanup in the pool, so DNS is re-resolved for failover.
39+
//!
40+
//! ### cortex-exec (runner.rs)
41+
//! - `DEFAULT_TIMEOUT_SECS` (600s): Maximum duration for entire exec session.
42+
//! - `DEFAULT_REQUEST_TIMEOUT_SECS` (120s): Single LLM request timeout.
43+
//!
44+
//! ### cortex-app-server (config.rs)
45+
//! - `request_timeout` (300s): Full request lifecycle timeout.
46+
//! - `read_timeout` (30s): Per-chunk timeout for streaming reads.
47+
//! - `shutdown_timeout` (30s): Graceful shutdown duration.
48+
//!
49+
//! ### cortex-engine (api_client.rs)
50+
//! - Re-exports constants from this module for consistency.
51+
//!
52+
//! ## Recommendations
53+
//!
54+
//! When adding new timeout configurations:
55+
//! 1. Use constants from this module when possible for consistency.
56+
//! 2. Document any new timeout constants with their rationale.
57+
//! 3. Respect the timeout hierarchy: an inner timeout should be shorter than the outer timeout that encloses it.
58+
//! 4. For LLM operations, use longer timeouts (120s-300s) to accommodate model inference.
59+
//! 5. For health checks and quick validations, use short timeouts (5s-10s).
1260
1361
use reqwest::Client;
1462
use std::time::Duration;

src/cortex-exec/src/runner.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,17 @@ use cortex_protocol::ConversationId;
2727
use crate::output::{OutputFormat, OutputWriter};
2828

2929
/// Default timeout for the entire execution (10 minutes).
30+
///
31+
/// This is the maximum duration for a multi-turn exec session.
32+
/// See `cortex_common::http_client` module documentation for the complete
33+
/// timeout hierarchy across Cortex services.
3034
const DEFAULT_TIMEOUT_SECS: u64 = 600;
3135

3236
/// Default timeout for a single LLM request (2 minutes).
37+
///
38+
/// Allows sufficient time for model inference while preventing indefinite hangs.
39+
/// See `cortex_common::http_client` module documentation for the complete
40+
/// timeout hierarchy across Cortex services.
3341
const DEFAULT_REQUEST_TIMEOUT_SECS: u64 = 120;
3442

3543
/// Maximum retries for transient errors.

0 commit comments

Comments
 (0)