Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions crates/openfang-kernel/src/heartbeat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,17 @@ mod tests {
assert_eq!(summary.unresponsive_agents[0].name, "agent-2");
}

/// Overriding only the timeout must leave every other field at its default.
#[test]
fn test_heartbeat_config_custom_timeout() {
    let custom_timeout_secs = 600;
    let overridden = HeartbeatConfig {
        default_timeout_secs: custom_timeout_secs,
        ..HeartbeatConfig::default()
    };

    // The explicitly set field carries the override...
    assert_eq!(overridden.default_timeout_secs, 600);
    // ...while the untouched fields still match the default constants.
    assert_eq!(overridden.check_interval_secs, DEFAULT_CHECK_INTERVAL_SECS);
    assert_eq!(overridden.max_recovery_attempts, DEFAULT_MAX_RECOVERY_ATTEMPTS);
}

#[test]
fn test_recovery_tracker() {
let tracker = RecoveryTracker::new();
Expand Down
5 changes: 4 additions & 1 deletion crates/openfang-kernel/src/kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4310,7 +4310,10 @@ impl OpenFangKernel {
use crate::heartbeat::{check_agents, is_quiet_hours, HeartbeatConfig, RecoveryTracker};

let kernel = Arc::clone(self);
let config = HeartbeatConfig::default();
let config = HeartbeatConfig {
default_timeout_secs: self.config.heartbeat.default_timeout_secs,
..HeartbeatConfig::default()
};
let interval_secs = config.check_interval_secs;
let recovery_tracker = RecoveryTracker::new();

Expand Down
58 changes: 58 additions & 0 deletions crates/openfang-types/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1099,6 +1099,30 @@ pub struct KernelConfig {
/// Defaults to `~/.openfang/workflows`. Set to empty string to disable.
#[serde(default)]
pub workflows_dir: Option<PathBuf>,
/// Heartbeat monitor settings.
#[serde(default)]
pub heartbeat: HeartbeatSettings,
}

/// Settings for the heartbeat monitor, read from the `[heartbeat]` config section.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct HeartbeatSettings {
    /// How many seconds a reactive agent may stay inactive before it is
    /// marked as unresponsive.
    ///
    /// Falls back to 180 when the key is absent. Raise it so that hands
    /// which sit idle for long stretches are not treated as crashed.
    #[serde(default = "default_heartbeat_timeout")]
    pub default_timeout_secs: u64,
}

/// Serde fallback for `HeartbeatSettings::default_timeout_secs`: 180 seconds.
fn default_heartbeat_timeout() -> u64 {
    const FALLBACK_TIMEOUT_SECS: u64 = 180;
    FALLBACK_TIMEOUT_SECS
}

impl Default for HeartbeatSettings {
    /// Builds the settings with the timeout returned by `default_heartbeat_timeout`,
    /// the same function serde is told to use when the key is missing.
    fn default() -> Self {
        let default_timeout_secs = default_heartbeat_timeout();
        Self { default_timeout_secs }
    }
}

/// Dashboard authentication (username/password login).
Expand Down Expand Up @@ -1308,6 +1332,7 @@ impl Default for KernelConfig {
oauth: OAuthConfig::default(),
auth: AuthConfig::default(),
workflows_dir: None,
heartbeat: HeartbeatSettings::default(),
}
}
}
Expand Down Expand Up @@ -4005,4 +4030,37 @@ mod tests {
let config: SlackConfig = toml::from_str("unfurl_links = true").unwrap();
assert!(config.unfurl_links);
}

/// `HeartbeatSettings::default()` must yield a 180-second timeout.
#[test]
fn test_heartbeat_settings_default() {
    let timeout_secs = HeartbeatSettings::default().default_timeout_secs;
    assert_eq!(timeout_secs, 180);
}

/// A bare `default_timeout_secs` key must deserialize into `HeartbeatSettings`.
#[test]
fn test_heartbeat_settings_deserialization() {
    let parsed = toml::from_str::<HeartbeatSettings>(r#"default_timeout_secs = 600"#).unwrap();
    assert_eq!(parsed.default_timeout_secs, 600);
}

/// A `KernelConfig` parsed from a document with no `[heartbeat]` table must
/// still carry the 180-second heartbeat timeout.
#[test]
fn test_heartbeat_settings_omitted_uses_default() {
    // Only an unrelated key is present; the heartbeat section is absent.
    let without_heartbeat = r#"
log_level = "debug"
"#;
    let parsed = toml::from_str::<KernelConfig>(without_heartbeat).unwrap();
    assert_eq!(parsed.heartbeat.default_timeout_secs, 180);
}

/// A `[heartbeat]` table inside the kernel config must populate
/// `KernelConfig::heartbeat` with the value it specifies.
#[test]
fn test_heartbeat_settings_in_kernel_config() {
    let with_heartbeat = r#"
[heartbeat]
default_timeout_secs = 300
"#;
    let parsed = toml::from_str::<KernelConfig>(with_heartbeat).unwrap();
    assert_eq!(parsed.heartbeat.default_timeout_secs, 300);
}
}
16 changes: 16 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -1467,6 +1467,22 @@ Configured in agent manifests via `ModelRoutingConfig`:
| `simple_threshold` | `100` | Token count below which a query is classified as simple. |
| `complex_threshold` | `500` | Token count above which a query is classified as complex. |

### Heartbeat Monitor

Global heartbeat settings in `[heartbeat]`:

```toml
[heartbeat]
# Seconds of inactivity before a reactive agent is marked as unresponsive.
# Increase this if you have hands that sit idle between infrequent requests.
# Default: 180 (this example raises it to 300).
default_timeout_secs = 300
```

| Field | Default | Description |
|-------|---------|-------------|
| `default_timeout_secs` | `180` | Seconds of inactivity before marking an agent as unresponsive. Per-agent `heartbeat_interval_secs` in autonomous config overrides this. |

### Autonomous Guardrails (per-agent manifest)

Configured in agent manifests via `AutonomousConfig`:
Expand Down