Skip to content

Commit 724e16f

Browse files
committed
fix: merge weights by mechanism_id to avoid nonce conflicts in CRv4 commit
Multiple challenges producing weights for the same mechanism_id caused duplicate transactions in the mempool (Priority too low: 1014 error). Now weights are merged per mechanism_id before submission.
1 parent 36cd454 commit 724e16f

File tree

9 files changed

+225
-4
lines changed

9 files changed

+225
-4
lines changed

bins/validator-node/src/main.rs

Lines changed: 84 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,13 @@ async fn main() -> Result<()> {
660660

661661
// Cast storage to trait object for WASM executor
662662
let storage_dyn: Arc<dyn DistributedStore> = Arc::clone(&storage) as Arc<dyn DistributedStore>;
663+
664+
// Shared chain state data for WASM instances
665+
let shared_llm_validators_json: Arc<parking_lot::RwLock<Vec<u8>>> =
666+
Arc::new(parking_lot::RwLock::new(Vec::new()));
667+
let shared_registered_hotkeys_json: Arc<parking_lot::RwLock<Vec<u8>>> =
668+
Arc::new(parking_lot::RwLock::new(Vec::new()));
669+
663670
let wasm_executor = match WasmChallengeExecutor::new(WasmExecutorConfig {
664671
module_dir: wasm_module_dir.clone(),
665672
max_memory_bytes: args.wasm_max_memory,
@@ -674,6 +681,8 @@ async fn main() -> Result<()> {
674681
)),
675682
chutes_api_key: args.chutes_api_key.clone(),
676683
distributed_storage: Some(storage_dyn),
684+
llm_validators_json: Arc::clone(&shared_llm_validators_json),
685+
registered_hotkeys_json: Arc::clone(&shared_registered_hotkeys_json),
677686
}) {
678687
Ok(executor) => {
679688
info!(
@@ -985,6 +994,8 @@ async fn main() -> Result<()> {
985994
&p2p_cmd_tx,
986995
&chain_state,
987996
&challenge_last_sync,
997+
&shared_llm_validators_json,
998+
&shared_registered_hotkeys_json,
988999
).await;
9891000
}
9901001

@@ -1368,6 +1379,9 @@ async fn main() -> Result<()> {
13681379
stake: our_stake,
13691380
timestamp: chrono::Utc::now().timestamp_millis(),
13701381
signature: vec![], // Will be signed by P2P layer
1382+
capabilities: platform_p2p_consensus::ValidatorCapabilities {
1383+
has_llm: args.chutes_api_key.is_some(),
1384+
},
13711385
});
13721386

13731387
if let Err(e) = p2p_broadcast_tx.send(platform_p2p_consensus::P2PCommand::Broadcast(heartbeat)).await {
@@ -1844,6 +1858,8 @@ async fn handle_network_event(
18441858
p2p_cmd_tx: &tokio::sync::mpsc::Sender<platform_p2p_consensus::P2PCommand>,
18451859
chain_state: &Arc<RwLock<platform_core::ChainState>>,
18461860
challenge_last_sync: &Arc<RwLock<std::collections::HashMap<platform_core::ChallengeId, u64>>>,
1861+
shared_llm_validators_json: &Arc<parking_lot::RwLock<Vec<u8>>>,
1862+
shared_registered_hotkeys_json: &Arc<parking_lot::RwLock<Vec<u8>>>,
18471863
) {
18481864
match event {
18491865
NetworkEvent::Message { source, message } => match message {
@@ -1932,6 +1948,33 @@ async fn handle_network_event(
19321948
.validators
19331949
.insert(hb.validator.clone(), validator_info);
19341950

1951+
// Track LLM-capable validators
1952+
if hb.capabilities.has_llm {
1953+
state.llm_capable_validators.insert(hb.validator.clone());
1954+
} else {
1955+
state.llm_capable_validators.remove(&hb.validator);
1956+
}
1957+
1958+
// Update shared WASM-accessible chain state
1959+
if let Ok(json) = serde_json::to_vec(
1960+
&state
1961+
.llm_capable_validators
1962+
.iter()
1963+
.map(|h| h.to_ss58())
1964+
.collect::<Vec<_>>(),
1965+
) {
1966+
*shared_llm_validators_json.write() = json;
1967+
}
1968+
if let Ok(json) = serde_json::to_vec(
1969+
&state
1970+
.registered_hotkeys
1971+
.iter()
1972+
.map(|h| h.to_ss58())
1973+
.collect::<Vec<_>>(),
1974+
) {
1975+
*shared_registered_hotkeys_json.write() = json;
1976+
}
1977+
19351978
// Check core state hash divergence
19361979
let our_core_hash = state.state_hash;
19371980
drop(state);
@@ -3626,16 +3669,54 @@ async fn handle_block_event(
36263669
}
36273670
}
36283671

3629-
// Submit weights (or burn if none)
3630-
let weights_to_submit = if mechanism_weights.is_empty() {
3672+
// Merge weights by mechanism_id before submitting.
3673+
// Multiple challenges may produce weights for the same mechanism;
3674+
// we must merge them into a single submission to avoid nonce conflicts.
3675+
let weights_to_submit: Vec<(u8, Vec<u16>, Vec<u16>)> = if mechanism_weights
3676+
.is_empty()
3677+
{
36313678
let mechanism_id = {
36323679
let cs = chain_state.read();
36333680
cs.mechanism_configs.keys().next().copied().unwrap_or(0u8)
36343681
};
36353682
info!("No weights - submitting burn weights to UID 0");
36363683
vec![(mechanism_id, vec![0u16], vec![65535u16])]
36373684
} else {
3638-
mechanism_weights
3685+
use std::collections::HashMap;
3686+
let mut merged: HashMap<u8, HashMap<u16, u64>> = HashMap::new();
3687+
for (mid, uids, vals) in &mechanism_weights {
3688+
let entry = merged.entry(*mid).or_default();
3689+
for (uid, val) in uids.iter().zip(vals.iter()) {
3690+
*entry.entry(*uid).or_default() += *val as u64;
3691+
}
3692+
}
3693+
let mut result = Vec::new();
3694+
for (mid, uid_map) in merged {
3695+
let max_val = uid_map.values().copied().max().unwrap_or(1).max(1);
3696+
let mut uids = Vec::new();
3697+
let mut vals = Vec::new();
3698+
for (uid, val) in &uid_map {
3699+
let scaled = ((*val as f64 / max_val as f64) * 65535.0).round() as u16;
3700+
if scaled > 0 {
3701+
uids.push(*uid);
3702+
vals.push(scaled);
3703+
}
3704+
}
3705+
if !uids.is_empty() {
3706+
info!("Merged weights for mechanism {}: {} UIDs", mid, uids.len());
3707+
debug!(" Merged UIDs: {:?}, Weights: {:?}", uids, vals);
3708+
result.push((mid, uids, vals));
3709+
}
3710+
}
3711+
if result.is_empty() {
3712+
let mechanism_id = {
3713+
let cs = chain_state.read();
3714+
cs.mechanism_configs.keys().next().copied().unwrap_or(0u8)
3715+
};
3716+
vec![(mechanism_id, vec![0u16], vec![65535u16])]
3717+
} else {
3718+
result
3719+
}
36393720
};
36403721

36413722
for (mechanism_id, uids, weights) in weights_to_submit {

bins/validator-node/src/wasm_executor.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ pub struct WasmExecutorConfig {
2727
pub chutes_api_key: Option<String>,
2828
/// Optional distributed storage for loading WASM modules
2929
pub distributed_storage: Option<Arc<dyn platform_distributed_storage::DistributedStore>>,
30+
/// Shared chain state: LLM-capable validators (JSON bytes)
31+
pub llm_validators_json: Arc<parking_lot::RwLock<Vec<u8>>>,
32+
/// Shared chain state: registered hotkeys (JSON bytes)
33+
pub registered_hotkeys_json: Arc<parking_lot::RwLock<Vec<u8>>>,
3034
}
3135

3236
impl std::fmt::Debug for WasmExecutorConfig {
@@ -55,6 +59,8 @@ impl Default for WasmExecutorConfig {
5559
storage_backend: Arc::new(InMemoryStorageBackend::new()),
5660
chutes_api_key: None,
5761
distributed_storage: None,
62+
llm_validators_json: Arc::new(parking_lot::RwLock::new(Vec::new())),
63+
registered_hotkeys_json: Arc::new(parking_lot::RwLock::new(Vec::new())),
5864
}
5965
}
6066
}
@@ -780,6 +786,8 @@ impl WasmChallengeExecutor {
780786
Some(key) => LlmPolicy::with_api_key(key.clone()),
781787
None => LlmPolicy::default(),
782788
},
789+
llm_validators_json: self.config.llm_validators_json.read().clone(),
790+
registered_hotkeys_json: self.config.registered_hotkeys_json.read().clone(),
783791
..Default::default()
784792
};
785793

crates/challenge-sdk-wasm/src/host_functions.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,8 @@ extern "C" {
311311
fn consensus_get_submission_count() -> i32;
312312
fn consensus_get_block_height() -> i64;
313313
fn consensus_get_subnet_challenges(buf_ptr: i32, buf_len: i32) -> i32;
314+
fn consensus_get_llm_validators(buf_ptr: i32, buf_len: i32) -> i32;
315+
fn consensus_get_registered_hotkeys(buf_ptr: i32, buf_len: i32) -> i32;
314316
}
315317

316318
pub fn host_consensus_get_epoch() -> i64 {
@@ -372,3 +374,28 @@ pub fn host_consensus_get_subnet_challenges() -> Result<Vec<u8>, i32> {
372374
buf.truncate(status as usize);
373375
Ok(buf)
374376
}
377+
378+
/// Get the list of validators that have LLM capability (Chutes API key).
379+
/// Returns JSON-encoded list of hotkey strings.
380+
pub fn host_consensus_get_llm_validators() -> Result<Vec<u8>, i32> {
381+
let mut buf = vec![0u8; RESPONSE_BUF_MEDIUM];
382+
let status = unsafe { consensus_get_llm_validators(buf.as_mut_ptr() as i32, buf.len() as i32) };
383+
if status < 0 {
384+
return Err(status);
385+
}
386+
buf.truncate(status as usize);
387+
Ok(buf)
388+
}
389+
390+
/// Get the list of all registered hotkeys from the subnet metagraph.
391+
/// Returns JSON-encoded list of hotkey strings.
392+
pub fn host_consensus_get_registered_hotkeys() -> Result<Vec<u8>, i32> {
393+
let mut buf = vec![0u8; RESPONSE_BUF_MEDIUM];
394+
let status =
395+
unsafe { consensus_get_registered_hotkeys(buf.as_mut_ptr() as i32, buf.len() as i32) };
396+
if status < 0 {
397+
return Err(status);
398+
}
399+
buf.truncate(status as usize);
400+
Ok(buf)
401+
}

crates/core/src/state.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,10 @@ pub struct ChainState {
206206
/// Monotonic mutation sequence number for deterministic ordering
207207
#[serde(default)]
208208
pub mutation_sequence: u64,
209+
210+
/// Validators that have announced LLM inference capability (Chutes API key)
211+
#[serde(default, with = "hotkey_set_serde")]
212+
pub llm_capable_validators: std::collections::HashSet<Hotkey>,
209213
}
210214

211215
/// Route information for a challenge
@@ -242,6 +246,7 @@ impl Default for ChainState {
242246
paused: false,
243247
pause_reason: None,
244248
mutation_sequence: 0,
249+
llm_capable_validators: std::collections::HashSet::new(),
245250
}
246251
}
247252
}
@@ -268,6 +273,7 @@ impl ChainState {
268273
paused: false,
269274
pause_reason: None,
270275
mutation_sequence: 0,
276+
llm_capable_validators: std::collections::HashSet::new(),
271277
};
272278
state.update_hash();
273279
state

crates/core/src/state_versioning.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ impl ChainStateV1 {
162162
paused: false,
163163
pause_reason: None,
164164
mutation_sequence: 0,
165+
llm_capable_validators: HashSet::new(),
165166
}
166167
}
167168
}
@@ -216,6 +217,7 @@ impl ChainStateV2 {
216217
paused: false,
217218
pause_reason: None,
218219
mutation_sequence: 0,
220+
llm_capable_validators: HashSet::new(),
219221
}
220222
}
221223
}

crates/p2p-consensus/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ pub use messages::{
5454
StateChangeType, StateMutationProposalMessage, StateMutationType, StateMutationVoteMessage,
5555
StateRequest, StateResponse, StorageProposalMessage, StorageRootSyncMessage, StorageSyncEntry,
5656
StorageSyncRequestMessage, StorageSyncResponseMessage, StorageVoteMessage, SubmissionMessage,
57-
TaskProgressMessage, TaskResultMessage, ViewChangeMessage, ViewNumber, WeightVoteMessage,
57+
TaskProgressMessage, TaskResultMessage, ValidatorCapabilities, ViewChangeMessage, ViewNumber,
58+
WeightVoteMessage,
5859
};
5960
pub use network::{
6061
CombinedBehaviour, CombinedEvent, NetworkBehaviour, NetworkError, NetworkEvent, NetworkRunner,

crates/p2p-consensus/src/messages.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,17 @@ pub struct HeartbeatMessage {
432432
pub timestamp: i64,
433433
/// Signature
434434
pub signature: Vec<u8>,
435+
/// Validator capabilities (e.g. LLM inference available)
436+
#[serde(default)]
437+
pub capabilities: ValidatorCapabilities,
438+
}
439+
440+
/// Capabilities announced by a validator in heartbeats
441+
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
442+
pub struct ValidatorCapabilities {
443+
/// Whether this validator has a Chutes API key for LLM inference
444+
#[serde(default)]
445+
pub has_llm: bool,
435446
}
436447

437448
/// Peer announcement for discovery
@@ -894,6 +905,7 @@ mod tests {
894905
stake: 1_000_000_000_000,
895906
timestamp: 1234567890,
896907
signature: vec![0u8; 64],
908+
capabilities: Default::default(),
897909
});
898910

899911
let bytes = msg.to_bytes().expect("serialization should work");
@@ -926,6 +938,7 @@ mod tests {
926938
stake: 0,
927939
timestamp: 0,
928940
signature: vec![],
941+
capabilities: Default::default(),
929942
});
930943
assert_eq!(heartbeat.type_name(), "Heartbeat");
931944
}
@@ -971,6 +984,7 @@ mod tests {
971984
stake: 0,
972985
timestamp: 0,
973986
signature: vec![],
987+
capabilities: Default::default(),
974988
}),
975989
signer: Hotkey([1u8; 32]),
976990
signature: vec![],

0 commit comments

Comments
 (0)