Skip to content

Commit 8d1a027

Browse files
committed
Revert "Remove network starter that is no longer needed (#6400)"
This reverts commit b601d57.
1 parent ca78179 commit 8d1a027

13 files changed

Lines changed: 105 additions & 19 deletions

File tree

cumulus/client/relay-chain-minimal-node/src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ async fn new_minimal_relay_chain<Block: BlockT, Network: NetworkBackend<RelayBlo
224224
.chain_get_header(None)
225225
.await?
226226
.ok_or_else(|| RelayChainError::RpcCallError("Unable to fetch best header".to_string()))?;
227-
let (network, sync_service) = build_collator_network::<Network>(
227+
let (network, network_starter, sync_service) = build_collator_network::<Network>(
228228
&config,
229229
net_config,
230230
task_manager.spawn_handle(),
@@ -262,6 +262,8 @@ async fn new_minimal_relay_chain<Block: BlockT, Network: NetworkBackend<RelayBlo
262262
let overseer_handle =
263263
collator_overseer::spawn_overseer(overseer_args, &task_manager, relay_chain_rpc_client)?;
264264

265+
network_starter.start_network();
266+
265267
Ok(NewMinimalNode { task_manager, overseer_handle })
266268
}
267269

cumulus/client/relay-chain-minimal-node/src/network.rs

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ use sc_network::{
2929

3030
use sc_network::{config::FullNetworkConfiguration, NetworkBackend, NotificationService};
3131
use sc_network_common::{role::Roles, sync::message::BlockAnnouncesHandshake};
32-
use sc_service::{error::Error, Configuration, SpawnTaskHandle};
32+
use sc_service::{error::Error, Configuration, NetworkStarter, SpawnTaskHandle};
3333

3434
use std::{iter, sync::Arc};
3535

@@ -41,7 +41,10 @@ pub(crate) fn build_collator_network<Network: NetworkBackend<Block, Hash>>(
4141
genesis_hash: Hash,
4242
best_header: Header,
4343
notification_metrics: NotificationMetrics,
44-
) -> Result<(Arc<dyn NetworkService>, Arc<dyn sp_consensus::SyncOracle + Send + Sync>), Error> {
44+
) -> Result<
45+
(Arc<dyn NetworkService>, NetworkStarter, Arc<dyn sp_consensus::SyncOracle + Send + Sync>),
46+
Error,
47+
> {
4548
let protocol_id = config.protocol_id();
4649
let (block_announce_config, _notification_service) = get_block_announce_proto_config::<Network>(
4750
protocol_id.clone(),
@@ -82,16 +85,31 @@ pub(crate) fn build_collator_network<Network: NetworkBackend<Block, Hash>>(
8285
let network_worker = Network::new(network_params)?;
8386
let network_service = network_worker.network_service();
8487

88+
let (network_start_tx, network_start_rx) = futures::channel::oneshot::channel();
89+
8590
// The network worker is responsible for gathering all network messages and processing
8691
// them. This is quite a heavy task, and at the time of the writing of this comment it
8792
// frequently happens that this future takes several seconds or in some situations
8893
// even more than a minute until it has processed its entire queue. This is clearly an
8994
// issue, and ideally we would like to fix the network future to take as little time as
9095
// possible, but we also take the extra harm-prevention measure to execute the networking
9196
// future using `spawn_blocking`.
92-
spawn_handle.spawn_blocking("network-worker", Some("networking"), network_worker.run());
97+
spawn_handle.spawn_blocking("network-worker", Some("networking"), async move {
98+
if network_start_rx.await.is_err() {
99+
tracing::warn!(
100+
"The NetworkStart returned as part of `build_network` has been silently dropped"
101+
);
102+
// This `return` might seem unnecessary, but we don't want to make it look like
103+
// everything is working as normal even though the user is clearly misusing the API.
104+
return
105+
}
106+
107+
network_worker.run().await;
108+
});
109+
110+
let network_starter = NetworkStarter::new(network_start_tx);
93111

94-
Ok((network_service, Arc::new(SyncOracle {})))
112+
Ok((network_service, network_starter, Arc::new(SyncOracle {})))
95113
}
96114

97115
fn adjust_network_config_light_in_peers(config: &mut NetworkConfiguration) {

cumulus/client/service/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ use sc_consensus::{
4040
use sc_network::{config::SyncMode, service::traits::NetworkService, NetworkBackend};
4141
use sc_network_sync::SyncingService;
4242
use sc_network_transactions::TransactionsHandlerController;
43-
use sc_service::{Configuration, SpawnTaskHandle, TaskManager, WarpSyncConfig};
43+
use sc_service::{Configuration, NetworkStarter, SpawnTaskHandle, TaskManager, WarpSyncConfig};
4444
use sc_telemetry::{log, TelemetryWorkerHandle};
4545
use sc_utils::mpsc::TracingUnboundedSender;
4646
use sp_api::ProvideRuntimeApi;
@@ -439,6 +439,7 @@ pub async fn build_network<'a, Block, Client, RCInterface, IQ, Network>(
439439
Arc<dyn NetworkService>,
440440
TracingUnboundedSender<sc_rpc::system::Request<Block>>,
441441
TransactionsHandlerController<Block::Hash>,
442+
NetworkStarter,
442443
Arc<SyncingService<Block>>,
443444
)>
444445
where

cumulus/polkadot-omni-node/lib/src/common/spec.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ pub(crate) trait NodeSpec: BaseNodeSpec {
289289
prometheus_registry.clone(),
290290
);
291291

292-
let (network, system_rpc_tx, tx_handler_controller, sync_service) =
292+
let (network, system_rpc_tx, tx_handler_controller, start_network, sync_service) =
293293
build_network(BuildNetworkParams {
294294
parachain_config: &parachain_config,
295295
net_config,
@@ -397,6 +397,8 @@ pub(crate) trait NodeSpec: BaseNodeSpec {
397397
)?;
398398
}
399399

400+
start_network.start_network();
401+
400402
Ok(task_manager)
401403
};
402404

cumulus/polkadot-omni-node/lib/src/nodes/manual_seal.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ impl<NodeSpec: NodeSpecT> ManualSealNode<NodeSpec> {
103103
config.prometheus_config.as_ref().map(|cfg| &cfg.registry),
104104
);
105105

106-
let (network, system_rpc_tx, tx_handler_controller, sync_service) =
106+
let (network, system_rpc_tx, tx_handler_controller, start_network, sync_service) =
107107
sc_service::build_network(sc_service::BuildNetworkParams {
108108
config: &config,
109109
client: client.clone(),
@@ -247,6 +247,7 @@ impl<NodeSpec: NodeSpecT> ManualSealNode<NodeSpec> {
247247
telemetry: telemetry.as_mut(),
248248
})?;
249249

250+
start_network.start_network();
250251
Ok(task_manager)
251252
}
252253
}

cumulus/test/service/src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,7 @@ where
374374
prometheus_registry.clone(),
375375
);
376376

377-
let (network, system_rpc_tx, tx_handler_controller, sync_service) =
377+
let (network, system_rpc_tx, tx_handler_controller, start_network, sync_service) =
378378
build_network(BuildNetworkParams {
379379
parachain_config: &parachain_config,
380380
net_config,
@@ -540,6 +540,8 @@ where
540540
}
541541
}
542542

543+
start_network.start_network();
544+
543545
Ok((task_manager, client, network, rpc_handlers, transaction_pool, backend))
544546
}
545547

polkadot/node/service/src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1003,7 +1003,7 @@ pub fn new_full<
10031003
})
10041004
};
10051005

1006-
let (network, system_rpc_tx, tx_handler_controller, sync_service) =
1006+
let (network, system_rpc_tx, tx_handler_controller, network_starter, sync_service) =
10071007
sc_service::build_network(sc_service::BuildNetworkParams {
10081008
config: &config,
10091009
net_config,
@@ -1383,6 +1383,8 @@ pub fn new_full<
13831383
);
13841384
}
13851385

1386+
network_starter.start_network();
1387+
13861388
Ok(NewFull {
13871389
task_manager,
13881390
client,

substrate/bin/node/cli/src/service.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -513,7 +513,7 @@ pub fn new_full_base<N: NetworkBackend<Block, <Block as BlockT>::Hash>>(
513513
Vec::default(),
514514
));
515515

516-
let (network, system_rpc_tx, tx_handler_controller, sync_service) =
516+
let (network, system_rpc_tx, tx_handler_controller, network_starter, sync_service) =
517517
sc_service::build_network(sc_service::BuildNetworkParams {
518518
config: &config,
519519
net_config,
@@ -801,6 +801,7 @@ pub fn new_full_base<N: NetworkBackend<Block, <Block as BlockT>::Hash>>(
801801
);
802802
}
803803

804+
network_starter.start_network();
804805
Ok(NewFullBase {
805806
task_manager,
806807
client,

substrate/client/service/src/builder.rs

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use crate::{
2525
start_rpc_servers, BuildGenesisBlock, GenesisBlockBuilder, RpcHandlers, SpawnTaskHandle,
2626
TaskManager, TransactionPoolAdapter,
2727
};
28-
use futures::{select, FutureExt, StreamExt};
28+
use futures::{channel::oneshot, select, FutureExt, StreamExt};
2929
use jsonrpsee::RpcModule;
3030
use log::info;
3131
use prometheus_endpoint::Registry;
@@ -845,6 +845,7 @@ pub fn build_network<Block, Net, TxPool, IQ, Client>(
845845
Arc<dyn sc_network::service::traits::NetworkService>,
846846
TracingUnboundedSender<sc_rpc::system::Request<Block>>,
847847
sc_network_transactions::TransactionsHandlerController<<Block as BlockT>::Hash>,
848+
NetworkStarter,
848849
Arc<SyncingService<Block>>,
849850
),
850851
Error,
@@ -1006,6 +1007,7 @@ pub fn build_network_advanced<Block, Net, TxPool, IQ, Client>(
10061007
Arc<dyn sc_network::service::traits::NetworkService>,
10071008
TracingUnboundedSender<sc_rpc::system::Request<Block>>,
10081009
sc_network_transactions::TransactionsHandlerController<<Block as BlockT>::Hash>,
1010+
NetworkStarter,
10091011
Arc<SyncingService<Block>>,
10101012
),
10111013
Error,
@@ -1146,16 +1148,49 @@ where
11461148
announce_block,
11471149
);
11481150

1151+
// TODO: Normally, one is supposed to pass a list of notifications protocols supported by the
1152+
// node through the `NetworkConfiguration` struct. But because this function doesn't know in
1153+
// advance which components, such as GrandPa or Polkadot, will be plugged on top of the
1154+
// service, it is unfortunately not possible to do so without some deep refactoring. To
1155+
// bypass this problem, the `NetworkService` provides a `register_notifications_protocol`
1156+
// method that can be called even after the network has been initialized. However, we want to
1157+
// avoid the situation where `register_notifications_protocol` is called *after* the network
1158+
// actually connects to other peers. For this reason, we delay the process of the network
1159+
// future until the user calls `NetworkStarter::start_network`.
1160+
//
1161+
// This entire hack should eventually be removed in favour of passing the list of protocols
1162+
// through the configuration.
1163+
//
1164+
// See also https://github.com/paritytech/substrate/issues/6827
1165+
let (network_start_tx, network_start_rx) = oneshot::channel();
1166+
11491167
// The network worker is responsible for gathering all network messages and processing
11501168
// them. This is quite a heavy task, and at the time of the writing of this comment it
11511169
// frequently happens that this future takes several seconds or in some situations
11521170
// even more than a minute until it has processed its entire queue. This is clearly an
11531171
// issue, and ideally we would like to fix the network future to take as little time as
11541172
// possible, but we also take the extra harm-prevention measure to execute the networking
11551173
// future using `spawn_blocking`.
1156-
spawn_handle.spawn_blocking("network-worker", Some("networking"), future);
1174+
spawn_handle.spawn_blocking("network-worker", Some("networking"), async move {
1175+
if network_start_rx.await.is_err() {
1176+
log::warn!(
1177+
"The NetworkStart returned as part of `build_network` has been silently dropped"
1178+
);
1179+
// This `return` might seem unnecessary, but we don't want to make it look like
1180+
// everything is working as normal even though the user is clearly misusing the API.
1181+
return
1182+
}
1183+
1184+
future.await
1185+
});
11571186

1158-
Ok((network, system_rpc_tx, tx_handler_controller, sync_service.clone()))
1187+
Ok((
1188+
network,
1189+
system_rpc_tx,
1190+
tx_handler_controller,
1191+
NetworkStarter(network_start_tx),
1192+
sync_service.clone(),
1193+
))
11591194
}
11601195

11611196
/// Configuration for [`build_default_syncing_engine`].
@@ -1384,3 +1419,21 @@ where
13841419
warp_sync_protocol_name,
13851420
)?))
13861421
}
1422+
1423+
/// Object used to start the network.
1424+
#[must_use]
1425+
pub struct NetworkStarter(oneshot::Sender<()>);
1426+
1427+
impl NetworkStarter {
1428+
/// Create a new NetworkStarter
1429+
pub fn new(sender: oneshot::Sender<()>) -> Self {
1430+
NetworkStarter(sender)
1431+
}
1432+
1433+
/// Start the network. Call this after all sub-components have been initialized.
1434+
///
1435+
/// > **Note**: If you don't call this function, the networking will not work.
1436+
pub fn start_network(self) {
1437+
let _ = self.0.send(());
1438+
}
1439+
}

substrate/client/service/src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ pub use self::{
6161
new_client, new_db_backend, new_full_client, new_full_parts, new_full_parts_record_import,
6262
new_full_parts_with_genesis_builder, new_wasm_executor,
6363
propagate_transaction_notifications, spawn_tasks, BuildNetworkAdvancedParams,
64-
BuildNetworkParams, DefaultSyncingEngineConfig, KeystoreContainer, SpawnTasksParams,
65-
TFullBackend, TFullCallExecutor, TFullClient,
64+
BuildNetworkParams, DefaultSyncingEngineConfig, KeystoreContainer, NetworkStarter,
65+
SpawnTasksParams, TFullBackend, TFullCallExecutor, TFullClient,
6666
},
6767
client::{ClientConfig, LocalCallExecutor},
6868
error::Error,

0 commit comments

Comments
 (0)