Skip to content

Commit d0fbbff

Browse files
committed
Improve error checking in MPI backend
1 parent cebe42c commit d0fbbff

File tree

4 files changed

+78
-12
lines changed

4 files changed

+78
-12
lines changed

nestkernel/exceptions.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "exceptions.h"
2424

2525
// C++ includes:
26+
#include <mpi.h>
2627
#include <sstream>
2728

2829
// Generated includes:
@@ -398,6 +399,31 @@ nest::MPIPortsFileUnknown::message() const
398399
<< " which specifies the folder with files containing the MPI ports";
399400
return msg.str();
400401
}
402+
403+
404+
std::string
405+
nest::MPIPortsFileMissing::message() const
406+
{
407+
std::ostringstream msg;
408+
msg << "The node with ID " << node_id_ << " has no file that contains the MPI address.";
409+
return msg.str();
410+
}
411+
412+
std::string
413+
nest::MPIErrorCode::message() const
414+
{
415+
416+
char errmsg[ 256 ];
417+
int len;
418+
419+
MPI_Error_string( error_code_, errmsg, &len );
420+
std::string error;
421+
error.assign( errmsg, len );
422+
423+
std::ostringstream msg;
424+
msg << "MPI Error: " << error;
425+
return msg.str();
426+
}
401427
#endif
402428

403429
std::string

nestkernel/exceptions.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1188,6 +1188,34 @@ class MPIPortsFileUnknown : public KernelException
11881188

11891189
std::string message() const;
11901190

1191+
private:
1192+
const size_t node_id_;
1193+
};
1194+
1195+
class MPIErrorCode : public KernelException
1196+
{
1197+
public:
1198+
explicit MPIErrorCode( const int error_code )
1199+
: error_code_( error_code )
1200+
{
1201+
}
1202+
1203+
std::string message() const;
1204+
1205+
private:
1206+
int error_code_;
1207+
};
1208+
1209+
class MPIPortsFileMissing : public KernelException
1210+
{
1211+
public:
1212+
explicit MPIPortsFileMissing( const size_t node_id )
1213+
: node_id_( node_id )
1214+
{
1215+
}
1216+
1217+
std::string message() const;
1218+
11911219
private:
11921220
const size_t node_id_;
11931221
};

nestkernel/recording_backend_mpi.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -174,11 +174,13 @@ nest::RecordingBackendMPI::prepare()
174174
// 2) connect the thread to the MPI process it needs to be connected to
175175
for ( auto& it_comm : commMap_ )
176176
{
177-
MPI_Comm_connect( it_comm.first.data(),
178-
MPI_INFO_NULL,
179-
0,
180-
MPI_COMM_WORLD,
181-
std::get< 1 >( it_comm.second ) ); // should use the status for handle error
177+
int ret =
178+
MPI_Comm_connect( it_comm.first.data(), MPI_INFO_NULL, 0, MPI_COMM_WORLD, std::get< 1 >( it_comm.second ) );
179+
180+
if ( ret != MPI_SUCCESS )
181+
{
182+
throw MPIErrorCode( ret );
183+
}
182184
std::ostringstream msg;
183185
msg << "Connect to " << it_comm.first.data() << "\n";
184186
LOG( M_INFO, "MPI Record connect", msg.str() );
@@ -383,8 +385,12 @@ nest::RecordingBackendMPI::get_port( const size_t index_node, const std::string&
383385
}
384386

385387
basename << "/" << index_node << ".txt";
386-
std::cout << basename.rdbuf() << std::endl;
388+
std::cout << basename.str() << std::endl;
387389
std::ifstream file( basename.str() );
390+
if ( !file.good() )
391+
{
392+
throw MPIPortsFileMissing( index_node );
393+
}
388394
if ( file.is_open() )
389395
{
390396
getline( file, *port_name );

nestkernel/stimulation_backend_mpi.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -188,11 +188,13 @@ nest::StimulationBackendMPI::prepare()
188188
// 2) connect the master thread to the MPI process it needs to be connected to
189189
for ( auto& it_comm : commMap_ )
190190
{
191-
MPI_Comm_connect( it_comm.first.data(),
192-
MPI_INFO_NULL,
193-
0,
194-
MPI_COMM_WORLD,
195-
std::get< 0 >( it_comm.second ) ); // should use the status for handle error
191+
int ret =
192+
MPI_Comm_connect( it_comm.first.data(), MPI_INFO_NULL, 0, MPI_COMM_WORLD, std::get< 0 >( it_comm.second ) );
193+
194+
if ( ret != MPI_SUCCESS )
195+
{
196+
throw MPIErrorCode( ret );
197+
}
196198
std::ostringstream msg;
197199
msg << "Connect to " << it_comm.first.data() << "\n";
198200
LOG( M_INFO, "MPI Input connect", msg.str() );
@@ -311,8 +313,12 @@ nest::StimulationBackendMPI::get_port( const size_t index_node, const std::strin
311313
}
312314
// add the id of the device to the path
313315
basename << "/" << index_node << ".txt";
314-
std::cout << basename.rdbuf() << std::endl;
316+
std::cout << basename.str() << std::endl;
315317
std::ifstream file( basename.str() );
318+
if ( !file.good() )
319+
{
320+
throw MPIPortsFileMissing( index_node );
321+
}
316322

317323
// read the file
318324
if ( file.is_open() )

0 commit comments

Comments
 (0)