Skip to content

Commit b737f23

Browse files
authored
Merge pull request #3564 from heplesser/fix-update-timer
Fix data race in mechanism preventing excessive update times
2 parents c74189b + 33f83d5 commit b737f23

File tree

2 files changed

+23
-13
lines changed

2 files changed

+23
-13
lines changed

nestkernel/simulation_manager.cpp

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -831,8 +831,10 @@ nest::SimulationManager::update_()
831831
bool done_all = true;
832832
long old_to_step;
833833

834+
// These variables will be updated only by the master thread below
834835
double start_current_update = sw_simulate_.elapsed();
835836
bool update_time_limit_exceeded = false;
837+
// End of variables updated by master thread
836838

837839
std::vector< std::shared_ptr< WrappedThreadException > > exceptions_raised( kernel().vp_manager.get_num_threads() );
838840

@@ -1084,17 +1086,29 @@ nest::SimulationManager::update_()
10841086
print_progress_();
10851087
}
10861088

1087-
// We cannot throw exception inside master, would not get caught.
1089+
// Track time needed for single update cycle
10881090
const double end_current_update = sw_simulate_.elapsed();
10891091
const double update_time = end_current_update - start_current_update;
1090-
update_time_limit_exceeded = update_time > update_time_limit_;
1092+
start_current_update = end_current_update;
1093+
10911094
min_update_time_ = std::min( min_update_time_, update_time );
10921095
max_update_time_ = std::max( max_update_time_, update_time );
1093-
start_current_update = end_current_update;
1096+
1097+
// If the simulation slowed down excessively, we cannot throw an exception here
1098+
// in the master section, as it will not be caught by our mechanism for handling
1099+
// exceptions in parallel context. So we set a flag and process it immediately
1100+
// after the master section.
1101+
update_time_limit_exceeded = update_time > update_time_limit_;
10941102
}
10951103
// end of master section, all threads have to synchronize at this point
10961104
#pragma omp barrier
10971105

1106+
if ( update_time_limit_exceeded )
1107+
{
1108+
LOG( M_ERROR, "SimulationManager::update", "Update time limit exceeded." );
1109+
throw KernelException();
1110+
}
1111+
10981112
// if block to avoid omp barrier if SIONLIB is not used
10991113
#ifdef HAVE_SIONLIB
11001114
kernel().io_manager.post_step_hook();
@@ -1104,15 +1118,7 @@ nest::SimulationManager::update_()
11041118
kernel().get_omp_synchronization_simulation_stopwatch().stop();
11051119
#endif
11061120

1107-
const double end_current_update = sw_simulate_.elapsed();
1108-
if ( end_current_update - start_current_update > update_time_limit_ )
1109-
{
1110-
LOG( M_ERROR, "SimulationManager::update", "Update time limit exceeded." );
1111-
throw KernelException();
1112-
}
1113-
start_current_update = end_current_update;
1114-
1115-
} while ( to_do_ > 0 and not update_time_limit_exceeded and not exceptions_raised.at( tid ) );
1121+
} while ( to_do_ > 0 );
11161122

11171123
// End of the slice, we update the number of synaptic elements
11181124
for ( SparseNodeArray::const_iterator i = kernel().node_manager.get_local_nodes( tid ).begin();

testsuite/summarize_tests.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ def parse_result_file(fname):
122122
results[ph_name] = {"Tests": 0, "Skipped": 0, "Failures": 0, "Errors": 0, "Time": 0, "Failed tests": [msg]}
123123
totals["Failed tests"].append(msg)
124124

125+
missing_phases = []
125126
cols = ["Tests", "Skipped", "Failures", "Errors", "Time"]
126127

127128
col_w = max(len(c) for c in cols) + 2
@@ -143,7 +144,10 @@ def parse_result_file(fname):
143144
print(tline)
144145
for pn, pr in results.items():
145146
print(f"{pn:<{first_col_w}s}", end="")
146-
if pr["Tests"] == 0 and pr["Failed tests"]:
147+
if pr is None:
148+
print(f"{'--- RESULTS MISSING FOR PHASE ---':^{len(cols) * col_w}}")
149+
missing_phases.append(pn)
150+
elif pr["Tests"] == 0 and pr["Failed tests"]:
147151
print(f"{'--- XML PARSING FAILURE ---':^{len(cols) * col_w}}")
148152
else:
149153
for c in cols:

0 commit comments

Comments
 (0)