
Commit af60267

Merge pull request #7093 from reyoung/feature/check_nan_executor
Feature/check nan executor
2 parents 93eaa8e + 5139e6c commit af60267

4 files changed: +32 -11 lines changed

paddle/framework/executor.cc

Lines changed: 27 additions & 6 deletions
@@ -14,18 +14,17 @@ limitations under the License. */

 #include "paddle/framework/executor.h"

-#include <algorithm>
-#include <iostream>
-#include <memory>
 #include <set>
-#include <vector>

+#include "gflags/gflags.h"
 #include "paddle/framework/feed_fetch_type.h"
 #include "paddle/framework/lod_rank_table.h"
-#include "paddle/framework/lod_tensor.h"
 #include "paddle/framework/lod_tensor_array.h"
 #include "paddle/framework/op_registry.h"
-#include "paddle/framework/scope.h"
+
+DEFINE_bool(check_nan_inf, false,
+            "Checking whether operator produce NAN/INF or not. It will be "
+            "extremely slow so please use this flag wisely.");

 namespace paddle {
 namespace framework {
@@ -58,6 +57,19 @@ static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) {
   }
 }

+static void CheckTensorNANOrInf(const std::string& name,
+                                const framework::Tensor& tensor) {
+  if (tensor.memory_size() == 0) {
+    return;
+  }
+  if (tensor.type().hash_code() != typeid(float).hash_code() &&
+      tensor.type().hash_code() != typeid(double).hash_code()) {
+    return;
+  }
+  PADDLE_ENFORCE(!framework::HasInf(tensor), "Tensor %s has Inf", name);
+  PADDLE_ENFORCE(!framework::HasNAN(tensor), "Tensor %s has NAN", name);
+}
+
 void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
                    bool create_local_scope, bool create_vars) {
   // TODO(tonyyang-svail):
@@ -101,6 +113,15 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
       auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
       VLOG(3) << op->DebugString();
       op->Run(*local_scope, place_);
+      if (FLAGS_check_nan_inf) {
+        for (auto& vname : op->OutputVars(true)) {
+          auto* var = local_scope->FindVar(vname);
+          if (var == nullptr) continue;
+          if (var->IsType<framework::LoDTensor>()) {
+            CheckTensorNANOrInf(vname, var->Get<framework::LoDTensor>());
+          }
+        }
+      }
     }
     if (create_vars && create_local_scope) {
       scope->DeleteScope(local_scope);

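For readers skimming the diff: CheckTensorNANOrInf only guards float/double tensors and defers the actual scan to framework::HasNAN / framework::HasInf. The standalone sketch below illustrates the kind of element-wise check such helpers perform on CPU data; it is an illustration of the semantics only, not Paddle's implementation, and the names BufferHasNAN/BufferHasInf are made up for this example.

```cpp
// Illustrative only: the kind of element-wise scan a CPU-side NaN/Inf check
// performs. BufferHasNAN/BufferHasInf are hypothetical helpers for this
// sketch, not part of Paddle.
#include <cmath>
#include <cstddef>
#include <iostream>

static bool BufferHasNAN(const float* data, std::size_t numel) {
  for (std::size_t i = 0; i < numel; ++i) {
    if (std::isnan(data[i])) return true;  // NaN found
  }
  return false;
}

static bool BufferHasInf(const float* data, std::size_t numel) {
  for (std::size_t i = 0; i < numel; ++i) {
    if (std::isinf(data[i])) return true;  // +inf or -inf found
  }
  return false;
}

int main() {
  const float buf[3] = {0.0f, NAN, 1.0f};
  std::cout << BufferHasNAN(buf, 3) << " " << BufferHasInf(buf, 3) << "\n";  // prints "1 0"
  return 0;
}
```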
paddle/framework/tensor_util.h

Lines changed: 2 additions & 2 deletions
@@ -210,10 +210,10 @@ inline void CopyToVector(const Tensor& src, std::vector<T>* dst) {
 }

 // Returns true if a tensor contains NAN, i.e., Not A Number.
-extern bool HasNAN(const framework::Tensor& tensor);
+bool HasNAN(const framework::Tensor& tensor);

 // Returns true if a tensor contains Inf, i.e., Infinity.
-extern bool HasInf(const framework::Tensor& tensor);
+bool HasInf(const framework::Tensor& tensor);

 inline void SerializeToStream(std::ostream& os, const Tensor& tensor,
                               const platform::DeviceContext& dev_ctx) {

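Dropping `extern` here is purely cosmetic: a free-function declaration at namespace scope has external linkage whether or not `extern` is written, so both forms declare exactly the same thing. A declaration-only fragment with placeholder names, just to illustrate the point:

```cpp
// Both declarations are identical in meaning; `extern` on a function
// declaration is redundant. (Placeholder name, illustration only.)
extern bool HasSomething(const float* data);  // explicit extern
bool HasSomething(const float* data);         // same declaration, extern implied
```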
paddle/framework/tensor_util_test.cc

Lines changed: 2 additions & 2 deletions
@@ -231,7 +231,7 @@ TEST(CopyToVector, Tensor) {
 #endif
 }

-TEST(IsNAN, CPU) {
+TEST(HasNAN, CPU) {
   using namespace paddle::framework;
   using namespace paddle::platform;
   Tensor src;
@@ -243,7 +243,7 @@ TEST(IsNAN, CPU) {
   ASSERT_TRUE(HasNAN(src));
 }

-TEST(IsInf, CPU) {
+TEST(HasInf, CPU) {
   using namespace paddle::framework;
   using namespace paddle::platform;
   Tensor src;

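The renamed test bodies are truncated in the hunks above. As context only, here is a hedged sketch of what a CPU-side check of this shape could look like, assuming the framework's Tensor::mutable_data, make_ddim, and platform::CPUPlace APIs; it is not the actual test body from this commit.

```cpp
// Sketch only: a gtest-style CPU check in the spirit of TEST(HasNAN, CPU).
// Assumes paddle::framework::Tensor::mutable_data, make_ddim and
// paddle::platform::CPUPlace; not the actual test body from this commit.
#include <cmath>
#include "gtest/gtest.h"
#include "paddle/framework/tensor_util.h"

TEST(HasNAN, CPUSketch) {
  using paddle::framework::Tensor;
  using paddle::framework::make_ddim;
  using paddle::framework::HasNAN;
  using paddle::platform::CPUPlace;

  Tensor src;
  float* buf = src.mutable_data<float>(make_ddim({3}), CPUPlace());
  buf[0] = 0.0f;
  buf[1] = NAN;   // inject a NaN so HasNAN should report true
  buf[2] = 0.0f;
  ASSERT_TRUE(HasNAN(src));
}
```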
python/paddle/v2/fluid/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ def __read_gflags_from_env__():
     """
     import sys
     import core
-    read_env_flags = ['use_pinned_memory']
+    read_env_flags = ['use_pinned_memory', 'check_nan_inf']
     if core.is_compile_gpu():
         read_env_flags.append('fraction_of_gpu_memory_to_use')
     core.init_gflags([sys.argv[0]] +

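On the Python side the change only adds the flag name to the list that __read_gflags_from_env__ forwards to core.init_gflags; the flag itself is an ordinary gflags boolean defined in executor.cc. The standalone sketch below shows how such a DEFINE_bool flag behaves with gflags outside of Paddle, e.g. `./a.out --check_nan_inf=true`. Presumably the Fluid wrapper lets the same flag be set through a corresponding FLAGS_check_nan_inf environment variable before the module is imported, but that convention is an assumption here, not something shown in the diff.

```cpp
// Standalone gflags sketch, independent of Paddle: defines the same kind of
// boolean flag as executor.cc and parses it from the command line.
#include <iostream>
#include "gflags/gflags.h"

DEFINE_bool(check_nan_inf, false,
            "Whether to check operator outputs for NaN/Inf after each run.");

int main(int argc, char* argv[]) {
  gflags::ParseCommandLineFlags(&argc, &argv, /*remove_flags=*/true);
  std::cout << "check_nan_inf = " << std::boolalpha << FLAGS_check_nan_inf
            << std::endl;
  return 0;
}
```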