Skip to content

Commit 6ea1640

Browse files
Add option to C++ JSON Parser/Serializer to allow customers to affirmatively disable legacy bug-compatibilty behaviors.
Right now this flag already exists inside the implementation but is not exposed to callers. Since this implementation has now been stable and used for an extended period with this behavior, we did not continue working on adhoc burning down these nonconformant behaviors piecemeal, so by offering the flag 'publicly' it will enable users who want a more conformant implementation to get it much faster. The default behavior is unchanged. PiperOrigin-RevId: 791836360
1 parent 4659cd7 commit 6ea1640

File tree

10 files changed

+73
-34
lines changed

10 files changed

+73
-34
lines changed

src/google/protobuf/json/BUILD.bazel

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@ cc_library(
1818
strip_include_prefix = "/src",
1919
visibility = ["//visibility:public"],
2020
deps = [
21+
":lexer",
2122
":parser",
2223
":unparser",
24+
":writer",
2325
"//src/google/protobuf",
2426
"//src/google/protobuf:port",
2527
"//src/google/protobuf/io",

src/google/protobuf/json/internal/lexer.cc

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ absl::StatusOr<LocationWith<MaybeOwnedString>> JsonLexer::ParseUtf8() {
376376
// This is a non-standard extension accepted by the ESF parser that we will
377377
// need to accept for backwards-compat.
378378
bool is_single_quote = stream_.PeekChar() == '\'';
379-
if (!options_.allow_legacy_syntax && is_single_quote) {
379+
if (!options_.allow_legacy_nonconformant_behavior && is_single_quote) {
380380
return Invalid("expected '\"'");
381381
}
382382

@@ -423,14 +423,16 @@ absl::StatusOr<LocationWith<MaybeOwnedString>> JsonLexer::ParseUtf8() {
423423

424424
char c = stream_.PeekChar();
425425
RETURN_IF_ERROR(Advance(1));
426-
if (c == 'u' || (c == 'U' && options_.allow_legacy_syntax)) {
426+
if (c == 'u' ||
427+
(c == 'U' && options_.allow_legacy_nonconformant_behavior)) {
427428
// Ensure there is actual space to scribble the UTF-8 onto.
428429
on_heap.resize(on_heap.size() + 4);
429430
auto written = ParseUnicodeEscape(&on_heap[on_heap.size() - 4]);
430431
RETURN_IF_ERROR(written.status());
431432
on_heap.resize(on_heap.size() - 4 + *written);
432433
} else {
433-
char escape = ParseSimpleEscape(c, options_.allow_legacy_syntax);
434+
char escape = ParseSimpleEscape(
435+
c, options_.allow_legacy_nonconformant_behavior);
434436
if (escape == 0) {
435437
return Invalid(absl::StrFormat("invalid escape char: '%c'", c));
436438
}
@@ -444,7 +446,8 @@ absl::StatusOr<LocationWith<MaybeOwnedString>> JsonLexer::ParseUtf8() {
444446
// If people have newlines in their strings, that's their problem; it
445447
// is too difficult to support correctly in our location tracking, and
446448
// is out of spec, so users will get slightly wrong locations in errors.
447-
if ((uc < 0x20 || uc == 0xff) && !options_.allow_legacy_syntax) {
449+
if ((uc < 0x20 || uc == 0xff) &&
450+
!options_.allow_legacy_nonconformant_behavior) {
448451
return Invalid(absl::StrFormat(
449452
"invalid control character 0x%02x in string", uc));
450453
}

src/google/protobuf/json/internal/lexer.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#ifndef GOOGLE_PROTOBUF_JSON_INTERNAL_LEXER_H__
1010
#define GOOGLE_PROTOBUF_JSON_INTERNAL_LEXER_H__
1111

12+
#include <cstddef>
1213
#include <cstdint>
1314
#include <utility>
1415

@@ -48,7 +49,7 @@ struct ParseOptions {
4849
// What those extensions were is explicitly not documented, beyond what exists
4950
// in the unit tests; we intend to remove this setting eventually. See
5051
// b/234868512.
51-
bool allow_legacy_syntax = false;
52+
bool allow_legacy_nonconformant_behavior = false;
5253
};
5354

5455
// A position in JSON input, for error context.
@@ -271,7 +272,7 @@ absl::Status JsonLexer::VisitArray(F f) {
271272
has_comma = Peek(",");
272273
} while (!Peek("]"));
273274

274-
if (!options_.allow_legacy_syntax && has_comma) {
275+
if (!options_.allow_legacy_nonconformant_behavior && has_comma) {
275276
return Invalid("expected ']'");
276277
}
277278

@@ -302,7 +303,7 @@ absl::Status JsonLexer::VisitObject(F f) {
302303
absl::StatusOr<LocationWith<MaybeOwnedString>> key;
303304
if (stream_.PeekChar() == '"' || stream_.PeekChar() == '\'') {
304305
key = ParseUtf8();
305-
} else if (options_.allow_legacy_syntax) {
306+
} else if (options_.allow_legacy_nonconformant_behavior) {
306307
key = ParseBareWord();
307308
} else {
308309
return Invalid("expected '\"'");
@@ -315,7 +316,7 @@ absl::Status JsonLexer::VisitObject(F f) {
315316
} while (!Peek("}"));
316317
Pop();
317318

318-
if (!options_.allow_legacy_syntax && has_comma) {
319+
if (!options_.allow_legacy_nonconformant_behavior && has_comma) {
319320
return Invalid("expected '}'");
320321
}
321322

src/google/protobuf/json/internal/lexer_test.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ void BadInner(absl::string_view json, ParseOptions opts = {}) {
231231
void DoLegacy(absl::string_view json, std::function<void(const Value&)> test) {
232232
Do(json, [&](io::ZeroCopyInputStream* stream) {
233233
ParseOptions options;
234-
options.allow_legacy_syntax = true;
234+
options.allow_legacy_nonconformant_behavior = true;
235235
auto value = Value::Parse(stream, options);
236236
ASSERT_OK(value);
237237
test(*value);
@@ -242,7 +242,7 @@ void DoLegacy(absl::string_view json, std::function<void(const Value&)> test) {
242242
// Like Bad, but ensures json fails to parse in both modes.
243243
void Bad(absl::string_view json) {
244244
ParseOptions options;
245-
options.allow_legacy_syntax = true;
245+
options.allow_legacy_nonconformant_behavior = true;
246246
BadInner(json, options);
247247
BadInner(json);
248248
}

src/google/protobuf/json/internal/parser.cc

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@ absl::Status ParseSingular(JsonLexer& lex, Field<Traits> field,
482482
Traits::SetBool(field, msg, false);
483483
break;
484484
case JsonLexer::kStr: {
485-
if (!lex.options().allow_legacy_syntax) {
485+
if (!lex.options().allow_legacy_nonconformant_behavior) {
486486
goto bad;
487487
}
488488

@@ -672,7 +672,7 @@ absl::Status ParseArray(JsonLexer& lex, Field<Traits> field, Msg<Traits>& msg) {
672672
return ParseSingular<Traits>(lex, field, msg);
673673
}
674674

675-
if (lex.options().allow_legacy_syntax) {
675+
if (lex.options().allow_legacy_nonconformant_behavior) {
676676
RETURN_IF_ERROR(lex.Expect("null"));
677677
return EmitNull<Traits>(lex, field, msg);
678678
}
@@ -689,7 +689,7 @@ absl::Status ParseArray(JsonLexer& lex, Field<Traits> field, Msg<Traits>& msg) {
689689
// the custom parser handler.
690690
bool can_flatten =
691691
type != MessageType::kValue && type != MessageType::kList;
692-
if (can_flatten && lex.options().allow_legacy_syntax &&
692+
if (can_flatten && lex.options().allow_legacy_nonconformant_behavior &&
693693
lex.Peek(JsonLexer::kArr)) {
694694
// You read that right. In legacy mode, if we encounter an array within
695695
// an array, we just flatten it as part of the current array!
@@ -1023,7 +1023,7 @@ absl::Status ParseFieldMask(JsonLexer& lex, const Desc<Traits>& desc,
10231023
} else if (absl::ascii_isupper(c)) {
10241024
snake_path.push_back('_');
10251025
snake_path.push_back(absl::ascii_tolower(c));
1026-
} else if (lex.options().allow_legacy_syntax) {
1026+
} else if (lex.options().allow_legacy_nonconformant_behavior) {
10271027
snake_path.push_back(c);
10281028
} else {
10291029
return str->loc.Invalid("unexpected character in FieldMask");
@@ -1069,7 +1069,8 @@ absl::Status ParseAny(JsonLexer& lex, const Desc<Traits>& desc,
10691069
// limit.
10701070
JsonLexer any_lex(&in, lex.options(), &lex.path(), mark.loc);
10711071

1072-
if (!type_url.has_value() && !lex.options().allow_legacy_syntax) {
1072+
if (!type_url.has_value() &&
1073+
!lex.options().allow_legacy_nonconformant_behavior) {
10731074
return mark.loc.Invalid("missing @type in Any");
10741075
}
10751076

@@ -1085,7 +1086,7 @@ absl::Status ParseAny(JsonLexer& lex, const Desc<Traits>& desc,
10851086
});
10861087
} else {
10871088
// Empty {} is accepted in legacy mode.
1088-
ABSL_DCHECK(lex.options().allow_legacy_syntax);
1089+
ABSL_DCHECK(lex.options().allow_legacy_nonconformant_behavior);
10891090
RETURN_IF_ERROR(any_lex.VisitObject([&](auto&) {
10901091
return mark.loc.Invalid(
10911092
"in legacy mode, missing @type in Any is only allowed for an empty "
@@ -1253,9 +1254,9 @@ absl::Status ParseField(JsonLexer& lex, const Desc<Traits>& desc,
12531254
auto pop = lex.path().Push(name, Traits::FieldType(*field),
12541255
Traits::FieldTypeName(*field));
12551256

1256-
if (Traits::HasParsed(
1257-
*field, msg,
1258-
/*allow_repeated_non_oneof=*/lex.options().allow_legacy_syntax) &&
1257+
if (Traits::HasParsed(*field, msg,
1258+
/*allow_repeated_non_oneof=*/
1259+
lex.options().allow_legacy_nonconformant_behavior) &&
12591260
!lex.Peek(JsonLexer::kNull)) {
12601261
return lex.Invalid(absl::StrFormat(
12611262
"'%s' has already been set (either directly or as part of a oneof)",
@@ -1267,7 +1268,8 @@ absl::Status ParseField(JsonLexer& lex, const Desc<Traits>& desc,
12671268
}
12681269

12691270
if (Traits::IsRepeated(*field)) {
1270-
if (lex.options().allow_legacy_syntax && !lex.Peek(JsonLexer::kArr)) {
1271+
if (lex.options().allow_legacy_nonconformant_behavior &&
1272+
!lex.Peek(JsonLexer::kArr)) {
12711273
// The original ESF parser permits a single element in place of an array
12721274
// thereof.
12731275
return ParseSingular<Traits>(lex, *field, msg);
@@ -1297,7 +1299,8 @@ absl::Status ParseMessage(JsonLexer& lex, const Desc<Traits>& desc,
12971299
// It is not clear if this counts as out-of-spec, but we're treating it as
12981300
// such.
12991301
bool is_upcoming_object = lex.Peek(JsonLexer::kObj);
1300-
if (!(is_upcoming_object && lex.options().allow_legacy_syntax)) {
1302+
if (!(is_upcoming_object &&
1303+
lex.options().allow_legacy_nonconformant_behavior)) {
13011304
switch (type) {
13021305
case MessageType::kList:
13031306
return ParseListValue<Traits>(lex, desc, msg);

src/google/protobuf/json/internal/unparser.cc

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ absl::Status WriteSingular(JsonWriter& writer, Field<Traits> field,
129129
case FieldDescriptor::TYPE_FLOAT: {
130130
auto x = Traits::GetFloat(field, std::forward<Args>(args)...);
131131
RETURN_IF_ERROR(x.status());
132-
if (writer.options().allow_legacy_syntax && is_default &&
132+
if (writer.options().allow_legacy_nonconformant_behavior && is_default &&
133133
!std::isfinite(*x)) {
134134
*x = 0;
135135
}
@@ -139,7 +139,7 @@ absl::Status WriteSingular(JsonWriter& writer, Field<Traits> field,
139139
case FieldDescriptor::TYPE_DOUBLE: {
140140
auto x = Traits::GetDouble(field, std::forward<Args>(args)...);
141141
RETURN_IF_ERROR(x.status());
142-
if (writer.options().allow_legacy_syntax && is_default &&
142+
if (writer.options().allow_legacy_nonconformant_behavior && is_default &&
143143
!std::isfinite(*x)) {
144144
*x = 0;
145145
}
@@ -203,7 +203,7 @@ absl::Status WriteSingular(JsonWriter& writer, Field<Traits> field,
203203
auto x = Traits::GetString(field, writer.ScratchBuf(),
204204
std::forward<Args>(args)...);
205205
RETURN_IF_ERROR(x.status());
206-
if (writer.options().allow_legacy_syntax && is_default) {
206+
if (writer.options().allow_legacy_nonconformant_behavior && is_default) {
207207
// Although difficult to verify, it appears that the original ESF parser
208208
// fails to unescape the contents of a
209209
// google.protobuf.Field.default_value, which may potentially be
@@ -417,7 +417,7 @@ absl::Status WriteField(JsonWriter& writer, const Msg<Traits>& msg,
417417
// with an uppercase letter, and the Json name does not, we uppercase it.
418418
absl::string_view original_name = Traits::FieldName(field);
419419
absl::string_view json_name = Traits::FieldJsonName(field);
420-
if (writer.options().allow_legacy_syntax &&
420+
if (writer.options().allow_legacy_nonconformant_behavior &&
421421
absl::ascii_isupper(original_name[0]) &&
422422
!absl::ascii_isupper(json_name[0])) {
423423
writer.Write(MakeQuoted(absl::ascii_toupper(original_name[0]),
@@ -712,10 +712,11 @@ absl::Status WriteFieldMask(JsonWriter& writer, const Msg<Traits>& msg,
712712
} else if (absl::ascii_isdigit(c) || absl::ascii_islower(c) || c == '.') {
713713
writer.Write(c);
714714
} else if (c == '_' &&
715-
(!saw_under || writer.options().allow_legacy_syntax)) {
715+
(!saw_under ||
716+
writer.options().allow_legacy_nonconformant_behavior)) {
716717
saw_under = true;
717718
continue;
718-
} else if (!writer.options().allow_legacy_syntax) {
719+
} else if (!writer.options().allow_legacy_nonconformant_behavior) {
719720
return absl::InvalidArgumentError("unexpected character in FieldMask");
720721
} else {
721722
if (saw_under) {
@@ -744,7 +745,8 @@ absl::Status WriteAny(JsonWriter& writer, const Msg<Traits>& msg,
744745
return absl::OkStatus();
745746
} else if (!has_type_url) {
746747
return absl::InvalidArgumentError("broken Any: missing type URL");
747-
} else if (!has_value && !writer.options().allow_legacy_syntax) {
748+
} else if (!has_value &&
749+
!writer.options().allow_legacy_nonconformant_behavior) {
748750
return absl::InvalidArgumentError("broken Any: missing value");
749751
}
750752

src/google/protobuf/json/internal/writer.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#ifndef GOOGLE_PROTOBUF_JSON_INTERNAL_WRITER_H__
99
#define GOOGLE_PROTOBUF_JSON_INTERNAL_WRITER_H__
1010

11+
#include <cstddef>
1112
#include <cstdint>
1213
#include <string>
1314
#include <tuple>
@@ -50,7 +51,7 @@ struct WriterOptions {
5051
// What those extensions were is explicitly not documented, beyond what exists
5152
// in the unit tests; we intend to remove this setting eventually. See
5253
// b/234868512.
53-
bool allow_legacy_syntax = false;
54+
bool allow_legacy_nonconformant_behavior = false;
5455
};
5556

5657
template <typename Tuple, typename F, size_t... i>

src/google/protobuf/json/json.cc

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,13 @@
1212
#include "absl/status/status.h"
1313
#include "absl/strings/string_view.h"
1414
#include "google/protobuf/io/zero_copy_stream.h"
15+
#include "google/protobuf/io/zero_copy_stream_impl_lite.h"
16+
#include "google/protobuf/json/internal/lexer.h"
1517
#include "google/protobuf/json/internal/parser.h"
1618
#include "google/protobuf/json/internal/unparser.h"
19+
#include "google/protobuf/json/internal/writer.h"
20+
#include "google/protobuf/message.h"
1721
#include "google/protobuf/util/type_resolver.h"
18-
#include "google/protobuf/stubs/status_macros.h"
1922

2023
// Must be included last.
2124
#include "google/protobuf/port_def.inc"
@@ -38,7 +41,8 @@ absl::Status BinaryToJsonStream(google::protobuf::util::TypeResolver* resolver,
3841
opts.unquote_int64_if_possible = options.unquote_int64_if_possible;
3942

4043
// TODO: Drop this setting.
41-
opts.allow_legacy_syntax = true;
44+
opts.allow_legacy_nonconformant_behavior =
45+
options.allow_legacy_nonconformant_behavior;
4246

4347
return google::protobuf::json_internal::BinaryToJsonStream(
4448
resolver, type_url, binary_input, json_output, opts);
@@ -65,7 +69,8 @@ absl::Status JsonToBinaryStream(google::protobuf::util::TypeResolver* resolver,
6569
opts.case_insensitive_enum_parsing = options.case_insensitive_enum_parsing;
6670

6771
// TODO: Drop this setting.
68-
opts.allow_legacy_syntax = true;
72+
opts.allow_legacy_nonconformant_behavior =
73+
options.allow_legacy_nonconformant_behavior;
6974

7075
return google::protobuf::json_internal::JsonToBinaryStream(
7176
resolver, type_url, json_input, binary_output, opts);
@@ -100,7 +105,8 @@ absl::Status MessageToJsonStream(const Message& message,
100105
opts.unquote_int64_if_possible = options.unquote_int64_if_possible;
101106

102107
// TODO: Drop this setting.
103-
opts.allow_legacy_syntax = true;
108+
opts.allow_legacy_nonconformant_behavior =
109+
options.allow_legacy_nonconformant_behavior;
104110

105111
return google::protobuf::json_internal::MessageToJsonStream(message, json_output, opts);
106112
}
@@ -119,7 +125,8 @@ absl::Status JsonStreamToMessage(io::ZeroCopyInputStream* input,
119125
opts.case_insensitive_enum_parsing = options.case_insensitive_enum_parsing;
120126

121127
// TODO: Drop this setting.
122-
opts.allow_legacy_syntax = true;
128+
opts.allow_legacy_nonconformant_behavior =
129+
options.allow_legacy_nonconformant_behavior;
123130

124131
return google::protobuf::json_internal::JsonStreamToMessage(input, message, opts);
125132
}

src/google/protobuf/json/json.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ namespace google {
2424
namespace protobuf {
2525
namespace json {
2626
struct ParseOptions {
27+
// Whether some legacy non-spec behaviors are accepted for bug
28+
// compatibility reasons. Setting allow_legacy_nonconformant_behavior=false is
29+
// recommended for new code and is expected to eventually become the default.
30+
bool allow_legacy_nonconformant_behavior = true;
31+
2732
// Whether to ignore unknown JSON fields during parsing
2833
bool ignore_unknown_fields = false;
2934

@@ -36,6 +41,11 @@ struct ParseOptions {
3641
};
3742

3843
struct PrintOptions {
44+
// Whether some legacy non-spec behaviors are accepted for bug
45+
// compatibility reasons. Setting allow_legacy_nonconformant_behavior=false is
46+
// recommended for new code and is expected to eventually become the default.
47+
bool allow_legacy_nonconformant_behavior = true;
48+
3949
// Whether to add spaces, line breaks and indentation to make the JSON output
4050
// easy to read.
4151
bool add_whitespace = false;

src/google/protobuf/json/json_test.cc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,16 @@ TEST_P(JsonTest, TestAlwaysPrintFieldsWithNoPresence) {
247247
R"("repeatedStringPiece":[],"repeatedCord":[],"repeatedLazyMessage":[]})"));
248248
}
249249

250+
TEST_P(JsonTest, TestDisableLegacyNonconformantBehavior) {
251+
EXPECT_THAT(ToProto<TestMessage>("{\"repeated_bool_value\": true}"),
252+
StatusIs(absl::StatusCode::kOk));
253+
254+
ParseOptions options;
255+
options.allow_legacy_nonconformant_behavior = false;
256+
EXPECT_THAT(ToProto<TestMessage>("{\"repeated_bool_value\": true}", options),
257+
StatusIs(absl::StatusCode::kInvalidArgument));
258+
}
259+
250260
TEST_P(JsonTest, TestPreserveProtoFieldNames) {
251261
TestMessage m;
252262
m.mutable_message_value();

0 commit comments

Comments
 (0)