Skip to content

Commit c36f728

Browse files
habermancopybara-github
authored andcommitted
Fixed a parser bug where closed enums are parsed incorrectly for non-repeated extensions.
The bug is triggered whenever we parse a value for a closed enum that is not a declared member of the enum. The proper behavior in this case is to put the value to the unknown fields, leaving the extension unset. The first part is being performed correctly -- we are indeed putting the value into the unknown fields. But we currently detect this condition rather late in the decoding pipeline, after the extension has already been created. This causes a stray `0` value to be left in the extension for this field (or, in the unlikely case that the message already contained a value for this field, the extension will contain whatever data was already present in the message). The observable effects of this bug are: 1. If an invalid value is parsed for a closed enum extension, `HasExtension(foo)` will return true and `GetExtension(foo)` will return a 0, instead of the correct behavior, which is that `HasExtension(foo)` should return false. 2. If the message is later serialized, the extension will be serialized twice, once from the extension list (which will serialize a 0 value) and once from the unknown fields (which will serialize the correct value). This should not be observable in most cases, since the bug only occurs for non-repeated fields, and parsers will overwrite the existing value if a non-repeated field is encountered multiple times on the wire. PiperOrigin-RevId: 799202921
1 parent d4fed8e commit c36f728

File tree

2 files changed

+30
-57
lines changed

2 files changed

+30
-57
lines changed

python/google/protobuf/internal/message_test.py

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1492,25 +1492,15 @@ def testClosedEnumExtension(self, message_module):
14921492
m.ParseFromString(b'\xa8\x01\x7f')
14931493
unknown = unknown_fields.UnknownFieldSet(m)
14941494

1495-
# All implementations put the data into unknown fields.
1495+
# The data is present in unknown fields.
14961496
self.assertEqual(unknown[0].field_number, 21)
14971497
self.assertEqual(unknown[0].wire_type, wire_format.WIRETYPE_VARINT)
14981498
self.assertEqual(unknown[0].data, 0x7f)
14991499

1500-
if api_implementation.Type() == 'upb':
1501-
# upb incorrectly leaves a '0' value in the actual extension field.
1502-
# TODO: fix upb to not do this.
1503-
self.assertTrue(
1504-
m.HasExtension(unittest_pb2.optional_nested_enum_extension)
1505-
)
1506-
self.assertEqual(
1507-
m.Extensions[unittest_pb2.optional_nested_enum_extension], 0
1508-
)
1509-
else:
1510-
# Pure python and cpp extensions correctly have no extension present.
1511-
self.assertFalse(
1512-
m.HasExtension(unittest_pb2.optional_nested_enum_extension)
1513-
)
1500+
# There is no extension present.
1501+
self.assertFalse(
1502+
m.HasExtension(unittest_pb2.optional_nested_enum_extension)
1503+
)
15141504

15151505
def testAssignBoolToInt(self, message_module):
15161506
with warnings.catch_warnings(record=True) as w:

upb/wire/decode.c

Lines changed: 25 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ enum {
7575
kUpb_DecodeOp_Scalar1Byte = 0,
7676
kUpb_DecodeOp_Scalar4Byte = 2,
7777
kUpb_DecodeOp_Scalar8Byte = 3,
78-
kUpb_DecodeOp_Enum = 1,
7978

8079
// Scalar/repeated ops.
8180
kUpb_DecodeOp_String = 4,
@@ -221,8 +220,8 @@ static void _upb_Decoder_MungeInt32(wireval* val) {
221220
}
222221
}
223222

224-
static void _upb_Decoder_Munge(int type, wireval* val) {
225-
switch (type) {
223+
static void _upb_Decoder_Munge(const upb_MiniTableField* field, wireval* val) {
224+
switch (field->UPB_PRIVATE(descriptortype)) {
226225
case kUpb_FieldType_Bool:
227226
val->bool_val = val->uint64_val != 0;
228227
break;
@@ -238,9 +237,10 @@ static void _upb_Decoder_Munge(int type, wireval* val) {
238237
}
239238
case kUpb_FieldType_Int32:
240239
case kUpb_FieldType_UInt32:
241-
case kUpb_FieldType_Enum:
242240
_upb_Decoder_MungeInt32(val);
243241
break;
242+
case kUpb_FieldType_Enum:
243+
UPB_UNREACHABLE();
244244
}
245245
}
246246

@@ -385,13 +385,9 @@ static char* upb_Decoder_EncodeVarint32(uint32_t val, char* ptr) {
385385
}
386386

387387
UPB_FORCEINLINE
388-
bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr, upb_Message* msg,
389-
const upb_MiniTableEnum* e,
390-
const upb_MiniTableField* field, wireval* val) {
391-
const uint32_t v = val->uint32_val;
392-
393-
if (UPB_LIKELY(upb_MiniTableEnum_CheckValue(e, v))) return true;
394-
388+
void _upb_Decoder_AddEnumValueToUnknown(upb_Decoder* d, upb_Message* msg,
389+
const upb_MiniTableField* field,
390+
wireval* val) {
395391
// Unrecognized enum goes into unknown fields.
396392
// For packed fields the tag could be arbitrarily far in the past,
397393
// so we just re-encode the tag and value here.
@@ -403,27 +399,12 @@ bool _upb_Decoder_CheckEnum(upb_Decoder* d, const char* ptr, upb_Message* msg,
403399
char buf[2 * kUpb_Decoder_EncodeVarint32MaxSize];
404400
char* end = buf;
405401
end = upb_Decoder_EncodeVarint32(tag, end);
406-
end = upb_Decoder_EncodeVarint32(v, end);
402+
end = upb_Decoder_EncodeVarint32(val->uint64_val, end);
407403

408404
if (!UPB_PRIVATE(_upb_Message_AddUnknown)(unknown_msg, buf, end - buf,
409405
&d->arena, NULL)) {
410406
_upb_Decoder_ErrorJmp(d, kUpb_DecodeStatus_OutOfMemory);
411407
}
412-
return false;
413-
}
414-
415-
UPB_NOINLINE
416-
static const char* _upb_Decoder_DecodeEnumArray(
417-
upb_Decoder* d, const char* ptr, upb_Message* msg, upb_Array* arr,
418-
const upb_MiniTableSubInternal* subs, const upb_MiniTableField* field,
419-
wireval* val) {
420-
const upb_MiniTableEnum* e = _upb_MiniTableSubs_EnumByField(subs, field);
421-
if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, val)) return ptr;
422-
void* mem = UPB_PTR_AT(upb_Array_MutableDataPtr(arr),
423-
arr->UPB_PRIVATE(size) * 4, void);
424-
arr->UPB_PRIVATE(size)++;
425-
memcpy(mem, val, 4);
426-
return ptr;
427408
}
428409

429410
UPB_FORCEINLINE
@@ -476,7 +457,7 @@ const char* _upb_Decoder_DecodeVarintPacked(upb_Decoder* d, const char* ptr,
476457
while (!_upb_Decoder_IsDone(d, &ptr)) {
477458
wireval elem;
478459
ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val);
479-
_upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), &elem);
460+
_upb_Decoder_Munge(field, &elem);
480461
if (_upb_Decoder_Reserve(d, arr, 1)) {
481462
out = UPB_PTR_AT(upb_Array_MutableDataPtr(arr),
482463
arr->UPB_PRIVATE(size) << lg2, void);
@@ -501,8 +482,8 @@ static const char* _upb_Decoder_DecodeEnumPacked(
501482
while (!_upb_Decoder_IsDone(d, &ptr)) {
502483
wireval elem;
503484
ptr = _upb_Decoder_DecodeVarint(d, ptr, &elem.uint64_val);
504-
_upb_Decoder_MungeInt32(&elem);
505-
if (!_upb_Decoder_CheckEnum(d, ptr, msg, e, field, &elem)) {
485+
if (!upb_MiniTableEnum_CheckValue(e, elem.uint64_val)) {
486+
_upb_Decoder_AddEnumValueToUnknown(d, msg, field, &elem);
506487
continue;
507488
}
508489
if (_upb_Decoder_Reserve(d, arr, 1)) {
@@ -585,8 +566,6 @@ static const char* _upb_Decoder_DecodeToArray(
585566
case OP_VARPCK_LG2(3):
586567
return _upb_Decoder_DecodeVarintPacked(d, ptr, arr, val, field,
587568
op - OP_VARPCK_LG2(0));
588-
case kUpb_DecodeOp_Enum:
589-
return _upb_Decoder_DecodeEnumArray(d, ptr, msg, arr, subs, field, val);
590569
case kUpb_DecodeOp_PackedEnum:
591570
return _upb_Decoder_DecodeEnumPacked(d, ptr, msg, arr, subs, field, val);
592571
default:
@@ -710,13 +689,6 @@ static const char* _upb_Decoder_DecodeToSubMessage(
710689
void* mem = UPB_PTR_AT(msg, field->UPB_PRIVATE(offset), void);
711690
int type = field->UPB_PRIVATE(descriptortype);
712691

713-
if (UPB_UNLIKELY(op == kUpb_DecodeOp_Enum) &&
714-
!_upb_Decoder_CheckEnum(d, ptr, msg,
715-
_upb_MiniTableSubs_EnumByField(subs, field),
716-
field, val)) {
717-
return ptr;
718-
}
719-
720692
// Set presence if necessary.
721693
if (UPB_PRIVATE(_upb_MiniTableField_HasHasbit)(field)) {
722694
UPB_PRIVATE(_upb_Message_SetHasbit)(msg, field);
@@ -756,7 +728,6 @@ static const char* _upb_Decoder_DecodeToSubMessage(
756728
case kUpb_DecodeOp_Scalar8Byte:
757729
memcpy(mem, val, 8);
758730
break;
759-
case kUpb_DecodeOp_Enum:
760731
case kUpb_DecodeOp_Scalar4Byte:
761732
memcpy(mem, val, 4);
762733
break;
@@ -970,7 +941,7 @@ static int _upb_Decoder_GetVarintOp(const upb_MiniTableField* field) {
970941
[kUpb_FieldType_Message] = kUpb_DecodeOp_UnknownField,
971942
[kUpb_FieldType_Bytes] = kUpb_DecodeOp_UnknownField,
972943
[kUpb_FieldType_UInt32] = kUpb_DecodeOp_Scalar4Byte,
973-
[kUpb_FieldType_Enum] = kUpb_DecodeOp_Enum,
944+
[kUpb_FieldType_Enum] = kUpb_DecodeOp_Scalar4Byte,
974945
[kUpb_FieldType_SFixed32] = kUpb_DecodeOp_UnknownField,
975946
[kUpb_FieldType_SFixed64] = kUpb_DecodeOp_UnknownField,
976947
[kUpb_FieldType_SInt32] = kUpb_DecodeOp_Scalar4Byte,
@@ -1096,8 +1067,20 @@ const char* _upb_Decoder_DecodeWireValue(upb_Decoder* d, const char* ptr,
10961067
switch (wire_type) {
10971068
case kUpb_WireType_Varint:
10981069
ptr = _upb_Decoder_DecodeVarint(d, ptr, &val->uint64_val);
1070+
if (upb_MiniTableField_IsClosedEnum(field)) {
1071+
const upb_MiniTableEnum* e =
1072+
upb_MiniTableField_IsExtension(field)
1073+
? upb_MiniTableExtension_GetSubEnum(
1074+
(const upb_MiniTableExtension*)field)
1075+
: upb_MiniTable_GetSubEnumTable(mt, field);
1076+
if (!upb_MiniTableEnum_CheckValue(e, val->uint64_val)) {
1077+
*op = kUpb_DecodeOp_UnknownField;
1078+
return ptr;
1079+
}
1080+
} else {
1081+
_upb_Decoder_Munge(field, val);
1082+
}
10991083
*op = _upb_Decoder_GetVarintOp(field);
1100-
_upb_Decoder_Munge(field->UPB_PRIVATE(descriptortype), val);
11011084
return ptr;
11021085
case kUpb_WireType_32Bit:
11031086
*op = kUpb_DecodeOp_Scalar4Byte;

0 commit comments

Comments
 (0)