Skip to content

Commit 21d48d1

Browse files
Copilot and MihaZupan authored
Fix Base64.DecodeFromUtf8 consuming whitespace in partial final quantum when isFinalBlock=false (#123313)
Co-authored-by: copilot-swe-agent[bot] <[email protected]> Co-authored-by: MihaZupan <[email protected]> Co-authored-by: Miha Zupan <[email protected]>
1 parent 257a9d2 commit 21d48d1

File tree

2 files changed

+196
-16
lines changed

2 files changed

+196
-16
lines changed

src/libraries/System.Memory/tests/Base64/Base64DecoderUnitTests.cs

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,6 +1004,89 @@ public void DecodingWithEmbeddedWhiteSpaceIntoSmallDestination_ActualDestination
10041004
Assert.Equal(new byte[] { 1, 2, 3, 4 }, destination4);
10051005
}
10061006

1007+
[Theory]
1008+
[InlineData("AQ\r\nQ=")]
1009+
[InlineData("AQ\r\nQ=\r\n")]
1010+
[InlineData("AQ Q=")]
1011+
[InlineData("AQ\tQ=")]
1012+
public void DecodingWithWhiteSpaceSplitFinalQuantumAndIsFinalBlockFalse(string base64String)
1013+
{
1014+
// When a final quantum (containing padding) is split by whitespace and isFinalBlock=false,
1015+
// the decoder should not consume any bytes, allowing the caller to retry with isFinalBlock=true
1016+
ReadOnlySpan<byte> base64Data = Encoding.ASCII.GetBytes(base64String);
1017+
var output = new byte[10];
1018+
1019+
// First call with isFinalBlock=false should consume 0 bytes
1020+
OperationStatus status = Base64.DecodeFromUtf8(base64Data, output, out int bytesConsumed, out int bytesWritten, isFinalBlock: false);
1021+
Assert.Equal(0, bytesConsumed);
1022+
Assert.Equal(0, bytesWritten);
1023+
Assert.Equal(OperationStatus.InvalidData, status);
1024+
1025+
// Second call with isFinalBlock=true should succeed
1026+
status = Base64.DecodeFromUtf8(base64Data, output, out bytesConsumed, out bytesWritten, isFinalBlock: true);
1027+
Assert.Equal(OperationStatus.Done, status);
1028+
Assert.Equal(base64Data.Length, bytesConsumed);
1029+
Assert.Equal(2, bytesWritten); // "AQQ=" decodes to 2 bytes: {1, 4}
1030+
Assert.Equal(new byte[] { 1, 4 }, output[..2]);
1031+
}
1032+
1033+
[Fact]
1034+
public void DecodingCompleteQuantumWithIsFinalBlockFalse()
1035+
{
1036+
// Complete quantum without padding should be decoded even when isFinalBlock=false
1037+
ReadOnlySpan<byte> base64Data = "AAAA"u8;
1038+
var output = new byte[10];
1039+
1040+
OperationStatus status = Base64.DecodeFromUtf8(base64Data, output, out int bytesConsumed, out int bytesWritten, isFinalBlock: false);
1041+
Assert.Equal(OperationStatus.Done, status);
1042+
Assert.Equal(4, bytesConsumed);
1043+
Assert.Equal(3, bytesWritten);
1044+
}
1045+
1046+
[Fact]
1047+
public void DecodingPaddedQuantumWithIsFinalBlockFalse()
1048+
{
1049+
// Quantum with padding should not be decoded when isFinalBlock=false
1050+
ReadOnlySpan<byte> base64Data = "AAA="u8;
1051+
var output = new byte[10];
1052+
1053+
OperationStatus status = Base64.DecodeFromUtf8(base64Data, output, out int bytesConsumed, out int bytesWritten, isFinalBlock: false);
1054+
Assert.Equal(OperationStatus.InvalidData, status);
1055+
Assert.Equal(0, bytesConsumed);
1056+
Assert.Equal(0, bytesWritten);
1057+
}
1058+
1059+
[Theory]
1060+
[InlineData("AQIDBAUG AQ\r\nQ=", 9, 6, "AQ\r\nQ=")] // Two complete blocks, then whitespace-split final quantum
1061+
[InlineData("AQID BAUG AQ\r\nQ=", 10, 6, "AQ\r\nQ=")] // Two blocks with space, then whitespace-split final quantum
1062+
[InlineData("AQIDBAUG\r\nAQID AQ\r\nQ=", 15, 9, "AQ\r\nQ=")] // Multiple blocks with various whitespace patterns
1063+
public void DecodingWithValidDataBeforeWhiteSpaceSplitFinalQuantum(string base64String, int expectedBytesConsumedFirstCall, int expectedBytesWrittenFirstCall, string expectedRemainingAfterFirstCall)
1064+
{
1065+
// When there's valid data before a whitespace-split final quantum and isFinalBlock=false,
1066+
// verify the streaming scenario works correctly
1067+
ReadOnlySpan<byte> base64Data = Encoding.ASCII.GetBytes(base64String);
1068+
var output = new byte[100];
1069+
1070+
// First call with isFinalBlock=false should decode the valid complete blocks and stop before the incomplete final quantum
1071+
OperationStatus status = Base64.DecodeFromUtf8(base64Data, output, out int bytesConsumed, out int bytesWritten, isFinalBlock: false);
1072+
1073+
Assert.Equal(OperationStatus.InvalidData, status);
1074+
Assert.Equal(expectedBytesConsumedFirstCall, bytesConsumed);
1075+
Assert.Equal(expectedBytesWrittenFirstCall, bytesWritten);
1076+
1077+
// Verify that only the final block remains
1078+
ReadOnlySpan<byte> remaining = base64Data.Slice(bytesConsumed);
1079+
string remainingString = Encoding.ASCII.GetString(remaining);
1080+
Assert.Equal(expectedRemainingAfterFirstCall, remainingString);
1081+
1082+
// Verify we can complete decoding by retrying with the FULL input and isFinalBlock=true
1083+
Array.Clear(output, 0, output.Length);
1084+
status = Base64.DecodeFromUtf8(base64Data, output, out bytesConsumed, out bytesWritten, isFinalBlock: true);
1085+
Assert.Equal(OperationStatus.Done, status);
1086+
Assert.Equal(base64Data.Length, bytesConsumed);
1087+
Assert.True(bytesWritten > 0, "Should have decoded data");
1088+
}
1089+
10071090
[Fact]
10081091
public void DecodingWithEmbeddedWhiteSpaceIntoSmallDestination_TrailingWhiteSpacesAreConsumed()
10091092
{
@@ -1020,5 +1103,88 @@ public void DecodingWithEmbeddedWhiteSpaceIntoSmallDestination_TrailingWhiteSpac
10201103
Assert.Equal(destination.Length, written);
10211104
Assert.Equal(new byte[] { 240, 159, 141, 137, 240, 159 }, destination);
10221105
}
1106+
1107+
[Theory]
1108+
[InlineData("AQ\r\nQ=")]
1109+
[InlineData("AQ\r\nQ=\r\n")]
1110+
[InlineData("AQ Q=")]
1111+
[InlineData("AQ\tQ=")]
1112+
public void DecodingFromCharsWithWhiteSpaceSplitFinalQuantumAndIsFinalBlockFalse(string base64String)
1113+
{
1114+
// When a final quantum (containing padding) is split by whitespace and isFinalBlock=false,
1115+
// the decoder should not consume any chars (or write any bytes), allowing the caller to retry with isFinalBlock=true
1116+
ReadOnlySpan<char> base64Data = base64String.AsSpan();
1117+
var output = new byte[10];
1118+
1119+
// First call with isFinalBlock=false should consume 0 chars and write 0 bytes
1120+
OperationStatus status = Base64.DecodeFromChars(base64Data, output, out int bytesConsumed, out int bytesWritten, isFinalBlock: false);
1121+
Assert.Equal(0, bytesConsumed);
1122+
Assert.Equal(0, bytesWritten);
1123+
Assert.Equal(OperationStatus.InvalidData, status);
1124+
1125+
// Second call with isFinalBlock=true should succeed
1126+
status = Base64.DecodeFromChars(base64Data, output, out bytesConsumed, out bytesWritten, isFinalBlock: true);
1127+
Assert.Equal(OperationStatus.Done, status);
1128+
Assert.Equal(base64Data.Length, bytesConsumed);
1129+
Assert.Equal(2, bytesWritten); // "AQQ=" decodes to 2 bytes: {1, 4}
1130+
Assert.Equal(new byte[] { 1, 4 }, output[..2]);
1131+
}
1132+
1133+
[Fact]
1134+
public void DecodingFromCharsCompleteQuantumWithIsFinalBlockFalse()
1135+
{
1136+
// Complete quantum without padding should be decoded even when isFinalBlock=false
1137+
ReadOnlySpan<char> base64Data = "AAAA".AsSpan();
1138+
var output = new byte[10];
1139+
1140+
OperationStatus status = Base64.DecodeFromChars(base64Data, output, out int bytesConsumed, out int bytesWritten, isFinalBlock: false);
1141+
Assert.Equal(OperationStatus.Done, status);
1142+
Assert.Equal(4, bytesConsumed);
1143+
Assert.Equal(3, bytesWritten);
1144+
}
1145+
1146+
[Fact]
1147+
public void DecodingFromCharsPaddedQuantumWithIsFinalBlockFalse()
1148+
{
1149+
// Quantum with padding should not be decoded when isFinalBlock=false
1150+
ReadOnlySpan<char> base64Data = "AAA=".AsSpan();
1151+
var output = new byte[10];
1152+
1153+
OperationStatus status = Base64.DecodeFromChars(base64Data, output, out int bytesConsumed, out int bytesWritten, isFinalBlock: false);
1154+
Assert.Equal(OperationStatus.InvalidData, status);
1155+
Assert.Equal(0, bytesConsumed);
1156+
Assert.Equal(0, bytesWritten);
1157+
}
1158+
1159+
[Theory]
1160+
[InlineData("AQIDBAUG AQ\r\nQ=", 9, 6, "AQ\r\nQ=")] // Two complete blocks, then whitespace-split final quantum
1161+
[InlineData("AQID BAUG AQ\r\nQ=", 10, 6, "AQ\r\nQ=")] // Two blocks with space, then whitespace-split final quantum
1162+
[InlineData("AQIDBAUG\r\nAQID AQ\r\nQ=", 15, 9, "AQ\r\nQ=")] // Multiple blocks with various whitespace patterns
1163+
public void DecodingFromCharsWithValidDataBeforeWhiteSpaceSplitFinalQuantum(string base64String, int expectedBytesConsumedFirstCall, int expectedBytesWrittenFirstCall, string expectedRemainingAfterFirstCall)
1164+
{
1165+
// When there's valid data before a whitespace-split final quantum and isFinalBlock=false,
1166+
// verify the streaming scenario works correctly
1167+
ReadOnlySpan<char> base64Data = base64String.AsSpan();
1168+
var output = new byte[100];
1169+
1170+
// First call with isFinalBlock=false should decode the valid complete blocks and stop before the incomplete final quantum
1171+
OperationStatus status = Base64.DecodeFromChars(base64Data, output, out int bytesConsumed, out int bytesWritten, isFinalBlock: false);
1172+
1173+
Assert.Equal(OperationStatus.InvalidData, status);
1174+
Assert.Equal(expectedBytesConsumedFirstCall, bytesConsumed);
1175+
Assert.Equal(expectedBytesWrittenFirstCall, bytesWritten);
1176+
1177+
// Verify that only the final block remains
1178+
ReadOnlySpan<char> remaining = base64Data.Slice(bytesConsumed);
1179+
string remainingString = new string(remaining);
1180+
Assert.Equal(expectedRemainingAfterFirstCall, remainingString);
1181+
1182+
// Verify we can complete decoding by retrying with the FULL input and isFinalBlock=true
1183+
Array.Clear(output, 0, output.Length);
1184+
status = Base64.DecodeFromChars(base64Data, output, out bytesConsumed, out bytesWritten, isFinalBlock: true);
1185+
Assert.Equal(OperationStatus.Done, status);
1186+
Assert.Equal(base64Data.Length, bytesConsumed);
1187+
Assert.True(bytesWritten > 0, "Should have decoded data");
1188+
}
10231189
}
10241190
}

src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Helper/Base64DecoderHelper.cs

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,14 @@ internal static OperationStatus DecodeWithWhiteSpaceBlockwise<TBase64Decoder>(TB
467467

468468
while (!source.IsEmpty)
469469
{
470+
// Skip over any leading whitespace
471+
if (IsWhiteSpace(source[0]))
472+
{
473+
source = source.Slice(1);
474+
bytesConsumed++;
475+
continue;
476+
}
477+
470478
int encodedIdx = 0;
471479
int bufferIdx = 0;
472480
int skipped = 0;
@@ -485,12 +493,7 @@ internal static OperationStatus DecodeWithWhiteSpaceBlockwise<TBase64Decoder>(TB
485493
}
486494

487495
source = source.Slice(encodedIdx);
488-
bytesConsumed += skipped;
489-
490-
if (bufferIdx == 0)
491-
{
492-
continue;
493-
}
496+
Debug.Assert(bufferIdx > 0);
494497

495498
bool hasAnotherBlock;
496499

@@ -522,14 +525,17 @@ internal static OperationStatus DecodeWithWhiteSpaceBlockwise<TBase64Decoder>(TB
522525
}
523526

524527
status = DecodeFrom<TBase64Decoder, byte>(decoder, buffer.Slice(0, bufferIdx), bytes, out int localConsumed, out int localWritten, localIsFinalBlock, ignoreWhiteSpace: false);
525-
bytesConsumed += localConsumed;
526-
bytesWritten += localWritten;
527528

528529
if (status != OperationStatus.Done)
529530
{
531+
Debug.Assert(localConsumed == 0 && localWritten == 0, "On failure, should not have consumed or written any bytes");
530532
return status;
531533
}
532534

535+
bytesConsumed += skipped;
536+
bytesConsumed += localConsumed;
537+
bytesWritten += localWritten;
538+
533539
// The remaining data must all be whitespace in order to be valid.
534540
if (!hasAnotherBlock)
535541
{
@@ -551,6 +557,7 @@ internal static OperationStatus DecodeWithWhiteSpaceBlockwise<TBase64Decoder>(TB
551557
}
552558

553559
bytes = bytes.Slice(localWritten);
560+
Debug.Assert(!source.IsEmpty);
554561
}
555562

556563
return status;
@@ -565,6 +572,14 @@ internal static OperationStatus DecodeWithWhiteSpaceBlockwise<TBase64Decoder>(TB
565572

566573
while (!source.IsEmpty)
567574
{
575+
// Skip over any leading whitespace
576+
if (IsWhiteSpace(source[0]))
577+
{
578+
source = source.Slice(1);
579+
bytesConsumed++;
580+
continue;
581+
}
582+
568583
int encodedIdx = 0;
569584
int bufferIdx = 0;
570585
int skipped = 0;
@@ -583,12 +598,7 @@ internal static OperationStatus DecodeWithWhiteSpaceBlockwise<TBase64Decoder>(TB
583598
}
584599

585600
source = source.Slice(encodedIdx);
586-
bytesConsumed += skipped;
587-
588-
if (bufferIdx == 0)
589-
{
590-
continue;
591-
}
601+
Debug.Assert(bufferIdx > 0);
592602

593603
bool hasAnotherBlock;
594604

@@ -620,14 +630,17 @@ internal static OperationStatus DecodeWithWhiteSpaceBlockwise<TBase64Decoder>(TB
620630
}
621631

622632
status = DecodeFrom(decoder, buffer.Slice(0, bufferIdx), bytes, out int localConsumed, out int localWritten, localIsFinalBlock, ignoreWhiteSpace: false);
623-
bytesConsumed += localConsumed;
624-
bytesWritten += localWritten;
625633

626634
if (status != OperationStatus.Done)
627635
{
636+
Debug.Assert(localConsumed == 0 && localWritten == 0, "On failure, should not have consumed or written any bytes");
628637
return status;
629638
}
630639

640+
bytesConsumed += skipped;
641+
bytesConsumed += localConsumed;
642+
bytesWritten += localWritten;
643+
631644
// The remaining data must all be whitespace in order to be valid.
632645
if (!hasAnotherBlock)
633646
{
@@ -648,6 +661,7 @@ internal static OperationStatus DecodeWithWhiteSpaceBlockwise<TBase64Decoder>(TB
648661
}
649662

650663
bytes = bytes.Slice(localWritten);
664+
Debug.Assert(!source.IsEmpty);
651665
}
652666

653667
return status;

0 commit comments

Comments
 (0)