Skip to content

Commit 67d78bc

Browse files
Merge pull request #1234 from robryk:sizelimit
PiperOrigin-RevId: 713282049
2 parents b01b63a + d144c58 commit 67d78bc

File tree

4 files changed

+197
-67
lines changed

4 files changed

+197
-67
lines changed

python/_brotli.c

Lines changed: 155 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ typedef struct {
2323
PyObject *list;
2424
/* Number of whole allocated size. */
2525
Py_ssize_t allocated;
26+
Py_ssize_t size_limit;
2627
} BlocksOutputBuffer;
2728

2829
static const char unable_allocate_msg[] = "Unable to allocate output buffer.";
@@ -69,11 +70,17 @@ static const Py_ssize_t BUFFER_BLOCK_SIZE[] =
6970
Return -1 on failure
7071
*/
7172
static inline int
72-
BlocksOutputBuffer_InitAndGrow(BlocksOutputBuffer *buffer,
73+
BlocksOutputBuffer_InitAndGrow(BlocksOutputBuffer *buffer, Py_ssize_t size_limit,
7374
size_t *avail_out, uint8_t **next_out)
7475
{
7576
PyObject *b;
76-
const Py_ssize_t block_size = BUFFER_BLOCK_SIZE[0];
77+
Py_ssize_t block_size = BUFFER_BLOCK_SIZE[0];
78+
79+
assert(size_limit > 0);
80+
81+
if (size_limit < block_size) {
82+
block_size = size_limit;
83+
}
7784

7885
// Ensure .list was set to NULL, for BlocksOutputBuffer_OnError().
7986
assert(buffer->list == NULL);
@@ -94,6 +101,7 @@ BlocksOutputBuffer_InitAndGrow(BlocksOutputBuffer *buffer,
94101

95102
// Set variables
96103
buffer->allocated = block_size;
104+
buffer->size_limit = size_limit;
97105

98106
*avail_out = (size_t) block_size;
99107
*next_out = (uint8_t*) PyBytes_AS_STRING(b);
@@ -122,10 +130,16 @@ BlocksOutputBuffer_Grow(BlocksOutputBuffer *buffer,
122130
block_size = BUFFER_BLOCK_SIZE[Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE) - 1];
123131
}
124132

125-
// Check buffer->allocated overflow
126-
if (block_size > PY_SSIZE_T_MAX - buffer->allocated) {
127-
PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
128-
return -1;
133+
if (block_size > buffer->size_limit - buffer->allocated) {
134+
block_size = buffer->size_limit - buffer->allocated;
135+
}
136+
137+
if (block_size == 0) {
138+
// We are at the size_limit (either the provided one, in which case we
139+
// shouldn't have been called, or the implicit PY_SSIZE_T_MAX one, in
140+
// which case we wouldn't be able to concatenate the blocks at the end).
141+
PyErr_SetString(PyExc_MemoryError, "too long");
142+
return -1;
129143
}
130144

131145
// Create the block
@@ -291,7 +305,7 @@ static PyObject* compress_stream(BrotliEncoderState* enc, BrotliEncoderOperation
291305
BlocksOutputBuffer buffer = {.list=NULL};
292306
PyObject *ret;
293307

294-
if (BlocksOutputBuffer_InitAndGrow(&buffer, &available_out, &next_out) < 0) {
308+
if (BlocksOutputBuffer_InitAndGrow(&buffer, PY_SSIZE_T_MAX, &available_out, &next_out) < 0) {
295309
goto error;
296310
}
297311

@@ -592,57 +606,6 @@ static PyTypeObject brotli_CompressorType = {
592606
brotli_Compressor_new, /* tp_new */
593607
};
594608

595-
static PyObject* decompress_stream(BrotliDecoderState* dec,
596-
uint8_t* input, size_t input_length) {
597-
BrotliDecoderResult result;
598-
599-
size_t available_in = input_length;
600-
const uint8_t* next_in = input;
601-
602-
size_t available_out;
603-
uint8_t* next_out;
604-
BlocksOutputBuffer buffer = {.list=NULL};
605-
PyObject *ret;
606-
607-
if (BlocksOutputBuffer_InitAndGrow(&buffer, &available_out, &next_out) < 0) {
608-
goto error;
609-
}
610-
611-
while (1) {
612-
Py_BEGIN_ALLOW_THREADS
613-
result = BrotliDecoderDecompressStream(dec,
614-
&available_in, &next_in,
615-
&available_out, &next_out, NULL);
616-
Py_END_ALLOW_THREADS
617-
618-
if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
619-
if (available_out == 0) {
620-
if (BlocksOutputBuffer_Grow(&buffer, &available_out, &next_out) < 0) {
621-
goto error;
622-
}
623-
}
624-
continue;
625-
}
626-
627-
break;
628-
}
629-
630-
if (result == BROTLI_DECODER_RESULT_ERROR || available_in != 0) {
631-
goto error;
632-
}
633-
634-
ret = BlocksOutputBuffer_Finish(&buffer, available_out);
635-
if (ret != NULL) {
636-
goto finally;
637-
}
638-
639-
error:
640-
BlocksOutputBuffer_OnError(&buffer);
641-
ret = NULL;
642-
finally:
643-
return ret;
644-
}
645-
646609
PyDoc_STRVAR(brotli_Decompressor_doc,
647610
"An object to decompress a byte string.\n"
648611
"\n"
@@ -655,10 +618,14 @@ PyDoc_STRVAR(brotli_Decompressor_doc,
655618
/* Python object wrapping a streaming Brotli decoder together with any
   compressed input that a previous call could not consume yet. */
typedef struct {
656619
PyObject_HEAD
657620
/* Decoder state; created with BrotliDecoderCreateInstance() and released
   with BrotliDecoderDestroyInstance() when the object is deallocated. */
BrotliDecoderState* dec;
621+
/* malloc()-allocated copy of the input bytes the decoder has not consumed
   yet, or NULL when there are none. Replaced by decompress_stream() on
   every call and released with free(). */
uint8_t* unconsumed_data;
622+
/* Number of bytes held in unconsumed_data. While this is non-zero,
   can_accept_more_data() reports False and process() rejects non-empty
   input. */
size_t unconsumed_data_length;
658623
} brotli_Decompressor;
659624

660625
static void brotli_Decompressor_dealloc(brotli_Decompressor* self) {
661626
BrotliDecoderDestroyInstance(self->dec);
627+
if (self->unconsumed_data)
628+
free(self->unconsumed_data);
662629
#if PY_MAJOR_VERSION >= 3
663630
Py_TYPE(self)->tp_free((PyObject*)self);
664631
#else
@@ -674,6 +641,9 @@ static PyObject* brotli_Decompressor_new(PyTypeObject *type, PyObject *args, PyO
674641
self->dec = BrotliDecoderCreateInstance(0, 0, 0);
675642
}
676643

644+
self->unconsumed_data = NULL;
645+
self->unconsumed_data_length = 0;
646+
677647
return (PyObject *)self;
678648
}
679649

@@ -692,35 +662,118 @@ static int brotli_Decompressor_init(brotli_Decompressor *self, PyObject *args, P
692662
return 0;
693663
}
694664

665+
/*
 * Feeds `input` to the streaming decoder owned by `self` and collects the
 * decoded bytes, producing at most `max_output_length` bytes in this call.
 *
 * Args:
 *   self: decompressor whose decoder state is advanced. Its unconsumed_data
 *     stash is always replaced before returning.
 *   input, input_length: compressed bytes to decode. `input` may be
 *     self->unconsumed_data itself; the leftover tail is copied out before
 *     the old stash is freed.
 *   max_output_length: size limit handed to the output buffer. Decoding stops
 *     early once exactly this many bytes have been produced.
 *
 * Returns:
 *   The object built by BlocksOutputBuffer_Finish(), or NULL on failure.
 *   A Python exception is set here for allocation failures only; decoder
 *   errors are presumably reported by the caller -- confirm.
 *
 * Side effects:
 *   Input that was not consumed because the output limit was reached (or
 *   because an allocation failed before/while decoding) is stored in
 *   self->unconsumed_data / self->unconsumed_data_length. If stashing itself
 *   fails, the stash is cleared, the result is dropped and MemoryError is
 *   raised.
 */
static PyObject* decompress_stream(brotli_Decompressor* self,
                                   uint8_t* input, size_t input_length, Py_ssize_t max_output_length) {
  BrotliDecoderResult result;

  size_t available_in = input_length;
  const uint8_t* next_in = input;

  size_t available_out;
  uint8_t* next_out;
  uint8_t* new_tail = NULL;
  BlocksOutputBuffer buffer = {.list=NULL};
  PyObject *ret;

  if (BlocksOutputBuffer_InitAndGrow(&buffer, max_output_length, &available_out, &next_out) < 0) {
    goto error;
  }

  while (1) {
    Py_BEGIN_ALLOW_THREADS
    result = BrotliDecoderDecompressStream(self->dec,
                                           &available_in, &next_in,
                                           &available_out, &next_out, NULL);
    Py_END_ALLOW_THREADS

    if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
      if (available_out == 0) {
        if (buffer.allocated == PY_SSIZE_T_MAX) {
          PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
          goto error;
        }
        if (buffer.allocated == max_output_length) {
          // We've reached the output length limit.
          break;
        }
        if (BlocksOutputBuffer_Grow(&buffer, &available_out, &next_out) < 0) {
          goto error;
        }
      }
      continue;
    }

    if (result == BROTLI_DECODER_RESULT_ERROR || available_in != 0) {
      // Nothing is worth keeping after a decoder error or trailing garbage.
      available_in = 0;
      goto error;
    }

    break;
  }

  ret = BlocksOutputBuffer_Finish(&buffer, available_out);
  if (ret != NULL) {
    goto finally;
  }

error:
  BlocksOutputBuffer_OnError(&buffer);
  ret = NULL;

finally:
  // Copy the unconsumed tail BEFORE freeing the old stash: next_in may point
  // into self->unconsumed_data when the caller is draining buffered input.
  if (available_in > 0) {
    new_tail = malloc(available_in);
    if (new_tail == NULL) {
      // Without the tail the stream cannot be resumed; fail the call instead
      // of passing NULL to memcpy().
      Py_XDECREF(ret);
      ret = NULL;
      PyErr_NoMemory();
      available_in = 0;
    } else {
      memcpy(new_tail, next_in, available_in);
    }
  }
  free(self->unconsumed_data);  // free(NULL) is a no-op
  self->unconsumed_data = new_tail;
  self->unconsumed_data_length = available_in;

  return ret;
}
736+
737+
695738
PyDoc_STRVAR(brotli_Decompressor_process_doc,
696739
"Process \"string\" for decompression, returning a string that contains \n"
697740
"decompressed output data. This data should be concatenated to the output \n"
698741
"produced by any preceding calls to the \"process()\" method. \n"
699742
"Some or all of the input may be kept in internal buffers for later \n"
700743
"processing, and the decompressed output data may be empty until enough input \n"
701744
"has been accumulated.\n"
745+
"If max_output_length is set, no more than max_output_length bytes will be\n"
746+
"returned. If the limit is reached, further calls to process (potentially with\n"
747+
"empty input) will continue to yield more data. If, after returning a string of\n"
748+
"the length equal to limit, can_accept_more_data() returns False, process()\n"
749+
"must only be called with empty input until can_accept_more_data() once again\n"
750+
"returns True.\n"
702751
"\n"
703752
"Signature:\n"
704-
" decompress(string)\n"
753+
" decompress(string, max_output_length=int)\n"
705754
"\n"
706755
"Args:\n"
707756
" string (bytes): The input data\n"
708-
"\n"
709-
"Returns:\n"
757+
"\n""Returns:\n"
710758
" The decompressed output data (bytes)\n"
711759
"\n"
712760
"Raises:\n"
713761
" brotli.error: If decompression fails\n");
714762

715-
static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject *args) {
763+
static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject *args, PyObject* keywds) {
716764
PyObject* ret;
717765
Py_buffer input;
718766
int ok;
767+
Py_ssize_t max_output_length = PY_SSIZE_T_MAX;
768+
uint8_t* data;
769+
size_t data_length;
770+
771+
static char* kwlist[] = { "", "max_output_length", NULL };
719772

720773
#if PY_MAJOR_VERSION >= 3
721-
ok = PyArg_ParseTuple(args, "y*:process", &input);
774+
ok = PyArg_ParseTupleAndKeywords(args, keywds, "y*|n:process", kwlist, &input, &max_output_length);
722775
#else
723-
ok = PyArg_ParseTuple(args, "s*:process", &input);
776+
ok = PyArg_ParseTupleAndKeywords(args, keywds, "s*|n:process", kwlist, &input, &max_output_length);
724777
#endif
725778

726779
if (!ok) {
@@ -731,7 +784,20 @@ static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject
731784
goto error;
732785
}
733786

734-
ret = decompress_stream(self->dec, (uint8_t*) input.buf, input.len);
787+
if (self->unconsumed_data_length > 0) {
788+
if (input.len > 0) {
789+
PyErr_SetString(BrotliError, "process called with data when accept_more_data is False");
790+
ret = NULL;
791+
goto finally;
792+
}
793+
data = self->unconsumed_data;
794+
data_length = self->unconsumed_data_length;
795+
} else {
796+
data = (uint8_t*)input.buf;
797+
data_length = input.len;
798+
}
799+
800+
ret = decompress_stream(self, data, data_length, max_output_length);
735801
if (ret != NULL) {
736802
goto finally;
737803
}
@@ -773,13 +839,35 @@ static PyObject* brotli_Decompressor_is_finished(brotli_Decompressor *self) {
773839
}
774840
}
775841

842+
PyDoc_STRVAR(brotli_Decompressor_can_accept_more_data_doc,
843+
"Checks if the decoder instance can accept more compressed data. If the decompress()\n"
844+
"method on this instance of decompressor was never called with max_length,\n"
845+
"this method will always return True.\n"
846+
"\n"
847+
"Signature:"
848+
" can_accept_more_data()\n"
849+
"\n"
850+
"Returns:\n"
851+
" True if the decoder is ready to accept more compressed data via decompress()\n"
852+
" False if the decoder needs to output some data via decompress(b'') before\n"
853+
" being provided any more compressed data\n");
854+
855+
/* Reports whether new compressed input may be passed in: returns True when
   no unconsumed input is stashed on the object, False while some is still
   pending. */
static PyObject* brotli_Decompressor_can_accept_more_data(brotli_Decompressor* self) {
  if (self->unconsumed_data_length == 0) {
    Py_RETURN_TRUE;
  }
  Py_RETURN_FALSE;
}
862+
776863
static PyMemberDef brotli_Decompressor_members[] = {
777864
{NULL} /* Sentinel */
778865
};
779866

780867
static PyMethodDef brotli_Decompressor_methods[] = {
781-
{"process", (PyCFunction)brotli_Decompressor_process, METH_VARARGS, brotli_Decompressor_process_doc},
868+
{"process", (PyCFunction)brotli_Decompressor_process, METH_VARARGS | METH_KEYWORDS, brotli_Decompressor_process_doc},
782869
{"is_finished", (PyCFunction)brotli_Decompressor_is_finished, METH_NOARGS, brotli_Decompressor_is_finished_doc},
870+
{"can_accept_more_data", (PyCFunction)brotli_Decompressor_can_accept_more_data, METH_NOARGS, brotli_Decompressor_can_accept_more_data_doc},
783871
{NULL} /* Sentinel */
784872
};
785873

@@ -877,7 +965,7 @@ static PyObject* brotli_decompress(PyObject *self, PyObject *args, PyObject *key
877965
next_in = (uint8_t*) input.buf;
878966
available_in = input.len;
879967

880-
if (BlocksOutputBuffer_InitAndGrow(&buffer, &available_out, &next_out) < 0) {
968+
if (BlocksOutputBuffer_InitAndGrow(&buffer, PY_SSIZE_T_MAX, &available_out, &next_out) < 0) {
881969
goto error;
882970
}
883971

0 commit comments

Comments
 (0)