From ad6129ec70da6f70ffddf573296134acd8bea82f Mon Sep 17 00:00:00 2001 From: wjiang Date: Thu, 6 Nov 2025 10:01:01 +0800 Subject: [PATCH] fix CVE-2025-6176 (cherry picked from commit 33790074a1866d761bff90fab04a380849dd3ea2) --- ...-Python-use-a-new-output-buffer-code.patch | 815 ++++++++++++++++++ ...th-to-Python-streaming-decompression.patch | 281 ++++++ backport-add-size-limit-to-buffer.patch | 98 +++ brotli.spec | 8 +- 4 files changed, 1201 insertions(+), 1 deletion(-) create mode 100644 backport-Python-use-a-new-output-buffer-code.patch create mode 100644 backport-add-max_length-to-Python-streaming-decompression.patch create mode 100644 backport-add-size-limit-to-buffer.patch diff --git a/backport-Python-use-a-new-output-buffer-code.patch b/backport-Python-use-a-new-output-buffer-code.patch new file mode 100644 index 0000000..5014bd0 --- /dev/null +++ b/backport-Python-use-a-new-output-buffer-code.patch @@ -0,0 +1,815 @@ +From c8df4b3049ff1283fc4525defbcd003188f88963 Mon Sep 17 00:00:00 2001 +From: Ma Lin +Date: Thu, 29 Dec 2022 21:07:16 +0800 +Subject: [PATCH] Python: use a new output buffer code (#902) + +Currently, the output buffer is a std::vector. +When the buffer grows, resizing will cause unnecessary memcpy(). + +This change uses a list of bytes object to represent output buffer, can avoid the extra overhead of resizing. +In addition, C++ code can be removed, it's a pure C extension. +--- + python/{_brotli.cc => _brotli.c} | 514 ++++++++++++++++++++++--------- + setup.py | 56 ++-- + 2 files changed, 396 insertions(+), 174 deletions(-) + rename python/{_brotli.cc => _brotli.c} (65%) + +diff --git a/python/_brotli.cc b/python/_brotli.c +similarity index 65% +rename from python/_brotli.cc +rename to python/_brotli.c +index 54c736345..c6a0da03d 100644 +--- a/python/_brotli.cc ++++ b/python/_brotli.c +@@ -2,7 +2,6 @@ + #include + #include + #include +-#include + #include "../common/version.h" + #include + #include +@@ -10,10 +9,203 @@ + #if PY_MAJOR_VERSION >= 3 + #define PyInt_Check PyLong_Check + #define PyInt_AsLong PyLong_AsLong ++#else ++#define Py_ARRAY_LENGTH(array) (sizeof(array) / sizeof((array)[0])) + #endif + + static PyObject *BrotliError; + ++/* ----------------------------------- ++ BlocksOutputBuffer code ++ ----------------------------------- */ ++typedef struct { ++ /* List of blocks */ ++ PyObject *list; ++ /* Number of whole allocated size. */ ++ Py_ssize_t allocated; ++} BlocksOutputBuffer; ++ ++static const char unable_allocate_msg[] = "Unable to allocate output buffer."; ++ ++/* Block size sequence */ ++#define KB (1024) ++#define MB (1024*1024) ++static const Py_ssize_t BUFFER_BLOCK_SIZE[] = ++ { 32*KB, 64*KB, 256*KB, 1*MB, 4*MB, 8*MB, 16*MB, 16*MB, ++ 32*MB, 32*MB, 32*MB, 32*MB, 64*MB, 64*MB, 128*MB, 128*MB, ++ 256*MB }; ++#undef KB ++#undef MB ++ ++/* According to the block sizes defined by BUFFER_BLOCK_SIZE, the whole ++ allocated size growth step is: ++ 1 32 KB +32 KB ++ 2 96 KB +64 KB ++ 3 352 KB +256 KB ++ 4 1.34 MB +1 MB ++ 5 5.34 MB +4 MB ++ 6 13.34 MB +8 MB ++ 7 29.34 MB +16 MB ++ 8 45.34 MB +16 MB ++ 9 77.34 MB +32 MB ++ 10 109.34 MB +32 MB ++ 11 141.34 MB +32 MB ++ 12 173.34 MB +32 MB ++ 13 237.34 MB +64 MB ++ 14 301.34 MB +64 MB ++ 15 429.34 MB +128 MB ++ 16 557.34 MB +128 MB ++ 17 813.34 MB +256 MB ++ 18 1069.34 MB +256 MB ++ 19 1325.34 MB +256 MB ++ 20 1581.34 MB +256 MB ++ 21 1837.34 MB +256 MB ++ 22 2093.34 MB +256 MB ++ ... ++*/ ++ ++/* Initialize the buffer, and grow the buffer. ++ Return 0 on success ++ Return -1 on failure ++*/ ++static inline int ++BlocksOutputBuffer_InitAndGrow(BlocksOutputBuffer *buffer, ++ size_t *avail_out, uint8_t **next_out) ++{ ++ PyObject *b; ++ const Py_ssize_t block_size = BUFFER_BLOCK_SIZE[0]; ++ ++ // Ensure .list was set to NULL, for BlocksOutputBuffer_OnError(). ++ assert(buffer->list == NULL); ++ ++ // The first block ++ b = PyBytes_FromStringAndSize(NULL, block_size); ++ if (b == NULL) { ++ return -1; ++ } ++ ++ // Create list ++ buffer->list = PyList_New(1); ++ if (buffer->list == NULL) { ++ Py_DECREF(b); ++ return -1; ++ } ++ PyList_SET_ITEM(buffer->list, 0, b); ++ ++ // Set variables ++ buffer->allocated = block_size; ++ ++ *avail_out = (size_t) block_size; ++ *next_out = (uint8_t*) PyBytes_AS_STRING(b); ++ return 0; ++} ++ ++/* Grow the buffer. The avail_out must be 0, please check it before calling. ++ Return 0 on success ++ Return -1 on failure ++*/ ++static inline int ++BlocksOutputBuffer_Grow(BlocksOutputBuffer *buffer, ++ size_t *avail_out, uint8_t **next_out) ++{ ++ PyObject *b; ++ const Py_ssize_t list_len = Py_SIZE(buffer->list); ++ Py_ssize_t block_size; ++ ++ // Ensure no gaps in the data ++ assert(*avail_out == 0); ++ ++ // Get block size ++ if (list_len < (Py_ssize_t) Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE)) { ++ block_size = BUFFER_BLOCK_SIZE[list_len]; ++ } else { ++ block_size = BUFFER_BLOCK_SIZE[Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE) - 1]; ++ } ++ ++ // Check buffer->allocated overflow ++ if (block_size > PY_SSIZE_T_MAX - buffer->allocated) { ++ PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); ++ return -1; ++ } ++ ++ // Create the block ++ b = PyBytes_FromStringAndSize(NULL, block_size); ++ if (b == NULL) { ++ PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); ++ return -1; ++ } ++ if (PyList_Append(buffer->list, b) < 0) { ++ Py_DECREF(b); ++ return -1; ++ } ++ Py_DECREF(b); ++ ++ // Set variables ++ buffer->allocated += block_size; ++ ++ *avail_out = (size_t) block_size; ++ *next_out = (uint8_t*) PyBytes_AS_STRING(b); ++ return 0; ++} ++ ++/* Finish the buffer. ++ Return a bytes object on success ++ Return NULL on failure ++*/ ++static inline PyObject * ++BlocksOutputBuffer_Finish(BlocksOutputBuffer *buffer, size_t avail_out) ++{ ++ PyObject *result, *block; ++ const Py_ssize_t list_len = Py_SIZE(buffer->list); ++ ++ // Fast path for single block ++ if ((list_len == 1 && avail_out == 0) || ++ (list_len == 2 && Py_SIZE(PyList_GET_ITEM(buffer->list, 1)) == (Py_ssize_t) avail_out)) ++ { ++ block = PyList_GET_ITEM(buffer->list, 0); ++ Py_INCREF(block); ++ ++ Py_CLEAR(buffer->list); ++ return block; ++ } ++ ++ // Final bytes object ++ result = PyBytes_FromStringAndSize(NULL, buffer->allocated - avail_out); ++ if (result == NULL) { ++ PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); ++ return NULL; ++ } ++ ++ // Memory copy ++ if (list_len > 0) { ++ char *posi = PyBytes_AS_STRING(result); ++ ++ // Blocks except the last one ++ Py_ssize_t i = 0; ++ for (; i < list_len-1; i++) { ++ block = PyList_GET_ITEM(buffer->list, i); ++ memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block)); ++ posi += Py_SIZE(block); ++ } ++ // The last block ++ block = PyList_GET_ITEM(buffer->list, i); ++ memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block) - avail_out); ++ } else { ++ assert(Py_SIZE(result) == 0); ++ } ++ ++ Py_CLEAR(buffer->list); ++ return result; ++} ++ ++/* Clean up the buffer */ ++static inline void ++BlocksOutputBuffer_OnError(BlocksOutputBuffer *buffer) ++{ ++ Py_CLEAR(buffer->list); ++} ++ ++ + static int as_bounded_int(PyObject *o, int* result, int lower_bound, int upper_bound) { + long value = PyInt_AsLong(o); + if ((value < (long) lower_bound) || (value > (long) upper_bound)) { +@@ -87,39 +279,52 @@ static int lgblock_convertor(PyObject *o, int *lgblock) { + return 1; + } + +-static BROTLI_BOOL compress_stream(BrotliEncoderState* enc, BrotliEncoderOperation op, +- std::vector* output, +- uint8_t* input, size_t input_length) { +- BROTLI_BOOL ok = BROTLI_TRUE; +- Py_BEGIN_ALLOW_THREADS ++static PyObject* compress_stream(BrotliEncoderState* enc, BrotliEncoderOperation op, ++ uint8_t* input, size_t input_length) { ++ BROTLI_BOOL ok; + + size_t available_in = input_length; + const uint8_t* next_in = input; +- size_t available_out = 0; +- uint8_t* next_out = NULL; + +- while (ok) { ++ size_t available_out; ++ uint8_t* next_out; ++ BlocksOutputBuffer buffer = {.list=NULL}; ++ PyObject *ret; ++ ++ if (BlocksOutputBuffer_InitAndGrow(&buffer, &available_out, &next_out) < 0) { ++ goto error; ++ } ++ ++ while (1) { ++ Py_BEGIN_ALLOW_THREADS + ok = BrotliEncoderCompressStream(enc, op, + &available_in, &next_in, + &available_out, &next_out, NULL); +- if (!ok) +- break; +- +- size_t buffer_length = 0; // Request all available output. +- const uint8_t* buffer = BrotliEncoderTakeOutput(enc, &buffer_length); +- if (buffer_length) { +- (*output).insert((*output).end(), buffer, buffer + buffer_length); ++ Py_END_ALLOW_THREADS ++ if (!ok) { ++ goto error; + } + + if (available_in || BrotliEncoderHasMoreOutput(enc)) { ++ if (available_out == 0) { ++ if (BlocksOutputBuffer_Grow(&buffer, &available_out, &next_out) < 0) { ++ goto error; ++ } ++ } + continue; + } + + break; + } + +- Py_END_ALLOW_THREADS +- return ok; ++ ret = BlocksOutputBuffer_Finish(&buffer, available_out); ++ if (ret != NULL) { ++ return ret; ++ } ++ ++error: ++ BlocksOutputBuffer_OnError(&buffer); ++ return NULL; + } + + PyDoc_STRVAR(brotli_Compressor_doc, +@@ -178,7 +383,7 @@ static int brotli_Compressor_init(brotli_Compressor *self, PyObject *args, PyObj + static const char *kwlist[] = {"mode", "quality", "lgwin", "lgblock", NULL}; + + ok = PyArg_ParseTupleAndKeywords(args, keywds, "|O&O&O&O&:Compressor", +- const_cast(kwlist), ++ (char **) kwlist, + &mode_convertor, &mode, + &quality_convertor, &quality, + &lgwin_convertor, &lgwin, +@@ -221,36 +426,37 @@ PyDoc_STRVAR(brotli_Compressor_process_doc, + " brotli.error: If compression fails\n"); + + static PyObject* brotli_Compressor_process(brotli_Compressor *self, PyObject *args) { +- PyObject* ret = NULL; +- std::vector output; ++ PyObject* ret; + Py_buffer input; +- BROTLI_BOOL ok = BROTLI_TRUE; ++ int ok; + + #if PY_MAJOR_VERSION >= 3 +- ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "y*:process", &input); ++ ok = PyArg_ParseTuple(args, "y*:process", &input); + #else +- ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "s*:process", &input); ++ ok = PyArg_ParseTuple(args, "s*:process", &input); + #endif + +- if (!ok) ++ if (!ok) { + return NULL; ++ } + + if (!self->enc) { +- ok = BROTLI_FALSE; +- goto end; ++ goto error; + } + +- ok = compress_stream(self->enc, BROTLI_OPERATION_PROCESS, +- &output, static_cast(input.buf), input.len); +- +-end: +- PyBuffer_Release(&input); +- if (ok) { +- ret = PyBytes_FromStringAndSize((char*)(output.size() ? &output[0] : NULL), output.size()); +- } else { +- PyErr_SetString(BrotliError, "BrotliEncoderCompressStream failed while processing the stream"); ++ ret = compress_stream(self->enc, BROTLI_OPERATION_PROCESS, ++ (uint8_t*) input.buf, input.len); ++ if (ret != NULL) { ++ goto finally; + } + ++error: ++ PyErr_SetString(BrotliError, ++ "BrotliEncoderCompressStream failed while processing the stream"); ++ ret = NULL; ++ ++finally: ++ PyBuffer_Release(&input); + return ret; + } + +@@ -269,25 +475,23 @@ PyDoc_STRVAR(brotli_Compressor_flush_doc, + " brotli.error: If compression fails\n"); + + static PyObject* brotli_Compressor_flush(brotli_Compressor *self) { +- PyObject *ret = NULL; +- std::vector output; +- BROTLI_BOOL ok = BROTLI_TRUE; ++ PyObject *ret; + + if (!self->enc) { +- ok = BROTLI_FALSE; +- goto end; ++ goto error; + } + +- ok = compress_stream(self->enc, BROTLI_OPERATION_FLUSH, +- &output, NULL, 0); +- +-end: +- if (ok) { +- ret = PyBytes_FromStringAndSize((char*)(output.size() ? &output[0] : NULL), output.size()); +- } else { +- PyErr_SetString(BrotliError, "BrotliEncoderCompressStream failed while flushing the stream"); ++ ret = compress_stream(self->enc, BROTLI_OPERATION_FLUSH, ++ NULL, 0); ++ if (ret != NULL) { ++ goto finally; + } + ++error: ++ PyErr_SetString(BrotliError, ++ "BrotliEncoderCompressStream failed while flushing the stream"); ++ ret = NULL; ++finally: + return ret; + } + +@@ -309,29 +513,25 @@ PyDoc_STRVAR(brotli_Compressor_finish_doc, + " brotli.error: If compression fails\n"); + + static PyObject* brotli_Compressor_finish(brotli_Compressor *self) { +- PyObject *ret = NULL; +- std::vector output; +- BROTLI_BOOL ok = BROTLI_TRUE; ++ PyObject *ret; + + if (!self->enc) { +- ok = BROTLI_FALSE; +- goto end; ++ goto error; + } + +- ok = compress_stream(self->enc, BROTLI_OPERATION_FINISH, +- &output, NULL, 0); +- +- if (ok) { +- ok = BrotliEncoderIsFinished(self->enc); +- } ++ ret = compress_stream(self->enc, BROTLI_OPERATION_FINISH, ++ NULL, 0); + +-end: +- if (ok) { +- ret = PyBytes_FromStringAndSize((char*)(output.empty() ? NULL : &output[0]), output.size()); +- } else { +- PyErr_SetString(BrotliError, "BrotliEncoderCompressStream failed while finishing the stream"); ++ if (ret == NULL || !BrotliEncoderIsFinished(self->enc)) { ++ goto error; + } ++ goto finally; + ++error: ++ PyErr_SetString(BrotliError, ++ "BrotliEncoderCompressStream failed while finishing the stream"); ++ ret = NULL; ++finally: + return ret; + } + +@@ -392,32 +592,55 @@ static PyTypeObject brotli_CompressorType = { + brotli_Compressor_new, /* tp_new */ + }; + +-static BROTLI_BOOL decompress_stream(BrotliDecoderState* dec, +- std::vector* output, +- uint8_t* input, size_t input_length) { +- BROTLI_BOOL ok = BROTLI_TRUE; +- Py_BEGIN_ALLOW_THREADS ++static PyObject* decompress_stream(BrotliDecoderState* dec, ++ uint8_t* input, size_t input_length) { ++ BrotliDecoderResult result; + + size_t available_in = input_length; + const uint8_t* next_in = input; +- size_t available_out = 0; +- uint8_t* next_out = NULL; + +- BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT; +- while (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) { ++ size_t available_out; ++ uint8_t* next_out; ++ BlocksOutputBuffer buffer = {.list=NULL}; ++ PyObject *ret; ++ ++ if (BlocksOutputBuffer_InitAndGrow(&buffer, &available_out, &next_out) < 0) { ++ goto error; ++ } ++ ++ while (1) { ++ Py_BEGIN_ALLOW_THREADS + result = BrotliDecoderDecompressStream(dec, + &available_in, &next_in, + &available_out, &next_out, NULL); +- size_t buffer_length = 0; // Request all available output. +- const uint8_t* buffer = BrotliDecoderTakeOutput(dec, &buffer_length); +- if (buffer_length) { +- (*output).insert((*output).end(), buffer, buffer + buffer_length); ++ Py_END_ALLOW_THREADS ++ ++ if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) { ++ if (available_out == 0) { ++ if (BlocksOutputBuffer_Grow(&buffer, &available_out, &next_out) < 0) { ++ goto error; ++ } ++ } ++ continue; + } ++ ++ break; ++ } ++ ++ if (result == BROTLI_DECODER_RESULT_ERROR || available_in != 0) { ++ goto error; ++ } ++ ++ ret = BlocksOutputBuffer_Finish(&buffer, available_out); ++ if (ret != NULL) { ++ goto finally; + } +- ok = result != BROTLI_DECODER_RESULT_ERROR && !available_in; + +- Py_END_ALLOW_THREADS +- return ok; ++error: ++ BlocksOutputBuffer_OnError(&buffer); ++ ret = NULL; ++finally: ++ return ret; + } + + PyDoc_STRVAR(brotli_Decompressor_doc, +@@ -460,7 +683,7 @@ static int brotli_Decompressor_init(brotli_Decompressor *self, PyObject *args, P + static const char *kwlist[] = {NULL}; + + ok = PyArg_ParseTupleAndKeywords(args, keywds, "|:Decompressor", +- const_cast(kwlist)); ++ (char **) kwlist); + if (!ok) + return -1; + if (!self->dec) +@@ -490,35 +713,36 @@ PyDoc_STRVAR(brotli_Decompressor_process_doc, + " brotli.error: If decompression fails\n"); + + static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject *args) { +- PyObject* ret = NULL; +- std::vector output; ++ PyObject* ret; + Py_buffer input; +- BROTLI_BOOL ok = BROTLI_TRUE; ++ int ok; + + #if PY_MAJOR_VERSION >= 3 +- ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "y*:process", &input); ++ ok = PyArg_ParseTuple(args, "y*:process", &input); + #else +- ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "s*:process", &input); ++ ok = PyArg_ParseTuple(args, "s*:process", &input); + #endif + +- if (!ok) ++ if (!ok) { + return NULL; ++ } + + if (!self->dec) { +- ok = BROTLI_FALSE; +- goto end; ++ goto error; + } + +- ok = decompress_stream(self->dec, &output, static_cast(input.buf), input.len); +- +-end: +- PyBuffer_Release(&input); +- if (ok) { +- ret = PyBytes_FromStringAndSize((char*)(output.empty() ? NULL : &output[0]), output.size()); +- } else { +- PyErr_SetString(BrotliError, "BrotliDecoderDecompressStream failed while processing the stream"); ++ ret = decompress_stream(self->dec, (uint8_t*) input.buf, input.len); ++ if (ret != NULL) { ++ goto finally; + } + ++error: ++ PyErr_SetString(BrotliError, ++ "BrotliDecoderDecompressStream failed while processing the stream"); ++ ret = NULL; ++ ++finally: ++ PyBuffer_Release(&input); + return ret; + } + +@@ -537,14 +761,9 @@ PyDoc_STRVAR(brotli_Decompressor_is_finished_doc, + " brotli.error: If decompression fails\n"); + + static PyObject* brotli_Decompressor_is_finished(brotli_Decompressor *self) { +- PyObject *ret = NULL; +- std::vector output; +- BROTLI_BOOL ok = BROTLI_TRUE; +- + if (!self->dec) { +- ok = BROTLI_FALSE; + PyErr_SetString(BrotliError, "BrotliDecoderState is NULL while checking is_finished"); +- goto end; ++ return NULL; + } + + if (BrotliDecoderIsFinished(self->dec)) { +@@ -552,15 +771,6 @@ static PyObject* brotli_Decompressor_is_finished(brotli_Decompressor *self) { + } else { + Py_RETURN_FALSE; + } +- +-end: +- if (ok) { +- ret = PyBytes_FromStringAndSize((char*)(output.empty() ? NULL : &output[0]), output.size()); +- } else { +- PyErr_SetString(BrotliError, "BrotliDecoderDecompressStream failed while finishing the stream"); +- } +- +- return ret; + } + + static PyMemberDef brotli_Decompressor_members[] = { +@@ -635,56 +845,77 @@ PyDoc_STRVAR(brotli_decompress__doc__, + " brotli.error: If decompressor fails.\n"); + + static PyObject* brotli_decompress(PyObject *self, PyObject *args, PyObject *keywds) { +- PyObject *ret = NULL; +- Py_buffer input; ++ BrotliDecoderState* state; ++ BrotliDecoderResult result; ++ + const uint8_t* next_in; + size_t available_in; +- int ok; ++ ++ uint8_t* next_out; ++ size_t available_out; ++ BlocksOutputBuffer buffer = {.list=NULL}; ++ PyObject *ret; + + static const char *kwlist[] = {"string", NULL}; ++ Py_buffer input; ++ int ok; + + #if PY_MAJOR_VERSION >= 3 + ok = PyArg_ParseTupleAndKeywords(args, keywds, "y*|:decompress", +- const_cast(kwlist), &input); ++ (char**) kwlist, &input); + #else + ok = PyArg_ParseTupleAndKeywords(args, keywds, "s*|:decompress", +- const_cast(kwlist), &input); ++ (char**) kwlist, &input); + #endif + +- if (!ok) ++ if (!ok) { + return NULL; ++ } + +- std::vector output; ++ state = BrotliDecoderCreateInstance(0, 0, 0); + +- /* >>> Pure C block; release python GIL. */ +- Py_BEGIN_ALLOW_THREADS ++ next_in = (uint8_t*) input.buf; ++ available_in = input.len; + +- BrotliDecoderState* state = BrotliDecoderCreateInstance(0, 0, 0); ++ if (BlocksOutputBuffer_InitAndGrow(&buffer, &available_out, &next_out) < 0) { ++ goto error; ++ } + +- BrotliDecoderResult result = BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT; +- next_in = static_cast(input.buf); +- available_in = input.len; +- while (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) { +- size_t available_out = 0; ++ while (1) { ++ Py_BEGIN_ALLOW_THREADS + result = BrotliDecoderDecompressStream(state, &available_in, &next_in, +- &available_out, 0, 0); +- const uint8_t* next_out = BrotliDecoderTakeOutput(state, &available_out); +- if (available_out != 0) +- output.insert(output.end(), next_out, next_out + available_out); ++ &available_out, &next_out, 0); ++ Py_END_ALLOW_THREADS ++ ++ if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) { ++ if (available_out == 0) { ++ if (BlocksOutputBuffer_Grow(&buffer, &available_out, &next_out) < 0) { ++ goto error; ++ } ++ } ++ continue; ++ } ++ ++ break; + } +- ok = result == BROTLI_DECODER_RESULT_SUCCESS && !available_in; +- BrotliDecoderDestroyInstance(state); + +- Py_END_ALLOW_THREADS +- /* <<< Pure C block end. Python GIL reacquired. */ ++ if (result != BROTLI_DECODER_RESULT_SUCCESS || available_in != 0) { ++ goto error; ++ } + +- PyBuffer_Release(&input); +- if (ok) { +- ret = PyBytes_FromStringAndSize((char*)(output.size() ? &output[0] : NULL), output.size()); +- } else { +- PyErr_SetString(BrotliError, "BrotliDecompress failed"); ++ ret = BlocksOutputBuffer_Finish(&buffer, available_out); ++ if (ret != NULL) { ++ goto finally; + } + ++error: ++ BlocksOutputBuffer_OnError(&buffer); ++ PyErr_SetString(BrotliError, "BrotliDecompress failed"); ++ ret = NULL; ++ ++finally: ++ BrotliDecoderDestroyInstance(state); ++ PyBuffer_Release(&input); + return ret; + } + +diff --git a/setup.py b/setup.py +index 3afb0351d..5d934831c 100644 +--- a/setup.py ++++ b/setup.py +@@ -71,40 +71,33 @@ def build_extension(self, ext): + log.info("building '%s' extension", ext.name) + + c_sources = [] +- cxx_sources = [] + for source in ext.sources: + if source.endswith('.c'): + c_sources.append(source) +- else: +- cxx_sources.append(source) + extra_args = ext.extra_compile_args or [] + + objects = [] +- for lang, sources in (('c', c_sources), ('c++', cxx_sources)): +- if lang == 'c++': +- if self.compiler.compiler_type == 'msvc': +- extra_args.append('/EHsc') +- +- macros = ext.define_macros[:] +- if platform.system() == 'Darwin': +- macros.append(('OS_MACOSX', '1')) +- elif self.compiler.compiler_type == 'mingw32': +- # On Windows Python 2.7, pyconfig.h defines "hypot" as "_hypot", +- # This clashes with GCC's cmath, and causes compilation errors when +- # building under MinGW: http://bugs.python.org/issue11566 +- macros.append(('_hypot', 'hypot')) +- for undef in ext.undef_macros: +- macros.append((undef,)) +- +- objs = self.compiler.compile( +- sources, +- output_dir=self.build_temp, +- macros=macros, +- include_dirs=ext.include_dirs, +- debug=self.debug, +- extra_postargs=extra_args, +- depends=ext.depends) +- objects.extend(objs) ++ ++ macros = ext.define_macros[:] ++ if platform.system() == 'Darwin': ++ macros.append(('OS_MACOSX', '1')) ++ elif self.compiler.compiler_type == 'mingw32': ++ # On Windows Python 2.7, pyconfig.h defines "hypot" as "_hypot", ++ # This clashes with GCC's cmath, and causes compilation errors when ++ # building under MinGW: http://bugs.python.org/issue11566 ++ macros.append(('_hypot', 'hypot')) ++ for undef in ext.undef_macros: ++ macros.append((undef,)) ++ ++ objs = self.compiler.compile( ++ c_sources, ++ output_dir=self.build_temp, ++ macros=macros, ++ include_dirs=ext.include_dirs, ++ debug=self.debug, ++ extra_postargs=extra_args, ++ depends=ext.depends) ++ objects.extend(objs) + + self._built_objects = objects[:] + if ext.extra_objects: +@@ -117,7 +110,7 @@ def build_extension(self, ext): + + ext_path = self.get_ext_fullpath(ext.name) + # Detect target language, if not provided +- language = ext.language or self.compiler.detect_language(sources) ++ language = ext.language or self.compiler.detect_language(c_sources) + + self.compiler.link_shared_object( + objects, +@@ -180,7 +173,7 @@ def build_extension(self, ext): + Extension( + '_brotli', + sources=[ +- 'python/_brotli.cc', ++ 'python/_brotli.c', + 'c/common/constants.c', + 'c/common/context.c', + 'c/common/dictionary.c', +@@ -267,8 +260,7 @@ def build_extension(self, ext): + ], + include_dirs=[ + 'c/include', +- ], +- language='c++'), ++ ]), + ] + + TEST_SUITE = 'setup.get_test_suite' + diff --git a/backport-add-max_length-to-Python-streaming-decompression.patch b/backport-add-max_length-to-Python-streaming-decompression.patch new file mode 100644 index 0000000..d4c6264 --- /dev/null +++ b/backport-add-max_length-to-Python-streaming-decompression.patch @@ -0,0 +1,281 @@ +From eb3a31e2d356d5a633de995afe7fe60e590a26d8 Mon Sep 17 00:00:00 2001 +From: Robert Obryk +Date: Wed, 18 Sep 2024 15:25:06 +0200 +Subject: [PATCH] add max_length to Python streaming decompression + +--- + python/_brotli.c | 192 ++++++++++++++++-------- + 1 files changed, 133 insertions(+), 59 deletions(-) + +diff --git a/python/_brotli.c b/python/_brotli.c +index 75c54c489..f86b04f93 100644 +--- a/python/_brotli.c ++++ b/python/_brotli.c +@@ -606,57 +606,6 @@ static PyTypeObject brotli_CompressorType = { + brotli_Compressor_new, /* tp_new */ + }; + +-static PyObject* decompress_stream(BrotliDecoderState* dec, +- uint8_t* input, size_t input_length) { +- BrotliDecoderResult result; +- +- size_t available_in = input_length; +- const uint8_t* next_in = input; +- +- size_t available_out; +- uint8_t* next_out; +- BlocksOutputBuffer buffer = {.list=NULL}; +- PyObject *ret; +- +- if (BlocksOutputBuffer_InitAndGrow(&buffer, PY_SSIZE_T_MAX, &available_out, &next_out) < 0) { +- goto error; +- } +- +- while (1) { +- Py_BEGIN_ALLOW_THREADS +- result = BrotliDecoderDecompressStream(dec, +- &available_in, &next_in, +- &available_out, &next_out, NULL); +- Py_END_ALLOW_THREADS +- +- if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) { +- if (available_out == 0) { +- if (BlocksOutputBuffer_Grow(&buffer, &available_out, &next_out) < 0) { +- goto error; +- } +- } +- continue; +- } +- +- break; +- } +- +- if (result == BROTLI_DECODER_RESULT_ERROR || available_in != 0) { +- goto error; +- } +- +- ret = BlocksOutputBuffer_Finish(&buffer, available_out); +- if (ret != NULL) { +- goto finally; +- } +- +-error: +- BlocksOutputBuffer_OnError(&buffer); +- ret = NULL; +-finally: +- return ret; +-} +- + PyDoc_STRVAR(brotli_Decompressor_doc, + "An object to decompress a byte string.\n" + "\n" +@@ -669,10 +618,14 @@ PyDoc_STRVAR(brotli_Decompressor_doc, + typedef struct { + PyObject_HEAD + BrotliDecoderState* dec; ++ uint8_t* unconsumed_data; ++ size_t unconsumed_data_length; + } brotli_Decompressor; + + static void brotli_Decompressor_dealloc(brotli_Decompressor* self) { + BrotliDecoderDestroyInstance(self->dec); ++ if (self->unconsumed_data) ++ free(self->unconsumed_data); + #if PY_MAJOR_VERSION >= 3 + Py_TYPE(self)->tp_free((PyObject*)self); + #else +@@ -688,6 +641,9 @@ static PyObject* brotli_Decompressor_new(PyTypeObject *type, PyObject *args, PyO + self->dec = BrotliDecoderCreateInstance(0, 0, 0); + } + ++ self->unconsumed_data = NULL; ++ self->unconsumed_data_length = 0; ++ + return (PyObject *)self; + } + +@@ -706,6 +662,79 @@ static int brotli_Decompressor_init(brotli_Decompressor *self, PyObject *args, P + return 0; + } + ++static PyObject* decompress_stream(brotli_Decompressor* self, ++ uint8_t* input, size_t input_length, Py_ssize_t output_buffer_limit) { ++ BrotliDecoderResult result; ++ ++ size_t available_in = input_length; ++ const uint8_t* next_in = input; ++ ++ size_t available_out; ++ uint8_t* next_out; ++ uint8_t* new_tail; ++ BlocksOutputBuffer buffer = {.list=NULL}; ++ PyObject *ret; ++ ++ if (BlocksOutputBuffer_InitAndGrow(&buffer, output_buffer_limit, &available_out, &next_out) < 0) { ++ goto error; ++ } ++ ++ while (1) { ++ Py_BEGIN_ALLOW_THREADS ++ result = BrotliDecoderDecompressStream(self->dec, ++ &available_in, &next_in, ++ &available_out, &next_out, NULL); ++ Py_END_ALLOW_THREADS ++ ++ if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) { ++ if (available_out == 0) { ++ if (buffer.allocated == PY_SSIZE_T_MAX) { ++ PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); ++ goto error; ++ } ++ if (buffer.allocated == output_buffer_limit) { ++ // We've reached the output length limit. ++ break; ++ } ++ if (BlocksOutputBuffer_Grow(&buffer, &available_out, &next_out) < 0) { ++ goto error; ++ } ++ } ++ continue; ++ } ++ ++ if (result == BROTLI_DECODER_RESULT_ERROR || available_in != 0) { ++ available_in = 0; ++ goto error; ++ } ++ ++ break; ++ } ++ ++ ret = BlocksOutputBuffer_Finish(&buffer, available_out); ++ if (ret != NULL) { ++ goto finally; ++ } ++ ++error: ++ BlocksOutputBuffer_OnError(&buffer); ++ ret = NULL; ++ ++finally: ++ new_tail = available_in > 0 ? malloc(available_in) : NULL; ++ if (available_in > 0) { ++ memcpy(new_tail, next_in, available_in); ++ } ++ if (self->unconsumed_data) { ++ free(self->unconsumed_data); ++ } ++ self->unconsumed_data = new_tail; ++ self->unconsumed_data_length = available_in; ++ ++ return ret; ++} ++ ++ + PyDoc_STRVAR(brotli_Decompressor_process_doc, + "Process \"string\" for decompression, returning a string that contains \n" + "decompressed output data. This data should be concatenated to the output \n" +@@ -713,28 +742,38 @@ PyDoc_STRVAR(brotli_Decompressor_process_doc, + "Some or all of the input may be kept in internal buffers for later \n" + "processing, and the decompressed output data may be empty until enough input \n" + "has been accumulated.\n" ++"If output_buffer_limit is set, no more than output_buffer_limit bytes will be\n" ++"returned. If the limit is reached, further calls to process (potentially with\n" ++"empty input) will continue to yield more data. If, after returning a string of\n" ++"the length equal to limit, can_accept_more_data() returns False, process()\n" ++"must only be called with empty input until can_accept_more_data() once again\n" ++"returns True.\n" + "\n" + "Signature:\n" +-" decompress(string)\n" ++" decompress(string, output_buffer_limit=int)\n" + "\n" + "Args:\n" + " string (bytes): The input data\n" +-"\n" +-"Returns:\n" ++"\n""Returns:\n" + " The decompressed output data (bytes)\n" + "\n" + "Raises:\n" + " brotli.error: If decompression fails\n"); + +-static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject *args) { ++static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject *args, PyObject* keywds) { + PyObject* ret; + Py_buffer input; + int ok; ++ Py_ssize_t output_buffer_limit = PY_SSIZE_T_MAX; ++ uint8_t* data; ++ size_t data_length; ++ ++ static char* kwlist[] = { "", "output_buffer_limit", NULL }; + + #if PY_MAJOR_VERSION >= 3 +- ok = PyArg_ParseTuple(args, "y*:process", &input); ++ ok = PyArg_ParseTupleAndKeywords(args, keywds, "y*|n:process", kwlist, &input, &output_buffer_limit); + #else +- ok = PyArg_ParseTuple(args, "s*:process", &input); ++ ok = PyArg_ParseTupleAndKeywords(args, keywds, "s*|n:process", kwlist, &input, &output_buffer_limit); + #endif + + if (!ok) { +@@ -745,7 +784,20 @@ static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject + goto error; + } + +- ret = decompress_stream(self->dec, (uint8_t*) input.buf, input.len); ++ if (self->unconsumed_data_length > 0) { ++ if (input.len > 0) { ++ PyErr_SetString(BrotliError, "brotli: decoder process called with data when 'can_accept_more_data()' is False"); ++ ret = NULL; ++ goto finally; ++ } ++ data = self->unconsumed_data; ++ data_length = self->unconsumed_data_length; ++ } else { ++ data = (uint8_t*)input.buf; ++ data_length = input.len; ++ } ++ ++ ret = decompress_stream(self, data, data_length, output_buffer_limit); + if (ret != NULL) { + goto finally; + } +@@ -787,13 +839,35 @@ static PyObject* brotli_Decompressor_is_finished(brotli_Decompressor *self) { + } + } + ++PyDoc_STRVAR(brotli_Decompressor_can_accept_more_data_doc, ++"Checks if the decoder instance can accept more compressed data. If the decompress()\n" ++"method on this instance of decompressor was never called with max_length,\n" ++"this method will always return True.\n" ++"\n" ++"Signature:" ++" can_accept_more_data()\n" ++"\n" ++"Returns:\n" ++" True if the decoder is ready to accept more compressed data via decompress()\n" ++" False if the decoder needs to output some data via decompress(b'') before\n" ++" being provided any more compressed data\n"); ++ ++static PyObject* brotli_Decompressor_can_accept_more_data(brotli_Decompressor* self) { ++ if (self->unconsumed_data_length > 0) { ++ Py_RETURN_FALSE; ++ } else { ++ Py_RETURN_TRUE; ++ } ++} ++ + static PyMemberDef brotli_Decompressor_members[] = { + {NULL} /* Sentinel */ + }; + + static PyMethodDef brotli_Decompressor_methods[] = { +- {"process", (PyCFunction)brotli_Decompressor_process, METH_VARARGS, brotli_Decompressor_process_doc}, ++ {"process", (PyCFunction)brotli_Decompressor_process, METH_VARARGS | METH_KEYWORDS, brotli_Decompressor_process_doc}, + {"is_finished", (PyCFunction)brotli_Decompressor_is_finished, METH_NOARGS, brotli_Decompressor_is_finished_doc}, ++ {"can_accept_more_data", (PyCFunction)brotli_Decompressor_can_accept_more_data, METH_NOARGS, brotli_Decompressor_can_accept_more_data_doc}, + {NULL} /* Sentinel */ + }; + + diff --git a/backport-add-size-limit-to-buffer.patch b/backport-add-size-limit-to-buffer.patch new file mode 100644 index 0000000..ed04015 --- /dev/null +++ b/backport-add-size-limit-to-buffer.patch @@ -0,0 +1,98 @@ +From 28ce91caf605ac5481e9ca69131a28e1087574b7 Mon Sep 17 00:00:00 2001 +From: Robert Obryk +Date: Tue, 17 Sep 2024 16:50:39 +0200 +Subject: [PATCH] add size limit to buffer + +--- + python/_brotli.c | 32 +++++++++++++++++++++++--------- + 1 file changed, 23 insertions(+), 9 deletions(-) + +diff --git a/python/_brotli.c b/python/_brotli.c +index c6a0da03d..75c54c489 100644 +--- a/python/_brotli.c ++++ b/python/_brotli.c +@@ -23,6 +23,7 @@ typedef struct { + PyObject *list; + /* Number of whole allocated size. */ + Py_ssize_t allocated; ++ Py_ssize_t size_limit; + } BlocksOutputBuffer; + + static const char unable_allocate_msg[] = "Unable to allocate output buffer."; +@@ -69,11 +70,17 @@ static const Py_ssize_t BUFFER_BLOCK_SIZE[] = + Return -1 on failure + */ + static inline int +-BlocksOutputBuffer_InitAndGrow(BlocksOutputBuffer *buffer, ++BlocksOutputBuffer_InitAndGrow(BlocksOutputBuffer *buffer, Py_ssize_t size_limit, + size_t *avail_out, uint8_t **next_out) + { + PyObject *b; +- const Py_ssize_t block_size = BUFFER_BLOCK_SIZE[0]; ++ Py_ssize_t block_size = BUFFER_BLOCK_SIZE[0]; ++ ++ assert(size_limit > 0); ++ ++ if (size_limit < block_size) { ++ block_size = size_limit; ++ } + + // Ensure .list was set to NULL, for BlocksOutputBuffer_OnError(). + assert(buffer->list == NULL); +@@ -94,6 +101,7 @@ BlocksOutputBuffer_InitAndGrow(BlocksOutputBuffer *buffer, + + // Set variables + buffer->allocated = block_size; ++ buffer->size_limit = size_limit; + + *avail_out = (size_t) block_size; + *next_out = (uint8_t*) PyBytes_AS_STRING(b); +@@ -122,10 +130,16 @@ BlocksOutputBuffer_Grow(BlocksOutputBuffer *buffer, + block_size = BUFFER_BLOCK_SIZE[Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE) - 1]; + } + +- // Check buffer->allocated overflow +- if (block_size > PY_SSIZE_T_MAX - buffer->allocated) { +- PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); +- return -1; ++ if (block_size > buffer->size_limit - buffer->allocated) { ++ block_size = buffer->size_limit - buffer->allocated; ++ } ++ ++ if (block_size == 0) { ++ // We are at the size_limit (either the provided one, in which case we ++ // shouldn't have been called, or the implicit PY_SSIZE_T_MAX one, in ++ // which case we wouldn't be able to concatenate the blocks at the end). ++ PyErr_SetString(PyExc_MemoryError, "too long"); ++ return -1; + } + + // Create the block +@@ -291,7 +305,7 @@ static PyObject* compress_stream(BrotliEncoderState* enc, BrotliEncoderOperation + BlocksOutputBuffer buffer = {.list=NULL}; + PyObject *ret; + +- if (BlocksOutputBuffer_InitAndGrow(&buffer, &available_out, &next_out) < 0) { ++ if (BlocksOutputBuffer_InitAndGrow(&buffer, PY_SSIZE_T_MAX, &available_out, &next_out) < 0) { + goto error; + } + +@@ -604,7 +618,7 @@ static PyObject* decompress_stream(BrotliDecoderState* dec, + BlocksOutputBuffer buffer = {.list=NULL}; + PyObject *ret; + +- if (BlocksOutputBuffer_InitAndGrow(&buffer, &available_out, &next_out) < 0) { ++ if (BlocksOutputBuffer_InitAndGrow(&buffer, PY_SSIZE_T_MAX, &available_out, &next_out) < 0) { + goto error; + } + +@@ -877,7 +891,7 @@ static PyObject* brotli_decompress(PyObject *self, PyObject *args, PyObject *key + next_in = (uint8_t*) input.buf; + available_in = input.len; + +- if (BlocksOutputBuffer_InitAndGrow(&buffer, &available_out, &next_out) < 0) { ++ if (BlocksOutputBuffer_InitAndGrow(&buffer, PY_SSIZE_T_MAX, &available_out, &next_out) < 0) { + goto error; + } + + diff --git a/brotli.spec b/brotli.spec index 92df280..67e0b8b 100644 --- a/brotli.spec +++ b/brotli.spec @@ -1,6 +1,6 @@ Name: brotli Version: 1.0.9 -Release: 3 +Release: 4 Summary: Lossless compression algorithm License: MIT @@ -8,6 +8,9 @@ URL: https://github.com/google/brotli Source0: https://github.com/google/brotli/archive/v%{version}.tar.gz Patch6000: backport-Revert-Add-runtime-linker-path-to-pkg-config-files.patch +Patch6001: backport-Python-use-a-new-output-buffer-code.patch +Patch6002: backport-add-size-limit-to-buffer.patch +Patch6003: backport-add-max_length-to-Python-streaming-decompression.patch BuildRequires: python3-devel gcc-c++ gcc cmake @@ -92,6 +95,9 @@ popd %{_mandir}/man3/* %changelog +* Thu Nov 06 2025 wangjiang - 1.0.9-4 +- fix CVE-2025-6176 + * Tue Oct 25 2022 yanglongkang - 1.0.9-3 - rebuild for next release -- Gitee