Address Security Issue GHSA-jc7w-c686-c4v9

ulikunitz · ulikunitz · commit 88ddf1d0d98d · 2025-08-21T17:57:47.000+02:00
This commit addresses security issue GHSA-jc7w-c686-c4v9. The mitigating measures are described for the Reader type and I added a TestZeroPrefixIssue function to test the mitigations. // # Security concerns // // Note that the LZMA format doesn't support a magic marker in the header. So // [NewReader] cannot determine whether it reads the actual header. For instance // the LZMA stream might have a zero byte in front of the header, leading to // larger dictionary sizes and file sizes. The code will detect later that there // are problems with the stream, but the dictionary has already been allocated // and this might consume a lot of memory. // // Version 0.5.14 introduces built-in mitigations: // // - The [ReaderConfig] DictCap field is now interpreted as a limit for the // dictionary size. // - The default is 2 Gigabytes (2^31 bytes). // - Users can check with the [Reader.Header] method what the actual values are in // their LZMA files and set a smaller limit using [ReaderConfig]. // - The dictionary size doesn't exceed the larger of the file size and // the minimum dictionary size. This is another measure to prevent huge // memory allocations for the dictionary. // - The code supports stream sizes only up to a pebibyte (1024^5).
diff --git a/TODO.md b/TODO.md
@@ -1,8 +1,13 @@
 # TODO list
 
-## Release v0.5.x
-
-1. Support check flag in gxz command.
+## Release v0.5.14
+
+* If the DictionarySize is larger than the UncompressedSize, set it to
+  UncompressedSize
+* Make a Header() (h Header, ok bool) function so the user can implement their
+  own policy
+* Add documentation to Reader to explain the situation
+* Add a TODO for the rewrite version
 
 ## Release v0.6
 
diff --git a/lzma/header.go b/lzma/header.go
@@ -60,36 +60,36 @@ const noHeaderSize uint64 = 1<<64 - 1
 // HeaderLen provides the length of the LZMA file header.
 const HeaderLen = 13
 
-// header represents the header of an LZMA file.
-type header struct {
-	properties Properties
-	dictCap    int
-	// uncompressed size; negative value if no size is given
-	size int64
+// Header represents the header of an LZMA file.
+type Header struct {
+	Properties Properties
+	DictSize   uint32 // dictionary size as stored in the header
+	// Size is the uncompressed size; it is negative if no size is given.
+	Size int64
 }
 
 // marshalBinary marshals the header.
-func (h *header) marshalBinary() (data []byte, err error) {
-	if err = h.properties.verify(); err != nil {
+func (h *Header) marshalBinary() (data []byte, err error) {
+	if err = h.Properties.verify(); err != nil {
 		return nil, err
 	}
-	if !(0 <= h.dictCap && int64(h.dictCap) <= MaxDictCap) {
+	if !(h.DictSize <= MaxDictCap) {
 		return nil, fmt.Errorf("lzma: DictCap %d out of range",
-			h.dictCap)
+			h.DictSize)
 	}
 
 	data = make([]byte, 13)
 
 	// property byte
-	data[0] = h.properties.Code()
+	data[0] = h.Properties.Code()
 
 	// dictionary capacity
-	putUint32LE(data[1:5], uint32(h.dictCap))
+	putUint32LE(data[1:5], uint32(h.DictSize))
 
 	// uncompressed size
 	var s uint64
-	if h.size > 0 {
-		s = uint64(h.size)
+	if h.Size > 0 {
+		s = uint64(h.Size)
 	} else {
 		s = noHeaderSize
 	}
@@ -99,20 +99,20 @@ func (h *header) marshalBinary() (data []byte, err error) {
 }
 
 // unmarshalBinary unmarshals the header.
-func (h *header) unmarshalBinary(data []byte) error {
+func (h *Header) unmarshalBinary(data []byte) error {
 	if len(data) != HeaderLen {
 		return errors.New("lzma.unmarshalBinary: data has wrong length")
 	}
 
 	// properties
 	var err error
-	if h.properties, err = PropertiesForCode(data[0]); err != nil {
+	if h.Properties, err = PropertiesForCode(data[0]); err != nil {
 		return err
 	}
 
 	// dictionary capacity
-	h.dictCap = int(uint32LE(data[1:]))
-	if h.dictCap < 0 {
+	h.DictSize = uint32LE(data[1:])
+	if int(h.DictSize) < 0 {
 		return errors.New(
 			"LZMA header: dictionary capacity exceeds maximum " +
 				"integer")
@@ -121,10 +121,10 @@ func (h *header) unmarshalBinary(data []byte) error {
 	// uncompressed size
 	s := uint64LE(data[5:])
 	if s == noHeaderSize {
-		h.size = -1
+		h.Size = -1
 	} else {
-		h.size = int64(s)
-		if h.size < 0 {
+		h.Size = int64(s)
+		if h.Size < 0 {
 			return errors.New(
 				"LZMA header: uncompressed size " +
 					"out of int64 range")
@@ -134,9 +134,9 @@ func (h *header) unmarshalBinary(data []byte) error {
 	return nil
 }
 
-// validDictCap checks whether the dictionary capacity is correct. This
+// validDictSize checks whether the dictionary capacity is correct. This
 // is used to weed out wrong file headers.
-func validDictCap(dictcap int) bool {
+func validDictSize(dictcap int) bool {
 	if int64(dictcap) == MaxDictCap {
 		return true
 	}
@@ -155,13 +155,16 @@ func validDictCap(dictcap int) bool {
 // dictionary sizes of 2^n or 2^n+2^(n-1) with n >= 10 or 2^32-1. If
 // there is an explicit size it must not exceed 256 GiB. The length of
 // the data argument must be HeaderLen.
+//
+// This function should be disregarded because there is no guarantee that LZMA
+// files follow the constraints.
 func ValidHeader(data []byte) bool {
-	var h header
+	var h Header
 	if err := h.unmarshalBinary(data); err != nil {
 		return false
 	}
-	if !validDictCap(h.dictCap) {
+	if !validDictSize(int(h.DictSize)) {
 		return false
 	}
-	return h.size < 0 || h.size <= 1<<38
+	return h.Size < 0 || h.Size <= 1<<38
 }
diff --git a/lzma/header2_test.go b/lzma/header2_test.go
@@ -78,6 +78,7 @@ func TestHeaderLen(t *testing.T) {
 }
 
 func chunkHeaderSamples(t *testing.T) []chunkHeader {
+	_ = t
 	props := Properties{LC: 3, LP: 0, PB: 2}
 	headers := make([]chunkHeader, 0, 12)
 	for c := cEOS; c <= cLRND; c++ {
diff --git a/lzma/header_test.go b/lzma/header_test.go
@@ -7,18 +7,18 @@ package lzma
 import "testing"
 
 func TestHeaderMarshalling(t *testing.T) {
-	tests := []header{
-		{properties: Properties{3, 0, 2}, dictCap: 8 * 1024 * 1024,
-			size: -1},
-		{properties: Properties{4, 3, 3}, dictCap: 4096,
-			size: 10},
+	tests := []Header{
+		{Properties: Properties{3, 0, 2}, DictSize: 8 * 1024 * 1024,
+			Size: -1},
+		{Properties: Properties{4, 3, 3}, DictSize: 4096,
+			Size: 10},
 	}
 	for _, h := range tests {
 		data, err := h.marshalBinary()
 		if err != nil {
 			t.Fatalf("marshalBinary error %s", err)
 		}
-		var g header
+		var g Header
 		if err = g.unmarshalBinary(data); err != nil {
 			t.Fatalf("unmarshalBinary error %s", err)
 		}
@@ -29,11 +29,11 @@ func TestHeaderMarshalling(t *testing.T) {
 }
 
 func TestValidHeader(t *testing.T) {
-	tests := []header{
-		{properties: Properties{3, 0, 2}, dictCap: 8 * 1024 * 1024,
-			size: -1},
-		{properties: Properties{4, 3, 3}, dictCap: 4096,
-			size: 10},
+	tests := []Header{
+		{Properties: Properties{3, 0, 2}, DictSize: 8 * 1024 * 1024,
+			Size: -1},
+		{Properties: Properties{4, 3, 3}, DictSize: 4096,
+			Size: 10},
 	}
 	for _, h := range tests {
 		data, err := h.marshalBinary()
diff --git a/lzma/reader.go b/lzma/reader.go
@@ -6,25 +6,32 @@
 // Reader and Writer support the classic LZMA format. Reader2 and
 // Writer2 support the decoding and encoding of LZMA2 streams.
 //
-// The package is written completely in Go and doesn't rely on any external
+// The package is written completely in Go and does not rely on any external
 // library.
 package lzma
 
 import (
 	"errors"
+	"fmt"
 	"io"
 )
 
 // ReaderConfig stores the parameters for the reader of the classic LZMA
 // format.
 type ReaderConfig struct {
+	// DictCap is, since v0.5.14, an upper limit for the dictionary size of
+	// a .lzma file. This helps to mitigate problems with mangled headers.
+	// A zero value selects the default limit.
 	DictCap int
 }
 
 // fill converts the zero values of the configuration to the default values.
 func (c *ReaderConfig) fill() {
 	if c.DictCap == 0 {
-		c.DictCap = 8 * 1024 * 1024
+		// Limit the dictionary to 2 GiB - 1 to address the zero prefix
+		// issue; the constant 1 << 31 overflows int on 32-bit platforms.
+		c.DictCap = 1<<31 - 1
+		// original: c.DictCap = 8 * 1024 * 1024
 	}
 }
 
@@ -39,10 +46,33 @@ func (c *ReaderConfig) Verify() error {
 }
 
 // Reader provides a reader for LZMA files or streams.
+//
+// # Security concerns
+//
+// Note that the LZMA format doesn't support a magic marker in the header. So
+// [NewReader] cannot determine whether it reads the actual header. For instance
+// the LZMA stream might have a zero byte in front of the header, leading to
+// larger dictionary sizes and file sizes. The code will detect later that there
+// are problems with the stream, but the dictionary has already been allocated
+// and this might consume a lot of memory.
+//
+// Version 0.5.14 introduces built-in mitigations:
+//
+//   - The [ReaderConfig] DictCap field is now interpreted as a limit for the
+//     dictionary size.
+//   - The default is 2 Gigabytes (2^31 bytes).
+//   - Users can check with the [Reader.Header] method what the actual values are in
+//     their LZMA files and set a smaller limit using [ReaderConfig].
+//   - The dictionary size doesn't exceed the larger of the file size and
+//     the minimum dictionary size. This is another measure to prevent huge
+//     memory allocations for the dictionary.
+//   - The code supports stream sizes only up to a pebibyte (1024^5).
 type Reader struct {
-	lzma io.Reader
-	h    header
-	d    *decoder
+	lzma   io.Reader
+	header Header
+	// headerOrig stores the original header read from the stream.
+	headerOrig Header
+	d          *decoder
 }
 
 // NewReader creates a new reader for an LZMA stream using the classic
@@ -51,8 +81,37 @@ func NewReader(lzma io.Reader) (r *Reader, err error) {
 	return ReaderConfig{}.NewReader(lzma)
 }
 
+// ErrDictSize is the error type describing a dictionary size problem.
+type ErrDictSize struct {
+	ConfigDictCap  int    // dictionary capacity taken from the configuration
+	HeaderDictSize uint32 // dictionary size taken from the header
+	Message        string // text returned by Error
+}
+
+// Error returns the stored Message, so *ErrDictSize can be used as an error.
+func (e *ErrDictSize) Error() string {
+	return e.Message
+}
+
+// newErrDictSize builds an ErrDictSize. The format string receives
+// configDictCap as %[1]d and headerDictSize as %[2]d, followed by args.
+func newErrDictSize(messageformat string, configDictCap int,
+	headerDictSize uint32, args ...interface{}) *ErrDictSize {
+	fmtArgs := append(
+		[]interface{}{configDictCap, headerDictSize}, args...)
+	e := &ErrDictSize{
+		ConfigDictCap:  configDictCap,
+		HeaderDictSize: headerDictSize,
+	}
+	e.Message = fmt.Sprintf(messageformat, fmtArgs...)
+	return e
+}
+
+// We support only files not larger than 1 << 50 bytes (a pebibyte, 1024^5).
+const maxStreamSize = 1 << 50
+
 // NewReader creates a new reader for an LZMA stream in the classic
-// format. The function reads and verifies the the header of the LZMA
+// format. The function reads and verifies the header of the LZMA
 // stream.
 func (c ReaderConfig) NewReader(lzma io.Reader) (r *Reader, err error) {
 	if err = c.Verify(); err != nil {
@@ -66,29 +125,63 @@ func (c ReaderConfig) NewReader(lzma io.Reader) (r *Reader, err error) {
 		return nil, err
 	}
 	r = &Reader{lzma: lzma}
-	if err = r.h.unmarshalBinary(data); err != nil {
+	if err = r.header.unmarshalBinary(data); err != nil {
 		return nil, err
 	}
-	if r.h.dictCap < MinDictCap {
-		r.h.dictCap = MinDictCap
+	r.headerOrig = r.header
+	dictSize := int64(r.header.DictSize)
+	if int64(c.DictCap) < dictSize {
+		return nil, newErrDictSize(
+			"lzma: header dictionary size %[2]d exceeds configured dictionary capacity %[1]d",
+			c.DictCap, uint32(dictSize),
+		)
+	}
+	if dictSize < MinDictCap {
+		dictSize = MinDictCap
+	}
+	// original code: disabled this because there is no point in increasing
+	// the dictionary above what is stated in the file.
+	/*
+		if int64(c.DictCap) > int64(dictSize) {
+			dictSize = int64(c.DictCap)
+		}
+	*/
+	size := r.header.Size
+	if size >= 0 && size < dictSize {
+		dictSize = size
 	}
-	dictCap := r.h.dictCap
-	if c.DictCap > dictCap {
-		dictCap = c.DictCap
+	// Protect against modified or malicious headers.
+	if size > maxStreamSize {
+		return nil, fmt.Errorf(
+			"lzma: stream size %d exceeds a pebibyte (1024^5)",
+			size)
 	}
+	if dictSize < MinDictCap {
+		dictSize = MinDictCap
+	}
+
+	r.header.DictSize = uint32(dictSize)
 
-	state := newState(r.h.properties)
-	dict, err := newDecoderDict(dictCap)
+	state := newState(r.header.Properties)
+	dict, err := newDecoderDict(int(dictSize))
 	if err != nil {
 		return nil, err
 	}
-	r.d, err = newDecoder(ByteReader(lzma), state, dict, r.h.size)
+	r.d, err = newDecoder(ByteReader(lzma), state, dict, r.header.Size)
 	if err != nil {
 		return nil, err
 	}
 	return r, nil
 }
 
+// Header returns the header as originally read from the LZMA stream. It lets
+// users see which parameters their LZMA files typically carry and choose the
+// DictCap field in [ReaderConfig] accordingly. The ok result is false if the
+// reader's decoder is nil.
+func (r *Reader) Header() (h Header, ok bool) {
+	return r.headerOrig, r.d != nil
+}
+
 // EOSMarker indicates that an EOS marker has been encountered.
 func (r *Reader) EOSMarker() bool {
 	return r.d.eosMarker
diff --git a/lzma/reader_test.go b/lzma/reader_test.go
diff --git a/lzma/writer.go b/lzma/writer.go

Original file line number	Diff line number	Diff line change
`@@ -78,6 +78,7 @@ func TestHeaderLen(t *testing.T) {`
`78`	`78`	`}`
`79`	`79`
`80`	`80`	`func chunkHeaderSamples(t *testing.T) []chunkHeader {`
	`81`	`+ _ = t`
`81`	`82`	`props := Properties{LC: 3, LP: 0, PB: 2}`
`82`	`83`	`headers := make([]chunkHeader, 0, 12)`
`83`	`84`	`for c := cEOS; c <= cLRND; c++ {`