66// Reader and Writer support the classic LZMA format. Reader2 and 
77// Writer2 support the decoding and encoding of LZMA2 streams. 
88// 
9- // The package is written completely in Go and doesn't  rely on any external 
9+ // The package is written completely in Go and does not  rely on any external 
1010// library. 
1111package  lzma
1212
1313import  (
1414	"errors" 
15+ 	"fmt" 
1516	"io" 
1617)
1718
// ReaderConfig stores the parameters for the reader of the classic LZMA
// format.
type ReaderConfig struct {
	// DictCap is the upper limit for the dictionary size announced in the
	// header of a .lzma file (since v0.5.14). Creating a reader fails with
	// an *ErrDictSize if the header asks for a larger dictionary, which
	// helps to mitigate problems with mangled headers. A zero value is
	// replaced by the package default when the configuration is filled.
	DictCap int
}
2327
2428// fill converts the zero values of the configuration to the default values. 
2529func  (c  * ReaderConfig ) fill () {
2630	if  c .DictCap  ==  0  {
27- 		c .DictCap  =  8  *  1024  *  1024 
31+ 		// set an upper limit of 2 GB for dictionary capacity to address 
32+ 		// the zero prefix security issue. 
33+ 		c .DictCap  =  1  <<  31 
34+ 		// original: c.DictCap = 8 * 1024 * 1024 
2835	}
2936}
3037
@@ -39,10 +46,33 @@ func (c *ReaderConfig) Verify() error {
3946}
4047
4148// Reader provides a reader for LZMA files or streams. 
49+ // 
50+ // # Security concerns 
51+ // 
52+ // Note that LZMA format doesn't support a magic marker in the header. So 
53+ // [NewReader] cannot determine whether it reads the actual header. For instance 
54+ // the LZMA stream might have a zero byte in front of the reader, leading to 
55+ // larger dictionary sizes and file sizes. The code will detect later that there 
56+ // are problems with the stream, but the dictionary has already been allocated 
57+ // and this might consume a lot of memory. 
58+ // 
59+ // Version 0.5.14 introduces built-in mitigations: 
60+ // 
61+ //   - The [ReaderConfig] DictCap field is now interpreted as a limit for the 
62+ //     dictionary size. 
63+ //   - The default is 2 Gigabytes (2^31 bytes). 
64+ //   - Users can check with the [Reader.Header] method what the actual values are in 
65+ //     their LZMA files and set a smaller limit using [ReaderConfig]. 
66+ //   - The dictionary size doesn't exceed the larger of the file size and 
67+ //     the minimum dictionary size. This is another measure to prevent huge 
68+ //     memory allocations for the dictionary. 
69+ //   - The code supports stream sizes only up to a pebibyte (1024^5). 
4270type  Reader  struct  {
43- 	lzma  io.Reader 
44- 	h     header 
45- 	d     * decoder 
71+ 	lzma    io.Reader 
72+ 	header  Header 
73+ 	// headerOrig stores the original header read from the stream. 
74+ 	headerOrig  Header 
75+ 	d           * decoder 
4676}
4777
4878// NewReader creates a new reader for an LZMA stream using the classic 
@@ -51,8 +81,37 @@ func NewReader(lzma io.Reader) (r *Reader, err error) {
5181	return  ReaderConfig {}.NewReader (lzma )
5282}
5383
// ErrDictSize reports a problem with the dictionary size found in the header
// of an LZMA stream.
type ErrDictSize struct {
	// ConfigDictCap is the dictionary capacity limit taken from the reader
	// configuration.
	ConfigDictCap int
	// HeaderDictSize is the dictionary size stated in the stream header.
	HeaderDictSize uint32
	// Message is the complete, preformatted error text.
	Message string
}

// Error returns the error message.
func (e *ErrDictSize) Error() string {
	return e.Message
}
95+ 
96+ func  newErrDictSize (messageformat  string ,
97+ 	configDictCap  int , headerDictSize  uint32 ,
98+ 	args  ... interface {}) * ErrDictSize  {
99+ 	newArgs  :=  make ([]interface {}, len (args )+ 2 )
100+ 	newArgs [0 ] =  configDictCap 
101+ 	newArgs [1 ] =  headerDictSize 
102+ 	copy (newArgs [2 :], args )
103+ 	return  & ErrDictSize {
104+ 		ConfigDictCap :  configDictCap ,
105+ 		HeaderDictSize : headerDictSize ,
106+ 		Message :        fmt .Sprintf (messageformat , newArgs ... ),
107+ 	}
108+ }
109+ 
// maxStreamSize is the largest uncompressed stream size accepted from a
// header: 1 << 50 bytes (a pebibyte, 1024^5).
const maxStreamSize = 1 << 50
112+ 
54113// NewReader creates a new reader for an LZMA stream in the classic 
55- // format. The function reads and verifies the the  header of the LZMA 
114+ // format. The function reads and verifies the header of the LZMA 
56115// stream. 
57116func  (c  ReaderConfig ) NewReader (lzma  io.Reader ) (r  * Reader , err  error ) {
58117	if  err  =  c .Verify (); err  !=  nil  {
@@ -66,29 +125,63 @@ func (c ReaderConfig) NewReader(lzma io.Reader) (r *Reader, err error) {
66125		return  nil , err 
67126	}
68127	r  =  & Reader {lzma : lzma }
69- 	if  err  =  r .h .unmarshalBinary (data ); err  !=  nil  {
128+ 	if  err  =  r .header .unmarshalBinary (data ); err  !=  nil  {
70129		return  nil , err 
71130	}
72- 	if  r .h .dictCap  <  MinDictCap  {
73- 		r .h .dictCap  =  MinDictCap 
131+ 	r .headerOrig  =  r .header 
132+ 	dictSize  :=  int64 (r .header .DictSize )
133+ 	if  int64 (c .DictCap ) <  dictSize  {
134+ 		return  nil , newErrDictSize (
135+ 			"lzma: header dictionary size %[2]d exceeds configured dictionary capacity %[1]d" ,
136+ 			c .DictCap , uint32 (dictSize ),
137+ 		)
138+ 	}
139+ 	if  dictSize  <  MinDictCap  {
140+ 		dictSize  =  MinDictCap 
141+ 	}
142+ 	// original code: disabled this because there is no point in increasing 
143+ 	// the dictionary above what is stated in the file. 
144+ 	/* 
145+ 		if int64(c.DictCap) > int64(dictSize) { 
146+ 			dictSize = int64(c.DictCap) 
147+ 		} 
148+ 	*/ 
149+ 	size  :=  r .header .Size 
150+ 	if  size  >=  0  &&  size  <  dictSize  {
151+ 		dictSize  =  size 
74152	}
75- 	dictCap  :=  r .h .dictCap 
76- 	if  c .DictCap  >  dictCap  {
77- 		dictCap  =  c .DictCap 
153+ 	// Protect against modified or malicious headers. 
154+ 	if  size  >  maxStreamSize  {
155+ 		return  nil , fmt .Errorf (
156+ 			"lzma: stream size %d exceeds a pebibyte (1024^5)" ,
157+ 			size )
78158	}
159+ 	if  dictSize  <  MinDictCap  {
160+ 		dictSize  =  MinDictCap 
161+ 	}
162+ 
163+ 	r .header .DictSize  =  uint32 (dictSize )
79164
80- 	state  :=  newState (r .h . properties )
81- 	dict , err  :=  newDecoderDict (dictCap )
165+ 	state  :=  newState (r .header . Properties )
166+ 	dict , err  :=  newDecoderDict (int ( dictSize ) )
82167	if  err  !=  nil  {
83168		return  nil , err 
84169	}
85- 	r .d , err  =  newDecoder (ByteReader (lzma ), state , dict , r .h . size )
170+ 	r .d , err  =  newDecoder (ByteReader (lzma ), state , dict , r .header . Size )
86171	if  err  !=  nil  {
87172		return  nil , err 
88173	}
89174	return  r , nil 
90175}
91176
177+ // Header returns the header as read from the LZMA stream. It is intended to 
178+ // allow the user to understand what parameters are typically provided in the 
179+ // headers of the LZMA files and set the DictCap field in [ReaderConfig] 
180+ // accordingly. 
181+ func  (r  * Reader ) Header () (h  Header , ok  bool ) {
182+ 	return  r .headerOrig , r .d  !=  nil 
183+ }
184+ 
92185// EOSMarker indicates that an EOS marker has been encountered. 
93186func  (r  * Reader ) EOSMarker () bool  {
94187	return  r .d .eosMarker 
0 commit comments