From a2a04638b9f8bdaef1872582d0b9255f66759965 Mon Sep 17 00:00:00 2001 From: Micah Snyder Date: Tue, 7 Feb 2023 18:48:46 -0800 Subject: [PATCH 1/3] Update libmspack 0.10.1alpha to 0.11alpha --- libclammspack/ChangeLog | 145 ++++++++++++ libclammspack/Makefile.am | 11 +- libclammspack/README | 5 +- libclammspack/cleanup.sh | 8 +- libclammspack/configure.ac | 6 +- libclammspack/doc/szdd_kwaj_format.html | 2 +- libclammspack/examples/cabrip.c | 2 +- libclammspack/libmspack.cygport | 2 +- libclammspack/mspack/cabc.c | 4 +- libclammspack/mspack/cabd.c | 66 +++--- libclammspack/mspack/chm.h | 7 +- libclammspack/mspack/chmc.c | 5 +- libclammspack/mspack/chmd.c | 210 ++++++++++++------ libclammspack/mspack/hlp.h | 2 +- libclammspack/mspack/hlpc.c | 4 +- libclammspack/mspack/hlpd.c | 4 +- libclammspack/mspack/kwaj.h | 2 +- libclammspack/mspack/kwajc.c | 4 +- libclammspack/mspack/kwajd.c | 66 +++--- libclammspack/mspack/lit.h | 6 +- libclammspack/mspack/litc.c | 4 +- libclammspack/mspack/litd.c | 4 +- libclammspack/mspack/lzssd.c | 4 +- libclammspack/mspack/lzx.h | 1 - libclammspack/mspack/lzxc.c | 4 +- libclammspack/mspack/lzxd.c | 196 +++------------- libclammspack/mspack/macros.h | 64 ++++++ libclammspack/mspack/mspack.def | 8 +- libclammspack/mspack/mszipc.c | 4 +- libclammspack/mspack/mszipd.c | 39 ++-- libclammspack/mspack/oab.h | 2 +- libclammspack/mspack/oabc.c | 4 +- libclammspack/mspack/oabd.c | 8 +- libclammspack/mspack/qtmc.c | 4 +- libclammspack/mspack/qtmd.c | 20 +- libclammspack/mspack/readbits.h | 19 +- libclammspack/mspack/readhuff.h | 47 ++-- libclammspack/mspack/system.c | 7 +- libclammspack/mspack/system.h | 89 ++------ libclammspack/mspack/szdd.h | 2 +- libclammspack/mspack/szddc.c | 4 +- libclammspack/mspack/szddd.c | 6 +- libclammspack/test/cabd_compare | 36 +-- libclammspack/test/cabd_test.c | 57 +++-- libclammspack/test/chmd_compare | 31 +-- libclammspack/test/chmd_find.c | 3 +- libclammspack/test/chmd_test.c | 51 ++++- libclammspack/test/chminfo.c | 
42 ++-- libclammspack/test/kwajd_test.c | 3 +- libclammspack/test/md5_fh.h | 9 +- libclammspack/test/msexpand_md5 | 44 ++++ .../cabd/cve-2017-11423-fname-overread.cab | Bin 0 -> 332 bytes .../cve-2018-18584-qtm-max-size-block.cab | Bin 0 -> 38990 bytes .../chmd/cve-2015-4468-namelen-bounds.chm | Bin 3245 -> 3245 bytes .../chmd/cve-2015-4469-namelen-bounds.chm | Bin 5634 -> 5634 bytes .../chmd/cve-2018-14679-off-by-one.chm | Bin 0 -> 3504 bytes .../chmd/cve-2018-14680-blank-filenames.chm | Bin 0 -> 4300 bytes .../chmd/cve-2018-14682-unicode-u100.chm | Bin 0 -> 4292 bytes .../chmd/cve-2018-18585-blank-filenames.chm | Bin 0 -> 4300 bytes .../chmd/cve-2019-1010305-name-overread.chm | Bin 0 -> 4292 bytes .../test_files/chmd/encints-32bit-both.chm | Bin 0 -> 4292 bytes .../test_files/chmd/encints-32bit-lengths.chm | Bin 0 -> 4292 bytes .../test_files/chmd/encints-32bit-offsets.chm | Bin 0 -> 4292 bytes .../test_files/chmd/encints-64bit-both.chm | Bin 0 -> 4292 bytes .../test_files/chmd/encints-64bit-lengths.chm | Bin 0 -> 4292 bytes .../test_files/chmd/encints-64bit-offsets.chm | Bin 0 -> 4292 bytes .../test/test_files/chmd/generate.pl | 205 +++++++++++++++++ .../test/test_files/kwajd/cve-2018-14681.kwj | Bin 0 -> 35 bytes 68 files changed, 1022 insertions(+), 560 deletions(-) create mode 100644 libclammspack/mspack/macros.h create mode 100755 libclammspack/test/msexpand_md5 create mode 100644 libclammspack/test/test_files/cabd/cve-2017-11423-fname-overread.cab create mode 100644 libclammspack/test/test_files/cabd/cve-2018-18584-qtm-max-size-block.cab create mode 100644 libclammspack/test/test_files/chmd/cve-2018-14679-off-by-one.chm create mode 100644 libclammspack/test/test_files/chmd/cve-2018-14680-blank-filenames.chm create mode 100644 libclammspack/test/test_files/chmd/cve-2018-14682-unicode-u100.chm create mode 100644 libclammspack/test/test_files/chmd/cve-2018-18585-blank-filenames.chm create mode 100644 
libclammspack/test/test_files/chmd/cve-2019-1010305-name-overread.chm create mode 100644 libclammspack/test/test_files/chmd/encints-32bit-both.chm create mode 100644 libclammspack/test/test_files/chmd/encints-32bit-lengths.chm create mode 100644 libclammspack/test/test_files/chmd/encints-32bit-offsets.chm create mode 100644 libclammspack/test/test_files/chmd/encints-64bit-both.chm create mode 100644 libclammspack/test/test_files/chmd/encints-64bit-lengths.chm create mode 100644 libclammspack/test/test_files/chmd/encints-64bit-offsets.chm create mode 100755 libclammspack/test/test_files/chmd/generate.pl create mode 100644 libclammspack/test/test_files/kwajd/cve-2018-14681.kwj diff --git a/libclammspack/ChangeLog b/libclammspack/ChangeLog index 4068c161b5..136b7e925b 100644 --- a/libclammspack/ChangeLog +++ b/libclammspack/ChangeLog @@ -1,3 +1,148 @@ +2023-02-03 Stuart Caie + + * configure.ac: do AC_CHECK_SIZEOF([off_t]) test only after + AC_SYS_LARGEFILE, because the latter can alter the size of off_t. + + * cabd_extract(): file->offset and file->length are unsigned ints, + both of them and their sum are checked to be <= CAB_LENGTHMAX. But + recent code stuffs file->length into an off_t and checks that instead. + On 32-bit architectures, if file->length > 2GiB then the off_t is + negative, evading the check. Ultimately this causes the decompression + functions to return MSPACK_ERR_ARGS as they already guard against + being asked to decompress a negative number of bytes. + +2023-02-01 Stuart Caie + + * readbits.h, readhuff.h, cabd.c, kwajd.c, lzxd.c, mszipd.c, qtmd.c: + ensure bit operations (including intermediary ones) are considered + as unsigned int, so UBSan is happy. + +2023-01-31 Stuart Caie + + * chmd.c: replace READ_ENCINT() macro with stricter read_encint() + function that reads no more than 63 or 31 bits so ENCINTs can never + be negative. 
+ + I'd prefer to use unsigned types, but off_t is used for file offsets + and lengths to match the environment's file I/O, so changing it is + tricky and would change the current public API. + + Additionally, UBSan complains about shifting a 1 into a signed + type's MSB. https://www.cs.utah.edu/~regehr/papers/tosem15.pdf + notes that this is legal in ANSI C and "fairly benign (and well- + defined until C99)", but C99 made it undefined for no good reason. + I don't agree with this, but I don't want someone else using a C99 + compiler to end up miscompiling the code. + + * chmd_read_headers(): the CHM's internally declared file length is + compared against its actual file length and a warning is printed if + they don't match. + + * chmd_extract(): files in the uncompressed section will print a + warning if their declared length goes beyond the declared end of the + CHM file. This may not match the actual CHM file length. You will + still get seek or read errors if a file's offset or length go beyond + the actual CHM file length. + + Files in the compressed section will now cause a decrunch error if + their declared offset goes beyond the uncompressed length of the + section. If their offset is OK but their declared length goes beyond + the end, they will print a warning and then decompress as much as + possible before causing an error. + +2023-01-02 Stuart Caie + + * kwajd_extract(): KWAJ compression method #2 is the QBasic variant + of the SZDD compression algorithm. Thanks to Jason Summers for finding + this and providing examples. + +2021-07-20 Stuart Caie + + * lzxd_decompress(): simplified the code that decodes match_offset. + Thanks to Jasper St. Pierre for prompting me to look at it. + +2020-12-30 Stuart Caie + + * cabd_read_string(): libmspack no longer rejects CAB files with + empty previnfo/nextinfo strings. Thanks to Simon Tatham for the + patch, and for noting that WiX v4 currently generates such files. 
+ +2020-08-10 Stuart Caie + + * lzxd_decompress(): merged the code for decoding aligned and + verbatim blocks, also verified there is no significant performance + penalty. + +2020-08-07 Stuart Caie + + * read_sys_file(): in a CHM file, the ControlData and ResetTable + files are loaded entirely into memory, regardless of file size. + This is not in the spirit of letting users control memory usage. + + ControlData previously had to be at least 28 bytes (in case a new, + larger version of the file ever appeared), but is now rejected + if not exactly 28 bytes. + + ResetTable can theoretically be huge; the longest LZX stream of + 16 exabytes could have a 4 petabyte ResetTable. Practically, the + largest seen in the wild is 46 kilobytes (PHP manuals). I picked + an arbitrary upper limit of 1MB; please get in contact if you + know of any CHM files in the wild that are larger than this. + + Thanks to seviezhou on Github for reporting this. + +2020-04-13 Stuart Caie + + * system.h: clear up libmspack's large file support. + + To support large files, do this: + + 1. add any defines that your compiler needs to enable large file + support. It may be supported by default. + 2. Define HAVE_FSEEKO if fseeko() and ftello() are available. + 3. Define SIZEOF_OFF_T to the value of sizeof(off_t); it must be a + literal value because sizeof() can't be used in preprocessor tests. + + libmspack uses the off_t datatype for all file offsets. If off_t is + less than 64 bits, libmspack will return an error when processing + CHM files with offsets beyond 2GB, and won't search for CAB headers + beyond 2GB into a file. In both cases, it prints a warning message + that the library doesn't support large files. + +2020-04-13 Stuart Caie + + * macros.h: new header for the D(), LD/LU and EndGet???() macros. + Use this instead of system.h.
+ + * system.h: if MSPACK_NO_DEFAULT_SYSTEM is defined, define + inline versions of the only standard C functions used in + mspack (strlen, memcmp, memset), so that no standard C library + functions are needed at all. + +2020-01-08 Stuart Caie + + * lzxd_decompress(): do not apply the E8 transformation on the + 32769th LZX frame! Thanks to Cezary Sliwa for discovering this + bug and providing an example cab file (which is + http://download.windowsupdate.com/d/msdownload/update/driver/ + drvs/2019/11/016c7f3e-809d-4720-893b- + e0d74f10c39d_35e12507628e8dc8ae5fb3332835f4253d2dab23.cab) + + * cabd_compare: use EXPAND.EXE instead of EXTRACT.EXE when + testing files in a directory called 'expand'. The example + cab file above is extracted wrongly by EXTRACT.EXE, but + correctly by EXPAND.EXE because they take different approaches + to E8 transformations: + + - EXTRACT.EXE writes "E8E8E8E8E8E8" to the last 6 bytes of + frame, looks for E8 bytes up to the last 6 bytes, then restores + the last 6 bytes, leaving partial transforms of 1-3 bytes if + E8 byte is found near the end of the frame + + - EXPAND.EXE looks for E8 bytes up to the last 10 bytes of a + frame, therefore the last 6 bytes are never altered and all + transforms are 4 bytes + 2019-02-18 Stuart Caie * chmd_read_headers(): a CHM file name beginning "::" but shorter diff --git a/libclammspack/Makefile.am b/libclammspack/Makefile.am index ec88b28ae5..440d0aa7a4 100644 --- a/libclammspack/Makefile.am +++ b/libclammspack/Makefile.am @@ -5,7 +5,10 @@ TESTS = $(check_PROGRAMS) ACLOCAL_AMFLAGS = -I m4 AM_CFLAGS = -# add "-DMSPACK_NO_DEFAULT_SYSTEM" to remove default mspack_system +# add "-DMSPACK_NO_DEFAULT_SYSTEM" to remove default mspack_system. +# however, note that many of the tests and examples provided DO rely on the +# default mspack_system and will fail without it -- any program with a call +# like "mspack_create_...(NULL)" expects a default mspack_system.
if GCC AM_CFLAGS += -Wall -Wextra -Wno-unused-parameter -Wno-unused-result endif @@ -34,7 +37,7 @@ libmspack_la_SOURCES = mspack/mspack.h \ mspack/lzx.h mspack/lzxc.c mspack/lzxd.c \ mspack/mszip.h mspack/mszipc.c mspack/mszipd.c \ mspack/qtm.h mspack/qtmd.c \ - mspack/readbits.h mspack/readhuff.h \ + mspack/macros.h mspack/readbits.h mspack/readhuff.h \ mspack/lzss.h mspack/lzssd.c \ mspack/des.h mspack/sha.h \ mspack/crc32.c mspack/crc32.h @@ -46,14 +49,14 @@ libmscabd_la_SOURCES = mspack/mspack.h \ mspack/lzx.h mspack/lzxd.c \ mspack/mszip.h mspack/mszipd.c \ mspack/qtm.h mspack/qtmd.c \ - mspack/readbits.h mspack/readhuff.h + mspack/macros.h mspack/readbits.h mspack/readhuff.h libmscabd_la_LDFLAGS = -export-symbols-regex '^mspack_' libmschmd_la_SOURCES = mspack/mspack.h \ mspack/system.h mspack/system.c \ mspack/chm.h mspack/chmd.c \ mspack/lzx.h mspack/lzxd.c \ - mspack/readbits.h mspack/readhuff.h + mspack/macros.h mspack/readbits.h mspack/readhuff.h libmschmd_la_LDFLAGS = -export-symbols-regex '^mspack_' examples_cabd_memory_SOURCES = examples/cabd_memory.c libmscabd.la diff --git a/libclammspack/README b/libclammspack/README index a3eaf53528..b2c6c4f8c7 100644 --- a/libclammspack/README +++ b/libclammspack/README @@ -1,4 +1,4 @@ -libmspack 0.10.1alpha +libmspack 0.11alpha The purpose of libmspack is to provide compressors and decompressors, archivers and dearchivers for Microsoft compression formats: CAB, CHM, WIM, @@ -90,7 +90,7 @@ examples/msexpand.c - expands an SZDD or KWAJ file examples/oabextract.c - extracts an Exchange Offline Address Book (.LZX) file test/cabd_c10 - tests the CAB decompressor on the C10 collection -test/cabd_compare - compares libmspack with Microsoft's EXTRACT.EXE +test/cabd_compare - compares libmspack with Microsoft's EXTRACT/EXPAND.EXE test/cabd_md5 - shows MD5 checksums of all files in a CAB file/set test/chmd_compare - compares libmspack with Microsoft's HH.EXE test/chmd_find.c - checks all files in a CHM file can be 
fast-found @@ -98,6 +98,7 @@ test/chmd_md5.c - shows MD5 checksums of all files within a CHM file test/chmd_order.c - extracts files in a CHM file in four different ways test/chminfo.c - prints verbose information about CHM file structures test/msdecompile_md5 - runs Microsoft's HH.EXE -DECOMPILE via WINE +test/msexpand_md5 - runs Microsoft's EXPAND.EXE via WINE test/msextract_md5 - runs Microsoft's EXTRACT.EXE via WINE Here is a simple example of usage, which will create a CAB decompressor, diff --git a/libclammspack/cleanup.sh b/libclammspack/cleanup.sh index 6bd616b518..a2e8ec3ff1 100755 --- a/libclammspack/cleanup.sh +++ b/libclammspack/cleanup.sh @@ -1,9 +1,3 @@ #!/bin/sh # deletes all auto-generated / compiled files - -for dir in . doc examples mspack test; do - while read path; do - chmod -R a+rwx $dir/$path 2>/dev/null - rm -vrf $dir/$path - done < $dir/.gitignore -done +git clean -dfX diff --git a/libclammspack/configure.ac b/libclammspack/configure.ac index f3da1803ae..ce60f60a52 100644 --- a/libclammspack/configure.ac +++ b/libclammspack/configure.ac @@ -1,12 +1,12 @@ # -*- Autoconf -*- # Process this file with autoconf to produce a configure script. 
AC_PREREQ(2.59) -AC_INIT([libmspack],[0.10.1alpha],[kyzer@cabextract.org.uk]) +AC_INIT([libmspack],[0.11alpha],[kyzer@cabextract.org.uk]) AC_CONFIG_MACRO_DIR([m4]) AM_INIT_AUTOMAKE([1.11]) AM_SILENT_RULES([yes]) AC_CONFIG_SRCDIR([mspack/mspack.h]) -AC_CONFIG_HEADER([config.h]) +AC_CONFIG_HEADERS([config.h]) # --enable-debug option AC_ARG_ENABLE(debug, @@ -34,7 +34,6 @@ AC_C_BIGENDIAN AC_TYPE_MODE_T AC_TYPE_OFF_T AC_TYPE_SIZE_T -AC_CHECK_SIZEOF([off_t]) # Checks for library functions AX_FUNC_MKDIR @@ -43,6 +42,7 @@ AC_CHECK_FUNCS([towlower]) # largefile support AC_SYS_LARGEFILE AC_FUNC_FSEEKO +AC_CHECK_SIZEOF([off_t]) AC_CONFIG_FILES([Makefile libmspack.pc]) AC_OUTPUT diff --git a/libclammspack/doc/szdd_kwaj_format.html b/libclammspack/doc/szdd_kwaj_format.html index 6c861632ba..aa0d2a004c 100644 --- a/libclammspack/doc/szdd_kwaj_format.html +++ b/libclammspack/doc/szdd_kwaj_format.html @@ -158,7 +158,7 @@

COMPRESS.EXE file formats: SZDD and KWAJ

  1. No compression
  2. No compression, data is XORed with byte 0xFF
  3. -
  4. The same compression method as regular SZDD
  5. +
  6. The same compression method as the QBasic variant of SZDD
  7. LZ + Huffman "Jeff Johnson" compression
  8. MS-ZIP
diff --git a/libclammspack/examples/cabrip.c b/libclammspack/examples/cabrip.c index b56b161a30..9dc142aa57 100644 --- a/libclammspack/examples/cabrip.c +++ b/libclammspack/examples/cabrip.c @@ -6,7 +6,7 @@ #include #include #include -#include "system.h" +#include "mspack/macros.h" #if HAVE_FSEEKO # define fseek fseeko diff --git a/libclammspack/libmspack.cygport b/libclammspack/libmspack.cygport index 2e9db866b5..13b424325c 100644 --- a/libclammspack/libmspack.cygport +++ b/libclammspack/libmspack.cygport @@ -1,5 +1,5 @@ NAME=libmspack -VERSION=0.10.1 +VERSION=0.11 RELEASE=1 CATEGORY=Libs SUMMARY="A library for Microsoft compression formats" diff --git a/libclammspack/mspack/cabc.c b/libclammspack/mspack/cabc.c index 08f3a81a99..242e0347c3 100644 --- a/libclammspack/mspack/cabc.c +++ b/libclammspack/mspack/cabc.c @@ -9,8 +9,8 @@ /* CAB compression implementation */ -#include "system.h" -#include "cab.h" +#include +#include struct mscab_compressor * mspack_create_cab_compressor(struct mspack_system *sys) diff --git a/libclammspack/mspack/cabd.c b/libclammspack/mspack/cabd.c index 75ef277eb9..780d69e8ec 100644 --- a/libclammspack/mspack/cabd.c +++ b/libclammspack/mspack/cabd.c @@ -1,5 +1,5 @@ /* This file is part of libmspack. - * (C) 2003-2018 Stuart Caie. + * (C) 2003-2023 Stuart Caie. * * libmspack is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License (LGPL) version 2.1 @@ -21,11 +21,11 @@ /* CAB decompression implementation */ -#include "system.h" -#include "cab.h" -#include "mszip.h" -#include "lzx.h" -#include "qtm.h" +#include +#include +#include +#include +#include /* Notes on compliance with cabinet specification: * @@ -56,7 +56,7 @@ * means each consecutive data block can have completely different * "uncompressed" sizes, ranging from 1 to 32768 bytes. However, in * reality, all data blocks in a folder decompress to exactly 32768 bytes, - * excepting the final block. 
+ * excepting the final block. * * Given this situation, the decompression algorithms are designed to * realign their input bitstreams on 32768 output-byte boundaries, and @@ -76,7 +76,8 @@ static int cabd_read_headers( struct mspack_system *sys, struct mspack_file *fh, struct mscabd_cabinet_p *cab, off_t offset, int salvage, int quiet); static char *cabd_read_string( - struct mspack_system *sys, struct mspack_file *fh, int *error); + struct mspack_system *sys, struct mspack_file *fh, int permit_empty, + int *error); static struct mscabd_cabinet *cabd_search( struct mscab_decompressor *base, const char *filename); @@ -388,23 +389,23 @@ static int cabd_read_headers(struct mspack_system *sys, } else { cab->base.header_resv = 0; - folder_resv = 0; + folder_resv = 0; cab->block_resv = 0; } /* read name and info of preceeding cabinet in set, if present */ if (cab->base.flags & cfheadPREV_CABINET) { - cab->base.prevname = cabd_read_string(sys, fh, &err); + cab->base.prevname = cabd_read_string(sys, fh, 0, &err); if (err) return err; - cab->base.previnfo = cabd_read_string(sys, fh, &err); + cab->base.previnfo = cabd_read_string(sys, fh, 1, &err); if (err) return err; } /* read name and info of next cabinet in set, if present */ if (cab->base.flags & cfheadNEXT_CABINET) { - cab->base.nextname = cabd_read_string(sys, fh, &err); + cab->base.nextname = cabd_read_string(sys, fh, 0, &err); if (err) return err; - cab->base.nextinfo = cabd_read_string(sys, fh, &err); + cab->base.nextinfo = cabd_read_string(sys, fh, 1, &err); if (err) return err; } @@ -508,7 +509,7 @@ static int cabd_read_headers(struct mspack_system *sys, file->date_y = (x >> 9) + 1980; /* get filename */ - file->filename = cabd_read_string(sys, fh, &err); + file->filename = cabd_read_string(sys, fh, 0, &err); /* if folder index or filename are bad, either skip it or fail */ if (err || !file->folder) { @@ -535,7 +536,8 @@ static int cabd_read_headers(struct mspack_system *sys, } static char 
*cabd_read_string(struct mspack_system *sys, - struct mspack_file *fh, int *error) + struct mspack_file *fh, int permit_empty, + int *error) { off_t base = sys->tell(fh); char buf[256], *str; @@ -549,8 +551,8 @@ static char *cabd_read_string(struct mspack_system *sys, /* search for a null terminator in the buffer */ for (i = 0, ok = 0; i < len; i++) if (!buf[i]) { ok = 1; break; } - /* reject empty strings */ - if (i == 0) ok = 0; + /* optionally reject empty strings */ + if (i == 0 && !permit_empty) ok = 0; if (!ok) { *error = MSPACK_ERR_DATAFORMAT; @@ -574,7 +576,7 @@ static char *cabd_read_string(struct mspack_system *sys, *error = MSPACK_ERR_OK; return str; } - + /*************************************** * CABD_SEARCH, CABD_FIND *************************************** @@ -627,7 +629,7 @@ static struct mscabd_cabinet *cabd_search(struct mscab_decompressor *base, firstlen - filelen); } } - + sys->close(fh); } else { @@ -651,10 +653,10 @@ static int cabd_find(struct mscab_decompressor_p *self, unsigned char *buf, unsigned int cablen_u32 = 0, foffset_u32 = 0; int false_cabs = 0; -#if !LARGEFILE_SUPPORT +#if SIZEOF_OFF_T < 8 /* detect 32-bit off_t overflow */ if (flen < 0) { - sys->message(fh, largefile_msg); + sys->message(fh, "library not compiled to support large files."); return MSPACK_ERR_OK; } #endif @@ -676,7 +678,7 @@ static int cabd_find(struct mscab_decompressor_p *self, unsigned char *buf, /* FAQ avoidance strategy */ if ((offset == 0) && (EndGetI32(&buf[0]) == 0x28635349)) { sys->message(fh, "WARNING; found InstallShield header. Use unshield " - "(https://github.com/twogood/unshield) to unpack this file"); + "(https://github.com/twogood/unshield) to unpack this file"); } /* read through the entire buffer. */ @@ -753,13 +755,13 @@ static int cabd_find(struct mscab_decompressor_p *self, unsigned char *buf, /* cause the search to restart after this cab's data. 
*/ offset = caboff + (off_t) cablen_u32; -#if !LARGEFILE_SUPPORT +#if SIZEOF_OFF_T < 8 /* detect 32-bit off_t overflow */ if (offset < caboff) { - sys->message(fh, largefile_msg); + sys->message(fh, "library not compiled to support large files."); return MSPACK_ERR_OK; } -#endif +#endif } } @@ -786,7 +788,7 @@ static int cabd_find(struct mscab_decompressor_p *self, unsigned char *buf, return MSPACK_ERR_OK; } - + /*************************************** * CABD_MERGE, CABD_PREPEND, CABD_APPEND *************************************** @@ -1012,7 +1014,7 @@ static int cabd_extract(struct mscab_decompressor *base, struct mscabd_folder_p *fol; struct mspack_system *sys; struct mspack_file *fh; - off_t filelen; + unsigned int filelen; if (!self) return MSPACK_ERR_ARGS; if (!file) return self->error = MSPACK_ERR_ARGS; @@ -1029,7 +1031,7 @@ static int cabd_extract(struct mscab_decompressor *base, * or in salvage mode reduce file length so it fits 2GB limit */ filelen = file->length; - if (filelen > CAB_LENGTHMAX || (file->offset + filelen) > CAB_LENGTHMAX) { + if (filelen > (CAB_LENGTHMAX - file->offset)) { if (self->salvage) { filelen = CAB_LENGTHMAX - file->offset; } @@ -1049,8 +1051,8 @@ static int cabd_extract(struct mscab_decompressor *base, * In salvage mode, don't assume block sizes, just try decoding */ if (!self->salvage) { - off_t maxlen = fol->base.num_blocks * CAB_BLOCKMAX; - if ((file->offset + filelen) > maxlen) { + unsigned int maxlen = fol->base.num_blocks * CAB_BLOCKMAX; + if (file->offset > maxlen || filelen > (maxlen - file->offset)) { sys->message(NULL, "ERROR; file \"%s\" cannot be extracted, " "cabinet set is incomplete", file->filename); return self->error = MSPACK_ERR_DECRUNCH; @@ -1220,7 +1222,7 @@ static int cabd_sys_read(struct mspack_file *file, void *buffer, int bytes) { int avail, todo, outlen, ignore_cksum, ignore_blocksize; ignore_cksum = self->salvage || - (self->fix_mszip && + (self->fix_mszip && ((self->d->comp_type & cffoldCOMPTYPE_MASK) 
== cffoldCOMPTYPE_MSZIP)); ignore_blocksize = self->salvage; @@ -1398,7 +1400,7 @@ static unsigned int cabd_checksum(unsigned char *data, unsigned int bytes, unsigned int len, ul = 0; for (len = bytes >> 2; len--; data += 4) { - cksum ^= ((data[0]) | (data[1]<<8) | (data[2]<<16) | (data[3]<<24)); + cksum ^= EndGetI32(data); } switch (bytes & 3) { diff --git a/libclammspack/mspack/chm.h b/libclammspack/mspack/chm.h index 4b19f1505a..d7eca81822 100644 --- a/libclammspack/mspack/chm.h +++ b/libclammspack/mspack/chm.h @@ -1,5 +1,5 @@ /* This file is part of libmspack. - * (C) 2003-2004 Stuart Caie. + * (C) 2003-2023 Stuart Caie. * * libmspack is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License (LGPL) version 2.1 @@ -10,7 +10,7 @@ #ifndef MSPACK_CHM_H #define MSPACK_CHM_H 1 -#include "lzx.h" +#include /* generic CHM definitions */ @@ -104,7 +104,8 @@ struct mschm_compressor_p { /* CHM decompression definitions */ struct mschmd_decompress_state { struct mschmd_header *chm; /* CHM file being decompressed */ - off_t offset; /* uncompressed offset within folder */ + off_t length; /* uncompressed length of LZX stream */ + off_t offset; /* uncompressed offset within stream */ off_t inoffset; /* offset in input file */ struct lzxd_stream *state; /* LZX decompressor state */ struct mspack_system sys; /* special I/O code for decompressor */ diff --git a/libclammspack/mspack/chmc.c b/libclammspack/mspack/chmc.c index 0183abf3c1..72f6c5b389 100644 --- a/libclammspack/mspack/chmc.c +++ b/libclammspack/mspack/chmc.c @@ -1,4 +1,3 @@ - /* This file is part of libmspack. * (C) 2003-2004 Stuart Caie. 
* @@ -10,8 +9,8 @@ /* CHM compression implementation */ -#include "system.h" -#include "chm.h" +#include +#include struct mschm_compressor * mspack_create_chm_compressor(struct mspack_system *sys) diff --git a/libclammspack/mspack/chmd.c b/libclammspack/mspack/chmd.c index 99459cad02..061dca1594 100644 --- a/libclammspack/mspack/chmd.c +++ b/libclammspack/mspack/chmd.c @@ -1,5 +1,5 @@ /* This file is part of libmspack. - * (C) 2003-2018 Stuart Caie. + * (C) 2003-2023 Stuart Caie. * * libmspack is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License (LGPL) version 2.1 @@ -9,8 +9,8 @@ /* CHM decompression implementation */ -#include "system.h" -#include "chm.h" +#include +#include /* prototypes */ static struct mschmd_header * chmd_open( @@ -58,6 +58,8 @@ static int chmd_error( static int read_off64( off_t *var, unsigned char *mem, struct mspack_system *sys, struct mspack_file *fh); +static off_t read_encint( + const unsigned char **p, const unsigned char *end, int *err); /* filenames of the system files used for decompression. * Content and ControlData are essential. 
@@ -249,24 +251,15 @@ static const unsigned char guids[32] = { 0x9E, 0x0C, 0x00, 0xA0, 0xC9, 0x22, 0xE6, 0xEC }; -/* reads an encoded integer into a variable; 7 bits of data per byte, - * the high bit is used to indicate that there is another byte */ -#define READ_ENCINT(var) do { \ - (var) = 0; \ - do { \ - if (p >= end) goto chunk_end; \ - (var) = ((var) << 7) | (*p & 0x7F); \ - } while (*p++ & 0x80); \ -} while (0) - static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh, struct mschmd_header *chm, int entire) { - unsigned int section, name_len, x, errors, num_chunks; - unsigned char buf[0x54], *chunk = NULL, *name, *p, *end; + unsigned int errors, num_chunks; + unsigned char buf[0x54], *chunk = NULL; + const unsigned char *name, *p, *end; struct mschmd_file *fi, *link = NULL; - off_t offset, length; - int num_entries; + off_t offset_hs0, filelen; + int num_entries, err = 0; /* initialise pointers */ chm->files = NULL; @@ -312,7 +305,7 @@ static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh, /* chmhst3_OffsetCS0 does not exist in version 1 or 2 CHM files. * The offset will be corrected later, once HS1 is read. 
*/ - if (read_off64(&offset, &buf[chmhst_OffsetHS0], sys, fh) || + if (read_off64(&offset_hs0, &buf[chmhst_OffsetHS0], sys, fh) || read_off64(&chm->dir_offset, &buf[chmhst_OffsetHS1], sys, fh) || read_off64(&chm->sec0.offset, &buf[chmhst3_OffsetCS0], sys, fh)) { @@ -320,7 +313,7 @@ static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh, } /* seek to header section 0 */ - if (sys->seek(fh, offset, MSPACK_SYS_SEEK_START)) { + if (sys->seek(fh, offset_hs0, MSPACK_SYS_SEEK_START)) { return MSPACK_ERR_SEEK; } @@ -332,6 +325,18 @@ static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh, return MSPACK_ERR_DATAFORMAT; } + /* compare declared CHM file size against actual size */ + if (!mspack_sys_filelen(sys, fh, &filelen)) { + if (chm->length > filelen) { + sys->message(fh, "WARNING; file possibly truncated by %" LD " bytes", + chm->length - filelen); + } + else if (chm->length < filelen) { + sys->message(fh, "WARNING; possible %" LD " extra bytes at end of file", + filelen - chm->length); + } + } + /* seek to header section 1 */ if (sys->seek(fh, chm->dir_offset, MSPACK_SYS_SEEK_START)) { return MSPACK_ERR_SEEK; @@ -361,7 +366,7 @@ static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh, D(("content section begins after file has ended")) return MSPACK_ERR_DATAFORMAT; } - + /* ensure there are chunks and that chunk size is * large enough for signature and num_entries */ if (chm->chunk_size < (pmgl_Entries + 2)) { @@ -412,12 +417,13 @@ static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh, } /* seek to the first PMGL chunk, and reduce the number of chunks to read */ - if ((x = chm->first_pmgl) != 0) { - if (sys->seek(fh,(off_t) (x * chm->chunk_size), MSPACK_SYS_SEEK_CUR)) { + if (chm->first_pmgl != 0) { + off_t pmgl_offset = (off_t) chm->first_pmgl * (off_t) chm->chunk_size; + if (sys->seek(fh, pmgl_offset, MSPACK_SYS_SEEK_CUR)) { return MSPACK_ERR_SEEK; } } - num_chunks = 
chm->last_pmgl - x + 1; + num_chunks = chm->last_pmgl - chm->first_pmgl + 1; if (!(chunk = (unsigned char *) sys->alloc(sys, (size_t)chm->chunk_size))) { return MSPACK_ERR_NOMEMORY; @@ -438,7 +444,7 @@ static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh, if (EndGetI32(&chunk[pmgl_QuickRefSize]) < 2) { sys->message(fh, "WARNING; PMGL quickref area is too small"); } - if (EndGetI32(&chunk[pmgl_QuickRefSize]) > + if (EndGetI32(&chunk[pmgl_QuickRefSize]) > (chm->chunk_size - pmgl_Entries)) { sys->message(fh, "WARNING; PMGL quickref area is too large"); @@ -449,12 +455,15 @@ static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh, num_entries = EndGetI16(end); while (num_entries--) { - READ_ENCINT(name_len); - if (name_len > (unsigned int) (end - p)) goto chunk_end; + unsigned int name_len, section; + off_t offset, length; + name_len = read_encint(&p, end, &err); + if (err || (name_len > (unsigned int) (end - p))) goto encint_err; name = p; p += name_len; - READ_ENCINT(section); - READ_ENCINT(offset); - READ_ENCINT(length); + section = read_encint(&p, end, &err); + offset = read_encint(&p, end, &err); + length = read_encint(&p, end, &err); + if (err) goto encint_err; /* ignore blank or one-char (e.g. 
"/") filenames we'd return as blank */ if (name_len < 2 || !name[0] || !name[1]) continue; @@ -482,7 +491,7 @@ static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh, : (struct mschmd_section *) (&chm->sec1)); fi->offset = offset; fi->length = length; - sys->copy(name, fi->filename, (size_t) name_len); + sys->copy((unsigned char *) name, fi->filename, (size_t) name_len); fi->filename[name_len] = '\0'; if (name[0] == ':' && name[1] == ':') { @@ -510,10 +519,10 @@ static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh, } /* this is reached either when num_entries runs out, or if - * reading data from the chunk reached a premature end of chunk */ - chunk_end: + * an ENCINT is badly encoded */ + encint_err: if (num_entries >= 0) { - D(("chunk ended before all entries could be read")) + D(("bad encint before all entries could be read")) errors++; } @@ -572,7 +581,10 @@ static int chmd_fast_find(struct mschm_decompressor *base, } /* found result. loop around for next chunk if this is PMGI */ - if (chunk[3] == 0x4C) break; else READ_ENCINT(n); + if (chunk[3] == 0x4C) break; + + n = read_encint(&p, end, &err); + if (err) goto encint_err; } } else { @@ -599,11 +611,12 @@ static int chmd_fast_find(struct mschm_decompressor *base, /* if we found a file, read it */ if (result > 0) { - READ_ENCINT(sec); + sec = read_encint(&p, end, &err); f_ptr->section = (sec == 0) ? 
(struct mschmd_section *) &chm->sec0 : (struct mschmd_section *) &chm->sec1; - READ_ENCINT(f_ptr->offset); - READ_ENCINT(f_ptr->length); + f_ptr->offset = read_encint(&p, end, &err); + f_ptr->length = read_encint(&p, end, &err); + if (err) goto encint_err; } else if (result < 0) { err = MSPACK_ERR_DATAFORMAT; @@ -612,8 +625,8 @@ static int chmd_fast_find(struct mschm_decompressor *base, sys->close(fh); return self->error = err; - chunk_end: - D(("read beyond end of chunk entries")) + encint_err: + D(("bad encint in PGMI/PGML chunk")) sys->close(fh); return self->error = MSPACK_ERR_DATAFORMAT; } @@ -631,7 +644,7 @@ static unsigned char *read_chunk(struct mschm_decompressor_p *self, /* check arguments - most are already checked by chmd_fast_find */ if (chunk_num >= chm->num_chunks) return NULL; - + /* ensure chunk cache is available */ if (!chm->chunk_cache) { size_t size = sizeof(unsigned char *) * chm->num_chunks; @@ -697,7 +710,7 @@ static int search_chunk(struct mschmd_header *chm, const unsigned char *start, *end, *p; unsigned int qr_size, num_entries, qr_entries, qr_density, name_len; unsigned int L, R, M, fname_len, entries_off, is_pmgl; - int cmp; + int cmp, err = 0; fname_len = strlen(filename); @@ -755,8 +768,8 @@ static int search_chunk(struct mschmd_header *chm, /* compare filename with entry QR points to */ p = &chunk[entries_off + (M ? 
EndGetI16(start - (M << 1)) : 0)]; - READ_ENCINT(name_len); - if (name_len > (unsigned int) (end - p)) goto chunk_end; + name_len = read_encint(&p, end, &err); + if (err || (name_len > (unsigned int) (end - p))) goto encint_err; cmp = compare(filename, (char *)p, fname_len, name_len); if (cmp == 0) break; @@ -788,12 +801,12 @@ static int search_chunk(struct mschmd_header *chm, * entry not found, stop now * - filename > all entries * entry not found (PMGL) / maybe found (PMGI) - * - + * - */ *result = NULL; while (num_entries-- > 0) { - READ_ENCINT(name_len); - if (name_len > (unsigned int) (end - p)) goto chunk_end; + name_len = read_encint(&p, end, &err); + if (err || (name_len > (unsigned int) (end - p))) goto encint_err; cmp = compare(filename, (char *)p, fname_len, name_len); p += name_len; @@ -810,21 +823,21 @@ static int search_chunk(struct mschmd_header *chm, /* read and ignore the rest of this entry */ if (is_pmgl) { - READ_ENCINT(R); /* skip section */ - READ_ENCINT(R); /* skip offset */ - READ_ENCINT(R); /* skip length */ + while (p < end && (*p++ & 0x80)); /* skip section ENCINT */ + while (p < end && (*p++ & 0x80)); /* skip offset ENCINT */ + while (p < end && (*p++ & 0x80)); /* skip length ENCINT */ } else { *result = p; /* store potential final result */ - READ_ENCINT(R); /* skip chunk number */ + while (p < end && (*p++ & 0x80)); /* skip chunk number ENCINT */ } } /* PMGL? not found. PMGI? maybe found */ return (is_pmgl) ? 0 : (*result ? 1 : 0); - chunk_end: - D(("reached end of chunk data while searching")) + encint_err: + D(("bad encint while searching")) return -1; } @@ -836,7 +849,7 @@ static int search_chunk(struct mschmd_header *chm, # define TOLOWER(x) tolower(x) #endif -/* decodes a UTF-8 character from s[] into c. Will not read past e. +/* decodes a UTF-8 character from s[] into c. Will not read past e. * doesn't test that extension bytes are %10xxxxxx. * allows some overlong encodings. 
*/ @@ -938,14 +951,19 @@ static int chmd_extract(struct mschm_decompressor *base, switch (file->section->id) { case 0: /* Uncompressed section file */ /* simple seek + copy */ - if (sys->seek(self->d->infh, file->section->chm->sec0.offset - + file->offset, MSPACK_SYS_SEEK_START)) + if (sys->seek(self->d->infh, chm->sec0.offset + file->offset, + MSPACK_SYS_SEEK_START)) { self->error = MSPACK_ERR_SEEK; } else { unsigned char buf[512]; off_t length = file->length; + off_t maxlen = chm->length - sys->tell(self->d->infh); + if (length > maxlen) { + sys->message(fh, "WARNING; file is %" LD " bytes longer than CHM file", + length - maxlen); + } while (length > 0) { int run = sizeof(buf); if ((off_t)run > length) run = (int)length; @@ -963,7 +981,7 @@ static int chmd_extract(struct mschm_decompressor *base, break; case 1: /* MSCompressed section file */ - /* (re)initialise compression state if we it is not yet initialised, + /* (re)initialise compression state if not yet initialised, * or we have advanced too far and have to backtrack */ if (!self->d->state || (file->offset < self->d->offset)) { @@ -974,6 +992,12 @@ static int chmd_extract(struct mschm_decompressor *base, if (chmd_init_decomp(self, file)) break; } + /* check file offset is not impossible */ + if (file->offset > self->d->length) { + self->error = MSPACK_ERR_DECRUNCH; + break; + } + /* seek to input data */ if (sys->seek(self->d->infh, self->d->inoffset, MSPACK_SYS_SEEK_START)) { self->error = MSPACK_ERR_SEEK; @@ -988,8 +1012,15 @@ static int chmd_extract(struct mschm_decompressor *base, /* if getting to the correct offset was error free, unpack file */ if (!self->error) { + off_t length = file->length; + off_t maxlen = self->d->length - file->offset; + if (length > maxlen) { + sys->message(fh, "WARNING; file is %" LD " bytes longer than " + "compressed section", length - maxlen); + length = maxlen + 1; /* should decompress but still error out */ + } self->d->outfh = fh; - self->error = 
lzxd_decompress(self->d->state, file->length); + self->error = lzxd_decompress(self->d->state, length); } /* save offset in input source stream, in case there is a section 0 @@ -1052,8 +1083,8 @@ static int chmd_init_decomp(struct mschm_decompressor_p *self, if (err) return self->error = err; /* read ControlData */ - if (sec->control->length < lzxcd_SIZEOF) { - D(("ControlData file is too short")) + if (sec->control->length != lzxcd_SIZEOF) { + D(("ControlData file is wrong size")) return self->error = MSPACK_ERR_DATAFORMAT; } if (!(data = read_sys_file(self, sec->control))) { @@ -1125,8 +1156,8 @@ static int chmd_init_decomp(struct mschm_decompressor_p *self, entry = 0; offset = 0; err = read_spaninfo(self, sec, &length); + if (err) return self->error = err; } - if (err) return self->error = err; /* get offset of compressed data stream: * = offset of uncompressed section from start of file @@ -1136,6 +1167,7 @@ static int chmd_init_decomp(struct mschm_decompressor_p *self, /* set start offset and overall remaining stream length */ self->d->offset = entry * LZX_FRAME_SIZE; + self->d->length = length; length -= self->d->offset; /* initialise LZX stream */ @@ -1172,6 +1204,11 @@ static int read_reset_table(struct mschm_decompressor_p *self, D(("ResetTable file is too short")) return 0; } + if (sec->rtable->length > 1000000) { /* arbitrary upper limit */ + D(("ResetTable >1MB (%"LD"), report if genuine", sec->rtable->length)) + return 0; + } + if (!(data = read_sys_file(self, sec->rtable))) { D(("can't read reset table")) return 0; @@ -1235,7 +1272,7 @@ static int read_spaninfo(struct mschm_decompressor_p *self, { struct mspack_system *sys = self->system; unsigned char *data; - + /* find SpanInfo file */ int err = find_sys_file(self, sec, &sec->spaninfo, spaninfo_name); if (err) return MSPACK_ERR_DATAFORMAT; @@ -1246,6 +1283,12 @@ static int read_spaninfo(struct mschm_decompressor_p *self, return MSPACK_ERR_DATAFORMAT; } + /* unconditionally set length here, because 
gcc -Wuninitialized isn't + * clever enough to recognise that read_sys_file() will always set + * self->error to a non-zero value if it returns NULL, and gcc warnings + * spook humans (even false positives) */ + *length_ptr = 0; + /* read the SpanInfo file */ if (!(data = read_sys_file(self, sec->spaninfo))) { D(("can't read SpanInfo file")) @@ -1364,14 +1407,51 @@ static int chmd_error(struct mschm_decompressor *base) { static int read_off64(off_t *var, unsigned char *mem, struct mspack_system *sys, struct mspack_file *fh) { -#if LARGEFILE_SUPPORT +#if SIZEOF_OFF_T >= 8 *var = EndGetI64(mem); #else - *var = EndGetI32(mem); - if ((*var & 0x80000000) || EndGetI32(mem+4)) { - sys->message(fh, (char *)largefile_msg); + if ((mem[3] & 0x80) | mem[4] | mem[5] | mem[6] | mem[7]) { + sys->message(fh, "library not compiled to support large files."); return 1; } + *var = EndGetI32(mem); #endif return 0; } + +#if SIZEOF_OFF_T >= 8 + /* 63 bits allowed: 9 * 7 bits/byte, last byte must be 0x00-0x7F */ +# define ENCINT_MAX_BYTES 9 +# define ENCINT_BAD_LAST_BYTE 0x80 +#else + /* 31 bits allowed: 5 * 7 bits/byte, last byte must be 0x00-0x07 */ +# define ENCINT_MAX_BYTES 5 +# define ENCINT_BAD_LAST_BYTE 0xF1 +#endif + +/*************************************** + * READ_ENCINT + *************************************** + * Reads an ENCINT from memory. If running on a system with a 32-bit off_t, + * ENCINTs up to 0x7FFFFFFF are accepted, values beyond that are an error. 
+ */ +static off_t read_encint(const unsigned char **p, const unsigned char *end, + int *err) +{ + off_t result = 0; + unsigned char c = 0x80; + int i = 0; + while ((c & 0x80) && (i++ < ENCINT_MAX_BYTES)) { + if (*p >= end) { + *err = 1; + return 0; + } + c = *(*p)++; + result = (result << 7) | (c & 0x7F); + } + if (i == ENCINT_MAX_BYTES && (c & ENCINT_BAD_LAST_BYTE)) { + *err = 1; + return 0; + } + return result; +} diff --git a/libclammspack/mspack/hlp.h b/libclammspack/mspack/hlp.h index a6e3abc7ff..b7486fa160 100644 --- a/libclammspack/mspack/hlp.h +++ b/libclammspack/mspack/hlp.h @@ -10,7 +10,7 @@ #ifndef MSPACK_HLP_H #define MSPACK_HLP_H 1 -#include "lzss.h" +#include /* generic HLP definitions */ diff --git a/libclammspack/mspack/hlpc.c b/libclammspack/mspack/hlpc.c index f3be51df9b..60eabfe207 100644 --- a/libclammspack/mspack/hlpc.c +++ b/libclammspack/mspack/hlpc.c @@ -9,8 +9,8 @@ /* HLP compression implementation */ -#include "system.h" -#include "hlp.h" +#include +#include struct mshlp_compressor * mspack_create_hlp_compressor(struct mspack_system *sys) diff --git a/libclammspack/mspack/hlpd.c b/libclammspack/mspack/hlpd.c index b84557c177..43354f008f 100644 --- a/libclammspack/mspack/hlpd.c +++ b/libclammspack/mspack/hlpd.c @@ -9,8 +9,8 @@ /* HLP decompression implementation */ -#include "system.h" -#include "hlp.h" +#include +#include struct mshlp_decompressor * mspack_create_hlp_decompressor(struct mspack_system *sys) diff --git a/libclammspack/mspack/kwaj.h b/libclammspack/mspack/kwaj.h index 75425d958a..09673c0779 100644 --- a/libclammspack/mspack/kwaj.h +++ b/libclammspack/mspack/kwaj.h @@ -10,7 +10,7 @@ #ifndef MSPACK_KWAJ_H #define MSPACK_KWAJ_H 1 -#include "lzss.h" +#include /* generic KWAJ definitions */ #define kwajh_Signature1 (0x00) diff --git a/libclammspack/mspack/kwajc.c b/libclammspack/mspack/kwajc.c index babfa2103c..b88ed7690a 100644 --- a/libclammspack/mspack/kwajc.c +++ b/libclammspack/mspack/kwajc.c @@ -9,8 +9,8 @@ /* KWAJ 
compression implementation */ -#include "system.h" -#include "kwaj.h" +#include +#include struct mskwaj_compressor * mspack_create_kwaj_compressor(struct mspack_system *sys) diff --git a/libclammspack/mspack/kwajd.c b/libclammspack/mspack/kwajd.c index 92d4e1b425..30caa899a1 100644 --- a/libclammspack/mspack/kwajd.c +++ b/libclammspack/mspack/kwajd.c @@ -1,5 +1,5 @@ /* This file is part of libmspack. - * (C) 2003-2011 Stuart Caie. + * (C) 2003-2023 Stuart Caie. * * KWAJ is a format very similar to SZDD. KWAJ method 3 (LZH) was * written by Jeff Johnson. @@ -12,9 +12,9 @@ /* KWAJ decompression implementation */ -#include "system.h" -#include "kwaj.h" -#include "mszip.h" +#include +#include +#include /* prototypes */ static struct mskwajd_header *kwajd_open( @@ -97,27 +97,30 @@ static struct mskwajd_header *kwajd_open(struct mskwaj_decompressor *base, struct mskwajd_header *hdr; struct mspack_system *sys; struct mspack_file *fh; + int err; if (!self) return NULL; sys = self->system; - fh = sys->open(sys, filename, MSPACK_SYS_OPEN_READ); - hdr = (struct mskwajd_header *) sys->alloc(sys, sizeof(struct mskwajd_header_p)); - if (fh && hdr) { - ((struct mskwajd_header_p *) hdr)->fh = fh; - self->error = kwajd_read_headers(sys, fh, hdr); - } - else { - if (!fh) self->error = MSPACK_ERR_OPEN; - if (!hdr) self->error = MSPACK_ERR_NOMEMORY; + fh = sys->open(sys, filename, MSPACK_SYS_OPEN_READ); + if (!fh) { + self->error = MSPACK_ERR_OPEN; + return NULL; } - if (self->error) { - if (fh) sys->close(fh); - sys->free(hdr); - hdr = NULL; + hdr = (struct mskwajd_header *) sys->alloc(sys, sizeof(struct mskwajd_header_p)); + if (!hdr) { + sys->close(fh); + self->error = MSPACK_ERR_NOMEMORY; + return NULL; } + ((struct mskwajd_header_p *) hdr)->fh = fh; + if ((err = kwajd_read_headers(sys, fh, hdr))) { + kwajd_close(base, hdr); + self->error = err; + return NULL; + } return hdr; } @@ -138,6 +141,8 @@ static void kwajd_close(struct mskwaj_decompressor *base, 
self->system->close(hdr_p->fh); /* free the memory associated */ + self->system->free(hdr->filename); + self->system->free(hdr->extra); self->system->free(hdr); self->error = MSPACK_ERR_OK; @@ -200,7 +205,7 @@ static int kwajd_read_headers(struct mspack_system *sys, if (hdr->headers & (MSKWAJ_HDR_HASFILENAME | MSKWAJ_HDR_HASFILEEXT)) { int len; /* allocate memory for maximum length filename */ - char *fn = (char *) sys->alloc(sys, (size_t) 13); + char *fn = (char *) sys->alloc(sys, 13); if (!(hdr->filename = fn)) return MSPACK_ERR_NOMEMORY; /* copy filename if present */ @@ -236,7 +241,7 @@ static int kwajd_read_headers(struct mspack_system *sys, if (hdr->headers & MSKWAJ_HDR_HASEXTRATEXT) { if (sys->read(fh, &buf[0], 2) != 2) return MSPACK_ERR_READ; i = EndGetI16(&buf[0]); - hdr->extra = (char *) sys->alloc(sys, (size_t)i+1); + hdr->extra = (char *) sys->alloc(sys, i+1); if (! hdr->extra) return MSPACK_ERR_NOMEMORY; if (sys->read(fh, hdr->extra, i) != i) return MSPACK_ERR_READ; hdr->extra[i] = '\0'; @@ -280,7 +285,7 @@ static int kwajd_extract(struct mskwaj_decompressor *base, hdr->comp_type == MSKWAJ_COMP_XOR) { /* NONE is a straight copy. 
XOR is a copy xored with 0xFF */ - unsigned char *buf = (unsigned char *) sys->alloc(sys, (size_t) KWAJ_INPUT_SIZE); + unsigned char *buf = (unsigned char *) sys->alloc(sys, KWAJ_INPUT_SIZE); if (buf) { int read, i; while ((read = sys->read(fh, buf, KWAJ_INPUT_SIZE)) > 0) { @@ -301,7 +306,7 @@ static int kwajd_extract(struct mskwaj_decompressor *base, } else if (hdr->comp_type == MSKWAJ_COMP_SZDD) { self->error = lzss_decompress(sys, fh, outfh, KWAJ_INPUT_SIZE, - LZSS_MODE_EXPAND); + LZSS_MODE_QBASIC); } else if (hdr->comp_type == MSKWAJ_COMP_LZH) { struct kwajd_stream *lzh = lzh_init(sys, fh, outfh); @@ -373,7 +378,7 @@ static int kwajd_error(struct mskwaj_decompressor *base) } \ INJECT_BITS(*i_ptr++, 8); \ } while (0) -#include "readbits.h" +#include /* import huffman-reading macros and code */ #define TABLEBITS(tbl) KWAJ_TABLEBITS @@ -381,7 +386,7 @@ static int kwajd_error(struct mskwaj_decompressor *base) #define HUFF_TABLE(tbl,idx) lzh->tbl##_table[idx] #define HUFF_LEN(tbl,idx) lzh->tbl##_len[idx] #define HUFF_ERROR return MSPACK_ERR_DATAFORMAT -#include "readhuff.h" +#include /* In the KWAJ LZH format, there is no special 'eof' marker, it just * ends. 
Depending on how many bits are left in the final byte when @@ -435,17 +440,14 @@ static struct kwajd_stream *lzh_init(struct mspack_system *sys, static int lzh_decompress(struct kwajd_stream *lzh) { - register unsigned int bit_buffer; - register int bits_left, i; - register unsigned short sym; - unsigned char *i_ptr, *i_end, lit_run = 0; - int j, pos = 0, len, offset, err; - unsigned int types[6]; + DECLARE_HUFF_VARS; + unsigned int types[6], i, j, pos = 0, len, offset, lit_run = 0; + int err; /* reset global state */ INIT_BITS; RESTORE_BITS; - memset(&lzh->window[0], LZSS_WINDOW_FILL, (size_t) LZSS_WINDOW_SIZE); + memset(&lzh->window[0], LZSS_WINDOW_FILL, LZSS_WINDOW_SIZE); /* read 6 encoding types (for byte alignment) but only 5 are needed */ for (i = 0; i < 6; i++) READ_BITS_SAFE(types[i], 4); @@ -501,9 +503,7 @@ static int lzh_read_lens(struct kwajd_stream *lzh, unsigned int type, unsigned int numsyms, unsigned char *lens) { - register unsigned int bit_buffer; - register int bits_left; - unsigned char *i_ptr, *i_end; + DECLARE_BIT_VARS; unsigned int i, c, sel; int err; diff --git a/libclammspack/mspack/lit.h b/libclammspack/mspack/lit.h index 2ccc7dd7f3..79ba44d877 100644 --- a/libclammspack/mspack/lit.h +++ b/libclammspack/mspack/lit.h @@ -10,9 +10,9 @@ #ifndef MSPACK_LIT_H #define MSPACK_LIT_H 1 -#include "lzx.h" -#include "des.h" -#include "sha.h" +#include +#include +#include /* generic LIT definitions */ diff --git a/libclammspack/mspack/litc.c b/libclammspack/mspack/litc.c index 3e17d0047e..a8a709af07 100644 --- a/libclammspack/mspack/litc.c +++ b/libclammspack/mspack/litc.c @@ -9,8 +9,8 @@ /* LIT compression implementation */ -#include "system.h" -#include "lit.h" +#include +#include struct mslit_compressor * mspack_create_lit_compressor(struct mspack_system *sys) diff --git a/libclammspack/mspack/litd.c b/libclammspack/mspack/litd.c index 22df020a82..6e0dc9af27 100644 --- a/libclammspack/mspack/litd.c +++ b/libclammspack/mspack/litd.c @@ -9,8 +9,8 @@ /* 
LIT decompression implementation */ -#include "system.h" -#include "lit.h" +#include +#include struct mslit_decompressor * mspack_create_lit_decompressor(struct mspack_system *sys) diff --git a/libclammspack/mspack/lzssd.c b/libclammspack/mspack/lzssd.c index 910baeb1a7..bf2b82a871 100644 --- a/libclammspack/mspack/lzssd.c +++ b/libclammspack/mspack/lzssd.c @@ -11,8 +11,8 @@ * For further details, see the file COPYING.LIB distributed with libmspack */ -#include "system.h" -#include "lzss.h" +#include +#include #define ENSURE_BYTES do { \ if (i_ptr >= i_end) { \ diff --git a/libclammspack/mspack/lzx.h b/libclammspack/mspack/lzx.h index a6152f622b..281fe5f0fb 100644 --- a/libclammspack/mspack/lzx.h +++ b/libclammspack/mspack/lzx.h @@ -67,7 +67,6 @@ struct lzxd_stream { unsigned int block_remaining; /* uncompressed bytes still left to decode */ signed int intel_filesize; /* magic header value used for transform */ - signed int intel_curpos; /* current offset in transform space */ unsigned char intel_started; /* has intel E8 decoding started? */ unsigned char block_type; /* type of the current block */ diff --git a/libclammspack/mspack/lzxc.c b/libclammspack/mspack/lzxc.c index 3ad474be9f..1207a0d747 100644 --- a/libclammspack/mspack/lzxc.c +++ b/libclammspack/mspack/lzxc.c @@ -12,7 +12,7 @@ /* LZX compression implementation */ -#include "system.h" -#include "lzx.h" +#include +#include /* todo */ diff --git a/libclammspack/mspack/lzxd.c b/libclammspack/mspack/lzxd.c index f305a1a5b0..f498dba95e 100644 --- a/libclammspack/mspack/lzxd.c +++ b/libclammspack/mspack/lzxd.c @@ -1,5 +1,5 @@ /* This file is part of libmspack. - * (C) 2003-2013 Stuart Caie. + * (C) 2003-2023 Stuart Caie. * * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted * by Microsoft Corporation. 
@@ -12,8 +12,8 @@ /* LZX decompression implementation */ -#include "system.h" -#include "lzx.h" +#include +#include /* Microsoft's LZX document (in cab-sdk.exe) and their implementation * of the com.ms.util.cab Java package do not concur. @@ -89,7 +89,7 @@ READ_IF_NEEDED; b1 = *i_ptr++; \ INJECT_BITS((b1 << 8) | b0, 16); \ } while (0) -#include "readbits.h" +#include /* import huffman-reading macros and code */ #define TABLEBITS(tbl) LZX_##tbl##_TABLEBITS @@ -97,7 +97,7 @@ #define HUFF_TABLE(tbl,idx) lzx->tbl##_table[idx] #define HUFF_LEN(tbl,idx) lzx->tbl##_len[idx] #define HUFF_ERROR return lzx->error = MSPACK_ERR_DECRUNCH -#include "readhuff.h" +#include /* BUILD_TABLE(tbl) builds a huffman lookup table from code lengths */ #define BUILD_TABLE(tbl) \ @@ -138,17 +138,12 @@ static int lzxd_read_lens(struct lzxd_stream *lzx, unsigned char *lens, unsigned int first, unsigned int last) { - /* bit buffer and huffman symbol decode variables */ - register unsigned int bit_buffer; - register int bits_left, i; - register unsigned short sym; - unsigned char *i_ptr, *i_end; - + DECLARE_HUFF_VARS; unsigned int x, y; int z; RESTORE_BITS; - + /* read lengths for pretree (20 symbols, lengths stored in fixed 4 bits) */ for (x = 0; x < 20; x++) { READ_BITS(y, 4); @@ -315,8 +310,8 @@ struct lzxd_stream *lzxd_init(struct mspack_system *system, } /* allocate decompression window and input buffer */ - lzx->window = (unsigned char *) system->alloc(system, (size_t) window_size); - lzx->inbuf = (unsigned char *) system->alloc(system, (size_t) input_buffer_size); + lzx->window = (unsigned char *) system->alloc(system, window_size); + lzx->inbuf = (unsigned char *) system->alloc(system, input_buffer_size); if (!lzx->window || !lzx->inbuf) { system->free(lzx->window); system->free(lzx->inbuf); @@ -339,7 +334,6 @@ struct lzxd_stream *lzxd_init(struct mspack_system *system, lzx->frame = 0; lzx->reset_interval = reset_interval; lzx->intel_filesize = 0; - lzx->intel_curpos = 0; 
lzx->intel_started = 0; lzx->error = MSPACK_ERR_OK; lzx->num_offsets = position_slots[window_bits - 15] << 3; @@ -392,17 +386,10 @@ void lzxd_set_output_length(struct lzxd_stream *lzx, off_t out_bytes) { } int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { - /* bitstream and huffman reading variables */ - register unsigned int bit_buffer; - register int bits_left, i=0; - unsigned char *i_ptr, *i_end; - register unsigned short sym; - - int match_length, length_footer, extra, verbatim_bits, bytes_todo; - int this_run, main_element, aligned_bits, j, warned = 0; - unsigned char *window, *runsrc, *rundest, buf[12]; - unsigned int frame_size=0, end_frame, match_offset, window_posn; - unsigned int R0, R1, R2; + DECLARE_HUFF_VARS; + unsigned char *window, *runsrc, *rundest, buf[12], warned = 0; + unsigned int frame_size, end_frame, window_posn, R0, R1, R2; + int bytes_todo, this_run, i, j; /* easy answers */ if (!lzx || (out_bytes < 0)) return MSPACK_ERR_ARGS; @@ -463,7 +450,7 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { j = 0; READ_BITS(i, 1); if (i) { READ_BITS(i, 16); READ_BITS(j, 16); } lzx->intel_filesize = (i << 16) | j; lzx->header_read = 1; - } + } /* calculate size of frame: all frames are 32k except the final frame * which is 32kb or less. 
this can only be calculated when lzx->length @@ -524,9 +511,9 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { READ_IF_NEEDED; *rundest++ = *i_ptr++; } - R0 = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24); - R1 = buf[4] | (buf[5] << 8) | (buf[6] << 16) | (buf[7] << 24); - R2 = buf[8] | (buf[9] << 8) | (buf[10] << 16) | (buf[11] << 24); + R0 = EndGetI32(&buf[0]); + R1 = EndGetI32(&buf[4]); + R2 = EndGetI32(&buf[8]); break; default: @@ -546,111 +533,12 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { /* decode at least this_run bytes */ switch (lzx->block_type) { - case LZX_BLOCKTYPE_VERBATIM: - while (this_run > 0) { - READ_HUFFSYM(MAINTREE, main_element); - if (main_element < LZX_NUM_CHARS) { - /* literal: 0 to LZX_NUM_CHARS-1 */ - window[window_posn++] = main_element; - this_run--; - } - else { - /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */ - main_element -= LZX_NUM_CHARS; - - /* get match length */ - match_length = main_element & LZX_NUM_PRIMARY_LENGTHS; - if (match_length == LZX_NUM_PRIMARY_LENGTHS) { - if (lzx->LENGTH_empty) { - D(("LENGTH symbol needed but tree is empty")) - return lzx->error = MSPACK_ERR_DECRUNCH; - } - READ_HUFFSYM(LENGTH, length_footer); - match_length += length_footer; - } - match_length += LZX_MIN_MATCH; - - /* get match offset */ - switch ((match_offset = (main_element >> 3))) { - case 0: match_offset = R0; break; - case 1: match_offset = R1; R1=R0; R0 = match_offset; break; - case 2: match_offset = R2; R2=R0; R0 = match_offset; break; - case 3: match_offset = 1; R2=R1; R1=R0; R0 = match_offset; break; - default: - extra = (match_offset >= 36) ? 
17 : extra_bits[match_offset]; - READ_BITS(verbatim_bits, extra); - match_offset = position_base[match_offset] - 2 + verbatim_bits; - R2 = R1; R1 = R0; R0 = match_offset; - } - - /* LZX DELTA uses max match length to signal even longer match */ - if (match_length == LZX_MAX_MATCH && lzx->is_delta) { - int extra_len = 0; - ENSURE_BITS(3); /* 4 entry huffman tree */ - if (PEEK_BITS(1) == 0) { - REMOVE_BITS(1); /* '0' -> 8 extra length bits */ - READ_BITS(extra_len, 8); - } - else if (PEEK_BITS(2) == 2) { - REMOVE_BITS(2); /* '10' -> 10 extra length bits + 0x100 */ - READ_BITS(extra_len, 10); - extra_len += 0x100; - } - else if (PEEK_BITS(3) == 6) { - REMOVE_BITS(3); /* '110' -> 12 extra length bits + 0x500 */ - READ_BITS(extra_len, 12); - extra_len += 0x500; - } - else { - REMOVE_BITS(3); /* '111' -> 15 extra length bits */ - READ_BITS(extra_len, 15); - } - match_length += extra_len; - } - - if ((window_posn + match_length) > lzx->window_size) { - D(("match ran over window wrap")) - return lzx->error = MSPACK_ERR_DECRUNCH; - } - - /* copy match */ - rundest = &window[window_posn]; - i = match_length; - /* does match offset wrap the window? 
*/ - if (match_offset > window_posn) { - if (match_offset > lzx->offset && - (match_offset - window_posn) > lzx->ref_data_size) - { - D(("match offset beyond LZX stream")) - return lzx->error = MSPACK_ERR_DECRUNCH; - } - /* j = length from match offset to end of window */ - j = match_offset - window_posn; - if (j > (int) lzx->window_size) { - D(("match offset beyond window boundaries")) - return lzx->error = MSPACK_ERR_DECRUNCH; - } - runsrc = &window[lzx->window_size - j]; - if (j < i) { - /* if match goes over the window edge, do two copy runs */ - i -= j; while (j-- > 0) *rundest++ = *runsrc++; - runsrc = window; - } - while (i-- > 0) *rundest++ = *runsrc++; - } - else { - runsrc = rundest - match_offset; - while (i-- > 0) *rundest++ = *runsrc++; - } - - this_run -= match_length; - window_posn += match_length; - } - } /* while (this_run > 0) */ - break; - case LZX_BLOCKTYPE_ALIGNED: + case LZX_BLOCKTYPE_VERBATIM: while (this_run > 0) { + int main_element, length_footer, verbatim_bits, aligned_bits, extra; + int match_length; + unsigned int match_offset; READ_HUFFSYM(MAINTREE, main_element); if (main_element < LZX_NUM_CHARS) { /* literal: 0 to LZX_NUM_CHARS-1 */ @@ -675,34 +563,24 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { /* get match offset */ switch ((match_offset = (main_element >> 3))) { - case 0: match_offset = R0; break; - case 1: match_offset = R1; R1 = R0; R0 = match_offset; break; - case 2: match_offset = R2; R2 = R0; R0 = match_offset; break; + case 0: match_offset = R0; break; + case 1: match_offset = R1; R1=R0; R0 = match_offset; break; + case 2: match_offset = R2; R2=R0; R0 = match_offset; break; default: extra = (match_offset >= 36) ? 
17 : extra_bits[match_offset]; match_offset = position_base[match_offset] - 2; - if (extra > 3) { - /* verbatim and aligned bits */ - extra -= 3; - READ_BITS(verbatim_bits, extra); - match_offset += (verbatim_bits << 3); - READ_HUFFSYM(ALIGNED, aligned_bits); - match_offset += aligned_bits; - } - else if (extra == 3) { - /* aligned bits only */ + if (extra >= 3 && lzx->block_type == LZX_BLOCKTYPE_ALIGNED) { + if (extra > 3) { + READ_BITS(verbatim_bits, extra - 3); /* 1-14 bits */ + match_offset += verbatim_bits << 3; + } READ_HUFFSYM(ALIGNED, aligned_bits); match_offset += aligned_bits; } - else if (extra > 0) { /* extra==1, extra==2 */ - /* verbatim bits only */ - READ_BITS(verbatim_bits, extra); + else if (extra) { + READ_BITS(verbatim_bits, extra); /* 1-17 bits */ match_offset += verbatim_bits; } - else /* extra == 0 */ { - /* ??? not defined in LZX specification! */ - match_offset = 1; - } /* update repeated offset LRU queue */ R2 = R1; R1 = R0; R0 = match_offset; } @@ -742,7 +620,7 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { i = match_length; /* does match offset wrap the window? */ if (match_offset > window_posn) { - if (match_offset > lzx->offset && + if ((off_t)match_offset > lzx->offset && (match_offset - window_posn) > lzx->ref_data_size) { D(("match offset beyond LZX stream")) @@ -784,7 +662,7 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { } else { if (i > this_run) i = this_run; - lzx->sys->copy(i_ptr, rundest, (size_t) i); + lzx->sys->copy(i_ptr, rundest, i); rundest += i; i_ptr += i; this_run -= i; @@ -827,11 +705,11 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { /* does this intel block _really_ need decoding? 
*/ if (lzx->intel_started && lzx->intel_filesize && - (lzx->frame <= 32768) && (frame_size > 10)) + (lzx->frame < 32768) && (frame_size > 10)) { unsigned char *data = &lzx->e8_buf[0]; unsigned char *dataend = &lzx->e8_buf[frame_size - 10]; - signed int curpos = lzx->intel_curpos; + signed int curpos = (int) lzx->offset; signed int filesize = lzx->intel_filesize; signed int abs_off, rel_off; @@ -841,7 +719,7 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { while (data < dataend) { if (*data++ != 0xE8) { curpos++; continue; } - abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24); + abs_off = EndGetI32(data); if ((abs_off >= -curpos) && (abs_off < filesize)) { rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize; data[0] = (unsigned char) rel_off; @@ -852,11 +730,9 @@ int lzxd_decompress(struct lzxd_stream *lzx, off_t out_bytes) { data += 4; curpos += 5; } - lzx->intel_curpos += frame_size; } else { lzx->o_ptr = &lzx->window[lzx->frame_posn]; - if (lzx->intel_filesize) lzx->intel_curpos += frame_size; } lzx->o_end = &lzx->o_ptr[frame_size]; diff --git a/libclammspack/mspack/macros.h b/libclammspack/mspack/macros.h new file mode 100644 index 0000000000..427b705b36 --- /dev/null +++ b/libclammspack/mspack/macros.h @@ -0,0 +1,64 @@ +/* This file is part of libmspack. + * (C) 2003-2020 Stuart Caie. 
+ * + * libmspack is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License (LGPL) version 2.1 + * + * For further details, see the file COPYING.LIB distributed with libmspack + */ + +#ifndef MSPACK_MACROS_H +#define MSPACK_MACROS_H 1 + +/* define LD and LU as printf-format for signed and unsigned long offsets */ +#if HAVE_INTTYPES_H +# include +#else +# define PRId64 "lld" +# define PRIu64 "llu" +# define PRId32 "ld" +# define PRIu32 "lu" +#endif + +#if SIZEOF_OFF_T >= 8 +# define LD PRId64 +# define LU PRIu64 +#else +# define LD PRId32 +# define LU PRIu32 +#endif + +/* endian-neutral reading of little-endian data */ +#define __egi32(a,n) (((unsigned int) ((unsigned char *)(a))[n+3] << 24) | \ + ((unsigned int) ((unsigned char *)(a))[n+2] << 16) | \ + ((unsigned int) ((unsigned char *)(a))[n+1] << 8) | \ + ((unsigned int) ((unsigned char *)(a))[n])) +#define EndGetI64(a) (((unsigned long long int) __egi32(a,4) << 32) | __egi32(a,0)) +#define EndGetI32(a) __egi32(a,0) +#define EndGetI16(a) ((((a)[1])<<8)|((a)[0])) + +/* endian-neutral reading of big-endian data */ +#define EndGetM32(a) (((unsigned int) ((unsigned char *)(a))[0] << 24) | \ + ((unsigned int) ((unsigned char *)(a))[1] << 16) | \ + ((unsigned int) ((unsigned char *)(a))[2] << 8) | \ + ((unsigned int) ((unsigned char *)(a))[3])) +#define EndGetM16(a) ((((a)[0])<<8)|((a)[1])) + +/* D(("formatstring", args)) prints debug messages if DEBUG defined */ +#if DEBUG + /* http://gcc.gnu.org/onlinedocs/gcc/Function-Names.html */ +# if __STDC_VERSION__ < 199901L +# if __GNUC__ >= 2 +# define __func__ __FUNCTION__ +# else +# define __func__ "" +# endif +# endif +# include +# define D(x) do { printf("%s:%d (%s) ",__FILE__, __LINE__, __func__); \ + printf x ; fputc('\n', stdout); fflush(stdout);} while (0); +#else +# define D(x) +#endif + +#endif diff --git a/libclammspack/mspack/mspack.def b/libclammspack/mspack/mspack.def index df6363149b..efcf32495b 
100644 --- a/libclammspack/mspack/mspack.def +++ b/libclammspack/mspack/mspack.def @@ -1,5 +1,5 @@ -LIBRARY libmspack -EXPORTS +LIBRARY mspack +EXPORTS mspack_create_cab_compressor mspack_create_cab_decompressor mspack_create_chm_compressor @@ -8,6 +8,8 @@ EXPORTS mspack_create_hlp_decompressor mspack_create_kwaj_compressor mspack_create_kwaj_decompressor + mspack_create_oab_compressor + mspack_create_oab_decompressor mspack_create_lit_compressor mspack_create_lit_decompressor mspack_create_szdd_compressor @@ -20,6 +22,8 @@ EXPORTS mspack_destroy_hlp_decompressor mspack_destroy_kwaj_compressor mspack_destroy_kwaj_decompressor + mspack_destroy_oab_compressor + mspack_destroy_oab_decompressor mspack_destroy_lit_compressor mspack_destroy_lit_decompressor mspack_destroy_szdd_compressor diff --git a/libclammspack/mspack/mszipc.c b/libclammspack/mspack/mszipc.c index 30f84bf7ef..2f1ecb2e87 100644 --- a/libclammspack/mspack/mszipc.c +++ b/libclammspack/mspack/mszipc.c @@ -12,7 +12,7 @@ /* MS-ZIP compression implementation */ -#include "system.h" -#include "mszip.h" +#include +#include /* todo */ diff --git a/libclammspack/mspack/mszipd.c b/libclammspack/mspack/mszipd.c index 96c273e19a..1e5636b1b6 100644 --- a/libclammspack/mspack/mszipd.c +++ b/libclammspack/mspack/mszipd.c @@ -1,5 +1,5 @@ /* This file is part of libmspack. - * (C) 2003-2010 Stuart Caie. + * (C) 2003-2023 Stuart Caie. * * The deflate method was created by Phil Katz. MSZIP is equivalent to the * deflate method. @@ -12,8 +12,8 @@ /* MS-ZIP decompression implementation. 
*/ -#include "system.h" -#include "mszip.h" +#include +#include /* import bit-reading macros and code */ #define BITS_TYPE struct mszipd_stream @@ -24,7 +24,7 @@ READ_IF_NEEDED; \ INJECT_BITS(*i_ptr++, 8); \ } while (0) -#include "readbits.h" +#include /* import huffman macros and code */ #define TABLEBITS(tbl) MSZIP_##tbl##_TABLEBITS @@ -32,7 +32,7 @@ #define HUFF_TABLE(tbl,idx) zip->tbl##_table[idx] #define HUFF_LEN(tbl,idx) zip->tbl##_len[idx] #define HUFF_ERROR return INF_ERR_HUFFSYM -#include "readhuff.h" +#include #define FLUSH_IF_NEEDED do { \ if (zip->window_posn == MSZIP_FRAME_SIZE) { \ @@ -89,10 +89,7 @@ static const unsigned char bitlen_order[19] = { #define INF_ERR_HUFFSYM (-14) /* out of bits decoding huffman symbol */ static int zip_read_lens(struct mszipd_stream *zip) { - /* for the bit buffer and huffman decoding */ - register unsigned int bit_buffer; - register int bits_left; - unsigned char *i_ptr, *i_end; + DECLARE_BIT_VARS; /* bitlen Huffman codes -- immediate lookup, 7 bit max code length */ unsigned short bl_table[(1 << 7)]; @@ -155,13 +152,9 @@ static int zip_read_lens(struct mszipd_stream *zip) { /* a clean implementation of RFC 1951 / inflate */ static int inflate(struct mszipd_stream *zip) { - unsigned int last_block, block_type, distance, length, this_run, i; - - /* for the bit buffer and huffman decoding */ - register unsigned int bit_buffer; - register int bits_left; - register unsigned short sym; - unsigned char *i_ptr, *i_end; + DECLARE_HUFF_VARS; + unsigned int last_block, block_type, distance, length, this_run; + int i; RESTORE_BITS; @@ -232,7 +225,7 @@ static int inflate(struct mszipd_stream *zip) { RESTORE_BITS; } - /* now huffman lengths are read for either kind of block, + /* now huffman lengths are read for either kind of block, * create huffman decoding tables */ if (make_decode_table(MSZIP_LITERAL_MAXSYMBOLS, MSZIP_LITERAL_TABLEBITS, &zip->LITERAL_len[0], &zip->LITERAL_table[0])) @@ -270,7 +263,7 @@ static int inflate(struct 
mszipd_stream *zip) { /* match position is window position minus distance. If distance * is more than window position numerically, it must 'wrap - * around' the frame size. */ + * around' the frame size. */ match_posn = ((distance > zip->window_posn) ? MSZIP_FRAME_SIZE : 0) + zip->window_posn - distance; @@ -326,7 +319,7 @@ static int inflate(struct mszipd_stream *zip) { * MSZIP only expands to the size of the window, the implementation used * simply keeps track of the amount of data flushed, and if more than 32k * is flushed, an error is raised. - */ + */ static int mszipd_flush_window(struct mszipd_stream *zip, unsigned int data_flushed) { @@ -359,7 +352,7 @@ struct mszipd_stream *mszipd_init(struct mspack_system *system, } /* allocate input buffer */ - zip->inbuf = (unsigned char *) system->alloc(system, (size_t) input_buffer_size); + zip->inbuf = (unsigned char *) system->alloc(system, input_buffer_size); if (!zip->inbuf) { system->free(zip); return NULL; @@ -467,11 +460,7 @@ int mszipd_decompress(struct mszipd_stream *zip, off_t out_bytes) { } int mszipd_decompress_kwaj(struct mszipd_stream *zip) { - /* for the bit buffer */ - register unsigned int bit_buffer; - register int bits_left; - unsigned char *i_ptr, *i_end; - + DECLARE_BIT_VARS; int i, error, block_len; /* unpack blocks until block_len == 0 */ diff --git a/libclammspack/mspack/oab.h b/libclammspack/mspack/oab.h index d37fbb9c0f..7bd4993ebc 100644 --- a/libclammspack/mspack/oab.h +++ b/libclammspack/mspack/oab.h @@ -10,7 +10,7 @@ #ifndef MSPACK_OAB_H #define MSPACK_OAB_H 1 -#include "system.h" +#include /* generic OAB definitions */ diff --git a/libclammspack/mspack/oabc.c b/libclammspack/mspack/oabc.c index 6bb1832fda..327ce61ba1 100644 --- a/libclammspack/mspack/oabc.c +++ b/libclammspack/mspack/oabc.c @@ -9,8 +9,8 @@ /* OAB compression implementation */ -#include "system.h" -#include "oab.h" +#include +#include struct msoab_compressor * mspack_create_oab_compressor(struct mspack_system *sys) diff 
--git a/libclammspack/mspack/oabd.c b/libclammspack/mspack/oabd.c index ed3c2b7138..6cd6541c1e 100644 --- a/libclammspack/mspack/oabd.c +++ b/libclammspack/mspack/oabd.c @@ -22,10 +22,10 @@ /* OAB decompression implementation */ -#include "system.h" -#include "oab.h" -#include "lzx.h" -#include "crc32.h" +#include +#include +#include +#include /* prototypes */ static int oabd_decompress(struct msoab_decompressor *self, const char *input, diff --git a/libclammspack/mspack/qtmc.c b/libclammspack/mspack/qtmc.c index 15554d90b1..f6e3718166 100644 --- a/libclammspack/mspack/qtmc.c +++ b/libclammspack/mspack/qtmc.c @@ -12,7 +12,7 @@ /* Quantum compression implementation */ -#include "system.h" -#include "qtm.h" +#include +#include /* todo */ diff --git a/libclammspack/mspack/qtmd.c b/libclammspack/mspack/qtmd.c index 4510866d71..ec72136fc2 100644 --- a/libclammspack/mspack/qtmd.c +++ b/libclammspack/mspack/qtmd.c @@ -1,5 +1,5 @@ /* This file is part of libmspack. - * (C) 2003-2004 Stuart Caie. + * (C) 2003-2023 Stuart Caie. * * The Quantum method was created by David Stafford, adapted by Microsoft * Corporation. @@ -20,8 +20,8 @@ * http://www.speakeasy.org/~russotto/quantumcomp.html */ -#include "system.h" -#include "qtm.h" +#include +#include /* import bit-reading macros and code */ #define BITS_TYPE struct qtmd_stream @@ -33,7 +33,7 @@ READ_IF_NEEDED; b1 = *i_ptr++; \ INJECT_BITS((b0 << 8) | b1, 16); \ } while (0) -#include "readbits.h" +#include /* Quantum static data tables: * @@ -83,7 +83,7 @@ static const unsigned char length_extra[27] = { /* Arithmetic decoder: - * + * * GET_SYMBOL(model, var) fetches the next symbol from the stated model * and puts it in var. 
* @@ -254,14 +254,12 @@ struct qtmd_stream *qtmd_init(struct mspack_system *system, } int qtmd_decompress(struct qtmd_stream *qtm, off_t out_bytes) { + DECLARE_BIT_VARS; unsigned int frame_todo, frame_end, window_posn, match_offset, range; - unsigned char *window, *i_ptr, *i_end, *runsrc, *rundest; + unsigned char *window, *runsrc, *rundest; int i, j, selector, extra, sym, match_length; unsigned short H, L, C, symf; - register unsigned int bit_buffer; - register unsigned char bits_left; - /* easy answers */ if (!qtm || (out_bytes < 0)) return MSPACK_ERR_ARGS; if (qtm->error) return qtm->error; @@ -379,7 +377,7 @@ int qtmd_decompress(struct qtmd_stream *qtm, off_t out_bytes) { } out_bytes -= i; qtm->o_ptr = &window[0]; - qtm->o_end = &window[0]; + qtm->o_end = &window[0]; /* copy second part of match, after window wrap */ rundest = &window[0]; @@ -453,7 +451,7 @@ int qtmd_decompress(struct qtmd_stream *qtm, off_t out_bytes) { } out_bytes -= i; qtm->o_ptr = &window[0]; - qtm->o_end = &window[0]; + qtm->o_end = &window[0]; window_posn = 0; } diff --git a/libclammspack/mspack/readbits.h b/libclammspack/mspack/readbits.h index 9b237a3693..846e8454f7 100644 --- a/libclammspack/mspack/readbits.h +++ b/libclammspack/mspack/readbits.h @@ -13,6 +13,7 @@ /* this header defines macros that read data streams by * the individual bits * + * DECLARE_BIT_VARS declares local variables * INIT_BITS initialises bitstream state in state structure * STORE_BITS stores bitstream state in state structure * RESTORE_BITS restores bitstream state from state structure @@ -92,13 +93,20 @@ # endif #endif +typedef unsigned int bitbuf_type; + #if HAVE_LIMITS_H # include #endif #ifndef CHAR_BIT # define CHAR_BIT (8) #endif -#define BITBUF_WIDTH (sizeof(bit_buffer) * CHAR_BIT) +#define BITBUF_WIDTH (sizeof(bitbuf_type) * CHAR_BIT) + +#define DECLARE_BIT_VARS \ + unsigned char *i_ptr, *i_end; \ + register bitbuf_type bit_buffer; \ + register int bits_left #define INIT_BITS do { \ BITS_VAR->i_ptr = 
&BITS_VAR->inbuf[0]; \ @@ -136,7 +144,7 @@ unsigned char needed = (bits), bitrun; \ (val) = 0; \ while (needed > 0) { \ - if (bits_left <= (BITBUF_WIDTH - 16)) READ_BYTES; \ + if (bits_left <= (int)(BITBUF_WIDTH - 16)) READ_BYTES; \ bitrun = (bits_left < needed) ? bits_left : needed; \ (val) = ((val) << bitrun) | PEEK_BITS(bitrun); \ REMOVE_BITS(bitrun); \ @@ -148,12 +156,13 @@ # define PEEK_BITS(nbits) (bit_buffer >> (BITBUF_WIDTH - (nbits))) # define REMOVE_BITS(nbits) ((bit_buffer <<= (nbits)), (bits_left -= (nbits))) # define INJECT_BITS(bitdata,nbits) ((bit_buffer |= \ - (bitdata) << (BITBUF_WIDTH - (nbits) - bits_left)), (bits_left += (nbits))) + (bitbuf_type)(bitdata) << (BITBUF_WIDTH - (nbits) - bits_left)), \ + (bits_left += (nbits))) #else /* BITS_ORDER_LSB */ -# define PEEK_BITS(nbits) (bit_buffer & ((1 << (nbits))-1)) +# define PEEK_BITS(nbits) (bit_buffer & ((bitbuf_type)(1 << (nbits))-1)) # define REMOVE_BITS(nbits) ((bit_buffer >>= (nbits)), (bits_left -= (nbits))) # define INJECT_BITS(bitdata,nbits) ((bit_buffer |= \ - (bitdata) << bits_left), (bits_left += (nbits))) + (bitbuf_type)(bitdata) << bits_left), (bits_left += (nbits))) #endif #ifdef BITS_LSB_TABLE diff --git a/libclammspack/mspack/readhuff.h b/libclammspack/mspack/readhuff.h index 4d94225789..f6e449f1d2 100644 --- a/libclammspack/mspack/readhuff.h +++ b/libclammspack/mspack/readhuff.h @@ -28,35 +28,40 @@ # define HUFF_MAXBITS 16 #endif +#define DECLARE_HUFF_VARS \ + DECLARE_BIT_VARS; \ + register int huff_idx; \ + register unsigned short huff_sym + /* Decodes the next huffman symbol from the input bitstream into var. * Do not use this macro on a table unless build_decode_table() succeeded. 
*/ -#define READ_HUFFSYM(tbl, var) do { \ - ENSURE_BITS(HUFF_MAXBITS); \ - sym = HUFF_TABLE(tbl, PEEK_BITS(TABLEBITS(tbl))); \ - if (sym >= MAXSYMBOLS(tbl)) HUFF_TRAVERSE(tbl); \ - (var) = sym; \ - i = HUFF_LEN(tbl, sym); \ - REMOVE_BITS(i); \ +#define READ_HUFFSYM(tbl, var) do { \ + ENSURE_BITS(HUFF_MAXBITS); \ + huff_sym = HUFF_TABLE(tbl, PEEK_BITS(TABLEBITS(tbl))); \ + if (huff_sym >= MAXSYMBOLS(tbl)) HUFF_TRAVERSE(tbl); \ + (var) = huff_sym; \ + huff_idx = HUFF_LEN(tbl, huff_sym); \ + REMOVE_BITS(huff_idx); \ } while (0) #ifdef BITS_ORDER_LSB -# define HUFF_TRAVERSE(tbl) do { \ - i = TABLEBITS(tbl) - 1; \ - do { \ - if (i++ > HUFF_MAXBITS) HUFF_ERROR; \ - sym = HUFF_TABLE(tbl, \ - (sym << 1) | ((bit_buffer >> i) & 1)); \ - } while (sym >= MAXSYMBOLS(tbl)); \ +# define HUFF_TRAVERSE(tbl) do { \ + huff_idx = TABLEBITS(tbl) - 1; \ + do { \ + if (huff_idx++ > HUFF_MAXBITS) HUFF_ERROR; \ + huff_sym = HUFF_TABLE(tbl, \ + (huff_sym << 1) | ((bit_buffer >> huff_idx) & 1)); \ + } while (huff_sym >= MAXSYMBOLS(tbl)); \ } while (0) #else -#define HUFF_TRAVERSE(tbl) do { \ - i = 1 << (BITBUF_WIDTH - TABLEBITS(tbl)); \ - do { \ - if ((i >>= 1) == 0) HUFF_ERROR; \ - sym = HUFF_TABLE(tbl, \ - (sym << 1) | ((bit_buffer & i) ? 1 : 0)); \ - } while (sym >= MAXSYMBOLS(tbl)); \ +#define HUFF_TRAVERSE(tbl) do { \ + huff_idx = 1 << (BITBUF_WIDTH - TABLEBITS(tbl)); \ + do { \ + if ((huff_idx >>= 1) == 0) HUFF_ERROR; \ + huff_sym = HUFF_TABLE(tbl, \ + (huff_sym << 1) | ((bit_buffer & huff_idx) ? 
1 : 0)); \ + } while (huff_sym >= MAXSYMBOLS(tbl)); \ } while (0) #endif diff --git a/libclammspack/mspack/system.c b/libclammspack/mspack/system.c index e2ce3692f0..d085551dfd 100644 --- a/libclammspack/mspack/system.c +++ b/libclammspack/mspack/system.c @@ -11,12 +11,7 @@ # include #endif -#include "system.h" - -#if !LARGEFILE_SUPPORT -const char *largefile_msg = "library not compiled to support large files."; -#endif - +#include int mspack_version(int entity) { switch (entity) { diff --git a/libclammspack/mspack/system.h b/libclammspack/mspack/system.h index e5f87451e4..646a8466fb 100644 --- a/libclammspack/mspack/system.h +++ b/libclammspack/mspack/system.h @@ -18,11 +18,29 @@ extern "C" { #ifdef HAVE_CONFIG_H # include #endif - -#include "mspack.h" +#include +#include /* assume exists */ -#include +#ifndef MSPACK_NO_DEFAULT_SYSTEM +# include +#else + /* but if no default system wanted, avoid using entirely, + * to avoid linking to even these standard C library functions */ +static inline int memcmp(const void *s1, const void *s2, size_t n) { + const unsigned char *a = s1, *b = s2; + while (n--) if (*a++ != *b++) return a[-1] - b[-1]; + return 0; +} +static inline void *memset(void *s, int c, size_t n) { + unsigned char *s2 = s, c2 = (unsigned char) c; + while (n--) *s2++ = c2; + return s; +} +static inline size_t strlen(const char *s) { + size_t c = 0; while (*s++) c++; return c; +} +#endif /* fix for problem with GCC 4 and glibc (thanks to Ville Skytta) * http://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=150429 @@ -31,71 +49,6 @@ extern "C" { # undef read #endif -/* Old GCCs don't have __func__, but __FUNCTION__: - * http://gcc.gnu.org/onlinedocs/gcc/Function-Names.html - */ -#if __STDC_VERSION__ < 199901L -# if __GNUC__ >= 2 -# define __func__ __FUNCTION__ -# else -# define __func__ "" -# endif -#endif - -#if DEBUG -# include -# define D(x) do { printf("%s:%d (%s) ",__FILE__, __LINE__, __func__); \ - printf x ; fputc('\n', stdout); fflush(stdout);} while 
(0); -#else -# define D(x) -#endif - -/* CAB supports searching through files over 4GB in size, and the CHM file - * format actively uses 64-bit offsets. These can only be fully supported - * if the system the code runs on supports large files. If not, the library - * will work as normal using only 32-bit arithmetic, but if an offset - * greater than 2GB is detected, an error message indicating the library - * can't support the file should be printed. - */ -#if HAVE_INTTYPES_H -# include -#else -# define PRId64 "lld" -# define PRIu64 "llu" -# define PRId32 "ld" -# define PRIu32 "lu" -#endif - -#include -#if ((defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS >= 64) || \ - (defined(FILESIZEBITS) && FILESIZEBITS >= 64) || \ - defined(_LARGEFILE_SOURCE) || defined(_LARGEFILE64_SOURCE) || \ - SIZEOF_OFF_T >= 8) -# define LARGEFILE_SUPPORT 1 -# define LD PRId64 -# define LU PRIu64 -#else -extern const char *largefile_msg; -# define LD PRId32 -# define LU PRIu32 -#endif - -/* endian-neutral reading of little-endian data */ -#define __egi32(a,n) (((unsigned int) ((unsigned char *)(a))[n+3] << 24) | \ - ((unsigned int) ((unsigned char *)(a))[n+2] << 16) | \ - ((unsigned int) ((unsigned char *)(a))[n+1] << 8) | \ - ((unsigned int) ((unsigned char *)(a))[n])) -#define EndGetI64(a) (((unsigned long long int) __egi32(a,4) << 32) | __egi32(a,0)) -#define EndGetI32(a) __egi32(a,0) -#define EndGetI16(a) ((((a)[1])<<8)|((a)[0])) - -/* endian-neutral reading of big-endian data */ -#define EndGetM32(a) (((unsigned int) ((unsigned char *)(a))[0] << 24) | \ - ((unsigned int) ((unsigned char *)(a))[1] << 16) | \ - ((unsigned int) ((unsigned char *)(a))[2] << 8) | \ - ((unsigned int) ((unsigned char *)(a))[3])) -#define EndGetM16(a) ((((a)[0])<<8)|((a)[1])) - extern struct mspack_system *mspack_default_system; /* returns the length of a file opened for reading */ diff --git a/libclammspack/mspack/szdd.h b/libclammspack/mspack/szdd.h index b9936b42d1..e07c6b7c8e 100644 --- 
a/libclammspack/mspack/szdd.h +++ b/libclammspack/mspack/szdd.h @@ -10,7 +10,7 @@ #ifndef MSPACK_SZDD_H #define MSPACK_SZDD_H 1 -#include "lzss.h" +#include /* input buffer size during decompression - not worth parameterising IMHO */ #define SZDD_INPUT_SIZE (2048) diff --git a/libclammspack/mspack/szddc.c b/libclammspack/mspack/szddc.c index bf7e6f9916..cdd39a6305 100644 --- a/libclammspack/mspack/szddc.c +++ b/libclammspack/mspack/szddc.c @@ -9,8 +9,8 @@ /* SZDD compression implementation */ -#include "system.h" -#include "szdd.h" +#include +#include struct msszdd_compressor * mspack_create_szdd_compressor(struct mspack_system *sys) diff --git a/libclammspack/mspack/szddd.c b/libclammspack/mspack/szddd.c index 0edafd07fc..100fa34aea 100644 --- a/libclammspack/mspack/szddd.c +++ b/libclammspack/mspack/szddd.c @@ -13,8 +13,8 @@ /* SZDD decompression implementation */ -#include "system.h" -#include "szdd.h" +#include +#include /* prototypes */ static struct msszddd_header *szddd_open( @@ -97,7 +97,7 @@ static struct msszddd_header *szddd_open(struct msszdd_decompressor *base, if (!fh) self->error = MSPACK_ERR_OPEN; if (!hdr) self->error = MSPACK_ERR_NOMEMORY; } - + if (self->error) { if (fh) sys->close(fh); sys->free(hdr); diff --git a/libclammspack/test/cabd_compare b/libclammspack/test/cabd_compare index bda2e2373d..ebe12002e6 100755 --- a/libclammspack/test/cabd_compare +++ b/libclammspack/test/cabd_compare @@ -1,10 +1,13 @@ #!/bin/sh # Test if cabd_md5 expands cab files identically to Microsoft's EXTRACT.EXE +# (or EXPAND.EXE if cab file is in a directory called 'expand') [ -d .cache ] || mkdir .cache BASEDIR=`dirname "$0"` cnt=1 +orig=.$$.orig +test=.$$.test for cab in "$@"; do name=`printf '%d/%d %s' $cnt $# $cab` cnt=`expr $cnt + 1` @@ -12,23 +15,27 @@ for cab in "$@"; do echo "test $name" cached=`echo $cab | sed -e 's/\//-/g' -e 's/^/.cache\//'` if [ ! 
-s $cached ]; then - $BASEDIR/msextract_md5 $cab >.orig.out 2>.orig.err - if [ -s .orig.err ]; then + case $cab in + */expand/*) $BASEDIR/msexpand_md5 $cab >$orig 2>$orig.err;; + *) $BASEDIR/msextract_md5 $cab >$orig 2>$orig.err;; + esac + + if [ -s $orig.err ]; then echo "FAIL $name: MS errors" >&2 - cat .orig.err >&2 - else - mv .orig.out $cached + cat $orig.err >&2 + continue fi + mv $orig $cached fi - $BASEDIR/cabd_md5 $cab >.test.out 2>.test.err - perl -pi -e 'if($.>1){s{\\}{/}g;s{ /}{ }}' .test.out + $BASEDIR/cabd_md5 $cab >$test 2>$test.err + perl -pi -e 'if($.>1){s{\\}{/}g;s{ /}{ }}' $test # suppress warning. PRECOPY2.CAB does not extend to CATALOG3.CAB, but # CATALOG3.CAB extends backwards to PRECOPY2.CAB. cabd_md5 supports this # but msextract_md5 does not, so differences appear. As a workaround, test # PRECOPYn.CAB separately and suppress the warning when testing CATALOG3.CAB - sed -i "/can't find \"PRECOPY2.CAB\" to prepend/d" .test.err + sed -i "/can't find \"PRECOPY2.CAB\" to prepend/d" $test.err # suppress warning. One cabinet set has this structure: # * cab1: file1 FROM_PREV, file2 TO_NEXT @@ -38,18 +45,19 @@ for cab in "$@"; do # This is wrong. file3 and file4 are in the same folder, so both should # be FROM_PREV_AND_TO_NEXT in cab3, and both should be listed in cab4. # However, the set unpacks despite the warning, so suppress it. 
- sed -i '/rainloop.xa not listed in both cabinets/d' .test.err + sed -i '/rainloop.xa not listed in both cabinets/d' $test.err - if [ -s .test.err ]; then + if [ -s $test.err ]; then echo "FAIL $name: errors" >&2 - cat .test.err >&2 + cat $test.err >&2 + continue fi - if cmp $cached .test.out >/dev/null; then + if cmp $cached $test >/dev/null; then echo "OK $name" else echo "FAIL $name: differences" >&2 - diff -u $cached .test.out >&2 + diff -u $cached $test >&2 fi done -rm -f .orig.out .orig.err .test.out .test.err +rm -f $orig $orig.err $test $test.err diff --git a/libclammspack/test/cabd_test.c b/libclammspack/test/cabd_test.c index 91e9dada1c..23cc2c94ba 100644 --- a/libclammspack/test/cabd_test.c +++ b/libclammspack/test/cabd_test.c @@ -8,7 +8,7 @@ #include #include #include -#include "system.h" +#include #define __tf3(x) #x #define __tf2(x) __tf3(x) @@ -125,7 +125,7 @@ void cabd_open_test_04() { /* cab has 0 files */ TEST(!cabd->open(cabd, TESTFILE("bad_nofiles.cab"))); - /* second file in the cab has a folder index for a non-existant folder */ + /* second file in the cab has a folder index for a non-existent folder */ TEST(!cabd->open(cabd, TESTFILE("bad_folderindex.cab"))); /* cab has one file with empty filename */ @@ -197,6 +197,38 @@ void cabd_open_test_06() { } +int reads_allowed_before_failure = 0; +static int readfail_read(struct mspack_file *fh, void *buffer, int bytes) { + return (reads_allowed_before_failure-- <= 0) ? -1 : + read_files_write_md5.read(fh, buffer, bytes); +} + +/* open cab with overlong filename and an mspack_system.read() that fails + * after a certain number of calls. 
Tests that CVE-2017-11423 is fixed */ +void cabd_open_test_07() { + struct mscab_decompressor *cabd; + struct mscabd_cabinet *cab; + int i; + + struct mspack_system readfail = read_files_write_md5; + readfail.read = &readfail_read; + cabd = mspack_create_cab_decompressor(&readfail); + TEST(cabd != NULL); + + /* normally the cabd_read_string() call to read() is the 3rd read but try + * 1-5 just in case implementation changes. If I re-introduce the code bug, + * I can get valgrind to say "Conditional jump or move depends on + * uninitialised value(s)" as it reads beyond the stack buffer, but I can't + * get ASan to notice it. Also, I don't have the original POC file from + * https://bugzilla.clamav.net/show_bug.cgi?id=11873 */ + for (i = 1; i <= 5; i++) { + reads_allowed_before_failure = i; + cab = cabd->open(cabd, TESTFILE("cve-2017-11423-fname-overread.cab")); + if (cab) cabd->close(cabd, cab); + } + mspack_destroy_cab_decompressor(cabd); +} + /* open where search file doesn't exist */ void cabd_search_test_01() { struct mscab_decompressor *cabd; @@ -207,7 +239,7 @@ void cabd_search_test_01() { mspack_destroy_cab_decompressor(cabd); } - + /* search file using 1-byte buffer */ void cabd_search_test_02() { struct mscab_decompressor *cabd; @@ -279,7 +311,7 @@ void cabd_merge_test_01() { cabd->close(cabd, cab1); mspack_destroy_cab_decompressor(cabd); } - + /* test merging a normal 5 part single folder cabinet set with slightly * haphazard ordering. 
should still merge fine */ void cabd_merge_test_02() { @@ -329,21 +361,21 @@ void cabd_extract_test_01() { TESTFILE("cve-2014-9556-qtm-infinite-loop.cab"), TESTFILE("cve-2015-4470-mszip-over-read.cab"), TESTFILE("cve-2015-4471-lzx-under-read.cab"), + TESTFILE("cve-2018-18584-qtm-max-size-block.cab"), TESTFILE("filename-read-violation-2.cab"), TESTFILE("filename-read-violation-3.cab"), TESTFILE("filename-read-violation-4.cab"), TESTFILE("lzx-main-tree-no-lengths.cab"), TESTFILE("lzx-premature-matches.cab"), - TESTFILE("qtm-max-size-block.cab"), }; - TEST(cabd = mspack_create_cab_decompressor(NULL)); + TEST(cabd = mspack_create_cab_decompressor(&read_files_write_md5)); for (i = 0; i < (sizeof(files)/sizeof(char *)); i++) { TEST(cab = cabd->open(cabd, files[i])); TEST(cab->files != NULL); for (file = cab->files; file; file = file->next) { - int err = cabd->extract(cabd, file, "/dev/null"); + int err = cabd->extract(cabd, file, NULL); TEST(err == MSPACK_ERR_DATAFORMAT || err == MSPACK_ERR_DECRUNCH); } cabd->close(cabd, cab); @@ -362,20 +394,18 @@ void cabd_extract_test_02() { * caused cabd.c to try and free the invalid folder state left by * extracting from folder 2, which caused a jump to NULL / segfault */ - TEST(cabd = mspack_create_cab_decompressor(NULL)); + TEST(cabd = mspack_create_cab_decompressor(&read_files_write_md5)); TEST(cab = cabd->open(cabd, TESTFILE("cve-2014-9732-folders-segfault.cab"))); - err = cabd->extract(cabd, cab->files, "/dev/null"); + err = cabd->extract(cabd, cab->files, NULL); TEST(err == MSPACK_ERR_OK); - err = cabd->extract(cabd, cab->files->next, "/dev/null"); + err = cabd->extract(cabd, cab->files->next, NULL); TEST(err == MSPACK_ERR_DATAFORMAT || err == MSPACK_ERR_DECRUNCH); - err = cabd->extract(cabd, cab->files, "/dev/null"); + err = cabd->extract(cabd, cab->files, NULL); TEST(err == MSPACK_ERR_OK); cabd->close(cabd, cab); mspack_destroy_cab_decompressor(cabd); } -#include - /* test that extraction works with all compression methods 
*/ void cabd_extract_test_03() { struct mscab_decompressor *cabd; @@ -450,6 +480,7 @@ int main() { cabd_open_test_04(); cabd_open_test_05(); cabd_open_test_06(); + cabd_open_test_07(); cabd_search_test_01(); cabd_search_test_02(); diff --git a/libclammspack/test/chmd_compare b/libclammspack/test/chmd_compare index c2e4594966..7b6ca95fc4 100755 --- a/libclammspack/test/chmd_compare +++ b/libclammspack/test/chmd_compare @@ -5,38 +5,41 @@ BASEDIR=`dirname $0` cnt=1 +orig=.$$.orig +test=.$$.test for chm in "$@"; do name=`printf '%d/%d %s' $cnt $# $chm`; cnt=`expr $cnt + 1` echo "test $name" cached=`echo $chm | sed -e 's/\//-/g' -e 's/^/.cache\//'` if [ ! -s $cached ]; then - $BASEDIR/msdecompile_md5 $chm >.orig.out 2>.orig.err - if [ -s .orig.err ]; then + $BASEDIR/msdecompile_md5 $chm >$orig 2>$orig.err + if [ -s $orig.err ]; then echo "FAIL $name: MS errors" >&2 - cat .orig.err >&2 - else - LANG=C sort -k2 .orig.out >$cached + cat $orig.err >&2 + continue fi + LANG=C sort -k2 $orig >$cached fi - $BASEDIR/chmd_md5 $chm >.test.out 2>.test.errwarn - perl -pe 'if($.>1){$_=""if/^[0-9a-f]{32} \/[#\$]/;s{ /}{ }}' .test.out | LANG=C sort -k2 >.test.sorted - sed '/^WARNING; /d' .test.errwarn > .test.err - if [ -s .test.err ]; then + $BASEDIR/chmd_md5 $chm >$test.unsorted 2>$test.errwarn + perl -pe 'if($.>1){$_=""if/^[0-9a-f]{32} \/[#\$]/;s{ /}{ }}' $test.unsorted | LANG=C sort -k2 >$test + sed '/^WARNING; /d' $test.errwarn > $test.err + if [ -s $test.err ]; then echo "FAIL $name: errors" >&2 - cat .test.errwarn >&2 + cat $test.errwarn >&2 + continue fi - if cmp $cached .test.sorted >/dev/null; then + if cmp $cached $test >/dev/null; then echo "OK $name" else - if [ `diff $cached .test.sorted | grep -c '^<'` -gt 0 ]; then + if [ `diff $cached $test | grep -c '^<'` -gt 0 ]; then echo "FAIL $name: differences" >&2 - diff -u $cached .test.sorted >&2 + diff -u $cached $test >&2 else echo "OK $name (better than hh.exe)" fi fi done -rm -f .orig.out .orig.err .test.out .test.err 
.test.errwarn .test.sorted +rm -f $orig $orig.err $test $test.err $test.errwarn $test.unsorted diff --git a/libclammspack/test/chmd_find.c b/libclammspack/test/chmd_find.c index 0da9a55a15..cb95ed8dbf 100644 --- a/libclammspack/test/chmd_find.c +++ b/libclammspack/test/chmd_find.c @@ -8,9 +8,8 @@ #include #include #include - +#include "mspack/macros.h" #include -#include "system.h" void find(struct mschm_decompressor *chmd, struct mschmd_header *chm, char *archive, char *filename, struct mschmd_file *compare) diff --git a/libclammspack/test/chmd_test.c b/libclammspack/test/chmd_test.c index 505e76f77c..9ab6f24842 100644 --- a/libclammspack/test/chmd_test.c +++ b/libclammspack/test/chmd_test.c @@ -10,7 +10,7 @@ #include #include -#include "system.h" +#include #define __tf3(x) #x #define __tf2(x) __tf3(x) @@ -44,7 +44,8 @@ void chmd_open_test_02() { struct mschmd_file *f; unsigned int i; const char *files[] = { - TESTFILE("blank-filenames.chm"), + TESTFILE("cve-2018-14680-blank-filenames.chm"), + TESTFILE("cve-2018-18585-blank-filenames.chm"), }; TEST(chmd = mspack_create_chm_decompressor(NULL)); @@ -56,10 +57,41 @@ void chmd_open_test_02() { for (f = chm->sysfiles; f; f = f->next) { TEST(f->filename && f->filename[0]); } + chmd->close(chmd, chm); } mspack_destroy_chm_decompressor(chmd); } +/* check that files with a mix of normal and over-long ENCINTs for offsets + * and lengths can be opened and all offsets/lengths are non-negative */ +void chmd_open_test_03() { + struct mschm_decompressor *chmd; + struct mschmd_header *chm; + struct mschmd_file *f; + unsigned int i; + const char *files[] = { +#if SIZEOF_OFF_T >= 8 + TESTFILE("encints-64bit-offsets.chm"), + TESTFILE("encints-64bit-lengths.chm"), + TESTFILE("encints-64bit-both.chm"), +#else + TESTFILE("encints-32bit-offsets.chm"), + TESTFILE("encints-32bit-lengths.chm"), + TESTFILE("encints-32bit-both.chm"), +#endif + }; + + TEST(chmd = mspack_create_chm_decompressor(NULL)); + for (i = 0; i < 
(sizeof(files)/sizeof(char *)); i++) { + TEST(chm = chmd->open(chmd, files[i])); + for (f = chm->files; f; f = f->next) { + TEST(f->offset >= 0); + TEST(f->length >= 0); + } + chmd->close(chmd, chm); + } + mspack_destroy_chm_decompressor(chmd); +} /* check searching bad files doesn't crash */ void chmd_search_test_01() { @@ -71,6 +103,9 @@ void chmd_search_test_01() { TESTFILE("cve-2015-4468-namelen-bounds.chm"), TESTFILE("cve-2015-4469-namelen-bounds.chm"), TESTFILE("cve-2015-4472-namelen-bounds.chm"), + TESTFILE("cve-2018-14679-off-by-one.chm"), + TESTFILE("cve-2018-14682-unicode-u100.chm"), + TESTFILE("cve-2019-1010305-name-overread.chm"), }; TEST(chmd = mspack_create_chm_decompressor(NULL)); @@ -89,9 +124,8 @@ void chmd_search_test_01() { mspack_destroy_chm_decompressor(chmd); } -#include -static int m_read_xor(struct mspack_file *file, void *buffer, int bytes) { - int read = m_read(file, buffer, bytes); +static int read_xor(struct mspack_file *file, void *buffer, int bytes) { + int read = read_files_write_md5.read(file, buffer, bytes); if (read > 0) { char *p = (char *) buffer, *end = &p[read]; while (p < end) *p++ ^= 0xFF; @@ -107,8 +141,11 @@ void chmd_extract_test_01() { /* create an mspack_system that XORs the files it reads */ struct mspack_system xor_files = read_files_write_md5; - xor_files.read = &m_read_xor; + xor_files.read = &read_xor; + /* source file is obfuscafted with XOR because clamav calls it + * "BC.Legacy.Exploit.CVE_2012_1458-1" and blocks distributing libmspack + * https://github.com/kyz/libmspack/issues/17#issuecomment-411583917 */ TEST(chmd = mspack_create_chm_decompressor(&xor_files)); TEST(chm = chmd->open(chmd, TESTFILE("cve-2015-4467-reset-interval-zero.chm.xor"))); for (f = chm->files; f; f = f->next) { @@ -118,6 +155,7 @@ void chmd_extract_test_01() { mspack_destroy_chm_decompressor(chmd); } + int main() { int selftest; @@ -126,6 +164,7 @@ int main() { chmd_open_test_01(); chmd_open_test_02(); + chmd_open_test_03(); 
chmd_search_test_01(); chmd_extract_test_01(); diff --git a/libclammspack/test/chminfo.c b/libclammspack/test/chminfo.c index b358f90ea2..869e34e9ff 100644 --- a/libclammspack/test/chminfo.c +++ b/libclammspack/test/chminfo.c @@ -8,7 +8,7 @@ #include #include -#include "system.h" +#include "mspack/macros.h" #define FILENAME ".chminfo-temp" @@ -53,12 +53,12 @@ char *guid(unsigned char *data) { return result; } -#define READ_ENCINT(var, label) do { \ - (var) = 0; \ - do { \ - if (p > &chunk[chm->chunk_size-2]) goto label; \ - (var) = ((var) << 7) | (*p & 0x7F); \ - } while (*p++ & 0x80); \ +#define READ_ENCINT(var, label) do { \ + (var) = 0; \ + do { \ + if (p > pend) goto label; \ + (var) = ((var) << 7) | (*p & 0x7F); \ + } while (*p++ & 0x80); \ } while (0) void print_dir(struct mschmd_header *chm, char *filename) { @@ -67,7 +67,7 @@ void print_dir(struct mschmd_header *chm, char *filename) { FILE *fh; if (!(chunk = (unsigned char *) malloc(chm->chunk_size))) return; - + if ((fh = fopen(filename, "rb"))) { #if HAVE_FSEEKO fseeko(fh, chm->dir_offset - 84, SEEK_SET); @@ -96,7 +96,7 @@ void print_dir(struct mschmd_header *chm, char *filename) { for (i = 0; i < chm->num_chunks; i++) { unsigned int num_entries, quickref_size, j, k; - unsigned char *p, *name; + unsigned char *p, *pend, *name; printf(" CHUNK %u:\n", i); fread(chunk, chm->chunk_size, 1, fh); @@ -126,14 +126,18 @@ void print_dir(struct mschmd_header *chm, char *filename) { } p = &chunk[20]; + pend = &chunk[chm->chunk_size-2]; for (j = 0; j < num_entries; j++) { - unsigned int name_len = 0, section = 0, offset = 0, length = 0; + off_t name_len = 0, section = 0, offset = 0, length = 0; printf(" %4d: ", (int) (p - &chunk[0])); - READ_ENCINT(name_len, PMGL_end); name = p; p += name_len; + READ_ENCINT(name_len, PMGL_end); + if (name_len < 0 || name_len > (pend - p)) goto PMGL_end; + name = p; p += name_len; READ_ENCINT(section, PMGL_end); READ_ENCINT(offset, PMGL_end); READ_ENCINT(length, PMGL_end); - 
printf("sec=%u off=%-10u len=%-10u name=\"",section,offset,length); + printf("sec=%" LD " off=%-9" LD " len=%-9" LD " name=\"", + section, offset, length); if (name_len) fwrite(name, 1, name_len, stdout); printf("\"\n"); } @@ -159,16 +163,19 @@ void print_dir(struct mschmd_header *chm, char *filename) { } p = &chunk[8]; + pend = &chunk[chm->chunk_size-2]; for (j = 0; j < num_entries; j++) { - unsigned int name_len, section; + off_t name_len, section; printf(" %4d: ", (int) (p - &chunk[0])); - READ_ENCINT(name_len, PMGI_end); name = p; p += name_len; + READ_ENCINT(name_len, PMGI_end); + if (name_len < 0 || name_len > (pend - p)) goto PMGI_end; + name = p; p += name_len; READ_ENCINT(section, PMGI_end); - printf("chunk=%-4u name=\"",section); + printf("chunk=%-4" LD " name=\"",section); if (name_len) fwrite(name, 1, name_len, stdout); printf("\"\n"); } - PMGI_end: + PMGI_end: if (j != num_entries) printf("premature end of chunk\n"); } else { @@ -178,6 +185,7 @@ void print_dir(struct mschmd_header *chm, char *filename) { fclose(fh); } + free(chunk); } @@ -252,7 +260,7 @@ int main(int argc, char *argv[]) { case 4: for (i = 0; i < numf && pos < len; i++, pos += 4) { unsigned int rtdata = EndGetI32(&data[pos]); - printf(" %-10u -> %-10u [ %" LU " %u ]\n", + printf(" %-10u -> %-10u [ %" LD " %u ]\n", i * EndGetI32(&data[32]), rtdata, contents + rtdata, diff --git a/libclammspack/test/kwajd_test.c b/libclammspack/test/kwajd_test.c index fdaaef82e4..50ca4d568e 100644 --- a/libclammspack/test/kwajd_test.c +++ b/libclammspack/test/kwajd_test.c @@ -8,7 +8,6 @@ #include #include #include -#include "system.h" #define __tf3(x) #x #define __tf2(x) __tf3(x) @@ -100,6 +99,8 @@ void kwajd_open_test_01() { BAD(TESTFILE("f93.kwj")); BAD(TESTFILE("f94.kwj")); + BAD(TESTFILE("cve-2018-14681.kwj")); + #undef GOOD #undef BAD diff --git a/libclammspack/test/md5_fh.h b/libclammspack/test/md5_fh.h index 9bc2900d71..0ed596f71c 100644 --- a/libclammspack/test/md5_fh.h +++ 
b/libclammspack/test/md5_fh.h @@ -14,6 +14,7 @@ char md5_string[33]; struct mspack_file_p { FILE *fh; + const char *filename; }; static struct mspack_file *m_open(struct mspack_system *self, const char *filename, int mode) { @@ -24,12 +25,15 @@ static struct mspack_file *m_open(struct mspack_system *self, const char *filena if ((fh = (struct mspack_file_p *) malloc(sizeof(struct mspack_file_p)))) { if (mode == MSPACK_SYS_OPEN_WRITE) { fh->fh = NULL; + fh->filename = ""; md5_init_ctx(&md5_context); return (struct mspack_file *) fh; } else { - if ((fh->fh = fopen(filename, "rb"))) + if ((fh->fh = fopen(filename, "rb"))) { + fh->filename = filename; return (struct mspack_file *) fh; + } } /* error - free file handle and return NULL */ free(fh); @@ -101,6 +105,9 @@ static off_t m_tell(struct mspack_file *file) { static void m_msg(struct mspack_file *file, const char *format, ...) { va_list ap; + if (file) { + fprintf(stderr, "%s: ", ((struct mspack_file_p *) file)->filename); + } va_start(ap, format); vfprintf(stderr, format, ap); va_end(ap); diff --git a/libclammspack/test/msexpand_md5 b/libclammspack/test/msexpand_md5 new file mode 100755 index 0000000000..4fd039d8f6 --- /dev/null +++ b/libclammspack/test/msexpand_md5 @@ -0,0 +1,44 @@ +#!/usr/bin/perl -w +# put expand.exe and dpx.dll (or a link to them) into this directory +# for this script to work. +# +# NOTE: when a cabinet only contains one file, expand.exe fails to +# preserve its filename, it renames the output after the source cabinet + +use strict; +use File::Temp qw(tempdir); +use Cwd qw(cwd); + +my $expand = $0; $expand =~ s{[^/]+$}{expand.exe}; +my $HDR1 = 'Microsoft (R) File Expansion Utility'; +my $HDR2 = 'Copyright (c) Microsoft Corp'; + +my $dir = tempdir("./.tempXXXX", CLEANUP => 1) . 
'/extradir'; +mkdir $dir; +$ENV{LANG} = 'C'; +$ENV{WINEPREFIX} = "$ENV{HOME}/.wine64"; +$ENV{WINEARCH} = 'win64'; + +for my $cab (@ARGV) { + my @files; + print "*** $cab\n"; + for (`wine $expand $cab -F:* $dir 2>&1`) { + s/\015?\012$//s; # remove line endings + next if /^(\Q$HDR1\E|\Q$HDR2\E|\s*$|Expanding Files |Progress: |\d+ files total)/; + if (/^Adding \Q$dir\E\\(.+) to Extraction Queue$/) { + my $file = $1; + $file =~ s{\\}{/}g; + $file =~ s{^/+}{}; + push @files, $file; + } + else { + print STDERR "$_\n"; + } + } + + next unless @files; + my $olddir = cwd(); + chdir $dir; + system 'md5sum', @files; + chdir $olddir; +} diff --git a/libclammspack/test/test_files/cabd/cve-2017-11423-fname-overread.cab b/libclammspack/test/test_files/cabd/cve-2017-11423-fname-overread.cab new file mode 100644 index 0000000000000000000000000000000000000000..11c2a6442e3b91f5539a6cd1dc7df59aadfb289a GIT binary patch literal 332 zcmeYbc6MU`0v|>Y$-tlkp%|DM85uxAAZ!aHKr}m4j)5UZX;&l{gGXvkPX34_6czPK RDoVh?!43=-a2P0Z0RW-tX+Qt~ literal 0 HcmV?d00001 diff --git a/libclammspack/test/test_files/cabd/cve-2018-18584-qtm-max-size-block.cab b/libclammspack/test/test_files/cabd/cve-2018-18584-qtm-max-size-block.cab new file mode 100644 index 0000000000000000000000000000000000000000..15ee964215fbf3c49d4564830cf7c1147e449068 GIT binary patch literal 38990 zcmeIuu?fOp5C+ii#~JK{wFu4-NT-dJZNMTBEKCM)jLhP68WM2_c~{&W-(B@im%7QP z&ePnJ{W7vCC1u^`hmyxO&oxNDdtIxHw_!XU+U~x@)9?QMGYJqNK!5-N0t5&UAV7cs z0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZ zfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&U zAV7cs0RjXF5FkK+009C72oNAZfB*pk1PBlyK!5-N0t5&UAV7cs0RjXF5FkK+z&{AQ E0U6Q>!2kdN literal 0 HcmV?d00001 diff --git a/libclammspack/test/test_files/chmd/cve-2015-4468-namelen-bounds.chm b/libclammspack/test/test_files/chmd/cve-2015-4468-namelen-bounds.chm index 6c829bda2ec4e6d314832223264c0b0410788437..a211f479d63f6ec679872104cbde509739f2d54b 100644 GIT binary patch delta 17 
XcmZ20xmI#Q2HXGt3_!3k&z=VWJeCFX delta 17 ZcmZ20xmI#Q2AhlWv;Y79Z_Kmj0RTp-2tNP- diff --git a/libclammspack/test/test_files/chmd/cve-2015-4469-namelen-bounds.chm b/libclammspack/test/test_files/chmd/cve-2015-4469-namelen-bounds.chm index c4c32602073bb881dc056d28b2413cd4b327f862..de5b615a9db98fd607df730c9caa27de18c72acc 100644 GIT binary patch delta 29 ccmZqDY0{YxBg6m%{}@3e2rvQh#=I$_099)RBme*a delta 29 gcmZqDY0{YxBgD?Y!0?X|NPqzo!~c65^QMRb0AN)I2mk;8 diff --git a/libclammspack/test/test_files/chmd/cve-2018-14679-off-by-one.chm b/libclammspack/test/test_files/chmd/cve-2018-14679-off-by-one.chm new file mode 100644 index 0000000000000000000000000000000000000000..5fae90e720cbcccf70df125d37772cacb947ba8e GIT binary patch literal 3504 zcmeYa33fBU0tEgt)~u?&AUKbQVZlkIXKw`2_z4UU0MlB5#s`UmgkbX6_{ef8NNWEB z0Sl0?02KqVk=2f}heikl_`3U`yObFyz=$q5${!7Z(GVC7fzc2c4S~@R7!85Z5Eu=C J(Gb8I0sx*0m-qkx literal 0 HcmV?d00001 diff --git a/libclammspack/test/test_files/chmd/cve-2018-14680-blank-filenames.chm b/libclammspack/test/test_files/chmd/cve-2018-14680-blank-filenames.chm new file mode 100644 index 0000000000000000000000000000000000000000..71e6fb880030537e7079155debfe1edcf217afc9 GIT binary patch literal 4300 zcmeYa33g*i9Vn!go#e3`uCkq3Ez+c9iRn->+=kYKsIH~mPjUXByq#gt$AQVFd zlnxPqFwQ{fe;`AFpkEV0!${AN-~fO+v z91`T|=N`3cZYsY{q88MsEcqnUKV!oK%JUGzI7Uyb_RBpsHj>hGs)iBt<2u#U+M%86~+n zj4d^`B34!|i6x1_1&PV2`hJPIKyMY7FfiC?Ad3f=5vb z?O<4B-WQ`oli9Vn!go#e3`uCkq3Ez+c9iRn->+=kYKsIH~mPjUXByq#gt$AQVFd zlnxPqFwQ{fe;`AFpkEV0!${AN-~fO+v z91`T|=N`3cZYsY{q88MsEcqnUKV!oK%JUGzI7Uyb_RBpsHj>hGs)iBt<2u#U+M%86~+n zj4d^`B34!|i6x1_1&PV2`hJPIKyMY7FfiC?Ad3f=5vb z?O<4B-WQ`olC(Jwp~aDM;* literal 0 HcmV?d00001 diff --git a/libclammspack/test/test_files/chmd/encints-32bit-lengths.chm b/libclammspack/test/test_files/chmd/encints-32bit-lengths.chm new file mode 100644 index 0000000000000000000000000000000000000000..0667614994e5a48565eb5bd0346f3207ba830664 GIT binary patch literal 4292 
zcmeHEJxc>Y5S0=eU~hKcd%N>^*C~64H6l9U<#?eQb)wdnyL!ExX3tG}nXJEmWdF|4 zqgG<03+biwZ+YykC4jU}+3SLWZV4Fkm=A2m1qg*buzG{&{IPbru{O<~KBE`h1)r*W z+&(&3#x2Dc&qkwRo|C(dyMo*e#jPhd6Lgc&vA|mlCwv=tB6dI|W;Za2+9P)hZeh?L z=KJLC#(WEhf+xdXUcvP_OlgDb3uY9Vg@~SHJt1|Vhx?jG7F#bIIS^}1UC143y0+xU!UY5SKhrJZ80So=F{?D8wD z`~wym=k3n!4TAq*2KILMy|*)ucfD$O&>*5?UKuY`t4Y-U&aPjtX8H3ny}WOHeCB`d z(4$tvMwhZzvj0f4x3&PX^{U|j6m(0#n8$ozb1pzAoPjl4L>G^Zlg;&6{`3{R;4S!A zgQLU#{t8~z_~Q9^JaUdESvVCmB{`NfkLVUnZz8y0Sddr^F_qXZp!pZa&DKWoDL($RgG z5jAB&4`_iBjd-jlmC>gXMXySg{3==UtaP>SAPhgxzLtO`UH;DF~#vqo9EUAUph$3xFbl6e%ufC_s^rXhIK?oeHQx^dM@^K|_~Q zP;mhYF!N?+W8(u*&BWdv&wFon_9=sOwBID6V}2X_BHAsY&UfSb^=cM9FVoBW#>Zz= z`vxDesvBL}p4$Fn&E7gzux*fzhJe6ZLXB~pk6OWj5&}7Dtv1ocWAkKleHJ}^=`7F! z4>3GCIJ{eds;n=bPbT9yrYzGep(+0~?^2N!R^Os&jz^Jg?wj$WUpou{4CpRSWjNq_ zz=RMOJ-Pubi}vW#EjY4BC5dHrw6Q)3&&C{WWStV8Q5d?a60?@Xz{JcYHT0C(E9AhI zJ5=a_uXm{mf}&^r_3C=m!rAFK>DxFgs$_t=G2IK1m#oYLO6UQnAS#(acIWDJtQRO; zU1aKlqyh#9{t8VwH=-uZNwjl6YGPBDw$$ah{M~$U2iyU7z#VW0+yQsM9dHNS0e8S1 J_%{dE=od!bb&~)9 literal 0 HcmV?d00001 diff --git a/libclammspack/test/test_files/chmd/encints-64bit-lengths.chm b/libclammspack/test/test_files/chmd/encints-64bit-lengths.chm new file mode 100644 index 0000000000000000000000000000000000000000..9238daf9f02aff3bc7de53babd3ee663a4bf4206 GIT binary patch literal 4292 zcmeH^y-Gtt5QS$$6e1$IorR4)fMjp{eE}t)5Xddkq)-BaP3#pCEA13}#oFhvvCC6f z`2rRi=giJ#*F3{4+`F4|&ffVr=~TU=8WHvRar|I4>O{>Sck^~VEnZgX^<(Svt62Dr zJl1;H=t}oW_h0?kds7A7I#sU=3%M0&>|;K(881j!I76#9h%TRMgYC^}@%$ZFa2I}D z-P7Zf_8M*_efeTE8n#;G#^L12-BLJ~@_6QqE5LCT!P_mw6`^24Oj!#fHhzZSOeC8HDC=` N1J-~wu+YE;{Q(rh^6UTr literal 0 HcmV?d00001 diff --git a/libclammspack/test/test_files/chmd/encints-64bit-offsets.chm b/libclammspack/test/test_files/chmd/encints-64bit-offsets.chm new file mode 100644 index 0000000000000000000000000000000000000000..b8d6225d6b18eae6d521017d40ef364c14913ef0 GIT binary patch literal 4292 zcmeHEy-EXN5Zntz|76%lTL5YOVmJT=-5N0Fu^-ri3lIutV9gfM<#S`ayFE`|exeuL1+O(Y 
zJwEAd;8u;VUQDNxETh{bo&wECjyWw9?a=H_;XR(I_&(zmae!sY9D=E+F5MfhM-Md@ zveKj63%s8>y%^X7N?G`@G`v{8^kW63C##6QtfVV5uw~`^Y!Z61ghf;$c}9> 7, u1($_[0] & 0x7F)); + while ($in) { + $out = u1(0x80 | ($in & 0x7F)) . $out; + $in >>= 7; + } + return $out; +} + +sub entry { + my ($name, $section, $offset, $length) = @_; + return encint(length $name) + . $name + . encint($section) + . encint($offset) + . encint($length); +} + +sub chunk { + my @entries = @_; + + my $chdr = 'PMGL' # PMGL id + . u4(0) # 0x04 free space (FIXUP) + . u4(0) # 0x08 unknown + . u4(-1) # 0x0C previous chunk (FIXUP) + . u4(-1) # 0x10 next chunk (FIXUP) + ; # 0x14 SIZEOF + my $cdata = join '', @entries; + + my $cfree = $clen - length($cdata) - length($chdr); + die if length($cdata) > $max_entries; + + # append empty "free space" area and number of entries + my $chunk = $chdr + . $cdata + . (u1(0) x ($cfree - 2)) + . u2(scalar @entries); + + # fixup free space in header + substr($chunk, 0x04, 4, u4($cfree)); + return $chunk; +} + +sub hdr { + my $guid1 = guid('7C01FD10-7BAA-11D0-9E0C-00A0C922E6EC'); + my $guid2 = guid('7C01FD11-7BAA-11D0-9E0C-00A0C922E6EC'); + return 'ITSF' # 0x00 id + . u4(2) # 0x04 version + . u4(0x58) # 0x08 total header length + . u4(1) # 0x0C unknown + . u4(0) # 0x10 timestamp + . u4(0x409) # 0x14 language (english) + . $guid1 # 0x18 GUID + . $guid2 # 0x28 GUID + . u8(0) # 0x38 hdr0 offset (FIXUP) + . u8(0) # 0x40 hdr0 length (FIXUP) + . u8(0) # 0x48 hdr1 offset (FIXUP) + . u8(0) # 0x50 hdr1 length (FIXUP) + ; # SIZEOF: 0x54 +} + +sub hs0 { + return u4(0x1FE) # 0x00 unknown + . u4(0) # 0x04 unknown + . u8(0) # 0x08 file size (FIXUP) + . u4(0) # 0x10 unknown + . u4(0) # 0x14 unknown + ; # SIZEOF: 0x18 +} + +sub hs1 { + my $cmax = shift; + my $guid3 = guid('5D02926A-212E-11D0-9DF9-00A0C922E6EC'); + return 'ITSP' # 0x00 id + . u4(1) # 0x04 unknown + . u4(0x54) # 0x08 dir header length + . u4(0x0A) # 0x0C unknown + . u4($clen) # 0x10 dir chunk size + . u4(2) # 0x14 quickref density + . 
u4(1) # 0x18 index depth + . u4(-1) # 0x1C root PMGI chunk + . u4(0) # 0x20 first PMGL chunk + . u4($cmax) # 0x24 last PMGL chunk + . u4(-1) # 0x28 unknown + . u4(1) # 0x2C number of chunks + . u4(0x904) # 0x30 language + . $guid3 # 0x34 GUID + . u4(0x54) # 0x44 header length + . u4(-1) # 0x48 unknown + . u4(-1) # 0x4C unknown + . u4(-1) # 0x50 unknown + ; # SIZEOF: 0x54 +} + +sub chm { + my @chunks = @_; + my ($hdr, $hs0, $hs1) = (hdr(), hs0(), hs1($#chunks)); + substr($hdr, 0x38, 8, u8(length($hdr))); # hs0 offset + substr($hdr, 0x40, 8, u8(length($hs0))); # hs0 length + substr($hdr, 0x48, 8, u8(length($hdr) + length($hs0))); # hs1 offset + substr($hdr, 0x50, 8, u8(length($hs1))); # hs1 length + substr($hs0, 0x08, 8, u8(length($hdr) + length($hs0) + length($hs1) + ($clen * @chunks))); # chm length + for (my $i = 1; $i <= $#chunks; $i++) { + substr($chunks[$i], 0x0C, 4, u4($i - 1)); # previous chunk + substr($chunks[$i - 1], 0x10, 4, u4($i)); # next chunk + } + return join '', $hdr, $hs0, $hs1, @chunks; +} + +# Create a CHM with the filename "::" right at the end of a PMGL chunk +# +# libmspack < 0.9.1alpha calls memcmp() on any entry beginning "::" to see if +# it begins "::DataSpace/Storage/MSCompressed/" (33 bytes), even when the name +# is shorter. 
If the entry is right at the end of a chunk, we can get libmspack +# to overread past the end of the chunk by up to 28 bytes +sub chm_sysname_overread { + if (open my $fh, '>', 'cve-2019-1010305-name-overread.chm') { + my $sysname = entry('::', 0, 0, 0); + my $padding = entry('x' x $clen, 0, 0, 0); + my $padding_overhead = length($padding) - $clen; + my $maxlen = $max_entries - length($sysname) - $padding_overhead; + $padding = entry('x' x $maxlen, 0, 0, 0); + print $fh chm(chunk($padding, $sysname)); + close $fh; + } +} + +# Create a CHM with entries containing unicode character U+100 +sub chm_unicode_u100 { + if (open my $fh, '>', 'cve-2018-14682-unicode-u100.chm') { + my $u100 = encode('UTF-8', chr(256)); + my $entry1 = entry("1", 0, 1, 1); + my $entry2 = entry($u100, 0, 2, 2); + print $fh chm(chunk($entry1, $entry2)); + close $fh; + } +} + +# Create a CHM with ENCINTs that go beyond what 32-bit architectures can handle +sub chm_encints_32bit { + chm_encints('encints-32bit-offsets.chm', 2147483647, 1, 0); + chm_encints('encints-32bit-lengths.chm', 2147483647, 0, 1); + chm_encints('encints-32bit-both.chm', 2147483647, 1, 1); +} + +# Create a CHM with ENCINTs that go beyond what 64-bit architectures can handle +sub chm_encints_64bit { + chm_encints('encints-64bit-offsets.chm', 9223372036854775807, 1, 0); + chm_encints('encints-64bit-lengths.chm', 9223372036854775807, 0, 1); + chm_encints('encints-64bit-both.chm', 9223372036854775807, 1, 1); +} + +sub chm_encints { + my ($fname, $max_good, $off_val, $len_val) = @_; + my @vals = ( + 127, 128, # 1->2 byte encoding + 16383, 16384, # 2->3 byte encoding + 2097151, 2097152, # 3->4 byte encoding + 268435455, 268435456, # 4->5 byte encoding + 2147483647, 2147483648, # 2^31-1, 2^31 (doesn't fit in 32-bit off_t) + 34359738367, 34359738368, # 5->6 byte encoding + 4398046511103, 4398046511104, # 6->7 byte encoding + 562949953421311, 562949953421312, # 7->8 byte encoding + 72057594037927935, 72057594037927936, # 8->9 byte 
encoding + 9223372036854775807, 9223372036854775808, # 2^63-1, 2^63 (doesn't fit in 64-bit off_t) + 147573952589676412927, 147573952589676412928, # 9->10 byte encoding + 1180591620717411303423, 1180591620717411303424, # 10->11 byte encoding + 151115727451828646838271, 151115727451828646838272, # 11->12 byte encoding + 19342813113834066795298815, 19342813113834066795298816); # 12->13 byte encoding + my @entries; + my $i = 0; + for my $val (@vals) { + my $name = sprintf '%s%02i', $val <= $max_good ? 'good' : 'bad', $i++; + my $offset = $off_val ? $val : 1; + my $length = $len_val ? $val : 1; + push @entries, entry($name, 0, $offset, $length); + } + if (open my $fh, '>', $fname) { + print $fh chm(chunk(@entries)); + close $fh; + } +} + +chm_sysname_overread(); +chm_unicode_u100(); +chm_encints_32bit(); +chm_encints_64bit(); diff --git a/libclammspack/test/test_files/kwajd/cve-2018-14681.kwj b/libclammspack/test/test_files/kwajd/cve-2018-14681.kwj new file mode 100644 index 0000000000000000000000000000000000000000..9b7d1499836bf7a8a076dc12e3e70e2b4e9a8779 GIT binary patch literal 35 bcmeYccl7G`pnlN+2>yczAYfpC&`=Tp!+QxG literal 0 HcmV?d00001 From 580bf64adc6267e3640769c9ec3186e9b8604276 Mon Sep 17 00:00:00 2001 From: Micah Snyder Date: Wed, 8 Feb 2023 09:11:58 -0800 Subject: [PATCH 2/3] News: Add detail for libmspack update --- NEWS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/NEWS.md b/NEWS.md index 879a4adbae..14a6127039 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,9 @@ Note: This file refers to the source tarball. Things described here may differ ClamAV 0.103.8 is a critical patch release with the following fixes: +- Update vendored libmspack library to version 0.11alpha. 
+ - GitHub pull request: https://github.com/Cisco-Talos/clamav/pull/830 + ## 0.103.7 ClamAV 0.103.7 is a critical patch release with the following fixes: From bfdf79d464ebcaf2854876c458ee22192523d8ec Mon Sep 17 00:00:00 2001 From: Micah Snyder Date: Sun, 12 Feb 2023 11:51:23 -0800 Subject: [PATCH 3/3] Windows: Revert mspack lib name in def file to fix link issue --- libclammspack/mspack/mspack.def | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libclammspack/mspack/mspack.def b/libclammspack/mspack/mspack.def index efcf32495b..1e142c1192 100644 --- a/libclammspack/mspack/mspack.def +++ b/libclammspack/mspack/mspack.def @@ -1,4 +1,4 @@ -LIBRARY mspack +LIBRARY libmspack EXPORTS mspack_create_cab_compressor mspack_create_cab_decompressor