From f0410a37b1d169dce442618e5e0e17bc413aa79a Mon Sep 17 00:00:00 2001 From: woot000 <97068837+woot000@users.noreply.github.com> Date: Sat, 10 Sep 2022 10:09:46 -0500 Subject: [PATCH 01/10] Update filterScanline, filterScanline2 Small speed boost in PNG filtering, 2kB reduction in LodePNG code size using Clang 15.0.0 --- src/lodepng/lodepng.cpp | 78 ++++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 36 deletions(-) diff --git a/src/lodepng/lodepng.cpp b/src/lodepng/lodepng.cpp index c3d0bced..08c1a428 100755 --- a/src/lodepng/lodepng.cpp +++ b/src/lodepng/lodepng.cpp @@ -3287,47 +3287,53 @@ static void filterScanline(unsigned char* out, const unsigned char* scanline, co size_t i; switch(filterType) { case 0: /*None*/ - for(i = 0; i != length; ++i) out[i] = scanline[i]; + memcpy(out, scanline, length); break; - case 1: /*Sub*/ - for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; - for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - scanline[i - bytewidth]; + case 1: { /*Sub*/ + size_t j = 0; + memcpy(out, scanline, bytewidth); + for(i = bytewidth; i != length; ++i, ++j) out[i] = scanline[i] - scanline[j]; break; + } case 2: /*Up*/ if(prevline) { for(i = 0; i != length; ++i) out[i] = scanline[i] - prevline[i]; } else { - for(i = 0; i != length; ++i) out[i] = scanline[i]; + memcpy(out, scanline, length); } break; - case 3: /*Average*/ + case 3: { /*Average*/ + size_t j = 0; if(prevline) { - for(i = 0; i != bytewidth; ++i) out[i] = scanline[i] - (prevline[i] >> 1); - for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - ((scanline[i - bytewidth] + prevline[i]) >> 1); + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i] - (prevline[i] >> 1u); + for(i = bytewidth; i < length; ++i, ++j) out[i] = scanline[i] - ((scanline[j] + prevline[i]) >> 1u); } else { - for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; - for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - (scanline[i - bytewidth] >> 1); + memcpy(out, scanline, bytewidth); + for(i = bytewidth; i < length; ++i, ++j) out[i] = scanline[i] - (scanline[j] >> 1u); } break; - case 4: /*Paeth*/ + } + case 4: { /*Paeth*/ + size_t j = 0; if(prevline) { /*paethPredictor(0, prevline[i], 0) is always prevline[i]*/ - for(i = 0; i != bytewidth; ++i) out[i] = (scanline[i] - prevline[i]); - for(i = bytewidth; i < length; ++i) { - out[i] = (scanline[i] - paethPredictor(scanline[i - bytewidth], prevline[i], prevline[i - bytewidth])); + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i] - prevline[i]; + for(i = bytewidth; i != length; ++i, ++j) { + out[i] = scanline[i] - paethPredictor(scanline[j], prevline[i], prevline[j]); } } else { - for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; + memcpy(out, scanline, bytewidth); /*paethPredictor(scanline[i - bytewidth], 0, 0) is always scanline[i - bytewidth]*/ - for(i = bytewidth; i < length; ++i) out[i] = (scanline[i] - scanline[i - bytewidth]); + for(i = bytewidth; i != length; ++i, ++j) out[i] = scanline[i] - scanline[j]; } break; + } default: return; /*invalid filter type given*/ } } static void filterScanline2(unsigned char* scanline, const unsigned char* prevline, - size_t length, unsigned char filterType, unsigned char forReal) { + size_t length, unsigned char filterType) { if (!filterType) { for(int i = 0; i < length; i+=4) { if (!scanline[i + 3]) { @@ -3389,8 +3395,8 @@ static void filterScanline2(unsigned char* scanline, const unsigned char* prevli } } } - } else if(filterType == 4 && forReal) { - if(!prevline) { + } else if(filterType == 4) { /*forReal var is always zero, so the code is commented out for now*/ + /*if(!prevline) { if(!scanline[3]) { *(unsigned*)scanline = 0; } @@ -3414,7 +3420,7 @@ static void filterScanline2(unsigned char* scanline, const unsigned char* prevli scanline[i + 2] = paethPredictor(scanline[i - 2], prevline[i], prevline[i - 2]); } } - } + }*/ } } @@ -3548,7 +3554,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, memcpy(rem, &in2[y * linebytes], linebytes * clean); for(type = 0; type != 5; ++type) { if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, type, 0); + filterScanline2(&in2[y * linebytes], prevline, linebytes, type); filterScanline(attempt[type], &in2[y * linebytes], prevline, linebytes, bytewidth, type); } else { filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); @@ -3583,7 +3589,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType, 0); + filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType); prevline = &in2[y * linebytes]; } else { prevline = &in[y * linebytes]; @@ -3642,7 +3648,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, { if(clean) { memcpy(linebuf, &in[y * linebytes], linebytes); - filterScanline2(linebuf, prevline2, linebytes, type, 0); + filterScanline2(linebuf, prevline2, linebytes, type); filterScanline(attempt[type], linebuf, prevline2, linebytes, bytewidth, type); } else { filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); @@ -3670,7 +3676,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, if(clean) { memcpy(linebuf, &in[y * linebytes], linebytes); - filterScanline2(linebuf, prevline2, linebytes, bestType, 0); + filterScanline2(linebuf, prevline2, linebytes, bestType); filterScanline(attempt[bestType], linebuf, prevline2, linebytes, bytewidth, bestType); } else { filterScanline(attempt[bestType], &in[y * linebytes], prevline, linebytes, bytewidth, bestType); @@ -3688,7 +3694,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, prevline = &in[y * linebytes]; if(clean) { memcpy(linebuf, &in[y * linebytes], linebytes); - filterScanline2(linebuf, prevline2, linebytes, bestType, 0); + filterScanline2(linebuf, prevline2, linebytes, bestType); memcpy(prevlinebuf, linebuf, linebytes); prevline2 = prevlinebuf; } @@ -3723,7 +3729,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, for(type = 0; type != 5; ++type) { size_t sum = 0; if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, type, 0); + filterScanline2(&in2[y * linebytes], prevline, linebytes, type); filterScanline(attempt[type], &in2[y * linebytes], prevline, linebytes, bytewidth, type); } else { filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); @@ -3756,7 +3762,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType, 0); + filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType); prevline = &in2[y * linebytes]; } else { prevline = &in[y * linebytes]; @@ -3783,7 +3789,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, for(type = 0; type != 5; ++type) { size_t sum = 0; if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, type, 0); + filterScanline2(&in2[y * linebytes], prevline, linebytes, type); filterScanline(attempt[type], &in2[y * linebytes], prevline, linebytes, bytewidth, type); } else { filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); @@ -3808,7 +3814,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType, 0); + filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType); prevline = &in2[y * linebytes]; } else { prevline = &in[y * linebytes]; @@ -3834,7 +3840,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, for(type = 0; type != 5; ++type) { size_t sum = 0; if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, type, 0); + filterScanline2(&in2[y * linebytes], prevline, linebytes, type); filterScanline(attempt[type], &in2[y * linebytes], prevline, linebytes, bytewidth, type); } else { filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); @@ -3859,7 +3865,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType, 0); + filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType); prevline = &in2[y * linebytes]; } else { prevline = &in[y * linebytes]; @@ -3883,7 +3889,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, /*try the 5 filter types*/ for(type = 0; type != 5; ++type) { if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, type, 0); + filterScanline2(&in2[y * linebytes], prevline, linebytes, type); filterScanline(attempt[type], &in2[y * linebytes], prevline, linebytes, bytewidth, type); } else { filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); @@ -3910,7 +3916,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType, 0); + filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType); prevline = &in2[y * linebytes]; } else { prevline = &in[y * linebytes]; @@ -3981,7 +3987,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, out[y * (linebytes + 1)] = type; if(clean) { memcpy(linebuf, &in[y * linebytes], linebytes); - filterScanline2(linebuf, prevline, linebytes, type, 0); + filterScanline2(linebuf, prevline, linebytes, type); filterScanline(&out[y * (linebytes + 1) + 1], linebuf, prevline, linebytes, bytewidth, type); memcpy(prevlinebuf, linebuf, linebytes); prevline = prevlinebuf; @@ -4074,7 +4080,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, out[y * (linebytes + 1)] = type; if(clean) { memcpy(linebuf, &in[y * linebytes], linebytes); - filterScanline2(linebuf, prevline, linebytes, type, 0); + filterScanline2(linebuf, prevline, linebytes, type); filterScanline(&out[y * (linebytes + 1) + 1], linebuf, prevline, linebytes, bytewidth, type); memcpy(prevlinebuf, linebuf, linebytes); prevline = prevlinebuf; @@ -4104,7 +4110,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, out[y * (linebytes + 1)] = type; if(clean) { memcpy(linebuf, &in[y * linebytes], linebytes); - filterScanline2(linebuf, prevline, linebytes, type, 0); + filterScanline2(linebuf, prevline, linebytes, type); filterScanline(&out[y * (linebytes + 1) + 1], linebuf, prevline, linebytes, bytewidth, type); memcpy(prevlinebuf, linebuf, linebytes); prevline = prevlinebuf; From bb5a5c6e27f26b243b6881eacbc88252ca7b8be0 Mon Sep 17 00:00:00 2001 From: woot000 <97068837+woot000@users.noreply.github.com> Date: Sat, 10 Sep 2022 10:12:08 -0500 Subject: [PATCH 02/10] Refactor some LodePNG encoding functions Very small speed boost, 272 byte reduction in LodePNG code size using Clang 15.0.0 --- src/lodepng/lodepng.cpp | 68 ++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/src/lodepng/lodepng.cpp b/src/lodepng/lodepng.cpp index 08c1a428..e70a9ef5 100755 --- a/src/lodepng/lodepng.cpp +++ b/src/lodepng/lodepng.cpp @@ -1629,11 +1629,9 @@ static unsigned getValueRequiredBits(unsigned char value) { /*stats must already have been inited. */ unsigned lodepng_compute_color_stats(LodePNGColorStats* stats, - const unsigned char* in, unsigned w, unsigned h, + const unsigned char* in, const size_t numpixels, const LodePNGColorMode* mode_in) { size_t i; - ColorTree tree; - size_t numpixels = (size_t)w * (size_t)h; unsigned error = 0; /* mark things as done already if it would be impossible to have a more expensive case */ @@ -1706,6 +1704,7 @@ unsigned lodepng_compute_color_stats(LodePNGColorStats* stats, } } } else /* < 16-bit */ { + ColorTree tree; color_tree_init(&tree); unsigned char r = 0, g = 0, b = 0, a = 0; for(i = 0; i != numpixels; ++i) { @@ -1766,6 +1765,8 @@ unsigned lodepng_compute_color_stats(LodePNGColorStats* stats, if(alpha_done && numcolors_done && colored_done && bits_done) break; } + color_tree_cleanup(&tree); + if(stats->key && !stats->alpha) { for(i = 0; i != numpixels; ++i) { getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in); @@ -1783,13 +1784,7 @@ unsigned lodepng_compute_color_stats(LodePNGColorStats* stats, stats->key_r += (stats->key_r << 8); stats->key_g += (stats->key_g << 8); stats->key_b += (stats->key_b << 8); - color_tree_cleanup(&tree); } - - unsigned char r = 0, g = 0, b = 0, a = 0; - getPixelColorRGBA8(&r, &g, &b, &a, in, 0, mode_in); - stats->white = stats->numcolors == 1 && stats->colored == 0 && r == 255 && w > 20 && h > 20 && ((w>225 && h > 225) || w*h > 75000 || (w> 250 && w*h > 40000)); - return 0; } @@ -2093,36 +2088,32 @@ output image, e.g. grey if there are only grayscale pixels, palette if there are less than 256 colors, ... Updates values of mode with a potentially smaller color model. mode_out should contain the user chosen color model, but will be overwritten with the new chosen one.*/ -static unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out, - const unsigned char* image, unsigned w, unsigned h, - const LodePNGColorMode* mode_in, unsigned div) { - LodePNGColorStats prof; +static unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in, + const LodePNGColorStats* stats, size_t numpixels, unsigned div) { unsigned error = 0; - unsigned palettebits; + unsigned palettebits, palette_ok, gray_ok; size_t i, n; - lodepng_color_stats_init(&prof); - error = lodepng_compute_color_stats(&prof, image, w, h, mode_in); - if(error) return error; - unsigned palette_ok, gray_ok; - LodePNGColorStats* stats = &prof; + unsigned alpha = stats->alpha; + unsigned key = stats->key; + unsigned bits = stats->bits; mode_out->key_defined = 0; - if(stats->key && (unsigned long long)w * h <= 49) { - prof.alpha = 1; /*too few pixels to justify tRNS chunk overhead*/ - prof.key = 0; - if(prof.bits < 8) prof.bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ + if(key && numpixels <= 49) { + alpha = 1; /*too few pixels to justify tRNS chunk overhead*/ + key = 0; + if(bits < 8) bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ } gray_ok = !stats->colored; - if(!gray_ok && prof.bits < 8) prof.bits = 8; + if(!gray_ok && bits < 8) bits = 8; n = stats->numcolors; palettebits = n <= 2 ? 1 : (n <= 4 ? 2 : (n <= 16 ? 4 : 8)); - palette_ok = n <= 256 && prof.bits <= 8; - if(8 + n * 4 > (unsigned long long)w * h / div) {palette_ok = 0;} /*don't add palette overhead if image has only a few pixels*/ - if(gray_ok && !prof.alpha && prof.bits <= palettebits && !prof.white) {palette_ok = 0;} /*gray is less overhead*/ + palette_ok = n <= 256 && bits <= 8; + if(8 + n * 4 > numpixels / div) palette_ok = 0; /*don't add palette overhead if image has only a few pixels*/ + if(gray_ok && !alpha && bits <= palettebits && !stats->white) palette_ok = 0; /*gray is less overhead*/ if(palette_ok) { const unsigned char* p = stats->palette; @@ -2135,9 +2126,9 @@ static unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out, mode_out->colortype = LCT_PALETTE; mode_out->bitdepth = palettebits; } else /*8-bit or 16-bit per channel*/ { - mode_out->bitdepth = prof.bits; - mode_out->colortype = prof.alpha ? (prof.colored ? LCT_RGBA : LCT_GREY_ALPHA) - : (prof.colored ? LCT_RGB : LCT_GREY); + mode_out->bitdepth = bits; + mode_out->colortype = alpha ? (stats->colored ? LCT_RGBA : LCT_GREY_ALPHA) + : (stats->colored ? LCT_RGB : LCT_GREY); if(stats->key) { unsigned mask = (1u << mode_out->bitdepth) - 1u; /*stats always uses 16-bit, mask converts it*/ @@ -4298,6 +4289,7 @@ static unsigned lodepng_encode(unsigned char** out, size_t* outsize, LodePNGState* state, LodePNGPaletteSettings palset) { unsigned char* data = 0; /*uncompressed version of the IDAT chunk data*/ size_t datasize = 0; + size_t numpixels = (size_t)w * (size_t)h; ucvector outv = ucvector_init(0, 0); LodePNGInfo info; const LodePNGInfo* info_png = &state->info_png; @@ -4330,7 +4322,19 @@ static unsigned lodepng_encode(unsigned char** out, size_t* outsize, /* color convert and compute scanline filter types */ lodepng_info_copy(&info, &state->info_png); if(state->encoder.auto_convert) { - state->error = lodepng_auto_choose_color(&info.color, image, w, h, &state->info_raw, state->div); + LodePNGColorStats stats; + lodepng_color_stats_init(&stats); + + state->error = lodepng_compute_color_stats(&stats, image, numpixels, &state->info_raw); + if(state->error) goto cleanup; + else { /*check if image is white only if no error is detected in previous function*/ + unsigned char r = 0, g = 0, b = 0, a = 0; + getPixelColorRGBA8(&r, &g, &b, &a, image, 0, &state->info_raw); + stats.white = stats.numcolors == 1 && stats.colored == 0 && r == 255 && w > 20 && h > 20 + && ((w > 225 && h > 225) || numpixels > 75000 || (w > 250 && numpixels > 40000)); + } + + state->error = lodepng_auto_choose_color(&info.color, &state->info_raw, &stats, numpixels, state->div); if(state->error) goto cleanup; if(info.color.colortype == LCT_PALETTE && palset.order != LPOS_NONE) { if (palset._first & 1) { @@ -4357,7 +4361,7 @@ static unsigned lodepng_encode(unsigned char** out, size_t* outsize, } if(!lodepng_color_mode_equal(&state->info_raw, &info.color)) { unsigned char* converted; - size_t size = ((size_t)w * (size_t)h * (size_t)lodepng_get_bpp(&info.color) + 7u) / 8u; + size_t size = (numpixels * (size_t)lodepng_get_bpp(&info.color) + 7u) / 8u; converted = (unsigned char*)lodepng_malloc(size); if(!converted && size) state->error = 83; /*alloc fail*/ From a987294e4aa0e4d34a36b568c5a9ee14304fc81c Mon Sep 17 00:00:00 2001 From: woot000 <97068837+woot000@users.noreply.github.com> Date: Sat, 10 Sep 2022 10:13:24 -0500 Subject: [PATCH 03/10] Optimize optimize_palette, minor refactoring Small speed boost, 1.5kB reduction in LodePNG code size using Clang 15.0.0 --- src/lodepng/lodepng.cpp | 342 +++++++++++++++------------------------- 1 file changed, 128 insertions(+), 214 deletions(-) diff --git a/src/lodepng/lodepng.cpp b/src/lodepng/lodepng.cpp index e70a9ef5..f2486bb3 100755 --- a/src/lodepng/lodepng.cpp +++ b/src/lodepng/lodepng.cpp @@ -1789,69 +1789,49 @@ unsigned lodepng_compute_color_stats(LodePNGColorStats* stats, } static void optimize_palette(LodePNGColorMode* mode_out, const uint32_t* image, - unsigned w, unsigned h, - LodePNGPalettePriorityStrategy priority, - LodePNGPaletteDirectionStrategy direction, - LodePNGPaletteTransparencyStrategy transparency, - LodePNGPaletteOrderStrategy order) { - if (order == LPOS_NONE) return; - size_t count = 0; + const unsigned w, const unsigned h, + LodePNGPalettePriorityStrategy priority, + LodePNGPaletteDirectionStrategy direction, + LodePNGPaletteTransparencyStrategy transparency, + LodePNGPaletteOrderStrategy order) { + size_t i, count = 0; ColorTree tree; color_tree_init(&tree); - for (size_t i = 0; i < w * h; ++i) { + for (i = 0; i < w * h; ++i) { const unsigned char* c = (unsigned char*)&image[i]; - if (color_tree_inc(&tree, c[0], c[1], c[2], c[3]) == 0) ++count; + if(color_tree_inc(&tree, c[0], c[1], c[2], c[3]) == 0) ++count; } - //Silence clang static analyzer warnings - if (count == 0) {return;} - - // sortfield format: - // bit 0-7: original palette index - // bit 8-39: color encoding or popularity index - // bit 40-47: order score - // bit 48-62: unused - // bit 63: transparency flag + if(count == 0) return; //Silence clang static analyzer warnings + + /*sortfield format: + bit 0-7: original palette index + bit 8-39: color encoding or popularity index + bit 40-47: order score + bit 48-62: unused + bit 63: transparency flag*/ uint64_t* sortfield = (uint64_t*)lodepng_malloc(count << 4); - for (size_t i = 0; i < count; ++i) sortfield[i] = i; + for(i = 0; i != count; ++i) sortfield[i] = i; uint32_t* palette_in = (uint32_t*)(mode_out->palette); - switch (priority) { - case LPPS_POPULARITY: - for (size_t i = 0; i < count; ++i) { - const unsigned char* p = (unsigned char*)&palette_in[i]; - sortfield[i] |= (color_tree_get(&tree, p[0], p[1], p[2], p[3]) + 1) << 8; - } - break; - case LPPS_RGB: - for (size_t i = 0; i < count; ++i) { - const unsigned char* c = (unsigned char*)&palette_in[i]; - sortfield[i] |= uint64_t(c[0]) << 32 | uint64_t(c[1]) << 24 | uint64_t(c[2]) << 16; - } - break; - case LPPS_YUV: - for (size_t i = 0; i < count; ++i) { - const unsigned char* c = (unsigned char*)&palette_in[i]; - const double r = c[0]; - const double g = c[1]; - const double b = c[2]; + for(i = 0; i != count; ++i) { /*all priority values will run through this for loop*/ + const unsigned char* c = (unsigned char*)&palette_in[i]; + if(priority == LPPS_POPULARITY) sortfield[i] |= (color_tree_get(&tree, c[0], c[1], c[2], c[3]) + 1) << 8; + else if(priority == LPPS_RGB) sortfield[i] |= uint64_t(c[0]) << 32 | uint64_t(c[1]) << 24 | uint64_t(c[2]) << 16; + else if(priority == LPPS_YUV || priority == LPPS_LAB) { + const double r = c[0]; + const double g = c[1]; + const double b = c[2]; + if(priority == LPPS_YUV) { sortfield[i] |= uint64_t(0.299 * r + 0.587 * g + 0.114 * b) << 32 | uint64_t((-0.14713 * r - 0.28886 * g + 0.436 * b + 111.18) / 0.872) << 24 | uint64_t((0.615 * r - 0.51499 * g - 0.10001 * b + 156.825) / 1.23) << 16; - } - break; - case LPPS_LAB: - { - const double ep = 216. / 24389.; - const double ka = 24389. / 27.; - const double ex = 1. / 3.; - const double de = 4. / 29.; - for (size_t i = 0; i < count; ++i) { - const unsigned char* c = (unsigned char*)&palette_in[i]; - const double r = c[0]; - const double g = c[1]; - const double b = c[2]; + } else { /*LPPS_LAB*/ double vx = (0.4124564 * r + 0.3575761 * g + 0.1804375 * b) / 255 / 95.047; double vy = (0.2126729 * r + 0.7151522 * g + 0.0721750 * b) / 255 / 100; double vz = (0.0193339 * r + 0.1191920 * g + 0.9503041 * b) / 255 / 108.883; + const double ep = 216. / 24389.; + const double ka = 24389. / 27.; + const double ex = 1. / 3.; + const double de = 4. / 29.; vx = vx > ep ? pow(vx, ex) : ka * vx + de; vy = vy > ep ? pow(vy, ex) : ka * vy + de; vz = vz > ep ? pow(vz, ex) : ka * vz + de; @@ -1859,224 +1839,157 @@ static void optimize_palette(LodePNGColorMode* mode_out, const uint32_t* image, | uint64_t((vx - vy) * 500 + 256) << 24 | uint64_t((vy - vz) * 200 + 256) << 16; } + } else { /*LPPS_MSB*/ + const uint64_t r = c[0]; + const uint64_t g = c[1]; + const uint64_t b = c[2]; + sortfield[i] |= (r & 128) << 39 | (g & 128) << 38 | (b & 128) << 37 + | (r & 64) << 35 | (g & 64) << 34 | (b & 64) << 33 + | (r & 32) << 31 | (g & 32) << 30 | (b & 32) << 29 + | (r & 16) << 27 | (g & 16) << 26 | (b & 16) << 25 + | (r & 8) << 23 | (g & 8) << 22 | (b & 8) << 21 + | (r & 4) << 19 | (g & 4) << 18 | (b & 4) << 17 + | (r & 2) << 15 | (g & 2) << 14 | (b & 2) << 13 + | (r & 1) << 11 | (g & 1) << 10 | (b & 1) << 9; } - break; - case LPPS_MSB: - for (size_t i = 0; i < count; ++i) { - const unsigned char* c = (unsigned char*)&palette_in[i]; - const uint64_t r = c[0]; - const uint64_t g = c[1]; - const uint64_t b = c[2]; - sortfield[i] |= (r & 128) << 39 | (g & 128) << 38 | (b & 128) << 37 - | (r & 64) << 35 | (g & 64) << 34 | (b & 64) << 33 - | (r & 32) << 31 | (g & 32) << 30 | (b & 32) << 29 - | (r & 16) << 27 | (g & 16) << 26 | (b & 16) << 25 - | (r & 8) << 23 | (g & 8) << 22 | (b & 8) << 21 - | (r & 4) << 19 | (g & 4) << 18 | (b & 4) << 17 - | (r & 2) << 15 | (g & 2) << 14 | (b & 2) << 13 - | (r & 1) << 11 | (g & 1) << 10 | (b & 1) << 9; - } - break; } - switch (transparency) { + switch(transparency) { case LPTS_IGNORE: break; case LPTS_FIRST: - for (size_t i = 0; i < count; ++i) { - if (((unsigned char*)&palette_in[i])[3] == 0xFF) { - sortfield[i] |= 0x8000000000000000ULL; - } - } - // fall through + for(i = 0; i != count; ++i) if(((unsigned char*)&palette_in[i])[3] == 0xFF) sortfield[i] |= 0x8000000000000000ULL; + /*fall through*/ case LPTS_SORT: - if (priority == LPPS_MSB) { - for (size_t i = 0; i < count; ++i) { + if(priority == LPPS_MSB) { + for(i = 0; i != count; ++i) { const uint64_t a = ((unsigned char*)&palette_in[i])[3]; sortfield[i] |= (a & 0x80ULL) << 36 | (a & 0x40ULL) << 32 | (a & 0x20ULL) << 28 | (a & 0x10ULL) << 24 | (a & 8ULL) << 20 | (a & 4ULL) << 16 | (a & 2ULL) << 12 | (a & 1ULL) << 8; } - } else if(priority != LPPS_POPULARITY) { - for (size_t i = 0; i < count; ++i) { - sortfield[i] |= uint64_t(((unsigned char*)&palette_in[i])[3]) << 8; - } - } + } else if(priority != LPPS_POPULARITY) for(i = 0; i != count; ++i) sortfield[i] |= uint64_t(((unsigned char*)&palette_in[i])[3]) << 8; break; } size_t best = 0; - if (order == LPOS_GLOBAL) { - if (direction == LPDS_DESCENDING) { - for (size_t i = 0; i < count; ++i) { - // flip bits, but preserve original index and transparency mode 2 + if(order == LPOS_GLOBAL) { + if(direction == LPDS_DESCENDING) { + for(i = 0; i != count; ++i) { + /*flip bits, but preserve original index and transparency mode 2*/ sortfield[i] = (~sortfield[i] & 0x7FFFFFFFFFFFFF00ULL) | (sortfield[i] & 0x80000000000000FFULL); } } } else { - if (direction == LPDS_DESCENDING) { + if(direction == LPDS_DESCENDING) { uint64_t value = 0; - for (size_t i = 1; i < count; ++i) { - if ((sortfield[i] & 0x7FFFFFFFFFFFFFFFULL) > value) { + for(i = 1; i != count; ++i) { + if((sortfield[i] & 0x7FFFFFFFFFFFFFFFULL) > value) { value = (sortfield[i] & 0x7FFFFFFFFFFFFFFFULL); best = i; } } } else { uint64_t value = UINT64_MAX; - for (size_t i = 1; i < count; ++i) { - if ((sortfield[i] & 0x7FFFFFFFFFFFFFFFULL) < value) { + for(i = 1; i != count; ++i) { + if((sortfield[i] & 0x7FFFFFFFFFFFFFFFULL) < value) { value = (sortfield[i] & 0x7FFFFFFFFFFFFFFFULL); best = i; } } } } - switch(order) { - case LPOS_NONE: - case LPOS_GLOBAL: - break; - case LPOS_NEAREST: - for (size_t i = 0; i < count - 1; ++i) { - if (i != best) { - sortfield[i] ^= sortfield[best]; - sortfield[best] ^= sortfield[i]; - sortfield[i] ^= sortfield[best]; - } - sortfield[i] |= uint64_t(i) << 40; - const unsigned char* c = (unsigned char*)&palette_in[sortfield[i] & 0xFF]; - const int r = c[0]; - const int g = c[1]; - const int b = c[2]; - int bestdist = INT_MAX; - for (size_t j = i + 1; j < count; ++j) { - const unsigned char* c2 = (unsigned char*)&palette_in[sortfield[j] & 0xFF]; - const int r2 = c2[0]; - const int g2 = c2[1]; - const int b2 = c2[2]; - int dist = (r - r2) * (r - r2) + (g - g2) + (g - g2) + (b - b2) * (b - b2); - if (transparency == LPTS_SORT) { - const int a = c[3]; - const int a2 = c2[3]; - dist += (a - a2) * (a - a2); - } - if (dist < bestdist) { - bestdist = dist; - best = j; - } - } - } - sortfield[count - 1] |= uint64_t(count - 1) << 40; - break; - case LPOS_NEAREST_WEIGHT: - { - for (size_t i = 0; i < count - 1; ++i) { - if (i != best) { - sortfield[i] ^= sortfield[best]; - sortfield[best] ^= sortfield[i]; - sortfield[i] ^= sortfield[best]; - } - sortfield[i] |= uint64_t(i) << 40; - const unsigned char* c = (unsigned char*)&palette_in[sortfield[i] & 0xFF]; - const int r = c[0]; - const int g = c[1]; - const int b = c[2]; - double bestdist = INT_MAX; - for (size_t j = i + 1; j < count; ++j) { - const unsigned char* c2 = (unsigned char*)&palette_in[sortfield[j] & 0xFF]; - const int r2 = c2[0]; - const int g2 = c2[1]; - const int b2 = c2[2]; - double dist = (r - r2) * (r - r2) + (g - g2) + (g - g2) + (b - b2) * (b - b2); - if (transparency == LPTS_SORT) { - const int a = c[3]; - const int a2 = c2[3]; - dist += (a - a2) * (a - a2); - } - dist /= (color_tree_get(&tree, c2[0], c2[1], c2[2], c2[3]) + 1); - if (dist < bestdist) { - bestdist = dist; - best = j; - } - } - } - sortfield[count - 1] |= uint64_t(count - 1) << 40; - } - break; - case LPOS_NEAREST_NEIGHBOR: - { - ColorTree paltree; + if(order > LPOS_GLOBAL) { /*LPOS_NEAREST, LPOS_NEAREST_WEIGHT, LPOS_NEAREST_NEIGHBOR*/ + size_t j; + ColorTree paltree; + ColorTree neighbors; + if(order == LPOS_NEAREST_NEIGHBOR) { + size_t k, l; color_tree_init(&paltree); - for (size_t i = 0; i < count; ++i) { + color_tree_init(&neighbors); + for(i = 0; i != count; ++i) { const unsigned char* p = (unsigned char*)&palette_in[i]; color_tree_add(&paltree, p[0], p[1], p[2], p[3], i); } - ColorTree neighbors; - color_tree_init(&neighbors); - for (size_t k = 0; k < h; ++k) { - for (size_t l = 0; l < w; ++l) { + for(k = 0; k != h; ++k) { + for(l = 0; l != w; ++l) { const unsigned char* c = (unsigned char*)&image[k * w + l]; int index = color_tree_get(&paltree, c[0], c[1], c[2], c[3]); - if (k > 0) { // above + if(k > 0) { /*above*/ const unsigned char* c2 = (unsigned char*)&image[(k - 1) * w + l]; color_tree_inc(&neighbors, index, color_tree_get(&paltree, c2[0], c2[1], c2[2], c2[3]), 0, 0); } - if (k < h - 1) { // below + if(k < h - 1) { /*below*/ const unsigned char* c2 = (unsigned char*)&image[(k + 1) * w + l]; color_tree_inc(&neighbors, index, color_tree_get(&paltree, c2[0], c2[1], c2[2], c2[3]), 0, 0); } - if (l > 0) { // left + if(l > 0) { /*left*/ const unsigned char* c2 = (unsigned char*)&image[k * w + l - 1]; color_tree_inc(&neighbors, index, color_tree_get(&paltree, c2[0], c2[1], c2[2], c2[3]), 0, 0); } - if (l < w - 1) { // right + if(l < w - 1) { /*right*/ const unsigned char* c2 = (unsigned char*)&image[k * w + l + 1]; color_tree_inc(&neighbors, index, color_tree_get(&paltree, c2[0], c2[1], c2[2], c2[3]), 0, 0); } } } - for (size_t i = 0; i < count - 1; ++i) { - if (i != best) { - sortfield[i] ^= sortfield[best]; - sortfield[best] ^= sortfield[i]; - sortfield[i] ^= sortfield[best]; + } + for(i = 0; i < count - 1; ++i) { + if(i != best) { + sortfield[i] ^= sortfield[best]; + sortfield[best] ^= sortfield[i]; + sortfield[i] ^= sortfield[best]; + } + sortfield[i] |= uint64_t(i) << 40; + const unsigned char* c = (unsigned char*)&palette_in[sortfield[i] & 0xFF]; + const int r = c[0]; + const int g = c[1]; + const int b = c[2]; + int bestdist = INT_MAX; + if(order == LPOS_NEAREST_NEIGHBOR) best = i + 1; + for(j = i + 1; j != count; ++j) { + const unsigned char* c2 = (unsigned char*)&palette_in[sortfield[j] & 0xFF]; + const int r2 = c2[0]; + const int g2 = c2[1]; + const int b2 = c2[2]; + int dist = (r - r2) * (r - r2) + (g - g2) + (g - g2) + (b - b2) * (b - b2); + if(transparency == LPTS_SORT) { + const int a = c[3]; + const int a2 = c2[3]; + dist += (a - a2) * (a - a2); } - sortfield[i] |= uint64_t(i) << 40; - const unsigned char* c = (unsigned char*)&palette_in[sortfield[i] & 0xFF]; - const int r = c[0]; - const int g = c[1]; - const int b = c[2]; - double bestdist = INT_MAX; - best = i + 1; - for (size_t j = i + 1; j < count; ++j) { - const unsigned char* c2 = (unsigned char*)&palette_in[sortfield[j] & 0xFF]; - const int r2 = c2[0]; - const int g2 = c2[1]; - const int b2 = c2[2]; - double dist = (r - r2) * (r - r2) + (g - g2) + (g - g2) + (b - b2) * (b - b2); - if (transparency == LPTS_SORT) { - const int a = c[3]; - const int a2 = c2[3]; - dist += (a - a2) * (a - a2); - } - dist /= (color_tree_get(&neighbors, color_tree_get(&paltree, c[0], c[1], c[2], c[3]), - color_tree_get(&paltree, c2[0], c2[1], c2[2], c2[3]), 0, 0) + 1); - if (dist != 0 && dist < bestdist) { + if(order == LPOS_NEAREST) { + if(dist < bestdist) { bestdist = dist; best = j; } + } else if(order == LPOS_NEAREST_WEIGHT || order == LPOS_NEAREST_NEIGHBOR) { + double d_dist = (double)dist; + if(order == LPOS_NEAREST_WEIGHT) { + d_dist /= (color_tree_get(&tree, c2[0], c2[1], c2[2], c2[3]) + 1); + if(d_dist < (double)bestdist) { + bestdist = (int)d_dist; + best = j; + } + } else { /*LPOS_NEAREST_NEIGHBOR*/ + d_dist /= (color_tree_get(&neighbors, color_tree_get(&paltree, c[0], c[1], c[2], c[3]), + color_tree_get(&paltree, c2[0], c2[1], c2[2], c2[3]), 0, 0) + 1); + if(d_dist != 0 && d_dist < (double)bestdist) { + bestdist = (int)d_dist; + best = j; + } + } } } - sortfield[count - 1] |= uint64_t(count - 1) << 40; + } + sortfield[count - 1] |= uint64_t(count - 1) << 40; + if(order == LPOS_NEAREST_NEIGHBOR) { color_tree_cleanup(&paltree); color_tree_cleanup(&neighbors); } - break; } std::sort(sortfield, sortfield + count); uint32_t* palette_out = (uint32_t*)lodepng_malloc(mode_out->palettesize << 2); - for (size_t i = 0; i < mode_out->palettesize; ++i) { - palette_out[i] = palette_in[sortfield[i] & 0xFF]; - } + for(i = 0; i != mode_out->palettesize; ++i) palette_out[i] = palette_in[sortfield[i] & 0xFF]; std::copy(palette_out, palette_out + mode_out->palettesize, palette_in); free(palette_out); free(sortfield); @@ -4337,22 +4250,23 @@ static unsigned lodepng_encode(unsigned char** out, size_t* outsize, state->error = lodepng_auto_choose_color(&info.color, &state->info_raw, &stats, numpixels, state->div); if(state->error) goto cleanup; if(info.color.colortype == LCT_PALETTE && palset.order != LPOS_NONE) { - if (palset._first & 1) { + if(palset._first & 1) { color_tree_init(&ct); } - optimize_palette(&info.color, (uint32_t*)image, w, h, palset.priority, palset.direction, - palset.trans, palset.order); + if(palset.order != LPOS_NONE) { + optimize_palette(&info.color, (uint32_t*)image, w, h, palset.priority, palset.direction, + palset.trans, palset.order); + } unsigned crc = crc32(0, info.color.palette, info.color.palettesize); - if (!color_tree_inc(&ct, crc & 0xFF, crc & 0xFF00, crc & 0xFF0000, crc & 0xFF000000)) { - } else { - if (palset._first & 2) { + if(color_tree_inc(&ct, crc & 0xFF, crc & 0xFF00, crc & 0xFF0000, crc & 0xFF000000)) { + if(palset._first & 2) { color_tree_cleanup(&ct); } - lodepng_info_cleanup(&info); - return 96; + state->error = 96; + goto cleanup; } - if (palset._first & 2) { + if(palset._first & 2) { color_tree_cleanup(&ct); } } From 6d94323d606458e5aebd93e1cc59ed9c42339911 Mon Sep 17 00:00:00 2001 From: woot000 <97068837+woot000@users.noreply.github.com> Date: Sat, 10 Sep 2022 10:15:30 -0500 Subject: [PATCH 04/10] Optimize LodePNG filter function 10% speedup using --allfilters, 4.2kB reduction in LodePNG code size using Clang 15.0.0 --- src/lodepng/lodepng.cpp | 921 ++++++++++++++++------------------------ 1 file changed, 358 insertions(+), 563 deletions(-) diff --git a/src/lodepng/lodepng.cpp b/src/lodepng/lodepng.cpp index f2486bb3..bd2510d8 100755 --- a/src/lodepng/lodepng.cpp +++ b/src/lodepng/lodepng.cpp @@ -3383,33 +3383,30 @@ static char windowbits(unsigned long len) { } static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, - const LodePNGColorMode* color, LodePNGEncoderSettings* settings) { + const LodePNGColorMode* info, LodePNGEncoderSettings* settings) { /* For PNG filter method 0 out must be a buffer with as size: h + (w * h * bpp + 7u) / 8u, because there are the scanlines with 1 extra byte per scanline */ - unsigned bpp = lodepng_get_bpp(color); + unsigned bpp = lodepng_get_bpp(info); + if(bpp == 0) return 31; /*error: invalid color type*/ /*the width of a scanline in bytes, not including the filter type*/ size_t linebytes = lodepng_get_raw_size_idat(w, 1, bpp) - 1u; - /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/ size_t bytewidth = (bpp + 7u) / 8u; + const LodePNGFilterStrategy strategy = settings->filter_strategy; const unsigned char* prevline = 0; unsigned x, y; unsigned error = 0; - LodePNGFilterStrategy strategy = settings->filter_strategy; - - if(bpp == 0) return 31; /*error: invalid color type*/ if(strategy < LFS_BRUTE_FORCE) { - unsigned char type = (unsigned char)strategy; for(y = 0; y != h; ++y) { size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ size_t inindex = linebytes * y; - out[outindex] = type; /*filter type byte*/ - filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, type); + out[outindex] = strategy; /*filter type byte*/ + filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, strategy); prevline = &in[inindex]; } } else if(strategy == LFS_PREDEFINED) { @@ -3422,565 +3419,277 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, prevline = &in[inindex]; } } else { - unsigned clean = settings->clean_alpha && color->colortype == LCT_RGBA && color->bitdepth == 8 && !color->key_defined; + const unsigned clean = settings->clean_alpha && info->colortype == LCT_RGBA && info->bitdepth == 8 && !info->key_defined; unsigned char* in2 = 0; unsigned char* rem = 0; if(clean) { - in2 = (unsigned char*)malloc(linebytes * h); - if (!in2) { - exit(1); - } + in2 = (unsigned char*)lodepng_malloc(linebytes * h); + if(!in2) exit(1); memcpy(in2, in, linebytes * h); - rem = (unsigned char*)malloc(linebytes); - } - if(strategy == LFS_BRUTE_FORCE) { - /*brute force filter chooser. - deflate the scanline after every filter attempt to see which one deflates best.*/ - unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ - size_t smallest = 0; - unsigned type = 0, bestType = 0; - - z_stream stream; - stream.zalloc = 0; - stream.zfree = 0; - stream.opaque = 0; - - int err = deflateInit2(&stream, 3, Z_DEFLATED, windowbits(linebytes), 3, Z_FILTERED); - if (err != Z_OK) exit(1); - - for(type = 0; type != 5; ++type) { - attempt[type] = (unsigned char*)lodepng_malloc(linebytes); - if(!attempt[type]) error = 83; /*alloc fail*/ + rem = (unsigned char*)lodepng_malloc(linebytes); } + if(strategy == LFS_BRUTE_FORCE || (strategy >= LFS_INCREMENTAL && strategy <= LFS_INCREMENTAL3)) { + unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ + size_t smallest = 0; + unsigned type, bestType = 0; - for(y = 0; y != h; ++y) /*try the 5 filter types*/ - { - memcpy(rem, &in2[y * linebytes], linebytes * clean); for(type = 0; type != 5; ++type) { - if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, type); - filterScanline(attempt[type], &in2[y * linebytes], prevline, linebytes, bytewidth, type); - } else { - filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); - } - - size_t size = 0; - if(settings->filter_style < 2 || 1) { - deflateTune(&stream, 258, 258, 258, 550 + (settings->filter_style) * 100); - stream.next_in = (z_const unsigned char *)attempt[type]; - stream.avail_in = linebytes; - stream.avail_out = UINT_MAX; - stream.next_out = (unsigned char *)1; - - deflate_nooutput(&stream, Z_FINISH); - - size = stream.total_out; - deflateReset(&stream); - } else { - size = ZopfliLZ77LazyLauncher(attempt[type], 0, linebytes, settings->filter_style); - } - - /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/ - if(type == 0 || size < smallest) { - bestType = type; - smallest = size; - } - if(clean) { - memcpy(&in2[y * linebytes], rem, linebytes); - } - - } - out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ - for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; - if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType); - prevline = &in2[y * linebytes]; - } else { - prevline = &in[y * linebytes]; - } - } - - deflateEnd(&stream); - for(type = 0; type != 5; ++type) free(attempt[type]); - - } else if(strategy == LFS_INCREMENTAL || strategy == LFS_INCREMENTAL2 || strategy == LFS_INCREMENTAL3) { - /*Incremental brute force filter chooser. - Keep a buffer of each tested scanline and deflate the entire buffer after every filter attempt to see which one deflates best. - Now implemented with streaming, which reduces complexity to O(n) - This is slow.*/ - unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ - size_t smallest; - unsigned type, bestType = 0; - - z_stream dstream; - z_stream teststream; - - dstream.zalloc = 0; - dstream.zfree = 0; - dstream.opaque = 0; - - size_t testsize = linebytes + 1; - int err = deflateInit2(&dstream, strategy == LFS_INCREMENTAL3 ? 1 : 2, Z_DEFLATED, windowbits(testsize * h), 8, Z_FILTERED); - if (err != Z_OK) exit(1); - if(strategy == LFS_INCREMENTAL) { - deflateTune(&dstream, 16, 258, 258, 200); - } - else if (strategy == LFS_INCREMENTAL2) { - deflateTune(&dstream, 50, 258, 258, 1100); - } - deflateCopy(&teststream, &dstream, 1); - - unsigned char* dummy = (unsigned char*)1; //Not used, but must not be 0 - - unsigned char* prevline2 = 0; - unsigned char* prevlinebuf = 0; - unsigned char* linebuf; - if(clean) { - prevlinebuf = (unsigned char*)malloc(linebytes); - linebuf = (unsigned char*)malloc(linebytes); - } - - for(type = 0; type != 5; ++type) { - attempt[type] = (unsigned char*)lodepng_malloc(linebytes); - if(!attempt[type]) error = 83; /*alloc fail*/ - } - - for(y = 0; y != h; ++y) /*try the 5 filter types*/ - { - smallest = SIZE_MAX; - for(type = 4; type + 1 != 0; --type) /*type 0 is most likely, so end with that to reduce copying*/ - { - if(clean) { - memcpy(linebuf, &in[y * linebytes], linebytes); - filterScanline2(linebuf, prevline2, linebytes, type); - filterScanline(attempt[type], linebuf, prevline2, linebytes, bytewidth, type); - } else { - filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); - } - /*copy result to output buffer temporarily to include compression test*/ - out[y * (linebytes + 1)] = type; /*the first byte of a scanline will be the filter type*/ - for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[type][x]; - size_t size = 0; - - deflateCopy(&teststream, &dstream, 0); - teststream.next_in = (z_const unsigned char *)(out + y * testsize); - teststream.avail_in = testsize; - teststream.avail_out = UINT_MAX; - teststream.next_out = dummy; - deflate_nooutput(&teststream, Z_FINISH); - - size = teststream.total_out; - - /*check if this is smallest size (or if type == 4 it's the first case so always store the values)*/ - if(size < smallest) { - bestType = type; - smallest = size; - } - } - - if(clean) { - memcpy(linebuf, &in[y * linebytes], linebytes); - filterScanline2(linebuf, prevline2, linebytes, bestType); - filterScanline(attempt[bestType], linebuf, prevline2, linebytes, bytewidth, bestType); - } else { - filterScanline(attempt[bestType], &in[y * linebytes], prevline, linebytes, bytewidth, bestType); - } - /*copy result to output buffer temporarily to include compression test*/ - out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ - for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; - - dstream.next_in = (z_const unsigned char *)(out + y * testsize); - dstream.avail_in = testsize; - dstream.avail_out = UINT_MAX; - dstream.next_out = dummy; - deflate_nooutput(&dstream, Z_NO_FLUSH); - - prevline = &in[y * linebytes]; - if(clean) { - memcpy(linebuf, &in[y * linebytes], linebytes); - filterScanline2(linebuf, prevline2, linebytes, bestType); - memcpy(prevlinebuf, linebuf, linebytes); - prevline2 = prevlinebuf; - } - out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ - if (type) /*last attempt is type 0, so no copying necessary*/ - { - for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; + attempt[type] = (unsigned char*)lodepng_malloc(linebytes); + if(!attempt[type]) error = 83; /*alloc fail*/ } - } - if(clean) { - free(prevlinebuf); - free(linebuf); - } - deflateEnd(&dstream); - deflateEnd(&teststream); - for(type = 0; type != 5; ++type) free(attempt[type]); - } else if(strategy == LFS_MINSUM) { - /*adaptive filtering*/ - unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ - size_t smallest = 0; - unsigned char type, bestType = 0; - - for(type = 0; type != 5; ++type) { - attempt[type] = (unsigned char*)lodepng_malloc(linebytes); - if(!attempt[type]) error = 83; /*alloc fail*/ - } - - if(!error) { - for(y = 0; y != h; ++y) { - memcpy(rem, &in2[y * linebytes], linebytes * clean); - /*try the 5 filter types*/ - for(type = 0; type != 5; ++type) { - size_t sum = 0; - if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, type); - filterScanline(attempt[type], &in2[y * linebytes], prevline, linebytes, bytewidth, type); - } else { - filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); - } - /*calculate the sum of the result*/ - if(type == 0) { - for(x = 0; x != linebytes; ++x) sum += (unsigned char)(attempt[type][x]); - } else { - for(x = 0; x != linebytes; ++x) { - /*For differences, each byte should be treated as signed, values above 127 are negative - (converted to signed char). Filtertype 0 isn't a difference though, so use unsigned there. - This means filtertype 0 is almost never chosen, but that is justified.*/ - unsigned char s = attempt[type][x]; - sum += s < 128 ? s : (255U - s); + z_stream stream; + stream.zalloc = 0; + stream.zfree = 0; + stream.opaque = 0; + + if(!error && strategy == LFS_BRUTE_FORCE) { + /*brute force filter chooser. + deflate the scanline after every filter attempt to see which one deflates best.*/ + int err = deflateInit2(&stream, 3, Z_DEFLATED, windowbits(linebytes), 3, Z_FILTERED); + if(err != Z_OK) exit(1); + + for(y = 0; y != h; ++y) { /*try the 5 filter types*/ + memcpy(rem, &in2[y * linebytes], linebytes * clean); + for(type = 0; type != 5; ++type) { + size_t size = 0; + if(clean) { + filterScanline2(&in2[y * linebytes], prevline, linebytes, type); + filterScanline(attempt[type], &in2[y * linebytes], prevline, linebytes, bytewidth, type); + } else filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); + + if(settings->filter_style < 2 || 1) { + deflateTune(&stream, 258, 258, 258, 550 + (settings->filter_style) * 100); + stream.next_in = (z_const unsigned char*)attempt[type]; + stream.avail_in = linebytes; + stream.avail_out = UINT_MAX; + stream.next_out = (unsigned char*)1; + + deflate_nooutput(&stream, Z_FINISH); + + size = stream.total_out; + deflateReset(&stream); + } else size = ZopfliLZ77LazyLauncher(attempt[type], 0, linebytes, settings->filter_style); + + /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/ + if(type == 0 || size < smallest) { + bestType = type; + smallest = size; + } + if(clean) memcpy(&in2[y * linebytes], rem, linebytes); } + out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; + if(clean) { + filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType); + prevline = &in2[y * linebytes]; + } else prevline = &in[y * linebytes]; } - - /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/ - if(type == 0 || sum < smallest) { - bestType = type; - smallest = sum; - } + } else if(!error && strategy >= LFS_INCREMENTAL && strategy <= LFS_INCREMENTAL3) { + /*Incremental brute force filter chooser. + Keep a buffer of each tested scanline and deflate the entire buffer after every filter attempt to see which one deflates best. + Now implemented with streaming, which reduces complexity to O(n) + This is slow.*/ + z_stream teststream; + size_t testsize = linebytes + 1; + int err = deflateInit2(&stream, strategy == LFS_INCREMENTAL3 ? 1 : 2, Z_DEFLATED, windowbits(testsize * h), 8, Z_FILTERED); + if(err != Z_OK) exit(1); + if(strategy == LFS_INCREMENTAL) deflateTune(&stream, 16, 258, 258, 200); + else if(strategy == LFS_INCREMENTAL2) deflateTune(&stream, 50, 258, 258, 1100); + deflateCopy(&teststream, &stream, 1); + + unsigned char* dummy = (unsigned char*)1; /*Not used, but must not be NULL*/ + unsigned char* prevline2 = 0; + unsigned char* prevlinebuf = 0; + unsigned char* linebuf; if(clean) { - memcpy(&in2[y * linebytes], rem, linebytes); + prevlinebuf = (unsigned char*)lodepng_malloc(linebytes); + linebuf = (unsigned char*)lodepng_malloc(linebytes); } - } - /*now fill the out values*/ - out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ - for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; - if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType); - prevline = &in2[y * linebytes]; - } else { - prevline = &in[y * linebytes]; - } - } - } - - for(type = 0; type != 5; ++type) free(attempt[type]); - } else if(strategy == LFS_DISTINCT_BYTES) { - unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ - size_t smallest; - unsigned type, bestType = 0; - unsigned char count[256]; + for(y = 0; y != h; ++y) { /*try the 5 filter types*/ + for(type = 4; type + 1 != 0; --type) { /*type 0 is most likely, so end with that to reduce copying*/ + size_t size = 0; + if(clean) { + memcpy(linebuf, &in[y * linebytes], linebytes); + filterScanline2(linebuf, prevline2, linebytes, type); + filterScanline(attempt[type], linebuf, prevline2, linebytes, bytewidth, type); + } else filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); + /*copy result to output buffer temporarily to include compression test*/ + out[y * (linebytes + 1)] = type; /*the first byte of a scanline will be the filter type*/ + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[type][x]; + + deflateCopy(&teststream, &stream, 0); + teststream.next_in = (z_const unsigned char*)(out + y * testsize); + teststream.avail_in = testsize; + teststream.avail_out = UINT_MAX; + teststream.next_out = dummy; + deflate_nooutput(&teststream, Z_FINISH); + + size = teststream.total_out; + + /*check if this is smallest size (or if type == 4 it's the first case so always store the values)*/ + if(type == 4 || size < smallest) { + bestType = type; + smallest = size; + } + } - for(type = 0; type != 5; ++type) { - attempt[type] = (unsigned char*)lodepng_malloc(linebytes); - if(!attempt[type]) error = 83; /*alloc fail*/ - } + if(clean) { + memcpy(linebuf, &in[y * linebytes], linebytes); + filterScanline2(linebuf, prevline2, linebytes, bestType); + filterScanline(attempt[bestType], linebuf, prevline2, linebytes, bytewidth, bestType); + } else filterScanline(attempt[bestType], &in[y * linebytes], prevline, linebytes, bytewidth, bestType); + /*copy result to output buffer temporarily to include compression test*/ + out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; + + stream.next_in = (z_const unsigned char*)(out + y * testsize); + stream.avail_in = testsize; + stream.avail_out = UINT_MAX; + stream.next_out = dummy; + deflate_nooutput(&stream, Z_NO_FLUSH); - for(y = 0; y != h; ++y) { - memcpy(rem, &in2[y * linebytes], linebytes * clean); - smallest = SIZE_MAX; - /*try the 5 filter types*/ - for(type = 0; type != 5; ++type) { - size_t sum = 0; - if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, type); - filterScanline(attempt[type], &in2[y * linebytes], prevline, linebytes, bytewidth, type); - } else { - filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); - } - memset(count, 0, 256); - for(x = 0; x != linebytes; ++x) count[attempt[type][x]] = 1; - count[type] = 1; /*the filter type itself is part of the scanline*/ - for(x = 0; x != 256; ++x) { - if(count[x]) ++sum; - } - /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/ - if(sum < smallest) { - bestType = type; - smallest = sum; + prevline = &in[y * linebytes]; + if(clean) { + memcpy(linebuf, &in[y * linebytes], linebytes); + filterScanline2(linebuf, prevline2, linebytes, bestType); + memcpy(prevlinebuf, linebuf, linebytes); + prevline2 = prevlinebuf; + } + out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ + if(type) for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; /*last attempt is type 0, so no copying necessary*/ } if(clean) { - memcpy(&in2[y * linebytes], rem, linebytes); + free(prevlinebuf); + free(linebuf); } + deflateEnd(&teststream); } + deflateEnd(&stream); + for(type = 0; type != 5; ++type) free(attempt[type]); - /*now fill the out values*/ - out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ - for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; - if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType); - prevline = &in2[y * linebytes]; - } else { - prevline = &in[y * linebytes]; + } else if(strategy >= LFS_ENTROPY && strategy <= LFS_MINSUM) { /*LFS_ENTROPY, LFS_DISTINCT_BIGRAMS, LFS_DISTINCT_BYTES, LFS_MINSUM*/ + size_t smallest = 0; + unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ + unsigned char type, bestType = 0; + for(type = 0; type != 5; ++type) { + attempt[type] = (unsigned char*)lodepng_malloc(linebytes); + if(!attempt[type]) error = 83; /*alloc fail*/ } - } - - for(type = 0; type != 5; ++type) free(attempt[type]); - } else if(strategy == LFS_DISTINCT_BIGRAMS) { - unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ - size_t smallest; - unsigned type, bestType = 0; - unsigned char count[65536]; + if(!error) { + for(y = 0; y != h; ++y) { + memcpy(rem, &in2[y * linebytes], linebytes * clean); + /*try the 5 filter types*/ + for(type = 0; type != 5; ++type) { + if(clean) { + filterScanline2(&in2[y * linebytes], prevline, linebytes, type); + filterScanline(attempt[type], &in2[y * linebytes], prevline, linebytes, bytewidth, type); + } else filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); + size_t sum = 0; + if(strategy == LFS_MINSUM) { + if(type == 0) for(x = 0; x != linebytes; ++x) sum += (unsigned char)(attempt[type][x]); + else { + for(x = 0; x != linebytes; ++x) { + /*For differences, each byte should be treated as signed, values above 127 are negative + (converted to signed char). Filtertype 0 isn't a difference though, so use unsigned there. + This means filtertype 0 is almost never chosen, but that is justified.*/ + unsigned char s = attempt[type][x]; + sum += s < 128 ? s : (255U - s); + } + } + } else if(strategy == LFS_DISTINCT_BYTES ||strategy == LFS_ENTROPY) { + unsigned count[256] = { 0 }; + for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]]; + ++count[type]; /*the filter type itself is part of the scanline*/ + if(strategy == LFS_DISTINCT_BYTES) { for(x = 0; x != 256; ++x) if(count[x] != 0) ++sum; } + else if(strategy == LFS_ENTROPY) { for(x = 0; x != 256; ++x) sum += ilog2i(count[x]); } + } else if(strategy == LFS_DISTINCT_BIGRAMS) { + unsigned char count[65536] = { 0 }; + for(x = 1; x != linebytes; ++x) ++count[(attempt[type][x - 1] << 8) + attempt[type][x]]; + ++count[type]; /*the filter type itself is part of the scanline*/ + for(x = 0; x != 65536; ++x) if(count[x]) ++sum; + if(type == 0 || sum > smallest) { /*smallest in this case acts as the best sum*/ + bestType = type; + smallest = sum; + } + } - for(type = 0; type != 5; ++type) { - attempt[type] = (unsigned char*)lodepng_malloc(linebytes); - if(!attempt[type]) error = 83; /*alloc fail*/ - } + /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/ + if(strategy != LFS_DISTINCT_BIGRAMS && (type == 0 || sum < smallest)) { + bestType = type; + smallest = sum; + } + if(clean) memcpy(&in2[y * linebytes], rem, linebytes); + } - for(y = 0; y != h; ++y) { - memcpy(rem, &in2[y * linebytes], linebytes * clean); - smallest = SIZE_MAX; - /*try the 5 filter types*/ - for(type = 0; type != 5; ++type) { - size_t sum = 0; - if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, type); - filterScanline(attempt[type], &in2[y * linebytes], prevline, linebytes, bytewidth, type); - } else { - filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); - } - memset(count, 0, 65536); - for(x = 1; x != linebytes; ++x) count[(attempt[type][x - 1] << 8) + attempt[type][x]] = 1; - count[type] = 1; /*the filter type itself is part of the scanline*/ - for(x = 0; x != 65536; ++x) { - if(count[x] != 0) ++sum; - } - /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/ - if(sum < smallest) { - bestType = type; - smallest = sum; - } - if(clean) { - memcpy(&in2[y * linebytes], rem, linebytes); + /*now fill the out values*/ + out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; + if(clean) { + filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType); + prevline = &in2[y * linebytes]; + } else prevline = &in[y * linebytes]; } } - /*now fill the out values*/ - out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ - for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; - if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType); - prevline = &in2[y * linebytes]; - } else { - prevline = &in[y * linebytes]; - } - } - - for(type = 0; type != 5; ++type) free(attempt[type]); - } else if(strategy == LFS_ENTROPY) { - unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ - float smallest = 0; - unsigned type, bestType = 0; - unsigned count[256]; - - for(type = 0; type != 5; ++type) { - attempt[type] = (unsigned char*)lodepng_malloc(linebytes); - if(!attempt[type]) error = 83; /*alloc fail*/ - } - - for(y = 0; y != h; ++y) { - memcpy(rem, &in2[y * linebytes], linebytes * clean); - /*try the 5 filter types*/ - for(type = 0; type != 5; ++type) { - if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, type); - filterScanline(attempt[type], &in2[y * linebytes], prevline, linebytes, bytewidth, type); - } else { - filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); - } - memset(count, 0, 256 * sizeof(*count)); - for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]]; - ++count[type]; /*the filter type itself is part of the scanline*/ - float sum = 0; - for(x = 0; x != 256; ++x) { - float p = count[x] / (float)(linebytes + 1); - sum += count[x] == 0 ? 0 : log2f(1 / p) * p; - } - /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/ - if(type == 0 || sum < smallest) { - bestType = type; - smallest = sum; - } - if(clean) { - memcpy(&in2[y * linebytes], rem, linebytes); + for(type = 0; type != 5; ++type) free(attempt[type]); + } else if(strategy == LFS_GENETIC || strategy == LFS_ALL_CHEAP) { + if(strategy == LFS_GENETIC) { + if(!settings->quiet) { + printf("Genetic filtering has been enabled, which may take a long time to finish.\n" + "The current generation and number of bytes are displayed. Genetic filtering\n" + "will stop after 500 generations without progress, or by pressing Ctrl+C.\n"); } + signaled.store(-settings->quiet); } - /*now fill the out values*/ - out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ - for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; + unsigned char* prevlinebuf = 0; + unsigned char* linebuf; if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType); - prevline = &in2[y * linebytes]; - } else { - prevline = &in[y * linebytes]; + prevlinebuf = (unsigned char*)lodepng_malloc(linebytes); + linebuf = (unsigned char*)lodepng_malloc(linebytes); } - } - for(type = 0; type != 5; ++type) free(attempt[type]); - } - else if(strategy == LFS_GENETIC || strategy == LFS_ALL_CHEAP) { - if (strategy == LFS_GENETIC) { - if(!settings->quiet) { - printf("warning: You have decided to enable genetic filtering, which may take a very long time.\n" - "the current generation and number of bytes is displayed.\n" - "you can stop the genetic filtering anytime by pressing ctrl-c\n" - "it will automatically stop after 500 generations without progress\n"); - } - signaled.store(-settings->quiet); - } - - unsigned char* prevlinebuf = 0; - unsigned char* linebuf; - if(clean) { - prevlinebuf = (unsigned char*)malloc(linebytes); - linebuf = (unsigned char*)malloc(linebytes); - } - - uint64_t r[2]; - initRandomUInt64(r); - - const int Strategies = strategy == LFS_ALL_CHEAP ? 3 : 0; - /*Genetic algorithm filter finder. Attempts to find better filters through mutation and recombination.*/ - const size_t population_size = strategy == LFS_ALL_CHEAP ? Strategies : 19; - const size_t last = population_size - 1; - unsigned char* population = (unsigned char*)lodepng_malloc(h * population_size); - size_t* size = (size_t*)lodepng_malloc(population_size * sizeof(size_t)); - unsigned* ranking = (unsigned*)lodepng_malloc(population_size * sizeof(int)); - unsigned g, i, j, e, t, c, type, crossover1, crossover2, selection_size, size_sum; - unsigned best_size = UINT_MAX; - unsigned total_size = 0; - unsigned e_since_best = 0; - - z_stream stream; - stream.zalloc = 0; - stream.zfree = 0; - stream.opaque = 0; + uint64_t r[2]; + initRandomUInt64(r); + + const int Strategies = strategy == LFS_ALL_CHEAP ? 3 : 0; + /*Genetic algorithm filter finder. Attempts to find better filters through mutation and recombination.*/ + const size_t population_size = strategy == LFS_ALL_CHEAP ? Strategies : 19; + const size_t last = population_size - 1; + unsigned char* population = (unsigned char*)lodepng_malloc(h * population_size); + size_t* size = (size_t*)lodepng_malloc(population_size * sizeof(size_t)); + unsigned* ranking = (unsigned*)lodepng_malloc(population_size * sizeof(int)); + unsigned e, i, g, type; + unsigned best_size = UINT_MAX; + unsigned total_size = 0; + unsigned e_since_best = 0; + + z_stream stream; + stream.zalloc = 0; + stream.zfree = 0; + stream.opaque = 0; #define TUNE deflateTune(&stream, 16, 258, 258, 200); - int err = deflateInit2(&stream, 3, Z_DEFLATED, windowbits(h * (linebytes + 1)), 8, Z_FILTERED); - if (err != Z_OK) exit(1); - unsigned char* dummy = (unsigned char *)1; - size_t popcnt; - uint64_t r2[2]; - initRandomUInt64(r2); - signal(SIGINT, sig_handler); - for(popcnt = 0; popcnt < h * (population_size - Strategies); ++popcnt) population[popcnt] = randomUInt64(r2) % 5; - - for(g = 0; g <= last; ++g) { - if (strategy == LFS_ALL_CHEAP) { - settings->filter_strategy = (LodePNGFilterStrategy)(g + 11); - filter(out, in, w, h, color, settings); - settings->filter_strategy = LFS_ALL_CHEAP; - for(size_t k = 0; k < h * (linebytes + 1); k += (linebytes + 1)) { - population[popcnt++] = out[k]; + int err = deflateInit2(&stream, 3, Z_DEFLATED, windowbits(h * (linebytes + 1)), 8, Z_FILTERED); + if(err != Z_OK) exit(1); + unsigned char* dummy = (unsigned char*)1; + size_t popcnt; + uint64_t r2[2]; + initRandomUInt64(r2); + signal(SIGINT, sig_handler); + for(popcnt = 0; popcnt < h * (population_size - Strategies); ++popcnt) population[popcnt] = randomUInt64(r2) % 5; + + for(g = 0; g <= last; ++g) { + if(strategy == LFS_ALL_CHEAP) { + settings->filter_strategy = (LodePNGFilterStrategy)(g + 11); + filter(out, in, w, h, info, settings); + settings->filter_strategy = LFS_ALL_CHEAP; + for(size_t k = 0; k < h * (linebytes + 1); k += (linebytes + 1)) population[popcnt++] = out[k]; } - } - prevline = 0; - for(y = 0; y < h; ++y) { - type = population[g * h + y]; - out[y * (linebytes + 1)] = type; - if(clean) { - memcpy(linebuf, &in[y * linebytes], linebytes); - filterScanline2(linebuf, prevline, linebytes, type); - filterScanline(&out[y * (linebytes + 1) + 1], linebuf, prevline, linebytes, bytewidth, type); - memcpy(prevlinebuf, linebuf, linebytes); - prevline = prevlinebuf; - } else { - filterScanline(&out[y * (linebytes + 1) + 1], &in[y * linebytes], prevline, linebytes, bytewidth, type); - prevline = &in[y * linebytes]; - } - } - TUNE - stream.next_in = (z_const unsigned char *)out; - stream.avail_in = h * (linebytes + 1); - stream.avail_out = UINT_MAX; - stream.next_out = dummy; - - deflate_nooutput(&stream, Z_FINISH); - - size[g] = stream.total_out; - deflateReset(&stream); - total_size += size[g]; - ranking[g] = g; - } - for(i = 0; strategy == LFS_ALL_CHEAP && i < population_size; i++) { - if(size[i] < best_size) { - ranking[0] = i; - best_size = size[i]; - } - } - //ctrl-c signals last iteration - for(e = 0; strategy == LFS_GENETIC && e_since_best < 500 && signaled.load() <= 0; ++e) { - /*resort rankings*/ - for(i = 1; i < population_size; ++i) { - t = ranking[i]; - for(j = i - 1; j + 1 > 0 && size[ranking[j]] > size[t]; --j) ranking[j + 1] = ranking[j]; - ranking[j + 1] = t; - } - if(size[ranking[0]] < best_size) { - best_size = size[ranking[0]]; - e_since_best = 0; - if(!settings->quiet) { - printf("Generation %d: %d bytes\n", e, best_size); - fflush(stdout); - } - } - else ++e_since_best; - /*generate offspring*/ - for(c = 0; c < 3; ++c) { - /*tournament selection*/ - /*parent 1*/ - selection_size = UINT_MAX; - for(t = 0; t < 2; ++t) selection_size = std::min(unsigned(randomDecimal(r) * total_size), selection_size); - size_sum = 0; - for(j = 0; size_sum <= selection_size; ++j) size_sum += size[ranking[j]]; - unsigned char* parent1 = &population[ranking[j - 1] * h]; - /*parent 2*/ - selection_size = UINT_MAX; - for(t = 0; t < 2; ++t) selection_size = std::min(unsigned(randomDecimal(r) * total_size), selection_size); - size_sum = 0; - for(j = 0; size_sum <= selection_size; ++j) size_sum += size[ranking[j]]; - unsigned char* parent2 = &population[ranking[j - 1] * h]; - /*two-point crossover*/ - unsigned char* child = &population[(ranking[last - c]) * h]; - if(randomDecimal(r) < 0.9) - { - crossover1 = randomUInt64(r) % h; - crossover2 = randomUInt64(r) % h; - if(crossover1 > crossover2) - { - crossover1 ^= crossover2; - crossover2 ^= crossover1; - crossover1 ^= crossover2; - } - if(child != parent1) - { - memcpy(child, parent1, crossover1); - memcpy(&child[crossover2], &parent1[crossover2], h - crossover2); - } - if(child != parent2) memcpy(&child[crossover1], &parent2[crossover1], crossover2 - crossover1); - } - else if(randomUInt64(r) & 1) memcpy(child, parent1, h); - else memcpy(child, parent2, h); - /*mutation*/ - for(y = 0; y < h; ++y) { - if(randomDecimal(r) < 0.01) child[y] = randomUInt64(r) % 5; - } - /*evaluate new genome*/ - total_size -= size[ranking[last - c]]; prevline = 0; for(y = 0; y < h; ++y) { - type = child[y]; + type = population[g * h + y]; out[y * (linebytes + 1)] = type; if(clean) { memcpy(linebuf, &in[y * linebytes], linebytes); @@ -3994,49 +3703,135 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, } } TUNE - - stream.next_in = (z_const unsigned char *)out; + stream.next_in = (z_const unsigned char*)out; stream.avail_in = h * (linebytes + 1); stream.avail_out = UINT_MAX; stream.next_out = dummy; deflate_nooutput(&stream, Z_FINISH); - size[ranking[last - c]] = stream.total_out; + size[g] = stream.total_out; deflateReset(&stream); - total_size += size[ranking[last - c]]; + total_size += size[g]; + ranking[g] = g; } - } - /*final choice*/ - prevline = 0; - for(y = 0; y < h; ++y) { - type = population[ranking[0] * h + y]; - out[y * (linebytes + 1)] = type; + for(i = 0; strategy == LFS_ALL_CHEAP && i < population_size; i++) { + if(size[i] < best_size) { + ranking[0] = i; + best_size = size[i]; + } + } + /*ctrl-c signals last iteration*/ + for(e = 0; strategy == LFS_GENETIC && e_since_best < 500 && signaled.load() <= 0; ++e) { + /*resort rankings*/ + unsigned c, j, t; + for(i = 1; i < population_size; ++i) { + t = ranking[i]; + for(j = i - 1; j + 1 > 0 && size[ranking[j]] > size[t]; --j) ranking[j + 1] = ranking[j]; + ranking[j + 1] = t; + } + if(size[ranking[0]] < best_size) { + best_size = size[ranking[0]]; + e_since_best = 0; + if(!settings->quiet) { + printf("Generation %d: %d bytes\n", e, best_size); + fflush(stdout); + } + } else ++e_since_best; + /*generate offspring*/ + for(c = 0; c < 3; ++c) { + /*tournament selection*/ + /*parent 1*/ + unsigned selection_size = UINT_MAX; + for(t = 0; t < 2; ++t) selection_size = std::min(unsigned(randomDecimal(r) * total_size), selection_size); + unsigned size_sum = 0; + for(j = 0; size_sum <= selection_size; ++j) size_sum += size[ranking[j]]; + unsigned char* parent1 = &population[ranking[j - 1] * h]; + /*parent 2*/ + selection_size = UINT_MAX; + for(t = 0; t < 2; ++t) selection_size = std::min(unsigned(randomDecimal(r) * total_size), selection_size); + size_sum = 0; + for(j = 0; size_sum <= selection_size; ++j) size_sum += size[ranking[j]]; + unsigned char* parent2 = &population[ranking[j - 1] * h]; + /*two-point crossover*/ + unsigned char* child = &population[(ranking[last - c]) * h]; + if(randomDecimal(r) < 0.9) { + unsigned crossover1 = randomUInt64(r) % h; + unsigned crossover2 = randomUInt64(r) % h; + if(crossover1 > crossover2) { + crossover1 ^= crossover2; + crossover2 ^= crossover1; + crossover1 ^= crossover2; + } + if(child != parent1) { + memcpy(child, parent1, crossover1); + memcpy(&child[crossover2], &parent1[crossover2], h - crossover2); + } + if(child != parent2) memcpy(&child[crossover1], &parent2[crossover1], crossover2 - crossover1); + } + else if(randomUInt64(r) & 1) memcpy(child, parent1, h); + else memcpy(child, parent2, h); + /*mutation*/ + for(y = 0; y < h; ++y) if(randomDecimal(r) < 0.01) child[y] = randomUInt64(r) % 5; + /*evaluate new genome*/ + total_size -= size[ranking[last - c]]; + prevline = 0; + for(y = 0; y < h; ++y) { + type = child[y]; + out[y * (linebytes + 1)] = type; + if(clean) { + memcpy(linebuf, &in[y * linebytes], linebytes); + filterScanline2(linebuf, prevline, linebytes, type); + filterScanline(&out[y * (linebytes + 1) + 1], linebuf, prevline, linebytes, bytewidth, type); + memcpy(prevlinebuf, linebuf, linebytes); + prevline = prevlinebuf; + } else { + filterScanline(&out[y * (linebytes + 1) + 1], &in[y * linebytes], prevline, linebytes, bytewidth, type); + prevline = &in[y * linebytes]; + } + } + TUNE + + stream.next_in = (z_const unsigned char*)out; + stream.avail_in = h * (linebytes + 1); + stream.avail_out = UINT_MAX; + stream.next_out = dummy; + + deflate_nooutput(&stream, Z_FINISH); + + size[ranking[last - c]] = stream.total_out; + deflateReset(&stream); + total_size += size[ranking[last - c]]; + } + } + /*final choice*/ + prevline = 0; + for(y = 0; y < h; ++y) { + type = population[ranking[0] * h + y]; + out[y * (linebytes + 1)] = type; + if(clean) { + memcpy(linebuf, &in[y * linebytes], linebytes); + filterScanline2(linebuf, prevline, linebytes, type); + filterScanline(&out[y * (linebytes + 1) + 1], linebuf, prevline, linebytes, bytewidth, type); + memcpy(prevlinebuf, linebuf, linebytes); + prevline = prevlinebuf; + } else { + filterScanline(&out[y * (linebytes + 1) + 1], &in[y * linebytes], prevline, linebytes, bytewidth, type); + prevline = &in[y * linebytes]; + } + } + deflateEnd(&stream); + free(population); + free(size); + free(ranking); if(clean) { - memcpy(linebuf, &in[y * linebytes], linebytes); - filterScanline2(linebuf, prevline, linebytes, type); - filterScanline(&out[y * (linebytes + 1) + 1], linebuf, prevline, linebytes, bytewidth, type); - memcpy(prevlinebuf, linebuf, linebytes); - prevline = prevlinebuf; - } else { - filterScanline(&out[y * (linebytes + 1) + 1], &in[y * linebytes], prevline, linebytes, bytewidth, type); - prevline = &in[y * linebytes]; + free(prevlinebuf); + free(linebuf); } - } - deflateEnd(&stream); - free(population); - free(size); - free(ranking); - if(clean) { - free(prevlinebuf); - free(linebuf); - } - } - else return 88; /* unknown filter strategy */ - free(rem); - free(in2); + } else return 88; /*unknown filter strategy*/ + free(rem); + free(in2); } - return error; } From 17434f6c90cbca61f7045e71b80f1506abc2fe6c Mon Sep 17 00:00:00 2001 From: woot000 <97068837+woot000@users.noreply.github.com> Date: Sat, 10 Sep 2022 10:16:16 -0500 Subject: [PATCH 05/10] Minor LodePNG edits, refactoring Small speed boost, 3.3kB reduction in LodePNG code size using Clang 15.0.0 --- src/lodepng/lodepng.cpp | 379 ++++++++++++++++++++-------------------- 1 file changed, 191 insertions(+), 188 deletions(-) diff --git a/src/lodepng/lodepng.cpp b/src/lodepng/lodepng.cpp index bd2510d8..6e38b75b 100755 --- a/src/lodepng/lodepng.cpp +++ b/src/lodepng/lodepng.cpp @@ -103,8 +103,8 @@ void* lodepng_realloc(void* ptr, size_t new_size); #define LODEPNG_RESTRICT /* not available */ #endif +#define LODEPNG_ABS(x) ((x) < 0 ? -(x) : (x)) #define LODEPNG_MAX(a, b) (((a) > (b)) ? (a) : (b)) -#define LODEPNG_MIN(a, b) (((a) < (b)) ? (a) : (b)) #if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_DECODER) /* Safely check if adding two integers will overflow (no undefined @@ -228,7 +228,7 @@ static char* alloc_string(const char* in) { #if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_PNG) static unsigned lodepng_read32bitInt(const unsigned char* buffer) { return (((unsigned)buffer[0] << 24u) | ((unsigned)buffer[1] << 16u) | - ((unsigned)buffer[2] << 8u) | (unsigned)buffer[3]); + ((unsigned)buffer[2] << 8u) | (unsigned)buffer[3]); } #endif /*defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_PNG)*/ @@ -271,18 +271,16 @@ static unsigned lodepng_buffer_file(unsigned char* out, size_t size, const char* size_t readsize; file = fopen(filename, "rb"); if(!file) return 78; - readsize = fread(out, 1, size, file); fclose(file); - if(readsize != size) return 78; - return 0; + else return 0; } /*write given buffer to the file, overwriting the file, it doesn't append to it.*/ static unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename) { FILE* file; - file = fopen(filename, "wb" ); + file = fopen(filename, "wb"); if(!file) return 79; fwrite(buffer, 1, buffersize, file); fclose(file); @@ -306,13 +304,13 @@ unsigned lodepng_inflate(unsigned char** out, size_t* outsize, inf.zalloc = 0; inf.zfree = 0; inf.opaque = 0; - inf.next_in = (z_const Byte *)in; + inf.next_in = (z_const Byte*)in; inf.avail_in = (uInt)insize; //The reallocation speed on windows(or at least mingw) is pretty bad which makes this a lot faster. A bigger buffer would be even better on large images. #if defined(_WIN32) || defined(WIN32) #define BUFSIZE 1024 * 128 - unsigned char* buf = (unsigned char*)malloc(BUFSIZE); + unsigned char* buf = (unsigned char*)lodepng_malloc(BUFSIZE); if(!buf) {exit(1);} #else #define BUFSIZE 1024 * 32 @@ -322,7 +320,7 @@ unsigned lodepng_inflate(unsigned char** out, size_t* outsize, inf.next_out = buf; inf.avail_out = BUFSIZE; - if(inflateInit2(&inf, -15) != Z_OK) {return 83;} + if(inflateInit2(&inf, -15) != Z_OK) return 83; while(1) { int err = inflate(&inf, Z_SYNC_FLUSH); @@ -343,7 +341,7 @@ unsigned lodepng_inflate(unsigned char** out, size_t* outsize, free(buf); #endif unsigned ret = 95; - if(err == Z_MEM_ERROR) {ret = 83;} + if(err == Z_MEM_ERROR) ret = 83; inflateEnd(&inf); return ret; } @@ -373,11 +371,6 @@ static unsigned deflate(unsigned char** out, size_t* outsize, #endif /*LODEPNG_COMPILE_DECODER*/ -/*Return the adler32 of the bytes data[0..len-1]*/ -static unsigned adler32(const unsigned char* data, unsigned len) { - return adler32(1, data, len); -} - /* ////////////////////////////////////////////////////////////////////////// */ /* / Zlib / */ /* ////////////////////////////////////////////////////////////////////////// */ @@ -416,7 +409,7 @@ static unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, co if(error) return error; unsigned ADLER32 = lodepng_read32bitInt(&in[insize - 4]); - unsigned checksum = adler32(*out, (unsigned)(*outsize)); + unsigned checksum = adler32(1, *out, (unsigned)(*outsize)); if(checksum != ADLER32) return 58; /*error, adler checksum not correct, data must be corrupted*/ return 0; /*no error*/ @@ -443,7 +436,7 @@ static unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, cons } if(!error) { - unsigned ADLER32 = adler32(in, (unsigned)insize); + unsigned ADLER32 = adler32(1, in, (unsigned)insize); /*zlib data: 1 byte CMF (CM+CINFO), 1 byte FLG, deflate data, 4 byte ADLER32 checksum of the Decompressed data*/ unsigned CMF = 120; /*0b01111000: CM 8, CINFO 7. With CINFO 7, any window size up to 32768 can be used.*/ unsigned FLEVEL = 3; @@ -482,11 +475,6 @@ void lodepng_compress_settings_init(LodePNGCompressSettings* settings) { #ifdef LODEPNG_COMPILE_PNG -/*Return the CRC of the bytes buf[0..len-1].*/ -static unsigned lodepng_crc32(const unsigned char* buf, size_t len) { - return crc32(0, buf, len); -} - /* ////////////////////////////////////////////////////////////////////////// */ /* / Reading and writing PNG color channel bits / */ /* ////////////////////////////////////////////////////////////////////////// */ @@ -549,14 +537,14 @@ static unsigned lodepng_chunk_check_crc(const unsigned char* chunk) { unsigned length = lodepng_chunk_length(chunk); unsigned CRC = lodepng_read32bitInt(&chunk[length + 8]); /*the CRC is taken of the data and the 4 chunk type letters, not the length*/ - unsigned checksum = lodepng_crc32(&chunk[4], length + 4); + unsigned checksum = crc32(0, &chunk[4], length + 4); if(CRC != checksum) return 1; - return 0; + else return 0; } static void lodepng_chunk_generate_crc(unsigned char* chunk) { unsigned length = lodepng_chunk_length(chunk); - unsigned CRC = lodepng_crc32(&chunk[4], length + 4); + unsigned CRC = crc32(0, &chunk[4], length + 4); lodepng_set32bitInt(chunk + 8 + length, CRC); } @@ -564,7 +552,7 @@ unsigned char* lodepng_chunk_next(unsigned char* chunk, unsigned char* end) { size_t available_size = (size_t)(end - chunk); if(chunk >= end || available_size < 12) return end; /*too small to contain a chunk*/ if(chunk[0] == 0x89 && chunk[1] == 0x50 && chunk[2] == 0x4e && chunk[3] == 0x47 - && chunk[4] == 0x0d && chunk[5] == 0x0a && chunk[6] == 0x1a && chunk[7] == 0x0a) { + && chunk[4] == 0x0d && chunk[5] == 0x0a && chunk[6] == 0x1a && chunk[7] == 0x0a) { /* Is PNG magic header at start of PNG file. Jump to first actual chunk. */ return chunk + 8; } else { @@ -579,7 +567,7 @@ const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk, const size_t available_size = (size_t)(end - chunk); if(chunk >= end || available_size < 12) return end; /*too small to contain a chunk*/ if(chunk[0] == 0x89 && chunk[1] == 0x50 && chunk[2] == 0x4e && chunk[3] == 0x47 - && chunk[4] == 0x0d && chunk[5] == 0x0a && chunk[6] == 0x1a && chunk[7] == 0x0a) { + && chunk[4] == 0x0d && chunk[5] == 0x0a && chunk[6] == 0x1a && chunk[7] == 0x0a) { /* Is PNG magic header at start of PNG file. Jump to first actual chunk. */ return chunk + 8; } else { @@ -604,7 +592,7 @@ unsigned lodepng_chunk_append(unsigned char** out, size_t* outsize, const unsign (*outsize) = new_length; chunk_start = &(*out)[new_length - total_chunk_length]; - for(i = 0; i != total_chunk_length; ++i) chunk_start[i] = chunk[i]; + memcpy(chunk_start, chunk, total_chunk_length); return 0; } @@ -676,11 +664,11 @@ static unsigned checkColorValidity(LodePNGColorType colortype, unsigned bd) { static unsigned getNumColorChannels(LodePNGColorType colortype) { switch(colortype) { - case LCT_GREY: return 1; - case LCT_RGB: return 3; - case LCT_PALETTE: return 1; - case LCT_GREY_ALPHA: return 2; - case LCT_RGBA: return 4; + case LCT_GREY: return 1; + case LCT_RGB: return 3; + case LCT_PALETTE: return 1; + case LCT_GREY_ALPHA: return 2; + case LCT_RGBA: return 4; case LCT_MAX_OCTET_VALUE: return 0; /* invalid color type */ default: return 0; /*invalid color type*/ } @@ -800,16 +788,12 @@ unsigned lodepng_can_have_alpha(const LodePNGColorMode* info) { || lodepng_has_palette_alpha(info); } -static size_t lodepng_get_raw_size_lct(unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth) { - size_t bpp = lodepng_get_bpp_lct(colortype, bitdepth); +size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color) { + size_t bpp = lodepng_get_bpp_lct(color->colortype, color->bitdepth); size_t n = (size_t)w * (size_t)h; return ((n / 8u) * bpp) + ((n & 7u) * bpp + 7u) / 8u; } -size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color) { - return lodepng_get_raw_size_lct(w, h, color->colortype, color->bitdepth); -} - #ifdef LODEPNG_COMPILE_PNG @@ -1135,7 +1119,7 @@ static int color_tree_get(ColorTree* tree, unsigned char r, unsigned char g, uns unsigned i = x & 15; if(!tree->children[i]) return -1; tree = tree->children[i]; - x>>=4; + x >>=4 ; } return tree ? tree->index : -1; } @@ -1163,7 +1147,7 @@ static int color_tree_has(ColorTree* tree, unsigned char r, unsigned char g, uns /*color is not allowed to already exist. Index should be >= 0 (it's signed to be compatible with using -1 for "doesn't exist")*/ static void color_tree_add(ColorTree* tree, - unsigned char r, unsigned char g, unsigned char b, unsigned char a, unsigned index) { + unsigned char r, unsigned char g, unsigned char b, unsigned char a, unsigned index) { int bit; for(bit = 0; bit < 8; ++bit) { int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1); @@ -1575,13 +1559,13 @@ unsigned lodepng_convert(unsigned char* out, const unsigned char* in, getPixelColorsRGBA8(out, numpixels, in, mode_in); } else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGB) { getPixelColorsRGB8(out, numpixels, in, mode_in); - } else if (mode_in->colortype == LCT_RGBA && mode_out->colortype == LCT_PALETTE && mode_in->bitdepth == 8 && mode_out->bitdepth == 8) { + } else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_PALETTE && mode_in->bitdepth == 8 && mode_in->colortype == LCT_RGBA) { unsigned match = (*(unsigned*)in) + 1; unsigned char prevbyte = 0; for(i = 0; i != numpixels; ++i) { unsigned m = *(unsigned*)(in + 4 * i); - if (m == match) { + if(m == match) { out[i] = prevbyte; } else { int index = color_tree_get(&tree, in[i * 4], in[i * 4 + 1], in[i * 4 + 2], in[i * 4 + 3]); @@ -1709,7 +1693,7 @@ unsigned lodepng_compute_color_stats(LodePNGColorStats* stats, unsigned char r = 0, g = 0, b = 0, a = 0; for(i = 0; i != numpixels; ++i) { //If we have already encountered a color (e.g. at the previous pixel), it won't have an effect on the color stats. - if (i > 0 && mode_in->colortype == LCT_RGBA && mode_in->bitdepth == 8 && memcmp(in + i * 4, in + i * 4 - 4, 4) == 0) {continue;} + if(i > 0 && mode_in->colortype == LCT_RGBA && mode_in->bitdepth == 8 && memcmp(in + i * 4, in + i * 4 - 4, 4) == 0) {continue;} getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in); if(!bits_done && stats->bits < 8) { @@ -1797,7 +1781,7 @@ static void optimize_palette(LodePNGColorMode* mode_out, const uint32_t* image, size_t i, count = 0; ColorTree tree; color_tree_init(&tree); - for (i = 0; i < w * h; ++i) { + for(i = 0; i < w * h; ++i) { const unsigned char* c = (unsigned char*)&image[i]; if(color_tree_inc(&tree, c[0], c[1], c[2], c[3]) == 0) ++count; } @@ -2041,7 +2025,7 @@ static unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out, const Lode } else /*8-bit or 16-bit per channel*/ { mode_out->bitdepth = bits; mode_out->colortype = alpha ? (stats->colored ? LCT_RGBA : LCT_GREY_ALPHA) - : (stats->colored ? LCT_RGB : LCT_GREY); + : (stats->colored ? LCT_RGB : LCT_GREY); if(stats->key) { unsigned mask = (1u << mode_out->bitdepth) - 1u; /*stats always uses 16-bit, mask converts it*/ @@ -2063,9 +2047,9 @@ The parameters are of type short, but should come from unsigned chars, the short are only needed to make the paeth calculation correct. */ static unsigned char paethPredictor(short a, short b, short c) { - short pa = abs(b - c); - short pb = abs(a - c); - short pc = abs(a + b - c - c); + short pa = LODEPNG_ABS(b - c); + short pb = LODEPNG_ABS(a - c); + short pc = LODEPNG_ABS(a + b - c - c); /* return input value associated with smallest of pa, pb, pc (with certain priority if equal) */ if(pb < pa) { a = b; pa = pb; } return (pc < pa) ? c : a; @@ -2179,7 +2163,7 @@ unsigned lodepng_inspect(unsigned* w, unsigned* h, LodePNGState* state, if(info->interlace_method > 1) CERROR_RETURN_ERROR(state->error, 34); unsigned CRC = lodepng_read32bitInt(&in[29]); - unsigned checksum = lodepng_crc32(&in[12], 17); + unsigned checksum = crc32(0, &in[12], 17); if(CRC != checksum) { CERROR_RETURN_ERROR(state->error, 57); /*invalid CRC*/ } @@ -3238,9 +3222,9 @@ static void filterScanline(unsigned char* out, const unsigned char* scanline, co static void filterScanline2(unsigned char* scanline, const unsigned char* prevline, size_t length, unsigned char filterType) { - if (!filterType) { + if(!filterType) { for(int i = 0; i < length; i+=4) { - if (!scanline[i + 3]) { + if(!scanline[i + 3]) { *(unsigned*)&scanline[i] = 0; } } @@ -3260,7 +3244,7 @@ static void filterScanline2(unsigned char* scanline, const unsigned char* prevli else if(filterType == 2) { if(!prevline) { for(int i = 0; i < length; i+=4) { - if (!scanline[i + 3]) { + if(!scanline[i + 3]) { *(unsigned*)&scanline[i] = 0; } } @@ -3305,20 +3289,20 @@ static void filterScanline2(unsigned char* scanline, const unsigned char* prevli *(unsigned*)scanline = 0; } for(int i = 4; i < length; i+=4) { - if (!scanline[i + 3]) { + if(!scanline[i + 3]) { scanline[i] = scanline[i - 4]; scanline[i + 1] = scanline[i - 3]; scanline[i + 2] = scanline[i - 2]; } } } else { - if (!scanline[3]) { + if(!scanline[3]) { scanline[0] = prevline[0]; scanline[1] = prevline[1]; scanline[2] = prevline[2]; } for(int i = 4; i < length; i+=4) { - if (!scanline[i + 3]) { + if(!scanline[i + 3]) { scanline[i] = paethPredictor(scanline[i - 4], prevline[i], prevline[i - 4]); scanline[i + 1] = paethPredictor(scanline[i - 3], prevline[i], prevline[i - 3]); scanline[i + 2] = paethPredictor(scanline[i - 2], prevline[i], prevline[i - 2]); @@ -3349,14 +3333,14 @@ static uint64_t randomUInt64(uint64_t* s) { /* generate random number between 0 and 1 */ static double randomDecimal(uint64_t* s) { - return double(randomUInt64(s)) / UINT64_MAX; + return double(randomUInt64(s)) / 18446744073709551616.0; //UINT64_MAX+1, silences clang implicit conversion warning } #include #include static std::atomic signaled(0); static void sig_handler(int signo) { - if (signo == SIGINT) { + if(signo == SIGINT) { if(signaled.load() == 0) { printf("received SIGINT, will stop after this iteration\n"); } @@ -3369,28 +3353,45 @@ static char windowbits(unsigned long len) { #ifdef __GNUC__ result = __builtin_clzl(len) ^ (8 * sizeof(unsigned long) - 1); #else - while (len >>= 1) {result++;} + while(len >>= 1) {result++;} #endif result++; - if (result < 9) { - result = 9; - } - else if (result > 15) { - result = 15; - } - return -result; + if(result < 9) return -9; + else if (result > 15) return -15; + else return -(char)result; +} + +/* integer binary logarithm, max return value is 31 */ +static size_t ilog2(size_t i) { + size_t result = 0; + if(i >= 65536) { result += 16; i >>= 16; } + if(i >= 256) { result += 8; i >>= 8; } + if(i >= 16) { result += 4; i >>= 4; } + if(i >= 4) { result += 2; i >>= 2; } + if(i >= 2) { result += 1; /*i >>= 1;*/ } + return result; +} + +/* integer approximation for i * log2(i), helper function for LFS_ENTROPY */ +static size_t ilog2i(size_t i) { + if(i == 0) return 0; + size_t l; + l = ilog2(i); + /* approximate i*log2(i): l is integer logarithm, ((i - (1u << l)) << 1u) + linearly approximates the missing fractional part multiplied by i */ + return i * l + ((i - (1u << l)) << 1u); } static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, - const LodePNGColorMode* info, LodePNGEncoderSettings* settings) { + const LodePNGColorMode* color, LodePNGEncoderSettings* settings) { /* For PNG filter method 0 out must be a buffer with as size: h + (w * h * bpp + 7u) / 8u, because there are the scanlines with 1 extra byte per scanline */ - unsigned bpp = lodepng_get_bpp(info); + unsigned bpp = lodepng_get_bpp(color); if(bpp == 0) return 31; /*error: invalid color type*/ /*the width of a scanline in bytes, not including the filter type*/ size_t linebytes = lodepng_get_raw_size_idat(w, 1, bpp) - 1u; @@ -3419,7 +3420,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, prevline = &in[inindex]; } } else { - const unsigned clean = settings->clean_alpha && info->colortype == LCT_RGBA && info->bitdepth == 8 && !info->key_defined; + const unsigned clean = settings->clean_alpha && color->colortype == LCT_RGBA && color->bitdepth == 8 && !color->key_defined; unsigned char* in2 = 0; unsigned char* rem = 0; if(clean) { @@ -3443,128 +3444,130 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, stream.zfree = 0; stream.opaque = 0; - if(!error && strategy == LFS_BRUTE_FORCE) { - /*brute force filter chooser. - deflate the scanline after every filter attempt to see which one deflates best.*/ - int err = deflateInit2(&stream, 3, Z_DEFLATED, windowbits(linebytes), 3, Z_FILTERED); - if(err != Z_OK) exit(1); - - for(y = 0; y != h; ++y) { /*try the 5 filter types*/ - memcpy(rem, &in2[y * linebytes], linebytes * clean); - for(type = 0; type != 5; ++type) { - size_t size = 0; - if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, type); - filterScanline(attempt[type], &in2[y * linebytes], prevline, linebytes, bytewidth, type); - } else filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); - - if(settings->filter_style < 2 || 1) { - deflateTune(&stream, 258, 258, 258, 550 + (settings->filter_style) * 100); - stream.next_in = (z_const unsigned char*)attempt[type]; - stream.avail_in = linebytes; - stream.avail_out = UINT_MAX; - stream.next_out = (unsigned char*)1; - - deflate_nooutput(&stream, Z_FINISH); - - size = stream.total_out; - deflateReset(&stream); - } else size = ZopfliLZ77LazyLauncher(attempt[type], 0, linebytes, settings->filter_style); - - /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/ - if(type == 0 || size < smallest) { - bestType = type; - smallest = size; + if(!error) { + if(strategy == LFS_BRUTE_FORCE) { + /*brute force filter chooser. + deflate the scanline after every filter attempt to see which one deflates best.*/ + int err = deflateInit2(&stream, 3, Z_DEFLATED, windowbits(linebytes), 3, Z_FILTERED); + if(err != Z_OK) exit(1); + + for(y = 0; y != h; ++y) { /*try the 5 filter types*/ + memcpy(rem, &in2[y * linebytes], linebytes * clean); + for(type = 0; type != 5; ++type) { + size_t size = 0; + if(clean) { + filterScanline2(&in2[y * linebytes], prevline, linebytes, type); + filterScanline(attempt[type], &in2[y * linebytes], prevline, linebytes, bytewidth, type); + } else filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); + + if(settings->filter_style < 2 || 1) { + deflateTune(&stream, 258, 258, 258, 550 + (settings->filter_style) * 100); + stream.next_in = (z_const unsigned char*)attempt[type]; + stream.avail_in = linebytes; + stream.avail_out = UINT_MAX; + stream.next_out = (unsigned char*)1; + + deflate_nooutput(&stream, Z_FINISH); + + size = stream.total_out; + deflateReset(&stream); + } else size = ZopfliLZ77LazyLauncher(attempt[type], 0, linebytes, settings->filter_style); + + /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/ + if(type == 0 || size < smallest) { + bestType = type; + smallest = size; + } + if(clean) memcpy(&in2[y * linebytes], rem, linebytes); } - if(clean) memcpy(&in2[y * linebytes], rem, linebytes); + out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; + if(clean) { + filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType); + prevline = &in2[y * linebytes]; + } else prevline = &in[y * linebytes]; } - out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ - for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; + } else if(strategy >= LFS_INCREMENTAL && strategy <= LFS_INCREMENTAL3) { + /*Incremental brute force filter chooser. + Keep a buffer of each tested scanline and deflate the entire buffer after every filter attempt to see which one deflates best. + Now implemented with streaming, which reduces complexity to O(n) + This is slow.*/ + z_stream teststream; + size_t testsize = linebytes + 1; + int err = deflateInit2(&stream, strategy == LFS_INCREMENTAL3 ? 1 : 2, Z_DEFLATED, windowbits(testsize * h), 8, Z_FILTERED); + if(err != Z_OK) exit(1); + if(strategy == LFS_INCREMENTAL) deflateTune(&stream, 16, 258, 258, 200); + else if(strategy == LFS_INCREMENTAL2) deflateTune(&stream, 50, 258, 258, 1100); + deflateCopy(&teststream, &stream, 1); + + unsigned char* dummy = (unsigned char*)1; /*Not used, but must not be NULL*/ + unsigned char* prevline2 = 0; + unsigned char* prevlinebuf = 0; + unsigned char* linebuf; if(clean) { - filterScanline2(&in2[y * linebytes], prevline, linebytes, bestType); - prevline = &in2[y * linebytes]; - } else prevline = &in[y * linebytes]; - } - } else if(!error && strategy >= LFS_INCREMENTAL && strategy <= LFS_INCREMENTAL3) { - /*Incremental brute force filter chooser. - Keep a buffer of each tested scanline and deflate the entire buffer after every filter attempt to see which one deflates best. - Now implemented with streaming, which reduces complexity to O(n) - This is slow.*/ - z_stream teststream; - size_t testsize = linebytes + 1; - int err = deflateInit2(&stream, strategy == LFS_INCREMENTAL3 ? 1 : 2, Z_DEFLATED, windowbits(testsize * h), 8, Z_FILTERED); - if(err != Z_OK) exit(1); - if(strategy == LFS_INCREMENTAL) deflateTune(&stream, 16, 258, 258, 200); - else if(strategy == LFS_INCREMENTAL2) deflateTune(&stream, 50, 258, 258, 1100); - deflateCopy(&teststream, &stream, 1); - - unsigned char* dummy = (unsigned char*)1; /*Not used, but must not be NULL*/ - unsigned char* prevline2 = 0; - unsigned char* prevlinebuf = 0; - unsigned char* linebuf; - if(clean) { - prevlinebuf = (unsigned char*)lodepng_malloc(linebytes); - linebuf = (unsigned char*)lodepng_malloc(linebytes); - } - - for(y = 0; y != h; ++y) { /*try the 5 filter types*/ - for(type = 4; type + 1 != 0; --type) { /*type 0 is most likely, so end with that to reduce copying*/ - size_t size = 0; + prevlinebuf = (unsigned char*)lodepng_malloc(linebytes); + linebuf = (unsigned char*)lodepng_malloc(linebytes); + } + + for(y = 0; y != h; ++y) { /*try the 5 filter types*/ + for(type = 4; type + 1 != 0; --type) { /*type 0 is most likely, so end with that to reduce copying*/ + size_t size = 0; + if(clean) { + memcpy(linebuf, &in[y * linebytes], linebytes); + filterScanline2(linebuf, prevline2, linebytes, type); + filterScanline(attempt[type], linebuf, prevline2, linebytes, bytewidth, type); + } else filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); + /*copy result to output buffer temporarily to include compression test*/ + out[y * (linebytes + 1)] = type; /*the first byte of a scanline will be the filter type*/ + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[type][x]; + + deflateCopy(&teststream, &stream, 0); + teststream.next_in = (z_const unsigned char*)(out + y * testsize); + teststream.avail_in = testsize; + teststream.avail_out = UINT_MAX; + teststream.next_out = dummy; + deflate_nooutput(&teststream, Z_FINISH); + + size = teststream.total_out; + + /*check if this is smallest size (or if type == 4 it's the first case so always store the values)*/ + if(type == 4 || size < smallest) { + bestType = type; + smallest = size; + } + } + if(clean) { memcpy(linebuf, &in[y * linebytes], linebytes); - filterScanline2(linebuf, prevline2, linebytes, type); - filterScanline(attempt[type], linebuf, prevline2, linebytes, bytewidth, type); - } else filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); + filterScanline2(linebuf, prevline2, linebytes, bestType); + filterScanline(attempt[bestType], linebuf, prevline2, linebytes, bytewidth, bestType); + } else filterScanline(attempt[bestType], &in[y * linebytes], prevline, linebytes, bytewidth, bestType); /*copy result to output buffer temporarily to include compression test*/ - out[y * (linebytes + 1)] = type; /*the first byte of a scanline will be the filter type*/ - for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[type][x]; - - deflateCopy(&teststream, &stream, 0); - teststream.next_in = (z_const unsigned char*)(out + y * testsize); - teststream.avail_in = testsize; - teststream.avail_out = UINT_MAX; - teststream.next_out = dummy; - deflate_nooutput(&teststream, Z_FINISH); - - size = teststream.total_out; - - /*check if this is smallest size (or if type == 4 it's the first case so always store the values)*/ - if(type == 4 || size < smallest) { - bestType = type; - smallest = size; + out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; + + stream.next_in = (z_const unsigned char*)(out + y * testsize); + stream.avail_in = testsize; + stream.avail_out = UINT_MAX; + stream.next_out = dummy; + deflate_nooutput(&stream, Z_NO_FLUSH); + + prevline = &in[y * linebytes]; + if(clean) { + memcpy(linebuf, &in[y * linebytes], linebytes); + filterScanline2(linebuf, prevline2, linebytes, bestType); + memcpy(prevlinebuf, linebuf, linebytes); + prevline2 = prevlinebuf; } + out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ + if(type) for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; /*last attempt is type 0, so no copying necessary*/ } - - if(clean) { - memcpy(linebuf, &in[y * linebytes], linebytes); - filterScanline2(linebuf, prevline2, linebytes, bestType); - filterScanline(attempt[bestType], linebuf, prevline2, linebytes, bytewidth, bestType); - } else filterScanline(attempt[bestType], &in[y * linebytes], prevline, linebytes, bytewidth, bestType); - /*copy result to output buffer temporarily to include compression test*/ - out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ - for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; - - stream.next_in = (z_const unsigned char*)(out + y * testsize); - stream.avail_in = testsize; - stream.avail_out = UINT_MAX; - stream.next_out = dummy; - deflate_nooutput(&stream, Z_NO_FLUSH); - - prevline = &in[y * linebytes]; if(clean) { - memcpy(linebuf, &in[y * linebytes], linebytes); - filterScanline2(linebuf, prevline2, linebytes, bestType); - memcpy(prevlinebuf, linebuf, linebytes); - prevline2 = prevlinebuf; + free(prevlinebuf); + free(linebuf); } - out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ - if(type) for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; /*last attempt is type 0, so no copying necessary*/ - } - if(clean) { - free(prevlinebuf); - free(linebuf); + deflateEnd(&teststream); } - deflateEnd(&teststream); } deflateEnd(&stream); for(type = 0; type != 5; ++type) free(attempt[type]); @@ -3602,8 +3605,11 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned count[256] = { 0 }; for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]]; ++count[type]; /*the filter type itself is part of the scanline*/ - if(strategy == LFS_DISTINCT_BYTES) { for(x = 0; x != 256; ++x) if(count[x] != 0) ++sum; } - else if(strategy == LFS_ENTROPY) { for(x = 0; x != 256; ++x) sum += ilog2i(count[x]); } + if(strategy == LFS_DISTINCT_BYTES) { + for(x = 0; x != 256; ++x) sum = sum + (count[x] != 0); + } else if(strategy == LFS_ENTROPY) { + for(x = 0; x != 256; ++x) sum += ilog2i(count[x]); + } } else if(strategy == LFS_DISTINCT_BIGRAMS) { unsigned char count[65536] = { 0 }; for(x = 1; x != linebytes; ++x) ++count[(attempt[type][x - 1] << 8) + attempt[type][x]]; @@ -3683,7 +3689,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, for(g = 0; g <= last; ++g) { if(strategy == LFS_ALL_CHEAP) { settings->filter_strategy = (LodePNGFilterStrategy)(g + 11); - filter(out, in, w, h, info, settings); + filter(out, in, w, h, color, settings); settings->filter_strategy = LFS_ALL_CHEAP; for(size_t k = 0; k < h * (linebytes + 1); k += (linebytes + 1)) population[popcnt++] = out[k]; } @@ -4227,7 +4233,6 @@ const char* lodepng_error_text(unsigned code) { case 19: return "end of out buffer memory reached while inflating"; case 20: return "invalid deflate block BTYPE encountered while decoding"; case 21: return "NLEN is not ones complement of LEN in a deflate block"; - /*end of out buffer memory reached while inflating: This can happen if the inflated deflate data is longer than the amount of bytes required to fill up all the pixels of the image, given the color depth and image dimensions. Something that doesn't @@ -4408,9 +4413,7 @@ unsigned decode(unsigned char** out, size_t& buffersize, unsigned& w, unsigned& if(*out && !error) { buffersize = lodepng_get_raw_size(w, h, &state.info_raw); } - else if (*out) { - free(*out); - } + else if(*out) free(*out); return error; } @@ -4426,7 +4429,7 @@ unsigned encode(std::vector& out, size_t buffersize; unsigned error = lodepng_encode(&buffer, &buffersize, in, w, h, &state, p); - if (error == 96) { + if(error == 96) { error = 0; state.note = 1; } From 8a1f29a9821164294ca1a6e2852ff991c4aefe4f Mon Sep 17 00:00:00 2001 From: woot000 <97068837+woot000@users.noreply.github.com> Date: Sat, 10 Sep 2022 20:37:36 -0500 Subject: [PATCH 06/10] Switch lodepng_compute_color_stats to void, other minor changes lodepng_compute_color_stats doesn't return any errors as is, so the function type is changed to void to reflect this behavior --- src/lodepng/lodepng.cpp | 38 +++++++++++++++++--------------------- src/lodepng/lodepng.h | 6 +++--- 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/src/lodepng/lodepng.cpp b/src/lodepng/lodepng.cpp index 6e38b75b..ade3808c 100755 --- a/src/lodepng/lodepng.cpp +++ b/src/lodepng/lodepng.cpp @@ -103,7 +103,6 @@ void* lodepng_realloc(void* ptr, size_t new_size); #define LODEPNG_RESTRICT /* not available */ #endif -#define LODEPNG_ABS(x) ((x) < 0 ? -(x) : (x)) #define LODEPNG_MAX(a, b) (((a) > (b)) ? (a) : (b)) #if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_DECODER) @@ -228,7 +227,7 @@ static char* alloc_string(const char* in) { #if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_PNG) static unsigned lodepng_read32bitInt(const unsigned char* buffer) { return (((unsigned)buffer[0] << 24u) | ((unsigned)buffer[1] << 16u) | - ((unsigned)buffer[2] << 8u) | (unsigned)buffer[3]); + ((unsigned)buffer[2] << 8u) | (unsigned)buffer[3]); } #endif /*defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_PNG)*/ @@ -664,11 +663,11 @@ static unsigned checkColorValidity(LodePNGColorType colortype, unsigned bd) { static unsigned getNumColorChannels(LodePNGColorType colortype) { switch(colortype) { - case LCT_GREY: return 1; - case LCT_RGB: return 3; - case LCT_PALETTE: return 1; - case LCT_GREY_ALPHA: return 2; - case LCT_RGBA: return 4; + case LCT_GREY: return 1; + case LCT_RGB: return 3; + case LCT_PALETTE: return 1; + case LCT_GREY_ALPHA: return 2; + case LCT_RGBA: return 4; case LCT_MAX_OCTET_VALUE: return 0; /* invalid color type */ default: return 0; /*invalid color type*/ } @@ -1612,11 +1611,10 @@ static unsigned getValueRequiredBits(unsigned char value) { } /*stats must already have been inited. */ -unsigned lodepng_compute_color_stats(LodePNGColorStats* stats, - const unsigned char* in, const size_t numpixels, - const LodePNGColorMode* mode_in) { +void lodepng_compute_color_stats(LodePNGColorStats* stats, + const unsigned char* in, const size_t numpixels, + const LodePNGColorMode* mode_in) { size_t i; - unsigned error = 0; /* mark things as done already if it would be impossible to have a more expensive case */ unsigned colored_done = lodepng_is_greyscale_type(mode_in) ? 1 : 0; @@ -1769,7 +1767,6 @@ unsigned lodepng_compute_color_stats(LodePNGColorStats* stats, stats->key_g += (stats->key_g << 8); stats->key_b += (stats->key_b << 8); } - return 0; } static void optimize_palette(LodePNGColorMode* mode_out, const uint32_t* image, @@ -2047,9 +2044,9 @@ The parameters are of type short, but should come from unsigned chars, the short are only needed to make the paeth calculation correct. */ static unsigned char paethPredictor(short a, short b, short c) { - short pa = LODEPNG_ABS(b - c); - short pb = LODEPNG_ABS(a - c); - short pc = LODEPNG_ABS(a + b - c - c); + short pa = abs(b - c); + short pb = abs(a - c); + short pc = abs(a + b - c - c); /* return input value associated with smallest of pa, pb, pc (with certain priority if equal) */ if(pb < pa) { a = b; pa = pb; } return (pc < pa) ? c : a; @@ -3179,7 +3176,7 @@ static void filterScanline(unsigned char* out, const unsigned char* scanline, co break; case 1: { /*Sub*/ size_t j = 0; - memcpy(out, scanline, bytewidth); + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; for(i = bytewidth; i != length; ++i, ++j) out[i] = scanline[i] - scanline[j]; break; } @@ -3196,7 +3193,7 @@ static void filterScanline(unsigned char* out, const unsigned char* scanline, co for(i = 0; i != bytewidth; ++i) out[i] = scanline[i] - (prevline[i] >> 1u); for(i = bytewidth; i < length; ++i, ++j) out[i] = scanline[i] - ((scanline[j] + prevline[i]) >> 1u); } else { - memcpy(out, scanline, bytewidth); + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; for(i = bytewidth; i < length; ++i, ++j) out[i] = scanline[i] - (scanline[j] >> 1u); } break; @@ -3210,7 +3207,7 @@ static void filterScanline(unsigned char* out, const unsigned char* scanline, co out[i] = scanline[i] - paethPredictor(scanline[j], prevline[i], prevline[j]); } } else { - memcpy(out, scanline, bytewidth); + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; /*paethPredictor(scanline[i - bytewidth], 0, 0) is always scanline[i - bytewidth]*/ for(i = bytewidth; i != length; ++i, ++j) out[i] = scanline[i] - scanline[j]; } @@ -4039,9 +4036,8 @@ static unsigned lodepng_encode(unsigned char** out, size_t* outsize, LodePNGColorStats stats; lodepng_color_stats_init(&stats); - state->error = lodepng_compute_color_stats(&stats, image, numpixels, &state->info_raw); - if(state->error) goto cleanup; - else { /*check if image is white only if no error is detected in previous function*/ + lodepng_compute_color_stats(&stats, image, numpixels, &state->info_raw); + /* check if image is fully white */ { unsigned char r = 0, g = 0, b = 0, a = 0; getPixelColorRGBA8(&r, &g, &b, &a, image, 0, &state->info_raw); stats.white = stats.numcolors == 1 && stats.colored == 0 && r == 255 && w > 20 && h > 20 diff --git a/src/lodepng/lodepng.h b/src/lodepng/lodepng.h index c6faa101..4f30998a 100755 --- a/src/lodepng/lodepng.h +++ b/src/lodepng/lodepng.h @@ -489,9 +489,9 @@ void lodepng_color_stats_init(LodePNGColorStats* stats); /*Get a LodePNGColorStats of the image. The stats must already have been inited. Returns error code (e.g. alloc fail) or 0 if ok.*/ -unsigned lodepng_compute_color_stats(LodePNGColorStats* stats, - const unsigned char* image, unsigned w, unsigned h, - const LodePNGColorMode* mode_in); +void lodepng_compute_color_stats(LodePNGColorStats* stats, + const unsigned char* image, const size_t numpixels, + const LodePNGColorMode* mode_in); /*Settings for the encoder.*/ typedef struct LodePNGEncoderSettings { From e7f5f479f58a38b127005331a91719a7b0ebd5fe Mon Sep 17 00:00:00 2001 From: woot000 <97068837+woot000@users.noreply.github.com> Date: Tue, 13 Sep 2022 14:39:32 -0500 Subject: [PATCH 07/10] More minor optimizations, edits --allfilters is ~1.5% faster from previous commit, --pal_sort=120 is ~2.5% faster from previous commit, LodePNG code size is reduced by 3kB using Clang 15.0.0 --- src/lodepng/lodepng.cpp | 153 +++++++++++++++++++--------------------- src/lodepng/lodepng.h | 3 - 2 files changed, 74 insertions(+), 82 deletions(-) diff --git a/src/lodepng/lodepng.cpp b/src/lodepng/lodepng.cpp index ade3808c..dc1f0fcf 100755 --- a/src/lodepng/lodepng.cpp +++ b/src/lodepng/lodepng.cpp @@ -521,11 +521,7 @@ void lodepng_chunk_type(char type[5], const unsigned char* chunk) { unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type) { if(strlen(type) != 4) return 0; - return (chunk[4] == type[0] && chunk[5] == type[1] && chunk[6] == type[2] && chunk[7] == type[3]); -} - -unsigned char lodepng_chunk_ancillary(const unsigned char* chunk) { - return((chunk[4] & 32) != 0); + else return (chunk[4] == type[0] && chunk[5] == type[1] && chunk[6] == type[2] && chunk[7] == type[3]); } const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk) { @@ -1123,7 +1119,7 @@ static int color_tree_get(ColorTree* tree, unsigned char r, unsigned char g, uns return tree ? tree->index : -1; } -static int color_tree_inc(ColorTree* tree, +static bool color_tree_inc(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) { int bit; for(bit = 0; bit < 8; ++bit) { @@ -1134,7 +1130,7 @@ static int color_tree_inc(ColorTree* tree, } tree = tree->children[i]; } - return ++(tree->index); + return (++(tree->index) != 0); } #ifdef LODEPNG_COMPILE_ENCODER @@ -1778,9 +1774,9 @@ static void optimize_palette(LodePNGColorMode* mode_out, const uint32_t* image, size_t i, count = 0; ColorTree tree; color_tree_init(&tree); - for(i = 0; i < w * h; ++i) { + for(i = 0; i != (size_t)w * (size_t)h; ++i) { const unsigned char* c = (unsigned char*)&image[i]; - if(color_tree_inc(&tree, c[0], c[1], c[2], c[3]) == 0) ++count; + count = count + !color_tree_inc(&tree, c[0], c[1], c[2], c[3]); } if(count == 0) return; //Silence clang static analyzer warnings @@ -1796,19 +1792,20 @@ static void optimize_palette(LodePNGColorMode* mode_out, const uint32_t* image, for(i = 0; i != count; ++i) { /*all priority values will run through this for loop*/ const unsigned char* c = (unsigned char*)&palette_in[i]; if(priority == LPPS_POPULARITY) sortfield[i] |= (color_tree_get(&tree, c[0], c[1], c[2], c[3]) + 1) << 8; - else if(priority == LPPS_RGB) sortfield[i] |= uint64_t(c[0]) << 32 | uint64_t(c[1]) << 24 | uint64_t(c[2]) << 16; - else if(priority == LPPS_YUV || priority == LPPS_LAB) { - const double r = c[0]; - const double g = c[1]; - const double b = c[2]; + else if(priority == LPPS_RGB) sortfield[i] |= uint64_t(c[0]) << 32 | uint64_t(c[1]) << 24 | uint64_t(c[2]) << 16; + else { /*LPPS_YUV, LPPS_LAB, LPPS_MSB*/ + const uint64_t r = c[0]; + const uint64_t g = c[1]; + const uint64_t b = c[2]; if(priority == LPPS_YUV) { - sortfield[i] |= uint64_t(0.299 * r + 0.587 * g + 0.114 * b) << 32 - | uint64_t((-0.14713 * r - 0.28886 * g + 0.436 * b + 111.18) / 0.872) << 24 - | uint64_t((0.615 * r - 0.51499 * g - 0.10001 * b + 156.825) / 1.23) << 16; - } else { /*LPPS_LAB*/ - double vx = (0.4124564 * r + 0.3575761 * g + 0.1804375 * b) / 255 / 95.047; - double vy = (0.2126729 * r + 0.7151522 * g + 0.0721750 * b) / 255 / 100; - double vz = (0.0193339 * r + 0.1191920 * g + 0.9503041 * b) / 255 / 108.883; + sortfield[i] |= + uint64_t(0.299 * (double)r + 0.587 * (double)g + 0.114 * (double)b) << 32 + | uint64_t((-0.14713 * (double)r - 0.28886 * (double)g + 0.436 * (double)b + 111.18) / 0.872) << 24 + | uint64_t((0.615 * (double)r - 0.51499 * (double)g - 0.10001 * (double)b + 156.825) / 1.23) << 16; + } else if(priority == LPPS_LAB) { + double vx = (0.4124564 * (double)r + 0.3575761 * (double)g + 0.1804375 * (double)b) / 255 / 95.047; + double vy = (0.2126729 * (double)r + 0.7151522 * (double)g + 0.0721750 * (double)b) / 255 / 100; + double vz = (0.0193339 * (double)r + 0.1191920 * (double)g + 0.9503041 * (double)b) / 255 / 108.883; const double ep = 216. / 24389.; const double ka = 24389. / 27.; const double ex = 1. / 3.; @@ -1816,22 +1813,21 @@ static void optimize_palette(LodePNGColorMode* mode_out, const uint32_t* image, vx = vx > ep ? pow(vx, ex) : ka * vx + de; vy = vy > ep ? pow(vy, ex) : ka * vy + de; vz = vz > ep ? pow(vz, ex) : ka * vz + de; - sortfield[i] |= uint64_t((vy * 116 - 16) / 100 * 255) << 32 + sortfield[i] |= + uint64_t((vy * 116 - 16) / 100 * 255) << 32 | uint64_t((vx - vy) * 500 + 256) << 24 | uint64_t((vy - vz) * 200 + 256) << 16; + } else { /*LPPS_MSB*/ + sortfield[i] |= + (r & 128) << 39 | (g & 128) << 38 | (b & 128) << 37 + | (r & 64) << 35 | (g & 64) << 34 | (b & 64) << 33 + | (r & 32) << 31 | (g & 32) << 30 | (b & 32) << 29 + | (r & 16) << 27 | (g & 16) << 26 | (b & 16) << 25 + | (r & 8) << 23 | (g & 8) << 22 | (b & 8) << 21 + | (r & 4) << 19 | (g & 4) << 18 | (b & 4) << 17 + | (r & 2) << 15 | (g & 2) << 14 | (b & 2) << 13 + | (r & 1) << 11 | (g & 1) << 10 | (b & 1) << 9; } - } else { /*LPPS_MSB*/ - const uint64_t r = c[0]; - const uint64_t g = c[1]; - const uint64_t b = c[2]; - sortfield[i] |= (r & 128) << 39 | (g & 128) << 38 | (b & 128) << 37 - | (r & 64) << 35 | (g & 64) << 34 | (b & 64) << 33 - | (r & 32) << 31 | (g & 32) << 30 | (b & 32) << 29 - | (r & 16) << 27 | (g & 16) << 26 | (b & 16) << 25 - | (r & 8) << 23 | (g & 8) << 22 | (b & 8) << 21 - | (r & 4) << 19 | (g & 4) << 18 | (b & 4) << 17 - | (r & 2) << 15 | (g & 2) << 14 | (b & 2) << 13 - | (r & 1) << 11 | (g & 1) << 10 | (b & 1) << 9; } } switch(transparency) { @@ -1869,7 +1865,7 @@ static void optimize_palette(LodePNGColorMode* mode_out, const uint32_t* image, best = i; } } - } else { + } else { /*LPDS_ASCENDING*/ uint64_t value = UINT64_MAX; for(i = 1; i != count; ++i) { if((sortfield[i] & 0x7FFFFFFFFFFFFFFFULL) < value) { @@ -2808,7 +2804,7 @@ static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h, #endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ } else /*it's not an implemented chunk type, so ignore it: skip over the data*/ { /*error: unknown critical chunk (5th bit of first byte of chunk type is 0)*/ - if(!lodepng_chunk_ancillary(chunk)) CERROR_BREAK(state->error, 69); + if((chunk[4] & 32) == 0) CERROR_BREAK(state->error, 69); unknown = 1; #ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS @@ -3417,7 +3413,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, prevline = &in[inindex]; } } else { - const unsigned clean = settings->clean_alpha && color->colortype == LCT_RGBA && color->bitdepth == 8 && !color->key_defined; + const bool clean = settings->clean_alpha && color->colortype == LCT_RGBA && color->bitdepth == 8 && !color->key_defined; unsigned char* in2 = 0; unsigned char* rem = 0; if(clean) { @@ -3428,20 +3424,20 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, } if(strategy == LFS_BRUTE_FORCE || (strategy >= LFS_INCREMENTAL && strategy <= LFS_INCREMENTAL3)) { unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ - size_t smallest = 0; - unsigned type, bestType = 0; - + unsigned type; for(type = 0; type != 5; ++type) { attempt[type] = (unsigned char*)lodepng_malloc(linebytes); if(!attempt[type]) error = 83; /*alloc fail*/ } - z_stream stream; - stream.zalloc = 0; - stream.zfree = 0; - stream.opaque = 0; - if(!error) { + size_t smallest = 0; + unsigned char bestType = 0; + + z_stream stream; + stream.zalloc = 0; + stream.zfree = 0; + stream.opaque = 0; if(strategy == LFS_BRUTE_FORCE) { /*brute force filter chooser. deflate the scanline after every filter attempt to see which one deflates best.*/ @@ -3472,7 +3468,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/ if(type == 0 || size < smallest) { - bestType = type; + bestType = (unsigned char)type; smallest = size; } if(clean) memcpy(&in2[y * linebytes], rem, linebytes); @@ -3529,7 +3525,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, /*check if this is smallest size (or if type == 4 it's the first case so always store the values)*/ if(type == 4 || size < smallest) { - bestType = type; + bestType = (unsigned char)type; smallest = size; } } @@ -3565,19 +3561,21 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, } deflateEnd(&teststream); } + deflateEnd(&stream); } - deflateEnd(&stream); for(type = 0; type != 5; ++type) free(attempt[type]); - } else if(strategy >= LFS_ENTROPY && strategy <= LFS_MINSUM) { /*LFS_ENTROPY, LFS_DISTINCT_BIGRAMS, LFS_DISTINCT_BYTES, LFS_MINSUM*/ - size_t smallest = 0; unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ - unsigned char type, bestType = 0; + unsigned char type; for(type = 0; type != 5; ++type) { attempt[type] = (unsigned char*)lodepng_malloc(linebytes); if(!attempt[type]) error = 83; /*alloc fail*/ } + if(!error) { + size_t smallest = 0; + unsigned char bestType = 0; + const size_t countsize = strategy != LFS_DISTINCT_BIGRAMS ? 256 : 65536; for(y = 0; y != h; ++y) { memcpy(rem, &in2[y * linebytes], linebytes * clean); /*try the 5 filter types*/ @@ -3598,30 +3596,32 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, sum += s < 128 ? s : (255U - s); } } - } else if(strategy == LFS_DISTINCT_BYTES ||strategy == LFS_ENTROPY) { - unsigned count[256] = { 0 }; - for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]]; - ++count[type]; /*the filter type itself is part of the scanline*/ - if(strategy == LFS_DISTINCT_BYTES) { - for(x = 0; x != 256; ++x) sum = sum + (count[x] != 0); - } else if(strategy == LFS_ENTROPY) { - for(x = 0; x != 256; ++x) sum += ilog2i(count[x]); + } else { + unsigned count[65536] = { 0 }; + if(strategy == LFS_DISTINCT_BYTES ||strategy == LFS_ENTROPY) { + for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]]; + } else { /*LFS_DISTINCT_BIGRAMS*/ + for(x = 1; x != linebytes; ++x) ++count[(attempt[type][x - 1] << 8) + attempt[type][x]]; } - } else if(strategy == LFS_DISTINCT_BIGRAMS) { - unsigned char count[65536] = { 0 }; - for(x = 1; x != linebytes; ++x) ++count[(attempt[type][x - 1] << 8) + attempt[type][x]]; ++count[type]; /*the filter type itself is part of the scanline*/ - for(x = 0; x != 65536; ++x) if(count[x]) ++sum; - if(type == 0 || sum > smallest) { /*smallest in this case acts as the best sum*/ - bestType = type; - smallest = sum; + if(strategy == LFS_DISTINCT_BYTES || strategy == LFS_DISTINCT_BIGRAMS) { + for(x = 0; x != countsize; ++x) sum += (count[x] != 0); + } else { /*LFS_ENTROPY*/ + for(x = 0; x != countsize; ++x) sum += ilog2i(count[x]); } } /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/ - if(strategy != LFS_DISTINCT_BIGRAMS && (type == 0 || sum < smallest)) { - bestType = type; - smallest = sum; + if(strategy != LFS_DISTINCT_BIGRAMS) { + if(type == 0 || sum < smallest) { + bestType = type; + smallest = sum; + } + } else { /*smallest acts as the best sum when the strategy is LFS_DISTINCT_BIGRAMS*/ + if(type == 0 || sum > smallest) { + bestType = type; + smallest = sum; + } } if(clean) memcpy(&in2[y * linebytes], rem, linebytes); } @@ -3646,7 +3646,6 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, } signaled.store(-settings->quiet); } - unsigned char* prevlinebuf = 0; unsigned char* linebuf; if(clean) { @@ -3667,7 +3666,6 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned e, i, g, type; unsigned best_size = UINT_MAX; unsigned total_size = 0; - unsigned e_since_best = 0; z_stream stream; stream.zalloc = 0; @@ -3724,6 +3722,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, best_size = size[i]; } } + unsigned e_since_best = 0; /*ctrl-c signals last iteration*/ for(e = 0; strategy == LFS_GENETIC && e_since_best < 500 && signaled.load() <= 0; ++e) { /*resort rankings*/ @@ -4004,7 +4003,6 @@ static unsigned lodepng_encode(unsigned char** out, size_t* outsize, ucvector outv = ucvector_init(0, 0); LodePNGInfo info; const LodePNGInfo* info_png = &state->info_png; - lodepng_info_init(&info); /*provide some proper output values if error will happen*/ *out = 0; @@ -4031,6 +4029,7 @@ static unsigned lodepng_encode(unsigned char** out, size_t* outsize, if(state->error) goto cleanup; /*error: invalid color type given*/ /* color convert and compute scanline filter types */ + lodepng_info_init(&info); lodepng_info_copy(&info, &state->info_png); if(state->encoder.auto_convert) { LodePNGColorStats stats; @@ -4056,16 +4055,12 @@ static unsigned lodepng_encode(unsigned char** out, size_t* outsize, } unsigned crc = crc32(0, info.color.palette, info.color.palettesize); - if(color_tree_inc(&ct, crc & 0xFF, crc & 0xFF00, crc & 0xFF0000, crc & 0xFF000000)) { - if(palset._first & 2) { - color_tree_cleanup(&ct); - } - state->error = 96; - goto cleanup; - } + /* error is set to 96 if color_tree_inc returns true, otherwise proceed */ + state->error = 96 * color_tree_inc(&ct, crc & 0xFF, crc & 0xFF00, crc & 0xFF0000, crc & 0xFF000000); if(palset._first & 2) { color_tree_cleanup(&ct); } + if(state->error) goto cleanup; } lodepng_color_mode_init(&state->out_mode); lodepng_color_mode_copy(&state->out_mode, &info.color); diff --git a/src/lodepng/lodepng.h b/src/lodepng/lodepng.h index 4f30998a..bf3672b9 100755 --- a/src/lodepng/lodepng.h +++ b/src/lodepng/lodepng.h @@ -604,9 +604,6 @@ void lodepng_chunk_type(char type[5], const unsigned char* chunk); /*check if the type is the given type*/ unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type); -/*0: it's one of the critical chunk types, 1: it's an ancillary chunk (see PNG standard)*/ -unsigned char lodepng_chunk_ancillary(const unsigned char* chunk); - /*get pointer to the data of the chunk, where the input points to the header of the chunk*/ const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk); From c365263703e1c126081d05c7d3c7fe7080f8052e Mon Sep 17 00:00:00 2001 From: woot000 <97068837+woot000@users.noreply.github.com> Date: Tue, 13 Sep 2022 17:34:24 -0500 Subject: [PATCH 08/10] minor LodePNG decoding optimizations ~2% faster decoding of RGB images, LodePNG code size decreases by 0.75kB using Clang 15.0.0 --- src/lodepng/lodepng.cpp | 57 ++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/src/lodepng/lodepng.cpp b/src/lodepng/lodepng.cpp index dc1f0fcf..ca62d931 100755 --- a/src/lodepng/lodepng.cpp +++ b/src/lodepng/lodepng.cpp @@ -1250,33 +1250,28 @@ static void getPixelColorRGBA8(unsigned char* r, unsigned char* g, if(mode->colortype == LCT_GREY) { if(mode->bitdepth == 8) { *r = *g = *b = in[i]; - if(mode->key_defined && *r == mode->key_r) *a = 0; - else *a = 255; + *a = 255 * !(mode->key_defined && *r == mode->key_r); } else if(mode->bitdepth == 16) { *r = *g = *b = in[i * 2]; - if(mode->key_defined && 256U * in[i * 2] + in[i * 2 + 1] == mode->key_r) *a = 0; - else *a = 255; + *a = 255 * !(mode->key_defined && 256U * in[i * 2] + in[i * 2 + 1] == mode->key_r); } else { unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/ size_t j = i * mode->bitdepth; unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth); *r = *g = *b = (value * 255) / highest; - if(mode->key_defined && value == mode->key_r) *a = 0; - else *a = 255; + *a = 255 * !(mode->key_defined && value == mode->key_r); } } else if(mode->colortype == LCT_RGB) { if(mode->bitdepth == 8) { *r = in[i * 3]; *g = in[i * 3 + 1]; *b = in[i * 3 + 2]; - if(mode->key_defined && *r == mode->key_r && *g == mode->key_g && *b == mode->key_b) *a = 0; - else *a = 255; + *a = 255 * !(mode->key_defined && *r == mode->key_r && *g == mode->key_g && *b == mode->key_b); } else { *r = in[i * 6]; *g = in[i * 6 + 2]; *b = in[i * 6 + 4]; - if(mode->key_defined && 256U * in[i * 6] + in[i * 6 + 1] == mode->key_r - && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g - && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0; - else *a = 255; + *a = 255 * !(mode->key_defined && 256U * in[i * 6] + in[i * 6 + 1] == mode->key_r + && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g + && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b); } } else if(mode->colortype == LCT_PALETTE) { unsigned index; @@ -1317,10 +1312,10 @@ static void getPixelColorRGBA8(unsigned char* r, unsigned char* g, mode test cases, optimized to convert the colors much faster, when converting to the common case of RGBA with 8 bit per channel. buffer must be RGBA with enough memory.*/ -static void getPixelColorsRGBA8(unsigned char* LODEPNG_RESTRICT buffer, size_t numpixels, +static void getPixelColorsRGBA8(unsigned char* LODEPNG_RESTRICT buffer, const size_t numpixels, const unsigned char* LODEPNG_RESTRICT in, const LodePNGColorMode* mode) { - unsigned num_channels = 4; + const unsigned char num_channels = 4; size_t i; if(mode->colortype == LCT_GREY) { if(mode->bitdepth == 8) { @@ -1331,13 +1326,13 @@ static void getPixelColorsRGBA8(unsigned char* LODEPNG_RESTRICT buffer, size_t n if(mode->key_defined) { buffer -= numpixels * num_channels; for(i = 0; i != numpixels; ++i, buffer += num_channels) { - if(buffer[0] == mode->key_r) buffer[3] = 0; + buffer[3] = buffer[3] * !(buffer[0] == mode->key_r); } } } else if(mode->bitdepth == 16) { for(i = 0; i != numpixels; ++i, buffer += num_channels) { buffer[0] = buffer[1] = buffer[2] = in[i * 2]; - buffer[3] = mode->key_defined && 256U * in[i * 2] + in[i * 2 + 1] == mode->key_r ? 0 : 255; + buffer[3] = 255 * !(mode->key_defined && 256U * in[i * 2] + in[i * 2 + 1] == mode->key_r); } } else { unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/ @@ -1345,7 +1340,7 @@ static void getPixelColorsRGBA8(unsigned char* LODEPNG_RESTRICT buffer, size_t n for(i = 0; i != numpixels; ++i, buffer += num_channels) { unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth); buffer[0] = buffer[1] = buffer[2] = (value * 255) / highest; - buffer[3] = mode->key_defined && value == mode->key_r ? 0 : 255; + buffer[3] = 255 * !(mode->key_defined && value == mode->key_r); } } } else if(mode->colortype == LCT_RGB) { @@ -1357,7 +1352,7 @@ static void getPixelColorsRGBA8(unsigned char* LODEPNG_RESTRICT buffer, size_t n if(mode->key_defined) { buffer -= numpixels * num_channels; for(i = 0; i != numpixels; ++i, buffer += num_channels) { - if(buffer[0] == mode->key_r && buffer[1]== mode->key_g && buffer[2] == mode->key_b) buffer[3] = 0; + buffer[3] = buffer[3] * !(buffer[0] == mode->key_r && buffer[1]== mode->key_g && buffer[2] == mode->key_b); } } } else { @@ -1365,10 +1360,10 @@ static void getPixelColorsRGBA8(unsigned char* LODEPNG_RESTRICT buffer, size_t n buffer[0] = in[i * 6]; buffer[1] = in[i * 6 + 2]; buffer[2] = in[i * 6 + 4]; - buffer[3] = mode->key_defined + buffer[3] = 255 * !(mode->key_defined && 256U * in[i * 6] + in[i * 6 + 1] == mode->key_r && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g - && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b ? 0 : 255; + && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b); } } } else if(mode->colortype == LCT_PALETTE) { @@ -1413,10 +1408,10 @@ static void getPixelColorsRGBA8(unsigned char* LODEPNG_RESTRICT buffer, size_t n } /*Similar to getPixelColorsRGBA8, but with 3-channel RGB output.*/ -static void getPixelColorsRGB8(unsigned char* LODEPNG_RESTRICT buffer, size_t numpixels, +static void getPixelColorsRGB8(unsigned char* LODEPNG_RESTRICT buffer, const size_t numpixels, const unsigned char* LODEPNG_RESTRICT in, const LodePNGColorMode* mode) { - const unsigned num_channels = 3; + const unsigned char num_channels = 3; size_t i; if(mode->colortype == LCT_GREY) { if(mode->bitdepth == 8) { @@ -1491,17 +1486,15 @@ static void getPixelColorRGBA16(unsigned short* r, unsigned short* g, unsigned s const unsigned char* in, size_t i, const LodePNGColorMode* mode) { if(mode->colortype == LCT_GREY) { *r = *g = *b = 256 * in[i * 2] + in[i * 2 + 1]; - if(mode->key_defined && 256U * in[i * 2] + in[i * 2 + 1] == mode->key_r) *a = 0; - else *a = 65535; + *a = 65535 * !(mode->key_defined && 256U * in[i * 2] + in[i * 2 + 1] == mode->key_r); } else if(mode->colortype == LCT_RGB) { *r = 256u * in[i * 6] + in[i * 6 + 1]; *g = 256u * in[i * 6 + 2] + in[i * 6 + 3]; *b = 256u * in[i * 6 + 4] + in[i * 6 + 5]; - if(mode->key_defined - && 256u * in[i * 6] + in[i * 6 + 1] == mode->key_r - && 256u * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g - && 256u * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0; - else *a = 65535; + *a = 65535 * !(mode->key_defined + && 256u * in[i * 6] + in[i * 6 + 1] == mode->key_r + && 256u * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g + && 256u * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b); } else if(mode->colortype == LCT_GREY_ALPHA) { *r = *g = *b = 256u * in[i * 4] + in[i * 4 + 1]; *a = 256u * in[i * 4 + 2] + in[i * 4 + 3]; @@ -1518,7 +1511,7 @@ unsigned lodepng_convert(unsigned char* out, const unsigned char* in, unsigned w, unsigned h) { size_t i; ColorTree tree; - size_t numpixels = (size_t)w * (size_t)h; + const size_t numpixels = (size_t)w * (size_t)h; unsigned error = 0; if(mode_in->colortype == LCT_PALETTE && !mode_in->palette) { @@ -1979,7 +1972,7 @@ are less than 256 colors, ... Updates values of mode with a potentially smaller color model. mode_out should contain the user chosen color model, but will be overwritten with the new chosen one.*/ static unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in, - const LodePNGColorStats* stats, size_t numpixels, unsigned div) { + const LodePNGColorStats* stats, const size_t numpixels, unsigned div) { unsigned error = 0; unsigned palettebits, palette_ok, gray_ok; size_t i, n; @@ -3999,7 +3992,7 @@ static unsigned lodepng_encode(unsigned char** out, size_t* outsize, LodePNGState* state, LodePNGPaletteSettings palset) { unsigned char* data = 0; /*uncompressed version of the IDAT chunk data*/ size_t datasize = 0; - size_t numpixels = (size_t)w * (size_t)h; + const size_t numpixels = (size_t)w * (size_t)h; ucvector outv = ucvector_init(0, 0); LodePNGInfo info; const LodePNGInfo* info_png = &state->info_png; From 5cd91bc8f0709276fa8a8d4881da1211ce34a161 Mon Sep 17 00:00:00 2001 From: woot000 <97068837+woot000@users.noreply.github.com> Date: Tue, 20 Sep 2022 08:12:46 -0500 Subject: [PATCH 09/10] Replace exit() with return statements, minor edits If multiple PNGs are being processed, returning errors for bad PNG files only would be better than exiting the program entirely --- src/lodepng/lodepng.cpp | 82 +++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 35 deletions(-) diff --git a/src/lodepng/lodepng.cpp b/src/lodepng/lodepng.cpp index ca62d931..9522af6a 100755 --- a/src/lodepng/lodepng.cpp +++ b/src/lodepng/lodepng.cpp @@ -310,7 +310,7 @@ unsigned lodepng_inflate(unsigned char** out, size_t* outsize, #if defined(_WIN32) || defined(WIN32) #define BUFSIZE 1024 * 128 unsigned char* buf = (unsigned char*)lodepng_malloc(BUFSIZE); - if(!buf) {exit(1);} + if(!buf) return 83; /*alloc fail*/ #else #define BUFSIZE 1024 * 32 unsigned char buf[BUFSIZE]; @@ -319,7 +319,7 @@ unsigned lodepng_inflate(unsigned char** out, size_t* outsize, inf.next_out = buf; inf.avail_out = BUFSIZE; - if(inflateInit2(&inf, -15) != Z_OK) return 83; + if(inflateInit2(&inf, -15) != Z_OK) return 83; /*alloc fail*/ while(1) { int err = inflate(&inf, Z_SYNC_FLUSH); @@ -339,8 +339,8 @@ unsigned lodepng_inflate(unsigned char** out, size_t* outsize, #if defined(_WIN32) || defined(WIN32) free(buf); #endif - unsigned ret = 95; - if(err == Z_MEM_ERROR) ret = 83; + unsigned ret = 95; /*overflow w/ idat chunk size*/ + if(err == Z_MEM_ERROR) ret = 83; /*alloc fail*/ inflateEnd(&inf); return ret; } @@ -349,7 +349,7 @@ unsigned lodepng_inflate(unsigned char** out, size_t* outsize, #if defined(_WIN32) || defined(WIN32) free(buf); #endif - return 83; + return 83; /*alloc fail*/ } #if defined(_WIN32) || defined(WIN32) @@ -1312,10 +1312,10 @@ static void getPixelColorRGBA8(unsigned char* r, unsigned char* g, mode test cases, optimized to convert the colors much faster, when converting to the common case of RGBA with 8 bit per channel. buffer must be RGBA with enough memory.*/ -static void getPixelColorsRGBA8(unsigned char* LODEPNG_RESTRICT buffer, const size_t numpixels, +static void getPixelColorsRGBA8(unsigned char* LODEPNG_RESTRICT buffer, size_t numpixels, const unsigned char* LODEPNG_RESTRICT in, const LodePNGColorMode* mode) { - const unsigned char num_channels = 4; + unsigned char num_channels = 4; size_t i; if(mode->colortype == LCT_GREY) { if(mode->bitdepth == 8) { @@ -1326,7 +1326,7 @@ static void getPixelColorsRGBA8(unsigned char* LODEPNG_RESTRICT buffer, const si if(mode->key_defined) { buffer -= numpixels * num_channels; for(i = 0; i != numpixels; ++i, buffer += num_channels) { - buffer[3] = buffer[3] * !(buffer[0] == mode->key_r); + buffer[3] *= !(buffer[0] == mode->key_r); } } } else if(mode->bitdepth == 16) { @@ -1352,7 +1352,7 @@ static void getPixelColorsRGBA8(unsigned char* LODEPNG_RESTRICT buffer, const si if(mode->key_defined) { buffer -= numpixels * num_channels; for(i = 0; i != numpixels; ++i, buffer += num_channels) { - buffer[3] = buffer[3] * !(buffer[0] == mode->key_r && buffer[1]== mode->key_g && buffer[2] == mode->key_b); + buffer[3] *= !(buffer[0] == mode->key_r && buffer[1]== mode->key_g && buffer[2] == mode->key_b); } } } else { @@ -1408,10 +1408,10 @@ static void getPixelColorsRGBA8(unsigned char* LODEPNG_RESTRICT buffer, const si } /*Similar to getPixelColorsRGBA8, but with 3-channel RGB output.*/ -static void getPixelColorsRGB8(unsigned char* LODEPNG_RESTRICT buffer, const size_t numpixels, +static void getPixelColorsRGB8(unsigned char* LODEPNG_RESTRICT buffer, size_t numpixels, const unsigned char* LODEPNG_RESTRICT in, const LodePNGColorMode* mode) { - const unsigned char num_channels = 3; + unsigned char num_channels = 3; size_t i; if(mode->colortype == LCT_GREY) { if(mode->bitdepth == 8) { @@ -1511,7 +1511,7 @@ unsigned lodepng_convert(unsigned char* out, const unsigned char* in, unsigned w, unsigned h) { size_t i; ColorTree tree; - const size_t numpixels = (size_t)w * (size_t)h; + size_t numpixels = (size_t)w * (size_t)h; unsigned error = 0; if(mode_in->colortype == LCT_PALETTE && !mode_in->palette) { @@ -1972,7 +1972,7 @@ are less than 256 colors, ... Updates values of mode with a potentially smaller color model. mode_out should contain the user chosen color model, but will be overwritten with the new chosen one.*/ static unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in, - const LodePNGColorStats* stats, const size_t numpixels, unsigned div) { + const LodePNGColorStats* stats, size_t numpixels, unsigned div) { unsigned error = 0; unsigned palettebits, palette_ok, gray_ok; size_t i, n; @@ -3166,7 +3166,7 @@ static void filterScanline(unsigned char* out, const unsigned char* scanline, co case 1: { /*Sub*/ size_t j = 0; for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; - for(i = bytewidth; i != length; ++i, ++j) out[i] = scanline[i] - scanline[j]; + for(i = bytewidth; i < length; ++i, ++j) out[i] = scanline[i] - scanline[j]; break; } case 2: /*Up*/ @@ -3192,13 +3192,13 @@ static void filterScanline(unsigned char* out, const unsigned char* scanline, co if(prevline) { /*paethPredictor(0, prevline[i], 0) is always prevline[i]*/ for(i = 0; i != bytewidth; ++i) out[i] = scanline[i] - prevline[i]; - for(i = bytewidth; i != length; ++i, ++j) { + for(i = bytewidth; i < length; ++i, ++j) { out[i] = scanline[i] - paethPredictor(scanline[j], prevline[i], prevline[j]); } } else { for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; - /*paethPredictor(scanline[i - bytewidth], 0, 0) is always scanline[i - bytewidth]*/ - for(i = bytewidth; i != length; ++i, ++j) out[i] = scanline[i] - scanline[j]; + /*paethPredictor(scanline[i - bytewidth], 0, 0) is always scanline[j]*/ + for(i = bytewidth; i < length; ++i, ++j) out[i] = scanline[i] - scanline[j]; } break; } @@ -3411,9 +3411,10 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned char* rem = 0; if(clean) { in2 = (unsigned char*)lodepng_malloc(linebytes * h); - if(!in2) exit(1); + if(!in2) return 83; /*alloc fail*/ memcpy(in2, in, linebytes * h); rem = (unsigned char*)lodepng_malloc(linebytes); + if(!rem) { free(in2); return 83; /*alloc fail*/ } } if(strategy == LFS_BRUTE_FORCE || (strategy >= LFS_INCREMENTAL && strategy <= LFS_INCREMENTAL3)) { unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ @@ -3435,8 +3436,12 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, /*brute force filter chooser. deflate the scanline after every filter attempt to see which one deflates best.*/ int err = deflateInit2(&stream, 3, Z_DEFLATED, windowbits(linebytes), 3, Z_FILTERED); - if(err != Z_OK) exit(1); - + if(err != Z_OK) { + if(clean) { free(in2); free(rem); } + for(type = 0; type != 5; ++type) free(attempt[type]); + return 83; /*alloc fail*/ + } + for(y = 0; y != h; ++y) { /*try the 5 filter types*/ memcpy(rem, &in2[y * linebytes], linebytes * clean); for(type = 0; type != 5; ++type) { @@ -3445,20 +3450,20 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, filterScanline2(&in2[y * linebytes], prevline, linebytes, type); filterScanline(attempt[type], &in2[y * linebytes], prevline, linebytes, bytewidth, type); } else filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); - + if(settings->filter_style < 2 || 1) { deflateTune(&stream, 258, 258, 258, 550 + (settings->filter_style) * 100); stream.next_in = (z_const unsigned char*)attempt[type]; stream.avail_in = linebytes; stream.avail_out = UINT_MAX; stream.next_out = (unsigned char*)1; - + deflate_nooutput(&stream, Z_FINISH); - + size = stream.total_out; deflateReset(&stream); } else size = ZopfliLZ77LazyLauncher(attempt[type], 0, linebytes, settings->filter_style); - + /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/ if(type == 0 || size < smallest) { bestType = (unsigned char)type; @@ -3481,11 +3486,15 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, z_stream teststream; size_t testsize = linebytes + 1; int err = deflateInit2(&stream, strategy == LFS_INCREMENTAL3 ? 1 : 2, Z_DEFLATED, windowbits(testsize * h), 8, Z_FILTERED); - if(err != Z_OK) exit(1); + if(err != Z_OK) { + if(clean) { free(in2); free(rem); } + for(type = 0; type != 5; ++type) free(attempt[type]); + return 83; /*alloc fail*/ + } if(strategy == LFS_INCREMENTAL) deflateTune(&stream, 16, 258, 258, 200); else if(strategy == LFS_INCREMENTAL2) deflateTune(&stream, 50, 258, 258, 1100); deflateCopy(&teststream, &stream, 1); - + unsigned char* dummy = (unsigned char*)1; /*Not used, but must not be NULL*/ unsigned char* prevline2 = 0; unsigned char* prevlinebuf = 0; @@ -3494,7 +3503,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, prevlinebuf = (unsigned char*)lodepng_malloc(linebytes); linebuf = (unsigned char*)lodepng_malloc(linebytes); } - + for(y = 0; y != h; ++y) { /*try the 5 filter types*/ for(type = 4; type + 1 != 0; --type) { /*type 0 is most likely, so end with that to reduce copying*/ size_t size = 0; @@ -3506,23 +3515,23 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, /*copy result to output buffer temporarily to include compression test*/ out[y * (linebytes + 1)] = type; /*the first byte of a scanline will be the filter type*/ for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[type][x]; - + deflateCopy(&teststream, &stream, 0); teststream.next_in = (z_const unsigned char*)(out + y * testsize); teststream.avail_in = testsize; teststream.avail_out = UINT_MAX; teststream.next_out = dummy; deflate_nooutput(&teststream, Z_FINISH); - + size = teststream.total_out; - + /*check if this is smallest size (or if type == 4 it's the first case so always store the values)*/ if(type == 4 || size < smallest) { bestType = (unsigned char)type; smallest = size; } } - + if(clean) { memcpy(linebuf, &in[y * linebytes], linebytes); filterScanline2(linebuf, prevline2, linebytes, bestType); @@ -3531,13 +3540,13 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, /*copy result to output buffer temporarily to include compression test*/ out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; - + stream.next_in = (z_const unsigned char*)(out + y * testsize); stream.avail_in = testsize; stream.avail_out = UINT_MAX; stream.next_out = dummy; deflate_nooutput(&stream, Z_NO_FLUSH); - + prevline = &in[y * linebytes]; if(clean) { memcpy(linebuf, &in[y * linebytes], linebytes); @@ -3666,7 +3675,10 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, stream.opaque = 0; #define TUNE deflateTune(&stream, 16, 258, 258, 200); int err = deflateInit2(&stream, 3, Z_DEFLATED, windowbits(h * (linebytes + 1)), 8, Z_FILTERED); - if(err != Z_OK) exit(1); + if(err != Z_OK) { + if(clean) { free(in2); free(rem); } + return 83; /*alloc fail*/ + } unsigned char* dummy = (unsigned char*)1; size_t popcnt; uint64_t r2[2]; @@ -3992,7 +4004,7 @@ static unsigned lodepng_encode(unsigned char** out, size_t* outsize, LodePNGState* state, LodePNGPaletteSettings palset) { unsigned char* data = 0; /*uncompressed version of the IDAT chunk data*/ size_t datasize = 0; - const size_t numpixels = (size_t)w * (size_t)h; + size_t numpixels = (size_t)w * (size_t)h; ucvector outv = ucvector_init(0, 0); LodePNGInfo info; const LodePNGInfo* info_png = &state->info_png; From 672aa1c2af24bf3a92c4baec8c97cfe2a82393aa Mon Sep 17 00:00:00 2001 From: woot000 <97068837+woot000@users.noreply.github.com> Date: Fri, 23 Sep 2022 20:51:12 -0500 Subject: [PATCH 10/10] Minor optimizations to encoding, formatting ~0.25s faster PNG encoding using Clang 15.0.0 --- src/lodepng/lodepng.cpp | 384 ++++++++++++++++++++-------------------- 1 file changed, 194 insertions(+), 190 deletions(-) diff --git a/src/lodepng/lodepng.cpp b/src/lodepng/lodepng.cpp index 9522af6a..74511f11 100755 --- a/src/lodepng/lodepng.cpp +++ b/src/lodepng/lodepng.cpp @@ -1069,39 +1069,38 @@ static void color_tree_cleanup(ColorTree* tree) { /*returns -1 if color not present, its index otherwise*/ static int color_tree_get(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) { static const unsigned colortable[256] = { - 0U, - 1U,16U,17U,256U,257U,272U,273U,4096U, - 4097U,4112U,4113U,4352U,4353U,4368U,4369U,65536U, - 65537U,65552U,65553U,65792U,65793U,65808U,65809U,69632U, - 69633U,69648U,69649U,69888U,69889U,69904U,69905U,1048576U, - 1048577U,1048592U,1048593U,1048832U,1048833U,1048848U,1048849U,1052672U, - 1052673U,1052688U,1052689U,1052928U,1052929U,1052944U,1052945U,1114112U, - 1114113U,1114128U,1114129U,1114368U,1114369U,1114384U,1114385U,1118208U, - 1118209U,1118224U,1118225U,1118464U,1118465U,1118480U,1118481U,16777216U, - 16777217U,16777232U,16777233U,16777472U,16777473U,16777488U,16777489U,16781312U, - 16781313U,16781328U,16781329U,16781568U,16781569U,16781584U,16781585U,16842752U, - 16842753U,16842768U,16842769U,16843008U,16843009U,16843024U,16843025U,16846848U, - 16846849U,16846864U,16846865U,16847104U,16847105U,16847120U,16847121U,17825792U, - 17825793U,17825808U,17825809U,17826048U,17826049U,17826064U,17826065U,17829888U, - 17829889U,17829904U,17829905U,17830144U,17830145U,17830160U,17830161U,17891328U, - 17891329U,17891344U,17891345U,17891584U,17891585U,17891600U,17891601U,17895424U, - 17895425U,17895440U,17895441U,17895680U,17895681U,17895696U,17895697U,268435456U, - 268435457U,268435472U,268435473U,268435712U,268435713U,268435728U,268435729U,268439552U, - 268439553U,268439568U,268439569U,268439808U,268439809U,268439824U,268439825U,268500992U, - 268500993U,268501008U,268501009U,268501248U,268501249U,268501264U,268501265U,268505088U, - 268505089U,268505104U,268505105U,268505344U,268505345U,268505360U,268505361U,269484032U, - 269484033U,269484048U,269484049U,269484288U,269484289U,269484304U,269484305U,269488128U, - 269488129U,269488144U,269488145U,269488384U,269488385U,269488400U,269488401U,269549568U, - 269549569U,269549584U,269549585U,269549824U,269549825U,269549840U,269549841U,269553664U, - 269553665U,269553680U,269553681U,269553920U,269553921U,269553936U,269553937U,285212672U, - 285212673U,285212688U,285212689U,285212928U,285212929U,285212944U,285212945U,285216768U, - 285216769U,285216784U,285216785U,285217024U,285217025U,285217040U,285217041U,285278208U, - 285278209U,285278224U,285278225U,285278464U,285278465U,285278480U,285278481U,285282304U, - 285282305U,285282320U,285282321U,285282560U,285282561U,285282576U,285282577U,286261248U, - 286261249U,286261264U,286261265U,286261504U,286261505U,286261520U,286261521U,286265344U, - 286265345U,286265360U,286265361U,286265600U,286265601U,286265616U,286265617U,286326784U, - 286326785U,286326800U,286326801U,286327040U,286327041U,286327056U,286327057U,286330880U, - 286330881U,286330896U,286330897U,286331136U,286331137U,286331152U,286331153U + 0U, 1U, 16U, 17U, 256U, 257U, 272U, 273U, + 4096U, 4097U, 4112U, 4113U, 4352U, 4353U, 4368U, 4369U, + 65536U, 65537U, 65552U, 65553U, 65792U, 65793U, 65808U, 65809U, + 69632U, 69633U, 69648U, 69649U, 69888U, 69889U, 69904U, 69905U, + 1048576U, 1048577U, 1048592U, 1048593U, 1048832U, 1048833U, 1048848U, 1048849U, + 1052672U, 1052673U, 1052688U, 1052689U, 1052928U, 1052929U, 1052944U, 1052945U, + 1114112U, 1114113U, 1114128U, 1114129U, 1114368U, 1114369U, 1114384U, 1114385U, + 1118208U, 1118209U, 1118224U, 1118225U, 1118464U, 1118465U, 1118480U, 1118481U, + 16777216U, 16777217U, 16777232U, 16777233U, 16777472U, 16777473U, 16777488U, 16777489U, + 16781312U, 16781313U, 16781328U, 16781329U, 16781568U, 16781569U, 16781584U, 16781585U, + 16842752U, 16842753U, 16842768U, 16842769U, 16843008U, 16843009U, 16843024U, 16843025U, + 16846848U, 16846849U, 16846864U, 16846865U, 16847104U, 16847105U, 16847120U, 16847121U, + 17825792U, 17825793U, 17825808U, 17825809U, 17826048U, 17826049U, 17826064U, 17826065U, + 17829888U, 17829889U, 17829904U, 17829905U, 17830144U, 17830145U, 17830160U, 17830161U, + 17891328U, 17891329U, 17891344U, 17891345U, 17891584U, 17891585U, 17891600U, 17891601U, + 17895424U, 17895425U, 17895440U, 17895441U, 17895680U, 17895681U, 17895696U, 17895697U, + 268435456U,268435457U,268435472U,268435473U,268435712U,268435713U,268435728U,268435729U, + 268439552U,268439553U,268439568U,268439569U,268439808U,268439809U,268439824U,268439825U, + 268500992U,268500993U,268501008U,268501009U,268501248U,268501249U,268501264U,268501265U, + 268505088U,268505089U,268505104U,268505105U,268505344U,268505345U,268505360U,268505361U, + 269484032U,269484033U,269484048U,269484049U,269484288U,269484289U,269484304U,269484305U, + 269488128U,269488129U,269488144U,269488145U,269488384U,269488385U,269488400U,269488401U, + 269549568U,269549569U,269549584U,269549585U,269549824U,269549825U,269549840U,269549841U, + 269553664U,269553665U,269553680U,269553681U,269553920U,269553921U,269553936U,269553937U, + 285212672U,285212673U,285212688U,285212689U,285212928U,285212929U,285212944U,285212945U, + 285216768U,285216769U,285216784U,285216785U,285217024U,285217025U,285217040U,285217041U, + 285278208U,285278209U,285278224U,285278225U,285278464U,285278465U,285278480U,285278481U, + 285282304U,285282305U,285282320U,285282321U,285282560U,285282561U,285282576U,285282577U, + 286261248U,286261249U,286261264U,286261265U,286261504U,286261505U,286261520U,286261521U, + 286265344U,286265345U,286265360U,286265361U,286265600U,286265601U,286265616U,286265617U, + 286326784U,286326785U,286326800U,286326801U,286327040U,286327041U,286327056U,286327057U, + 286330880U,286330881U,286330896U,286330897U,286331136U,286331137U,286331152U,286331153U }; unsigned x = 0; @@ -1119,7 +1118,7 @@ static int color_tree_get(ColorTree* tree, unsigned char r, unsigned char g, uns return tree ? tree->index : -1; } -static bool color_tree_inc(ColorTree* tree, +static unsigned char color_tree_inc(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) { int bit; for(bit = 0; bit < 8; ++bit) { @@ -1595,8 +1594,8 @@ void lodepng_color_stats_init(LodePNGColorStats* stats) { static unsigned getValueRequiredBits(unsigned char value) { if(value == 0 || value == 255) return 1; /*The scaling of 2-bit and 4-bit values uses multiples of 85 and 17*/ - if(value % 17 == 0) return value % 85 == 0 ? 2 : 4; - return 8; + else if(value % 17 == 0) return value % 85 == 0 ? 2 : 4; + else return 8; } /*stats must already have been inited. */ @@ -1606,13 +1605,13 @@ void lodepng_compute_color_stats(LodePNGColorStats* stats, size_t i; /* mark things as done already if it would be impossible to have a more expensive case */ - unsigned colored_done = lodepng_is_greyscale_type(mode_in) ? 1 : 0; - unsigned alpha_done = lodepng_can_have_alpha(mode_in) ? 0 : 1; - unsigned numcolors_done = 0; + unsigned char colored_done = lodepng_is_greyscale_type(mode_in) ? 1 : 0; + unsigned char alpha_done = lodepng_can_have_alpha(mode_in) ? 0 : 1; + unsigned char numcolors_done = 0; unsigned bpp = lodepng_get_bpp(mode_in); - unsigned bits_done = bpp == 1 ? 1 : 0; - unsigned sixteen = 0; /* whether the input image is 16 bit */ - unsigned maxnumcolors = 257; + unsigned char bits_done = bpp == 1 ? 1 : 0; + unsigned char sixteen = 0; /* whether the input image is 16 bit */ + unsigned short maxnumcolors = 257; if(bpp <= 8) maxnumcolors = bpp == 1 ? 2 : (bpp == 2 ? 4 : (bpp == 4 ? 16 : 256)); /*Check if the 16-bit input is truly 16-bit*/ @@ -1643,7 +1642,7 @@ void lodepng_compute_color_stats(LodePNGColorStats* stats, } if(!alpha_done) { - unsigned matchkey = (r == stats->key_r && g == stats->key_g && b == stats->key_b); + unsigned char matchkey = (r == stats->key_r && g == stats->key_g && b == stats->key_b); if(a != 65535 && (a != 0 || (stats->key && !matchkey))) { stats->alpha = 1; stats->key = 0; @@ -1697,7 +1696,7 @@ void lodepng_compute_color_stats(LodePNGColorStats* stats, } if(!alpha_done) { - unsigned matchkey = (r == stats->key_r && g == stats->key_g && b == stats->key_b); + unsigned char matchkey = (r == stats->key_r && g == stats->key_g && b == stats->key_b); if(a != 255 && (a != 0 || (stats->key && !matchkey))) { stats->alpha = 1; stats->key = 0; @@ -1769,7 +1768,7 @@ static void optimize_palette(LodePNGColorMode* mode_out, const uint32_t* image, color_tree_init(&tree); for(i = 0; i != (size_t)w * (size_t)h; ++i) { const unsigned char* c = (unsigned char*)&image[i]; - count = count + !color_tree_inc(&tree, c[0], c[1], c[2], c[3]); + count += !color_tree_inc(&tree, c[0], c[1], c[2], c[3]); } if(count == 0) return; //Silence clang static analyzer warnings @@ -1787,14 +1786,14 @@ static void optimize_palette(LodePNGColorMode* mode_out, const uint32_t* image, if(priority == LPPS_POPULARITY) sortfield[i] |= (color_tree_get(&tree, c[0], c[1], c[2], c[3]) + 1) << 8; else if(priority == LPPS_RGB) sortfield[i] |= uint64_t(c[0]) << 32 | uint64_t(c[1]) << 24 | uint64_t(c[2]) << 16; else { /*LPPS_YUV, LPPS_LAB, LPPS_MSB*/ - const uint64_t r = c[0]; - const uint64_t g = c[1]; - const uint64_t b = c[2]; + const unsigned char r = c[0]; + const unsigned char g = c[1]; + const unsigned char b = c[2]; if(priority == LPPS_YUV) { sortfield[i] |= - uint64_t(0.299 * (double)r + 0.587 * (double)g + 0.114 * (double)b) << 32 - | uint64_t((-0.14713 * (double)r - 0.28886 * (double)g + 0.436 * (double)b + 111.18) / 0.872) << 24 - | uint64_t((0.615 * (double)r - 0.51499 * (double)g - 0.10001 * (double)b + 156.825) / 1.23) << 16; + uint64_t( 0.299 * (double)r + 0.587 * (double)g + 0.114 * (double)b ) << 32 + | uint64_t((-0.14713 * (double)r - 0.28886 * (double)g + 0.436 * (double)b + 111.18) / 0.872) << 24 + | uint64_t(( 0.615 * (double)r - 0.51499 * (double)g - 0.10001 * (double)b + 156.825) / 1.23 ) << 16; } else if(priority == LPPS_LAB) { double vx = (0.4124564 * (double)r + 0.3575761 * (double)g + 0.1804375 * (double)b) / 255 / 95.047; double vy = (0.2126729 * (double)r + 0.7151522 * (double)g + 0.0721750 * (double)b) / 255 / 100; @@ -1812,14 +1811,14 @@ static void optimize_palette(LodePNGColorMode* mode_out, const uint32_t* image, | uint64_t((vy - vz) * 200 + 256) << 16; } else { /*LPPS_MSB*/ sortfield[i] |= - (r & 128) << 39 | (g & 128) << 38 | (b & 128) << 37 - | (r & 64) << 35 | (g & 64) << 34 | (b & 64) << 33 - | (r & 32) << 31 | (g & 32) << 30 | (b & 32) << 29 - | (r & 16) << 27 | (g & 16) << 26 | (b & 16) << 25 - | (r & 8) << 23 | (g & 8) << 22 | (b & 8) << 21 - | (r & 4) << 19 | (g & 4) << 18 | (b & 4) << 17 - | (r & 2) << 15 | (g & 2) << 14 | (b & 2) << 13 - | (r & 1) << 11 | (g & 1) << 10 | (b & 1) << 9; + ((uint64_t)r & 128) << 39 | ((uint64_t)g & 128) << 38 | ((uint64_t)b & 128) << 37 + | ((uint64_t)r & 64) << 35 | ((uint64_t)g & 64) << 34 | ((uint64_t)b & 64) << 33 + | ((uint64_t)r & 32) << 31 | ((uint64_t)g & 32) << 30 | ((uint64_t)b & 32) << 29 + | ((uint64_t)r & 16) << 27 | ((uint64_t)g & 16) << 26 | ((uint64_t)b & 16) << 25 + | ((uint64_t)r & 8) << 23 | ((uint64_t)g & 8) << 22 | ((uint64_t)b & 8) << 21 + | ((uint64_t)r & 4) << 19 | ((uint64_t)g & 4) << 18 | ((uint64_t)b & 4) << 17 + | ((uint64_t)r & 2) << 15 | ((uint64_t)g & 2) << 14 | ((uint64_t)b & 2) << 13 + | ((uint64_t)r & 1) << 11 | ((uint64_t)g & 1) << 10 | ((uint64_t)b & 1) << 9; } } } @@ -1966,19 +1965,24 @@ static void optimize_palette(LodePNGColorMode* mode_out, const uint32_t* image, color_tree_cleanup(&tree); } -/*Automatically chooses color type that gives smallest amount of bits in the -output image, e.g. grey if there are only grayscale pixels, palette if there -are less than 256 colors, ... -Updates values of mode with a potentially smaller color model. mode_out should -contain the user chosen color model, but will be overwritten with the new chosen one.*/ -static unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in, - const LodePNGColorStats* stats, size_t numpixels, unsigned div) { +/*Computes a minimal PNG color model that can contain all colors as indicated by the stats. +The stats should be computed with lodepng_compute_color_stats. +mode_in is raw color profile of the image the stats were computed on, to copy palette order from when relevant. +Minimal PNG color model means the color type and bit depth that gives smallest amount of bits in the output image, +e.g. gray if only grayscale pixels, palette if less than 256 colors, color key if only single transparent color, ... +This is used if auto_convert is enabled (it is by default). +*/ +static unsigned auto_choose_color(LodePNGColorMode* mode_out, + const LodePNGColorMode* mode_in, + const LodePNGColorStats* stats, size_t numpixels, unsigned div) { unsigned error = 0; - unsigned palettebits, palette_ok, gray_ok; + unsigned palettebits; size_t i, n; - unsigned alpha = stats->alpha; - unsigned key = stats->key; + unsigned char palette_ok, gray_ok; + + unsigned char alpha = stats->alpha; + unsigned char key = stats->key; unsigned bits = stats->bits; mode_out->key_defined = 0; @@ -3216,11 +3220,11 @@ static void filterScanline2(unsigned char* scanline, const unsigned char* prevli } } /*else if(filterType == 1) { - if (!scanline[3]) { + if(!scanline[3]) { *(unsigned*)scanline = 0; } for(int i = 4; i < length; i+=4) { - if (!scanline[i + 3]) { + if(!scanline[i + 3]) { scanline[i] = scanline[i - 4]; scanline[i + 1] = scanline[i - 3]; scanline[i + 2] = scanline[i - 2]; @@ -3344,7 +3348,7 @@ static char windowbits(unsigned long len) { result++; if(result < 9) return -9; - else if (result > 15) return -15; + else if(result > 15) return -15; else return -(char)result; } @@ -3406,7 +3410,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, prevline = &in[inindex]; } } else { - const bool clean = settings->clean_alpha && color->colortype == LCT_RGBA && color->bitdepth == 8 && !color->key_defined; + const unsigned char clean = settings->clean_alpha && color->colortype == LCT_RGBA && color->bitdepth == 8 && !color->key_defined; unsigned char* in2 = 0; unsigned char* rem = 0; if(clean) { @@ -3484,8 +3488,8 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, Now implemented with streaming, which reduces complexity to O(n) This is slow.*/ z_stream teststream; - size_t testsize = linebytes + 1; - int err = deflateInit2(&stream, strategy == LFS_INCREMENTAL3 ? 1 : 2, Z_DEFLATED, windowbits(testsize * h), 8, Z_FILTERED); + size_t filterbytes = linebytes + 1; + int err = deflateInit2(&stream, strategy == LFS_INCREMENTAL3 ? 1 : 2, Z_DEFLATED, windowbits(filterbytes * h), 8, Z_FILTERED); if(err != Z_OK) { if(clean) { free(in2); free(rem); } for(type = 0; type != 5; ++type) free(attempt[type]); @@ -3517,8 +3521,8 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[type][x]; deflateCopy(&teststream, &stream, 0); - teststream.next_in = (z_const unsigned char*)(out + y * testsize); - teststream.avail_in = testsize; + teststream.next_in = (z_const unsigned char*)(out + y * filterbytes); + teststream.avail_in = filterbytes; teststream.avail_out = UINT_MAX; teststream.next_out = dummy; deflate_nooutput(&teststream, Z_FINISH); @@ -3541,8 +3545,8 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; - stream.next_in = (z_const unsigned char*)(out + y * testsize); - stream.avail_in = testsize; + stream.next_in = (z_const unsigned char*)(out + y * filterbytes); + stream.avail_in = filterbytes; stream.avail_out = UINT_MAX; stream.next_out = dummy; deflate_nooutput(&stream, Z_NO_FLUSH); @@ -3648,6 +3652,16 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, } signaled.store(-settings->quiet); } + z_stream stream; + stream.zalloc = 0; + stream.zfree = 0; + stream.opaque = 0; + int err = deflateInit2(&stream, 3, Z_DEFLATED, windowbits(h * (linebytes + 1)), 8, Z_FILTERED); + if(err != Z_OK) { + if(clean) { free(in2); free(rem); } + return 83; /*alloc fail*/ + } + unsigned char* prevlinebuf = 0; unsigned char* linebuf; if(clean) { @@ -3655,43 +3669,28 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, linebuf = (unsigned char*)lodepng_malloc(linebytes); } - uint64_t r[2]; - initRandomUInt64(r); - - const int Strategies = strategy == LFS_ALL_CHEAP ? 3 : 0; /*Genetic algorithm filter finder. Attempts to find better filters through mutation and recombination.*/ - const size_t population_size = strategy == LFS_ALL_CHEAP ? Strategies : 19; - const size_t last = population_size - 1; + const unsigned char population_size = strategy == LFS_GENETIC ? 19 : 3; unsigned char* population = (unsigned char*)lodepng_malloc(h * population_size); size_t* size = (size_t*)lodepng_malloc(population_size * sizeof(size_t)); unsigned* ranking = (unsigned*)lodepng_malloc(population_size * sizeof(int)); - unsigned e, i, g, type; - unsigned best_size = UINT_MAX; - unsigned total_size = 0; - - z_stream stream; - stream.zalloc = 0; - stream.zfree = 0; - stream.opaque = 0; -#define TUNE deflateTune(&stream, 16, 258, 258, 200); - int err = deflateInit2(&stream, 3, Z_DEFLATED, windowbits(h * (linebytes + 1)), 8, Z_FILTERED); - if(err != Z_OK) { - if(clean) { free(in2); free(rem); } - return 83; /*alloc fail*/ - } + unsigned i, g; + unsigned best_size = UINT_MAX, total_size = 0; + unsigned char type; unsigned char* dummy = (unsigned char*)1; size_t popcnt; - uint64_t r2[2]; - initRandomUInt64(r2); - signal(SIGINT, sig_handler); - for(popcnt = 0; popcnt < h * (population_size - Strategies); ++popcnt) population[popcnt] = randomUInt64(r2) % 5; + if(strategy == LFS_GENETIC) { /*fill population with random values from 0-4*/ + uint64_t r[2]; + initRandomUInt64(r); + for(popcnt = 0; popcnt < h * population_size; ++popcnt) population[popcnt] = randomUInt64(r) % 5; + } - for(g = 0; g <= last; ++g) { + for(g = 0; g != population_size; ++g) { if(strategy == LFS_ALL_CHEAP) { settings->filter_strategy = (LodePNGFilterStrategy)(g + 11); filter(out, in, w, h, color, settings); settings->filter_strategy = LFS_ALL_CHEAP; - for(size_t k = 0; k < h * (linebytes + 1); k += (linebytes + 1)) population[popcnt++] = out[k]; + for(size_t k = 0; k != h * (linebytes + 1); k += (linebytes + 1)) population[popcnt++] = out[k]; } prevline = 0; for(y = 0; y < h; ++y) { @@ -3708,7 +3707,7 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, prevline = &in[y * linebytes]; } } - TUNE + deflateTune(&stream, 16, 258, 258, 200); stream.next_in = (z_const unsigned char*)out; stream.avail_in = h * (linebytes + 1); stream.avail_out = UINT_MAX; @@ -3721,99 +3720,104 @@ static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, total_size += size[g]; ranking[g] = g; } - for(i = 0; strategy == LFS_ALL_CHEAP && i < population_size; i++) { - if(size[i] < best_size) { - ranking[0] = i; - best_size = size[i]; - } - } - unsigned e_since_best = 0; - /*ctrl-c signals last iteration*/ - for(e = 0; strategy == LFS_GENETIC && e_since_best < 500 && signaled.load() <= 0; ++e) { - /*resort rankings*/ - unsigned c, j, t; - for(i = 1; i < population_size; ++i) { - t = ranking[i]; - for(j = i - 1; j + 1 > 0 && size[ranking[j]] > size[t]; --j) ranking[j + 1] = ranking[j]; - ranking[j + 1] = t; + if(strategy == LFS_ALL_CHEAP) { + for(i = 0; i != population_size; i++) { + if(size[i] < best_size) { + ranking[0] = i; + best_size = size[i]; + } } - if(size[ranking[0]] < best_size) { - best_size = size[ranking[0]]; - e_since_best = 0; - if(!settings->quiet) { - printf("Generation %d: %d bytes\n", e, best_size); - fflush(stdout); + } else { /*LFS_GENETIC, ctrl-c signals last iteration*/ + signal(SIGINT, sig_handler); + uint64_t r[2]; + initRandomUInt64(r); + unsigned e, e_since_best = 0; + for(e = 0; e_since_best != 500 && signaled.load() <= 0; ++e) { + /*resort rankings*/ + unsigned char c, t; + unsigned j; + for(i = 1; i != population_size; ++i) { + t = ranking[i]; + for(j = i - 1; j + 1 > 0 && size[ranking[j]] > size[t]; --j) ranking[j + 1] = ranking[j]; + ranking[j + 1] = t; } - } else ++e_since_best; - /*generate offspring*/ - for(c = 0; c < 3; ++c) { - /*tournament selection*/ - /*parent 1*/ - unsigned selection_size = UINT_MAX; - for(t = 0; t < 2; ++t) selection_size = std::min(unsigned(randomDecimal(r) * total_size), selection_size); - unsigned size_sum = 0; - for(j = 0; size_sum <= selection_size; ++j) size_sum += size[ranking[j]]; - unsigned char* parent1 = &population[ranking[j - 1] * h]; - /*parent 2*/ - selection_size = UINT_MAX; - for(t = 0; t < 2; ++t) selection_size = std::min(unsigned(randomDecimal(r) * total_size), selection_size); - size_sum = 0; - for(j = 0; size_sum <= selection_size; ++j) size_sum += size[ranking[j]]; - unsigned char* parent2 = &population[ranking[j - 1] * h]; - /*two-point crossover*/ - unsigned char* child = &population[(ranking[last - c]) * h]; - if(randomDecimal(r) < 0.9) { - unsigned crossover1 = randomUInt64(r) % h; - unsigned crossover2 = randomUInt64(r) % h; - if(crossover1 > crossover2) { - crossover1 ^= crossover2; - crossover2 ^= crossover1; - crossover1 ^= crossover2; + if(size[ranking[0]] < best_size) { + best_size = size[ranking[0]]; + e_since_best = 0; + if(!settings->quiet) { + printf("Generation %d: %d bytes\n", e, best_size); + fflush(stdout); } - if(child != parent1) { - memcpy(child, parent1, crossover1); - memcpy(&child[crossover2], &parent1[crossover2], h - crossover2); + } else ++e_since_best; + /*generate offspring*/ + for(c = 0; c != 3; ++c) { + /*tournament selection*/ + /*parent 1*/ + unsigned selection_size = UINT_MAX; + for(t = 0; t != 2; ++t) selection_size = std::min(unsigned(randomDecimal(r) * total_size), selection_size); + unsigned size_sum = 0; + for(j = 0; size_sum <= selection_size; ++j) size_sum += size[ranking[j]]; + unsigned char* parent1 = &population[ranking[j - 1] * h]; + /*parent 2*/ + selection_size = UINT_MAX; + for(t = 0; t != 2; ++t) selection_size = std::min(unsigned(randomDecimal(r) * total_size), selection_size); + size_sum = 0; + for(j = 0; size_sum <= selection_size; ++j) size_sum += size[ranking[j]]; + unsigned char* parent2 = &population[ranking[j - 1] * h]; + /*two-point crossover*/ + unsigned char* child = &population[(ranking[(population_size - 1) - c]) * h]; + if(randomDecimal(r) < 0.9) { + unsigned crossover1 = randomUInt64(r) % h; + unsigned crossover2 = randomUInt64(r) % h; + if(crossover1 > crossover2) { + crossover1 ^= crossover2; + crossover2 ^= crossover1; + crossover1 ^= crossover2; + } + if(child != parent1) { + memcpy(child, parent1, crossover1); + memcpy(&child[crossover2], &parent1[crossover2], h - crossover2); + } + if(child != parent2) memcpy(&child[crossover1], &parent2[crossover1], crossover2 - crossover1); } - if(child != parent2) memcpy(&child[crossover1], &parent2[crossover1], crossover2 - crossover1); - } - else if(randomUInt64(r) & 1) memcpy(child, parent1, h); - else memcpy(child, parent2, h); - /*mutation*/ - for(y = 0; y < h; ++y) if(randomDecimal(r) < 0.01) child[y] = randomUInt64(r) % 5; - /*evaluate new genome*/ - total_size -= size[ranking[last - c]]; - prevline = 0; - for(y = 0; y < h; ++y) { - type = child[y]; - out[y * (linebytes + 1)] = type; - if(clean) { - memcpy(linebuf, &in[y * linebytes], linebytes); - filterScanline2(linebuf, prevline, linebytes, type); - filterScanline(&out[y * (linebytes + 1) + 1], linebuf, prevline, linebytes, bytewidth, type); - memcpy(prevlinebuf, linebuf, linebytes); - prevline = prevlinebuf; - } else { - filterScanline(&out[y * (linebytes + 1) + 1], &in[y * linebytes], prevline, linebytes, bytewidth, type); - prevline = &in[y * linebytes]; + else if(randomUInt64(r) & 1) memcpy(child, parent1, h); + else memcpy(child, parent2, h); + /*mutation*/ + for(y = 0; y != h; ++y) if(randomDecimal(r) < 0.01) child[y] = randomUInt64(r) % 5; + /*evaluate new genome*/ + total_size -= size[ranking[(population_size - 1) - c]]; + prevline = 0; + for(y = 0; y != h; ++y) { + type = child[y]; + out[y * (linebytes + 1)] = type; + if(clean) { + memcpy(linebuf, &in[y * linebytes], linebytes); + filterScanline2(linebuf, prevline, linebytes, type); + filterScanline(&out[y * (linebytes + 1) + 1], linebuf, prevline, linebytes, bytewidth, type); + memcpy(prevlinebuf, linebuf, linebytes); + prevline = prevlinebuf; + } else { + filterScanline(&out[y * (linebytes + 1) + 1], &in[y * linebytes], prevline, linebytes, bytewidth, type); + prevline = &in[y * linebytes]; + } } + deflateTune(&stream, 16, 258, 258, 200); + stream.next_in = (z_const unsigned char*)out; + stream.avail_in = h * (linebytes + 1); + stream.avail_out = UINT_MAX; + stream.next_out = dummy; + + deflate_nooutput(&stream, Z_FINISH); + + size[ranking[(population_size - 1) - c]] = stream.total_out; + deflateReset(&stream); + total_size += size[ranking[(population_size - 1) - c]]; } - TUNE - - stream.next_in = (z_const unsigned char*)out; - stream.avail_in = h * (linebytes + 1); - stream.avail_out = UINT_MAX; - stream.next_out = dummy; - - deflate_nooutput(&stream, Z_FINISH); - - size[ranking[last - c]] = stream.total_out; - deflateReset(&stream); - total_size += size[ranking[last - c]]; } } /*final choice*/ prevline = 0; - for(y = 0; y < h; ++y) { + for(y = 0; y != h; ++y) { type = population[ranking[0] * h + y]; out[y * (linebytes + 1)] = type; if(clean) { @@ -4048,7 +4052,7 @@ static unsigned lodepng_encode(unsigned char** out, size_t* outsize, && ((w > 225 && h > 225) || numpixels > 75000 || (w > 250 && numpixels > 40000)); } - state->error = lodepng_auto_choose_color(&info.color, &state->info_raw, &stats, numpixels, state->div); + state->error = auto_choose_color(&info.color, &state->info_raw, &stats, numpixels, state->div); if(state->error) goto cleanup; if(info.color.colortype == LCT_PALETTE && palset.order != LPOS_NONE) { if(palset._first & 1) {