1
0
Fork 0

Merge pull request #41250 from akien-mga/2.1-cherrypicks

[2.1] Third-party library updates and Travis fix
This commit is contained in:
Rémi Verschelde 2020-08-14 14:58:17 +02:00 committed by GitHub
commit a7cdffc39b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
66 changed files with 1108 additions and 889 deletions

View File

@ -36,7 +36,7 @@ matrix:
os: osx os: osx
osx_image: xcode9.3 osx_image: xcode9.3
compiler: clang compiler: clang
- env: GODOT_TARGET=server TOOLS=no CACHE_NAME=${GODOT_TARGET}-clang" - env: GODOT_TARGET=server TOOLS=no CACHE_NAME=${GODOT_TARGET}-clang
os: linux os: linux
compiler: clang compiler: clang

View File

@ -222,7 +222,7 @@ License: curl
Files: ./thirdparty/misc/fastlz.c Files: ./thirdparty/misc/fastlz.c
./thirdparty/misc/fastlz.h ./thirdparty/misc/fastlz.h
Comment: FastLZ Comment: FastLZ
Copyright: 2005-2007, Ariya Hidayat Copyright: 2005-2020, Ariya Hidayat
License: Expat License: Expat
Files: ./thirdparty/misc/hq2x.cpp Files: ./thirdparty/misc/hq2x.cpp

View File

@ -154,7 +154,7 @@ Files extracted from upstream source:
## libwebp ## libwebp
- Upstream: https://chromium.googlesource.com/webm/libwebp/ - Upstream: https://chromium.googlesource.com/webm/libwebp/
- Version: 1.0.2 - Version: 1.1.0
- License: BSD-3-Clause - License: BSD-3-Clause
Files extracted from upstream source: Files extracted from upstream source:
@ -197,8 +197,8 @@ Collection of single-file libraries used in Godot components.
* Version: latest, as of April 2017 * Version: latest, as of April 2017
* License: Public Domain * License: Public Domain
- `fastlz.{c,h}` - `fastlz.{c,h}`
* Upstream: https://code.google.com/archive/p/fastlz * Upstream: https://github.com/ariya/FastLZ
* Version: svn (r12) * Version: 0.5.0 (4f20f54d46f5a6dd4fae4def134933369b7602d2, 2020)
* License: MIT * License: MIT
- `hq2x.{cpp,h}` - `hq2x.{cpp,h}`
* Upstream: https://github.com/brunexgeek/hqx * Upstream: https://github.com/brunexgeek/hqx
@ -248,7 +248,7 @@ Collection of single-file libraries used in Godot components.
## openssl ## openssl
- Upstream: https://www.openssl.org - Upstream: https://www.openssl.org
- Version: 1.0.2t - Version: 1.0.2u
- License: OpenSSL license / BSD-like - License: OpenSSL license / BSD-like
Files extracted from the upstream source: Files extracted from the upstream source:

View File

@ -732,7 +732,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
mem += f_info_size; mem += f_info_size;
dec->thread_ctx_.id_ = 0; dec->thread_ctx_.id_ = 0;
dec->thread_ctx_.f_info_ = dec->f_info_; dec->thread_ctx_.f_info_ = dec->f_info_;
if (dec->mt_method_ > 0) { if (dec->filter_type_ > 0 && dec->mt_method_ > 0) {
// secondary cache line. The deblocking process need to make use of the // secondary cache line. The deblocking process need to make use of the
// filtering strength from previous macroblock row, while the new ones // filtering strength from previous macroblock row, while the new ones
// are being decoded in parallel. We'll just swap the pointers. // are being decoded in parallel. We'll just swap the pointers.

View File

@ -166,9 +166,11 @@ static int AppendToMemBuffer(WebPIDecoder* const idec,
VP8Decoder* const dec = (VP8Decoder*)idec->dec_; VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
MemBuffer* const mem = &idec->mem_; MemBuffer* const mem = &idec->mem_;
const int need_compressed_alpha = NeedCompressedAlpha(idec); const int need_compressed_alpha = NeedCompressedAlpha(idec);
const uint8_t* const old_start = mem->buf_ + mem->start_; const uint8_t* const old_start =
(mem->buf_ == NULL) ? NULL : mem->buf_ + mem->start_;
const uint8_t* const old_base = const uint8_t* const old_base =
need_compressed_alpha ? dec->alpha_data_ : old_start; need_compressed_alpha ? dec->alpha_data_ : old_start;
assert(mem->buf_ != NULL || mem->start_ == 0);
assert(mem->mode_ == MEM_MODE_APPEND); assert(mem->mode_ == MEM_MODE_APPEND);
if (data_size > MAX_CHUNK_PAYLOAD) { if (data_size > MAX_CHUNK_PAYLOAD) {
// security safeguard: trying to allocate more than what the format // security safeguard: trying to allocate more than what the format
@ -184,7 +186,7 @@ static int AppendToMemBuffer(WebPIDecoder* const idec,
uint8_t* const new_buf = uint8_t* const new_buf =
(uint8_t*)WebPSafeMalloc(extra_size, sizeof(*new_buf)); (uint8_t*)WebPSafeMalloc(extra_size, sizeof(*new_buf));
if (new_buf == NULL) return 0; if (new_buf == NULL) return 0;
memcpy(new_buf, old_base, current_size); if (old_base != NULL) memcpy(new_buf, old_base, current_size);
WebPSafeFree(mem->buf_); WebPSafeFree(mem->buf_);
mem->buf_ = new_buf; mem->buf_ = new_buf;
mem->buf_size_ = (size_t)extra_size; mem->buf_size_ = (size_t)extra_size;
@ -192,6 +194,7 @@ static int AppendToMemBuffer(WebPIDecoder* const idec,
mem->end_ = current_size; mem->end_ = current_size;
} }
assert(mem->buf_ != NULL);
memcpy(mem->buf_ + mem->end_, data, data_size); memcpy(mem->buf_ + mem->end_, data, data_size);
mem->end_ += data_size; mem->end_ += data_size;
assert(mem->end_ <= mem->buf_size_); assert(mem->end_ <= mem->buf_size_);
@ -204,7 +207,9 @@ static int RemapMemBuffer(WebPIDecoder* const idec,
const uint8_t* const data, size_t data_size) { const uint8_t* const data, size_t data_size) {
MemBuffer* const mem = &idec->mem_; MemBuffer* const mem = &idec->mem_;
const uint8_t* const old_buf = mem->buf_; const uint8_t* const old_buf = mem->buf_;
const uint8_t* const old_start = old_buf + mem->start_; const uint8_t* const old_start =
(old_buf == NULL) ? NULL : old_buf + mem->start_;
assert(old_buf != NULL || mem->start_ == 0);
assert(mem->mode_ == MEM_MODE_MAP); assert(mem->mode_ == MEM_MODE_MAP);
if (data_size < mem->buf_size_) return 0; // can't remap to a shorter buffer! if (data_size < mem->buf_size_) return 0; // can't remap to a shorter buffer!

View File

@ -61,12 +61,17 @@ static const uint16_t kAcTable[128] = {
void VP8ParseQuant(VP8Decoder* const dec) { void VP8ParseQuant(VP8Decoder* const dec) {
VP8BitReader* const br = &dec->br_; VP8BitReader* const br = &dec->br_;
const int base_q0 = VP8GetValue(br, 7); const int base_q0 = VP8GetValue(br, 7, "global-header");
const int dqy1_dc = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0; const int dqy1_dc = VP8Get(br, "global-header") ?
const int dqy2_dc = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0; VP8GetSignedValue(br, 4, "global-header") : 0;
const int dqy2_ac = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0; const int dqy2_dc = VP8Get(br, "global-header") ?
const int dquv_dc = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0; VP8GetSignedValue(br, 4, "global-header") : 0;
const int dquv_ac = VP8Get(br) ? VP8GetSignedValue(br, 4) : 0; const int dqy2_ac = VP8Get(br, "global-header") ?
VP8GetSignedValue(br, 4, "global-header") : 0;
const int dquv_dc = VP8Get(br, "global-header") ?
VP8GetSignedValue(br, 4, "global-header") : 0;
const int dquv_ac = VP8Get(br, "global-header") ?
VP8GetSignedValue(br, 4, "global-header") : 0;
const VP8SegmentHeader* const hdr = &dec->segment_hdr_; const VP8SegmentHeader* const hdr = &dec->segment_hdr_;
int i; int i;

View File

@ -296,20 +296,21 @@ static void ParseIntraMode(VP8BitReader* const br,
// to decode more than 1 keyframe. // to decode more than 1 keyframe.
if (dec->segment_hdr_.update_map_) { if (dec->segment_hdr_.update_map_) {
// Hardcoded tree parsing // Hardcoded tree parsing
block->segment_ = !VP8GetBit(br, dec->proba_.segments_[0]) block->segment_ = !VP8GetBit(br, dec->proba_.segments_[0], "segments")
? VP8GetBit(br, dec->proba_.segments_[1]) ? VP8GetBit(br, dec->proba_.segments_[1], "segments")
: 2 + VP8GetBit(br, dec->proba_.segments_[2]); : VP8GetBit(br, dec->proba_.segments_[2], "segments") + 2;
} else { } else {
block->segment_ = 0; // default for intra block->segment_ = 0; // default for intra
} }
if (dec->use_skip_proba_) block->skip_ = VP8GetBit(br, dec->skip_p_); if (dec->use_skip_proba_) block->skip_ = VP8GetBit(br, dec->skip_p_, "skip");
block->is_i4x4_ = !VP8GetBit(br, 145); // decide for B_PRED first block->is_i4x4_ = !VP8GetBit(br, 145, "block-size");
if (!block->is_i4x4_) { if (!block->is_i4x4_) {
// Hardcoded 16x16 intra-mode decision tree. // Hardcoded 16x16 intra-mode decision tree.
const int ymode = const int ymode =
VP8GetBit(br, 156) ? (VP8GetBit(br, 128) ? TM_PRED : H_PRED) VP8GetBit(br, 156, "pred-modes") ?
: (VP8GetBit(br, 163) ? V_PRED : DC_PRED); (VP8GetBit(br, 128, "pred-modes") ? TM_PRED : H_PRED) :
(VP8GetBit(br, 163, "pred-modes") ? V_PRED : DC_PRED);
block->imodes_[0] = ymode; block->imodes_[0] = ymode;
memset(top, ymode, 4 * sizeof(*top)); memset(top, ymode, 4 * sizeof(*top));
memset(left, ymode, 4 * sizeof(*left)); memset(left, ymode, 4 * sizeof(*left));
@ -323,22 +324,25 @@ static void ParseIntraMode(VP8BitReader* const br,
const uint8_t* const prob = kBModesProba[top[x]][ymode]; const uint8_t* const prob = kBModesProba[top[x]][ymode];
#if (USE_GENERIC_TREE == 1) #if (USE_GENERIC_TREE == 1)
// Generic tree-parsing // Generic tree-parsing
int i = kYModesIntra4[VP8GetBit(br, prob[0])]; int i = kYModesIntra4[VP8GetBit(br, prob[0], "pred-modes")];
while (i > 0) { while (i > 0) {
i = kYModesIntra4[2 * i + VP8GetBit(br, prob[i])]; i = kYModesIntra4[2 * i + VP8GetBit(br, prob[i], "pred-modes")];
} }
ymode = -i; ymode = -i;
#else #else
// Hardcoded tree parsing // Hardcoded tree parsing
ymode = !VP8GetBit(br, prob[0]) ? B_DC_PRED : ymode = !VP8GetBit(br, prob[0], "pred-modes") ? B_DC_PRED :
!VP8GetBit(br, prob[1]) ? B_TM_PRED : !VP8GetBit(br, prob[1], "pred-modes") ? B_TM_PRED :
!VP8GetBit(br, prob[2]) ? B_VE_PRED : !VP8GetBit(br, prob[2], "pred-modes") ? B_VE_PRED :
!VP8GetBit(br, prob[3]) ? !VP8GetBit(br, prob[3], "pred-modes") ?
(!VP8GetBit(br, prob[4]) ? B_HE_PRED : (!VP8GetBit(br, prob[4], "pred-modes") ? B_HE_PRED :
(!VP8GetBit(br, prob[5]) ? B_RD_PRED : B_VR_PRED)) : (!VP8GetBit(br, prob[5], "pred-modes") ? B_RD_PRED
(!VP8GetBit(br, prob[6]) ? B_LD_PRED : : B_VR_PRED)) :
(!VP8GetBit(br, prob[7]) ? B_VL_PRED : (!VP8GetBit(br, prob[6], "pred-modes") ? B_LD_PRED :
(!VP8GetBit(br, prob[8]) ? B_HD_PRED : B_HU_PRED))); (!VP8GetBit(br, prob[7], "pred-modes") ? B_VL_PRED :
(!VP8GetBit(br, prob[8], "pred-modes") ? B_HD_PRED
: B_HU_PRED))
);
#endif // USE_GENERIC_TREE #endif // USE_GENERIC_TREE
top[x] = ymode; top[x] = ymode;
} }
@ -348,9 +352,9 @@ static void ParseIntraMode(VP8BitReader* const br,
} }
} }
// Hardcoded UVMode decision tree // Hardcoded UVMode decision tree
block->uvmode_ = !VP8GetBit(br, 142) ? DC_PRED block->uvmode_ = !VP8GetBit(br, 142, "pred-modes-uv") ? DC_PRED
: !VP8GetBit(br, 114) ? V_PRED : !VP8GetBit(br, 114, "pred-modes-uv") ? V_PRED
: VP8GetBit(br, 183) ? TM_PRED : H_PRED; : VP8GetBit(br, 183, "pred-modes-uv") ? TM_PRED : H_PRED;
} }
int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec) { int VP8ParseIntraModeRow(VP8BitReader* const br, VP8Decoder* const dec) {
@ -514,8 +518,10 @@ void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) {
for (b = 0; b < NUM_BANDS; ++b) { for (b = 0; b < NUM_BANDS; ++b) {
for (c = 0; c < NUM_CTX; ++c) { for (c = 0; c < NUM_CTX; ++c) {
for (p = 0; p < NUM_PROBAS; ++p) { for (p = 0; p < NUM_PROBAS; ++p) {
const int v = VP8GetBit(br, CoeffsUpdateProba[t][b][c][p]) ? const int v =
VP8GetValue(br, 8) : CoeffsProba0[t][b][c][p]; VP8GetBit(br, CoeffsUpdateProba[t][b][c][p], "global-header") ?
VP8GetValue(br, 8, "global-header") :
CoeffsProba0[t][b][c][p];
proba->bands_[t][b].probas_[c][p] = v; proba->bands_[t][b].probas_[c][p] = v;
} }
} }
@ -524,9 +530,8 @@ void VP8ParseProba(VP8BitReader* const br, VP8Decoder* const dec) {
proba->bands_ptr_[t][b] = &proba->bands_[t][kBands[b]]; proba->bands_ptr_[t][b] = &proba->bands_[t][kBands[b]];
} }
} }
dec->use_skip_proba_ = VP8Get(br); dec->use_skip_proba_ = VP8Get(br, "global-header");
if (dec->use_skip_proba_) { if (dec->use_skip_proba_) {
dec->skip_p_ = VP8GetValue(br, 8); dec->skip_p_ = VP8GetValue(br, 8, "global-header");
} }
} }

View File

@ -161,23 +161,26 @@ static int ParseSegmentHeader(VP8BitReader* br,
VP8SegmentHeader* hdr, VP8Proba* proba) { VP8SegmentHeader* hdr, VP8Proba* proba) {
assert(br != NULL); assert(br != NULL);
assert(hdr != NULL); assert(hdr != NULL);
hdr->use_segment_ = VP8Get(br); hdr->use_segment_ = VP8Get(br, "global-header");
if (hdr->use_segment_) { if (hdr->use_segment_) {
hdr->update_map_ = VP8Get(br); hdr->update_map_ = VP8Get(br, "global-header");
if (VP8Get(br)) { // update data if (VP8Get(br, "global-header")) { // update data
int s; int s;
hdr->absolute_delta_ = VP8Get(br); hdr->absolute_delta_ = VP8Get(br, "global-header");
for (s = 0; s < NUM_MB_SEGMENTS; ++s) { for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
hdr->quantizer_[s] = VP8Get(br) ? VP8GetSignedValue(br, 7) : 0; hdr->quantizer_[s] = VP8Get(br, "global-header") ?
VP8GetSignedValue(br, 7, "global-header") : 0;
} }
for (s = 0; s < NUM_MB_SEGMENTS; ++s) { for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
hdr->filter_strength_[s] = VP8Get(br) ? VP8GetSignedValue(br, 6) : 0; hdr->filter_strength_[s] = VP8Get(br, "global-header") ?
VP8GetSignedValue(br, 6, "global-header") : 0;
} }
} }
if (hdr->update_map_) { if (hdr->update_map_) {
int s; int s;
for (s = 0; s < MB_FEATURE_TREE_PROBS; ++s) { for (s = 0; s < MB_FEATURE_TREE_PROBS; ++s) {
proba->segments_[s] = VP8Get(br) ? VP8GetValue(br, 8) : 255u; proba->segments_[s] = VP8Get(br, "global-header") ?
VP8GetValue(br, 8, "global-header") : 255u;
} }
} }
} else { } else {
@ -205,7 +208,7 @@ static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
size_t last_part; size_t last_part;
size_t p; size_t p;
dec->num_parts_minus_one_ = (1 << VP8GetValue(br, 2)) - 1; dec->num_parts_minus_one_ = (1 << VP8GetValue(br, 2, "global-header")) - 1;
last_part = dec->num_parts_minus_one_; last_part = dec->num_parts_minus_one_;
if (size < 3 * last_part) { if (size < 3 * last_part) {
// we can't even read the sizes with sz[]! That's a failure. // we can't even read the sizes with sz[]! That's a failure.
@ -229,21 +232,21 @@ static VP8StatusCode ParsePartitions(VP8Decoder* const dec,
// Paragraph 9.4 // Paragraph 9.4
static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) { static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) {
VP8FilterHeader* const hdr = &dec->filter_hdr_; VP8FilterHeader* const hdr = &dec->filter_hdr_;
hdr->simple_ = VP8Get(br); hdr->simple_ = VP8Get(br, "global-header");
hdr->level_ = VP8GetValue(br, 6); hdr->level_ = VP8GetValue(br, 6, "global-header");
hdr->sharpness_ = VP8GetValue(br, 3); hdr->sharpness_ = VP8GetValue(br, 3, "global-header");
hdr->use_lf_delta_ = VP8Get(br); hdr->use_lf_delta_ = VP8Get(br, "global-header");
if (hdr->use_lf_delta_) { if (hdr->use_lf_delta_) {
if (VP8Get(br)) { // update lf-delta? if (VP8Get(br, "global-header")) { // update lf-delta?
int i; int i;
for (i = 0; i < NUM_REF_LF_DELTAS; ++i) { for (i = 0; i < NUM_REF_LF_DELTAS; ++i) {
if (VP8Get(br)) { if (VP8Get(br, "global-header")) {
hdr->ref_lf_delta_[i] = VP8GetSignedValue(br, 6); hdr->ref_lf_delta_[i] = VP8GetSignedValue(br, 6, "global-header");
} }
} }
for (i = 0; i < NUM_MODE_LF_DELTAS; ++i) { for (i = 0; i < NUM_MODE_LF_DELTAS; ++i) {
if (VP8Get(br)) { if (VP8Get(br, "global-header")) {
hdr->mode_lf_delta_[i] = VP8GetSignedValue(br, 6); hdr->mode_lf_delta_[i] = VP8GetSignedValue(br, 6, "global-header");
} }
} }
} }
@ -352,8 +355,8 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
buf_size -= frm_hdr->partition_length_; buf_size -= frm_hdr->partition_length_;
if (frm_hdr->key_frame_) { if (frm_hdr->key_frame_) {
pic_hdr->colorspace_ = VP8Get(br); pic_hdr->colorspace_ = VP8Get(br, "global-header");
pic_hdr->clamp_type_ = VP8Get(br); pic_hdr->clamp_type_ = VP8Get(br, "global-header");
} }
if (!ParseSegmentHeader(br, &dec->segment_hdr_, &dec->proba_)) { if (!ParseSegmentHeader(br, &dec->segment_hdr_, &dec->proba_)) {
return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR, return VP8SetError(dec, VP8_STATUS_BITSTREAM_ERROR,
@ -378,7 +381,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
"Not a key frame."); "Not a key frame.");
} }
VP8Get(br); // ignore the value of update_proba_ VP8Get(br, "global-header"); // ignore the value of update_proba_
VP8ParseProba(br, dec); VP8ParseProba(br, dec);
@ -403,28 +406,28 @@ static const uint8_t kZigzag[16] = {
// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2 // See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2
static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) { static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
int v; int v;
if (!VP8GetBit(br, p[3])) { if (!VP8GetBit(br, p[3], "coeffs")) {
if (!VP8GetBit(br, p[4])) { if (!VP8GetBit(br, p[4], "coeffs")) {
v = 2; v = 2;
} else { } else {
v = 3 + VP8GetBit(br, p[5]); v = 3 + VP8GetBit(br, p[5], "coeffs");
} }
} else { } else {
if (!VP8GetBit(br, p[6])) { if (!VP8GetBit(br, p[6], "coeffs")) {
if (!VP8GetBit(br, p[7])) { if (!VP8GetBit(br, p[7], "coeffs")) {
v = 5 + VP8GetBit(br, 159); v = 5 + VP8GetBit(br, 159, "coeffs");
} else { } else {
v = 7 + 2 * VP8GetBit(br, 165); v = 7 + 2 * VP8GetBit(br, 165, "coeffs");
v += VP8GetBit(br, 145); v += VP8GetBit(br, 145, "coeffs");
} }
} else { } else {
const uint8_t* tab; const uint8_t* tab;
const int bit1 = VP8GetBit(br, p[8]); const int bit1 = VP8GetBit(br, p[8], "coeffs");
const int bit0 = VP8GetBit(br, p[9 + bit1]); const int bit0 = VP8GetBit(br, p[9 + bit1], "coeffs");
const int cat = 2 * bit1 + bit0; const int cat = 2 * bit1 + bit0;
v = 0; v = 0;
for (tab = kCat3456[cat]; *tab; ++tab) { for (tab = kCat3456[cat]; *tab; ++tab) {
v += v + VP8GetBit(br, *tab); v += v + VP8GetBit(br, *tab, "coeffs");
} }
v += 3 + (8 << cat); v += 3 + (8 << cat);
} }
@ -438,24 +441,24 @@ static int GetCoeffsFast(VP8BitReader* const br,
int ctx, const quant_t dq, int n, int16_t* out) { int ctx, const quant_t dq, int n, int16_t* out) {
const uint8_t* p = prob[n]->probas_[ctx]; const uint8_t* p = prob[n]->probas_[ctx];
for (; n < 16; ++n) { for (; n < 16; ++n) {
if (!VP8GetBit(br, p[0])) { if (!VP8GetBit(br, p[0], "coeffs")) {
return n; // previous coeff was last non-zero coeff return n; // previous coeff was last non-zero coeff
} }
while (!VP8GetBit(br, p[1])) { // sequence of zero coeffs while (!VP8GetBit(br, p[1], "coeffs")) { // sequence of zero coeffs
p = prob[++n]->probas_[0]; p = prob[++n]->probas_[0];
if (n == 16) return 16; if (n == 16) return 16;
} }
{ // non zero coeff { // non zero coeff
const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0]; const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0];
int v; int v;
if (!VP8GetBit(br, p[2])) { if (!VP8GetBit(br, p[2], "coeffs")) {
v = 1; v = 1;
p = p_ctx[1]; p = p_ctx[1];
} else { } else {
v = GetLargeValue(br, p); v = GetLargeValue(br, p);
p = p_ctx[2]; p = p_ctx[2];
} }
out[kZigzag[n]] = VP8GetSigned(br, v) * dq[n > 0]; out[kZigzag[n]] = VP8GetSigned(br, v, "coeffs") * dq[n > 0];
} }
} }
return 16; return 16;
@ -468,24 +471,24 @@ static int GetCoeffsAlt(VP8BitReader* const br,
int ctx, const quant_t dq, int n, int16_t* out) { int ctx, const quant_t dq, int n, int16_t* out) {
const uint8_t* p = prob[n]->probas_[ctx]; const uint8_t* p = prob[n]->probas_[ctx];
for (; n < 16; ++n) { for (; n < 16; ++n) {
if (!VP8GetBitAlt(br, p[0])) { if (!VP8GetBitAlt(br, p[0], "coeffs")) {
return n; // previous coeff was last non-zero coeff return n; // previous coeff was last non-zero coeff
} }
while (!VP8GetBitAlt(br, p[1])) { // sequence of zero coeffs while (!VP8GetBitAlt(br, p[1], "coeffs")) { // sequence of zero coeffs
p = prob[++n]->probas_[0]; p = prob[++n]->probas_[0];
if (n == 16) return 16; if (n == 16) return 16;
} }
{ // non zero coeff { // non zero coeff
const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0]; const VP8ProbaArray* const p_ctx = &prob[n + 1]->probas_[0];
int v; int v;
if (!VP8GetBitAlt(br, p[2])) { if (!VP8GetBitAlt(br, p[2], "coeffs")) {
v = 1; v = 1;
p = p_ctx[1]; p = p_ctx[1];
} else { } else {
v = GetLargeValue(br, p); v = GetLargeValue(br, p);
p = p_ctx[2]; p = p_ctx[2];
} }
out[kZigzag[n]] = VP8GetSigned(br, v) * dq[n > 0]; out[kZigzag[n]] = VP8GetSigned(br, v, "coeffs") * dq[n > 0];
} }
} }
return 16; return 16;

View File

@ -31,8 +31,8 @@ extern "C" {
// version numbers // version numbers
#define DEC_MAJ_VERSION 1 #define DEC_MAJ_VERSION 1
#define DEC_MIN_VERSION 0 #define DEC_MIN_VERSION 1
#define DEC_REV_VERSION 2 #define DEC_REV_VERSION 0
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline). // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
// Constraints are: We need to store one 16x16 block of luma samples (y), // Constraints are: We need to store one 16x16 block of luma samples (y),

View File

@ -362,12 +362,8 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
VP8LMetadata* const hdr = &dec->hdr_; VP8LMetadata* const hdr = &dec->hdr_;
uint32_t* huffman_image = NULL; uint32_t* huffman_image = NULL;
HTreeGroup* htree_groups = NULL; HTreeGroup* htree_groups = NULL;
// When reading htrees, some might be unused, as the format allows it.
// We will still read them but put them in this htree_group_bogus.
HTreeGroup htree_group_bogus;
HuffmanCode* huffman_tables = NULL; HuffmanCode* huffman_tables = NULL;
HuffmanCode* huffman_tables_bogus = NULL; HuffmanCode* huffman_table = NULL;
HuffmanCode* next = NULL;
int num_htree_groups = 1; int num_htree_groups = 1;
int num_htree_groups_max = 1; int num_htree_groups_max = 1;
int max_alphabet_size = 0; int max_alphabet_size = 0;
@ -418,12 +414,6 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
if (*mapped_group == -1) *mapped_group = num_htree_groups++; if (*mapped_group == -1) *mapped_group = num_htree_groups++;
huffman_image[i] = *mapped_group; huffman_image[i] = *mapped_group;
} }
huffman_tables_bogus = (HuffmanCode*)WebPSafeMalloc(
table_size, sizeof(*huffman_tables_bogus));
if (huffman_tables_bogus == NULL) {
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
goto Error;
}
} else { } else {
num_htree_groups = num_htree_groups_max; num_htree_groups = num_htree_groups_max;
} }
@ -453,63 +443,71 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
goto Error; goto Error;
} }
next = huffman_tables; huffman_table = huffman_tables;
for (i = 0; i < num_htree_groups_max; ++i) { for (i = 0; i < num_htree_groups_max; ++i) {
// If the index "i" is unused in the Huffman image, read the coefficients // If the index "i" is unused in the Huffman image, just make sure the
// but store them to a bogus htree_group. // coefficients are valid but do not store them.
const int is_bogus = (mapping != NULL && mapping[i] == -1); if (mapping != NULL && mapping[i] == -1) {
HTreeGroup* const htree_group = for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
is_bogus ? &htree_group_bogus : int alphabet_size = kAlphabetSize[j];
&htree_groups[(mapping == NULL) ? i : mapping[i]]; if (j == 0 && color_cache_bits > 0) {
HuffmanCode** const htrees = htree_group->htrees; alphabet_size += (1 << color_cache_bits);
HuffmanCode* huffman_tables_i = is_bogus ? huffman_tables_bogus : next; }
int size; // Passing in NULL so that nothing gets filled.
int total_size = 0; if (!ReadHuffmanCode(alphabet_size, dec, code_lengths, NULL)) {
int is_trivial_literal = 1; goto Error;
int max_bits = 0;
for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
int alphabet_size = kAlphabetSize[j];
htrees[j] = huffman_tables_i;
if (j == 0 && color_cache_bits > 0) {
alphabet_size += 1 << color_cache_bits;
}
size =
ReadHuffmanCode(alphabet_size, dec, code_lengths, huffman_tables_i);
if (size == 0) {
goto Error;
}
if (is_trivial_literal && kLiteralMap[j] == 1) {
is_trivial_literal = (huffman_tables_i->bits == 0);
}
total_size += huffman_tables_i->bits;
huffman_tables_i += size;
if (j <= ALPHA) {
int local_max_bits = code_lengths[0];
int k;
for (k = 1; k < alphabet_size; ++k) {
if (code_lengths[k] > local_max_bits) {
local_max_bits = code_lengths[k];
}
} }
max_bits += local_max_bits;
} }
} } else {
if (!is_bogus) next = huffman_tables_i; HTreeGroup* const htree_group =
htree_group->is_trivial_literal = is_trivial_literal; &htree_groups[(mapping == NULL) ? i : mapping[i]];
htree_group->is_trivial_code = 0; HuffmanCode** const htrees = htree_group->htrees;
if (is_trivial_literal) { int size;
const int red = htrees[RED][0].value; int total_size = 0;
const int blue = htrees[BLUE][0].value; int is_trivial_literal = 1;
const int alpha = htrees[ALPHA][0].value; int max_bits = 0;
htree_group->literal_arb = ((uint32_t)alpha << 24) | (red << 16) | blue; for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
if (total_size == 0 && htrees[GREEN][0].value < NUM_LITERAL_CODES) { int alphabet_size = kAlphabetSize[j];
htree_group->is_trivial_code = 1; htrees[j] = huffman_table;
htree_group->literal_arb |= htrees[GREEN][0].value << 8; if (j == 0 && color_cache_bits > 0) {
alphabet_size += (1 << color_cache_bits);
}
size = ReadHuffmanCode(alphabet_size, dec, code_lengths, huffman_table);
if (size == 0) {
goto Error;
}
if (is_trivial_literal && kLiteralMap[j] == 1) {
is_trivial_literal = (huffman_table->bits == 0);
}
total_size += huffman_table->bits;
huffman_table += size;
if (j <= ALPHA) {
int local_max_bits = code_lengths[0];
int k;
for (k = 1; k < alphabet_size; ++k) {
if (code_lengths[k] > local_max_bits) {
local_max_bits = code_lengths[k];
}
}
max_bits += local_max_bits;
}
} }
htree_group->is_trivial_literal = is_trivial_literal;
htree_group->is_trivial_code = 0;
if (is_trivial_literal) {
const int red = htrees[RED][0].value;
const int blue = htrees[BLUE][0].value;
const int alpha = htrees[ALPHA][0].value;
htree_group->literal_arb = ((uint32_t)alpha << 24) | (red << 16) | blue;
if (total_size == 0 && htrees[GREEN][0].value < NUM_LITERAL_CODES) {
htree_group->is_trivial_code = 1;
htree_group->literal_arb |= htrees[GREEN][0].value << 8;
}
}
htree_group->use_packed_table =
!htree_group->is_trivial_code && (max_bits < HUFFMAN_PACKED_BITS);
if (htree_group->use_packed_table) BuildPackedTable(htree_group);
} }
htree_group->use_packed_table =
!htree_group->is_trivial_code && (max_bits < HUFFMAN_PACKED_BITS);
if (htree_group->use_packed_table) BuildPackedTable(htree_group);
} }
ok = 1; ok = 1;
@ -521,7 +519,6 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
Error: Error:
WebPSafeFree(code_lengths); WebPSafeFree(code_lengths);
WebPSafeFree(huffman_tables_bogus);
WebPSafeFree(mapping); WebPSafeFree(mapping);
if (!ok) { if (!ok) {
WebPSafeFree(huffman_image); WebPSafeFree(huffman_image);
@ -757,11 +754,11 @@ static WEBP_INLINE HTreeGroup* GetHtreeGroupForPos(VP8LMetadata* const hdr,
typedef void (*ProcessRowsFunc)(VP8LDecoder* const dec, int row); typedef void (*ProcessRowsFunc)(VP8LDecoder* const dec, int row);
static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows, static void ApplyInverseTransforms(VP8LDecoder* const dec,
int start_row, int num_rows,
const uint32_t* const rows) { const uint32_t* const rows) {
int n = dec->next_transform_; int n = dec->next_transform_;
const int cache_pixs = dec->width_ * num_rows; const int cache_pixs = dec->width_ * num_rows;
const int start_row = dec->last_row_;
const int end_row = start_row + num_rows; const int end_row = start_row + num_rows;
const uint32_t* rows_in = rows; const uint32_t* rows_in = rows;
uint32_t* const rows_out = dec->argb_cache_; uint32_t* const rows_out = dec->argb_cache_;
@ -792,8 +789,7 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
VP8Io* const io = dec->io_; VP8Io* const io = dec->io_;
uint8_t* rows_data = (uint8_t*)dec->argb_cache_; uint8_t* rows_data = (uint8_t*)dec->argb_cache_;
const int in_stride = io->width * sizeof(uint32_t); // in unit of RGBA const int in_stride = io->width * sizeof(uint32_t); // in unit of RGBA
ApplyInverseTransforms(dec, dec->last_row_, num_rows, rows);
ApplyInverseTransforms(dec, num_rows, rows);
if (!SetCropWindow(io, dec->last_row_, row, &rows_data, in_stride)) { if (!SetCropWindow(io, dec->last_row_, row, &rows_data, in_stride)) {
// Nothing to output (this time). // Nothing to output (this time).
} else { } else {
@ -1196,6 +1192,7 @@ static int DecodeImageData(VP8LDecoder* const dec, uint32_t* const data,
VP8LFillBitWindow(br); VP8LFillBitWindow(br);
dist_code = GetCopyDistance(dist_symbol, br); dist_code = GetCopyDistance(dist_symbol, br);
dist = PlaneCodeToDistance(width, dist_code); dist = PlaneCodeToDistance(width, dist_code);
if (VP8LIsEndOfStream(br)) break; if (VP8LIsEndOfStream(br)) break;
if (src - data < (ptrdiff_t)dist || src_end - src < (ptrdiff_t)length) { if (src - data < (ptrdiff_t)dist || src_end - src < (ptrdiff_t)length) {
goto Error; goto Error;
@ -1556,7 +1553,7 @@ static void ExtractAlphaRows(VP8LDecoder* const dec, int last_row) {
const int cache_pixs = width * num_rows_to_process; const int cache_pixs = width * num_rows_to_process;
uint8_t* const dst = output + width * cur_row; uint8_t* const dst = output + width * cur_row;
const uint32_t* const src = dec->argb_cache_; const uint32_t* const src = dec->argb_cache_;
ApplyInverseTransforms(dec, num_rows_to_process, in); ApplyInverseTransforms(dec, cur_row, num_rows_to_process, in);
WebPExtractGreen(src, dst, cache_pixs); WebPExtractGreen(src, dst, cache_pixs);
AlphaApplyFilter(alph_dec, AlphaApplyFilter(alph_dec,
cur_row, cur_row + num_rows_to_process, dst, width); cur_row, cur_row + num_rows_to_process, dst, width);

View File

@ -37,7 +37,7 @@ struct VP8LTransform {
int bits_; // subsampling bits defining transform window. int bits_; // subsampling bits defining transform window.
int xsize_; // transform window X index. int xsize_; // transform window X index.
int ysize_; // transform window Y index. int ysize_; // transform window Y index.
uint32_t *data_; // transform data. uint32_t* data_; // transform data.
}; };
typedef struct { typedef struct {
@ -48,23 +48,23 @@ typedef struct {
int huffman_mask_; int huffman_mask_;
int huffman_subsample_bits_; int huffman_subsample_bits_;
int huffman_xsize_; int huffman_xsize_;
uint32_t *huffman_image_; uint32_t* huffman_image_;
int num_htree_groups_; int num_htree_groups_;
HTreeGroup *htree_groups_; HTreeGroup* htree_groups_;
HuffmanCode *huffman_tables_; HuffmanCode* huffman_tables_;
} VP8LMetadata; } VP8LMetadata;
typedef struct VP8LDecoder VP8LDecoder; typedef struct VP8LDecoder VP8LDecoder;
struct VP8LDecoder { struct VP8LDecoder {
VP8StatusCode status_; VP8StatusCode status_;
VP8LDecodeState state_; VP8LDecodeState state_;
VP8Io *io_; VP8Io* io_;
const WebPDecBuffer *output_; // shortcut to io->opaque->output const WebPDecBuffer* output_; // shortcut to io->opaque->output
uint32_t *pixels_; // Internal data: either uint8_t* for alpha uint32_t* pixels_; // Internal data: either uint8_t* for alpha
// or uint32_t* for BGRA. // or uint32_t* for BGRA.
uint32_t *argb_cache_; // Scratch buffer for temporary BGRA storage. uint32_t* argb_cache_; // Scratch buffer for temporary BGRA storage.
VP8LBitReader br_; VP8LBitReader br_;
int incremental_; // if true, incremental decoding is expected int incremental_; // if true, incremental decoding is expected
@ -86,8 +86,8 @@ struct VP8LDecoder {
// or'd bitset storing the transforms types. // or'd bitset storing the transforms types.
uint32_t transforms_seen_; uint32_t transforms_seen_;
uint8_t *rescaler_memory; // Working memory for rescaling work. uint8_t* rescaler_memory; // Working memory for rescaling work.
WebPRescaler *rescaler; // Common rescaler for all channels. WebPRescaler* rescaler; // Common rescaler for all channels.
}; };
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------

View File

@ -24,8 +24,8 @@
#include "src/webp/format_constants.h" #include "src/webp/format_constants.h"
#define DMUX_MAJ_VERSION 1 #define DMUX_MAJ_VERSION 1
#define DMUX_MIN_VERSION 0 #define DMUX_MIN_VERSION 1
#define DMUX_REV_VERSION 2 #define DMUX_REV_VERSION 0
typedef struct { typedef struct {
size_t start_; // start location of the data size_t start_; // start location of the data

View File

@ -214,7 +214,7 @@ static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first,
// Alpha detection // Alpha detection
static int HasAlpha8b_SSE2(const uint8_t* src, int length) { static int HasAlpha8b_SSE2(const uint8_t* src, int length) {
const __m128i all_0xff = _mm_set1_epi8(0xff); const __m128i all_0xff = _mm_set1_epi8((char)0xff);
int i = 0; int i = 0;
for (; i + 16 <= length; i += 16) { for (; i + 16 <= length; i += 16) {
const __m128i v = _mm_loadu_si128((const __m128i*)(src + i)); const __m128i v = _mm_loadu_si128((const __m128i*)(src + i));
@ -228,7 +228,7 @@ static int HasAlpha8b_SSE2(const uint8_t* src, int length) {
static int HasAlpha32b_SSE2(const uint8_t* src, int length) { static int HasAlpha32b_SSE2(const uint8_t* src, int length) {
const __m128i alpha_mask = _mm_set1_epi32(0xff); const __m128i alpha_mask = _mm_set1_epi32(0xff);
const __m128i all_0xff = _mm_set1_epi8(0xff); const __m128i all_0xff = _mm_set1_epi8((char)0xff);
int i = 0; int i = 0;
// We don't know if we can access the last 3 bytes after the last alpha // We don't know if we can access the last 3 bytes after the last alpha
// value 'src[4 * length - 4]' (because we don't know if alpha is the first // value 'src[4 * length - 4]' (because we don't know if alpha is the first

View File

@ -173,8 +173,8 @@ static int AndroidCPUInfo(CPUFeature feature) {
const AndroidCpuFamily cpu_family = android_getCpuFamily(); const AndroidCpuFamily cpu_family = android_getCpuFamily();
const uint64_t cpu_features = android_getCpuFeatures(); const uint64_t cpu_features = android_getCpuFeatures();
if (feature == kNEON) { if (feature == kNEON) {
return (cpu_family == ANDROID_CPU_FAMILY_ARM && return cpu_family == ANDROID_CPU_FAMILY_ARM &&
0 != (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON)); (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0;
} }
return 0; return 0;
} }

View File

@ -1361,7 +1361,8 @@ static void RD4_NEON(uint8_t* dst) { // Down-right
const uint32_t J = dst[-1 + 1 * BPS]; const uint32_t J = dst[-1 + 1 * BPS];
const uint32_t K = dst[-1 + 2 * BPS]; const uint32_t K = dst[-1 + 2 * BPS];
const uint32_t L = dst[-1 + 3 * BPS]; const uint32_t L = dst[-1 + 3 * BPS];
const uint64x1_t LKJI____ = vcreate_u64(L | (K << 8) | (J << 16) | (I << 24)); const uint64x1_t LKJI____ =
vcreate_u64((uint64_t)L | (K << 8) | (J << 16) | (I << 24));
const uint64x1_t LKJIXABC = vorr_u64(LKJI____, ____XABC); const uint64x1_t LKJIXABC = vorr_u64(LKJI____, ____XABC);
const uint8x8_t KJIXABC_ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 8)); const uint8x8_t KJIXABC_ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 8));
const uint8x8_t JIXABC__ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 16)); const uint8x8_t JIXABC__ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 16));
@ -1427,10 +1428,16 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) {
if (do_top) { if (do_top) {
const uint8x8_t A = vld1_u8(dst - BPS); // top row const uint8x8_t A = vld1_u8(dst - BPS); // top row
#if defined(__aarch64__)
const uint16x8_t B = vmovl_u8(A);
const uint16_t p2 = vaddvq_u16(B);
sum_top = vdupq_n_u16(p2);
#else
const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top
const uint16x4_t p1 = vpadd_u16(p0, p0); const uint16x4_t p1 = vpadd_u16(p0, p0);
const uint16x4_t p2 = vpadd_u16(p1, p1); const uint16x4_t p2 = vpadd_u16(p1, p1);
sum_top = vcombine_u16(p2, p2); sum_top = vcombine_u16(p2, p2);
#endif
} }
if (do_left) { if (do_left) {

View File

@ -326,7 +326,7 @@ static WEBP_INLINE void Update2Pixels_SSE2(__m128i* const pi, __m128i* const qi,
const __m128i a1_lo = _mm_srai_epi16(*a0_lo, 7); const __m128i a1_lo = _mm_srai_epi16(*a0_lo, 7);
const __m128i a1_hi = _mm_srai_epi16(*a0_hi, 7); const __m128i a1_hi = _mm_srai_epi16(*a0_hi, 7);
const __m128i delta = _mm_packs_epi16(a1_lo, a1_hi); const __m128i delta = _mm_packs_epi16(a1_lo, a1_hi);
const __m128i sign_bit = _mm_set1_epi8(0x80); const __m128i sign_bit = _mm_set1_epi8((char)0x80);
*pi = _mm_adds_epi8(*pi, delta); *pi = _mm_adds_epi8(*pi, delta);
*qi = _mm_subs_epi8(*qi, delta); *qi = _mm_subs_epi8(*qi, delta);
FLIP_SIGN_BIT2(*pi, *qi); FLIP_SIGN_BIT2(*pi, *qi);
@ -338,9 +338,9 @@ static WEBP_INLINE void NeedsFilter_SSE2(const __m128i* const p1,
const __m128i* const q0, const __m128i* const q0,
const __m128i* const q1, const __m128i* const q1,
int thresh, __m128i* const mask) { int thresh, __m128i* const mask) {
const __m128i m_thresh = _mm_set1_epi8(thresh); const __m128i m_thresh = _mm_set1_epi8((char)thresh);
const __m128i t1 = MM_ABS(*p1, *q1); // abs(p1 - q1) const __m128i t1 = MM_ABS(*p1, *q1); // abs(p1 - q1)
const __m128i kFE = _mm_set1_epi8(0xFE); const __m128i kFE = _mm_set1_epi8((char)0xFE);
const __m128i t2 = _mm_and_si128(t1, kFE); // set lsb of each byte to zero const __m128i t2 = _mm_and_si128(t1, kFE); // set lsb of each byte to zero
const __m128i t3 = _mm_srli_epi16(t2, 1); // abs(p1 - q1) / 2 const __m128i t3 = _mm_srli_epi16(t2, 1); // abs(p1 - q1) / 2
@ -360,7 +360,7 @@ static WEBP_INLINE void DoFilter2_SSE2(__m128i* const p1, __m128i* const p0,
__m128i* const q0, __m128i* const q1, __m128i* const q0, __m128i* const q1,
int thresh) { int thresh) {
__m128i a, mask; __m128i a, mask;
const __m128i sign_bit = _mm_set1_epi8(0x80); const __m128i sign_bit = _mm_set1_epi8((char)0x80);
// convert p1/q1 to int8_t (for GetBaseDelta_SSE2) // convert p1/q1 to int8_t (for GetBaseDelta_SSE2)
const __m128i p1s = _mm_xor_si128(*p1, sign_bit); const __m128i p1s = _mm_xor_si128(*p1, sign_bit);
const __m128i q1s = _mm_xor_si128(*q1, sign_bit); const __m128i q1s = _mm_xor_si128(*q1, sign_bit);
@ -380,7 +380,7 @@ static WEBP_INLINE void DoFilter4_SSE2(__m128i* const p1, __m128i* const p0,
const __m128i* const mask, const __m128i* const mask,
int hev_thresh) { int hev_thresh) {
const __m128i zero = _mm_setzero_si128(); const __m128i zero = _mm_setzero_si128();
const __m128i sign_bit = _mm_set1_epi8(0x80); const __m128i sign_bit = _mm_set1_epi8((char)0x80);
const __m128i k64 = _mm_set1_epi8(64); const __m128i k64 = _mm_set1_epi8(64);
const __m128i k3 = _mm_set1_epi8(3); const __m128i k3 = _mm_set1_epi8(3);
const __m128i k4 = _mm_set1_epi8(4); const __m128i k4 = _mm_set1_epi8(4);
@ -427,7 +427,7 @@ static WEBP_INLINE void DoFilter6_SSE2(__m128i* const p2, __m128i* const p1,
const __m128i* const mask, const __m128i* const mask,
int hev_thresh) { int hev_thresh) {
const __m128i zero = _mm_setzero_si128(); const __m128i zero = _mm_setzero_si128();
const __m128i sign_bit = _mm_set1_epi8(0x80); const __m128i sign_bit = _mm_set1_epi8((char)0x80);
__m128i a, not_hev; __m128i a, not_hev;
// compute hev mask // compute hev mask
@ -941,7 +941,7 @@ static void VR4_SSE2(uint8_t* dst) { // Vertical-Right
const __m128i ABCD0 = _mm_srli_si128(XABCD, 1); const __m128i ABCD0 = _mm_srli_si128(XABCD, 1);
const __m128i abcd = _mm_avg_epu8(XABCD, ABCD0); const __m128i abcd = _mm_avg_epu8(XABCD, ABCD0);
const __m128i _XABCD = _mm_slli_si128(XABCD, 1); const __m128i _XABCD = _mm_slli_si128(XABCD, 1);
const __m128i IXABCD = _mm_insert_epi16(_XABCD, I | (X << 8), 0); const __m128i IXABCD = _mm_insert_epi16(_XABCD, (short)(I | (X << 8)), 0);
const __m128i avg1 = _mm_avg_epu8(IXABCD, ABCD0); const __m128i avg1 = _mm_avg_epu8(IXABCD, ABCD0);
const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one); const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one);
const __m128i avg2 = _mm_subs_epu8(avg1, lsb); const __m128i avg2 = _mm_subs_epu8(avg1, lsb);

View File

@ -246,9 +246,9 @@ extern VP8Fdct VP8FTransform2; // performs two transforms at a time
extern VP8WHT VP8FTransformWHT; extern VP8WHT VP8FTransformWHT;
// Predictions // Predictions
// *dst is the destination block. *top and *left can be NULL. // *dst is the destination block. *top and *left can be NULL.
typedef void (*VP8IntraPreds)(uint8_t *dst, const uint8_t* left, typedef void (*VP8IntraPreds)(uint8_t* dst, const uint8_t* left,
const uint8_t* top); const uint8_t* top);
typedef void (*VP8Intra4Preds)(uint8_t *dst, const uint8_t* top); typedef void (*VP8Intra4Preds)(uint8_t* dst, const uint8_t* top);
extern VP8Intra4Preds VP8EncPredLuma4; extern VP8Intra4Preds VP8EncPredLuma4;
extern VP8IntraPreds VP8EncPredLuma16; extern VP8IntraPreds VP8EncPredLuma16;
extern VP8IntraPreds VP8EncPredChroma8; extern VP8IntraPreds VP8EncPredChroma8;

View File

@ -777,7 +777,7 @@ static WEBP_INLINE void VR4_SSE2(uint8_t* dst,
const __m128i ABCD0 = _mm_srli_si128(XABCD, 1); const __m128i ABCD0 = _mm_srli_si128(XABCD, 1);
const __m128i abcd = _mm_avg_epu8(XABCD, ABCD0); const __m128i abcd = _mm_avg_epu8(XABCD, ABCD0);
const __m128i _XABCD = _mm_slli_si128(XABCD, 1); const __m128i _XABCD = _mm_slli_si128(XABCD, 1);
const __m128i IXABCD = _mm_insert_epi16(_XABCD, I | (X << 8), 0); const __m128i IXABCD = _mm_insert_epi16(_XABCD, (short)(I | (X << 8)), 0);
const __m128i avg1 = _mm_avg_epu8(IXABCD, ABCD0); const __m128i avg1 = _mm_avg_epu8(IXABCD, ABCD0);
const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one); const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one);
const __m128i avg2 = _mm_subs_epu8(avg1, lsb); const __m128i avg2 = _mm_subs_epu8(avg1, lsb);

View File

@ -33,9 +33,9 @@ static WEBP_INLINE void PredictLine_C(const uint8_t* src, const uint8_t* pred,
uint8_t* dst, int length, int inverse) { uint8_t* dst, int length, int inverse) {
int i; int i;
if (inverse) { if (inverse) {
for (i = 0; i < length; ++i) dst[i] = src[i] + pred[i]; for (i = 0; i < length; ++i) dst[i] = (uint8_t)(src[i] + pred[i]);
} else { } else {
for (i = 0; i < length; ++i) dst[i] = src[i] - pred[i]; for (i = 0; i < length; ++i) dst[i] = (uint8_t)(src[i] - pred[i]);
} }
} }
@ -155,7 +155,7 @@ static WEBP_INLINE void DoGradientFilter_C(const uint8_t* in,
const int pred = GradientPredictor_C(preds[w - 1], const int pred = GradientPredictor_C(preds[w - 1],
preds[w - stride], preds[w - stride],
preds[w - stride - 1]); preds[w - stride - 1]);
out[w] = in[w] + (inverse ? pred : -pred); out[w] = (uint8_t)(in[w] + (inverse ? pred : -pred));
} }
++row; ++row;
preds += stride; preds += stride;
@ -194,7 +194,7 @@ static void HorizontalUnfilter_C(const uint8_t* prev, const uint8_t* in,
uint8_t pred = (prev == NULL) ? 0 : prev[0]; uint8_t pred = (prev == NULL) ? 0 : prev[0];
int i; int i;
for (i = 0; i < width; ++i) { for (i = 0; i < width; ++i) {
out[i] = pred + in[i]; out[i] = (uint8_t)(pred + in[i]);
pred = out[i]; pred = out[i];
} }
} }
@ -206,7 +206,7 @@ static void VerticalUnfilter_C(const uint8_t* prev, const uint8_t* in,
HorizontalUnfilter_C(NULL, in, out, width); HorizontalUnfilter_C(NULL, in, out, width);
} else { } else {
int i; int i;
for (i = 0; i < width; ++i) out[i] = prev[i] + in[i]; for (i = 0; i < width; ++i) out[i] = (uint8_t)(prev[i] + in[i]);
} }
} }
#endif // !WEBP_NEON_OMIT_C_CODE #endif // !WEBP_NEON_OMIT_C_CODE
@ -220,7 +220,7 @@ static void GradientUnfilter_C(const uint8_t* prev, const uint8_t* in,
int i; int i;
for (i = 0; i < width; ++i) { for (i = 0; i < width; ++i) {
top = prev[i]; // need to read this first, in case prev==out top = prev[i]; // need to read this first, in case prev==out
left = in[i] + GradientPredictor_C(left, top, top_left); left = (uint8_t)(in[i] + GradientPredictor_C(left, top, top_left));
top_left = top; top_left = top;
out[i] = left; out[i] = left;
} }

View File

@ -163,7 +163,8 @@ static void GradientPredictDirect_SSE2(const uint8_t* const row,
_mm_storel_epi64((__m128i*)(out + i), H); _mm_storel_epi64((__m128i*)(out + i), H);
} }
for (; i < length; ++i) { for (; i < length; ++i) {
out[i] = row[i] - GradientPredictor_SSE2(row[i - 1], top[i], top[i - 1]); const int delta = GradientPredictor_SSE2(row[i - 1], top[i], top[i - 1]);
out[i] = (uint8_t)(row[i] - delta);
} }
} }
@ -188,7 +189,7 @@ static WEBP_INLINE void DoGradientFilter_SSE2(const uint8_t* in,
// Filter line-by-line. // Filter line-by-line.
while (row < last_row) { while (row < last_row) {
out[0] = in[0] - in[-stride]; out[0] = (uint8_t)(in[0] - in[-stride]);
GradientPredictDirect_SSE2(in + 1, in + 1 - stride, out + 1, width - 1); GradientPredictDirect_SSE2(in + 1, in + 1 - stride, out + 1, width - 1);
++row; ++row;
in += stride; in += stride;
@ -223,7 +224,7 @@ static void HorizontalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
uint8_t* out, int width) { uint8_t* out, int width) {
int i; int i;
__m128i last; __m128i last;
out[0] = in[0] + (prev == NULL ? 0 : prev[0]); out[0] = (uint8_t)(in[0] + (prev == NULL ? 0 : prev[0]));
if (width <= 1) return; if (width <= 1) return;
last = _mm_set_epi32(0, 0, 0, out[0]); last = _mm_set_epi32(0, 0, 0, out[0]);
for (i = 1; i + 8 <= width; i += 8) { for (i = 1; i + 8 <= width; i += 8) {
@ -238,7 +239,7 @@ static void HorizontalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
_mm_storel_epi64((__m128i*)(out + i), A7); _mm_storel_epi64((__m128i*)(out + i), A7);
last = _mm_srli_epi64(A7, 56); last = _mm_srli_epi64(A7, 56);
} }
for (; i < width; ++i) out[i] = in[i] + out[i - 1]; for (; i < width; ++i) out[i] = (uint8_t)(in[i] + out[i - 1]);
} }
static void VerticalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in, static void VerticalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
@ -259,7 +260,7 @@ static void VerticalUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
_mm_storeu_si128((__m128i*)&out[i + 0], C0); _mm_storeu_si128((__m128i*)&out[i + 0], C0);
_mm_storeu_si128((__m128i*)&out[i + 16], C1); _mm_storeu_si128((__m128i*)&out[i + 16], C1);
} }
for (; i < width; ++i) out[i] = in[i] + prev[i]; for (; i < width; ++i) out[i] = (uint8_t)(in[i] + prev[i]);
} }
} }
@ -296,7 +297,8 @@ static void GradientPredictInverse_SSE2(const uint8_t* const in,
_mm_storel_epi64((__m128i*)&row[i], out); _mm_storel_epi64((__m128i*)&row[i], out);
} }
for (; i < length; ++i) { for (; i < length; ++i) {
row[i] = in[i] + GradientPredictor_SSE2(row[i - 1], top[i], top[i - 1]); const int delta = GradientPredictor_SSE2(row[i - 1], top[i], top[i - 1]);
row[i] = (uint8_t)(in[i] + delta);
} }
} }
} }
@ -306,7 +308,7 @@ static void GradientUnfilter_SSE2(const uint8_t* prev, const uint8_t* in,
if (prev == NULL) { if (prev == NULL) {
HorizontalUnfilter_SSE2(NULL, in, out, width); HorizontalUnfilter_SSE2(NULL, in, out, width);
} else { } else {
out[0] = in[0] + prev[0]; // predict from above out[0] = (uint8_t)(in[0] + prev[0]); // predict from above
GradientPredictInverse_SSE2(in + 1, prev + 1, out + 1, width - 1); GradientPredictInverse_SSE2(in + 1, prev + 1, out + 1, width - 1);
} }
} }

View File

@ -81,7 +81,7 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
// gcc <= 4.9 on ARM generates incorrect code in Select() when Sub3() is // gcc <= 4.9 on ARM generates incorrect code in Select() when Sub3() is
// inlined. // inlined.
#if defined(__arm__) && LOCAL_GCC_VERSION <= 0x409 #if defined(__arm__) && defined(__GNUC__) && LOCAL_GCC_VERSION <= 0x409
# define LOCAL_INLINE __attribute__ ((noinline)) # define LOCAL_INLINE __attribute__ ((noinline))
#else #else
# define LOCAL_INLINE WEBP_INLINE # define LOCAL_INLINE WEBP_INLINE
@ -167,15 +167,20 @@ static uint32_t Predictor13_C(uint32_t left, const uint32_t* const top) {
return pred; return pred;
} }
GENERATE_PREDICTOR_ADD(Predictor0_C, PredictorAdd0_C) static void PredictorAdd0_C(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int x;
(void)upper;
for (x = 0; x < num_pixels; ++x) out[x] = VP8LAddPixels(in[x], ARGB_BLACK);
}
static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper, static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) { int num_pixels, uint32_t* out) {
int i; int i;
uint32_t left = out[-1]; uint32_t left = out[-1];
(void)upper;
for (i = 0; i < num_pixels; ++i) { for (i = 0; i < num_pixels; ++i) {
out[i] = left = VP8LAddPixels(in[i], left); out[i] = left = VP8LAddPixels(in[i], left);
} }
(void)upper;
} }
GENERATE_PREDICTOR_ADD(Predictor2_C, PredictorAdd2_C) GENERATE_PREDICTOR_ADD(Predictor2_C, PredictorAdd2_C)
GENERATE_PREDICTOR_ADD(Predictor3_C, PredictorAdd3_C) GENERATE_PREDICTOR_ADD(Predictor3_C, PredictorAdd3_C)
@ -270,14 +275,14 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m,
int i; int i;
for (i = 0; i < num_pixels; ++i) { for (i = 0; i < num_pixels; ++i) {
const uint32_t argb = src[i]; const uint32_t argb = src[i];
const uint32_t green = argb >> 8; const int8_t green = (int8_t)(argb >> 8);
const uint32_t red = argb >> 16; const uint32_t red = argb >> 16;
int new_red = red & 0xff; int new_red = red & 0xff;
int new_blue = argb & 0xff; int new_blue = argb & 0xff;
new_red += ColorTransformDelta(m->green_to_red_, green); new_red += ColorTransformDelta(m->green_to_red_, green);
new_red &= 0xff; new_red &= 0xff;
new_blue += ColorTransformDelta(m->green_to_blue_, green); new_blue += ColorTransformDelta(m->green_to_blue_, green);
new_blue += ColorTransformDelta(m->red_to_blue_, new_red); new_blue += ColorTransformDelta(m->red_to_blue_, (int8_t)new_red);
new_blue &= 0xff; new_blue &= 0xff;
dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue); dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
} }

View File

@ -177,6 +177,7 @@ uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
int num_pixels, uint32_t* out) { \ int num_pixels, uint32_t* out) { \
int x; \ int x; \
assert(upper != NULL); \
for (x = 0; x < num_pixels; ++x) { \ for (x = 0; x < num_pixels; ++x) { \
const uint32_t pred = (PREDICTOR)(out[x - 1], upper + x); \ const uint32_t pred = (PREDICTOR)(out[x - 1], upper + x); \
out[x] = VP8LAddPixels(in[x], pred); \ out[x] = VP8LAddPixels(in[x], pred); \
@ -189,6 +190,7 @@ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
static void PREDICTOR_SUB(const uint32_t* in, const uint32_t* upper, \ static void PREDICTOR_SUB(const uint32_t* in, const uint32_t* upper, \
int num_pixels, uint32_t* out) { \ int num_pixels, uint32_t* out) { \
int x; \ int x; \
assert(upper != NULL); \
for (x = 0; x < num_pixels; ++x) { \ for (x = 0; x < num_pixels; ++x) { \
const uint32_t pred = (PREDICTOR)(in[x - 1], upper + x); \ const uint32_t pred = (PREDICTOR)(in[x - 1], upper + x); \
out[x] = VP8LSubPixels(in[x], pred); \ out[x] = VP8LSubPixels(in[x], pred); \

View File

@ -515,13 +515,17 @@ static WEBP_INLINE int ColorTransformDelta(int8_t color_pred, int8_t color) {
return ((int)color_pred * color) >> 5; return ((int)color_pred * color) >> 5;
} }
// Keeps only the low 8 bits of 'v' and converts them to int8_t.
static WEBP_INLINE int8_t U32ToS8(uint32_t v) {
  const uint32_t low_byte = v & 0xff;
  return (int8_t)low_byte;
}
void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data, void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
int num_pixels) { int num_pixels) {
int i; int i;
for (i = 0; i < num_pixels; ++i) { for (i = 0; i < num_pixels; ++i) {
const uint32_t argb = data[i]; const uint32_t argb = data[i];
const uint32_t green = argb >> 8; const int8_t green = U32ToS8(argb >> 8);
const uint32_t red = argb >> 16; const int8_t red = U32ToS8(argb >> 16);
int new_red = red & 0xff; int new_red = red & 0xff;
int new_blue = argb & 0xff; int new_blue = argb & 0xff;
new_red -= ColorTransformDelta(m->green_to_red_, green); new_red -= ColorTransformDelta(m->green_to_red_, green);
@ -535,7 +539,7 @@ void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data,
static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red, static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
uint32_t argb) { uint32_t argb) {
const uint32_t green = argb >> 8; const int8_t green = U32ToS8(argb >> 8);
int new_red = argb >> 16; int new_red = argb >> 16;
new_red -= ColorTransformDelta(green_to_red, green); new_red -= ColorTransformDelta(green_to_red, green);
return (new_red & 0xff); return (new_red & 0xff);
@ -544,9 +548,9 @@ static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue, static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
uint8_t red_to_blue, uint8_t red_to_blue,
uint32_t argb) { uint32_t argb) {
const uint32_t green = argb >> 8; const int8_t green = U32ToS8(argb >> 8);
const uint32_t red = argb >> 16; const int8_t red = U32ToS8(argb >> 16);
uint8_t new_blue = argb; uint8_t new_blue = argb & 0xff;
new_blue -= ColorTransformDelta(green_to_blue, green); new_blue -= ColorTransformDelta(green_to_blue, green);
new_blue -= ColorTransformDelta(red_to_blue, red); new_blue -= ColorTransformDelta(red_to_blue, red);
return (new_blue & 0xff); return (new_blue & 0xff);
@ -558,7 +562,7 @@ void VP8LCollectColorRedTransforms_C(const uint32_t* argb, int stride,
while (tile_height-- > 0) { while (tile_height-- > 0) {
int x; int x;
for (x = 0; x < tile_width; ++x) { for (x = 0; x < tile_width; ++x) {
++histo[TransformColorRed(green_to_red, argb[x])]; ++histo[TransformColorRed((uint8_t)green_to_red, argb[x])];
} }
argb += stride; argb += stride;
} }
@ -571,7 +575,8 @@ void VP8LCollectColorBlueTransforms_C(const uint32_t* argb, int stride,
while (tile_height-- > 0) { while (tile_height-- > 0) {
int x; int x;
for (x = 0; x < tile_width; ++x) { for (x = 0; x < tile_width; ++x) {
++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[x])]; ++histo[TransformColorBlue((uint8_t)green_to_blue, (uint8_t)red_to_blue,
argb[x])];
} }
argb += stride; argb += stride;
} }

View File

@ -363,7 +363,7 @@ static void BundleColorMap_SSE2(const uint8_t* const row, int width, int xbits,
assert(xbits <= 3); assert(xbits <= 3);
switch (xbits) { switch (xbits) {
case 0: { case 0: {
const __m128i ff = _mm_set1_epi16(0xff00); const __m128i ff = _mm_set1_epi16((short)0xff00);
const __m128i zero = _mm_setzero_si128(); const __m128i zero = _mm_setzero_si128();
// Store 0xff000000 | (row[x] << 8). // Store 0xff000000 | (row[x] << 8).
for (x = 0; x + 16 <= width; x += 16, dst += 16) { for (x = 0; x + 16 <= width; x += 16, dst += 16) {
@ -382,7 +382,7 @@ static void BundleColorMap_SSE2(const uint8_t* const row, int width, int xbits,
break; break;
} }
case 1: { case 1: {
const __m128i ff = _mm_set1_epi16(0xff00); const __m128i ff = _mm_set1_epi16((short)0xff00);
const __m128i mul = _mm_set1_epi16(0x110); const __m128i mul = _mm_set1_epi16(0x110);
for (x = 0; x + 16 <= width; x += 16, dst += 8) { for (x = 0; x + 16 <= width; x += 16, dst += 8) {
// 0a0b | (where a/b are 4 bits). // 0a0b | (where a/b are 4 bits).
@ -455,8 +455,9 @@ static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper,
_mm_storeu_si128((__m128i*)&out[i], res); _mm_storeu_si128((__m128i*)&out[i], res);
} }
if (i != num_pixels) { if (i != num_pixels) {
VP8LPredictorsSub_C[0](in + i, upper + i, num_pixels - i, out + i); VP8LPredictorsSub_C[0](in + i, NULL, num_pixels - i, out + i);
} }
(void)upper;
} }
#define GENERATE_PREDICTOR_1(X, IN) \ #define GENERATE_PREDICTOR_1(X, IN) \

View File

@ -51,9 +51,9 @@ static void CollectColorBlueTransforms_SSE41(const uint32_t* argb, int stride,
int histo[]) { int histo[]) {
const __m128i mults_r = _mm_set1_epi16(CST_5b(red_to_blue)); const __m128i mults_r = _mm_set1_epi16(CST_5b(red_to_blue));
const __m128i mults_g = _mm_set1_epi16(CST_5b(green_to_blue)); const __m128i mults_g = _mm_set1_epi16(CST_5b(green_to_blue));
const __m128i mask_g = _mm_set1_epi16(0xff00); // green mask const __m128i mask_g = _mm_set1_epi16((short)0xff00); // green mask
const __m128i mask_gb = _mm_set1_epi32(0xffff); // green/blue mask const __m128i mask_gb = _mm_set1_epi32(0xffff); // green/blue mask
const __m128i mask_b = _mm_set1_epi16(0x00ff); // blue mask const __m128i mask_b = _mm_set1_epi16(0x00ff); // blue mask
const __m128i shuffler_lo = _mm_setr_epi8(-1, 2, -1, 6, -1, 10, -1, 14, -1, const __m128i shuffler_lo = _mm_setr_epi8(-1, 2, -1, 6, -1, 10, -1, 14, -1,
-1, -1, -1, -1, -1, -1, -1); -1, -1, -1, -1, -1, -1, -1);
const __m128i shuffler_hi = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, const __m128i shuffler_hi = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1,

View File

@ -191,8 +191,9 @@ static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
_mm_storeu_si128((__m128i*)&out[i], res); _mm_storeu_si128((__m128i*)&out[i], res);
} }
if (i != num_pixels) { if (i != num_pixels) {
VP8LPredictorsAdd_C[0](in + i, upper + i, num_pixels - i, out + i); VP8LPredictorsAdd_C[0](in + i, NULL, num_pixels - i, out + i);
} }
(void)upper;
} }
// Predictor1: left. // Predictor1: left.

View File

@ -10,6 +10,8 @@
#ifndef WEBP_DSP_QUANT_H_ #ifndef WEBP_DSP_QUANT_H_
#define WEBP_DSP_QUANT_H_ #define WEBP_DSP_QUANT_H_
#include <string.h>
#include "src/dsp/dsp.h" #include "src/dsp/dsp.h"
#include "src/webp/types.h" #include "src/webp/types.h"
@ -67,4 +69,17 @@ static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks,
#endif // defined(WEBP_USE_NEON) && !defined(WEBP_ANDROID_NEON) && #endif // defined(WEBP_USE_NEON) && !defined(WEBP_ANDROID_NEON) &&
// !defined(WEBP_HAVE_NEON_RTCD) // !defined(WEBP_HAVE_NEON_RTCD)
// Returns 1 when the 16 rows of 16 bytes starting at 'src' (consecutive rows
// are BPS bytes apart) all hold the value src[0], and 0 otherwise.
static WEBP_INLINE int IsFlatSource16(const uint8_t* src) {
  // Replicate the first sample into each byte of a 32-bit word so that four
  // samples can be compared per memcmp() call.
  const uint32_t flat = src[0] * 0x01010101u;
  const uint8_t* row = src;
  int rows_left = 16;
  while (rows_left-- > 0) {
    int x;
    for (x = 0; x < 16; x += 4) {
      // Bail out on the first 4-byte group that differs from the flat value.
      if (memcmp(row + x, &flat, 4) != 0) return 0;
    }
    row += BPS;
  }
  return 1;
}
#endif // WEBP_DSP_QUANT_H_ #endif // WEBP_DSP_QUANT_H_

View File

@ -109,8 +109,7 @@ void WebPRescalerExportRowExpand_C(WebPRescaler* const wrk) {
for (x_out = 0; x_out < x_out_max; ++x_out) { for (x_out = 0; x_out < x_out_max; ++x_out) {
const uint32_t J = frow[x_out]; const uint32_t J = frow[x_out];
const int v = (int)MULT_FIX(J, wrk->fy_scale); const int v = (int)MULT_FIX(J, wrk->fy_scale);
assert(v >= 0 && v <= 255); dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
dst[x_out] = v;
} }
} else { } else {
const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub); const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
@ -120,8 +119,7 @@ void WebPRescalerExportRowExpand_C(WebPRescaler* const wrk) {
+ (uint64_t)B * irow[x_out]; + (uint64_t)B * irow[x_out];
const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX); const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
const int v = (int)MULT_FIX(J, wrk->fy_scale); const int v = (int)MULT_FIX(J, wrk->fy_scale);
assert(v >= 0 && v <= 255); dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
dst[x_out] = v;
} }
} }
} }
@ -138,17 +136,15 @@ void WebPRescalerExportRowShrink_C(WebPRescaler* const wrk) {
assert(!wrk->y_expand); assert(!wrk->y_expand);
if (yscale) { if (yscale) {
for (x_out = 0; x_out < x_out_max; ++x_out) { for (x_out = 0; x_out < x_out_max; ++x_out) {
const uint32_t frac = (uint32_t)MULT_FIX(frow[x_out], yscale); const uint32_t frac = (uint32_t)MULT_FIX_FLOOR(frow[x_out], yscale);
const int v = (int)MULT_FIX_FLOOR(irow[x_out] - frac, wrk->fxy_scale); const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
assert(v >= 0 && v <= 255); dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
dst[x_out] = v;
irow[x_out] = frac; // new fractional start irow[x_out] = frac; // new fractional start
} }
} else { } else {
for (x_out = 0; x_out < x_out_max; ++x_out) { for (x_out = 0; x_out < x_out_max; ++x_out) {
const int v = (int)MULT_FIX(irow[x_out], wrk->fxy_scale); const int v = (int)MULT_FIX(irow[x_out], wrk->fxy_scale);
assert(v >= 0 && v <= 255); dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
dst[x_out] = v;
irow[x_out] = 0; irow[x_out] = 0;
} }
} }

View File

@ -107,10 +107,9 @@ static void ExportRowShrink_MIPSdspR2(WebPRescaler* const wrk) {
); );
} }
for (i = 0; i < (x_out_max & 0x3); ++i) { for (i = 0; i < (x_out_max & 0x3); ++i) {
const uint32_t frac = (uint32_t)MULT_FIX(*frow++, yscale); const uint32_t frac = (uint32_t)MULT_FIX_FLOOR(*frow++, yscale);
const int v = (int)MULT_FIX_FLOOR(*irow - frac, wrk->fxy_scale); const int v = (int)MULT_FIX(*irow - frac, wrk->fxy_scale);
assert(v >= 0 && v <= 255); *dst++ = (v > 255) ? 255u : (uint8_t)v;
*dst++ = v;
*irow++ = frac; // new fractional start *irow++ = frac; // new fractional start
} }
} else { } else {
@ -157,8 +156,7 @@ static void ExportRowShrink_MIPSdspR2(WebPRescaler* const wrk) {
} }
for (i = 0; i < (x_out_max & 0x3); ++i) { for (i = 0; i < (x_out_max & 0x3); ++i) {
const int v = (int)MULT_FIX_FLOOR(*irow, wrk->fxy_scale); const int v = (int)MULT_FIX_FLOOR(*irow, wrk->fxy_scale);
assert(v >= 0 && v <= 255); *dst++ = (v > 255) ? 255u : (uint8_t)v;
*dst++ = v;
*irow++ = 0; *irow++ = 0;
} }
} }
@ -219,8 +217,7 @@ static void ExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) {
for (i = 0; i < (x_out_max & 0x3); ++i) { for (i = 0; i < (x_out_max & 0x3); ++i) {
const uint32_t J = *frow++; const uint32_t J = *frow++;
const int v = (int)MULT_FIX(J, wrk->fy_scale); const int v = (int)MULT_FIX(J, wrk->fy_scale);
assert(v >= 0 && v <= 255); *dst++ = (v > 255) ? 255u : (uint8_t)v;
*dst++ = v;
} }
} else { } else {
const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub); const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
@ -291,8 +288,7 @@ static void ExportRowExpand_MIPSdspR2(WebPRescaler* const wrk) {
+ (uint64_t)B * *irow++; + (uint64_t)B * *irow++;
const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX); const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
const int v = (int)MULT_FIX(J, wrk->fy_scale); const int v = (int)MULT_FIX(J, wrk->fy_scale);
assert(v >= 0 && v <= 255); *dst++ = (v > 255) ? 255u : (uint8_t)v;
*dst++ = v;
} }
} }
} }

View File

@ -166,8 +166,7 @@ static WEBP_INLINE void ExportRowExpand_0(const uint32_t* frow, uint8_t* dst,
for (x_out = 0; x_out < length; ++x_out) { for (x_out = 0; x_out < length; ++x_out) {
const uint32_t J = frow[x_out]; const uint32_t J = frow[x_out];
const int v = (int)MULT_FIX(J, wrk->fy_scale); const int v = (int)MULT_FIX(J, wrk->fy_scale);
assert(v >= 0 && v <= 255); dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
dst[x_out] = v;
} }
} }
} }
@ -241,8 +240,7 @@ static WEBP_INLINE void ExportRowExpand_1(const uint32_t* frow, uint32_t* irow,
+ (uint64_t)B * irow[x_out]; + (uint64_t)B * irow[x_out];
const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX); const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
const int v = (int)MULT_FIX(J, wrk->fy_scale); const int v = (int)MULT_FIX(J, wrk->fy_scale);
assert(v >= 0 && v <= 255); dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
dst[x_out] = v;
} }
} }
} }
@ -342,10 +340,9 @@ static WEBP_INLINE void ExportRowShrink_0(const uint32_t* frow, uint32_t* irow,
length -= 4; length -= 4;
} }
for (x_out = 0; x_out < length; ++x_out) { for (x_out = 0; x_out < length; ++x_out) {
const uint32_t frac = (uint32_t)MULT_FIX(frow[x_out], yscale); const uint32_t frac = (uint32_t)MULT_FIX_FLOOR(frow[x_out], yscale);
const int v = (int)MULT_FIX_FLOOR(irow[x_out] - frac, wrk->fxy_scale); const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
assert(v >= 0 && v <= 255); dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
dst[x_out] = v;
irow[x_out] = frac; irow[x_out] = frac;
} }
} }
@ -406,8 +403,7 @@ static WEBP_INLINE void ExportRowShrink_1(uint32_t* irow, uint8_t* dst,
} }
for (x_out = 0; x_out < length; ++x_out) { for (x_out = 0; x_out < length; ++x_out) {
const int v = (int)MULT_FIX(irow[x_out], wrk->fxy_scale); const int v = (int)MULT_FIX(irow[x_out], wrk->fxy_scale);
assert(v >= 0 && v <= 255); dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
dst[x_out] = v;
irow[x_out] = 0; irow[x_out] = 0;
} }
} }

View File

@ -81,14 +81,13 @@ static void RescalerExportRowExpand_NEON(WebPRescaler* const wrk) {
const uint32x4_t B1 = MULT_FIX(A1, fy_scale_half); const uint32x4_t B1 = MULT_FIX(A1, fy_scale_half);
const uint16x4_t C0 = vmovn_u32(B0); const uint16x4_t C0 = vmovn_u32(B0);
const uint16x4_t C1 = vmovn_u32(B1); const uint16x4_t C1 = vmovn_u32(B1);
const uint8x8_t D = vmovn_u16(vcombine_u16(C0, C1)); const uint8x8_t D = vqmovn_u16(vcombine_u16(C0, C1));
vst1_u8(dst + x_out, D); vst1_u8(dst + x_out, D);
} }
for (; x_out < x_out_max; ++x_out) { for (; x_out < x_out_max; ++x_out) {
const uint32_t J = frow[x_out]; const uint32_t J = frow[x_out];
const int v = (int)MULT_FIX_C(J, fy_scale); const int v = (int)MULT_FIX_C(J, fy_scale);
assert(v >= 0 && v <= 255); dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
dst[x_out] = v;
} }
} else { } else {
const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub); const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
@ -102,7 +101,7 @@ static void RescalerExportRowExpand_NEON(WebPRescaler* const wrk) {
const uint32x4_t D1 = MULT_FIX(C1, fy_scale_half); const uint32x4_t D1 = MULT_FIX(C1, fy_scale_half);
const uint16x4_t E0 = vmovn_u32(D0); const uint16x4_t E0 = vmovn_u32(D0);
const uint16x4_t E1 = vmovn_u32(D1); const uint16x4_t E1 = vmovn_u32(D1);
const uint8x8_t F = vmovn_u16(vcombine_u16(E0, E1)); const uint8x8_t F = vqmovn_u16(vcombine_u16(E0, E1));
vst1_u8(dst + x_out, F); vst1_u8(dst + x_out, F);
} }
for (; x_out < x_out_max; ++x_out) { for (; x_out < x_out_max; ++x_out) {
@ -110,8 +109,7 @@ static void RescalerExportRowExpand_NEON(WebPRescaler* const wrk) {
+ (uint64_t)B * irow[x_out]; + (uint64_t)B * irow[x_out];
const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX); const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
const int v = (int)MULT_FIX_C(J, fy_scale); const int v = (int)MULT_FIX_C(J, fy_scale);
assert(v >= 0 && v <= 255); dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
dst[x_out] = v;
} }
} }
} }
@ -135,23 +133,22 @@ static void RescalerExportRowShrink_NEON(WebPRescaler* const wrk) {
for (x_out = 0; x_out < max_span; x_out += 8) { for (x_out = 0; x_out < max_span; x_out += 8) {
LOAD_32x8(frow + x_out, in0, in1); LOAD_32x8(frow + x_out, in0, in1);
LOAD_32x8(irow + x_out, in2, in3); LOAD_32x8(irow + x_out, in2, in3);
const uint32x4_t A0 = MULT_FIX(in0, yscale_half); const uint32x4_t A0 = MULT_FIX_FLOOR(in0, yscale_half);
const uint32x4_t A1 = MULT_FIX(in1, yscale_half); const uint32x4_t A1 = MULT_FIX_FLOOR(in1, yscale_half);
const uint32x4_t B0 = vqsubq_u32(in2, A0); const uint32x4_t B0 = vqsubq_u32(in2, A0);
const uint32x4_t B1 = vqsubq_u32(in3, A1); const uint32x4_t B1 = vqsubq_u32(in3, A1);
const uint32x4_t C0 = MULT_FIX_FLOOR(B0, fxy_scale_half); const uint32x4_t C0 = MULT_FIX(B0, fxy_scale_half);
const uint32x4_t C1 = MULT_FIX_FLOOR(B1, fxy_scale_half); const uint32x4_t C1 = MULT_FIX(B1, fxy_scale_half);
const uint16x4_t D0 = vmovn_u32(C0); const uint16x4_t D0 = vmovn_u32(C0);
const uint16x4_t D1 = vmovn_u32(C1); const uint16x4_t D1 = vmovn_u32(C1);
const uint8x8_t E = vmovn_u16(vcombine_u16(D0, D1)); const uint8x8_t E = vqmovn_u16(vcombine_u16(D0, D1));
vst1_u8(dst + x_out, E); vst1_u8(dst + x_out, E);
STORE_32x8(A0, A1, irow + x_out); STORE_32x8(A0, A1, irow + x_out);
} }
for (; x_out < x_out_max; ++x_out) { for (; x_out < x_out_max; ++x_out) {
const uint32_t frac = (uint32_t)MULT_FIX_C(frow[x_out], yscale); const uint32_t frac = (uint32_t)MULT_FIX_FLOOR_C(frow[x_out], yscale);
const int v = (int)MULT_FIX_FLOOR_C(irow[x_out] - frac, fxy_scale); const int v = (int)MULT_FIX_C(irow[x_out] - frac, fxy_scale);
assert(v >= 0 && v <= 255); dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
dst[x_out] = v;
irow[x_out] = frac; // new fractional start irow[x_out] = frac; // new fractional start
} }
} else { } else {
@ -161,14 +158,13 @@ static void RescalerExportRowShrink_NEON(WebPRescaler* const wrk) {
const uint32x4_t A1 = MULT_FIX(in1, fxy_scale_half); const uint32x4_t A1 = MULT_FIX(in1, fxy_scale_half);
const uint16x4_t B0 = vmovn_u32(A0); const uint16x4_t B0 = vmovn_u32(A0);
const uint16x4_t B1 = vmovn_u32(A1); const uint16x4_t B1 = vmovn_u32(A1);
const uint8x8_t C = vmovn_u16(vcombine_u16(B0, B1)); const uint8x8_t C = vqmovn_u16(vcombine_u16(B0, B1));
vst1_u8(dst + x_out, C); vst1_u8(dst + x_out, C);
STORE_32x8(zero, zero, irow + x_out); STORE_32x8(zero, zero, irow + x_out);
} }
for (; x_out < x_out_max; ++x_out) { for (; x_out < x_out_max; ++x_out) {
const int v = (int)MULT_FIX_C(irow[x_out], fxy_scale); const int v = (int)MULT_FIX_C(irow[x_out], fxy_scale);
assert(v >= 0 && v <= 255); dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
dst[x_out] = v;
irow[x_out] = 0; irow[x_out] = 0;
} }
} }

View File

@ -225,35 +225,6 @@ static WEBP_INLINE void ProcessRow_SSE2(const __m128i* const A0,
_mm_storel_epi64((__m128i*)dst, G); _mm_storel_epi64((__m128i*)dst, G);
} }
// Truncating ("floor") row export.
// Multiplies the low 32-bit value of every 64-bit lane of A0..A3 by the
// matching lane of 'mult', shifts each 64-bit product right by
// WEBP_RESCALER_RFIX without adding any rounding term, then narrows the eight
// results to bytes (with saturation) and writes them to dst[0..7].
static WEBP_INLINE void ProcessRow_Floor_SSE2(const __m128i* const A0,
                                              const __m128i* const A1,
                                              const __m128i* const A2,
                                              const __m128i* const A3,
                                              const __m128i* const mult,
                                              uint8_t* const dst) {
  // Keeps only the upper 32 bits of each 64-bit lane.
  const __m128i upper_half = _mm_set_epi32(0xffffffffu, 0, 0xffffffffu, 0);
  // 32x32 -> 64-bit products, two per register.
  const __m128i prod0 = _mm_mul_epu32(*A0, *mult);
  const __m128i prod1 = _mm_mul_epu32(*A1, *mult);
  const __m128i prod2 = _mm_mul_epu32(*A2, *mult);
  const __m128i prod3 = _mm_mul_epu32(*A3, *mult);
  // Results for A0/A1 end up in the lower 32 bits of each 64-bit lane.
  const __m128i low0 = _mm_srli_epi64(prod0, WEBP_RESCALER_RFIX);
  const __m128i low1 = _mm_srli_epi64(prod1, WEBP_RESCALER_RFIX);
  // Results for A2/A3 are placed in the upper 32 bits instead: shifting left
  // by (32 - RFIX) and masking selects the same product bits as shifting
  // right by RFIX, but leaves them in the upper half of the lane.
#if (WEBP_RESCALER_RFIX < 32)
  const __m128i high2 = _mm_and_si128(
      _mm_slli_epi64(prod2, 32 - WEBP_RESCALER_RFIX), upper_half);
  const __m128i high3 = _mm_and_si128(
      _mm_slli_epi64(prod3, 32 - WEBP_RESCALER_RFIX), upper_half);
#else
  const __m128i high2 = _mm_and_si128(prod2, upper_half);
  const __m128i high3 = _mm_and_si128(prod3, upper_half);
#endif
  // Merge into two registers of four 32-bit values, then narrow
  // 32 -> 16 bits (signed saturation) and 16 -> 8 bits (unsigned saturation).
  const __m128i packed16 = _mm_packs_epi32(_mm_or_si128(low0, high2),
                                           _mm_or_si128(low1, high3));
  const __m128i packed8 = _mm_packus_epi16(packed16, packed16);
  // Only the low 8 bytes hold distinct results.
  _mm_storel_epi64((__m128i*)dst, packed8);
}
static void RescalerExportRowExpand_SSE2(WebPRescaler* const wrk) { static void RescalerExportRowExpand_SSE2(WebPRescaler* const wrk) {
int x_out; int x_out;
uint8_t* const dst = wrk->dst; uint8_t* const dst = wrk->dst;
@ -274,8 +245,7 @@ static void RescalerExportRowExpand_SSE2(WebPRescaler* const wrk) {
for (; x_out < x_out_max; ++x_out) { for (; x_out < x_out_max; ++x_out) {
const uint32_t J = frow[x_out]; const uint32_t J = frow[x_out];
const int v = (int)MULT_FIX(J, wrk->fy_scale); const int v = (int)MULT_FIX(J, wrk->fy_scale);
assert(v >= 0 && v <= 255); dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
dst[x_out] = v;
} }
} else { } else {
const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub); const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub);
@ -308,8 +278,7 @@ static void RescalerExportRowExpand_SSE2(WebPRescaler* const wrk) {
+ (uint64_t)B * irow[x_out]; + (uint64_t)B * irow[x_out];
const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX); const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX);
const int v = (int)MULT_FIX(J, wrk->fy_scale); const int v = (int)MULT_FIX(J, wrk->fy_scale);
assert(v >= 0 && v <= 255); dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
dst[x_out] = v;
} }
} }
} }
@ -328,20 +297,15 @@ static void RescalerExportRowShrink_SSE2(WebPRescaler* const wrk) {
const int scale_xy = wrk->fxy_scale; const int scale_xy = wrk->fxy_scale;
const __m128i mult_xy = _mm_set_epi32(0, scale_xy, 0, scale_xy); const __m128i mult_xy = _mm_set_epi32(0, scale_xy, 0, scale_xy);
const __m128i mult_y = _mm_set_epi32(0, yscale, 0, yscale); const __m128i mult_y = _mm_set_epi32(0, yscale, 0, yscale);
const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER);
for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) { for (x_out = 0; x_out + 8 <= x_out_max; x_out += 8) {
__m128i A0, A1, A2, A3, B0, B1, B2, B3; __m128i A0, A1, A2, A3, B0, B1, B2, B3;
LoadDispatchAndMult_SSE2(irow + x_out, NULL, &A0, &A1, &A2, &A3); LoadDispatchAndMult_SSE2(irow + x_out, NULL, &A0, &A1, &A2, &A3);
LoadDispatchAndMult_SSE2(frow + x_out, &mult_y, &B0, &B1, &B2, &B3); LoadDispatchAndMult_SSE2(frow + x_out, &mult_y, &B0, &B1, &B2, &B3);
{ {
const __m128i C0 = _mm_add_epi64(B0, rounder); const __m128i D0 = _mm_srli_epi64(B0, WEBP_RESCALER_RFIX); // = frac
const __m128i C1 = _mm_add_epi64(B1, rounder); const __m128i D1 = _mm_srli_epi64(B1, WEBP_RESCALER_RFIX);
const __m128i C2 = _mm_add_epi64(B2, rounder); const __m128i D2 = _mm_srli_epi64(B2, WEBP_RESCALER_RFIX);
const __m128i C3 = _mm_add_epi64(B3, rounder); const __m128i D3 = _mm_srli_epi64(B3, WEBP_RESCALER_RFIX);
const __m128i D0 = _mm_srli_epi64(C0, WEBP_RESCALER_RFIX); // = frac
const __m128i D1 = _mm_srli_epi64(C1, WEBP_RESCALER_RFIX);
const __m128i D2 = _mm_srli_epi64(C2, WEBP_RESCALER_RFIX);
const __m128i D3 = _mm_srli_epi64(C3, WEBP_RESCALER_RFIX);
const __m128i E0 = _mm_sub_epi64(A0, D0); // irow[x] - frac const __m128i E0 = _mm_sub_epi64(A0, D0); // irow[x] - frac
const __m128i E1 = _mm_sub_epi64(A1, D1); const __m128i E1 = _mm_sub_epi64(A1, D1);
const __m128i E2 = _mm_sub_epi64(A2, D2); const __m128i E2 = _mm_sub_epi64(A2, D2);
@ -352,14 +316,13 @@ static void RescalerExportRowShrink_SSE2(WebPRescaler* const wrk) {
const __m128i G1 = _mm_or_si128(D1, F3); const __m128i G1 = _mm_or_si128(D1, F3);
_mm_storeu_si128((__m128i*)(irow + x_out + 0), G0); _mm_storeu_si128((__m128i*)(irow + x_out + 0), G0);
_mm_storeu_si128((__m128i*)(irow + x_out + 4), G1); _mm_storeu_si128((__m128i*)(irow + x_out + 4), G1);
ProcessRow_Floor_SSE2(&E0, &E1, &E2, &E3, &mult_xy, dst + x_out); ProcessRow_SSE2(&E0, &E1, &E2, &E3, &mult_xy, dst + x_out);
} }
} }
for (; x_out < x_out_max; ++x_out) { for (; x_out < x_out_max; ++x_out) {
const uint32_t frac = (int)MULT_FIX(frow[x_out], yscale); const uint32_t frac = (int)MULT_FIX_FLOOR(frow[x_out], yscale);
const int v = (int)MULT_FIX_FLOOR(irow[x_out] - frac, wrk->fxy_scale); const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale);
assert(v >= 0 && v <= 255); dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
dst[x_out] = v;
irow[x_out] = frac; // new fractional start irow[x_out] = frac; // new fractional start
} }
} else { } else {
@ -375,8 +338,7 @@ static void RescalerExportRowShrink_SSE2(WebPRescaler* const wrk) {
} }
for (; x_out < x_out_max; ++x_out) { for (; x_out < x_out_max; ++x_out) {
const int v = (int)MULT_FIX(irow[x_out], scale); const int v = (int)MULT_FIX(irow[x_out], scale);
assert(v >= 0 && v <= 255); dst[x_out] = (v > 255) ? 255u : (uint8_t)v;
dst[x_out] = v;
irow[x_out] = 0; irow[x_out] = 0;
} }
} }

View File

@ -576,9 +576,9 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y, \
const uint32_t l_uv = ((cur_u[0]) | ((cur_v[0]) << 16)); \ const uint32_t l_uv = ((cur_u[0]) | ((cur_v[0]) << 16)); \
const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2; \ const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2; \
const uint8_t* ptop_y = &top_y[1]; \ const uint8_t* ptop_y = &top_y[1]; \
uint8_t *ptop_dst = top_dst + XSTEP; \ uint8_t* ptop_dst = top_dst + XSTEP; \
const uint8_t* pbot_y = &bot_y[1]; \ const uint8_t* pbot_y = &bot_y[1]; \
uint8_t *pbot_dst = bot_dst + XSTEP; \ uint8_t* pbot_dst = bot_dst + XSTEP; \
\ \
FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst); \ FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst); \
if (bot_y != NULL) { \ if (bot_y != NULL) { \

View File

@ -58,8 +58,8 @@
} while (0) } while (0)
// Turn the macro into a function for reducing code-size when non-critical // Turn the macro into a function for reducing code-size when non-critical
static void Upsample16Pixels_NEON(const uint8_t *r1, const uint8_t *r2, static void Upsample16Pixels_NEON(const uint8_t* r1, const uint8_t* r2,
uint8_t *out) { uint8_t* out) {
UPSAMPLE_16PIXELS(r1, r2, out); UPSAMPLE_16PIXELS(r1, r2, out);
} }
@ -190,14 +190,14 @@ static const int16_t kCoeffs1[4] = { 19077, 26149, 6419, 13320 };
} }
#define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP) \ #define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP) \
static void FUNC_NAME(const uint8_t *top_y, const uint8_t *bottom_y, \ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
const uint8_t *top_u, const uint8_t *top_v, \ const uint8_t* top_u, const uint8_t* top_v, \
const uint8_t *cur_u, const uint8_t *cur_v, \ const uint8_t* cur_u, const uint8_t* cur_v, \
uint8_t *top_dst, uint8_t *bottom_dst, int len) { \ uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
int block; \ int block; \
/* 16 byte aligned array to cache reconstructed u and v */ \ /* 16 byte aligned array to cache reconstructed u and v */ \
uint8_t uv_buf[2 * 32 + 15]; \ uint8_t uv_buf[2 * 32 + 15]; \
uint8_t *const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \ uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \
const int uv_len = (len + 1) >> 1; \ const int uv_len = (len + 1) >> 1; \
/* 9 pixels must be read-able for each block */ \ /* 9 pixels must be read-able for each block */ \
const int num_blocks = (uv_len - 1) >> 3; \ const int num_blocks = (uv_len - 1) >> 3; \

View File

@ -191,13 +191,14 @@ void VP8LHashChainClear(VP8LHashChain* const p) {
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
#define HASH_MULTIPLIER_HI (0xc6a4a793ULL) static const uint32_t kHashMultiplierHi = 0xc6a4a793u;
#define HASH_MULTIPLIER_LO (0x5bd1e996ULL) static const uint32_t kHashMultiplierLo = 0x5bd1e996u;
static WEBP_INLINE uint32_t GetPixPairHash64(const uint32_t* const argb) { static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE
uint32_t GetPixPairHash64(const uint32_t* const argb) {
uint32_t key; uint32_t key;
key = (argb[1] * HASH_MULTIPLIER_HI) & 0xffffffffu; key = argb[1] * kHashMultiplierHi;
key += (argb[0] * HASH_MULTIPLIER_LO) & 0xffffffffu; key += argb[0] * kHashMultiplierLo;
key = key >> (32 - HASH_BITS); key = key >> (32 - HASH_BITS);
return key; return key;
} }

View File

@ -641,7 +641,7 @@ static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo,
// Merges some histograms with same bin_id together if it's advantageous. // Merges some histograms with same bin_id together if it's advantageous.
// Sets the remaining histograms to NULL. // Sets the remaining histograms to NULL.
static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo, static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo,
int *num_used, int* num_used,
const uint16_t* const clusters, const uint16_t* const clusters,
uint16_t* const cluster_mappings, uint16_t* const cluster_mappings,
VP8LHistogram* cur_combo, VP8LHistogram* cur_combo,
@ -929,9 +929,8 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo,
} }
mappings = (int*) WebPSafeMalloc(*num_used, sizeof(*mappings)); mappings = (int*) WebPSafeMalloc(*num_used, sizeof(*mappings));
if (mappings == NULL || !HistoQueueInit(&histo_queue, kHistoQueueSize)) { if (mappings == NULL) return 0;
goto End; if (!HistoQueueInit(&histo_queue, kHistoQueueSize)) goto End;
}
// Fill the initial mapping. // Fill the initial mapping.
for (j = 0, iter = 0; iter < image_histo->size; ++iter) { for (j = 0, iter = 0; iter < image_histo->size; ++iter) {
if (histograms[iter] == NULL) continue; if (histograms[iter] == NULL) continue;

View File

@ -29,11 +29,15 @@
#define USE_INVERSE_ALPHA_TABLE #define USE_INVERSE_ALPHA_TABLE
#ifdef WORDS_BIGENDIAN #ifdef WORDS_BIGENDIAN
#define ALPHA_OFFSET 0 // uint32_t 0xff000000 is 0xff,00,00,00 in memory // uint32_t 0xff000000 is 0xff,00,00,00 in memory
#define CHANNEL_OFFSET(i) (i)
#else #else
#define ALPHA_OFFSET 3 // uint32_t 0xff000000 is 0x00,00,00,ff in memory // uint32_t 0xff000000 is 0x00,00,00,ff in memory
#define CHANNEL_OFFSET(i) (3-(i))
#endif #endif
#define ALPHA_OFFSET CHANNEL_OFFSET(0)
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Detection of non-trivial transparency // Detection of non-trivial transparency
@ -997,10 +1001,10 @@ static int PictureARGBToYUVA(WebPPicture* picture, WebPEncCSP colorspace,
return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION);
} else { } else {
const uint8_t* const argb = (const uint8_t*)picture->argb; const uint8_t* const argb = (const uint8_t*)picture->argb;
const uint8_t* const a = argb + (0 ^ ALPHA_OFFSET); const uint8_t* const a = argb + CHANNEL_OFFSET(0);
const uint8_t* const r = argb + (1 ^ ALPHA_OFFSET); const uint8_t* const r = argb + CHANNEL_OFFSET(1);
const uint8_t* const g = argb + (2 ^ ALPHA_OFFSET); const uint8_t* const g = argb + CHANNEL_OFFSET(2);
const uint8_t* const b = argb + (3 ^ ALPHA_OFFSET); const uint8_t* const b = argb + CHANNEL_OFFSET(3);
picture->colorspace = WEBP_YUV420; picture->colorspace = WEBP_YUV420;
return ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride, return ImportYUVAFromRGBA(r, g, b, a, 4, 4 * picture->argb_stride,
@ -1050,7 +1054,7 @@ int WebPPictureYUVAToARGB(WebPPicture* picture) {
const int height = picture->height; const int height = picture->height;
const int argb_stride = 4 * picture->argb_stride; const int argb_stride = 4 * picture->argb_stride;
uint8_t* dst = (uint8_t*)picture->argb; uint8_t* dst = (uint8_t*)picture->argb;
const uint8_t *cur_u = picture->u, *cur_v = picture->v, *cur_y = picture->y; const uint8_t* cur_u = picture->u, *cur_v = picture->v, *cur_y = picture->y;
WebPUpsampleLinePairFunc upsample = WebPUpsampleLinePairFunc upsample =
WebPGetLinePairConverter(ALPHA_OFFSET > 0); WebPGetLinePairConverter(ALPHA_OFFSET > 0);

View File

@ -202,7 +202,7 @@ static uint32_t NearLossless(uint32_t value, uint32_t predict,
} }
if ((value >> 24) == 0 || (value >> 24) == 0xff) { if ((value >> 24) == 0 || (value >> 24) == 0xff) {
// Preserve transparency of fully transparent or fully opaque pixels. // Preserve transparency of fully transparent or fully opaque pixels.
a = NearLosslessDiff(value >> 24, predict >> 24); a = NearLosslessDiff((value >> 24) & 0xff, (predict >> 24) & 0xff);
} else { } else {
a = NearLosslessComponent(value >> 24, predict >> 24, 0xff, quantization); a = NearLosslessComponent(value >> 24, predict >> 24, 0xff, quantization);
} }
@ -215,12 +215,12 @@ static uint32_t NearLossless(uint32_t value, uint32_t predict,
// The amount by which green has been adjusted during quantization. It is // The amount by which green has been adjusted during quantization. It is
// subtracted from red and blue for compensation, to avoid accumulating two // subtracted from red and blue for compensation, to avoid accumulating two
// quantization errors in them. // quantization errors in them.
green_diff = NearLosslessDiff(new_green, value >> 8); green_diff = NearLosslessDiff(new_green, (value >> 8) & 0xff);
} }
r = NearLosslessComponent(NearLosslessDiff(value >> 16, green_diff), r = NearLosslessComponent(NearLosslessDiff((value >> 16) & 0xff, green_diff),
(predict >> 16) & 0xff, 0xff - new_green, (predict >> 16) & 0xff, 0xff - new_green,
quantization); quantization);
b = NearLosslessComponent(NearLosslessDiff(value, green_diff), b = NearLosslessComponent(NearLosslessDiff(value & 0xff, green_diff),
predict & 0xff, 0xff - new_green, quantization); predict & 0xff, 0xff - new_green, quantization);
return ((uint32_t)a << 24) | ((uint32_t)r << 16) | ((uint32_t)g << 8) | b; return ((uint32_t)a << 24) | ((uint32_t)r << 16) | ((uint32_t)g << 8) | b;
} }
@ -587,7 +587,7 @@ static void GetBestGreenToRed(
} }
} }
} }
best_tx->green_to_red_ = green_to_red_best; best_tx->green_to_red_ = (green_to_red_best & 0xff);
} }
static float GetPredictionCostCrossColorBlue( static float GetPredictionCostCrossColorBlue(
@ -666,8 +666,8 @@ static void GetBestGreenRedToBlue(
break; // out of iter-loop. break; // out of iter-loop.
} }
} }
best_tx->green_to_blue_ = green_to_blue_best; best_tx->green_to_blue_ = green_to_blue_best & 0xff;
best_tx->red_to_blue_ = red_to_blue_best; best_tx->red_to_blue_ = red_to_blue_best & 0xff;
} }
#undef kGreenRedToBlueMaxIters #undef kGreenRedToBlueMaxIters
#undef kGreenRedToBlueNumAxis #undef kGreenRedToBlueNumAxis

View File

@ -33,7 +33,7 @@
// number of non-zero coeffs below which we consider the block very flat // number of non-zero coeffs below which we consider the block very flat
// (and apply a penalty to complex predictions) // (and apply a penalty to complex predictions)
#define FLATNESS_LIMIT_I16 10 // I16 mode #define FLATNESS_LIMIT_I16 0 // I16 mode (special case)
#define FLATNESS_LIMIT_I4 3 // I4 mode #define FLATNESS_LIMIT_I4 3 // I4 mode
#define FLATNESS_LIMIT_UV 2 // UV mode #define FLATNESS_LIMIT_UV 2 // UV mode
#define FLATNESS_PENALTY 140 // roughly ~1bit per block #define FLATNESS_PENALTY 140 // roughly ~1bit per block
@ -988,6 +988,7 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
VP8ModeScore* rd_cur = &rd_tmp; VP8ModeScore* rd_cur = &rd_tmp;
VP8ModeScore* rd_best = rd; VP8ModeScore* rd_best = rd;
int mode; int mode;
int is_flat = IsFlatSource16(it->yuv_in_ + Y_OFF_ENC);
rd->mode_i16 = -1; rd->mode_i16 = -1;
for (mode = 0; mode < NUM_PRED_MODES; ++mode) { for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
@ -1003,10 +1004,14 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0; tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY)) : 0;
rd_cur->H = VP8FixedCostsI16[mode]; rd_cur->H = VP8FixedCostsI16[mode];
rd_cur->R = VP8GetCostLuma16(it, rd_cur); rd_cur->R = VP8GetCostLuma16(it, rd_cur);
if (mode > 0 && if (is_flat) {
IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) { // refine the first impression (which was in pixel space)
// penalty to avoid flat area to be mispredicted by complex mode is_flat = IsFlat(rd_cur->y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16);
rd_cur->R += FLATNESS_PENALTY * kNumBlocks; if (is_flat) {
// Block is very flat. We put emphasis on the distortion being very low!
rd_cur->D *= 2;
rd_cur->SD *= 2;
}
} }
// Since we always examine Intra16 first, we can overwrite *rd directly. // Since we always examine Intra16 first, we can overwrite *rd directly.
@ -1087,7 +1092,8 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
: 0; : 0;
rd_tmp.H = mode_costs[mode]; rd_tmp.H = mode_costs[mode];
// Add flatness penalty // Add flatness penalty, to avoid flat area to be mispredicted
// by a complex mode.
if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) { if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {
rd_tmp.R = FLATNESS_PENALTY * kNumBlocks; rd_tmp.R = FLATNESS_PENALTY * kNumBlocks;
} else { } else {
@ -1242,11 +1248,19 @@ static void RefineUsingDistortion(VP8EncIterator* const it,
if (mode > 0 && VP8FixedCostsI16[mode] > bit_limit) { if (mode > 0 && VP8FixedCostsI16[mode] > bit_limit) {
continue; continue;
} }
if (score < best_score) { if (score < best_score) {
best_mode = mode; best_mode = mode;
best_score = score; best_score = score;
} }
} }
if (it->x_ == 0 || it->y_ == 0) {
// avoid starting a checkerboard resonance from the border. See bug #432.
if (IsFlatSource16(src)) {
best_mode = (it->x_ == 0) ? 0 : 2;
try_both_modes = 0; // stick to i16
}
}
VP8SetIntra16Mode(it, best_mode); VP8SetIntra16Mode(it, best_mode);
// we'll reconstruct later, if i16 mode actually gets selected // we'll reconstruct later, if i16 mode actually gets selected
} }

View File

@ -31,8 +31,8 @@ extern "C" {
// version numbers // version numbers
#define ENC_MAJ_VERSION 1 #define ENC_MAJ_VERSION 1
#define ENC_MIN_VERSION 0 #define ENC_MIN_VERSION 1
#define ENC_REV_VERSION 2 #define ENC_REV_VERSION 0
enum { MAX_LF_LEVELS = 64, // Maximum loop filter level enum { MAX_LF_LEVELS = 64, // Maximum loop filter level
MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost
@ -249,7 +249,7 @@ typedef struct {
int percent0_; // saved initial progress percent int percent0_; // saved initial progress percent
DError left_derr_; // left error diffusion (u/v) DError left_derr_; // left error diffusion (u/v)
DError *top_derr_; // top diffusion error - NULL if disabled DError* top_derr_; // top diffusion error - NULL if disabled
uint8_t* y_left_; // left luma samples (addressable from index -1 to 15). uint8_t* y_left_; // left luma samples (addressable from index -1 to 15).
uint8_t* u_left_; // left u samples (addressable from index -1 to 7) uint8_t* u_left_; // left u samples (addressable from index -1 to 7)

View File

@ -28,8 +28,8 @@ extern "C" {
// Defines and constants. // Defines and constants.
#define MUX_MAJ_VERSION 1 #define MUX_MAJ_VERSION 1
#define MUX_MIN_VERSION 0 #define MUX_MIN_VERSION 1
#define MUX_REV_VERSION 2 #define MUX_REV_VERSION 0
// Chunk object. // Chunk object.
typedef struct WebPChunk WebPChunk; typedef struct WebPChunk WebPChunk;

View File

@ -100,7 +100,7 @@ static int MuxImageParse(const WebPChunk* const chunk, int copy_data,
WebPMuxImage* const wpi) { WebPMuxImage* const wpi) {
const uint8_t* bytes = chunk->data_.bytes; const uint8_t* bytes = chunk->data_.bytes;
size_t size = chunk->data_.size; size_t size = chunk->data_.size;
const uint8_t* const last = bytes + size; const uint8_t* const last = (bytes == NULL) ? NULL : bytes + size;
WebPChunk subchunk; WebPChunk subchunk;
size_t subchunk_size; size_t subchunk_size;
WebPChunk** unknown_chunk_list = &wpi->unknown_; WebPChunk** unknown_chunk_list = &wpi->unknown_;

View File

@ -104,7 +104,8 @@ void VP8LoadNewBytes(VP8BitReader* const br) {
} }
// Read a bit with proba 'prob'. Speed-critical function! // Read a bit with proba 'prob'. Speed-critical function!
static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) { static WEBP_INLINE int VP8GetBit(VP8BitReader* const br,
int prob, const char label[]) {
// Don't move this declaration! It makes a big speed difference to store // Don't move this declaration! It makes a big speed difference to store
// 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't
// alter br->range_ value. // alter br->range_ value.
@ -129,13 +130,14 @@ static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) {
br->bits_ -= shift; br->bits_ -= shift;
} }
br->range_ = range - 1; br->range_ = range - 1;
BT_TRACK(br);
return bit; return bit;
} }
} }
// simplified version of VP8GetBit() for prob=0x80 (note shift is always 1 here) // simplified version of VP8GetBit() for prob=0x80 (note shift is always 1 here)
static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE
int VP8GetSigned(VP8BitReader* const br, int v) { int VP8GetSigned(VP8BitReader* const br, int v, const char label[]) {
if (br->bits_ < 0) { if (br->bits_ < 0) {
VP8LoadNewBytes(br); VP8LoadNewBytes(br);
} }
@ -148,11 +150,13 @@ int VP8GetSigned(VP8BitReader* const br, int v) {
br->range_ += mask; br->range_ += mask;
br->range_ |= 1; br->range_ |= 1;
br->value_ -= (bit_t)((split + 1) & mask) << pos; br->value_ -= (bit_t)((split + 1) & mask) << pos;
BT_TRACK(br);
return (v ^ mask) - mask; return (v ^ mask) - mask;
} }
} }
static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* const br, int prob) { static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* const br,
int prob, const char label[]) {
// Don't move this declaration! It makes a big speed difference to store // Don't move this declaration! It makes a big speed difference to store
// 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't // 'range' *before* calling VP8LoadNewBytes(), even if this function doesn't
// alter br->range_ value. // alter br->range_ value.
@ -179,6 +183,7 @@ static WEBP_INLINE int VP8GetBitAlt(VP8BitReader* const br, int prob) {
br->bits_ -= shift; br->bits_ -= shift;
} }
br->range_ = range; br->range_ = range;
BT_TRACK(br);
return bit; return bit;
} }
} }

View File

@ -109,17 +109,18 @@ void VP8LoadFinalBytes(VP8BitReader* const br) {
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Higher-level calls // Higher-level calls
uint32_t VP8GetValue(VP8BitReader* const br, int bits) { uint32_t VP8GetValue(VP8BitReader* const br, int bits, const char label[]) {
uint32_t v = 0; uint32_t v = 0;
while (bits-- > 0) { while (bits-- > 0) {
v |= VP8GetBit(br, 0x80) << bits; v |= VP8GetBit(br, 0x80, label) << bits;
} }
return v; return v;
} }
int32_t VP8GetSignedValue(VP8BitReader* const br, int bits) { int32_t VP8GetSignedValue(VP8BitReader* const br, int bits,
const int value = VP8GetValue(br, bits); const char label[]) {
return VP8Get(br) ? -value : value; const int value = VP8GetValue(br, bits, label);
return VP8Get(br, label) ? -value : value;
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
@ -227,3 +228,78 @@ uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) {
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Bit-tracing tool
#if (BITTRACE > 0)
#include <stdlib.h> // for atexit()
#include <stdio.h>
#include <string.h>
#define MAX_NUM_LABELS 32
// Per-label accumulators. Entries [0, last_label) are in use; they are updated
// by BitTrace() and reported by PrintBitTraces().
static struct {
  const char* label;   // label text (stored by pointer, not copied)
  int size;            // accumulated size, in bits
  int count;           // number of traces recorded under this label
} kLabels[MAX_NUM_LABELS];
static int last_label = 0;               // number of kLabels[] entries in use
static int last_pos = 0;                 // position seen by the previous trace
static const uint8_t* buf_start = NULL;  // reference point for positions
static int init_done = 0;                // set once tracing is initialized

// Prints one line per recorded label (size, share of the total, call count)
// followed by the grand total. Sizes are printed in bits, or in bytes
// (rounded up) when BITTRACE == 2.
static void PrintBitTraces(void) {
  int i;
  int scale = 1;
  int total = 0;
  const char* units = "bits";
#if (BITTRACE == 2)
  scale = 8;
  units = "bytes";
#endif
  for (i = 0; i < last_label; ++i) total += kLabels[i].size;
  // Guard the percentage computation below against a zero (or negative) total.
  if (total < 1) total = 1;
  printf("=== Bit traces ===\n");
  for (i = 0; i < last_label; ++i) {
    // Number of padding spaces needed to align the ':' column.
    const int skip = 16 - (int)strlen(kLabels[i].label);
    const int value = (kLabels[i].size + scale - 1) / scale;   // round up
    assert(skip > 0);
    // "%*s" applied to an empty string emits 'skip' spaces. The previous
    // "\%*s" spelling relied on an invalid escape sequence ('\%'), which
    // compilers only accept with a diagnostic.
    printf("%s %*s: %6d %s \t[%5.2f%%] [count: %7d]\n",
           kLabels[i].label, skip, "", value, units,
           100.f * kLabels[i].size / total,
           kLabels[i].count);
  }
  total = (total + scale - 1) / scale;
  printf("Total: %d %s\n", total, units);
}
// Charges the amount read since the previous trace to the counter named
// 'label', creating that counter on first use. The very first call also
// clears the label table and registers PrintBitTraces() to run at exit.
// Aborts if a new label would exceed the MAX_NUM_LABELS slots.
void BitTrace(const struct VP8BitReader* const br, const char label[]) {
  int slot = 0;
  int cur_pos;

  if (!init_done) {
    memset(kLabels, 0, sizeof(kLabels));
    atexit(PrintBitTraces);
    buf_start = br->buf_;
    init_done = 1;
  }

  // Position relative to buf_start, in bits.
  cur_pos = (int)(br->buf_ - buf_start) * 8 - br->bits_;
  // A jump of more than 32 bits is treated as a switch to another partition:
  // restart the position tracking from the reader's current buffer.
  if (abs(cur_pos - last_pos) > 32) {
    buf_start = br->buf_;
    cur_pos = 0;
    last_pos = 0;
  }
  // NOTE(review): presumably a correction derived from the coder's current
  // range -- confirm against the definition of kVP8Log2Range[].
  if (br->range_ >= 0x7f) cur_pos += kVP8Log2Range[br->range_ - 0x7f];

  // Look for an existing counter; 'slot' ends at last_label when none matches.
  while (slot < last_label && strcmp(label, kLabels[slot].label) != 0) {
    ++slot;
  }
  if (slot == MAX_NUM_LABELS) abort();   // label table is full

  kLabels[slot].label = label;
  kLabels[slot].size += cur_pos - last_pos;
  kLabels[slot].count += 1;
  if (slot == last_label) ++last_label;   // a new counter was just claimed
  last_pos = cur_pos;
}
#endif // BITTRACE > 0
//------------------------------------------------------------------------------

View File

@ -21,6 +21,27 @@
#endif #endif
#include "src/webp/types.h" #include "src/webp/types.h"
// Warning! This macro triggers quite some MACRO wizardry around func signature!
// BITTRACE selects whether the bit-reader entry points carry an extra
// 'label' argument (tracing on) or have it stripped by the preprocessor
// (tracing off). It defaults to 0 unless defined before this point.
#if !defined(BITTRACE)
#define BITTRACE 0 // 0 = off, 1 = print bits, 2 = print bytes
#endif
#if (BITTRACE > 0)
// Tracing enabled: the 'label' argument is kept and BT_TRACK() forwards it
// to BitTrace().
struct VP8BitReader;
extern void BitTrace(const struct VP8BitReader* const br, const char label[]);
// Note: expands to a use of an identifier named 'label', which must be in
// scope wherever BT_TRACK() is used.
#define BT_TRACK(br) BitTrace(br, label)
// Single-bit read expressed as a 1-bit VP8GetValue() call.
#define VP8Get(BR, L) VP8GetValue(BR, 1, L)
#else
// Tracing disabled: BT_TRACK() expands to nothing.
#define BT_TRACK(br)
// We'll REMOVE the 'const char label[]' from all signatures and calls (!!):
// Each function-like macro below drops its last argument, so both the
// declarations and the call sites written with a label lose it.
#define VP8GetValue(BR, N, L) VP8GetValue(BR, N)
#define VP8Get(BR, L) VP8GetValue(BR, 1, L)
#define VP8GetSignedValue(BR, N, L) VP8GetSignedValue(BR, N)
#define VP8GetBit(BR, P, L) VP8GetBit(BR, P)
#define VP8GetBitAlt(BR, P, L) VP8GetBitAlt(BR, P)
#define VP8GetSigned(BR, V, L) VP8GetSigned(BR, V)
#endif
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
@ -102,17 +123,15 @@ void VP8BitReaderSetBuffer(VP8BitReader* const br,
void VP8RemapBitReader(VP8BitReader* const br, ptrdiff_t offset); void VP8RemapBitReader(VP8BitReader* const br, ptrdiff_t offset);
// return the next value made of 'num_bits' bits // return the next value made of 'num_bits' bits
uint32_t VP8GetValue(VP8BitReader* const br, int num_bits); uint32_t VP8GetValue(VP8BitReader* const br, int num_bits, const char label[]);
static WEBP_INLINE uint32_t VP8Get(VP8BitReader* const br) {
return VP8GetValue(br, 1);
}
// return the next value with sign-extension. // return the next value with sign-extension.
int32_t VP8GetSignedValue(VP8BitReader* const br, int num_bits); int32_t VP8GetSignedValue(VP8BitReader* const br, int num_bits,
const char label[]);
// bit_reader_inl.h will implement the following methods: // bit_reader_inl.h will implement the following methods:
// static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) // static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob, ...)
// static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) // static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v, ...)
// and should be included by the .c files that actually need them. // and should be included by the .c files that actually need them.
// This is to avoid recompiling the whole library whenever this file is touched, // This is to avoid recompiling the whole library whenever this file is touched,
// and also allowing platform-specific ad-hoc hacks. // and also allowing platform-specific ad-hoc hacks.

View File

@ -70,7 +70,7 @@ static void Flush(VP8BitWriter* const bw) {
const int value = (bits & 0x100) ? 0x00 : 0xff; const int value = (bits & 0x100) ? 0x00 : 0xff;
for (; bw->run_ > 0; --bw->run_) bw->buf_[pos++] = value; for (; bw->run_ > 0; --bw->run_) bw->buf_[pos++] = value;
} }
bw->buf_[pos++] = bits; bw->buf_[pos++] = bits & 0xff;
bw->pos_ = pos; bw->pos_ = pos;
} else { } else {
bw->run_++; // delay writing of bytes 0xff, pending eventual carry. bw->run_++; // delay writing of bytes 0xff, pending eventual carry.

View File

@ -17,6 +17,7 @@
#include <assert.h> #include <assert.h>
#include "src/dsp/dsp.h"
#include "src/webp/types.h" #include "src/webp/types.h"
#ifdef __cplusplus #ifdef __cplusplus
@ -25,15 +26,16 @@ extern "C" {
// Main color cache struct. // Main color cache struct.
typedef struct { typedef struct {
uint32_t *colors_; // color entries uint32_t* colors_; // color entries
int hash_shift_; // Hash shift: 32 - hash_bits_. int hash_shift_; // Hash shift: 32 - hash_bits_.
int hash_bits_; int hash_bits_;
} VP8LColorCache; } VP8LColorCache;
static const uint64_t kHashMul = 0x1e35a7bdull; static const uint32_t kHashMul = 0x1e35a7bdu;
static WEBP_INLINE int VP8LHashPix(uint32_t argb, int shift) { static WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW WEBP_INLINE
return (int)(((argb * kHashMul) & 0xffffffffu) >> shift); int VP8LHashPix(uint32_t argb, int shift) {
return (int)((argb * kHashMul) >> shift);
} }
static WEBP_INLINE uint32_t VP8LColorCacheLookup( static WEBP_INLINE uint32_t VP8LColorCacheLookup(

View File

@ -91,7 +91,8 @@ static int BuildHuffmanTable(HuffmanCode* const root_table, int root_bits,
assert(code_lengths_size != 0); assert(code_lengths_size != 0);
assert(code_lengths != NULL); assert(code_lengths != NULL);
assert(root_table != NULL); assert((root_table != NULL && sorted != NULL) ||
(root_table == NULL && sorted == NULL));
assert(root_bits > 0); assert(root_bits > 0);
// Build histogram of code lengths. // Build histogram of code lengths.
@ -120,16 +121,22 @@ static int BuildHuffmanTable(HuffmanCode* const root_table, int root_bits,
for (symbol = 0; symbol < code_lengths_size; ++symbol) { for (symbol = 0; symbol < code_lengths_size; ++symbol) {
const int symbol_code_length = code_lengths[symbol]; const int symbol_code_length = code_lengths[symbol];
if (code_lengths[symbol] > 0) { if (code_lengths[symbol] > 0) {
sorted[offset[symbol_code_length]++] = symbol; if (sorted != NULL) {
sorted[offset[symbol_code_length]++] = symbol;
} else {
offset[symbol_code_length]++;
}
} }
} }
// Special case code with only one value. // Special case code with only one value.
if (offset[MAX_ALLOWED_CODE_LENGTH] == 1) { if (offset[MAX_ALLOWED_CODE_LENGTH] == 1) {
HuffmanCode code; if (sorted != NULL) {
code.bits = 0; HuffmanCode code;
code.value = (uint16_t)sorted[0]; code.bits = 0;
ReplicateValue(table, 1, total_size, code); code.value = (uint16_t)sorted[0];
ReplicateValue(table, 1, total_size, code);
}
return total_size; return total_size;
} }
@ -151,6 +158,7 @@ static int BuildHuffmanTable(HuffmanCode* const root_table, int root_bits,
if (num_open < 0) { if (num_open < 0) {
return 0; return 0;
} }
if (root_table == NULL) continue;
for (; count[len] > 0; --count[len]) { for (; count[len] > 0; --count[len]) {
HuffmanCode code; HuffmanCode code;
code.bits = (uint8_t)len; code.bits = (uint8_t)len;
@ -169,6 +177,7 @@ static int BuildHuffmanTable(HuffmanCode* const root_table, int root_bits,
if (num_open < 0) { if (num_open < 0) {
return 0; return 0;
} }
if (root_table == NULL) continue;
for (; count[len] > 0; --count[len]) { for (; count[len] > 0; --count[len]) {
HuffmanCode code; HuffmanCode code;
if ((key & mask) != low) { if ((key & mask) != low) {
@ -206,7 +215,10 @@ int VP8LBuildHuffmanTable(HuffmanCode* const root_table, int root_bits,
const int code_lengths[], int code_lengths_size) { const int code_lengths[], int code_lengths_size) {
int total_size; int total_size;
assert(code_lengths_size <= MAX_CODE_LENGTHS_SIZE); assert(code_lengths_size <= MAX_CODE_LENGTHS_SIZE);
if (code_lengths_size <= SORTED_SIZE_CUTOFF) { if (root_table == NULL) {
total_size = BuildHuffmanTable(NULL, root_bits,
code_lengths, code_lengths_size, NULL);
} else if (code_lengths_size <= SORTED_SIZE_CUTOFF) {
// use local stack-allocated array. // use local stack-allocated array.
uint16_t sorted[SORTED_SIZE_CUTOFF]; uint16_t sorted[SORTED_SIZE_CUTOFF];
total_size = BuildHuffmanTable(root_table, root_bits, total_size = BuildHuffmanTable(root_table, root_bits,

View File

@ -78,6 +78,8 @@ void VP8LHtreeGroupsFree(HTreeGroup* const htree_groups);
// the huffman table. // the huffman table.
// Returns built table size or 0 in case of error (invalid tree or // Returns built table size or 0 in case of error (invalid tree or
// memory error). // memory error).
// If root_table is NULL, it returns 0 if a lookup cannot be built, something
// > 0 otherwise (but not the table size).
int VP8LBuildHuffmanTable(HuffmanCode* const root_table, int root_bits, int VP8LBuildHuffmanTable(HuffmanCode* const root_table, int root_bits,
const int code_lengths[], int code_lengths_size); const int code_lengths[], int code_lengths_size);

View File

@ -84,14 +84,14 @@ int WebPRescalerGetScaledDimensions(int src_width, int src_height,
int height = *scaled_height; int height = *scaled_height;
// if width is unspecified, scale original proportionally to height ratio. // if width is unspecified, scale original proportionally to height ratio.
if (width == 0) { if (width == 0 && src_height > 0) {
width = width =
(int)(((uint64_t)src_width * height + src_height / 2) / src_height); (int)(((uint64_t)src_width * height + src_height - 1) / src_height);
} }
// if height is unspecified, scale original proportionally to width ratio. // if height is unspecified, scale original proportionally to width ratio.
if (height == 0) { if (height == 0 && src_width > 0) {
height = height =
(int)(((uint64_t)src_height * width + src_width / 2) / src_width); (int)(((uint64_t)src_height * width + src_width - 1) / src_width);
} }
// Check if the overall dimensions still make sense. // Check if the overall dimensions still make sense.
if (width <= 0 || height <= 0) { if (width <= 0 || height <= 0) {

View File

@ -73,7 +73,7 @@ typedef struct {
#endif #endif
static int pthread_create(pthread_t* const thread, const void* attr, static int pthread_create(pthread_t* const thread, const void* attr,
unsigned int (__stdcall *start)(void*), void* arg) { unsigned int (__stdcall* start)(void*), void* arg) {
(void)attr; (void)attr;
#ifdef USE_CREATE_THREAD #ifdef USE_CREATE_THREAD
*thread = CreateThread(NULL, /* lpThreadAttributes */ *thread = CreateThread(NULL, /* lpThreadAttributes */
@ -217,8 +217,12 @@ static THREADFN ThreadLoop(void* ptr) {
done = 1; done = 1;
} }
// signal to the main thread that we're done (for Sync()) // signal to the main thread that we're done (for Sync())
pthread_cond_signal(&impl->condition_); // Note the associated mutex does not need to be held when signaling the
// condition. Unlocking the mutex first may improve performance in some
// implementations, avoiding the case where the waiting thread can't
// reacquire the mutex when woken.
pthread_mutex_unlock(&impl->mutex_); pthread_mutex_unlock(&impl->mutex_);
pthread_cond_signal(&impl->condition_);
} }
return THREAD_RETURN(NULL); // Thread is finished return THREAD_RETURN(NULL); // Thread is finished
} }
@ -240,7 +244,13 @@ static void ChangeState(WebPWorker* const worker, WebPWorkerStatus new_status) {
// assign new status and release the working thread if needed // assign new status and release the working thread if needed
if (new_status != OK) { if (new_status != OK) {
worker->status_ = new_status; worker->status_ = new_status;
// Note the associated mutex does not need to be held when signaling the
// condition. Unlocking the mutex first may improve performance in some
// implementations, avoiding the case where the waiting thread can't
// reacquire the mutex when woken.
pthread_mutex_unlock(&impl->mutex_);
pthread_cond_signal(&impl->condition_); pthread_cond_signal(&impl->condition_);
return;
} }
} }
pthread_mutex_unlock(&impl->mutex_); pthread_mutex_unlock(&impl->mutex_);

View File

@ -216,9 +216,14 @@ void WebPSafeFree(void* const ptr) {
free(ptr); free(ptr);
} }
// Public API function. // Public API functions.
void* WebPMalloc(size_t size) {
return WebPSafeMalloc(1, size);
}
void WebPFree(void* ptr) { void WebPFree(void* ptr) {
free(ptr); WebPSafeFree(ptr);
} }
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------

View File

@ -92,14 +92,14 @@ static WEBP_INLINE uint32_t GetLE32(const uint8_t* const data) {
// Store 16, 24 or 32 bits in little-endian order. // Store 16, 24 or 32 bits in little-endian order.
static WEBP_INLINE void PutLE16(uint8_t* const data, int val) { static WEBP_INLINE void PutLE16(uint8_t* const data, int val) {
assert(val < (1 << 16)); assert(val < (1 << 16));
data[0] = (val >> 0); data[0] = (val >> 0) & 0xff;
data[1] = (val >> 8); data[1] = (val >> 8) & 0xff;
} }
static WEBP_INLINE void PutLE24(uint8_t* const data, int val) { static WEBP_INLINE void PutLE24(uint8_t* const data, int val) {
assert(val < (1 << 24)); assert(val < (1 << 24));
PutLE16(data, val & 0xffff); PutLE16(data, val & 0xffff);
data[2] = (val >> 16); data[2] = (val >> 16) & 0xff;
} }
static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) { static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) {

View File

@ -20,7 +20,7 @@
extern "C" { extern "C" {
#endif #endif
#define WEBP_DECODER_ABI_VERSION 0x0208 // MAJOR(8b) + MINOR(8b) #define WEBP_DECODER_ABI_VERSION 0x0209 // MAJOR(8b) + MINOR(8b)
// Note: forward declaring enumerations is not allowed in (strict) C and C++, // Note: forward declaring enumerations is not allowed in (strict) C and C++,
// the types are left here for reference. // the types are left here for reference.
@ -91,9 +91,6 @@ WEBP_EXTERN uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
uint8_t** u, uint8_t** v, uint8_t** u, uint8_t** v,
int* stride, int* uv_stride); int* stride, int* uv_stride);
// Releases memory returned by the WebPDecode*() functions above.
WEBP_EXTERN void WebPFree(void* ptr);
// These five functions are variants of the above ones, that decode the image // These five functions are variants of the above ones, that decode the image
// directly into a pre-allocated buffer 'output_buffer'. The maximum storage // directly into a pre-allocated buffer 'output_buffer'. The maximum storage
// available in this buffer is indicated by 'output_buffer_size'. If this // available in this buffer is indicated by 'output_buffer_size'. If this

View File

@ -20,7 +20,7 @@
extern "C" { extern "C" {
#endif #endif
#define WEBP_ENCODER_ABI_VERSION 0x020e // MAJOR(8b) + MINOR(8b) #define WEBP_ENCODER_ABI_VERSION 0x020f // MAJOR(8b) + MINOR(8b)
// Note: forward declaring enumerations is not allowed in (strict) C and C++, // Note: forward declaring enumerations is not allowed in (strict) C and C++,
// the types are left here for reference. // the types are left here for reference.
@ -62,6 +62,10 @@ WEBP_EXTERN size_t WebPEncodeBGRA(const uint8_t* bgra,
// These functions are the equivalent of the above, but compressing in a // These functions are the equivalent of the above, but compressing in a
// lossless manner. Files are usually larger than lossy format, but will // lossless manner. Files are usually larger than lossy format, but will
// not suffer any compression loss. // not suffer any compression loss.
// Note these functions, like the lossy versions, use the library's default
// settings. For lossless this means 'exact' is disabled. RGB values in
// transparent areas will be modified to improve compression. To avoid this,
// use WebPEncode() and set WebPConfig::exact to 1.
WEBP_EXTERN size_t WebPEncodeLosslessRGB(const uint8_t* rgb, WEBP_EXTERN size_t WebPEncodeLosslessRGB(const uint8_t* rgb,
int width, int height, int stride, int width, int height, int stride,
uint8_t** output); uint8_t** output);
@ -75,9 +79,6 @@ WEBP_EXTERN size_t WebPEncodeLosslessBGRA(const uint8_t* bgra,
int width, int height, int stride, int width, int height, int stride,
uint8_t** output); uint8_t** output);
// Releases memory returned by the WebPEncode*() functions above.
WEBP_EXTERN void WebPFree(void* ptr);
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Coding parameters // Coding parameters
@ -302,7 +303,7 @@ struct WebPPicture {
// YUV input (mostly used for input to lossy compression) // YUV input (mostly used for input to lossy compression)
WebPEncCSP colorspace; // colorspace: should be YUV420 for now (=Y'CbCr). WebPEncCSP colorspace; // colorspace: should be YUV420 for now (=Y'CbCr).
int width, height; // dimensions (less or equal to WEBP_MAX_DIMENSION) int width, height; // dimensions (less or equal to WEBP_MAX_DIMENSION)
uint8_t *y, *u, *v; // pointers to luma/chroma planes. uint8_t* y, *u, *v; // pointers to luma/chroma planes.
int y_stride, uv_stride; // luma/chroma strides. int y_stride, uv_stride; // luma/chroma strides.
uint8_t* a; // pointer to the alpha plane uint8_t* a; // pointer to the alpha plane
int a_stride; // stride of the alpha plane int a_stride; // stride of the alpha plane
@ -346,7 +347,7 @@ struct WebPPicture {
uint32_t pad3[3]; // padding for later use uint32_t pad3[3]; // padding for later use
// Unused for now // Unused for now
uint8_t *pad4, *pad5; uint8_t* pad4, *pad5;
uint32_t pad6[8]; // padding for later use uint32_t pad6[8]; // padding for later use
// PRIVATE FIELDS // PRIVATE FIELDS

View File

@ -57,7 +57,7 @@ extern "C" {
WebPMuxGetChunk(mux, "ICCP", &icc_profile); WebPMuxGetChunk(mux, "ICCP", &icc_profile);
// ... (Consume icc_data). // ... (Consume icc_data).
WebPMuxDelete(mux); WebPMuxDelete(mux);
free(data); WebPFree(data);
*/ */
// Note: forward declaring enumerations is not allowed in (strict) C and C++, // Note: forward declaring enumerations is not allowed in (strict) C and C++,
@ -245,7 +245,7 @@ WEBP_EXTERN WebPMuxError WebPMuxPushFrame(
WebPMux* mux, const WebPMuxFrameInfo* frame, int copy_data); WebPMux* mux, const WebPMuxFrameInfo* frame, int copy_data);
// Gets the nth frame from the mux object. // Gets the nth frame from the mux object.
// The content of 'frame->bitstream' is allocated using malloc(), and NOT // The content of 'frame->bitstream' is allocated using WebPMalloc(), and NOT
// owned by the 'mux' object. It MUST be deallocated by the caller by calling // owned by the 'mux' object. It MUST be deallocated by the caller by calling
// WebPDataClear(). // WebPDataClear().
// nth=0 has a special meaning - last position. // nth=0 has a special meaning - last position.
@ -376,10 +376,10 @@ WEBP_EXTERN WebPMuxError WebPMuxNumChunks(const WebPMux* mux,
// Assembles all chunks in WebP RIFF format and returns in 'assembled_data'. // Assembles all chunks in WebP RIFF format and returns in 'assembled_data'.
// This function also validates the mux object. // This function also validates the mux object.
// Note: The content of 'assembled_data' will be ignored and overwritten. // Note: The content of 'assembled_data' will be ignored and overwritten.
// Also, the content of 'assembled_data' is allocated using malloc(), and NOT // Also, the content of 'assembled_data' is allocated using WebPMalloc(), and
// owned by the 'mux' object. It MUST be deallocated by the caller by calling // NOT owned by the 'mux' object. It MUST be deallocated by the caller by
// WebPDataClear(). It's always safe to call WebPDataClear() upon return, // calling WebPDataClear(). It's always safe to call WebPDataClear() upon
// even in case of error. // return, even in case of error.
// Parameters: // Parameters:
// mux - (in/out) object whose chunks are to be assembled // mux - (in/out) object whose chunks are to be assembled
// assembled_data - (out) assembled WebP data // assembled_data - (out) assembled WebP data

View File

@ -14,7 +14,6 @@
#ifndef WEBP_WEBP_MUX_TYPES_H_ #ifndef WEBP_WEBP_MUX_TYPES_H_
#define WEBP_WEBP_MUX_TYPES_H_ #define WEBP_WEBP_MUX_TYPES_H_
#include <stdlib.h> // free()
#include <string.h> // memset() #include <string.h> // memset()
#include "./types.h" #include "./types.h"
@ -56,6 +55,7 @@ typedef enum WebPMuxAnimBlend {
// Data type used to describe 'raw' data, e.g., chunk data // Data type used to describe 'raw' data, e.g., chunk data
// (ICC profile, metadata) and WebP compressed image data. // (ICC profile, metadata) and WebP compressed image data.
// 'bytes' memory must be allocated using WebPMalloc() and such.
struct WebPData { struct WebPData {
const uint8_t* bytes; const uint8_t* bytes;
size_t size; size_t size;
@ -68,11 +68,11 @@ static WEBP_INLINE void WebPDataInit(WebPData* webp_data) {
} }
} }
// Clears the contents of the 'webp_data' object by calling free(). Does not // Clears the contents of the 'webp_data' object by calling WebPFree().
// deallocate the object itself. // Does not deallocate the object itself.
static WEBP_INLINE void WebPDataClear(WebPData* webp_data) { static WEBP_INLINE void WebPDataClear(WebPData* webp_data) {
if (webp_data != NULL) { if (webp_data != NULL) {
free((void*)webp_data->bytes); WebPFree((void*)webp_data->bytes);
WebPDataInit(webp_data); WebPDataInit(webp_data);
} }
} }
@ -83,7 +83,7 @@ static WEBP_INLINE int WebPDataCopy(const WebPData* src, WebPData* dst) {
if (src == NULL || dst == NULL) return 0; if (src == NULL || dst == NULL) return 0;
WebPDataInit(dst); WebPDataInit(dst);
if (src->bytes != NULL && src->size != 0) { if (src->bytes != NULL && src->size != 0) {
dst->bytes = (uint8_t*)malloc(src->size); dst->bytes = (uint8_t*)WebPMalloc(src->size);
if (dst->bytes == NULL) return 0; if (dst->bytes == NULL) return 0;
memcpy((void*)dst->bytes, src->bytes, src->size); memcpy((void*)dst->bytes, src->bytes, src->size);
dst->size = src->size; dst->size = src->size;

View File

@ -7,7 +7,7 @@
// be found in the AUTHORS file in the root of the source tree. // be found in the AUTHORS file in the root of the source tree.
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// //
// Common types // Common types + memory wrappers
// //
// Author: Skal (pascal.massimino@gmail.com) // Author: Skal (pascal.massimino@gmail.com)
@ -49,4 +49,20 @@ typedef long long int int64_t;
// Macro to check ABI compatibility (same major revision number) // Macro to check ABI compatibility (same major revision number)
#define WEBP_ABI_IS_INCOMPATIBLE(a, b) (((a) >> 8) != ((b) >> 8)) #define WEBP_ABI_IS_INCOMPATIBLE(a, b) (((a) >> 8) != ((b) >> 8))
#ifdef __cplusplus
extern "C" {
#endif
// Allocates 'size' bytes of memory. Returns NULL upon error. Memory
// must be deallocated by calling WebPFree(). This function is made available
// by the core 'libwebp' library.
WEBP_EXTERN void* WebPMalloc(size_t size);
// Releases memory returned by the WebPDecode*() functions (from decode.h).
WEBP_EXTERN void WebPFree(void* ptr);
#ifdef __cplusplus
} // extern "C"
#endif
#endif // WEBP_WEBP_TYPES_H_ #endif // WEBP_WEBP_TYPES_H_

View File

@ -1,9 +1,6 @@
/* /*
FastLZ - lightning-fast lossless compression library FastLZ - Byte-aligned LZ77 compression library
Copyright (C) 2005-2020 Ariya Hidayat <ariya.hidayat@gmail.com>
Copyright (C) 2007 Ariya Hidayat (ariya@kde.org)
Copyright (C) 2006 Ariya Hidayat (ariya@kde.org)
Copyright (C) 2005 Ariya Hidayat (ariya@kde.org)
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal of this software and associated documentation files (the "Software"), to deal
@ -24,274 +21,148 @@
THE SOFTWARE. THE SOFTWARE.
*/ */
#if !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) #include "fastlz.h"
#include <stdint.h>
/* /*
* Always check for bound when decompressing. * Always check for bound when decompressing.
* Generally it is best to leave it defined. * Generally it is best to leave it defined.
*/ */
#define FASTLZ_SAFE #define FASTLZ_SAFE
#if defined(FASTLZ_USE_SAFE_DECOMPRESSOR) && (FASTLZ_USE_SAFE_DECOMPRESSOR == 0)
#undef FASTLZ_SAFE
#endif
/* /*
* Give hints to the compiler for branch prediction optimization. * Give hints to the compiler for branch prediction optimization.
*/ */
#if defined(__GNUC__) && (__GNUC__ > 2) #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 2))
#define FASTLZ_EXPECT_CONDITIONAL(c) (__builtin_expect((c), 1)) #define FASTLZ_LIKELY(c) (__builtin_expect(!!(c), 1))
#define FASTLZ_UNEXPECT_CONDITIONAL(c) (__builtin_expect((c), 0)) #define FASTLZ_UNLIKELY(c) (__builtin_expect(!!(c), 0))
#else #else
#define FASTLZ_EXPECT_CONDITIONAL(c) (c) #define FASTLZ_LIKELY(c) (c)
#define FASTLZ_UNEXPECT_CONDITIONAL(c) (c) #define FASTLZ_UNLIKELY(c) (c)
#endif #endif
/* #if defined(FASTLZ_SAFE)
* Use inlined functions for supported systems. #define FASTLZ_BOUND_CHECK(cond) \
*/ if (FASTLZ_UNLIKELY(!(cond))) return 0;
#if defined(__GNUC__) || defined(__DMC__) || defined(__POCC__) || defined(__WATCOMC__) || defined(__SUNPRO_C)
#define FASTLZ_INLINE inline
#elif defined(__BORLANDC__) || defined(_MSC_VER) || defined(__LCC__)
#define FASTLZ_INLINE __inline
#else
#define FASTLZ_INLINE
#endif
/*
* Prevent accessing more than 8-bit at once, except on x86 architectures.
*/
#if !defined(FASTLZ_STRICT_ALIGN)
#define FASTLZ_STRICT_ALIGN
#if defined(__i386__) || defined(__386) /* GNU C, Sun Studio */
#undef FASTLZ_STRICT_ALIGN
#elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */
#undef FASTLZ_STRICT_ALIGN
#elif defined(_M_IX86) /* Intel, MSVC */
#undef FASTLZ_STRICT_ALIGN
#elif defined(__386)
#undef FASTLZ_STRICT_ALIGN
#elif defined(_X86_) /* MinGW */
#undef FASTLZ_STRICT_ALIGN
#elif defined(__I86__) /* Digital Mars */
#undef FASTLZ_STRICT_ALIGN
#endif
#endif
/*
* FIXME: use preprocessor magic to set this on different platforms!
*/
typedef unsigned char flzuint8;
typedef unsigned short flzuint16;
typedef unsigned int flzuint32;
/* prototypes */
int fastlz_compress(const void* input, int length, void* output);
int fastlz_compress_level(int level, const void* input, int length, void* output);
int fastlz_decompress(const void* input, int length, void* output, int maxout);
#define MAX_COPY 32
#define MAX_LEN 264 /* 256 + 8 */
#define MAX_DISTANCE 8192
#if !defined(FASTLZ_STRICT_ALIGN)
#define FASTLZ_READU16(p) *((const flzuint16*)(p))
#else #else
#define FASTLZ_READU16(p) ((p)[0] | (p)[1]<<8) #define FASTLZ_BOUND_CHECK(cond) \
do { \
} while (0)
#endif #endif
#define HASH_LOG 13 #define MAX_COPY 32
#define HASH_SIZE (1<< HASH_LOG) #define MAX_LEN 264 /* 256 + 8 */
#define HASH_MASK (HASH_SIZE-1) #define MAX_L1_DISTANCE 8192
#define HASH_FUNCTION(v,p) { v = FASTLZ_READU16(p); v ^= FASTLZ_READU16(p+1)^(v>>(16-HASH_LOG));v &= HASH_MASK; } #define MAX_L2_DISTANCE 8191
#define MAX_FARDISTANCE (65535 + MAX_L2_DISTANCE - 1)
#undef FASTLZ_LEVEL #define FASTLZ_READU16(p) ((p)[0] | (p)[1] << 8)
#define FASTLZ_LEVEL 1
#undef FASTLZ_COMPRESSOR #define HASH_LOG 13
#undef FASTLZ_DECOMPRESSOR #define HASH_SIZE (1 << HASH_LOG)
#define FASTLZ_COMPRESSOR fastlz1_compress #define HASH_MASK (HASH_SIZE - 1)
#define FASTLZ_DECOMPRESSOR fastlz1_decompress #define HASH_FUNCTION(v, p) \
static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output); { \
static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout); v = FASTLZ_READU16(p); \
#include "fastlz.c" v ^= FASTLZ_READU16(p + 1) ^ (v >> (16 - HASH_LOG)); \
v &= HASH_MASK; \
}
#undef FASTLZ_LEVEL int fastlz1_compress(const void* input, int length, void* output) {
#define FASTLZ_LEVEL 2 const uint8_t* ip = (const uint8_t*)input;
const uint8_t* ip_bound = ip + length - 2;
const uint8_t* ip_limit = ip + length - 12 - 1;
uint8_t* op = (uint8_t*)output;
#undef MAX_DISTANCE const uint8_t* htab[HASH_SIZE];
#define MAX_DISTANCE 8191 uint32_t hval;
#define MAX_FARDISTANCE (65535+MAX_DISTANCE-1)
#undef FASTLZ_COMPRESSOR uint32_t copy;
#undef FASTLZ_DECOMPRESSOR
#define FASTLZ_COMPRESSOR fastlz2_compress
#define FASTLZ_DECOMPRESSOR fastlz2_decompress
static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output);
static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout);
#include "fastlz.c"
int fastlz_compress(const void* input, int length, void* output)
{
/* for short block, choose fastlz1 */
if(length < 65536)
return fastlz1_compress(input, length, output);
/* else... */
return fastlz2_compress(input, length, output);
}
int fastlz_decompress(const void* input, int length, void* output, int maxout)
{
/* magic identifier for compression level */
int level = ((*(const flzuint8*)input) >> 5) + 1;
if(level == 1)
return fastlz1_decompress(input, length, output, maxout);
if(level == 2)
return fastlz2_decompress(input, length, output, maxout);
/* unknown level, trigger error */
return 0;
}
int fastlz_compress_level(int level, const void* input, int length, void* output)
{
if(level == 1)
return fastlz1_compress(input, length, output);
if(level == 2)
return fastlz2_compress(input, length, output);
return 0;
}
#else /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */
static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output)
{
const flzuint8* ip = (const flzuint8*) input;
const flzuint8* ip_bound = ip + length - 2;
const flzuint8* ip_limit = ip + length - 12;
flzuint8* op = (flzuint8*) output;
const flzuint8* htab[HASH_SIZE];
const flzuint8** hslot;
flzuint32 hval;
flzuint32 copy;
/* sanity check */ /* sanity check */
if(FASTLZ_UNEXPECT_CONDITIONAL(length < 4)) if (FASTLZ_UNLIKELY(length < 4)) {
{ if (length) {
if(length)
{
/* create literal copy only */ /* create literal copy only */
*op++ = length-1; *op++ = length - 1;
ip_bound++; ip_bound++;
while(ip <= ip_bound) while (ip <= ip_bound) *op++ = *ip++;
*op++ = *ip++; return length + 1;
return length+1; } else
}
else
return 0; return 0;
} }
/* initializes hash table */ /* initializes hash table */
for (hslot = htab; hslot < htab + HASH_SIZE; hslot++) for (hval = 0; hval < HASH_SIZE; ++hval) htab[hval] = ip;
*hslot = ip;
/* we start with literal copy */ /* we start with literal copy */
copy = 2; copy = 2;
*op++ = MAX_COPY-1; *op++ = MAX_COPY - 1;
*op++ = *ip++; *op++ = *ip++;
*op++ = *ip++; *op++ = *ip++;
/* main loop */ /* main loop */
while(FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit)) while (FASTLZ_LIKELY(ip < ip_limit)) {
{ const uint8_t* ref;
const flzuint8* ref; uint32_t distance;
flzuint32 distance;
/* minimum match length */ /* minimum match length */
flzuint32 len = 3; uint32_t len = 3;
/* comparison starting-point */ /* comparison starting-point */
const flzuint8* anchor = ip; const uint8_t* anchor = ip;
/* check for a run */
#if FASTLZ_LEVEL==2
if(ip[0] == ip[-1] && FASTLZ_READU16(ip-1)==FASTLZ_READU16(ip+1))
{
distance = 1;
ip += 3;
ref = anchor - 1 + 3;
goto match;
}
#endif
/* find potential match */ /* find potential match */
HASH_FUNCTION(hval,ip); HASH_FUNCTION(hval, ip);
hslot = htab + hval;
ref = htab[hval]; ref = htab[hval];
/* update hash table */
htab[hval] = anchor;
/* calculate distance to the match */ /* calculate distance to the match */
distance = anchor - ref; distance = anchor - ref;
/* update hash table */
*hslot = anchor;
/* is this a match? check the first 3 bytes */ /* is this a match? check the first 3 bytes */
if(distance==0 || if (distance == 0 || (distance >= MAX_L1_DISTANCE) || *ref++ != *ip++ ||
#if FASTLZ_LEVEL==1 *ref++ != *ip++ || *ref++ != *ip++)
(distance >= MAX_DISTANCE) ||
#else
(distance >= MAX_FARDISTANCE) ||
#endif
*ref++ != *ip++ || *ref++!=*ip++ || *ref++!=*ip++)
goto literal; goto literal;
#if FASTLZ_LEVEL==2
/* far, needs at least 5-byte match */
if(distance >= MAX_DISTANCE)
{
if(*ip++ != *ref++ || *ip++!= *ref++)
goto literal;
len += 2;
}
match:
#endif
/* last matched byte */ /* last matched byte */
ip = anchor + len; ip = anchor + len;
/* distance is biased */ /* distance is biased */
distance--; distance--;
if(!distance) if (!distance) {
{
/* zero distance means a run */ /* zero distance means a run */
flzuint8 x = ip[-1]; uint8_t x = ip[-1];
while(ip < ip_bound) while (ip < ip_bound)
if(*ref++ != x) break; else ip++; if (*ref++ != x)
} break;
else else
for(;;) ip++;
{ } else
/* safe because the outer check against ip limit */ for (;;) {
if(*ref++ != *ip++) break; /* safe because the outer check against ip limit */
if(*ref++ != *ip++) break; if (*ref++ != *ip++) break;
if(*ref++ != *ip++) break; if (*ref++ != *ip++) break;
if(*ref++ != *ip++) break; if (*ref++ != *ip++) break;
if(*ref++ != *ip++) break; if (*ref++ != *ip++) break;
if(*ref++ != *ip++) break; if (*ref++ != *ip++) break;
if(*ref++ != *ip++) break; if (*ref++ != *ip++) break;
if(*ref++ != *ip++) break; if (*ref++ != *ip++) break;
while(ip < ip_bound) if (*ref++ != *ip++) break;
if(*ref++ != *ip++) break; while (ip < ip_bound)
break; if (*ref++ != *ip++) break;
} break;
}
/* if we have copied something, adjust the copy count */ /* if we have copied something, adjust the copy count */
if(copy) if (copy) /* copy is biased, '0' means 1 byte copy */
/* copy is biased, '0' means 1 byte copy */ *(op - copy - 1) = copy - 1;
*(op-copy-1) = copy-1;
else else
/* back, to overwrite the copy count */ /* back, to overwrite the copy count */
op--; op--;
@ -304,248 +175,417 @@ static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void*
len = ip - anchor; len = ip - anchor;
/* encode the match */ /* encode the match */
#if FASTLZ_LEVEL==2 if (FASTLZ_UNLIKELY(len > MAX_LEN - 2))
if(distance < MAX_DISTANCE) while (len > MAX_LEN - 2) {
{
if(len < 7)
{
*op++ = (len << 5) + (distance >> 8);
*op++ = (distance & 255);
}
else
{
*op++ = (7 << 5) + (distance >> 8); *op++ = (7 << 5) + (distance >> 8);
for(len-=7; len >= 255; len-= 255) *op++ = MAX_LEN - 2 - 7 - 2;
*op++ = 255;
*op++ = len;
*op++ = (distance & 255); *op++ = (distance & 255);
} len -= MAX_LEN - 2;
}
else
{
/* far away, but not yet in the another galaxy... */
if(len < 7)
{
distance -= MAX_DISTANCE;
*op++ = (len << 5) + 31;
*op++ = 255;
*op++ = distance >> 8;
*op++ = distance & 255;
}
else
{
distance -= MAX_DISTANCE;
*op++ = (7 << 5) + 31;
for(len-=7; len >= 255; len-= 255)
*op++ = 255;
*op++ = len;
*op++ = 255;
*op++ = distance >> 8;
*op++ = distance & 255;
}
}
#else
if(FASTLZ_UNEXPECT_CONDITIONAL(len > MAX_LEN-2))
while(len > MAX_LEN-2)
{
*op++ = (7 << 5) + (distance >> 8);
*op++ = MAX_LEN - 2 - 7 -2;
*op++ = (distance & 255);
len -= MAX_LEN-2;
} }
if(len < 7) if (len < 7) {
{
*op++ = (len << 5) + (distance >> 8); *op++ = (len << 5) + (distance >> 8);
*op++ = (distance & 255); *op++ = (distance & 255);
} } else {
else
{
*op++ = (7 << 5) + (distance >> 8); *op++ = (7 << 5) + (distance >> 8);
*op++ = len - 7; *op++ = len - 7;
*op++ = (distance & 255); *op++ = (distance & 255);
} }
#endif
/* update the hash at match boundary */ /* update the hash at match boundary */
HASH_FUNCTION(hval,ip); HASH_FUNCTION(hval, ip);
htab[hval] = ip++; htab[hval] = ip++;
HASH_FUNCTION(hval,ip); HASH_FUNCTION(hval, ip);
htab[hval] = ip++; htab[hval] = ip++;
/* assuming literal copy */ /* assuming literal copy */
*op++ = MAX_COPY-1; *op++ = MAX_COPY - 1;
continue; continue;
literal: literal:
*op++ = *anchor++; *op++ = *anchor++;
ip = anchor; ip = anchor;
copy++; copy++;
if(FASTLZ_UNEXPECT_CONDITIONAL(copy == MAX_COPY)) if (FASTLZ_UNLIKELY(copy == MAX_COPY)) {
{ copy = 0;
copy = 0; *op++ = MAX_COPY - 1;
*op++ = MAX_COPY-1; }
}
} }
/* left-over as literal copy */ /* left-over as literal copy */
ip_bound++; ip_bound++;
while(ip <= ip_bound) while (ip <= ip_bound) {
{
*op++ = *ip++; *op++ = *ip++;
copy++; copy++;
if(copy == MAX_COPY) if (copy == MAX_COPY) {
{
copy = 0; copy = 0;
*op++ = MAX_COPY-1; *op++ = MAX_COPY - 1;
} }
} }
/* if we have copied something, adjust the copy length */ /* if we have copied something, adjust the copy length */
if(copy) if (copy)
*(op-copy-1) = copy-1; *(op - copy - 1) = copy - 1;
else else
op--; op--;
#if FASTLZ_LEVEL==2 return op - (uint8_t*)output;
/* marker for fastlz2 */
*(flzuint8*)output |= (1 << 5);
#endif
return op - (flzuint8*)output;
} }
static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout) #if defined(FASTLZ_USE_MEMMOVE) && (FASTLZ_USE_MEMMOVE == 0)
{
const flzuint8* ip = (const flzuint8*) input;
const flzuint8* ip_limit = ip + length;
flzuint8* op = (flzuint8*) output;
flzuint8* op_limit = op + maxout;
flzuint32 ctrl = (*ip++) & 31;
int loop = 1;
do static void fastlz_memmove(uint8_t* dest, const uint8_t* src, uint32_t count) {
{ do {
const flzuint8* ref = op; *dest++ = *src++;
flzuint32 len = ctrl >> 5; } while (--count);
flzuint32 ofs = (ctrl & 31) << 8; }
static void fastlz_memcpy(uint8_t* dest, const uint8_t* src, uint32_t count) {
return fastlz_memmove(dest, src, count);
}
if(ctrl >= 32)
{
#if FASTLZ_LEVEL==2
flzuint8 code;
#endif
len--;
ref -= ofs;
if (len == 7-1)
#if FASTLZ_LEVEL==1
len += *ip++;
ref -= *ip++;
#else #else
do
{
code = *ip++;
len += code;
} while (code==255);
code = *ip++;
ref -= code;
/* match from 16-bit distance */ #include <string.h>
if(FASTLZ_UNEXPECT_CONDITIONAL(code==255))
if(FASTLZ_EXPECT_CONDITIONAL(ofs==(31 << 8)))
{
ofs = (*ip++) << 8;
ofs += *ip++;
ref = op - ofs - MAX_DISTANCE;
}
#endif
#ifdef FASTLZ_SAFE
if (FASTLZ_UNEXPECT_CONDITIONAL(op + len + 3 > op_limit))
return 0;
if (FASTLZ_UNEXPECT_CONDITIONAL(ref-1 < (flzuint8 *)output)) static void fastlz_memmove(uint8_t* dest, const uint8_t* src, uint32_t count) {
return 0; if ((count > 4) && (dest >= src + count)) {
#endif memmove(dest, src, count);
} else {
if(FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit)) switch (count) {
ctrl = *ip++; default:
else do {
loop = 0; *dest++ = *src++;
} while (--count);
if(ref == op) break;
{ case 3:
/* optimize copy for a run */ *dest++ = *src++;
flzuint8 b = ref[-1]; case 2:
*op++ = b; *dest++ = *src++;
*op++ = b; case 1:
*op++ = b; *dest++ = *src++;
for(; len; --len) case 0:
*op++ = b; break;
}
else
{
#if !defined(FASTLZ_STRICT_ALIGN)
const flzuint16* p;
flzuint16* q;
#endif
/* copy from reference */
ref--;
*op++ = *ref++;
*op++ = *ref++;
*op++ = *ref++;
#if !defined(FASTLZ_STRICT_ALIGN)
/* copy a byte, so that now it's word aligned */
if(len & 1)
{
*op++ = *ref++;
len--;
}
/* copy 16-bit at once */
q = (flzuint16*) op;
op += len;
p = (const flzuint16*) ref;
for(len>>=1; len > 4; len-=4)
{
*q++ = *p++;
*q++ = *p++;
*q++ = *p++;
*q++ = *p++;
}
for(; len; --len)
*q++ = *p++;
#else
for(; len; --len)
*op++ = *ref++;
#endif
}
}
else
{
ctrl++;
#ifdef FASTLZ_SAFE
if (FASTLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit))
return 0;
if (FASTLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit))
return 0;
#endif
*op++ = *ip++;
for(--ctrl; ctrl; ctrl--)
*op++ = *ip++;
loop = FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit);
if(loop)
ctrl = *ip++;
} }
} }
while(FASTLZ_EXPECT_CONDITIONAL(loop));
return op - (flzuint8*)output;
} }
#endif /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */ static void fastlz_memcpy(uint8_t* dest, const uint8_t* src, uint32_t count) {
memcpy(dest, src, count);
}
#endif
/*
 * Decode a level-1 block of `length` bytes from `input` into `output`,
 * which can hold `maxout` bytes, and return the number of bytes produced.
 *
 * The stream is a sequence of instructions introduced by a control byte:
 * values below 32 announce a literal run of (control + 1) bytes, anything
 * else is a back-reference into the data already written.
 *
 * NOTE(review): FASTLZ_BOUND_CHECK is defined outside this view; it
 * presumably abandons the decode with the documented error value 0 when
 * its condition is false -- confirm.
 */
int fastlz1_decompress(const void* input, int length, void* output,
                       int maxout) {
  const uint8_t* src = (const uint8_t*)input;
  const uint8_t* src_end = src + length;
  /* last position from which a two-byte instruction can still be read */
  const uint8_t* src_last_pair = src_end - 2;
  uint8_t* dst = (uint8_t*)output;
  uint8_t* dst_end = dst + maxout;
  /* masking keeps the first control value below 32: a literal run */
  uint32_t ctrl = (*src++) & 31;

  for (;;) {
    if (ctrl < 32) {
      /* literal run: the count is stored biased by one */
      uint32_t run = ctrl + 1;
      FASTLZ_BOUND_CHECK(dst + run <= dst_end);
      FASTLZ_BOUND_CHECK(src + run <= src_end);
      fastlz_memcpy(dst, src, run);
      src += run;
      dst += run;
    } else {
      /* back-reference: 3-bit length and the high 5 bits of the distance */
      uint32_t match_len = (ctrl >> 5) - 1;
      uint32_t high_ofs = (ctrl & 31) << 8;
      const uint8_t* from = dst - high_ofs - 1;

      if (match_len == 7 - 1) {
        /* saturated length field: one extra length byte follows */
        FASTLZ_BOUND_CHECK(src <= src_last_pair);
        match_len += *src++;
      }
      /* low 8 bits of the distance */
      from -= *src++;
      /* stored length is biased by the minimum match of 3 bytes */
      match_len += 3;

      FASTLZ_BOUND_CHECK(dst + match_len <= dst_end);
      FASTLZ_BOUND_CHECK(from >= (uint8_t*)output);
      fastlz_memmove(dst, from, match_len);
      dst += match_len;
    }

    /* stop once fewer than two input bytes remain */
    if (FASTLZ_UNLIKELY(src > src_last_pair)) break;
    ctrl = *src++;
  }

  return dst - (uint8_t*)output;
}
/*
 * Compress `length` bytes from `input` into `output` using the level-2
 * encoding and return the number of bytes written (0 when length is 0).
 *
 * Inputs shorter than 4 bytes are stored as one literal run; that path
 * returns before the level-2 marker is set on the first output byte.
 *
 * No check against the size of `output` is made in this function.
 * NOTE(review): the caller is presumably responsible for sizing it as
 * described in fastlz.h -- confirm.
 * NOTE(review): HASH_SIZE, HASH_FUNCTION, MAX_COPY, MAX_FARDISTANCE and
 * MAX_L2_DISTANCE are defined outside this view.
 */
int fastlz2_compress(const void* input, int length, void* output) {
const uint8_t* ip = (const uint8_t*)input;
const uint8_t* ip_bound = ip + length - 2;
const uint8_t* ip_limit = ip + length - 12 - 1;
uint8_t* op = (uint8_t*)output;
/* for each hash value, the input position most recently stored for it */
const uint8_t* htab[HASH_SIZE];
uint32_t hval;
/* number of literals emitted since the last literal-run control byte */
uint32_t copy;
/* sanity check */
if (FASTLZ_UNLIKELY(length < 4)) {
if (length) {
/* create literal copy only */
*op++ = length - 1;
ip_bound++;
while (ip <= ip_bound) *op++ = *ip++;
return length + 1;
} else
return 0;
}
/* initializes hash table */
for (hval = 0; hval < HASH_SIZE; ++hval) htab[hval] = ip;
/* we start with literal copy */
/* the control byte written here is a placeholder; it is patched with the
   real count (or dropped) once the run ends */
copy = 2;
*op++ = MAX_COPY - 1;
*op++ = *ip++;
*op++ = *ip++;
/* main loop */
while (FASTLZ_LIKELY(ip < ip_limit)) {
const uint8_t* ref;
uint32_t distance;
/* minimum match length */
uint32_t len = 3;
/* comparison starting-point */
const uint8_t* anchor = ip;
/* check for a run */
/* previous byte and the next three are equal: treat it as a match at
   distance 1 and skip the hash lookup */
if (ip[0] == ip[-1] && ip[0] == ip[1] && ip[1] == ip[2]) {
distance = 1;
ip += 3;
ref = anchor - 1 + 3;
goto match;
}
/* find potential match */
HASH_FUNCTION(hval, ip);
ref = htab[hval];
/* update hash table */
htab[hval] = anchor;
/* calculate distance to the match */
distance = anchor - ref;
/* is this a match? check the first 3 bytes */
if (distance == 0 || (distance >= MAX_FARDISTANCE) || *ref++ != *ip++ ||
*ref++ != *ip++ || *ref++ != *ip++)
goto literal;
/* far, needs at least 5-byte match */
if (distance >= MAX_L2_DISTANCE) {
if (*ip++ != *ref++ || *ip++ != *ref++) goto literal;
len += 2;
}
match:
/* last matched byte */
ip = anchor + len;
/* distance is biased */
distance--;
if (!distance) {
/* zero distance means a run */
uint8_t x = ip[-1];
while (ip < ip_bound)
if (*ref++ != x)
break;
else
ip++;
} else
for (;;) {
/* safe because the outer check against ip limit */
if (*ref++ != *ip++) break;
if (*ref++ != *ip++) break;
if (*ref++ != *ip++) break;
if (*ref++ != *ip++) break;
if (*ref++ != *ip++) break;
if (*ref++ != *ip++) break;
if (*ref++ != *ip++) break;
if (*ref++ != *ip++) break;
while (ip < ip_bound)
if (*ref++ != *ip++) break;
break;
}
/* if we have copied something, adjust the copy count */
if (copy) /* copy is biased, '0' means 1 byte copy */
*(op - copy - 1) = copy - 1;
else
/* back, to overwrite the copy count */
op--;
/* reset literal counter */
copy = 0;
/* length is biased, '1' means a match of 3 bytes */
ip -= 3;
len = ip - anchor;
/* encode the match */
if (distance < MAX_L2_DISTANCE) {
/* near match: the distance is split between the control byte's low
   5 bits and one trailing byte */
if (len < 7) {
*op++ = (len << 5) + (distance >> 8);
*op++ = (distance & 255);
} else {
/* length field saturated at 7; the remainder follows as extension
   bytes, each 255 meaning another byte follows */
*op++ = (7 << 5) + (distance >> 8);
for (len -= 7; len >= 255; len -= 255) *op++ = 255;
*op++ = len;
*op++ = (distance & 255);
}
} else {
/* far away, but not yet in the another galaxy... */
/* far match: escape values 31 and 255 in the near-distance fields,
   then the distance minus MAX_L2_DISTANCE as two bytes, high first */
if (len < 7) {
distance -= MAX_L2_DISTANCE;
*op++ = (len << 5) + 31;
*op++ = 255;
*op++ = distance >> 8;
*op++ = distance & 255;
} else {
distance -= MAX_L2_DISTANCE;
*op++ = (7 << 5) + 31;
for (len -= 7; len >= 255; len -= 255) *op++ = 255;
*op++ = len;
*op++ = 255;
*op++ = distance >> 8;
*op++ = distance & 255;
}
}
/* update the hash at match boundary */
HASH_FUNCTION(hval, ip);
htab[hval] = ip++;
HASH_FUNCTION(hval, ip);
htab[hval] = ip++;
/* assuming literal copy */
*op++ = MAX_COPY - 1;
continue;
literal:
/* emit the byte at anchor as a literal and resume scanning just after it */
*op++ = *anchor++;
ip = anchor;
copy++;
if (FASTLZ_UNLIKELY(copy == MAX_COPY)) {
/* run is full: start a new one with a fresh placeholder control byte */
copy = 0;
*op++ = MAX_COPY - 1;
}
}
/* left-over as literal copy */
ip_bound++;
while (ip <= ip_bound) {
*op++ = *ip++;
copy++;
if (copy == MAX_COPY) {
copy = 0;
*op++ = MAX_COPY - 1;
}
}
/* if we have copied something, adjust the copy length */
if (copy)
*(op - copy - 1) = copy - 1;
else
op--;
/* marker for fastlz2 */
/* fastlz_decompress derives the level from the top three bits of the
   first byte */
*(uint8_t*)output |= (1 << 5);
return op - (uint8_t*)output;
}
/*
 * Decode a level-2 block of `length` bytes from `input` into `output`,
 * which can hold `maxout` bytes, and return the number of bytes produced.
 *
 * Control bytes below 32 introduce a literal run of (control + 1) bytes.
 * Any other control byte is a back-reference: 3 bits of length (extended
 * by further bytes when saturated), a 13-bit distance, and optionally a
 * 16-bit far distance when the 13-bit field holds its escape value.
 *
 * NOTE(review): FASTLZ_BOUND_CHECK is defined outside this view; it
 * presumably abandons the decode with the documented error value 0 when
 * its condition is false -- confirm.
 */
int fastlz2_decompress(const void* input, int length, void* output,
                       int maxout) {
  const uint8_t* ip = (const uint8_t*)input;
  const uint8_t* ip_limit = ip + length;
  const uint8_t* ip_bound = ip_limit - 2;
  uint8_t* op = (uint8_t*)output;
  uint8_t* op_limit = op + maxout;
  /* masking keeps the first control value below 32: a literal run */
  uint32_t ctrl = (*ip++) & 31;

  while (1) {
    if (ctrl >= 32) {
      uint32_t len = (ctrl >> 5) - 1;
      uint32_t ofs = (ctrl & 31) << 8;
      const uint8_t* ref = op - ofs - 1;
      uint8_t code;

      if (len == 7 - 1) {
        /* saturated length field: keep adding extension bytes while they
           read 255 */
        do {
          FASTLZ_BOUND_CHECK(ip <= ip_bound);
          code = *ip++;
          len += code;
        } while (code == 255);
      }

      /* The loop guard at the bottom only proves that the control byte was
         inside the input. For a short match nothing has yet verified that
         the distance byte exists, so check before reading it; otherwise a
         truncated stream ending in a match control byte reads one byte
         past the input buffer. */
      FASTLZ_BOUND_CHECK(ip < ip_limit);
      code = *ip++;
      ref -= code;
      /* stored length is biased by the minimum match of 3 bytes */
      len += 3;

      /* match from 16-bit distance */
      if (FASTLZ_UNLIKELY(code == 255))
        if (FASTLZ_LIKELY(ofs == (31 << 8))) {
          FASTLZ_BOUND_CHECK(ip < ip_bound);
          ofs = (*ip++) << 8;
          ofs += *ip++;
          ref = op - ofs - MAX_L2_DISTANCE - 1;
        }

      FASTLZ_BOUND_CHECK(op + len <= op_limit);
      FASTLZ_BOUND_CHECK(ref >= (uint8_t*)output);
      fastlz_memmove(op, ref, len);
      op += len;
    } else {
      /* literal run: the count is stored biased by one */
      ctrl++;
      FASTLZ_BOUND_CHECK(op + ctrl <= op_limit);
      FASTLZ_BOUND_CHECK(ip + ctrl <= ip_limit);
      fastlz_memcpy(op, ip, ctrl);
      ip += ctrl;
      op += ctrl;
    }

    if (FASTLZ_UNLIKELY(ip >= ip_limit)) break;
    ctrl = *ip++;
  }

  return op - (uint8_t*)output;
}
/*
 * Compress `length` bytes from `input` into `output`, choosing the
 * compressor from the block size: level 1 below 65536 bytes, level 2
 * otherwise. Returns whatever the chosen compressor returns.
 */
int fastlz_compress(const void* input, int length, void* output) {
  return (length < 65536) ? fastlz1_compress(input, length, output)
                          : fastlz2_compress(input, length, output);
}
/*
 * Decompress a block produced by either compression level.
 *
 * The level is taken from the top three bits of the first input byte and
 * the matching decoder is called with the same arguments. Returns the
 * decoder's result, or 0 when the block is empty or the level is unknown.
 */
int fastlz_decompress(const void* input, int length, void* output, int maxout) {
  int level;

  /* an empty block has no marker byte; reading one would be out of bounds */
  if (length <= 0) return 0;

  /* magic identifier for compression level */
  level = ((*(const uint8_t*)input) >> 5) + 1;

  if (level == 1) return fastlz1_decompress(input, length, output, maxout);
  if (level == 2) return fastlz2_decompress(input, length, output, maxout);

  /* unknown level, trigger error */
  return 0;
}
/*
 * Compress `length` bytes from `input` into `output` with the requested
 * level (1 or 2). Returns the selected compressor's result, or 0 for any
 * other level.
 */
int fastlz_compress_level(int level, const void* input, int length,
                          void* output) {
  switch (level) {
    case 1:
      return fastlz1_compress(input, length, output);
    case 2:
      return fastlz2_compress(input, length, output);
    default:
      /* unsupported level */
      return 0;
  }
}

View File

@ -1,9 +1,6 @@
/* /*
FastLZ - lightning-fast lossless compression library FastLZ - Byte-aligned LZ77 compression library
Copyright (C) 2005-2020 Ariya Hidayat <ariya.hidayat@gmail.com>
Copyright (C) 2007 Ariya Hidayat (ariya@kde.org)
Copyright (C) 2006 Ariya Hidayat (ariya@kde.org)
Copyright (C) 2005 Ariya Hidayat (ariya@kde.org)
Permission is hereby granted, free of charge, to any person obtaining a copy Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal of this software and associated documentation files (the "Software"), to deal
@ -27,48 +24,18 @@
#ifndef FASTLZ_H #ifndef FASTLZ_H
#define FASTLZ_H #define FASTLZ_H
#define FASTLZ_VERSION 0x000100 #define FASTLZ_VERSION 0x000500
#define FASTLZ_VERSION_MAJOR 0 #define FASTLZ_VERSION_MAJOR 0
#define FASTLZ_VERSION_MINOR 0 #define FASTLZ_VERSION_MINOR 5
#define FASTLZ_VERSION_REVISION 0 #define FASTLZ_VERSION_REVISION 0
#define FASTLZ_VERSION_STRING "0.1.0" #define FASTLZ_VERSION_STRING "0.5.0"
#if defined (__cplusplus) #if defined(__cplusplus)
extern "C" { extern "C" {
#endif #endif
/**
Compress a block of data in the input buffer and returns the size of
compressed block. The size of input buffer is specified by length. The
minimum input buffer size is 16.
The output buffer must be at least 5% larger than the input buffer
and can not be smaller than 66 bytes.
If the input is not compressible, the return value might be larger than
length (input buffer size).
The input buffer and the output buffer can not overlap.
*/
int fastlz_compress(const void* input, int length, void* output);
/**
Decompress a block of compressed data and returns the size of the
decompressed block. If error occurs, e.g. the compressed data is
corrupted or the output buffer is not large enough, then 0 (zero)
will be returned instead.
The input buffer and the output buffer can not overlap.
Decompression is memory safe and guaranteed not to write the output buffer
more than what is specified in maxout.
*/
int fastlz_decompress(const void* input, int length, void* output, int maxout);
/** /**
Compress a block of data in the input buffer and returns the size of Compress a block of data in the input buffer and returns the size of
compressed block. The size of input buffer is specified by length. The compressed block. The size of input buffer is specified by length. The
@ -88,12 +55,43 @@ int fastlz_decompress(const void* input, int length, void* output, int maxout);
Level 2 is slightly slower but it gives better compression ratio. Level 2 is slightly slower but it gives better compression ratio.
Note that the compressed data, regardless of the level, can always be Note that the compressed data, regardless of the level, can always be
decompressed using the function fastlz_decompress above. decompressed using the function fastlz_decompress below.
*/ */
int fastlz_compress_level(int level, const void* input, int length, void* output); int fastlz_compress_level(int level, const void* input, int length,
void* output);
#if defined (__cplusplus) /**
Decompresses a block of compressed data and returns the size of the
decompressed block. If an error occurs, e.g. the compressed data is
corrupted or the output buffer is not large enough, then 0 (zero)
is returned instead.
The input buffer and the output buffer must not overlap.
Decompression is memory safe and guaranteed not to write more than
maxout bytes to the output buffer.
Note that the decompression will always work, regardless of the
compression level specified in fastlz_compress_level above (when
producing the compressed block).
*/
int fastlz_decompress(const void* input, int length, void* output, int maxout);
/**
DEPRECATED.
This is similar to fastlz_compress_level above, but with the level
automatically chosen.
This function is deprecated and it will be completely removed in some future
version.
*/
int fastlz_compress(const void* input, int length, void* output);
#if defined(__cplusplus)
} }
#endif #endif

View File

@ -4,7 +4,7 @@
* 2000. * 2000.
*/ */
/* ==================================================================== /* ====================================================================
* Copyright (c) 2000 The OpenSSL Project. All rights reserved. * Copyright (c) 2000-2019 The OpenSSL Project. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions
@ -102,7 +102,7 @@ ASN1_ITEM_end(CBIGNUM)
static int bn_new(ASN1_VALUE **pval, const ASN1_ITEM *it) static int bn_new(ASN1_VALUE **pval, const ASN1_ITEM *it)
{ {
*pval = (ASN1_VALUE *)BN_new(); *pval = (ASN1_VALUE *)BN_new();
if (*pval) if (*pval != NULL)
return 1; return 1;
else else
return 0; return 0;
@ -110,7 +110,7 @@ static int bn_new(ASN1_VALUE **pval, const ASN1_ITEM *it)
static void bn_free(ASN1_VALUE **pval, const ASN1_ITEM *it) static void bn_free(ASN1_VALUE **pval, const ASN1_ITEM *it)
{ {
if (!*pval) if (*pval == NULL)
return; return;
if (it->size & BN_SENSITIVE) if (it->size & BN_SENSITIVE)
BN_clear_free((BIGNUM *)*pval); BN_clear_free((BIGNUM *)*pval);
@ -124,7 +124,7 @@ static int bn_i2c(ASN1_VALUE **pval, unsigned char *cont, int *putype,
{ {
BIGNUM *bn; BIGNUM *bn;
int pad; int pad;
if (!*pval) if (*pval == NULL)
return -1; return -1;
bn = (BIGNUM *)*pval; bn = (BIGNUM *)*pval;
/* If MSB set in an octet we need a padding byte */ /* If MSB set in an octet we need a padding byte */

View File

@ -1,6 +1,6 @@
/* crypto/cryptlib.c */ /* crypto/cryptlib.c */
/* ==================================================================== /* ====================================================================
* Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved. * Copyright (c) 1998-2019 The OpenSSL Project. All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions * modification, are permitted provided that the following conditions
@ -745,6 +745,11 @@ int OPENSSL_NONPIC_relocated = 0;
void OPENSSL_cpuid_setup(void) void OPENSSL_cpuid_setup(void)
{ {
} }
unsigned long OPENSSL_rdtsc(void)
{
return 0;
}
#endif #endif
#if (defined(_WIN32) || defined(__CYGWIN__)) && defined(_WINDLL) #if (defined(_WIN32) || defined(__CYGWIN__)) && defined(_WINDLL)

View File

@ -973,6 +973,20 @@ static EC_GROUP *ec_asn1_parameters2group(const ECPARAMETERS *params)
* 0x0 = OPENSSL_EC_EXPLICIT_CURVE * 0x0 = OPENSSL_EC_EXPLICIT_CURVE
*/ */
EC_GROUP_set_asn1_flag(ret, 0x0); EC_GROUP_set_asn1_flag(ret, 0x0);
/*
* If the input params do not contain the optional seed field we make
* sure it is not added to the returned group.
*
* The seed field is not really used inside libcrypto anyway, and
* adding it to parsed explicit parameter keys would alter their DER
* encoding output (because of the extra field) which could impact
* applications fingerprinting keys by their DER encoding.
*/
if (params->curve->seed == NULL) {
if (EC_GROUP_set_seed(ret, NULL, 0) != 1)
goto err;
}
} }
ok = 1; ok = 1;

View File

@ -255,7 +255,7 @@ extern "C" {
even newer MIPS CPU's, but at the moment one size fits all for even newer MIPS CPU's, but at the moment one size fits all for
optimization options. Older Sparc's work better with only UNROLL, but optimization options. Older Sparc's work better with only UNROLL, but
there's no way to tell at compile time what it is you're running on */ there's no way to tell at compile time what it is you're running on */
#if defined( __sun ) || defined ( sun ) /* Newer Sparc's */ #if defined( __sun ) || defined ( sun ) /* Newer Sparc's */
# define DES_PTR # define DES_PTR
# define DES_RISC1 # define DES_RISC1

View File

@ -30,11 +30,11 @@ extern "C" {
* (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for
* major minor fix final patch/beta) * major minor fix final patch/beta)
*/ */
# define OPENSSL_VERSION_NUMBER 0x1000214fL # define OPENSSL_VERSION_NUMBER 0x1000215fL
# ifdef OPENSSL_FIPS # ifdef OPENSSL_FIPS
# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2t-fips 10 Sep 2019" # define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2u-fips 20 Dec 2019"
# else # else
# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2t 10 Sep 2019" # define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2u 20 Dec 2019"
# endif # endif
# define OPENSSL_VERSION_PTEXT " part of " OPENSSL_VERSION_TEXT # define OPENSSL_VERSION_PTEXT " part of " OPENSSL_VERSION_TEXT

View File

@ -1,5 +1,5 @@
diff --git a/thirdparty/openssl/crypto/rand/rand_win.c b/thirdparty/openssl/crypto/rand/rand_win.c diff --git a/thirdparty/openssl/crypto/rand/rand_win.c b/thirdparty/openssl/crypto/rand/rand_win.c
index 06670ae01..cb4093128 100644 index b4be3097e9..63a9e9975a 100644
--- a/thirdparty/openssl/crypto/rand/rand_win.c --- a/thirdparty/openssl/crypto/rand/rand_win.c
+++ b/thirdparty/openssl/crypto/rand/rand_win.c +++ b/thirdparty/openssl/crypto/rand/rand_win.c
@@ -118,8 +118,10 @@ @@ -118,8 +118,10 @@
@ -22,15 +22,15 @@ index 06670ae01..cb4093128 100644
typedef BOOL(WINAPI *CRYPTACQUIRECONTEXTW) (HCRYPTPROV *, LPCWSTR, LPCWSTR, typedef BOOL(WINAPI *CRYPTACQUIRECONTEXTW) (HCRYPTPROV *, LPCWSTR, LPCWSTR,
DWORD, DWORD); DWORD, DWORD);
typedef BOOL(WINAPI *CRYPTGENRANDOM) (HCRYPTPROV, DWORD, BYTE *); typedef BOOL(WINAPI *CRYPTGENRANDOM) (HCRYPTPROV, DWORD, BYTE *);
@@ -196,6 +198,7 @@ typedef NET_API_STATUS(NET_API_FUNCTION *NETFREE) (LPBYTE); @@ -198,6 +200,7 @@ typedef NET_API_STATUS(NET_API_FUNCTION *NETFREE) (LPBYTE);
# endif /* 1 */
# endif /* !OPENSSL_SYS_WINCE */ #define NOTTOOLONG(start) ((GetTickCount() - (start)) < MAXDELAY)
+#if !defined(WINRT_ENABLED) // -- GODOT -- +#if !defined(WINRT_ENABLED) // -- GODOT --
int RAND_poll(void) int RAND_poll(void)
{ {
MEMORYSTATUS m; MEMORYSTATUS m;
@@ -580,6 +583,8 @@ int RAND_poll(void) @@ -576,6 +579,8 @@ int RAND_poll(void)
return (1); return (1);
} }
@ -39,7 +39,7 @@ index 06670ae01..cb4093128 100644
int RAND_event(UINT iMsg, WPARAM wParam, LPARAM lParam) int RAND_event(UINT iMsg, WPARAM wParam, LPARAM lParam)
{ {
double add_entropy = 0; double add_entropy = 0;
@@ -682,7 +687,7 @@ static void readtimer(void) @@ -678,7 +683,7 @@ static void readtimer(void)
static void readscreen(void) static void readscreen(void)
{ {
@ -49,7 +49,7 @@ index 06670ae01..cb4093128 100644
HBITMAP hBitmap; /* handle for our bitmap */ HBITMAP hBitmap; /* handle for our bitmap */
BITMAP bm; /* bitmap properties */ BITMAP bm; /* bitmap properties */
diff --git a/thirdparty/openssl/openssl/dtls1.h b/thirdparty/openssl/openssl/dtls1.h diff --git a/thirdparty/openssl/openssl/dtls1.h b/thirdparty/openssl/openssl/dtls1.h
index 30bbcf278..81d28c29c 100644 index 30bbcf278a..31cb6d7eb9 100644
--- a/thirdparty/openssl/openssl/dtls1.h --- a/thirdparty/openssl/openssl/dtls1.h
+++ b/thirdparty/openssl/openssl/dtls1.h +++ b/thirdparty/openssl/openssl/dtls1.h
@@ -78,6 +78,9 @@ @@ -78,6 +78,9 @@