Commit b286383b authored by Michael Niedermayer

Merge remote-tracking branch 'qatar/master'

* qatar/master:
  asfdec: read attached pictures.
  apetag: reindent
  apetag: export attached covers as video streams.
  apetag: fix the amount of data read from binary tags.
  apetag: make sure avio_get_str() doesn't read more than it should.
  mov: read itunes cover art.
  snow: remove VLA in mc_block()
  intfloat: Don't use designated initializers in the public headers
  snow: remove a VLA.
  doc: Remind devs to check return values, especially for malloc() et al
  MS ATC Screen (aka MSS3) decoder
  vf_yadif: move x86 init code to x86/yadif.c
  vf_gradfun: move x86 init code to x86/gradfun.c
  roqvideo: Remove a totally unused dspcontext
  smacker: remove some unused code
  dsicin: remove dead assignment
  aacdec: remove dead assignment
  rl2: remove dead assignment
  proresenc: make a variable local to the loop where it is used
  alsdec: remove dead assignments

Conflicts:
	Changelog
	doc/developer.texi
	libavcodec/allcodecs.c
	libavcodec/avcodec.h
	libavcodec/version.h
	libavfilter/gradfun.h
	libavfilter/x86/gradfun.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 7c84e7d3 5e745cef
......@@ -14,6 +14,7 @@ version next:
- Microsoft Screen 1 decoder
- join audio filter
- audio channel mapping filter
- Microsoft ATC Screen decoder
- showwaves filter
- LucasArts SMUSH playback support
- SAMI demuxer and decoder
......
......@@ -489,6 +489,10 @@ send a reminder by email. Your patch should eventually be dealt with.
Consider adding a regression test for your code.
@item
If you added YASM code please check that things still work with --disable-yasm
@item
Make sure you check the return values of functions and return appropriate
error codes. Especially memory allocation functions like @code{malloc()}
are notoriously left unchecked, which is a serious problem.
@end enumerate
@section Patch review process
......
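As a minimal sketch of the allocation-checking pattern the new checklist item above asks for: av_malloc() and AVERROR() are real libavutil APIs, while the context struct and function name are made up for illustration.

#include <stdint.h>
#include "libavutil/error.h"
#include "libavutil/mem.h"

typedef struct HypotheticalContext {
    uint8_t *history;   /* example buffer, name is hypothetical */
} HypotheticalContext;

/* Allocate the buffer and propagate the failure to the caller instead
 * of silently continuing with a NULL pointer. */
static int alloc_history(HypotheticalContext *ctx, int size)
{
    ctx->history = av_malloc(size);
    if (!ctx->history)
        return AVERROR(ENOMEM);
    return 0;
}

Callers should in turn check and forward the returned error code rather than discarding it.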
......@@ -565,6 +565,8 @@ following image formats are supported:
@item LucasArts Smush @tab @tab X
@tab Used in LucasArts games.
@item lossless MJPEG @tab X @tab X
@item Microsoft ATC Screen @tab @tab X
@tab Also known as Microsoft Screen 3.
@item Microsoft RLE @tab @tab X
@item Microsoft Screen 1 @tab @tab X
@tab Also known as Windows Media Video V7 Screen.
......
......@@ -327,6 +327,7 @@ OBJS-$(CONFIG_MSMPEG4V3_ENCODER) += msmpeg4.o msmpeg4enc.o msmpeg4data.o \
h263dec.o h263.o ituh263dec.o \
mpeg4videodec.o
OBJS-$(CONFIG_MSRLE_DECODER) += msrle.o msrledec.o
OBJS-$(CONFIG_MSA1_DECODER) += mss3.o
OBJS-$(CONFIG_MSS1_DECODER) += mss1.o
OBJS-$(CONFIG_MSVIDEO1_DECODER) += msvideo1.o
OBJS-$(CONFIG_MSVIDEO1_ENCODER) += msvideo1enc.o elbg.o
......
......@@ -1308,7 +1308,7 @@ static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
t.i = s.i ^ (sign & 1U<<31);
*dst++ = v[idx>>4 & 3] * t.f;
sign <<= nz & 1; nz >>= 1;
sign <<= nz & 1;
t.i = s.i ^ (sign & 1U<<31);
*dst++ = v[idx>>6 & 3] * t.f;
......
......@@ -165,6 +165,7 @@ void avcodec_register_all(void)
REGISTER_DECODER (MPEG_VDPAU, mpeg_vdpau);
REGISTER_DECODER (MPEG1_VDPAU, mpeg1_vdpau);
REGISTER_DECODER (MPEG2_CRYSTALHD, mpeg2_crystalhd);
REGISTER_DECODER (MSA1, msa1);
REGISTER_DECODER (MSMPEG4_CRYSTALHD, msmpeg4_crystalhd);
REGISTER_DECODER (MSMPEG4V1, msmpeg4v1);
REGISTER_ENCDEC (MSMPEG4V2, msmpeg4v2);
......
......@@ -770,7 +770,7 @@ static int read_var_block_data(ALSDecContext *ctx, ALSBlockData *bd)
int delta[8];
unsigned int k [8];
unsigned int b = av_clip((av_ceil_log2(bd->block_length) - 3) >> 1, 0, 5);
unsigned int i = start;
unsigned int i;
// read most significant bits
unsigned int high;
......@@ -781,7 +781,7 @@ static int read_var_block_data(ALSDecContext *ctx, ALSBlockData *bd)
current_res = bd->raw_samples + start;
for (sb = 0; sb < sub_blocks; sb++, i = 0) {
for (sb = 0; sb < sub_blocks; sb++) {
k [sb] = s[sb] > b ? s[sb] - b : 0;
delta[sb] = 5 - s[sb] + k[sb];
......
......@@ -258,6 +258,7 @@ enum CodecID {
CODEC_ID_XBM,
CODEC_ID_ZEROCODEC,
CODEC_ID_MSS1,
CODEC_ID_MSA1,
CODEC_ID_Y41P = MKBETAG('Y','4','1','P'),
CODEC_ID_ESCAPE130 = MKBETAG('E','1','3','0'),
CODEC_ID_EXR = MKBETAG('0','E','X','R'),
......
......@@ -122,7 +122,7 @@ static int cin_decode_huffman(const unsigned char *src, int src_size, unsigned c
unsigned char *dst_end = dst + dst_size;
const unsigned char *src_end = src + src_size;
memcpy(huff_code_table, src, 15); src += 15; src_size -= 15;
memcpy(huff_code_table, src, 15); src += 15;
while (src < src_end) {
huff_code = *src++;
......
/*
* Microsoft Screen 3 (aka Microsoft ATC Screen) decoder
* Copyright (c) 2012 Konstantin Shishkov
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* Microsoft Screen 3 (aka Microsoft ATC Screen) decoder
*/
#include "avcodec.h"
#include "bytestream.h"
#define HEADER_SIZE 27
#define MODEL2_SCALE 13
#define MODEL_SCALE 15
#define MODEL256_SEC_SCALE 9
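/* Three adaptive frequency models feed the range decoder below:
 * Model2 codes a single bit, Model handles alphabets of up to 16
 * symbols and Model256 handles byte values, using a secondary table
 * to narrow down the symbol search. All of them rescale their
 * weights periodically in the corresponding *_update() functions. */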
typedef struct Model2 {
int upd_val, till_rescale;
unsigned zero_freq, zero_weight;
unsigned total_freq, total_weight;
} Model2;
typedef struct Model {
int weights[16], freqs[16];
int num_syms;
int tot_weight;
int upd_val, max_upd_val, till_rescale;
} Model;
typedef struct Model256 {
int weights[256], freqs[256];
int tot_weight;
int secondary[68];
int sec_size;
int upd_val, max_upd_val, till_rescale;
} Model256;
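/* 32-bit range decoder; rac_normalise() refills the low value one byte
 * at a time whenever the range drops below RAC_BOTTOM. */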
#define RAC_BOTTOM 0x01000000
typedef struct RangeCoder {
const uint8_t *src, *src_end;
uint32_t range, low;
int got_error;
} RangeCoder;
enum BlockType {
FILL_BLOCK = 0,
IMAGE_BLOCK,
DCT_BLOCK,
HAAR_BLOCK,
SKIP_BLOCK
};
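/* Per-plane block coding modes; a SKIP_BLOCK writes nothing and keeps
 * the contents of the previous frame (the frame buffer is reused via
 * reget_buffer()). */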
typedef struct BlockTypeContext {
int last_type;
Model bt_model[5];
} BlockTypeContext;
typedef struct FillBlockCoder {
int fill_val;
Model coef_model;
} FillBlockCoder;
typedef struct ImageBlockCoder {
Model256 esc_model, vec_entry_model;
Model vec_size_model;
Model vq_model[125];
} ImageBlockCoder;
typedef struct DCTBlockCoder {
int *prev_dc;
int prev_dc_stride;
int prev_dc_height;
int quality;
uint16_t qmat[64];
Model dc_model;
Model2 sign_model;
Model256 ac_model;
} DCTBlockCoder;
typedef struct HaarBlockCoder {
int quality, scale;
Model256 coef_model;
Model coef_hi_model;
} HaarBlockCoder;
typedef struct MSS3Context {
AVCodecContext *avctx;
AVFrame pic;
int got_error;
RangeCoder coder;
BlockTypeContext btype[3];
FillBlockCoder fill_coder[3];
ImageBlockCoder image_coder[3];
DCTBlockCoder dct_coder[3];
HaarBlockCoder haar_coder[3];
int dctblock[64];
int hblock[16 * 16];
} MSS3Context;
static const uint8_t mss3_luma_quant[64] = {
16, 11, 10, 16, 24, 40, 51, 61,
12, 12, 14, 19, 26, 58, 60, 55,
14, 13, 16, 24, 40, 57, 69, 56,
14, 17, 22, 29, 51, 87, 80, 62,
18, 22, 37, 56, 68, 109, 103, 77,
24, 35, 55, 64, 81, 104, 113, 92,
49, 64, 78, 87, 103, 121, 120, 101,
72, 92, 95, 98, 112, 100, 103, 99
};
static const uint8_t mss3_chroma_quant[64] = {
17, 18, 24, 47, 99, 99, 99, 99,
18, 21, 26, 66, 99, 99, 99, 99,
24, 26, 56, 99, 99, 99, 99, 99,
47, 66, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99,
99, 99, 99, 99, 99, 99, 99, 99
};
static const uint8_t zigzag_scan[64] = {

0, 1, 8, 16, 9, 2, 3, 10,
17, 24, 32, 25, 18, 11, 4, 5,
12, 19, 26, 33, 40, 48, 41, 34,
27, 20, 13, 6, 7, 14, 21, 28,
35, 42, 49, 56, 57, 50, 43, 36,
29, 22, 15, 23, 30, 37, 44, 51,
58, 59, 52, 45, 38, 31, 39, 46,
53, 60, 61, 54, 47, 55, 62, 63
};
static void model2_reset(Model2 *m)
{
m->zero_weight = 1;
m->total_weight = 2;
m->zero_freq = 0x1000;
m->total_freq = 0x2000;
m->upd_val = 4;
m->till_rescale = 4;
}
static void model2_update(Model2 *m, int bit)
{
unsigned scale;
if (!bit)
m->zero_weight++;
m->till_rescale--;
if (m->till_rescale)
return;
m->total_weight += m->upd_val;
if (m->total_weight > 0x2000) {
m->total_weight = (m->total_weight + 1) >> 1;
m->zero_weight = (m->zero_weight + 1) >> 1;
if (m->total_weight == m->zero_weight)
m->total_weight = m->zero_weight + 1;
}
m->upd_val = m->upd_val * 5 >> 2;
if (m->upd_val > 64)
m->upd_val = 64;
scale = 0x80000000u / m->total_weight;
m->zero_freq = m->zero_weight * scale >> 18;
m->total_freq = m->total_weight * scale >> 18;
m->till_rescale = m->upd_val;
}
static void model_update(Model *m, int val)
{
int i, sum = 0;
unsigned scale;
m->weights[val]++;
m->till_rescale--;
if (m->till_rescale)
return;
m->tot_weight += m->upd_val;
if (m->tot_weight > 0x8000) {
m->tot_weight = 0;
for (i = 0; i < m->num_syms; i++) {
m->weights[i] = (m->weights[i] + 1) >> 1;
m->tot_weight += m->weights[i];
}
}
scale = 0x80000000u / m->tot_weight;
for (i = 0; i < m->num_syms; i++) {
m->freqs[i] = sum * scale >> 16;
sum += m->weights[i];
}
m->upd_val = m->upd_val * 5 >> 2;
if (m->upd_val > m->max_upd_val)
m->upd_val = m->max_upd_val;
m->till_rescale = m->upd_val;
}
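/* Reset the weights to a near-uniform state and force one update so
 * that the cumulative frequency table is rebuilt before the first
 * symbol is decoded. */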
static void model_reset(Model *m)
{
int i;
m->tot_weight = 0;
for (i = 0; i < m->num_syms - 1; i++)
m->weights[i] = 1;
m->weights[m->num_syms - 1] = 0;
m->upd_val = m->num_syms;
m->till_rescale = 1;
model_update(m, m->num_syms - 1);
m->till_rescale =
m->upd_val = (m->num_syms + 6) >> 1;
}
static av_cold void model_init(Model *m, int num_syms)
{
m->num_syms = num_syms;
m->max_upd_val = 8 * num_syms + 48;
model_reset(m);
}
static void model256_update(Model256 *m, int val)
{
int i, sum = 0;
unsigned scale;
int send, sidx = 1;
m->weights[val]++;
m->till_rescale--;
if (m->till_rescale)
return;
m->tot_weight += m->upd_val;
if (m->tot_weight > 0x8000) {
m->tot_weight = 0;
for (i = 0; i < 256; i++) {
m->weights[i] = (m->weights[i] + 1) >> 1;
m->tot_weight += m->weights[i];
}
}
scale = 0x80000000u / m->tot_weight;
m->secondary[0] = 0;
for (i = 0; i < 256; i++) {
m->freqs[i] = sum * scale >> 16;
sum += m->weights[i];
send = m->freqs[i] >> MODEL256_SEC_SCALE;
while (sidx <= send)
m->secondary[sidx++] = i - 1;
}
while (sidx < m->sec_size)
m->secondary[sidx++] = 255;
m->upd_val = m->upd_val * 5 >> 2;
if (m->upd_val > m->max_upd_val)
m->upd_val = m->max_upd_val;
m->till_rescale = m->upd_val;
}
static void model256_reset(Model256 *m)
{
int i;
for (i = 0; i < 255; i++)
m->weights[i] = 1;
m->weights[255] = 0;
m->tot_weight = 0;
m->upd_val = 256;
m->till_rescale = 1;
model256_update(m, 255);
m->till_rescale =
m->upd_val = (256 + 6) >> 1;
}
static av_cold void model256_init(Model256 *m)
{
m->max_upd_val = 8 * 256 + 48;
m->sec_size = (1 << 6) + 2;
model256_reset(m);
}
static void rac_init(RangeCoder *c, const uint8_t *src, int size)
{
int i;
c->src = src;
c->src_end = src + size;
c->low = 0;
for (i = 0; i < FFMIN(size, 4); i++)
c->low = (c->low << 8) | *c->src++;
c->range = 0xFFFFFFFF;
c->got_error = 0;
}
static void rac_normalise(RangeCoder *c)
{
for (;;) {
c->range <<= 8;
c->low <<= 8;
if (c->src < c->src_end) {
c->low |= *c->src++;
} else if (!c->low) {
c->got_error = 1;
return;
}
if (c->range >= RAC_BOTTOM)
return;
}
}
static int rac_get_bit(RangeCoder *c)
{
int bit;
c->range >>= 1;
bit = (c->range <= c->low);
if (bit)
c->low -= c->range;
if (c->range < RAC_BOTTOM)
rac_normalise(c);
return bit;
}
static int rac_get_bits(RangeCoder *c, int nbits)
{
int val;
c->range >>= nbits;
val = c->low / c->range;
c->low -= c->range * val;
if (c->range < RAC_BOTTOM)
rac_normalise(c);
return val;
}
static int rac_get_model2_sym(RangeCoder *c, Model2 *m)
{
int bit, helper;
helper = m->zero_freq * (c->range >> MODEL2_SCALE);
bit = (c->low >= helper);
if (bit) {
c->low -= helper;
c->range -= helper;
} else {
c->range = helper;
}
if (c->range < RAC_BOTTOM)
rac_normalise(c);
model2_update(m, bit);
return bit;
}
static int rac_get_model_sym(RangeCoder *c, Model *m)
{
int prob, prob2, helper, val;
int end, end2;
prob = 0;
prob2 = c->range;
c->range >>= MODEL_SCALE;
val = 0;
end = m->num_syms >> 1;
end2 = m->num_syms;
do {
helper = m->freqs[end] * c->range;
if (helper <= c->low) {
val = end;
prob = helper;
} else {
end2 = end;
prob2 = helper;
}
end = (end2 + val) >> 1;
} while (end != val);
c->low -= prob;
c->range = prob2 - prob;
if (c->range < RAC_BOTTOM)
rac_normalise(c);
model_update(m, val);
return val;
}
static int rac_get_model256_sym(RangeCoder *c, Model256 *m)
{
int prob, prob2, helper, val;
int start, end;
int ssym;
prob2 = c->range;
c->range >>= MODEL_SCALE;
helper = c->low / c->range;
ssym = helper >> MODEL256_SEC_SCALE;
val = m->secondary[ssym];
end = start = m->secondary[ssym + 1] + 1;
while (end > val + 1) {
ssym = (end + val) >> 1;
if (m->freqs[ssym] <= helper) {
end = start;
val = ssym;
} else {
end = (end + val) >> 1;
start = ssym;
}
}
prob = m->freqs[val] * c->range;
if (val != 255)
prob2 = m->freqs[val + 1] * c->range;
c->low -= prob;
c->range = prob2 - prob;
if (c->range < RAC_BOTTOM)
rac_normalise(c);
model256_update(m, val);
return val;
}
static int decode_block_type(RangeCoder *c, BlockTypeContext *bt)
{
bt->last_type = rac_get_model_sym(c, &bt->bt_model[bt->last_type]);
return bt->last_type;
}
static int decode_coeff(RangeCoder *c, Model *m)
{
int val, sign;
val = rac_get_model_sym(c, m);
if (val) {
sign = rac_get_bit(c);
if (val > 1) {
val--;
val = (1 << val) + rac_get_bits(c, val);
}
if (!sign)
val = -val;
}
return val;
}
static void decode_fill_block(RangeCoder *c, FillBlockCoder *fc,
uint8_t *dst, int stride, int block_size)
{
int i;
fc->fill_val += decode_coeff(c, &fc->coef_model);
for (i = 0; i < block_size; i++, dst += stride)
memset(dst, fc->fill_val, block_size);
}
static void decode_image_block(RangeCoder *c, ImageBlockCoder *ic,
uint8_t *dst, int stride, int block_size)
{
int i, j;
int vec_size;
int vec[4];
int prev_line[16];
int A, B, C;
vec_size = rac_get_model_sym(c, &ic->vec_size_model) + 2;
for (i = 0; i < vec_size; i++)
vec[i] = rac_get_model256_sym(c, &ic->vec_entry_model);
for (; i < 4; i++)
vec[i] = 0;
memset(prev_line, 0, sizeof(prev_line));
for (j = 0; j < block_size; j++) {
A = 0;
B = 0;
for (i = 0; i < block_size; i++) {
C = B;
B = prev_line[i];
A = rac_get_model_sym(c, &ic->vq_model[A + B * 5 + C * 25]);
prev_line[i] = A;
if (A < 4)
dst[i] = vec[A];
else
dst[i] = rac_get_model256_sym(c, &ic->esc_model);
}
dst += stride;
}
}
static int decode_dct(RangeCoder *c, DCTBlockCoder *bc, int *block,
int bx, int by)
{
int skip, val, sign, pos = 1, zz_pos, dc;
int blk_pos = bx + by * bc->prev_dc_stride;
memset(block, 0, sizeof(*block) * 64);
dc = decode_coeff(c, &bc->dc_model);
if (by) {
if (bx) {
int l, tl, t;
l = bc->prev_dc[blk_pos - 1];
tl = bc->prev_dc[blk_pos - 1 - bc->prev_dc_stride];
t = bc->prev_dc[blk_pos - bc->prev_dc_stride];
if (FFABS(t - tl) <= FFABS(l - tl))
dc += l;
else
dc += t;
} else {
dc += bc->prev_dc[blk_pos - bc->prev_dc_stride];
}
} else if (bx) {
dc += bc->prev_dc[bx - 1];
}
bc->prev_dc[blk_pos] = dc;
block[0] = dc * bc->qmat[0];
while (pos < 64) {
val = rac_get_model256_sym(c, &bc->ac_model);
if (!val)
return 0;
if (val == 0xF0) {
pos += 16;
continue;
}
skip = val >> 4;
val = val & 0xF;
if (!val)
return -1;
pos += skip;
if (pos >= 64)
return -1;
sign = rac_get_model2_sym(c, &bc->sign_model);
if (val > 1) {
val--;
val = (1 << val) + rac_get_bits(c, val);
}
if (!sign)
val = -val;
zz_pos = zigzag_scan[pos];
block[zz_pos] = val * bc->qmat[zz_pos];
pos++;
}
return pos == 64 ? 0 : -1;
}
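/* Fixed-point 8-point inverse DCT, applied once per row and once per
 * column in dct_put(); SOP_ROW/SOP_COL supply the pass-specific
 * rounding and the shifts 13/22 the pass-specific scaling. */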
#define DCT_TEMPLATE(blk, step, SOP, shift) \
const int t0 = -39409 * blk[7 * step] - 58980 * blk[1 * step]; \
const int t1 = 39410 * blk[1 * step] - 58980 * blk[7 * step]; \
const int t2 = -33410 * blk[5 * step] - 167963 * blk[3 * step]; \
const int t3 = 33410 * blk[3 * step] - 167963 * blk[5 * step]; \
const int t4 = blk[3 * step] + blk[7 * step]; \
const int t5 = blk[1 * step] + blk[5 * step]; \
const int t6 = 77062 * t4 + 51491 * t5; \
const int t7 = 77062 * t5 - 51491 * t4; \
const int t8 = 35470 * blk[2 * step] - 85623 * blk[6 * step]; \
const int t9 = 35470 * blk[6 * step] + 85623 * blk[2 * step]; \
const int tA = SOP(blk[0 * step] - blk[4 * step]); \
const int tB = SOP(blk[0 * step] + blk[4 * step]); \
\
blk[0 * step] = ( t1 + t6 + t9 + tB) >> shift; \
blk[1 * step] = ( t3 + t7 + t8 + tA) >> shift; \
blk[2 * step] = ( t2 + t6 - t8 + tA) >> shift; \
blk[3 * step] = ( t0 + t7 - t9 + tB) >> shift; \
blk[4 * step] = (-(t0 + t7) - t9 + tB) >> shift; \
blk[5 * step] = (-(t2 + t6) - t8 + tA) >> shift; \
blk[6 * step] = (-(t3 + t7) + t8 + tA) >> shift; \
blk[7 * step] = (-(t1 + t6) + t9 + tB) >> shift; \
#define SOP_ROW(a) ((a) << 16) + 0x2000
#define SOP_COL(a) ((a + 32) << 16)
static void dct_put(uint8_t *dst, int stride, int *block)
{
int i, j;
int *ptr;
ptr = block;
for (i = 0; i < 8; i++) {
DCT_TEMPLATE(ptr, 1, SOP_ROW, 13);
ptr += 8;
}
ptr = block;
for (i = 0; i < 8; i++) {
DCT_TEMPLATE(ptr, 8, SOP_COL, 22);
ptr++;
}
ptr = block;
for (j = 0; j < 8; j++) {
for (i = 0; i < 8; i++)
dst[i] = av_clip_uint8(ptr[i] + 128);
dst += stride;
ptr += 8;
}
}
static void decode_dct_block(RangeCoder *c, DCTBlockCoder *bc,
uint8_t *dst, int stride, int block_size,
int *block, int mb_x, int mb_y)
{
int i, j;
int bx, by;
int nblocks = block_size >> 3;
bx = mb_x * nblocks;
by = mb_y * nblocks;
for (j = 0; j < nblocks; j++) {
for (i = 0; i < nblocks; i++) {
if (decode_dct(c, bc, block, bx + i, by + j)) {
c->got_error = 1;
return;
}
dct_put(dst + i * 8, stride, block);
}
dst += 8 * stride;
}
}
static void decode_haar_block(RangeCoder *c, HaarBlockCoder *hc,
uint8_t *dst, int stride, int block_size,
int *block)
{
const int hsize = block_size >> 1;
int A, B, C, D, t1, t2, t3, t4;
int i, j;
for (j = 0; j < block_size; j++) {
for (i = 0; i < block_size; i++) {
if (i < hsize && j < hsize)
block[i] = rac_get_model256_sym(c, &hc->coef_model);
else
block[i] = decode_coeff(c, &hc->coef_hi_model);
block[i] *= hc->scale;
}
block += block_size;
}
block -= block_size * block_size;
for (j = 0; j < hsize; j++) {
for (i = 0; i < hsize; i++) {
A = block[i];
B = block[i + hsize];
C = block[i + hsize * block_size];
D = block[i + hsize * block_size + hsize];
t1 = A - B;
t2 = C - D;
t3 = A + B;
t4 = C + D;
dst[i * 2] = av_clip_uint8(t1 - t2);
dst[i * 2 + stride] = av_clip_uint8(t1 + t2);
dst[i * 2 + 1] = av_clip_uint8(t3 - t4);
dst[i * 2 + 1 + stride] = av_clip_uint8(t3 + t4);
}
block += block_size;
dst += stride * 2;
}
}
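/* Scale the reference quantisation matrix by the percentage derived
 * from the frame quality in reset_coders(). */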
static void gen_quant_mat(uint16_t *qmat, const uint8_t *ref, float scale)
{
int i;
for (i = 0; i < 64; i++)
qmat[i] = (uint16_t)(ref[i] * scale + 50.0) / 100;
}
static void reset_coders(MSS3Context *ctx, int quality)
{
int i, j;
for (i = 0; i < 3; i++) {
ctx->btype[i].last_type = SKIP_BLOCK;
for (j = 0; j < 5; j++)
model_reset(&ctx->btype[i].bt_model[j]);
ctx->fill_coder[i].fill_val = 0;
model_reset(&ctx->fill_coder[i].coef_model);
model256_reset(&ctx->image_coder[i].esc_model);
model256_reset(&ctx->image_coder[i].vec_entry_model);
model_reset(&ctx->image_coder[i].vec_size_model);
for (j = 0; j < 125; j++)
model_reset(&ctx->image_coder[i].vq_model[j]);
if (ctx->dct_coder[i].quality != quality) {
float scale;
ctx->dct_coder[i].quality = quality;
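/* JPEG-style mapping from quality (1-100) to a scaling percentage */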
if (quality > 50)
scale = 200.0f - 2 * quality;
else
scale = 5000.0f / quality;
gen_quant_mat(ctx->dct_coder[i].qmat,
i ? mss3_chroma_quant : mss3_luma_quant,
scale);
}
memset(ctx->dct_coder[i].prev_dc, 0,
sizeof(*ctx->dct_coder[i].prev_dc) *
ctx->dct_coder[i].prev_dc_stride *
ctx->dct_coder[i].prev_dc_height);
model_reset(&ctx->dct_coder[i].dc_model);
model2_reset(&ctx->dct_coder[i].sign_model);
model256_reset(&ctx->dct_coder[i].ac_model);
if (ctx->haar_coder[i].quality != quality) {
ctx->haar_coder[i].quality = quality;
ctx->haar_coder[i].scale = 17 - 7 * quality / 50;
}
model_reset(&ctx->haar_coder[i].coef_hi_model);
model256_reset(&ctx->haar_coder[i].coef_model);
}
}
static av_cold void init_coders(MSS3Context *ctx)
{
int i, j;
for (i = 0; i < 3; i++) {
for (j = 0; j < 5; j++)
model_init(&ctx->btype[i].bt_model[j], 5);
model_init(&ctx->fill_coder[i].coef_model, 12);
model256_init(&ctx->image_coder[i].esc_model);
model256_init(&ctx->image_coder[i].vec_entry_model);
model_init(&ctx->image_coder[i].vec_size_model, 3);
for (j = 0; j < 125; j++)
model_init(&ctx->image_coder[i].vq_model[j], 5);
model_init(&ctx->dct_coder[i].dc_model, 12);
model256_init(&ctx->dct_coder[i].ac_model);
model_init(&ctx->haar_coder[i].coef_hi_model, 12);
model256_init(&ctx->haar_coder[i].coef_model);
}
}
static int mss3_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
AVPacket *avpkt)
{
const uint8_t *buf = avpkt->data;
int buf_size = avpkt->size;
MSS3Context *c = avctx->priv_data;
RangeCoder *acoder = &c->coder;
GetByteContext gb;
uint8_t *dst[3];
int dec_width, dec_height, dec_x, dec_y, quality, keyframe;
int x, y, i, mb_width, mb_height, blk_size, btype;
int ret;
if (buf_size < HEADER_SIZE) {
av_log(avctx, AV_LOG_ERROR,
"Frame should have at least %d bytes, got %d instead\n",
HEADER_SIZE, buf_size);
return AVERROR_INVALIDDATA;
}
bytestream2_init(&gb, buf, buf_size);
keyframe = bytestream2_get_be32(&gb);
if (keyframe & ~0x301) {
av_log(avctx, AV_LOG_ERROR, "Invalid frame type %X\n", keyframe);
return AVERROR_INVALIDDATA;
}
keyframe = !(keyframe & 1);
bytestream2_skip(&gb, 6);
dec_x = bytestream2_get_be16(&gb);
dec_y = bytestream2_get_be16(&gb);
dec_width = bytestream2_get_be16(&gb);
dec_height = bytestream2_get_be16(&gb);
if (dec_x + dec_width > avctx->width ||
dec_y + dec_height > avctx->height ||
(dec_width | dec_height) & 0xF) {
av_log(avctx, AV_LOG_ERROR, "Invalid frame dimensions %dx%d +%d,%d\n",
dec_width, dec_height, dec_x, dec_y);
return AVERROR_INVALIDDATA;
}
bytestream2_skip(&gb, 4);
quality = bytestream2_get_byte(&gb);
if (quality < 1 || quality > 100) {
av_log(avctx, AV_LOG_ERROR, "Invalid quality setting %d\n", quality);
return AVERROR_INVALIDDATA;
}
bytestream2_skip(&gb, 4);
if (keyframe && !bytestream2_get_bytes_left(&gb)) {
av_log(avctx, AV_LOG_ERROR, "Keyframe without data found\n");
return AVERROR_INVALIDDATA;
}
if (!keyframe && c->got_error)
return buf_size;
c->got_error = 0;
c->pic.reference = 3;
c->pic.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE |
FF_BUFFER_HINTS_REUSABLE;
if ((ret = avctx->reget_buffer(avctx, &c->pic)) < 0) {
av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
return ret;
}
c->pic.key_frame = keyframe;
c->pic.pict_type = keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
if (!bytestream2_get_bytes_left(&gb)) {
*data_size = sizeof(AVFrame);
*(AVFrame*)data = c->pic;
return buf_size;
}
reset_coders(c, quality);
rac_init(acoder, buf + HEADER_SIZE, buf_size - HEADER_SIZE);
mb_width = dec_width >> 4;
mb_height = dec_height >> 4;
dst[0] = c->pic.data[0] + dec_x + dec_y * c->pic.linesize[0];
dst[1] = c->pic.data[1] + dec_x / 2 + (dec_y / 2) * c->pic.linesize[1];
dst[2] = c->pic.data[2] + dec_x / 2 + (dec_y / 2) * c->pic.linesize[2];
for (y = 0; y < mb_height; y++) {
for (x = 0; x < mb_width; x++) {
for (i = 0; i < 3; i++) {
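/* 16x16 blocks for the luma plane, 8x8 for the chroma planes */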
blk_size = 8 << !i;
btype = decode_block_type(acoder, c->btype + i);
switch (btype) {
case FILL_BLOCK:
decode_fill_block(acoder, c->fill_coder + i,
dst[i] + x * blk_size,
c->pic.linesize[i], blk_size);
break;
case IMAGE_BLOCK:
decode_image_block(acoder, c->image_coder + i,
dst[i] + x * blk_size,
c->pic.linesize[i], blk_size);
break;
case DCT_BLOCK:
decode_dct_block(acoder, c->dct_coder + i,
dst[i] + x * blk_size,
c->pic.linesize[i], blk_size,
c->dctblock, x, y);
break;
case HAAR_BLOCK:
decode_haar_block(acoder, c->haar_coder + i,
dst[i] + x * blk_size,
c->pic.linesize[i], blk_size,
c->hblock);
break;
}
if (c->got_error || acoder->got_error) {
av_log(avctx, AV_LOG_ERROR, "Error decoding block %d,%d\n",
x, y);
c->got_error = 1;
return AVERROR_INVALIDDATA;
}
}
}
dst[0] += c->pic.linesize[0] * 16;
dst[1] += c->pic.linesize[1] * 8;
dst[2] += c->pic.linesize[2] * 8;
}
*data_size = sizeof(AVFrame);
*(AVFrame*)data = c->pic;
return buf_size;
}
static av_cold int mss3_decode_init(AVCodecContext *avctx)
{
MSS3Context * const c = avctx->priv_data;
int i;
c->avctx = avctx;
if ((avctx->width & 0xF) || (avctx->height & 0xF)) {
av_log(avctx, AV_LOG_ERROR,
"Image dimensions should be a multiple of 16.\n");
return AVERROR_INVALIDDATA;
}
c->got_error = 0;
for (i = 0; i < 3; i++) {
int b_width = avctx->width >> (2 + !!i);
int b_height = avctx->height >> (2 + !!i);
c->dct_coder[i].prev_dc_stride = b_width;
c->dct_coder[i].prev_dc_height = b_height;
c->dct_coder[i].prev_dc = av_malloc(sizeof(*c->dct_coder[i].prev_dc) *
b_width * b_height);
if (!c->dct_coder[i].prev_dc) {
av_log(avctx, AV_LOG_ERROR, "Cannot allocate buffer\n");
while (i >= 0) {
av_freep(&c->dct_coder[i].prev_dc);
i--;
}
return AVERROR(ENOMEM);
}
}
avctx->pix_fmt = PIX_FMT_YUV420P;
avctx->coded_frame = &c->pic;
init_coders(c);
return 0;
}
static av_cold int mss3_decode_end(AVCodecContext *avctx)
{
MSS3Context * const c = avctx->priv_data;
int i;
if (c->pic.data[0])
avctx->release_buffer(avctx, &c->pic);
for (i = 0; i < 3; i++)
av_freep(&c->dct_coder[i].prev_dc);
return 0;
}
AVCodec ff_msa1_decoder = {
.name = "msa1",
.type = AVMEDIA_TYPE_VIDEO,
.id = CODEC_ID_MSA1,
.priv_data_size = sizeof(MSS3Context),
.init = mss3_decode_init,
.close = mss3_decode_end,
.decode = mss3_decode_frame,
.capabilities = CODEC_CAP_DR1,
.long_name = NULL_IF_CONFIG_SMALL("MS ATC Screen"),
};
......@@ -726,7 +726,6 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
int sizes[4] = { 0 };
int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
int frame_size, picture_size, slice_size;
int mbs_per_slice = ctx->mbs_per_slice;
int pkt_size, ret;
*avctx->coded_frame = *pic;
......@@ -792,7 +791,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
}
for (y = 0; y < ctx->mb_height; y++) {
mbs_per_slice = ctx->mbs_per_slice;
int mbs_per_slice = ctx->mbs_per_slice;
for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
q = ctx->force_quant ? ctx->force_quant
: ctx->slice_q[mb + y * ctx->slices_width];
......
......@@ -64,7 +64,7 @@ static void rl2_rle_decode(Rl2Context *s,const unsigned char* in,int size,
const unsigned char* back_frame = s->back_frame;
const unsigned char* in_end = in + size;
const unsigned char* out_end = out + stride * s->avctx->height;
unsigned char* line_end = out + s->avctx->width;
unsigned char* line_end;
/** copy start of the background frame */
for(i=0;i<=base_y;i++){
......
......@@ -45,7 +45,6 @@ struct RoqTempData;
typedef struct RoqContext {
AVCodecContext *avctx;
DSPContext dsp;
AVFrame frames[2];
AVFrame *last_frame;
AVFrame *current_frame;
......
......@@ -137,13 +137,9 @@ static int smacker_decode_bigtree(GetBitContext *gb, HuffContext *hc, DBCtx *ctx
return -1;
}
if(!get_bits1(gb)){ //Leaf
int val, i1, i2, b1, b2;
b1 = get_bits_count(gb);
int val, i1, i2;
i1 = ctx->v1->table ? get_vlc2(gb, ctx->v1->table, SMKTREE_BITS, 3) : 0;
b1 = get_bits_count(gb) - b1;
b2 = get_bits_count(gb);
i2 = ctx->v2->table ? get_vlc2(gb, ctx->v2->table, SMKTREE_BITS, 3) : 0;
b2 = get_bits_count(gb) - b2;
if (i1 < 0 || i2 < 0)
return -1;
val = ctx->recode1[i1] | (ctx->recode2[i2] << 8);
......
......@@ -140,7 +140,7 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int
int x, y, b, r, l;
int16_t tmpIt [64*(32+HTAPS_MAX)];
uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
uint8_t tmp2t[3][64*(32+HTAPS_MAX)];
int16_t *tmpI= tmpIt;
uint8_t *tmp2= tmp2t[0];
const uint8_t *hpel[11];
......@@ -178,7 +178,7 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int
tmp2[x]= am;
}
tmpI+= 64;
tmp2+= stride;
tmp2+= 64;
src += stride;
}
src -= stride*y;
......@@ -207,7 +207,7 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int
tmp2[x]= am;
}
src += stride;
tmp2+= stride;
tmp2+= 64;
}
src -= stride*y;
}
......@@ -234,12 +234,12 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int
tmp2[x]= am;
}
tmpI+= 64;
tmp2+= stride;
tmp2+= 64;
}
}
hpel[ 0]= src;
hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
hpel[ 1]= tmp2t[0] + 64*(HTAPS_MAX/2-1);
hpel[ 2]= src + 1;
hpel[ 4]= tmp2t[1];
......@@ -247,14 +247,21 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int
hpel[ 6]= tmp2t[1] + 1;
hpel[ 8]= src + stride;
hpel[ 9]= hpel[1] + stride;
hpel[ 9]= hpel[1] + 64;
hpel[10]= hpel[8] + 1;
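/* The temporary half-pel planes in tmp2t[] now use a fixed stride of
 * 64 instead of a VLA sized by stride; MC_STRIDE() selects 64 for
 * pointers into those buffers and the frame stride for pointers into
 * src. */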
#define MC_STRIDE(x) (needs[x] ? 64 : stride)
if(b==15){
const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
int dxy = dx / 8 + dy / 8 * 4;
const uint8_t *src1 = hpel[dxy ];
const uint8_t *src2 = hpel[dxy + 1];
const uint8_t *src3 = hpel[dxy + 4];
const uint8_t *src4 = hpel[dxy + 5];
int stride1 = MC_STRIDE(dxy);
int stride2 = MC_STRIDE(dxy + 1);
int stride3 = MC_STRIDE(dxy + 4);
int stride4 = MC_STRIDE(dxy + 5);
dx&=7;
dy&=7;
for(y=0; y < b_h; y++){
......@@ -262,23 +269,25 @@ static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int
dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
(8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
}
src1+=stride;
src2+=stride;
src3+=stride;
src4+=stride;
src1+=stride1;
src2+=stride2;
src3+=stride3;
src4+=stride4;
dst +=stride;
}
}else{
const uint8_t *src1= hpel[l];
const uint8_t *src2= hpel[r];
int stride1 = MC_STRIDE(l);
int stride2 = MC_STRIDE(r);
int a= weight[((dx&7) + (8*(dy&7)))];
int b= 8-a;
for(y=0; y < b_h; y++){
for(x=0; x < b_w; x++){
dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
}
src1+=stride;
src2+=stride;
src1+=stride1;
src2+=stride2;
dst +=stride;
}
}
......
......@@ -666,7 +666,7 @@ static inline int get_block_bits(SnowContext *s, int x, int y, int w){
}
}
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, uint8_t (*obmc_edged)[MB_SIZE * 2]){
Plane *p= &s->plane[plane_index];
const int block_size = MB_SIZE >> s->block_max_depth;
const int block_w = plane_index ? block_size>>s->chroma_h_shift : block_size;
......@@ -698,7 +698,7 @@ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, con
ff_snow_pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_h*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
for(y=y0; y<y1; y++){
const uint8_t *obmc1= obmc_edged + y*obmc_stride;
const uint8_t *obmc1= obmc_edged[y];
const IDWTELEM *pred1 = pred + y*obmc_stride;
uint8_t *cur1 = cur + y*ref_stride;
uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
......@@ -962,7 +962,7 @@ static int encode_subband(SnowContext *s, SubBand *b, const IDWTELEM *src, const
// encode_subband_dzr(s, b, src, parent, stride, orientation);
}
static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, uint8_t (*obmc_edged)[MB_SIZE * 2], int *best_rd){
const int b_stride= s->b_width << s->block_max_depth;
BlockNode *block= &s->block[mb_x + mb_y * b_stride];
BlockNode backup= *block;
......@@ -1003,7 +1003,7 @@ static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int
/* special case for int[2] args we discard afterwards,
* fixes compilation problem with gcc 2.95 */
static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, uint8_t (*obmc_edged)[MB_SIZE * 2], int *best_rd){
int p[2] = {p0, p1};
return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
}
......@@ -1083,7 +1083,7 @@ static void iterative_me(SnowContext *s){
BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
const int b_w= (MB_SIZE >> s->block_max_depth);
uint8_t obmc_edged[b_w*2][b_w*2];
uint8_t obmc_edged[MB_SIZE * 2][MB_SIZE * 2];
if(pass && (block->type & BLOCK_OPT))
continue;
......@@ -1098,7 +1098,8 @@ static void iterative_me(SnowContext *s){
//FIXME precalculate
{
int x, y;
memcpy(obmc_edged, ff_obmc_tab[s->block_max_depth], b_w*b_w*4);
for (y = 0; y < b_w * 2; y++)
memcpy(obmc_edged[y], ff_obmc_tab[s->block_max_depth] + y * b_w * 2, b_w * 2);
if(mb_x==0)
for(y=0; y<b_w*2; y++)
memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
......@@ -1153,9 +1154,9 @@ static void iterative_me(SnowContext *s){
// get previous score (cannot be cached due to OBMC)
if(pass > 0 && (block->type&BLOCK_INTRA)){
int color0[3]= {block->color[0], block->color[1], block->color[2]};
check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
check_block(s, mb_x, mb_y, color0, 1, obmc_edged, &best_rd);
}else
check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
check_block_inter(s, mb_x, mb_y, block->mx, block->my, obmc_edged, &best_rd);
ref_b= *block;
ref_rd= best_rd;
......@@ -1166,16 +1167,16 @@ static void iterative_me(SnowContext *s){
block->ref= ref;
best_rd= INT_MAX;
check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], obmc_edged, &best_rd);
check_block_inter(s, mb_x, mb_y, 0, 0, obmc_edged, &best_rd);
if(tb)
check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], obmc_edged, &best_rd);
if(lb)
check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], obmc_edged, &best_rd);
if(rb)
check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], obmc_edged, &best_rd);
if(bb)
check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], obmc_edged, &best_rd);
/* fullpel ME */
//FIXME avoid subpel interpolation / round to nearest integer
......@@ -1183,10 +1184,10 @@ static void iterative_me(SnowContext *s){
dia_change=0;
for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
for(j=0; j<i; j++){
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), obmc_edged, &best_rd);
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), obmc_edged, &best_rd);
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), obmc_edged, &best_rd);
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), obmc_edged, &best_rd);
}
}
}while(dia_change);
......@@ -1195,7 +1196,7 @@ static void iterative_me(SnowContext *s){
static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
dia_change=0;
for(i=0; i<8; i++)
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], obmc_edged, &best_rd);
}while(dia_change);
//FIXME or try the standard 2 pass qpel or similar
......@@ -1208,7 +1209,7 @@ static void iterative_me(SnowContext *s){
}
best_rd= ref_rd;
*block= ref_b;
check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
check_block(s, mb_x, mb_y, color, 1, obmc_edged, &best_rd);
//FIXME RD style color selection
if(!same_block(block, &backup)){
if(tb ) tb ->type &= ~BLOCK_OPT;
......
......@@ -27,7 +27,7 @@
*/
#define LIBAVCODEC_VERSION_MAJOR 54
#define LIBAVCODEC_VERSION_MINOR 31
#define LIBAVCODEC_VERSION_MINOR 32
#define LIBAVCODEC_VERSION_MICRO 100
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
......
......@@ -37,12 +37,9 @@ typedef struct {
void (*blur_line) (uint16_t *dc, uint16_t *buf, const uint16_t *buf1, const uint8_t *src, int src_linesize, int width);
} GradFunContext;
void ff_gradfun_init_x86(GradFunContext *gf);
void ff_gradfun_filter_line_c(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers);
void ff_gradfun_blur_line_c(uint16_t *dc, uint16_t *buf, const uint16_t *buf1, const uint8_t *src, int src_linesize, int width);
void ff_gradfun_filter_line_mmx2(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers);
void ff_gradfun_filter_line_ssse3(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers);
void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, const uint16_t *buf1, const uint8_t *src, int src_linesize, int width);
#endif /* AVFILTER_GRADFUN_H */
......@@ -123,7 +123,6 @@ static av_cold int init(AVFilterContext *ctx, const char *args)
GradFunContext *gf = ctx->priv;
float thresh = 1.2;
int radius = 16;
int cpu_flags = av_get_cpu_flags();
if (args)
sscanf(args, "%f:%d", &thresh, &radius);
......@@ -135,12 +134,8 @@ static av_cold int init(AVFilterContext *ctx, const char *args)
gf->blur_line = ff_gradfun_blur_line_c;
gf->filter_line = ff_gradfun_filter_line_c;
if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX2)
gf->filter_line = ff_gradfun_filter_line_mmx2;
if (HAVE_SSSE3 && cpu_flags & AV_CPU_FLAG_SSSE3)
gf->filter_line = ff_gradfun_filter_line_ssse3;
if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2)
gf->blur_line = ff_gradfun_blur_line_sse2;
if (HAVE_MMX)
ff_gradfun_init_x86(gf);
av_log(ctx, AV_LOG_VERBOSE, "threshold:%.2f radius:%d\n", thresh, gf->radius);
......
......@@ -30,42 +30,6 @@
#undef NDEBUG
#include <assert.h>
typedef struct {
/**
* 0: send 1 frame for each frame
* 1: send 1 frame for each field
* 2: like 0 but skips spatial interlacing check
* 3: like 1 but skips spatial interlacing check
*/
int mode;
/**
* 0: top field first
* 1: bottom field first
* -1: auto-detection
*/
int parity;
int frame_pending;
/**
* 0: deinterlace all frames
* 1: only deinterlace frames marked as interlaced
*/
int auto_enable;
AVFilterBufferRef *cur;
AVFilterBufferRef *next;
AVFilterBufferRef *prev;
AVFilterBufferRef *out;
void (*filter_line)(uint8_t *dst,
uint8_t *prev, uint8_t *cur, uint8_t *next,
int w, int prefs, int mrefs, int parity, int mode);
const AVPixFmtDescriptor *csp;
int eof;
} YADIFContext;
#define CHECK(j)\
{ int score = FFABS(cur[mrefs-1+(j)] - cur[prefs-1-(j)])\
+ FFABS(cur[mrefs +(j)] - cur[prefs -(j)])\
......@@ -400,7 +364,6 @@ static int query_formats(AVFilterContext *ctx)
static av_cold int init(AVFilterContext *ctx, const char *args)
{
YADIFContext *yadif = ctx->priv;
int cpu_flags = av_get_cpu_flags();
yadif->mode = 0;
yadif->parity = -1;
......@@ -410,12 +373,9 @@ static av_cold int init(AVFilterContext *ctx, const char *args)
if (args) sscanf(args, "%d:%d:%d", &yadif->mode, &yadif->parity, &yadif->auto_enable);
yadif->filter_line = filter_line_c;
if (HAVE_SSSE3 && cpu_flags & AV_CPU_FLAG_SSSE3)
yadif->filter_line = ff_yadif_filter_line_ssse3;
else if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2)
yadif->filter_line = ff_yadif_filter_line_sse2;
else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX)
yadif->filter_line = ff_yadif_filter_line_mmx;
if (HAVE_MMX)
ff_yadif_init_x86(yadif);
av_log(ctx, AV_LOG_VERBOSE, "mode:%d parity:%d auto_enable:%d\n", yadif->mode, yadif->parity, yadif->auto_enable);
......
......@@ -18,6 +18,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86_cpu.h"
#include "libavfilter/gradfun.h"
......@@ -25,9 +26,9 @@
DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F};
DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
void ff_gradfun_filter_line_mmx2(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers)
#if HAVE_MMX2
static void gradfun_filter_line_mmx2(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers)
{
#if HAVE_MMX
intptr_t x;
if (width & 3) {
x = width & ~3;
......@@ -70,12 +71,12 @@ void ff_gradfun_filter_line_mmx2(uint8_t *dst, const uint8_t *src, const uint16_
"rm"(thresh), "m"(*dithers), "m"(*pw_7f)
:"memory"
);
#endif
}
#endif
void ff_gradfun_filter_line_ssse3(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers)
{
#if HAVE_SSSE3
static void gradfun_filter_line_ssse3(uint8_t *dst, const uint8_t *src, const uint16_t *dc, int width, int thresh, const uint16_t *dithers)
{
intptr_t x;
if (width & 7) {
// could be 10% faster if I somehow eliminated this
......@@ -117,12 +118,12 @@ void ff_gradfun_filter_line_ssse3(uint8_t *dst, const uint8_t *src, const uint16
"rm"(thresh), "m"(*dithers), "m"(*pw_7f)
:"memory"
);
#endif // HAVE_SSSE3
}
#endif // HAVE_SSSE3
void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, const uint16_t *buf1, const uint8_t *src, int src_linesize, int width)
{
#if HAVE_SSE
static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, const uint16_t *buf1, const uint8_t *src, int src_linesize, int width)
{
#define BLURV(load)\
intptr_t x = -2*width;\
__asm__ volatile(\
......@@ -160,5 +161,17 @@ void ff_gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, const uint16_t *buf1
} else {
BLURV("movdqa");
}
}
#endif // HAVE_SSE
av_cold void ff_gradfun_init_x86(GradFunContext *gf)
{
int cpu_flags = av_get_cpu_flags();
if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2)
gf->filter_line = gradfun_filter_line_mmx2;
if (HAVE_SSSE3 && cpu_flags & AV_CPU_FLAG_SSSE3)
gf->filter_line = gradfun_filter_line_ssse3;
if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2)
gf->blur_line = gradfun_blur_line_sse2;
}
......@@ -18,6 +18,7 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86_cpu.h"
#include "libavcodec/x86/dsputil_mmx.h"
......@@ -47,3 +48,15 @@ DECLARE_ASM_CONST(16, const xmm_reg, pw_1) = {0x0001000100010001ULL, 0x000100010
#define RENAME(a) a ## _mmx
#include "yadif_template.c"
#endif
av_cold void ff_yadif_init_x86(YADIFContext *yadif)
{
int cpu_flags = av_get_cpu_flags();
if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX)
yadif->filter_line = yadif_filter_line_mmx;
if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2)
yadif->filter_line = yadif_filter_line_sse2;
if (HAVE_SSSE3 && cpu_flags & AV_CPU_FLAG_SSSE3)
yadif->filter_line = yadif_filter_line_ssse3;
}
......@@ -103,9 +103,9 @@
"por "MM"5, "MM"3 \n\t"\
MOVQ" "MM"3, "MM"1 \n\t"
void RENAME(ff_yadif_filter_line)(uint8_t *dst,
uint8_t *prev, uint8_t *cur, uint8_t *next,
int w, int prefs, int mrefs, int parity, int mode)
static void RENAME(yadif_filter_line)(uint8_t *dst, uint8_t *prev, uint8_t *cur,
uint8_t *next, int w, int prefs,
int mrefs, int parity, int mode)
{
uint8_t tmp[5*16];
uint8_t *tmpA= (uint8_t*)(((uint64_t)(tmp+15)) & ~15);
......
......@@ -19,18 +19,45 @@
#ifndef AVFILTER_YADIF_H
#define AVFILTER_YADIF_H
#include "libavutil/pixdesc.h"
#include "avfilter.h"
void ff_yadif_filter_line_mmx(uint8_t *dst,
uint8_t *prev, uint8_t *cur, uint8_t *next,
int w, int prefs, int mrefs, int parity, int mode);
typedef struct {
/**
* 0: send 1 frame for each frame
* 1: send 1 frame for each field
* 2: like 0 but skips spatial interlacing check
* 3: like 1 but skips spatial interlacing check
*/
int mode;
void ff_yadif_filter_line_sse2(uint8_t *dst,
uint8_t *prev, uint8_t *cur, uint8_t *next,
int w, int prefs, int mrefs, int parity, int mode);
/**
* 0: top field first
* 1: bottom field first
* -1: auto-detection
*/
int parity;
void ff_yadif_filter_line_ssse3(uint8_t *dst,
uint8_t *prev, uint8_t *cur, uint8_t *next,
int w, int prefs, int mrefs, int parity, int mode);
int frame_pending;
/**
* 0: deinterlace all frames
* 1: only deinterlace frames marked as interlaced
*/
int auto_enable;
AVFilterBufferRef *cur;
AVFilterBufferRef *next;
AVFilterBufferRef *prev;
AVFilterBufferRef *out;
void (*filter_line)(uint8_t *dst,
uint8_t *prev, uint8_t *cur, uint8_t *next,
int w, int prefs, int mrefs, int parity, int mode);
const AVPixFmtDescriptor *csp;
int eof;
} YADIFContext;
void ff_yadif_init_x86(YADIFContext *yadif);
#endif /* AVFILTER_YADIF_H */
......@@ -24,6 +24,7 @@
#include "libavutil/dict.h"
#include "avformat.h"
#include "apetag.h"
#include "internal.h"
#define APE_TAG_VERSION 2000
#define APE_TAG_FOOTER_BYTES 32
......@@ -56,20 +57,47 @@ static int ape_tag_read_field(AVFormatContext *s)
return -1;
if (flags & APE_TAG_FLAG_IS_BINARY) {
uint8_t filename[1024];
enum CodecID id;
AVStream *st = avformat_new_stream(s, NULL);
if (!st)
return AVERROR(ENOMEM);
avio_get_str(pb, INT_MAX, filename, sizeof(filename));
st->codec->extradata = av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE);
if (!st->codec->extradata)
return AVERROR(ENOMEM);
if (avio_read(pb, st->codec->extradata, size) != size) {
av_freep(&st->codec->extradata);
return AVERROR(EIO);
size -= avio_get_str(pb, size, filename, sizeof(filename));
if (size <= 0) {
av_log(s, AV_LOG_WARNING, "Skipping binary tag '%s'.\n", key);
return 0;
}
st->codec->extradata_size = size;
av_dict_set(&st->metadata, key, filename, 0);
st->codec->codec_type = AVMEDIA_TYPE_ATTACHMENT;
if ((id = ff_guess_image2_codec(filename)) != CODEC_ID_NONE) {
AVPacket pkt;
int ret;
ret = av_get_packet(s->pb, &pkt, size);
if (ret < 0) {
av_log(s, AV_LOG_ERROR, "Error reading cover art.\n");
return ret;
}
st->disposition |= AV_DISPOSITION_ATTACHED_PIC;
st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
st->codec->codec_id = id;
st->attached_pic = pkt;
st->attached_pic.stream_index = st->index;
st->attached_pic.flags |= AV_PKT_FLAG_KEY;
} else {
st->codec->extradata = av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE);
if (!st->codec->extradata)
return AVERROR(ENOMEM);
if (avio_read(pb, st->codec->extradata, size) != size) {
av_freep(&st->codec->extradata);
return AVERROR(EIO);
}
st->codec->extradata_size = size;
st->codec->codec_type = AVMEDIA_TYPE_ATTACHMENT;
}
} else {
value = av_malloc(size+1);
if (!value)
......
......@@ -30,6 +30,7 @@
#include "avformat.h"
#include "internal.h"
#include "avio_internal.h"
#include "id3v2.h"
#include "riff.h"
#include "asf.h"
#include "asfcrypt.h"
......@@ -163,6 +164,101 @@ static int get_value(AVIOContext *pb, int type){
}
}
/* MSDN claims that this should be "compatible with the ID3 frame, APIC",
* but in reality this is only loosely similar */
static int asf_read_picture(AVFormatContext *s, int len)
{
AVPacket pkt = { 0 };
const CodecMime *mime = ff_id3v2_mime_tags;
enum CodecID id = CODEC_ID_NONE;
char mimetype[64];
uint8_t *desc = NULL;
ASFStream *ast = NULL;
AVStream *st = NULL;
int ret, type, picsize, desc_len;
/* type + picsize + mime + desc */
if (len < 1 + 4 + 2 + 2) {
av_log(s, AV_LOG_ERROR, "Invalid attached picture size: %d.\n", len);
return AVERROR_INVALIDDATA;
}
/* picture type */
type = avio_r8(s->pb);
len--;
if (type >= FF_ARRAY_ELEMS(ff_id3v2_picture_types) || type < 0) {
av_log(s, AV_LOG_WARNING, "Unknown attached picture type: %d.\n", type);
type = 0;
}
/* picture data size */
picsize = avio_rl32(s->pb);
len -= 4;
/* picture MIME type */
len -= avio_get_str16le(s->pb, len, mimetype, sizeof(mimetype));
while (mime->id != CODEC_ID_NONE) {
if (!strncmp(mime->str, mimetype, sizeof(mimetype))) {
id = mime->id;
break;
}
mime++;
}
if (id == CODEC_ID_NONE) {
av_log(s, AV_LOG_ERROR, "Unknown attached picture mimetype: %s.\n",
mimetype);
return 0;
}
if (picsize >= len) {
av_log(s, AV_LOG_ERROR, "Invalid attached picture data size: %d >= %d.\n",
picsize, len);
return AVERROR_INVALIDDATA;
}
/* picture description */
desc_len = (len - picsize) * 2 + 1;
desc = av_malloc(desc_len);
if (!desc)
return AVERROR(ENOMEM);
len -= avio_get_str16le(s->pb, len - picsize, desc, desc_len);
ret = av_get_packet(s->pb, &pkt, picsize);
if (ret < 0)
goto fail;
st = avformat_new_stream(s, NULL);
ast = av_mallocz(sizeof(*ast));
if (!st || !ast) {
ret = AVERROR(ENOMEM);
goto fail;
}
st->priv_data = ast;
st->disposition |= AV_DISPOSITION_ATTACHED_PIC;
st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
st->codec->codec_id = id;
st->attached_pic = pkt;
st->attached_pic.stream_index = st->index;
st->attached_pic.flags |= AV_PKT_FLAG_KEY;
if (*desc)
av_dict_set(&st->metadata, "title", desc, AV_DICT_DONT_STRDUP_VAL);
else
av_freep(&desc);
av_dict_set(&st->metadata, "comment", ff_id3v2_picture_types[type], 0);
return 0;
fail:
av_freep(&ast);
av_freep(&desc);
av_free_packet(&pkt);
return ret;
}
static void get_tag(AVFormatContext *s, const char *key, int type, int len)
{
char *value;
......@@ -183,6 +279,9 @@ static void get_tag(AVFormatContext *s, const char *key, int type, int len)
} else if (type > 1 && type <= 5) { // boolean or DWORD or QWORD or WORD
uint64_t num = get_value(s->pb, type);
snprintf(value, len, "%"PRIu64, num);
} else if (type == 1 && !strcmp(key, "WM/Picture")) { // handle cover art
asf_read_picture(s, len);
goto finish;
} else {
av_log(s, AV_LOG_DEBUG, "Unsupported value type %d in tag %s.\n", type, key);
goto finish;
......
......@@ -168,6 +168,48 @@ static int mov_read_mac_string(MOVContext *c, AVIOContext *pb, int len,
return p - dst;
}
static int mov_read_covr(MOVContext *c, AVIOContext *pb, int type, int len)
{
AVPacket pkt;
AVStream *st;
MOVStreamContext *sc;
enum CodecID id;
int ret;
switch (type) {
case 0xd: id = CODEC_ID_MJPEG; break;
case 0xe: id = CODEC_ID_PNG; break;
case 0x1b: id = CODEC_ID_BMP; break;
default:
av_log(c->fc, AV_LOG_WARNING, "Unknown cover type: 0x%x.\n", type);
avio_skip(pb, len);
return 0;
}
st = avformat_new_stream(c->fc, NULL);
if (!st)
return AVERROR(ENOMEM);
sc = av_mallocz(sizeof(*sc));
if (!sc)
return AVERROR(ENOMEM);
st->priv_data = sc;
ret = av_get_packet(pb, &pkt, len);
if (ret < 0)
return ret;
st->disposition |= AV_DISPOSITION_ATTACHED_PIC;
st->attached_pic = pkt;
st->attached_pic.stream_index = st->index;
st->attached_pic.flags |= AV_PKT_FLAG_KEY;
st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
st->codec->codec_id = id;
return 0;
}
static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
{
#ifdef MOV_EXPORT_ALL_METADATA
......@@ -228,6 +270,14 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
avio_rb32(pb); // unknown
str_size = data_size - 16;
atom.size -= 16;
if (atom.type == MKTAG('c', 'o', 'v', 'r')) {
int ret = mov_read_covr(c, pb, data_type, str_size);
if (ret < 0) {
av_log(c->fc, AV_LOG_ERROR, "Error parsing cover art.\n");
return ret;
}
}
} else return 0;
} else if (atom.size > 4 && key && !c->itunes_metadata) {
str_size = avio_rb16(pb); // string length
......
......@@ -308,6 +308,7 @@ const AVCodecTag ff_codec_bmp_tags[] = {
{ CODEC_ID_FLIC, MKTAG('A', 'F', 'L', 'C') },
{ CODEC_ID_EXR, MKTAG('e', 'x', 'r', ' ') },
{ CODEC_ID_MSS1, MKTAG('M', 'S', 'S', '1') },
{ CODEC_ID_MSA1, MKTAG('M', 'S', 'A', '1') },
{ CODEC_ID_NONE, 0 }
};
......
......@@ -39,7 +39,8 @@ union av_intfloat64 {
*/
static av_always_inline float av_int2float(uint32_t i)
{
union av_intfloat32 v = { .i = i };
union av_intfloat32 v;
v.i = i;
return v.f;
}
......@@ -48,7 +49,8 @@ static av_always_inline float av_int2float(uint32_t i)
*/
static av_always_inline uint32_t av_float2int(float f)
{
union av_intfloat32 v = { .f = f };
union av_intfloat32 v;
v.f = f;
return v.i;
}
......@@ -57,7 +59,8 @@ static av_always_inline uint32_t av_float2int(float f)
*/
static av_always_inline double av_int2double(uint64_t i)
{
union av_intfloat64 v = { .i = i };
union av_intfloat64 v;
v.i = i;
return v.f;
}
......@@ -66,7 +69,8 @@ static av_always_inline double av_int2double(uint64_t i)
*/
static av_always_inline uint64_t av_double2int(double f)
{
union av_intfloat64 v = { .f = f };
union av_intfloat64 v;
v.f = f;
return v.i;
}
......