Merge commit '8d686ca5'

* commit '8d686ca5':
  dsputil: Split off *_8x8basis to a separate context

Conflicts:
    libavcodec/dsputil.c
    libavcodec/mpegvideo_enc.c
    libavcodec/x86/dsputilenc_mmx.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>

Merge commit '8d686ca5'
* commit '8d686ca5':
  dsputil: Split off *_8x8basis to a separate context

Conflicts:
    libavcodec/dsputil.c
    libavcodec/mpegvideo_enc.c
    libavcodec/x86/dsputilenc_mmx.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
462c6cdb · Michael Niedermayer · 8324bd51 · 8d686ca5 · 462c6cdb · 462c6cdb
Commit 462c6cdb authored Jul 07, 2014 by Michael Niedermayer
11 changed files
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -77,7 +77,8 @@ OBJS-$(CONFIG_MPEGAUDIODSP)            += mpegaudiodsp.o                \
 OBJS-$(CONFIG_MPEGVIDEO)               += mpegvideo.o mpegvideodsp.o    \
                                          mpegvideo_motion.o mpegutils.o
 OBJS-$(CONFIG_MPEGVIDEOENC)            += mpegvideo_enc.o mpeg12data.o  \
-                                          motion_est.o ratecontrol.o
+                                          motion_est.o ratecontrol.o    \
+                                          mpegvideoencdsp.o
 OBJS-$(CONFIG_QPELDSP)                 += qpeldsp.o
 OBJS-$(CONFIG_RANGECODER)              += rangecoder.o
 RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o

--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -465,35 +465,6 @@ static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int
        return score1 + FFABS(score2) * 8;
 }
-static int try_8x8basis_c(int16_t rem[64], int16_t weight[64],
-                          int16_t basis[64], int scale)
-{
-    int i;
-    unsigned int sum = 0;
-    for (i = 0; i < 8 * 8; i++) {
-        int b = rem[i] + ((basis[i] * scale +
-                           (1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >>
-                          (BASIS_SHIFT - RECON_SHIFT));
-        int w = weight[i];
-        b >>= RECON_SHIFT;
-        av_assert2(-512 < b && b < 512);
-        sum += (w * b) * (w * b) >> 4;
-    }
-    return sum >> 2;
-}
-static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale)
-{
-    int i;
-    for (i = 0; i < 8 * 8; i++)
-        rem[i] += (basis[i] * scale +
-                   (1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >>
-                  (BASIS_SHIFT - RECON_SHIFT);
-}
 static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b,
                    int stride, int h)
 {
@@ -1170,9 +1141,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
    ff_dsputil_init_dwt(c);
 #endif
-    c->try_8x8basis = try_8x8basis_c;
-    c->add_8x8basis = add_8x8basis_c;
    c->shrink[0] = av_image_copy_plane;
    c->shrink[1] = ff_shrink22;
    c->shrink[2] = ff_shrink44;

--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -103,12 +103,6 @@ typedef struct DSPContext {
    void (*fdct)(int16_t *block /* align 16 */);
    void (*fdct248)(int16_t *block /* align 16 */);
-    int (*try_8x8basis)(int16_t rem[64], int16_t weight[64],
-                        int16_t basis[64], int scale);
-    void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
-#define BASIS_SHIFT 16
-#define RECON_SHIFT 6
    void (*draw_edges)(uint8_t *buf, int wrap, int width, int height,
                       int w, int h, int sides);
 #define EDGE_WIDTH 16

--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -38,6 +38,7 @@
 #include "hpeldsp.h"
 #include "idctdsp.h"
 #include "mpegvideodsp.h"
+#include "mpegvideoencdsp.h"
 #include "put_bits.h"
 #include "ratecontrol.h"
 #include "parser.h"
@@ -365,6 +366,7 @@ typedef struct MpegEncContext {
    HpelDSPContext hdsp;
    IDCTDSPContext idsp;
    MpegVideoDSPContext mdsp;
+    MpegvideoEncDSPContext mpvencdsp;
    QpelDSPContext qdsp;
    VideoDSPContext vdsp;
    H263DSPContext h263dsp;

--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -818,6 +818,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx)
    if (ff_MPV_common_init(s) < 0)
        return -1;
+    ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
    ff_qpeldsp_init(&s->qdsp);
    s->avctx->coded_frame = s->current_picture.f;
@@ -4090,7 +4091,7 @@ STOP_TIMER("memset rem[]")}
            run_tab[rle_index++]=run;
            run=0;
-            s->dsp.add_8x8basis(rem, basis[j], coeff);
+            s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
        }else{
            run++;
        }
@@ -4104,7 +4105,7 @@ STOP_TIMER("init rem[]")
 {START_TIMER
 #endif
    for(;;){
-        int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0);
+        int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
        int best_coeff=0;
        int best_change=0;
        int run2, best_unquant_change=0, analyze_gradient;
@@ -4148,7 +4149,8 @@ STOP_TIMER("dct")}
                if(new_coeff >= 2048 || new_coeff < 0)
                    continue;
-                score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff);
+                score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
+                                                  new_coeff - old_coeff);
                if(score<best_score){
                    best_score= score;
                    best_coeff= 0;
@@ -4271,7 +4273,8 @@ STOP_TIMER("dct")}
                unquant_change= new_coeff - old_coeff;
                av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
-                score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
+                score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
+                                                   unquant_change);
                if(score<best_score){
                    best_score= score;
                    best_coeff= i;
@@ -4345,7 +4348,7 @@ if(256*256*256*64 % count == 0){
                 }
            }
-            s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
+            s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
        }else{
            break;
        }

--- a/libavcodec/mpegvideoencdsp.c
+++ b/libavcodec/mpegvideoencdsp.c
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <assert.h>
+#include <stdint.h>
+#include "config.h"
+#include "libavutil/avassert.h"
+#include "libavutil/attributes.h"
+#include "avcodec.h"
+#include "mpegvideoencdsp.h"
+/**
+ * Plain C implementation of the try_8x8basis hook.
+ *
+ * For each of the 64 entries, basis[i] * scale is shifted right by
+ * (BASIS_SHIFT - RECON_SHIFT) with rounding and added to rem[i]; the
+ * result is shifted right by RECON_SHIFT, multiplied by weight[i] and
+ * squared. The squares, each shifted right by 4, are accumulated.
+ * None of the three arrays is written to.
+ *
+ * @param rem    64 values the scaled basis is added to
+ * @param weight 64 per-entry multipliers applied before squaring
+ * @param basis  64 basis values
+ * @param scale  factor applied to every basis[] entry
+ * @return the accumulated sum shifted right by 2
+ */
+static int try_8x8basis_c(int16_t rem[64], int16_t weight[64],
+                          int16_t basis[64], int scale)
+{
+    int i;
+    /* Unsigned accumulator: additions wrap instead of overflowing a signed int. */
+    unsigned int sum = 0;
+    for (i = 0; i < 8 * 8; i++) {
+        /* Rounded shift: half of the divisor is added before shifting. */
+        int b = rem[i] + ((basis[i] * scale +
+                           (1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >>
+                          (BASIS_SHIFT - RECON_SHIFT));
+        int w = weight[i];
+        b >>= RECON_SHIFT;
+        av_assert2(-512 < b && b < 512);
+        /* '*' binds tighter than '>>': the whole square is shifted by 4. */
+        sum += (w * b) * (w * b) >> 4;
+    }
+    return sum >> 2;
+}
+/**
+ * Plain C implementation of the add_8x8basis hook.
+ *
+ * Adds basis[i] * scale, shifted right by (BASIS_SHIFT - RECON_SHIFT)
+ * with rounding, to each of the 64 entries of rem[] in place.
+ *
+ * @param rem   64 values, updated in place
+ * @param basis 64 basis values, read only
+ * @param scale factor applied to every basis[] entry
+ */
+static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale)
+{
+    int i;
+    for (i = 0; i < 8 * 8; i++)
+        /* Same rounded shift as in try_8x8basis_c(). */
+        rem[i] += (basis[i] * scale +
+                   (1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >>
+                  (BASIS_SHIFT - RECON_SHIFT);
+}
+/**
+ * Fill an MpegvideoEncDSPContext with function pointers.
+ *
+ * The C implementations are installed first; on x86 builds
+ * ff_mpegvideoencdsp_init_x86() is then called and may replace them.
+ *
+ * @param c     context whose try_8x8basis/add_8x8basis pointers are set
+ * @param avctx codec context, passed on to the x86 init function
+ */
+av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
+                                     AVCodecContext *avctx)
+{
+    c->try_8x8basis = try_8x8basis_c;
+    c->add_8x8basis = add_8x8basis_c;
+    /* ARCH_X86 is not defined in this file; presumably it comes from config.h. */
+    if (ARCH_X86)
+        ff_mpegvideoencdsp_init_x86(c, avctx);
+}
--- a/libavcodec/mpegvideoencdsp.h
+++ b/libavcodec/mpegvideoencdsp.h
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVCODEC_MPEGVIDEOENCDSP_H
+#define AVCODEC_MPEGVIDEOENCDSP_H
+#include <stdint.h>
+#include "avcodec.h"
+/*
+ * Shift amounts used by the 8x8 basis functions: basis[i] * scale is
+ * shifted right by (BASIS_SHIFT - RECON_SHIFT) before it is combined
+ * with rem[], and try_8x8basis shifts the combined value right by
+ * RECON_SHIFT before weighting it.
+ */
+#define BASIS_SHIFT 16
+#define RECON_SHIFT 6
+/**
+ * Function pointers for the 8x8 basis operations.
+ * Filled in by ff_mpegvideoencdsp_init().
+ */
+typedef struct MpegvideoEncDSPContext {
+    /* Returns a score for rem[] + scaled basis[] under weight[]. */
+    int (*try_8x8basis)(int16_t rem[64], int16_t weight[64],
+                        int16_t basis[64], int scale);
+    /* Adds the scaled basis[] to rem[]. */
+    void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
+} MpegvideoEncDSPContext;
+/**
+ * Set every function pointer in c, choosing an implementation for the
+ * build architecture.
+ */
+void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c,
+                             AVCodecContext *avctx);
+/**
+ * x86-specific part of the initialization; called by
+ * ff_mpegvideoencdsp_init() after the C defaults have been set.
+ */
+void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
+                                 AVCodecContext *avctx);
+#endif /* AVCODEC_MPEGVIDEOENCDSP_H */
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -28,7 +28,8 @@ OBJS-$(CONFIG_LPC)                     += x86/lpc.o
 OBJS-$(CONFIG_MPEGAUDIODSP)            += x86/mpegaudiodsp.o
 OBJS-$(CONFIG_MPEGVIDEO)               += x86/mpegvideo.o              \
                                          x86/mpegvideodsp.o
-OBJS-$(CONFIG_MPEGVIDEOENC)            += x86/mpegvideoenc.o
+OBJS-$(CONFIG_MPEGVIDEOENC)            += x86/mpegvideoenc.o           \
+                                          x86/mpegvideoencdsp_init.o
 OBJS-$(CONFIG_QPELDSP)                 += x86/qpeldsp_init.o
 OBJS-$(CONFIG_VIDEODSP)                += x86/videodsp_init.o
 OBJS-$(CONFIG_VP3DSP)                  += x86/vp3dsp_init.o

--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -352,72 +352,6 @@ static int vsad16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
 #undef SUM
-#define PHADDD(a, t)                            \
-    "movq  " #a ", " #t "               \n\t"   \
-    "psrlq    $32, " #a "               \n\t"   \
-    "paddd " #t ", " #a "               \n\t"
-/*
- * pmulhw:   dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
- * pmulhrw:  dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
- * pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
- */
-#define PMULHRW(x, y, s, o)                     \
-    "pmulhw " #s ", " #x "              \n\t"   \
-    "pmulhw " #s ", " #y "              \n\t"   \
-    "paddw  " #o ", " #x "              \n\t"   \
-    "paddw  " #o ", " #y "              \n\t"   \
-    "psraw      $1, " #x "              \n\t"   \
-    "psraw      $1, " #y "              \n\t"
-#define DEF(x) x ## _mmx
-#define SET_RND MOVQ_WONE
-#define SCALE_OFFSET 1
-#include "dsputil_qns_template.c"
-#undef DEF
-#undef SET_RND
-#undef SCALE_OFFSET
-#undef PMULHRW
-#define DEF(x) x ## _3dnow
-#define SET_RND(x)
-#define SCALE_OFFSET 0
-#define PMULHRW(x, y, s, o)                     \
-    "pmulhrw " #s ", " #x "             \n\t"   \
-    "pmulhrw " #s ", " #y "             \n\t"
-#include "dsputil_qns_template.c"
-#undef DEF
-#undef SET_RND
-#undef SCALE_OFFSET
-#undef PMULHRW
-#if HAVE_SSSE3_INLINE
-#undef PHADDD
-#define DEF(x) x ## _ssse3
-#define SET_RND(x)
-#define SCALE_OFFSET -1
-#define PHADDD(a, t)                            \
-    "pshufw $0x0E, " #a ", " #t "       \n\t"   \
-    /* faster than phaddd on core2 */           \
-    "paddd " #t ", " #a "               \n\t"
-#define PMULHRW(x, y, s, o)                     \
-    "pmulhrsw " #s ", " #x "            \n\t"   \
-    "pmulhrsw " #s ", " #y "            \n\t"
-#include "dsputil_qns_template.c"
-#undef DEF
-#undef SET_RND
-#undef SCALE_OFFSET
-#undef PMULHRW
-#undef PHADDD
-#endif /* HAVE_SSSE3_INLINE */
 #endif /* HAVE_INLINE_ASM */
 av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
@@ -448,16 +382,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
            c->vsad[0]      = vsad16_mmx;
-            c->try_8x8basis = try_8x8basis_mmx;
-        }
-        c->add_8x8basis = add_8x8basis_mmx;
-    }
-    if (INLINE_AMD3DNOW(cpu_flags)) {
-        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
-            c->try_8x8basis = try_8x8basis_3dnow;
        }
-        c->add_8x8basis = add_8x8basis_3dnow;
    }
    if (INLINE_MMXEXT(cpu_flags)) {
@@ -480,10 +405,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
 #if HAVE_SSSE3_INLINE
    if (INLINE_SSSE3(cpu_flags)) {
-        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
-            c->try_8x8basis = try_8x8basis_ssse3;
-        }
-        c->add_8x8basis    = add_8x8basis_ssse3;
    }
 #endif
 #endif /* HAVE_INLINE_ASM */

--- a/libavcodec/x86/dsputil_qns_template.c
+++ b/libavcodec/x86/mpegvideoenc_qns_template.c
 /*
- * DSP utils : QNS functions are compiled 3 times for mmx/3dnow/ssse3
+ * QNS functions are compiled 3 times for MMX/3DNOW/SSSE3
 * Copyright (c) 2004 Michael Niedermayer
 *
 * MMX optimization by Michael Niedermayer <michaelni@gmx.at>
@@ -22,9 +22,9 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
-#include <assert.h>
 #include <stdint.h>
+#include "libavutil/avassert.h"
 #include "libavutil/common.h"
 #include "libavutil/x86/asm.h"

--- a/libavcodec/x86/mpegvideoencdsp_init.c
+++ b/libavcodec/x86/mpegvideoencdsp_init.c
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/mpegvideoencdsp.h"
+/*
+ * mpegvideoenc_qns_template.c is included up to three times below, each
+ * time with a different DEF() suffix (_mmx, _3dnow, _ssse3) and different
+ * SET_RND / SCALE_OFFSET / PMULHRW / PHADDD definitions. The template is
+ * presumably what defines the try_8x8basis_* and add_8x8basis_* functions
+ * referenced by ff_mpegvideoencdsp_init_x86().
+ */
+#if HAVE_INLINE_ASM
+/* Add the upper 32 bits of register a to its lower 32 bits; t is scratch. */
+#define PHADDD(a, t)                            \
+    "movq  " #a ", " #t "               \n\t"   \
+    "psrlq    $32, " #a "               \n\t"   \
+    "paddd " #t ", " #a "               \n\t"
+/*
+ * pmulhw:   dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31]
+ * pmulhrw:  dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31]
+ * pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30]
+ */
+/* MMX variant: pmulhw, then add o and shift right by one, on both x and y. */
+#define PMULHRW(x, y, s, o)                     \
+    "pmulhw " #s ", " #x "              \n\t"   \
+    "pmulhw " #s ", " #y "              \n\t"   \
+    "paddw  " #o ", " #x "              \n\t"   \
+    "paddw  " #o ", " #y "              \n\t"   \
+    "psraw      $1, " #x "              \n\t"   \
+    "psraw      $1, " #y "              \n\t"
+#define DEF(x) x ## _mmx
+#define SET_RND MOVQ_WONE
+#define SCALE_OFFSET 1
+#include "mpegvideoenc_qns_template.c"
+#undef DEF
+#undef SET_RND
+#undef SCALE_OFFSET
+#undef PMULHRW
+/* 3DNow! variant: a single pmulhrw per register, SET_RND expands to nothing. */
+#define DEF(x) x ## _3dnow
+#define SET_RND(x)
+#define SCALE_OFFSET 0
+#define PMULHRW(x, y, s, o)                     \
+    "pmulhrw " #s ", " #x "             \n\t"   \
+    "pmulhrw " #s ", " #y "             \n\t"
+#include "mpegvideoenc_qns_template.c"
+#undef DEF
+#undef SET_RND
+#undef SCALE_OFFSET
+#undef PMULHRW
+/* SSSE3 variant: pmulhrsw, plus a pshufw-based replacement for PHADDD. */
+#if HAVE_SSSE3_INLINE
+#undef PHADDD
+#define DEF(x) x ## _ssse3
+#define SET_RND(x)
+#define SCALE_OFFSET -1
+#define PHADDD(a, t)                            \
+    "pshufw $0x0E, " #a ", " #t "       \n\t"   \
+    /* faster than phaddd on core2 */           \
+    "paddd " #t ", " #a "               \n\t"
+#define PMULHRW(x, y, s, o)                     \
+    "pmulhrsw " #s ", " #x "            \n\t"   \
+    "pmulhrsw " #s ", " #y "            \n\t"
+#include "mpegvideoenc_qns_template.c"
+#undef DEF
+#undef SET_RND
+#undef SCALE_OFFSET
+#undef PMULHRW
+#undef PHADDD
+#endif /* HAVE_SSSE3_INLINE */
+#endif /* HAVE_INLINE_ASM */
+/**
+ * Replace the 8x8 basis function pointers with x86 inline-asm versions
+ * according to the CPU flags reported by av_get_cpu_flags().
+ *
+ * add_8x8basis is replaced whenever a matching CPU feature is found;
+ * try_8x8basis is replaced only when CODEC_FLAG_BITEXACT is not set in
+ * avctx->flags. Without HAVE_INLINE_ASM the function does nothing.
+ *
+ * @param c     context whose pointers may be overwritten
+ * @param avctx codec context; only its flags field is read here
+ */
+av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c,
+                                         AVCodecContext *avctx)
+{
+#if HAVE_INLINE_ASM
+    int cpu_flags = av_get_cpu_flags();
+    /* Checks run in the order MMX, 3DNow!, SSSE3; a later match overwrites an earlier one. */
+    if (INLINE_MMX(cpu_flags)) {
+        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+            c->try_8x8basis = try_8x8basis_mmx;
+        }
+        c->add_8x8basis = add_8x8basis_mmx;
+    }
+    if (INLINE_AMD3DNOW(cpu_flags)) {
+        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+            c->try_8x8basis = try_8x8basis_3dnow;
+        }
+        c->add_8x8basis = add_8x8basis_3dnow;
+    }
+#if HAVE_SSSE3_INLINE
+    if (INLINE_SSSE3(cpu_flags)) {
+        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+            c->try_8x8basis = try_8x8basis_ssse3;
+        }
+        c->add_8x8basis = add_8x8basis_ssse3;
+    }
+#endif /* HAVE_SSSE3_INLINE */
+#endif /* HAVE_INLINE_ASM */
+}