Commit 05aec7bb authored by Måns Rullgård

Separate DWT from snow and dsputil

This moves the DWT functions from snow.c and dsputil.c to a file of
their own.  A new struct, DWTContext, holds the function pointers
previously part of DSPContext.

Originally committed as revision 22522 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 28eb5773
...@@ -894,6 +894,7 @@ CONFIG_LIST=" ...@@ -894,6 +894,7 @@ CONFIG_LIST="
bzlib bzlib
dct dct
doc doc
dwt
dxva2 dxva2
fastdiv fastdiv
ffmpeg ffmpeg
...@@ -1276,7 +1277,8 @@ rv30_decoder_select="golomb" ...@@ -1276,7 +1277,8 @@ rv30_decoder_select="golomb"
rv40_decoder_select="golomb" rv40_decoder_select="golomb"
shorten_decoder_select="golomb" shorten_decoder_select="golomb"
sipr_decoder_select="lsp" sipr_decoder_select="lsp"
snow_encoder_select="aandct" snow_decoder_select="dwt"
snow_encoder_select="aandct dwt"
sonic_decoder_select="golomb" sonic_decoder_select="golomb"
sonic_encoder_select="golomb" sonic_encoder_select="golomb"
sonic_ls_encoder_select="golomb" sonic_ls_encoder_select="golomb"
......
...@@ -28,6 +28,7 @@ OBJS = allcodecs.o \ ...@@ -28,6 +28,7 @@ OBJS = allcodecs.o \
OBJS-$(CONFIG_AANDCT) += aandcttab.o OBJS-$(CONFIG_AANDCT) += aandcttab.o
OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o
OBJS-$(CONFIG_DCT) += dct.o OBJS-$(CONFIG_DCT) += dct.o
OBJS-$(CONFIG_DWT) += dwt.o
OBJS-$(CONFIG_DXVA2) += dxva2.o OBJS-$(CONFIG_DXVA2) += dxva2.o
FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o
OBJS-$(CONFIG_FFT) += avfft.o fft.o $(FFT-OBJS-yes) OBJS-$(CONFIG_FFT) += avfft.o fft.o $(FFT-OBJS-yes)
...@@ -598,7 +599,7 @@ MMX-OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp_mmx.o ...@@ -598,7 +599,7 @@ MMX-OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp_mmx.o
MMX-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o MMX-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o
MMX-OBJS-$(CONFIG_GPL) += x86/idct_mmx.o MMX-OBJS-$(CONFIG_GPL) += x86/idct_mmx.o
MMX-OBJS-$(CONFIG_LPC) += x86/lpc_mmx.o MMX-OBJS-$(CONFIG_LPC) += x86/lpc_mmx.o
MMX-OBJS-$(CONFIG_SNOW_DECODER) += x86/snowdsp_mmx.o MMX-OBJS-$(CONFIG_DWT) += x86/snowdsp_mmx.o
MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o
MMX-OBJS-$(CONFIG_VP3_DECODER) += x86/vp3dsp_mmx.o \ MMX-OBJS-$(CONFIG_VP3_DECODER) += x86/vp3dsp_mmx.o \
x86/vp3dsp_sse2.o x86/vp3dsp_sse2.o
......
...@@ -33,7 +33,6 @@ ...@@ -33,7 +33,6 @@
#include "faandct.h" #include "faandct.h"
#include "faanidct.h" #include "faanidct.h"
#include "mathops.h" #include "mathops.h"
#include "snow.h"
#include "mpegvideo.h" #include "mpegvideo.h"
#include "config.h" #include "config.h"
#include "lpc.h" #include "lpc.h"
...@@ -329,102 +328,6 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) ...@@ -329,102 +328,6 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
return s; return s;
} }
#if CONFIG_SNOW_ENCODER //dwt is in snow.c
/**
 * Wavelet-domain comparison of two pixel blocks.
 *
 * The per-pixel difference pix1 - pix2, scaled by 16, is written into a
 * scratch buffer with a fixed row stride of 32, transformed in place by
 * ff_spatial_dwt(), and the absolute values of the transformed samples are
 * summed, each weighted by a per-level / per-orientation entry of the
 * scale table below.
 *
 * @param v         not used by this function
 * @param pix1      first block, rows line_size bytes apart
 * @param pix2      second block, rows line_size bytes apart
 * @param line_size byte distance between consecutive rows of both blocks
 * @param w         block width; 8 selects 3 decomposition levels, any other
 *                  value selects 4 (the table is laid out for 16 and 32).
 *                  Columns are processed four at a time.
 * @param h         block height; asserted to be equal to w
 * @param type      forwarded to ff_spatial_dwt() and used as the first index
 *                  of the scale table (0: 9/7 entries, 1: 5/3 entries)
 * @return the weighted sum of absolute values, shifted right by 9
 */
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
    int s, i, j;
    const int dec_count= w==8 ? 3 : 4;
    int tmp[32*32];
    int level, ori;
    static const int scale[2][2][4][4]={
      {
        {
            // 9/7 8x8 dec=3
            {268, 239, 239, 213},
            {  0, 224, 224, 152},
            {  0, 135, 135, 110},
        },{
            // 9/7 16x16 or 32x32 dec=4
            {344, 310, 310, 280},
            {  0, 320, 320, 228},
            {  0, 175, 175, 136},
            {  0, 129, 129, 102},
        }
      },{
        {
            // 5/3 8x8 dec=3
            {275, 245, 245, 218},
            {  0, 230, 230, 156},
            {  0, 138, 138, 113},
        },{
            // 5/3 16x16 or 32x32 dec=4
            {352, 317, 317, 286},
            {  0, 328, 328, 233},
            {  0, 180, 180, 140},
            {  0, 132, 132, 105},
        }
      }
    };

    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            /* Multiply rather than shift: the difference may be negative and
             * left-shifting a negative int is undefined behaviour in C. */
            tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0]) * 16;
            tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1]) * 16;
            tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2]) * 16;
            tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3]) * 16;
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);

    s=0;
    assert(w==h);
    for(level=0; level<dec_count; level++){
        /* Orientation 0 is only visited at level 0; its table entries are 0
         * for every other level. */
        for(ori= level ? 1 : 0; ori<4; ori++){
            int size= w>>(dec_count-level);
            /* bit 0 of ori offsets the region horizontally by size,
             * bit 1 offsets it vertically by half of this level's stride */
            int sx= (ori&1) ? size : 0;
            int stride= 32<<(dec_count-level);
            int sy= (ori&2) ? stride>>1 : 0;

            for(i=0; i<size; i++){
                for(j=0; j<size; j++){
                    /* named coef so it does not shadow the parameter v */
                    int coef= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
                    s += FFABS(coef);
                }
            }
        }
    }
    assert(s>=0);
    return s>>9;
}
/** Compare two blocks through w_c() with width 8 and type 1 (the 5/3 scale entries). */
static int w53_8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int block_width = 8;
    const int dwt_type    = 1;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
/** Compare two blocks through w_c() with width 8 and type 0 (the 9/7 scale entries). */
static int w97_8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int block_width = 8;
    const int dwt_type    = 0;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
/** Compare two blocks through w_c() with width 16 and type 1 (the 5/3 scale entries). */
static int w53_16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int block_width = 16;
    const int dwt_type    = 1;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
/** Compare two blocks through w_c() with width 16 and type 0 (the 9/7 scale entries). */
static int w97_16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int block_width = 16;
    const int dwt_type    = 0;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
/**
 * Compare two blocks through w_c() with width 32 and type 1 (the 5/3 scale
 * entries). Unlike the 8- and 16-wide variants this one has external linkage.
 */
int w53_32_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int block_width = 32;
    const int dwt_type    = 1;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
/**
 * Compare two blocks through w_c() with width 32 and type 0 (the 9/7 scale
 * entries). Unlike the 8- and 16-wide variants this one has external linkage.
 */
int w97_32_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int block_width = 32;
    const int dwt_type    = 0;

    return w_c(v, pix1, pix2, line_size, block_width, h, dwt_type);
}
#endif
/* draw the edges of width 'w' of an image of size width, height */ /* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced //FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w) static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
...@@ -3531,7 +3434,7 @@ void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){ ...@@ -3531,7 +3434,7 @@ void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
case FF_CMP_NSSE: case FF_CMP_NSSE:
cmp[i]= c->nsse[i]; cmp[i]= c->nsse[i];
break; break;
#if CONFIG_SNOW_ENCODER #if CONFIG_DWT
case FF_CMP_W53: case FF_CMP_W53:
cmp[i]= c->w53[i]; cmp[i]= c->w53[i];
break; break;
...@@ -4816,11 +4719,8 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) ...@@ -4816,11 +4719,8 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->vsse[5]= vsse_intra8_c; c->vsse[5]= vsse_intra8_c;
c->nsse[0]= nsse16_c; c->nsse[0]= nsse16_c;
c->nsse[1]= nsse8_c; c->nsse[1]= nsse8_c;
#if CONFIG_SNOW_ENCODER #if CONFIG_DWT
c->w53[0]= w53_16_c; ff_dsputil_init_dwt(c);
c->w53[1]= w53_8_c;
c->w97[0]= w97_16_c;
c->w97[1]= w97_8_c;
#endif #endif
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c; c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
...@@ -4865,12 +4765,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) ...@@ -4865,12 +4765,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->try_8x8basis= try_8x8basis_c; c->try_8x8basis= try_8x8basis_c;
c->add_8x8basis= add_8x8basis_c; c->add_8x8basis= add_8x8basis_c;
#if CONFIG_SNOW_DECODER
c->vertical_compose97i = ff_snow_vertical_compose97i;
c->horizontal_compose97i = ff_snow_horizontal_compose97i;
c->inner_add_yblock = ff_snow_inner_add_yblock;
#endif
#if CONFIG_VORBIS_DECODER #if CONFIG_VORBIS_DECODER
c->vorbis_inverse_coupling = vorbis_inverse_coupling; c->vorbis_inverse_coupling = vorbis_inverse_coupling;
#endif #endif
......
...@@ -37,8 +37,6 @@ ...@@ -37,8 +37,6 @@
//#define DEBUG //#define DEBUG
/* dct code */ /* dct code */
typedef short DCTELEM; typedef short DCTELEM;
typedef int DWTELEM;
typedef short IDWTELEM;
void fdct_ifast (DCTELEM *data); void fdct_ifast (DCTELEM *data);
void fdct_ifast248 (DCTELEM *data); void fdct_ifast248 (DCTELEM *data);
...@@ -185,10 +183,6 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ ...@@ -185,10 +183,6 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
// although currently h<4 is not used as functions with width <8 are neither used nor implemented // although currently h<4 is not used as functions with width <8 are neither used nor implemented
typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/; typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
// for snow slices
typedef struct slice_buffer_s slice_buffer;
/** /**
* Scantable. * Scantable.
*/ */
...@@ -538,11 +532,6 @@ typedef struct DSPContext { ...@@ -538,11 +532,6 @@ typedef struct DSPContext {
void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
/* snow wavelet */
void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
void (*horizontal_compose97i)(IDWTELEM *b, int width);
void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
void (*prefetch)(void *mem, int stride, int h); void (*prefetch)(void *mem, int stride, int h);
void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
...@@ -681,6 +670,7 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx); ...@@ -681,6 +670,7 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx); void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
void ff_dsputil_init_dwt(DSPContext *c);
void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx); void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);
void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx); void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx); void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx);
......
This diff is collapsed.
/*
* Copyright (C) 2004-2010 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_DWT_H
#define AVCODEC_DWT_H
#include <stdint.h>
typedef int DWTELEM;
typedef short IDWTELEM;
/**
 * State object passed as the cs argument to the ff_spatial_idwt_*init() and
 * ff_spatial_idwt_*slice() functions declared in this header.
 *
 * It holds four IDWTELEM line pointers and one integer row value.
 * NOTE(review): presumably b0..b3 are the lines carried over between
 * successive vertical compose steps and y is the next row to process --
 * confirm against the implementation, which is not visible from this header.
 */
typedef struct {
IDWTELEM *b0;
IDWTELEM *b1;
IDWTELEM *b2;
IDWTELEM *b3;
int y;
} DWTCompose;
/**
 * Line cache in front of a larger IDWTELEM buffer.
 * Used to minimize the amount of memory used in order to optimize cache performance.
 *
 * Instances are set up, queried and torn down through the slice_buffer_*
 * functions and the slice_buffer_get_line() macro declared in this header.
 */
typedef struct slice_buffer_s {
/* slice_buffer_get_line() treats a null entry as "not loaded yet" and then
 * calls slice_buffer_load_line() for that index. */
IDWTELEM * * line; ///< For use by idwt and predict_slices.
IDWTELEM * * data_stack; ///< Used for internal purposes.
/* NOTE(review): presumably the index of the current top of data_stack -- confirm. */
int data_stack_top;
/* NOTE(review): the three sizes below presumably mirror the line_count,
 * line_width and max_allocated_lines arguments of slice_buffer_init() --
 * confirm against its definition. */
int line_count;
int line_width;
int data_count;
IDWTELEM * base_buffer; ///< Buffer that this structure is caching.
} slice_buffer;
/**
 * Table of replaceable DWT routines.
 *
 * The three members have the same signatures as ff_snow_vertical_compose97i(),
 * ff_snow_horizontal_compose97i() and ff_snow_inner_add_yblock() declared
 * further down in this header. ff_dwt_init() and ff_dwt_init_x86() both take
 * a pointer to this structure.
 */
typedef struct DWTContext {
void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
void (*horizontal_compose97i)(IDWTELEM *b, int width);
void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
} DWTContext;
/* NOTE(review): presumably the upper bound for the decomposition_count
 * arguments of the ff_spatial_* functions below -- confirm at the call sites. */
#define MAX_DECOMPOSITIONS 8
/* Values for the "type" argument of the ff_spatial_* functions:
 * 0 selects the 9/7 wavelet, 1 selects the 5/3 wavelet. */
#define DWT_97 0
#define DWT_53 1
#define liftS lift
#if 1
#define W_AM 3
#define W_AO 0
#define W_AS 1
#undef liftS
#define W_BM 1
#define W_BO 8
#define W_BS 4
#define W_CM 1
#define W_CO 0
#define W_CS 0
#define W_DM 3
#define W_DO 4
#define W_DS 3
#elif 0
#define W_AM 55
#define W_AO 16
#define W_AS 5
#define W_BM 3
#define W_BO 32
#define W_BS 6
#define W_CM 127
#define W_CO 64
#define W_CS 7
#define W_DM 7
#define W_DO 8
#define W_DS 4
#elif 0
#define W_AM 97
#define W_AO 32
#define W_AS 6
#define W_BM 63
#define W_BO 512
#define W_BS 10
#define W_CM 13
#define W_CO 8
#define W_CS 4
#define W_DM 15
#define W_DO 16
#define W_DS 5
#else
#define W_AM 203
#define W_AO 64
#define W_AS 7
#define W_BM 217
#define W_BO 2048
#define W_BS 12
#define W_CM 113
#define W_CO 64
#define W_CS 7
#define W_DM 227
#define W_DO 128
#define W_DS 9
#endif
/*
 * Return the pointer stored in slice_buf->line[line_num] when it is non-null;
 * otherwise call slice_buffer_load_line() for that line and return its result.
 *
 * Both arguments are expanded more than once, so they must not have side
 * effects (e.g. do not pass y++ as line_num).
 */
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
/* Disabled alternative that always goes through slice_buffer_load_line(): */
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
/* slice_buffer management. The definitions are not part of this header;
 * slice_buffer_load_line() is the fallback used by slice_buffer_get_line(). */
void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer);
void slice_buffer_release(slice_buffer * buf, int line);
void slice_buffer_flush(slice_buffer * buf);
void slice_buffer_destroy(slice_buffer * buf);
IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line);
/* Functions whose signatures match the three DWTContext function pointers. */
void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
void ff_snow_horizontal_compose97i(IDWTELEM *b, int width);
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
/* 32-wide wavelet comparison functions (5/3 and 9/7 respectively). */
int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h);
int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h);
/* Forward transform on an int buffer; type is DWT_97 or DWT_53. */
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
/* Inverse transform entry points: slice_buffer-backed init/slice pair,
 * plain-buffer init/slice pair, and a whole-buffer call. Only the
 * slice_buffer-backed slice function takes a DWTContext. */
void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count);
void ff_spatial_idwt_buffered_slice(DWTContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y);
void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count);
void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y);
void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count);
/* DWTContext initialisers; ff_dwt_init_x86() installs the x86-specific
 * routines selected from the detected CPU flags. */
void ff_dwt_init(DWTContext *c);
void ff_dwt_init_x86(DWTContext *c);
#endif /* AVCODEC_DWT_H */
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include "avcodec.h" #include "avcodec.h"
#include "dsputil.h" #include "dsputil.h"
#include "dwt.h"
#include "ivi_common.h" #include "ivi_common.h"
#include "ivi_dsp.h" #include "ivi_dsp.h"
......
This diff is collapsed.
...@@ -23,10 +23,10 @@ ...@@ -23,10 +23,10 @@
#define AVCODEC_SNOW_H #define AVCODEC_SNOW_H
#include "dsputil.h" #include "dsputil.h"
#include "dwt.h"
#define MID_STATE 128 #define MID_STATE 128
#define MAX_DECOMPOSITIONS 8
#define MAX_PLANES 4 #define MAX_PLANES 4
#define QSHIFT 5 #define QSHIFT 5
#define QROOT (1<<QSHIFT) #define QROOT (1<<QSHIFT)
...@@ -37,101 +37,6 @@ ...@@ -37,101 +37,6 @@
#define LOG2_OBMC_MAX 8 #define LOG2_OBMC_MAX 8
#define OBMC_MAX (1<<(LOG2_OBMC_MAX)) #define OBMC_MAX (1<<(LOG2_OBMC_MAX))
#define DWT_97 0
#define DWT_53 1
/** Used to minimize the amount of memory used in order to optimize cache performance. **/
struct slice_buffer_s {
IDWTELEM * * line; ///< For use by idwt and predict_slices.
IDWTELEM * * data_stack; ///< Used for internal purposes.
int data_stack_top;
int line_count;
int line_width;
int data_count;
IDWTELEM * base_buffer; ///< Buffer that this structure is caching.
};
#define liftS lift
#if 1
#define W_AM 3
#define W_AO 0
#define W_AS 1
#undef liftS
#define W_BM 1
#define W_BO 8
#define W_BS 4
#define W_CM 1
#define W_CO 0
#define W_CS 0
#define W_DM 3
#define W_DO 4
#define W_DS 3
#elif 0
#define W_AM 55
#define W_AO 16
#define W_AS 5
#define W_BM 3
#define W_BO 32
#define W_BS 6
#define W_CM 127
#define W_CO 64
#define W_CS 7
#define W_DM 7
#define W_DO 8
#define W_DS 4
#elif 0
#define W_AM 97
#define W_AO 32
#define W_AS 6
#define W_BM 63
#define W_BO 512
#define W_BS 10
#define W_CM 13
#define W_CO 8
#define W_CS 4
#define W_DM 15
#define W_DO 16
#define W_DS 5
#else
#define W_AM 203
#define W_AO 64
#define W_AS 7
#define W_BM 217
#define W_BO 2048
#define W_BS 12
#define W_CM 113
#define W_CO 64
#define W_CS 7
#define W_DM 227
#define W_DO 128
#define W_DS 9
#endif
void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
void ff_snow_horizontal_compose97i(IDWTELEM *b, int width);
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
#if CONFIG_SNOW_ENCODER
int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h);
int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h);
#endif
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
/* C bits used by mmx/sse2/altivec */ /* C bits used by mmx/sse2/altivec */
static av_always_inline void snow_interleave_line_header(int * i, int width, IDWTELEM * low, IDWTELEM * high){ static av_always_inline void snow_interleave_line_header(int * i, int width, IDWTELEM * low, IDWTELEM * high){
......
...@@ -2894,25 +2894,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) ...@@ -2894,25 +2894,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
} }
#endif #endif
#if CONFIG_SNOW_DECODER
if(mm_flags & FF_MM_SSE2 & 0){
c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
#if HAVE_7REGS
c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
#endif
c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
}
else{
if(mm_flags & FF_MM_MMX2){
c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
#if HAVE_7REGS
c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
#endif
}
c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
}
#endif
if(mm_flags & FF_MM_3DNOW){ if(mm_flags & FF_MM_3DNOW){
c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
c->vector_fmul = vector_fmul_3dnow; c->vector_fmul = vector_fmul_3dnow;
......
...@@ -167,15 +167,6 @@ void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx); ...@@ -167,15 +167,6 @@ void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);
void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd); void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd);
void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd); void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd);
void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width);
void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width);
void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag, void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag,
double *autoc); double *autoc);
......
...@@ -22,9 +22,10 @@ ...@@ -22,9 +22,10 @@
#include "libavutil/x86_cpu.h" #include "libavutil/x86_cpu.h"
#include "libavcodec/avcodec.h" #include "libavcodec/avcodec.h"
#include "libavcodec/snow.h" #include "libavcodec/snow.h"
#include "libavcodec/dwt.h"
#include "dsputil_mmx.h" #include "dsputil_mmx.h"
void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ static void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
const int w2= (width+1)>>1; const int w2= (width+1)>>1;
DECLARE_ALIGNED(16, IDWTELEM, temp)[width>>1]; DECLARE_ALIGNED(16, IDWTELEM, temp)[width>>1];
const int w_l= (width>>1); const int w_l= (width>>1);
...@@ -213,7 +214,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ ...@@ -213,7 +214,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
} }
} }
void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
const int w2= (width+1)>>1; const int w2= (width+1)>>1;
IDWTELEM temp[width >> 1]; IDWTELEM temp[width >> 1];
const int w_l= (width>>1); const int w_l= (width>>1);
...@@ -436,7 +437,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ ...@@ -436,7 +437,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
"movdqa %%"s2", %%"t2" \n\t"\ "movdqa %%"s2", %%"t2" \n\t"\
"movdqa %%"s3", %%"t3" \n\t" "movdqa %%"s3", %%"t3" \n\t"
void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
x86_reg i = width; x86_reg i = width;
while(i & 0x1F) while(i & 0x1F)
...@@ -534,7 +535,7 @@ void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, ...@@ -534,7 +535,7 @@ void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
"movq %%"s3", %%"t3" \n\t" "movq %%"s3", %%"t3" \n\t"
void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
x86_reg i = width; x86_reg i = width;
while(i & 15) while(i & 15)
{ {
...@@ -847,7 +848,7 @@ snow_inner_add_yblock_mmx_mix("16", "8") ...@@ -847,7 +848,7 @@ snow_inner_add_yblock_mmx_mix("16", "8")
snow_inner_add_yblock_mmx_end("32") snow_inner_add_yblock_mmx_end("32")
} }
void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, static void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
if (b_w == 16) if (b_w == 16)
...@@ -861,7 +862,7 @@ void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, u ...@@ -861,7 +862,7 @@ void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, u
ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
} }
void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, static void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
if (b_w == 16) if (b_w == 16)
inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
...@@ -870,3 +871,27 @@ void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, ui ...@@ -870,3 +871,27 @@ void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, ui
else else
ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
} }
/**
 * Install x86-specific routines into a DWTContext.
 *
 * The result of mm_support() is stored in mm_flags (declared outside this
 * function) and then used to choose the routines:
 *  - without FF_MM_MMX nothing in c is changed;
 *  - with FF_MM_MMX, inner_add_yblock is set to the MMX version;
 *  - with FF_MM_MMX2 in addition, horizontal_compose97i is set to the MMX
 *    version, and vertical_compose97i too when HAVE_7REGS is non-zero.
 *
 * @param c context whose function pointers are overwritten; entries not
 *          mentioned above keep the value they had on entry
 */
void ff_dwt_init_x86(DWTContext *c)
{
    mm_flags = mm_support();

    if (!(mm_flags & FF_MM_MMX))
        return;

    /* The trailing "& 0" makes this condition always false, so the SSE2
     * routines are never selected; the branch only keeps them referenced. */
    if (mm_flags & FF_MM_SSE2 & 0) {
        c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
#if HAVE_7REGS
        c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
#endif
        c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
        return;
    }

    c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;

    if (mm_flags & FF_MM_MMX2) {
        c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
#if HAVE_7REGS
        c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
#endif
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment