Commit 167029a7 authored by David Conrad

Use ff_pw_8 in MMX/SSE VP3 IDCT

Originally committed as revision 15053 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 21383da8
...@@ -24,16 +24,15 @@ ...@@ -24,16 +24,15 @@
*/ */
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "dsputil_mmx.h"
#include "mmx.h" #include "mmx.h"
#define IdctAdjustBeforeShift 8
/* (12 * 4) 2-byte memory locations ( = 96 bytes total) /* (12 * 4) 2-byte memory locations ( = 96 bytes total)
* idct_constants[0..15] = Mask table (M(I)) * idct_constants[0..15] = Mask table (M(I))
* idct_constants[16..43] = Cosine table (C(I)) * idct_constants[16..43] = Cosine table (C(I))
* idct_constants[44..47] = 8 * idct_constants[44..47] = 8
*/ */
static uint16_t idct_constants[(4 + 7 + 1) * 4]; static uint16_t idct_constants[(4 + 7) * 4];
static const uint16_t idct_cosine_table[7] = { static const uint16_t idct_cosine_table[7] = {
64277, 60547, 54491, 46341, 36410, 25080, 12785 64277, 60547, 54491, 46341, 36410, 25080, 12785
}; };
...@@ -269,9 +268,6 @@ void ff_vp3_dsp_init_mmx(void) ...@@ -269,9 +268,6 @@ void ff_vp3_dsp_init_mmx(void)
p = idct_constants + ((j + 3) << 2); p = idct_constants + ((j + 3) << 2);
p[0] = p[1] = p[2] = p[3] = idct_cosine_table[j - 1]; p[0] = p[1] = p[2] = p[3] = idct_cosine_table[j - 1];
} while (++j <= 7); } while (++j <= 7);
idct_constants[44] = idct_constants[45] =
idct_constants[46] = idct_constants[47] = IdctAdjustBeforeShift;
} }
void ff_vp3_idct_mmx(int16_t *output_data) void ff_vp3_idct_mmx(int16_t *output_data)
...@@ -286,7 +282,7 @@ void ff_vp3_idct_mmx(int16_t *output_data) ...@@ -286,7 +282,7 @@ void ff_vp3_idct_mmx(int16_t *output_data)
*/ */
#define C(x) (idct_constants + 16 + (x - 1) * 4) #define C(x) (idct_constants + 16 + (x - 1) * 4)
#define Eight (idct_constants + 44) #define Eight (&ff_pw_8)
/* at this point, function has completed dequantization + dezigzag + /* at this point, function has completed dequantization + dezigzag +
* partial transposition; now do the idct itself */ * partial transposition; now do the idct itself */
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
*/ */
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "dsputil_mmx.h"
#include "mmx.h" #include "mmx.h"
static DECLARE_ALIGNED_16(const unsigned short, SSE2_dequant_const[]) = static DECLARE_ALIGNED_16(const unsigned short, SSE2_dequant_const[]) =
...@@ -37,14 +38,6 @@ static DECLARE_ALIGNED_16(const unsigned short, SSE2_dequant_const[]) = ...@@ -37,14 +38,6 @@ static DECLARE_ALIGNED_16(const unsigned short, SSE2_dequant_const[]) =
0,0,65535,65535, 0,0,0,0 // 0x0000 0000 0000 0000 FFFF FFFF 0000 0000 0,0,65535,65535, 0,0,0,0 // 0x0000 0000 0000 0000 FFFF FFFF 0000 0000
}; };
static DECLARE_ALIGNED_16(const unsigned int, eight_data[]) =
{
0x00080008,
0x00080008,
0x00080008,
0x00080008
};
static DECLARE_ALIGNED_16(const unsigned short, SSE2_idct_data[7 * 8]) = static DECLARE_ALIGNED_16(const unsigned short, SSE2_idct_data[7 * 8]) =
{ {
64277,64277,64277,64277,64277,64277,64277,64277, 64277,64277,64277,64277,64277,64277,64277,64277,
...@@ -803,7 +796,7 @@ void ff_vp3_idct_sse2(int16_t *input_data) ...@@ -803,7 +796,7 @@ void ff_vp3_idct_sse2(int16_t *input_data)
unsigned char *input_bytes = (unsigned char *)input_data; unsigned char *input_bytes = (unsigned char *)input_data;
unsigned char *output_data_bytes = (unsigned char *)input_data; unsigned char *output_data_bytes = (unsigned char *)input_data;
const unsigned char *idct_data_bytes = (const unsigned char *)SSE2_idct_data; const unsigned char *idct_data_bytes = (const unsigned char *)SSE2_idct_data;
const unsigned char *Eight = (const unsigned char *)eight_data; const unsigned char *Eight = (const unsigned char *)&ff_pw_8;
#define eax input_bytes #define eax input_bytes
//#define ebx dequant_matrix_bytes //#define ebx dequant_matrix_bytes
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment