Commit d593e329 authored by Michael Niedermayer

use 16bit IDWT (a SIMD implementation of it should be >2x faster than with the old 32bit code)
disable mmx/sse2 optimizations as they need a rewrite now

Originally committed as revision 10218 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 830bf1f2
......@@ -37,6 +37,7 @@
/* dct code */
typedef short DCTELEM;
typedef int DWTELEM;
typedef short IDWTELEM;
void fdct_ifast (DCTELEM *data);
void fdct_ifast248 (DCTELEM *data);
......@@ -390,8 +391,8 @@ typedef struct DSPContext {
void (*h264_dct)(DCTELEM block[4][4]);
/* snow wavelet */
void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
void (*horizontal_compose97i)(DWTELEM *b, int width);
void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
void (*horizontal_compose97i)(IDWTELEM *b, int width);
void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
void (*prefetch)(void *mem, int stride, int h);
......
......@@ -3621,6 +3621,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#endif
#ifdef CONFIG_SNOW_DECODER
#if 0
if(mm_flags & MM_SSE2){
c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
......@@ -3631,6 +3632,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
}
#endif
#endif
if(mm_flags & MM_3DNOW){
......
This diff is collapsed.
......@@ -31,7 +31,7 @@
#define QSHIFT 5
#define QROOT (1<<QSHIFT)
#define LOSSLESS_QLOG -128
#define FRAC_BITS 8
#define FRAC_BITS 4
#define MAX_REF_FRAMES 8
#define LOG2_OBMC_MAX 8
......@@ -43,17 +43,18 @@
/** Used to minimize the amount of memory used in order to optimize cache performance. **/
struct slice_buffer_s {
DWTELEM * * line; ///< For use by idwt and predict_slices.
DWTELEM * * data_stack; ///< Used for internal purposes.
IDWTELEM * * line; ///< For use by idwt and predict_slices.
IDWTELEM * * data_stack; ///< Used for internal purposes.
int data_stack_top;
int line_count;
int line_width;
int data_count;
DWTELEM * base_buffer; ///< Buffer that this structure is caching.
IDWTELEM * base_buffer; ///< Buffer that this structure is caching.
};
#define liftS lift
#define lift5 lift
#define inv_lift5 inv_lift
#if 1
#define W_AM 3
#define W_AO 0
......@@ -123,8 +124,8 @@ struct slice_buffer_s {
#define W_DS 9
#endif
extern void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
extern void ff_snow_horizontal_compose97i(DWTELEM *b, int width);
extern void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
extern void ff_snow_horizontal_compose97i(IDWTELEM *b, int width);
extern void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
#ifdef CONFIG_SNOW_ENCODER
......@@ -137,7 +138,7 @@ static int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
/* C bits used by mmx/sse2/altivec */
static av_always_inline void snow_interleave_line_header(int * i, int width, DWTELEM * low, DWTELEM * high){
static av_always_inline void snow_interleave_line_header(int * i, int width, IDWTELEM * low, IDWTELEM * high){
(*i) = (width) - 2;
if (width & 1){
......@@ -146,14 +147,14 @@ static av_always_inline void snow_interleave_line_header(int * i, int width, DWT
}
}
static av_always_inline void snow_interleave_line_footer(int * i, DWTELEM * low, DWTELEM * high){
static av_always_inline void snow_interleave_line_footer(int * i, IDWTELEM * low, IDWTELEM * high){
for (; (*i)>=0; (*i)-=2){
low[(*i)+1] = high[(*i)>>1];
low[*i] = low[(*i)>>1];
}
}
static av_always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){
static av_always_inline void snow_horizontal_compose_lift_lead_out(int i, IDWTELEM * dst, IDWTELEM * src, IDWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){
for(; i<w; i++){
dst[i] = src[i] - ((mul * (ref[i] + ref[i + 1]) + add) >> shift);
}
......@@ -163,7 +164,7 @@ static av_always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELE
}
}
static av_always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){
static av_always_inline void snow_horizontal_compose_liftS_lead_out(int i, IDWTELEM * dst, IDWTELEM * src, IDWTELEM * ref, int width, int w){
for(; i<w; i++){
dst[i] = src[i] + ((ref[i] + ref[(i+1)]+W_BO + 4 * src[i]) >> W_BS);
}
......
......@@ -141,10 +141,10 @@ f8f51fa737add17f7fecaefa118b57ed *./tests/data/a-ffv1.avi
2654678 ./tests/data/a-ffv1.avi
799d3db687f6cdd7a837ec156efc171f *./tests/data/out.yuv
stddev: 0.00 PSNR:99.99 bytes:7602176
19c377580ec83d0c5fc4da0740dec278 *./tests/data/a-snow.avi
156532 ./tests/data/a-snow.avi
64282679f712a2b09cde43edb18a6fb0 *./tests/data/out.yuv
stddev: 23.14 PSNR:20.83 bytes:7602176
0356b219110f391044352547360377a8 *./tests/data/a-snow.avi
156586 ./tests/data/a-snow.avi
c038bc896a435796588ca3b96f38bbb5 *./tests/data/out.yuv
stddev: 23.15 PSNR:20.83 bytes:7602176
ba999e86070aa971376e7f317a022c37 *./tests/data/a-snow53.avi
3519486 ./tests/data/a-snow53.avi
799d3db687f6cdd7a837ec156efc171f *./tests/data/out.yuv
......
......@@ -141,10 +141,10 @@ d72b0960e162d4998b9acbabb07e99ab *./tests/data/a-ffv1.avi
3525804 ./tests/data/a-ffv1.avi
dde5895817ad9d219f79a52d0bdfb001 *./tests/data/out.yuv
stddev: 0.00 PSNR:99.99 bytes:7602176
213abed95d2e43cf7d2c9e1921779e6d *./tests/data/a-snow.avi
68852 ./tests/data/a-snow.avi
f3ab734e188a8e2af7b89e0f101bd7a1 *./tests/data/out.yuv
stddev: 10.86 PSNR:27.40 bytes:7602176
ae64e5ff9b5684c46e74e48381e6a132 *./tests/data/a-snow.avi
68900 ./tests/data/a-snow.avi
5f5b97b726f97d3514b3c2b8e635175c *./tests/data/out.yuv
stddev: 10.87 PSNR:27.39 bytes:7602176
3d0da6aeec9b80c6ee0ff4b747bdd0f0 *./tests/data/a-snow53.avi
2721980 ./tests/data/a-snow53.avi
dde5895817ad9d219f79a52d0bdfb001 *./tests/data/out.yuv
......
......@@ -2050,47 +2050,47 @@ ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st:-1 ts:-1.000000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st:-1 ts:1.894167 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46872 size:3663 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46890 size:3663 flags:1
ret: 0 st: 0 ts:0.800000 flags:0
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31656 size:3478 flags:1
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31696 size:3478 flags:1
ret:-1 st: 0 ts:-0.320000 flags:1
ret:-1 st:-1 ts:2.576668 flags:0
ret: 0 st:-1 ts:1.470835 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46872 size:3663 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46890 size:3663 flags:1
ret: 0 st: 0 ts:0.360000 flags:0
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17968 size:3228 flags:1
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17984 size:3229 flags:1
ret:-1 st: 0 ts:-0.760000 flags:1
ret:-1 st:-1 ts:2.153336 flags:0
ret: 0 st:-1 ts:1.047503 flags:1
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31656 size:3478 flags:1
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31696 size:3478 flags:1
ret: 0 st: 0 ts:-0.040000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:2.840000 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63328 size:3635 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63380 size:3635 flags:1
ret: 0 st:-1 ts:1.730004 flags:0
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63328 size:3635 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63380 size:3635 flags:1
ret: 0 st:-1 ts:0.624171 flags:1
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17968 size:3228 flags:1
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17984 size:3229 flags:1
ret: 0 st: 0 ts:-0.480000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:2.400000 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63328 size:3635 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63380 size:3635 flags:1
ret: 0 st:-1 ts:1.306672 flags:0
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46872 size:3663 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46890 size:3663 flags:1
ret: 0 st:-1 ts:0.200839 flags:1
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:-0.920000 flags:0
ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:2987 flags:1
ret: 0 st: 0 ts:2.000000 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63328 size:3635 flags:1
ret: 0 st: 0 dts:1.920000 pts:1.920000 pos:63380 size:3635 flags:1
ret: 0 st:-1 ts:0.883340 flags:0
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31656 size:3478 flags:1
ret: 0 st: 0 dts:0.960000 pts:0.960000 pos:31696 size:3478 flags:1
ret:-1 st:-1 ts:-0.222493 flags:1
ret:-1 st: 0 ts:2.680000 flags:0
ret: 0 st: 0 ts:1.560000 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46872 size:3663 flags:1
ret: 0 st: 0 dts:1.440000 pts:1.440000 pos:46890 size:3663 flags:1
ret: 0 st:-1 ts:0.460008 flags:0
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17968 size:3228 flags:1
ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:17984 size:3229 flags:1
ret:-1 st:-1 ts:-0.645825 flags:1
----------------
tests/data/a-snow53.avi
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment