Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
db794953
Commit
db794953
authored
Jan 09, 2003
by
Michael Niedermayer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
qpel fix
Originally committed as revision 1426 to
svn://svn.ffmpeg.org/ffmpeg/trunk
parent
28269849
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
344 additions
and
398 deletions
+344
-398
avcodec.h
libavcodec/avcodec.h
+3
-2
dsputil.c
libavcodec/dsputil.c
+144
-31
dsputil.h
libavcodec/dsputil.h
+17
-1
h263dec.c
libavcodec/h263dec.c
+91
-33
dsputil_mmx.c
libavcodec/i386/dsputil_mmx.c
+66
-86
dsputil_mmx_rnd.h
libavcodec/i386/dsputil_mmx_rnd.h
+23
-245
No files found.
libavcodec/avcodec.h
View file @
db794953
...
...
@@ -5,8 +5,8 @@
#define LIBAVCODEC_VERSION_INT 0x000406
#define LIBAVCODEC_VERSION "0.4.6"
#define LIBAVCODEC_BUILD 465
2
#define LIBAVCODEC_BUILD_STR "465
2
"
#define LIBAVCODEC_BUILD 465
3
#define LIBAVCODEC_BUILD_STR "465
3
"
enum
CodecID
{
CODEC_ID_NONE
,
...
...
@@ -520,6 +520,7 @@ typedef struct AVCodecContext {
#define FF_BUG_NO_PADDING 16
#define FF_BUG_AC_VLC 32
#define FF_BUG_QPEL_CHROMA 64
#define FF_BUG_STD_QPEL 128
//#define FF_BUG_FAKE_SCALABILITY 16 //autodetection should work 100%
/**
...
...
libavcodec/dsputil.c
View file @
db794953
...
...
@@ -801,7 +801,8 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass(UINT8 *dst, UINT8 *src, int dstStrid
}\
}\
\
static void OPNAME ## mpeg4_qpel8_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w){\
static void OPNAME ## mpeg4_qpel8_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride){\
const int w=8;\
UINT8 *cm = cropTbl + MAX_NEG_CROP;\
int i;\
for(i=0; i<w; i++)\
...
...
@@ -923,107 +924,163 @@ static void OPNAME ## qpel8_mc01_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 half[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16
, 8
);\
put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc02_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
copy_block9(full, src, 16, stride, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16
, 8
);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
}\
\
static void OPNAME ## qpel8_mc03_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 half[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16
, 8
);\
put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
}\
static void OPNAME ## qpel8_mc11
_c(UINT8 *dst, UINT8 *src, int stride){\
void ff_ ## OPNAME ## qpel8_mc11_old
_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 halfH[72];\
UINT8 halfV[64];\
UINT8 halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16
, 8
);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8
, 8
);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc11_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 halfH[72];\
UINT8 halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc31_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 halfH[72];\
UINT8 halfV[64];\
UINT8 halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16
, 8
);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8
, 8
);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 halfH[72];\
UINT8 halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc13_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 halfH[72];\
UINT8 halfV[64];\
UINT8 halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16
, 8
);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8
, 8
);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 halfH[72];\
UINT8 halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc33_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 halfH[72];\
UINT8 halfV[64];\
UINT8 halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16
, 8
);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8
, 8
);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 halfH[72];\
UINT8 halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc21_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 halfH[72];\
UINT8 halfHV[64];\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8
, 8
);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 halfH[72];\
UINT8 halfHV[64];\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8
, 8
);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc12
_c(UINT8 *dst, UINT8 *src, int stride){\
void ff_ ## OPNAME ## qpel8_mc12_old
_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 halfH[72];\
UINT8 halfV[64];\
UINT8 halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16
, 8
);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8
, 8
);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel8_mc12_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 halfH[72];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc32_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 halfH[72];\
UINT8 halfV[64];\
UINT8 halfHV[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16
, 8
);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8
, 8
);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[16*9];\
UINT8 halfH[72];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel8_mc22_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 halfH[72];\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8
, 8
);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel16_mc00_c (UINT8 *dst, UINT8 *src, int stride){\
OPNAME ## pixels16_c(dst, src, stride, 16);\
...
...
@@ -1066,7 +1123,7 @@ static void OPNAME ## qpel16_mc03_c(UINT8 *dst, UINT8 *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
}\
static void OPNAME ## qpel16_mc11
_c(UINT8 *dst, UINT8 *src, int stride){\
void ff_ ## OPNAME ## qpel16_mc11_old
_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[24*17];\
UINT8 halfH[272];\
UINT8 halfV[256];\
...
...
@@ -1077,7 +1134,17 @@ static void OPNAME ## qpel16_mc11_c(UINT8 *dst, UINT8 *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc11_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[24*17];\
UINT8 halfH[272];\
UINT8 halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc31_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[24*17];\
UINT8 halfH[272];\
UINT8 halfV[256];\
...
...
@@ -1088,7 +1155,17 @@ static void OPNAME ## qpel16_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[24*17];\
UINT8 halfH[272];\
UINT8 halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc13_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[24*17];\
UINT8 halfH[272];\
UINT8 halfV[256];\
...
...
@@ -1099,7 +1176,17 @@ static void OPNAME ## qpel16_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[24*17];\
UINT8 halfH[272];\
UINT8 halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc33_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[24*17];\
UINT8 halfH[272];\
UINT8 halfV[256];\
...
...
@@ -1110,6 +1197,16 @@ static void OPNAME ## qpel16_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[24*17];\
UINT8 halfH[272];\
UINT8 halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc21_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 halfH[272];\
UINT8 halfHV[256];\
...
...
@@ -1124,7 +1221,7 @@ static void OPNAME ## qpel16_mc23_c(UINT8 *dst, UINT8 *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc12
_c(UINT8 *dst, UINT8 *src, int stride){\
void ff_ ## OPNAME ## qpel16_mc12_old
_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[24*17];\
UINT8 halfH[272];\
UINT8 halfV[256];\
...
...
@@ -1135,7 +1232,15 @@ static void OPNAME ## qpel16_mc12_c(UINT8 *dst, UINT8 *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
static void OPNAME ## qpel16_mc12_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[24*17];\
UINT8 halfH[272];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc32_old_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[24*17];\
UINT8 halfH[272];\
UINT8 halfV[256];\
...
...
@@ -1146,6 +1251,14 @@ static void OPNAME ## qpel16_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 full[24*17];\
UINT8 halfH[272];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
static void OPNAME ## qpel16_mc22_c(UINT8 *dst, UINT8 *src, int stride){\
UINT8 halfH[272];\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
...
...
libavcodec/dsputil.h
View file @
db794953
...
...
@@ -69,7 +69,23 @@ void clear_blocks_c(DCTELEM *blocks);
/**
 * Pointer type for a pixel operation: a function returning void that takes
 * a writable block pointer, a read-only pixels pointer, a line_size and a
 * row count h. The inline comments record the alignment expected of each
 * pointer (block: aligned to the width, 8 or 16; pixels: byte aligned).
 * NOTE(review): line_size is presumably the distance between rows - confirm.
 */
typedef
void
(
*
op_pixels_func
)(
UINT8
*
block
/*align width (8 or 16)*/
,
const
UINT8
*
pixels
/*align 1*/
,
int
line_size
,
int
h
);
/**
 * Pointer type for a quarter-pel motion compensation function: returns void
 * and takes a destination pointer, a source pointer and a single stride.
 * Unlike op_pixels_func there is no height argument and src is not const.
 * The inline comments record the alignment expected of each pointer
 * (dst: aligned to the width, 8 or 16; src: byte aligned).
 */
typedef
void
(
*
qpel_mc_func
)(
UINT8
*
dst
/*align width (8 or 16)*/
,
UINT8
*
src
/*align 1*/
,
int
stride
);
/**
 * DEF_OLD_QPEL(name) emits three prototypes for one qpel function name:
 * ff_put_<name>, ff_put_no_rnd_<name> and ff_avg_<name>. Each has the
 * (UINT8 *dst, UINT8 *src, int stride) signature, i.e. the same shape as
 * qpel_mc_func. The expansion already ends in ';', so invocations are
 * written without a trailing semicolon.
 */
#define DEF_OLD_QPEL(name)\
void ff_put_ ## name (UINT8 *dst
/*align width (8 or 16)*/
, UINT8 *src
/*align 1*/
, int stride);\
void ff_put_no_rnd_ ## name (UINT8 *dst
/*align width (8 or 16)*/
, UINT8 *src
/*align 1*/
, int stride);\
void ff_avg_ ## name (UINT8 *dst
/*align width (8 or 16)*/
, UINT8 *src
/*align 1*/
, int stride);
/* Prototypes for the "_old_c" qpel16 and qpel8 variants of the
 * mc11, mc31, mc12, mc32, mc13 and mc33 positions. */
DEF_OLD_QPEL
(
qpel16_mc11_old_c
)
DEF_OLD_QPEL
(
qpel16_mc31_old_c
)
DEF_OLD_QPEL
(
qpel16_mc12_old_c
)
DEF_OLD_QPEL
(
qpel16_mc32_old_c
)
DEF_OLD_QPEL
(
qpel16_mc13_old_c
)
DEF_OLD_QPEL
(
qpel16_mc33_old_c
)
DEF_OLD_QPEL
(
qpel8_mc11_old_c
)
DEF_OLD_QPEL
(
qpel8_mc31_old_c
)
DEF_OLD_QPEL
(
qpel8_mc12_old_c
)
DEF_OLD_QPEL
(
qpel8_mc32_old_c
)
DEF_OLD_QPEL
(
qpel8_mc13_old_c
)
DEF_OLD_QPEL
(
qpel8_mc33_old_c
)
#define CALL_2X_PIXELS(a, b, n)\
static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
...
...
libavcodec/h263dec.c
View file @
db794953
...
...
@@ -344,6 +344,42 @@ static int mpeg4_find_frame_end(MpegEncContext *s, UINT8 *buf, int buf_size){
return
-
1
;
}
/**
 * Draw a straight line into a byte plane by adding a value to every pixel
 * the line touches.
 *
 * The line is rasterised along its major axis (x when |dx| > |dy|, y
 * otherwise) using a 16.16 fixed-point slope, one pixel per step.
 *
 * @param buf    top-left byte of the plane
 * @param sx     x coordinate of the start point
 * @param sy     y coordinate of the start point
 * @param ex     x coordinate of the end point
 * @param ey     y coordinate of the end point
 * @param w      width of the drawable area; x is clamped to [0, w-1]
 * @param h      height of the drawable area; y is clamped to [0, h-1]
 * @param stride distance in bytes between vertically adjacent pixels
 * @param color  amount added to each touched byte (uint8_t arithmetic, so
 *               the stored value wraps modulo 256)
 *
 * The start pixel receives color one extra time: once from the explicit
 * write before the loop and once from the loop itself.
 *
 * NOTE(review): clip() and ABS() are defined elsewhere; this assumes
 * clip(v, lo, hi) clamps v into [lo, hi] and ABS(v) is the absolute value.
 */
static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int t, x, y, f;

    /* Clamp BOTH end points. The start point used to be trusted as-is, so a
     * start outside the w x h area wrote outside of it. */
    sx= clip(sx, 0, w-1);
    sy= clip(sy, 0, h-1);
    ex= clip(ex, 0, w-1);
    ey= clip(ey, 0, h-1);

    /* Highlight the start point. */
    buf[sy*stride + sx]+= color;

    if(ABS(ex - sx) > ABS(ey - sy)){
        /* x is the major axis: make the line run left to right. */
        if(sx > ex){
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ex-= sx;
        /* |dx| > |dy| >= 0 guarantees ex != 0 here, so the division is safe.
         * Multiply instead of shifting: left-shifting a negative int is
         * undefined behaviour. */
        f= ((ey-sy)*(1<<16))/ex;
        for(x= 0; x <= ex; x++){
            y= ((x*f) + (1<<15))>>16; /* round to nearest row */
            buf[y*stride + x]+= color;
        }
    }else{
        /* y is the major axis: make the line run top to bottom. */
        if(sy > ey){
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ey-= sy;
        /* ey == 0 means start and end coincide; avoid dividing by zero. */
        if(ey) f= ((ex-sx)*(1<<16))/ey;
        else   f= 0;
        for(y= 0; y <= ey; y++){
            x= ((y*f) + (1<<15))>>16; /* round to nearest column */
            buf[y*stride + x]+= color;
        }
    }
}
int
ff_h263_decode_frame
(
AVCodecContext
*
avctx
,
void
*
data
,
int
*
data_size
,
UINT8
*
buf
,
int
buf_size
)
...
...
@@ -472,6 +508,14 @@ retry:
if
(
s
->
xvid_build
&&
s
->
xvid_build
<=
1
)
s
->
workaround_bugs
|=
FF_BUG_QPEL_CHROMA
;
/* SET_QPEL_FUNC(postfix1, postfix2): point the put_, put_no_rnd_ and avg_
 * members of s->dsp selected by postfix1 at the ff_put_, ff_put_no_rnd_ and
 * ff_avg_ functions named by postfix2. Expands to three complete statements
 * and relies on a variable named s being in scope at the point of use. */
#define SET_QPEL_FUNC(postfix1, postfix2) \
s->dsp.put_ ## postfix1 = ff_put_ ## postfix2;\
s->dsp.put_no_rnd_ ## postfix1 = ff_put_no_rnd_ ## postfix2;\
s->dsp.avg_ ## postfix1 = ff_avg_ ## postfix2;
if
(
s
->
lavc_build
&&
s
->
lavc_build
<
4653
)
s
->
workaround_bugs
|=
FF_BUG_STD_QPEL
;
//printf("padding_bug_score: %d\n", s->padding_bug_score);
#if 0
if(s->divx_version==500)
...
...
@@ -489,6 +533,21 @@ retry:
#endif
}
if
(
s
->
workaround_bugs
&
FF_BUG_STD_QPEL
){
SET_QPEL_FUNC
(
qpel_pixels_tab
[
0
][
5
],
qpel16_mc11_old_c
)
SET_QPEL_FUNC
(
qpel_pixels_tab
[
0
][
7
],
qpel16_mc31_old_c
)
SET_QPEL_FUNC
(
qpel_pixels_tab
[
0
][
9
],
qpel16_mc12_old_c
)
SET_QPEL_FUNC
(
qpel_pixels_tab
[
0
][
11
],
qpel16_mc32_old_c
)
SET_QPEL_FUNC
(
qpel_pixels_tab
[
0
][
13
],
qpel16_mc13_old_c
)
SET_QPEL_FUNC
(
qpel_pixels_tab
[
0
][
15
],
qpel16_mc33_old_c
)
SET_QPEL_FUNC
(
qpel_pixels_tab
[
1
][
5
],
qpel8_mc11_old_c
)
SET_QPEL_FUNC
(
qpel_pixels_tab
[
1
][
7
],
qpel8_mc31_old_c
)
SET_QPEL_FUNC
(
qpel_pixels_tab
[
1
][
9
],
qpel8_mc12_old_c
)
SET_QPEL_FUNC
(
qpel_pixels_tab
[
1
][
11
],
qpel8_mc32_old_c
)
SET_QPEL_FUNC
(
qpel_pixels_tab
[
1
][
13
],
qpel8_mc13_old_c
)
SET_QPEL_FUNC
(
qpel_pixels_tab
[
1
][
15
],
qpel8_mc33_old_c
)
}
#if 0 // dump bits per frame / qp / complexity
{
...
...
@@ -645,41 +704,40 @@ retry:
}
MPV_frame_end
(
s
);
#if 0 //dirty show MVs, we should export the MV tables and write a filter to show them
{
int mb_y;
s->has_b_frames=1;
for(mb_y=0; mb_y<s->mb_height; mb_y++){
int mb_x;
int y= mb_y*16 + 8;
for(mb_x=0; mb_x<s->mb_width; mb_x++){
int x= mb_x*16 + 8;
uint8_t *ptr= s->last_picture.data[0];
int xy= 1 + mb_x*2 + (mb_y*2 + 1)*(s->mb_width*2 + 2);
int mx= (s->motion_val[xy][0]>>1) + x;
int my= (s->motion_val[xy][1]>>1) + y;
int i;
int max;
if(mx<0) mx=0;
if(my<0) my=0;
if(mx>=s->width) mx= s->width -1;
if(my>=s->height) my= s->height-1;
max= ABS(mx-x);
if(ABS(my-y) > max) max= ABS(my-y);
/* the ugliest linedrawing routine ... */
for(i=0; i<max; i++){
int x1= x + (mx-x)*i/max;
int y1= y + (my-y)*i/max;
ptr[y1*s->linesize + x1]+=100;
}
ptr[y*s->linesize + x]+=100;
s->mbskip_table[mb_x + mb_y*s->mb_width]=0;
if
((
avctx
->
debug
&
FF_DEBUG_VIS_MV
)
&&
s
->
last_picture
.
data
[
0
]){
const
int
shift
=
1
+
s
->
quarter_sample
;
int
mb_y
;
uint8_t
*
ptr
=
s
->
last_picture
.
data
[
0
];
s
->
low_delay
=
0
;
//needed to see the vectors without trashing the buffers
for
(
mb_y
=
0
;
mb_y
<
s
->
mb_height
;
mb_y
++
){
int
mb_x
;
for
(
mb_x
=
0
;
mb_x
<
s
->
mb_width
;
mb_x
++
){
const
int
mb_index
=
mb_x
+
mb_y
*
s
->
mb_width
;
if
(
s
->
co_located_type_table
[
mb_index
]
==
MV_TYPE_8X8
){
int
i
;
for
(
i
=
0
;
i
<
4
;
i
++
){
int
sx
=
mb_x
*
16
+
4
+
8
*
(
i
&
1
);
int
sy
=
mb_y
*
16
+
4
+
8
*
(
i
>>
1
);
int
xy
=
1
+
mb_x
*
2
+
(
i
&
1
)
+
(
mb_y
*
2
+
1
+
(
i
>>
1
))
*
(
s
->
mb_width
*
2
+
2
);
int
mx
=
(
s
->
motion_val
[
xy
][
0
]
>>
shift
)
+
sx
;
int
my
=
(
s
->
motion_val
[
xy
][
1
]
>>
shift
)
+
sy
;
draw_line
(
ptr
,
sx
,
sy
,
mx
,
my
,
s
->
width
,
s
->
height
,
s
->
linesize
,
100
);
}
}
else
{
int
sx
=
mb_x
*
16
+
8
;
int
sy
=
mb_y
*
16
+
8
;
int
xy
=
1
+
mb_x
*
2
+
(
mb_y
*
2
+
1
)
*
(
s
->
mb_width
*
2
+
2
);
int
mx
=
(
s
->
motion_val
[
xy
][
0
]
>>
shift
)
+
sx
;
int
my
=
(
s
->
motion_val
[
xy
][
1
]
>>
shift
)
+
sy
;
draw_line
(
ptr
,
sx
,
sy
,
mx
,
my
,
s
->
width
,
s
->
height
,
s
->
linesize
,
100
);
}
s
->
mbskip_table
[
mb_index
]
=
0
;
}
}
}
}
}
#endif
if
(
s
->
pict_type
==
B_TYPE
||
s
->
low_delay
){
*
pict
=
*
(
AVFrame
*
)
&
s
->
current_picture
;
...
...
libavcodec/i386/dsputil_mmx.c
View file @
db794953
...
...
@@ -1085,7 +1085,7 @@ static void OPNAME ## qpel8_mc00_ ## MMX (UINT8 *dst, UINT8 *src, int stride){\
}\
\
static void OPNAME ## qpel8_mc10_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t temp[
32
];\
uint64_t temp[
8
];\
uint8_t * const half= (uint8_t*)temp;\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\
OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\
...
...
@@ -1096,14 +1096,14 @@ static void OPNAME ## qpel8_mc20_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
}\
\
static void OPNAME ## qpel8_mc30_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t temp[
32
];\
uint64_t temp[
8
];\
uint8_t * const half= (uint8_t*)temp;\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\
OPNAME ## pixels8_l2_mmx(dst, src+1, half, stride, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc01_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t temp[
32
];\
uint64_t temp[
8
];\
uint8_t * const half= (uint8_t*)temp;\
put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\
OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\
...
...
@@ -1114,53 +1114,49 @@ static void OPNAME ## qpel8_mc02_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
}\
\
static void OPNAME ## qpel8_mc03_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t temp[
32
];\
uint64_t temp[
8
];\
uint8_t * const half= (uint8_t*)temp;\
put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\
OPNAME ## pixels8_l2_mmx(dst, src+stride, half, stride, stride, 8);\
}\
static void OPNAME ## qpel8_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[8*2 + 8*2 + 18*2];\
uint8_t * const halfH= ((uint8_t*)half) + 2*64 + 8;\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 64;\
uint64_t half[8 + 9];\
uint8_t * const halfH= ((uint8_t*)half) + 64;\
uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ##
mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src, 8, stride
);\
put ## RND ##
pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9
);\
put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l
4_mmx(dst, src, (uint8_t*)half, stride
, 8);\
OPNAME ## pixels8_l
2_mmx(dst, halfH, halfHV, stride, 8
, 8);\
}\
static void OPNAME ## qpel8_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[8*2 + 8*2 + 18*2];\
uint8_t * const halfH= ((uint8_t*)half) + 2*64 + 8;\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 64;\
uint64_t half[8 + 9];\
uint8_t * const halfH= ((uint8_t*)half) + 64;\
uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ##
mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src+1, 8, stride
);\
put ## RND ##
pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9
);\
put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l
4_mmx(dst, src+1, (uint8_t*)half, stride
, 8);\
OPNAME ## pixels8_l
2_mmx(dst, halfH, halfHV, stride, 8
, 8);\
}\
static void OPNAME ## qpel8_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[8*2 + 8*2 + 9*2];\
uint8_t * const halfH= ((uint8_t*)half) + 2*64;\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 64;\
uint64_t half[8 + 9];\
uint8_t * const halfH= ((uint8_t*)half) + 64;\
uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ##
mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src, 8, stride
);\
put ## RND ##
pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9
);\
put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l
4_mmx(dst, src+stride, (uint8_t*)half, stride
, 8);\
OPNAME ## pixels8_l
2_mmx(dst, halfH+8, halfHV, stride, 8
, 8);\
}\
static void OPNAME ## qpel8_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[8*2 + 8*2 + 9*2];\
uint8_t * const halfH= ((uint8_t*)half) + 2*64;\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 64;\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src , 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src+1, 8, stride);\
uint64_t half[8 + 9];\
uint8_t * const halfH= ((uint8_t*)half) + 64;\
uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l
4_mmx(dst, src+stride+1, (uint8_t*)half, stride
, 8);\
OPNAME ## pixels8_l
2_mmx(dst, halfH+8, halfHV, stride, 8
, 8);\
}\
static void OPNAME ## qpel8_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[8
*2 + 9*2
];\
uint64_t half[8
+ 9
];\
uint8_t * const halfH= ((uint8_t*)half) + 64;\
uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
...
...
@@ -1168,7 +1164,7 @@ static void OPNAME ## qpel8_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[8
*2 + 9*2
];\
uint64_t half[8
+ 9
];\
uint8_t * const halfH= ((uint8_t*)half) + 64;\
uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
...
...
@@ -1176,27 +1172,21 @@ static void OPNAME ## qpel8_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\
}\
static void OPNAME ## qpel8_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[8*2 + 8*2 + 9*2];\
uint8_t * const halfH= ((uint8_t*)half) + 2*64;\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 64;\
uint64_t half[8 + 9];\
uint8_t * const halfH= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src, 8, stride);\
put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_mmx(dst, halfV, halfHV, stride, 8, 8);\
put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel8_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[8*2 + 8*2 + 9*2];\
uint8_t * const halfH= ((uint8_t*)half) + 2*64;\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 64;\
uint64_t half[8 + 9];\
uint8_t * const halfH= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfV, src+1, 8, stride);\
put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_mmx(dst, halfV, halfHV, stride, 8, 8);\
put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel8_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[9
*2
];\
uint64_t half[9];\
uint8_t * const halfH= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
...
...
@@ -1241,44 +1231,40 @@ static void OPNAME ## qpel16_mc03_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
OPNAME ## pixels16_l2_mmx(dst, src+stride, half, stride, stride, 16);\
}\
static void OPNAME ## qpel16_mc11_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[16*2 + 16*2 + 18*2];\
uint8_t * const halfH= ((uint8_t*)half) + 2*256 + 16;\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 256;\
uint64_t half[16*2 + 17*2];\
uint8_t * const halfH= ((uint8_t*)half) + 256;\
uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ##
mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src, 16, stride
);\
put ## RND ##
pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17
);\
put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l
4_mmx(dst, src, (uint8_t*)half, stride
, 16);\
OPNAME ## pixels16_l
2_mmx(dst, halfH, halfHV, stride, 16
, 16);\
}\
static void OPNAME ## qpel16_mc31_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[16*2 + 16*2 + 18*2];\
uint8_t * const halfH= ((uint8_t*)half) + 2*256 + 16;\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 256;\
uint64_t half[16*2 + 17*2];\
uint8_t * const halfH= ((uint8_t*)half) + 256;\
uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ##
mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src+1, 16, stride
);\
put ## RND ##
pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17
);\
put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l
4_mmx(dst, src+1, (uint8_t*)half, stride
, 16);\
OPNAME ## pixels16_l
2_mmx(dst, halfH, halfHV, stride, 16
, 16);\
}\
static void OPNAME ## qpel16_mc13_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[16*2 + 16*2 + 17*2];\
uint8_t * const halfH= ((uint8_t*)half) + 2*256;\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 256;\
uint64_t half[16*2 + 17*2];\
uint8_t * const halfH= ((uint8_t*)half) + 256;\
uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ##
mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src, 16, stride
);\
put ## RND ##
pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17
);\
put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l
4_mmx(dst, src+stride, (uint8_t*)half, stride
, 16);\
OPNAME ## pixels16_l
2_mmx(dst, halfH+16, halfHV, stride, 16
, 16);\
}\
static void OPNAME ## qpel16_mc33_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[16*2 + 16*2 + 17*2];\
uint8_t * const halfH= ((uint8_t*)half) + 2*256;\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 256;\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src , 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src+1, 16, stride);\
uint64_t half[16*2 + 17*2];\
uint8_t * const halfH= ((uint8_t*)half) + 256;\
uint8_t * const halfHV= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l
4_mmx(dst, src+stride+1, (uint8_t*)half, stride
, 16);\
OPNAME ## pixels16_l
2_mmx(dst, halfH+16, halfHV, stride, 16
, 16);\
}\
static void OPNAME ## qpel16_mc21_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[16*2 + 17*2];\
...
...
@@ -1297,24 +1283,18 @@ static void OPNAME ## qpel16_mc23_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\
}\
static void OPNAME ## qpel16_mc12_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[16*2 + 16*2 + 17*2];\
uint8_t * const halfH= ((uint8_t*)half) + 2*256;\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 256;\
uint64_t half[17*2];\
uint8_t * const halfH= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src, 16, stride);\
put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_mmx(dst, halfV, halfHV, stride, 16, 16);\
put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\
OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
}\
static void OPNAME ## qpel16_mc32_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[16*2 + 16*2 + 17*2];\
uint8_t * const halfH= ((uint8_t*)half) + 2*256;\
uint8_t * const halfV= ((uint8_t*)half);\
uint8_t * const halfHV= ((uint8_t*)half) + 256;\
uint64_t half[17*2];\
uint8_t * const halfH= ((uint8_t*)half);\
put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfV, src+1, 16, stride);\
put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_mmx(dst, halfV, halfHV, stride, 16, 16);\
put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\
OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
}\
static void OPNAME ## qpel16_mc22_ ## MMX(UINT8 *dst, UINT8 *src, int stride){\
uint64_t half[17*2];\
...
...
@@ -1525,7 +1505,7 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask)
c
->
avg_pixels_tab
[
1
][
1
]
=
avg_pixels8_x2_3dnow
;
c
->
avg_pixels_tab
[
1
][
2
]
=
avg_pixels8_y2_3dnow
;
c
->
avg_pixels_tab
[
1
][
3
]
=
avg_pixels8_xy2_3dnow
;
SET_QPEL_FUNC
(
qpel_pixels_tab
[
0
][
0
],
qpel16_mc00_3dnow
)
SET_QPEL_FUNC
(
qpel_pixels_tab
[
0
][
1
],
qpel16_mc10_3dnow
)
SET_QPEL_FUNC
(
qpel_pixels_tab
[
0
][
2
],
qpel16_mc20_3dnow
)
...
...
libavcodec/i386/dsputil_mmx_rnd.h
View file @
db794953
...
...
@@ -58,6 +58,16 @@ static void DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
{
MOVQ_BFE
(
mm6
);
__asm
__volatile
(
"test $1, %0
\n\t
"
" jz 1f
\n\t
"
"movq (%1), %%mm0
\n\t
"
"movq (%2), %%mm1
\n\t
"
"addl %4, %1
\n\t
"
"addl $8, %2
\n\t
"
PAVGB
(
%%
mm0
,
%%
mm1
,
%%
mm4
,
%%
mm6
)
"movq %%mm4, (%3)
\n\t
"
"addl %5, %3
\n\t
"
"decl %0
\n\t
"
".balign 8
\n\t
"
"1:
\n\t
"
"movq (%1), %%mm0
\n\t
"
...
...
@@ -144,6 +154,19 @@ static void DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, in
{
MOVQ_BFE
(
mm6
);
__asm
__volatile
(
"test $1, %0
\n\t
"
" jz 1f
\n\t
"
"movq (%1), %%mm0
\n\t
"
"movq (%2), %%mm1
\n\t
"
"movq 8(%1), %%mm2
\n\t
"
"movq 8(%2), %%mm3
\n\t
"
"addl %4, %1
\n\t
"
"addl $16, %2
\n\t
"
PAVGBP
(
%%
mm0
,
%%
mm1
,
%%
mm4
,
%%
mm2
,
%%
mm3
,
%%
mm5
)
"movq %%mm4, (%3)
\n\t
"
"movq %%mm5, 8(%3)
\n\t
"
"addl %5, %3
\n\t
"
"decl %0
\n\t
"
".balign 8
\n\t
"
"1:
\n\t
"
"movq (%1), %%mm0
\n\t
"
...
...
@@ -271,124 +294,6 @@ static void DEF(put, pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_si
:
"eax"
,
"memory"
);
}
static
void
DEF
(
put
,
pixels8_l4
)(
uint8_t
*
dst
,
uint8_t
*
src1
,
uint8_t
*
src2
,
int
stride
,
int
h
)
{
MOVQ_ZERO
(
mm7
);
SET_RND
(
mm6
);
// =2 for rnd and =1 for no_rnd version
__asm
__volatile
(
".balign 8
\n\t
"
"1:
\n\t
"
"movq (%1), %%mm0
\n\t
"
"movq (%2), %%mm1
\n\t
"
"movq 64(%2), %%mm2
\n\t
"
"movq 136(%2), %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
"punpcklbw %%mm7, %%mm1
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
"punpcklbw %%mm7, %%mm3
\n\t
"
"paddusw %%mm6, %%mm0
\n\t
"
"paddusw %%mm0, %%mm1
\n\t
"
"paddusw %%mm2, %%mm3
\n\t
"
"paddusw %%mm1, %%mm3
\n\t
"
"psrlw $2, %%mm3
\n\t
"
"movq (%1), %%mm0
\n\t
"
"movq (%2), %%mm1
\n\t
"
"movq 64(%2), %%mm2
\n\t
"
"movq 136(%2), %%mm4
\n\t
"
"punpckhbw %%mm7, %%mm0
\n\t
"
"punpckhbw %%mm7, %%mm1
\n\t
"
"punpckhbw %%mm7, %%mm2
\n\t
"
"punpckhbw %%mm7, %%mm4
\n\t
"
"paddusw %%mm6, %%mm0
\n\t
"
"paddusw %%mm0, %%mm1
\n\t
"
"paddusw %%mm2, %%mm4
\n\t
"
"paddusw %%mm1, %%mm4
\n\t
"
"psrlw $2, %%mm4
\n\t
"
"packuswb %%mm4, %%mm3
\n\t
"
"movq %%mm3, (%0)
\n\t
"
"addl %4, %0
\n\t
"
"addl %4, %1
\n\t
"
"addl $8, %2
\n\t
"
"decl %3
\n\t
"
"jnz 1b
\n\t
"
:
"+r"
(
dst
),
"+r"
(
src1
),
"+r"
(
src2
),
"+r"
(
h
)
:
"r"
(
stride
)
:
"memory"
);
}
static
void
DEF
(
put
,
pixels16_l4
)(
uint8_t
*
dst
,
uint8_t
*
src1
,
uint8_t
*
src2
,
int
stride
,
int
h
)
{
MOVQ_ZERO
(
mm7
);
SET_RND
(
mm6
);
// =2 for rnd and =1 for no_rnd version
__asm
__volatile
(
".balign 8
\n\t
"
"1:
\n\t
"
"movq (%1), %%mm0
\n\t
"
"movq (%2), %%mm1
\n\t
"
"movq 256(%2), %%mm2
\n\t
"
"movq 528(%2), %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
"punpcklbw %%mm7, %%mm1
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
"punpcklbw %%mm7, %%mm3
\n\t
"
"paddusw %%mm6, %%mm0
\n\t
"
"paddusw %%mm0, %%mm1
\n\t
"
"paddusw %%mm2, %%mm3
\n\t
"
"paddusw %%mm1, %%mm3
\n\t
"
"psrlw $2, %%mm3
\n\t
"
"movq (%1), %%mm0
\n\t
"
"movq (%2), %%mm1
\n\t
"
"movq 256(%2), %%mm2
\n\t
"
"movq 528(%2), %%mm4
\n\t
"
"punpckhbw %%mm7, %%mm0
\n\t
"
"punpckhbw %%mm7, %%mm1
\n\t
"
"punpckhbw %%mm7, %%mm2
\n\t
"
"punpckhbw %%mm7, %%mm4
\n\t
"
"paddusw %%mm6, %%mm0
\n\t
"
"paddusw %%mm0, %%mm1
\n\t
"
"paddusw %%mm2, %%mm4
\n\t
"
"paddusw %%mm1, %%mm4
\n\t
"
"psrlw $2, %%mm4
\n\t
"
"packuswb %%mm4, %%mm3
\n\t
"
"movq %%mm3, (%0)
\n\t
"
"movq 8(%1), %%mm0
\n\t
"
"movq 8(%2), %%mm1
\n\t
"
"movq 264(%2), %%mm2
\n\t
"
"movq 536(%2), %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
"punpcklbw %%mm7, %%mm1
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
"punpcklbw %%mm7, %%mm3
\n\t
"
"paddusw %%mm6, %%mm0
\n\t
"
"paddusw %%mm0, %%mm1
\n\t
"
"paddusw %%mm2, %%mm3
\n\t
"
"paddusw %%mm1, %%mm3
\n\t
"
"psrlw $2, %%mm3
\n\t
"
"movq 8(%1), %%mm0
\n\t
"
"movq 8(%2), %%mm1
\n\t
"
"movq 264(%2), %%mm2
\n\t
"
"movq 536(%2), %%mm4
\n\t
"
"punpckhbw %%mm7, %%mm0
\n\t
"
"punpckhbw %%mm7, %%mm1
\n\t
"
"punpckhbw %%mm7, %%mm2
\n\t
"
"punpckhbw %%mm7, %%mm4
\n\t
"
"paddusw %%mm6, %%mm0
\n\t
"
"paddusw %%mm0, %%mm1
\n\t
"
"paddusw %%mm2, %%mm4
\n\t
"
"paddusw %%mm1, %%mm4
\n\t
"
"psrlw $2, %%mm4
\n\t
"
"packuswb %%mm4, %%mm3
\n\t
"
"movq %%mm3, 8(%0)
\n\t
"
"addl %4, %0
\n\t
"
"addl %4, %1
\n\t
"
"addl $16, %2
\n\t
"
"decl %3
\n\t
"
"jnz 1b
\n\t
"
:
"+r"
(
dst
),
"+r"
(
src1
),
"+r"
(
src2
),
"+r"
(
h
)
:
"r"
(
stride
)
:
"memory"
);
}
// avg_pixels
// in case more speed is needed - unroling would certainly help
static
void
DEF
(
avg
,
pixels8
)(
UINT8
*
block
,
const
UINT8
*
pixels
,
int
line_size
,
int
h
)
...
...
@@ -641,133 +546,6 @@ static void DEF(avg, pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_si
:
"eax"
,
"memory"
);
}
static
void
DEF
(
avg
,
pixels8_l4
)(
uint8_t
*
dst
,
uint8_t
*
src1
,
uint8_t
*
src2
,
int
stride
,
int
h
)
{
MOVQ_ZERO
(
mm7
);
SET_RND
(
mm6
);
// =2 for rnd and =1 for no_rnd version
MOVQ_BFE
(
mm5
);
__asm
__volatile
(
".balign 8
\n\t
"
"1:
\n\t
"
"movq (%1), %%mm0
\n\t
"
"movq (%2), %%mm1
\n\t
"
"movq 64(%2), %%mm2
\n\t
"
"movq 136(%2), %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
"punpcklbw %%mm7, %%mm1
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
"punpcklbw %%mm7, %%mm3
\n\t
"
"paddusw %%mm6, %%mm0
\n\t
"
"paddusw %%mm0, %%mm1
\n\t
"
"paddusw %%mm2, %%mm3
\n\t
"
"paddusw %%mm1, %%mm3
\n\t
"
"psrlw $2, %%mm3
\n\t
"
"movq (%1), %%mm0
\n\t
"
"movq (%2), %%mm1
\n\t
"
"movq 64(%2), %%mm2
\n\t
"
"movq 136(%2), %%mm4
\n\t
"
"punpckhbw %%mm7, %%mm0
\n\t
"
"punpckhbw %%mm7, %%mm1
\n\t
"
"punpckhbw %%mm7, %%mm2
\n\t
"
"punpckhbw %%mm7, %%mm4
\n\t
"
"paddusw %%mm6, %%mm0
\n\t
"
"paddusw %%mm0, %%mm1
\n\t
"
"paddusw %%mm2, %%mm4
\n\t
"
"paddusw %%mm1, %%mm4
\n\t
"
"psrlw $2, %%mm4
\n\t
"
"packuswb %%mm4, %%mm3
\n\t
"
"movq (%0), %%mm4
\n\t
"
PAVGB
(
%%
mm3
,
%%
mm4
,
%%
mm0
,
%%
mm5
)
"movq %%mm0, (%0)
\n\t
"
"addl %4, %0
\n\t
"
"addl %4, %1
\n\t
"
"addl $8, %2
\n\t
"
"decl %3
\n\t
"
"jnz 1b
\n\t
"
:
"+r"
(
dst
),
"+r"
(
src1
),
"+r"
(
src2
),
"+r"
(
h
)
:
"r"
(
stride
)
:
"memory"
);
}
static
void
DEF
(
avg
,
pixels16_l4
)(
uint8_t
*
dst
,
uint8_t
*
src1
,
uint8_t
*
src2
,
int
stride
,
int
h
)
{
MOVQ_ZERO
(
mm7
);
SET_RND
(
mm6
);
// =2 for rnd and =1 for no_rnd version
MOVQ_BFE
(
mm5
);
__asm
__volatile
(
".balign 8
\n\t
"
"1:
\n\t
"
"movq (%1), %%mm0
\n\t
"
"movq (%2), %%mm1
\n\t
"
"movq 256(%2), %%mm2
\n\t
"
"movq 528(%2), %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
"punpcklbw %%mm7, %%mm1
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
"punpcklbw %%mm7, %%mm3
\n\t
"
"paddusw %%mm6, %%mm0
\n\t
"
"paddusw %%mm0, %%mm1
\n\t
"
"paddusw %%mm2, %%mm3
\n\t
"
"paddusw %%mm1, %%mm3
\n\t
"
"psrlw $2, %%mm3
\n\t
"
"movq (%1), %%mm0
\n\t
"
"movq (%2), %%mm1
\n\t
"
"movq 256(%2), %%mm2
\n\t
"
"movq 528(%2), %%mm4
\n\t
"
"punpckhbw %%mm7, %%mm0
\n\t
"
"punpckhbw %%mm7, %%mm1
\n\t
"
"punpckhbw %%mm7, %%mm2
\n\t
"
"punpckhbw %%mm7, %%mm4
\n\t
"
"paddusw %%mm6, %%mm0
\n\t
"
"paddusw %%mm0, %%mm1
\n\t
"
"paddusw %%mm2, %%mm4
\n\t
"
"paddusw %%mm1, %%mm4
\n\t
"
"psrlw $2, %%mm4
\n\t
"
"packuswb %%mm4, %%mm3
\n\t
"
"movq (%0), %%mm4
\n\t
"
PAVGB
(
%%
mm3
,
%%
mm4
,
%%
mm0
,
%%
mm5
)
"movq %%mm0, (%0)
\n\t
"
"movq 8(%1), %%mm0
\n\t
"
"movq 8(%2), %%mm1
\n\t
"
"movq 264(%2), %%mm2
\n\t
"
"movq 536(%2), %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
"punpcklbw %%mm7, %%mm1
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
"punpcklbw %%mm7, %%mm3
\n\t
"
"paddusw %%mm6, %%mm0
\n\t
"
"paddusw %%mm0, %%mm1
\n\t
"
"paddusw %%mm2, %%mm3
\n\t
"
"paddusw %%mm1, %%mm3
\n\t
"
"psrlw $2, %%mm3
\n\t
"
"movq 8(%1), %%mm0
\n\t
"
"movq 8(%2), %%mm1
\n\t
"
"movq 264(%2), %%mm2
\n\t
"
"movq 536(%2), %%mm4
\n\t
"
"punpckhbw %%mm7, %%mm0
\n\t
"
"punpckhbw %%mm7, %%mm1
\n\t
"
"punpckhbw %%mm7, %%mm2
\n\t
"
"punpckhbw %%mm7, %%mm4
\n\t
"
"paddusw %%mm6, %%mm0
\n\t
"
"paddusw %%mm0, %%mm1
\n\t
"
"paddusw %%mm2, %%mm4
\n\t
"
"paddusw %%mm1, %%mm4
\n\t
"
"psrlw $2, %%mm4
\n\t
"
"packuswb %%mm4, %%mm3
\n\t
"
"movq 8(%0), %%mm4
\n\t
"
PAVGB
(
%%
mm3
,
%%
mm4
,
%%
mm0
,
%%
mm5
)
"movq %%mm0, 8(%0)
\n\t
"
"addl %4, %0
\n\t
"
"addl %4, %1
\n\t
"
"addl $16, %2
\n\t
"
"decl %3
\n\t
"
"jnz 1b
\n\t
"
:
"+r"
(
dst
),
"+r"
(
src1
),
"+r"
(
src2
),
"+r"
(
h
)
:
"r"
(
stride
)
:
"memory"
);
}
//FIXME optimize
static
void
DEF
(
put
,
pixels16_y2
)(
UINT8
*
block
,
const
UINT8
*
pixels
,
int
line_size
,
int
h
){
DEF
(
put
,
pixels8_y2
)(
block
,
pixels
,
line_size
,
h
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment