Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
348493db
Commit 348493db authored May 24, 2011 by Daniel Kang
Committed by Ronald S. Bultje, May 31, 2011
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Update 8-bit H.264 IDCT function names to reflect bit-depth.

Signed-off-by: Ronald S. Bultje <rbultje@google.com>
parent
836f47d3
Hide whitespace changes
Inline
Side-by-side
Showing 3 changed files with 57 additions and 72 deletions
+57
-72
h264dsp.h
libavcodec/h264dsp.h
+0
-1
h264_idct.asm
libavcodec/x86/h264_idct.asm
+19
-19
h264dsp_mmx.c
libavcodec/x86/h264dsp_mmx.c
+38
-52
No files found.
libavcodec/h264dsp.h
View file @
348493db
...
...
@@ -66,7 +66,6 @@ typedef struct H264DSPContext{
    void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
    void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
    void (*h264_dct)(DCTELEM block[4][4]);
    void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
    void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
    void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
...
...
libavcodec/x86/h264_idct.asm
View file @
348493db
...
...
@@ -73,7 +73,7 @@ SECTION .text
INIT_MMX
; ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
cglobal h264_idct_add_mmx, 3, 3, 0
cglobal h264_idct_add_8_mmx, 3, 3, 0
    IDCT4_ADD    r0, r1, r2
    RET
...
...
@@ -125,7 +125,7 @@ cglobal h264_idct_add_mmx, 3, 3, 0
    SUMSUB_BA    w, 0, 4
    SUMSUB_BA    w, 3, 2
    SUMSUB_BA    w, 1, 5
    SWAP         7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567
    SWAP         7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567
%endmacro
%macro IDCT8_1D_FULL 1
...
...
@@ -177,7 +177,7 @@ cglobal h264_idct_add_mmx, 3, 3, 0
INIT_MMX
; ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
cglobal h264_idct8_add_mmx, 3, 4, 0
cglobal h264_idct8_add_8_mmx, 3, 4, 0
    %assign pad 128+4-(stack_offset&7)
    SUB         rsp, pad
...
...
@@ -237,7 +237,7 @@ cglobal h264_idct8_add_mmx, 3, 4, 0
INIT_XMM
; ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride)
cglobal h264_idct8_add_sse2, 3, 4, 10
cglobal h264_idct8_add_8_sse2, 3, 4, 10
    IDCT8_ADD_SSE r0, r1, r2, r3
    RET
...
...
@@ -261,7 +261,7 @@ cglobal h264_idct8_add_sse2, 3, 4, 10
    packuswb     m1, m1
%endmacro
%macro DC_ADD_MMX2_OP 3-4
%macro DC_ADD_MMX2_OP 4
    %1           m2, [%2]
    %1           m3, [%2+%3]
    %1           m4, [%2+%3*2]
...
...
@@ -282,13 +282,13 @@ cglobal h264_idct8_add_sse2, 3, 4, 10
INIT_MMX
; ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
cglobal h264_idct_dc_add_mmx2, 3, 3, 0
cglobal h264_idct_dc_add_8_mmx2, 3, 3, 0
    DC_ADD_MMX2_INIT r1, r2
    DC_ADD_MMX2_OP movh, r0, r2, r1
    RET
; ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
cglobal h264_idct8_dc_add_mmx2, 3, 3, 0
cglobal h264_idct8_dc_add_8_mmx2, 3, 3, 0
    DC_ADD_MMX2_INIT r1, r2
    DC_ADD_MMX2_OP mova, r0, r2, r1
    lea          r0, [r0+r2*4]
...
...
@@ -297,7 +297,7 @@ cglobal h264_idct8_dc_add_mmx2, 3, 3, 0
; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add16_mmx, 5, 7, 0
cglobal h264_idct_add16_8_mmx, 5, 7, 0
    xor          r5, r5
%ifdef PIC
    lea         r11, [scan8_mem]
...
...
@@ -319,7 +319,7 @@ cglobal h264_idct_add16_mmx, 5, 7, 0
; ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct8_add4_mmx, 5, 7, 0
cglobal h264_idct8_add4_8_mmx, 5, 7, 0
    %assign pad 128+4-(stack_offset&7)
    SUB         rsp, pad
...
...
@@ -351,7 +351,7 @@ cglobal h264_idct8_add4_mmx, 5, 7, 0
; ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add16_mmx2, 5, 7, 0
cglobal h264_idct_add16_8_mmx2, 5, 7, 0
    xor          r5, r5
%ifdef PIC
    lea         r11, [scan8_mem]
...
...
@@ -398,7 +398,7 @@ cglobal h264_idct_add16_mmx2, 5, 7, 0
; ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add16intra_mmx, 5, 7, 0
cglobal h264_idct_add16intra_8_mmx, 5, 7, 0
    xor          r5, r5
%ifdef PIC
    lea         r11, [scan8_mem]
...
...
@@ -421,7 +421,7 @@ cglobal h264_idct_add16intra_mmx, 5, 7, 0
; ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add16intra_mmx2, 5, 7, 0
cglobal h264_idct_add16intra_8_mmx2, 5, 7, 0
    xor          r5, r5
%ifdef PIC
    lea         r11, [scan8_mem]
...
...
@@ -466,7 +466,7 @@ cglobal h264_idct_add16intra_mmx2, 5, 7, 0
; ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct8_add4_mmx2, 5, 7, 0
cglobal h264_idct8_add4_8_mmx2, 5, 7, 0
    %assign pad 128+4-(stack_offset&7)
    SUB         rsp, pad
...
...
@@ -529,7 +529,7 @@ cglobal h264_idct8_add4_mmx2, 5, 7, 0
INIT_XMM
; ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct8_add4_sse2, 5, 7, 10
cglobal h264_idct8_add4_8_sse2, 5, 7, 10
    xor          r5, r5
%ifdef PIC
    lea         r11, [scan8_mem]
...
...
@@ -607,7 +607,7 @@ h264_idct_add8_mmx_plane:
; ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add8_mmx, 5, 7, 0
cglobal h264_idct_add8_8_mmx, 5, 7, 0
    mov          r5, 16
    add          r2, 512
%ifdef PIC
...
...
@@ -668,7 +668,7 @@ h264_idct_add8_mmx2_plane
; ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add8_mmx2, 5, 7, 0
cglobal h264_idct_add8_8_mmx2, 5, 7, 0
    mov          r5, 16
    add          r2, 512
%ifdef ARCH_X86_64
...
...
@@ -744,7 +744,7 @@ x264_add8x4_idct_sse2:
; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add16_sse2, 5, 5, 8
cglobal h264_idct_add16_8_sse2, 5, 5, 8
%ifdef ARCH_X86_64
    mov        r10, r0
%endif
...
...
@@ -791,7 +791,7 @@ cglobal h264_idct_add16_sse2, 5, 5, 8
; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add16intra_sse2, 5, 7, 8
cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
%ifdef ARCH_X86_64
    mov        r10, r0
%endif
...
...
@@ -840,7 +840,7 @@ cglobal h264_idct_add16intra_sse2, 5, 7, 8
; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add8_sse2, 5, 7, 8
cglobal h264_idct_add8_8_sse2, 5, 7, 8
    add          r2, 512
%ifdef ARCH_X86_64
    mov         r10, r0
...
...
libavcodec/x86/h264dsp_mmx.c
View file @
348493db
...
...
@@ -30,9 +30,14 @@ DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL;
#define IDCT_ADD_FUNC(NUM, DEPTH, OPT) \
void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT (uint8_t *dst, int16_t *block, int stride);
IDCT_ADD_FUNC(, 8, mmx)
IDCT_ADD_FUNC(, 10, sse2)
IDCT_ADD_FUNC(_dc, 8, mmx2)
IDCT_ADD_FUNC(_dc, 10, mmx2)
IDCT_ADD_FUNC(8_dc, 8, mmx2)
IDCT_ADD_FUNC(8_dc, 10, sse2)
IDCT_ADD_FUNC(8, 8, mmx)
IDCT_ADD_FUNC(8, 8, sse2)
IDCT_ADD_FUNC(8, 10, sse2)
#if HAVE_AVX
IDCT_ADD_FUNC(, 10, avx)
...
...
@@ -46,9 +51,18 @@ void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \
(uint8_t *dst, const int *block_offset, \
DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
IDCT_ADD_REP_FUNC(8, 4, 8, mmx)
IDCT_ADD_REP_FUNC(8, 4, 8, mmx2)
IDCT_ADD_REP_FUNC(8, 4, 8, sse2)
IDCT_ADD_REP_FUNC(8, 4, 10, sse2)
IDCT_ADD_REP_FUNC(8, 4, 10, avx)
IDCT_ADD_REP_FUNC(, 16, 8, mmx)
IDCT_ADD_REP_FUNC(, 16, 8, mmx2)
IDCT_ADD_REP_FUNC(, 16, 8, sse2)
IDCT_ADD_REP_FUNC(, 16, 10, sse2)
IDCT_ADD_REP_FUNC(, 16intra, 8, mmx)
IDCT_ADD_REP_FUNC(, 16intra, 8, mmx2)
IDCT_ADD_REP_FUNC(, 16intra, 8, sse2)
IDCT_ADD_REP_FUNC(, 16intra, 10, sse2)
#if HAVE_AVX
IDCT_ADD_REP_FUNC(, 16, 10, avx)
...
...
@@ -60,42 +74,14 @@ IDCT_ADD_REP_FUNC(, 16intra, 10, avx)
void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \
(uint8_t **dst, const int *block_offset, \
DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
IDCT_ADD_REP_FUNC2(, 8, 8, mmx)
IDCT_ADD_REP_FUNC2(, 8, 8, mmx2)
IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
#if HAVE_AVX
IDCT_ADD_REP_FUNC2(, 8, 10, avx)
#endif
void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul);
void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul);
...
...
@@ -350,24 +336,24 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
}
#if HAVE_YASM
    if (mm_flags & AV_CPU_FLAG_MMX) {
        c->h264_idct_dc_add = c->h264_idct_add = ff_h264_idct_add_mmx;
        c->h264_idct8_dc_add = c->h264_idct8_add = ff_h264_idct8_add_mmx;
        c->h264_idct_add16 = ff_h264_idct_add16_mmx;
        c->h264_idct8_add4 = ff_h264_idct8_add4_mmx;
        c->h264_idct_add8 = ff_h264_idct_add8_mmx;
        c->h264_idct_add16intra = ff_h264_idct_add16intra_mmx;
        c->h264_idct_dc_add = c->h264_idct_add = ff_h264_idct_add_8_mmx;
        c->h264_idct8_dc_add = c->h264_idct8_add = ff_h264_idct8_add_8_mmx;
        c->h264_idct_add16 = ff_h264_idct_add16_8_mmx;
        c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx;
        c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
        c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
        c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;
        if (mm_flags & AV_CPU_FLAG_MMX2) {
            c->h264_idct_dc_add = ff_h264_idct_dc_add_mmx2;
            c->h264_idct8_dc_add = ff_h264_idct8_dc_add_mmx2;
            c->h264_idct_add16 = ff_h264_idct_add16_mmx2;
            c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2;
            c->h264_idct_add8 = ff_h264_idct_add8_mmx2;
            c->h264_idct_add16intra = ff_h264_idct_add16intra_mmx2;
            c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmx2;
            c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2;
            c->h264_idct_add16 = ff_h264_idct_add16_8_mmx2;
            c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx2;
            c->h264_idct_add8 = ff_h264_idct_add8_8_mmx2;
            c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx2;
            c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_mmxext;
            c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_8_mmxext;
...
...
@@ -398,8 +384,12 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
            c->biweight_h264_pixels_tab[7] = ff_h264_biweight_4x2_mmx2;
            if (mm_flags & AV_CPU_FLAG_SSE2) {
                c->h264_idct8_add = ff_h264_idct8_add_sse2;
                c->h264_idct8_add4 = ff_h264_idct8_add4_sse2;
                c->h264_idct8_add = ff_h264_idct8_add_8_sse2;
                c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
                c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2;
                c->h264_idct_add8 = ff_h264_idct_add8_8_sse2;
                c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2;
                c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_sse2;
                c->weight_h264_pixels_tab[0] = ff_h264_weight_16x16_sse2;
...
...
@@ -420,10 +410,6 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
#endif
                c->h264_idct_add16 = ff_h264_idct_add16_sse2;
                c->h264_idct_add8 = ff_h264_idct_add8_sse2;
                c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
            }
            if (mm_flags & AV_CPU_FLAG_SSSE3) {
                c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16x16_ssse3;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment