Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
548a1c8a
Commit
548a1c8a
authored
Mar 07, 2006
by
Loren Merritt
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
h264_idct8_add_mmx
Originally committed as revision 5123 to
svn://svn.ffmpeg.org/ffmpeg/trunk
parent
6da971f1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
160 additions
and
6 deletions
+160
-6
h264.c
libavcodec/h264.c
+31
-6
dsputil_mmx.c
libavcodec/i386/dsputil_mmx.c
+2
-0
h264dsp_mmx.c
libavcodec/i386/h264dsp_mmx.c
+127
-0
No files found.
libavcodec/h264.c
View file @
548a1c8a
...
...
@@ -358,8 +358,12 @@ typedef struct H264Context{
uint8_t
zigzag_scan
[
16
];
uint8_t
field_scan
[
16
];
uint8_t
zigzag_scan8x8
[
64
];
uint8_t
zigzag_scan8x8_cavlc
[
64
];
const
uint8_t
*
zigzag_scan_q0
;
const
uint8_t
*
field_scan_q0
;
const
uint8_t
*
zigzag_scan8x8_q0
;
const
uint8_t
*
zigzag_scan8x8_cavlc_q0
;
int
x264_build
;
}
H264Context
;
...
...
@@ -2953,6 +2957,7 @@ static void free_tables(H264Context *h){
static
void
init_dequant8_coeff_table
(
H264Context
*
h
){
int
i
,
q
,
x
;
const
int
transpose
=
(
h
->
s
.
dsp
.
h264_idct8_add
!=
ff_h264_idct8_add_c
);
//FIXME ugly
h
->
dequant8_coeff
[
0
]
=
h
->
dequant8_buffer
[
0
];
h
->
dequant8_coeff
[
1
]
=
h
->
dequant8_buffer
[
1
];
...
...
@@ -2966,8 +2971,9 @@ static void init_dequant8_coeff_table(H264Context *h){
int
shift
=
div6
[
q
];
int
idx
=
rem6
[
q
];
for
(
x
=
0
;
x
<
64
;
x
++
)
h
->
dequant8_coeff
[
i
][
q
][
x
]
=
((
uint32_t
)
dequant8_coeff_init
[
idx
][
dequant8_coeff_init_scan
[((
x
>>
1
)
&
12
)
|
(
x
&
3
)]
]
*
h
->
pps
.
scaling_matrix8
[
i
][
x
])
<<
shift
;
h
->
dequant8_coeff
[
i
][
q
][
transpose
?
(
x
>>
3
)
|
((
x
&
7
)
<<
3
)
:
x
]
=
((
uint32_t
)
dequant8_coeff_init
[
idx
][
dequant8_coeff_init_scan
[((
x
>>
1
)
&
12
)
|
(
x
&
3
)]
]
*
h
->
pps
.
scaling_matrix8
[
i
][
x
])
<<
shift
;
}
}
}
...
...
@@ -4317,14 +4323,31 @@ static int decode_slice_header(H264Context *h){
#define T(x) (x>>2) | ((x<<2) & 0xF)
h
->
zigzag_scan
[
i
]
=
T
(
zigzag_scan
[
i
]);
h
->
field_scan
[
i
]
=
T
(
field_scan
[
i
]);
#undef T
}
}
if
(
s
->
dsp
.
h264_idct8_add
==
ff_h264_idct8_add_c
){
memcpy
(
h
->
zigzag_scan8x8
,
zigzag_scan8x8
,
64
*
sizeof
(
uint8_t
));
memcpy
(
h
->
zigzag_scan8x8_cavlc
,
zigzag_scan8x8_cavlc
,
64
*
sizeof
(
uint8_t
));
}
else
{
int
i
;
for
(
i
=
0
;
i
<
64
;
i
++
){
#define T(x) (x>>3) | ((x&7)<<3)
h
->
zigzag_scan8x8
[
i
]
=
T
(
zigzag_scan8x8
[
i
]);
h
->
zigzag_scan8x8_cavlc
[
i
]
=
T
(
zigzag_scan8x8_cavlc
[
i
]);
#undef T
}
}
if
(
h
->
sps
.
transform_bypass
){
//FIXME same ugly
h
->
zigzag_scan_q0
=
zigzag_scan
;
h
->
field_scan_q0
=
field_scan
;
h
->
zigzag_scan8x8_q0
=
zigzag_scan8x8
;
h
->
zigzag_scan8x8_cavlc_q0
=
zigzag_scan8x8_cavlc
;
}
else
{
h
->
zigzag_scan_q0
=
h
->
zigzag_scan
;
h
->
field_scan_q0
=
h
->
field_scan
;
h
->
zigzag_scan8x8_q0
=
h
->
zigzag_scan8x8
;
h
->
zigzag_scan8x8_cavlc_q0
=
h
->
zigzag_scan8x8_cavlc
;
}
alloc_tables
(
h
);
...
...
@@ -5101,7 +5124,7 @@ decode_intra_mb:
int
i8x8
,
i4x4
,
chroma_idx
;
int
chroma_qp
,
dquant
;
GetBitContext
*
gb
=
IS_INTRA
(
mb_type
)
?
h
->
intra_gb_ptr
:
h
->
inter_gb_ptr
;
const
uint8_t
*
scan
,
*
dc_scan
;
const
uint8_t
*
scan
,
*
scan8x8
,
*
dc_scan
;
// fill_non_zero_count_cache(h);
...
...
@@ -5112,6 +5135,7 @@ decode_intra_mb:
scan
=
s
->
qscale
?
h
->
zigzag_scan
:
h
->
zigzag_scan_q0
;
dc_scan
=
luma_dc_zigzag_scan
;
}
scan8x8
=
s
->
qscale
?
h
->
zigzag_scan8x8_cavlc
:
h
->
zigzag_scan8x8_cavlc_q0
;
dquant
=
get_se_golomb
(
&
s
->
gb
);
...
...
@@ -5153,7 +5177,7 @@ decode_intra_mb:
DCTELEM
*
buf
=
&
h
->
mb
[
64
*
i8x8
];
uint8_t
*
nnz
;
for
(
i4x4
=
0
;
i4x4
<
4
;
i4x4
++
){
if
(
decode_residual
(
h
,
gb
,
buf
,
i4x4
+
4
*
i8x8
,
zigzag_scan8x8_cavlc
+
16
*
i4x4
,
if
(
decode_residual
(
h
,
gb
,
buf
,
i4x4
+
4
*
i8x8
,
scan8x8
+
16
*
i4x4
,
h
->
dequant8_coeff
[
IS_INTRA
(
mb_type
)
?
0
:
1
][
s
->
qscale
],
16
)
<
0
)
return
-
1
;
}
...
...
@@ -6144,7 +6168,7 @@ decode_intra_mb:
s
->
current_picture
.
mb_type
[
mb_xy
]
=
mb_type
;
if
(
cbp
||
IS_INTRA16x16
(
mb_type
)
)
{
const
uint8_t
*
scan
,
*
dc_scan
;
const
uint8_t
*
scan
,
*
scan8x8
,
*
dc_scan
;
int
dqp
;
if
(
IS_INTERLACED
(
mb_type
)){
...
...
@@ -6154,6 +6178,7 @@ decode_intra_mb:
scan
=
s
->
qscale
?
h
->
zigzag_scan
:
h
->
zigzag_scan_q0
;
dc_scan
=
luma_dc_zigzag_scan
;
}
scan8x8
=
s
->
qscale
?
h
->
zigzag_scan8x8
:
h
->
zigzag_scan8x8_q0
;
h
->
last_qscale_diff
=
dqp
=
decode_cabac_mb_dqp
(
h
);
if
(
dqp
==
INT_MIN
){
...
...
@@ -6187,7 +6212,7 @@ decode_intra_mb:
if
(
cbp
&
(
1
<<
i8x8
)
)
{
if
(
IS_8x8DCT
(
mb_type
)
)
{
if
(
decode_cabac_residual
(
h
,
h
->
mb
+
64
*
i8x8
,
5
,
4
*
i8x8
,
zigzag_
scan8x8
,
h
->
dequant8_coeff
[
IS_INTRA
(
mb_type
)
?
0
:
1
][
s
->
qscale
],
64
)
<
0
)
scan8x8
,
h
->
dequant8_coeff
[
IS_INTRA
(
mb_type
)
?
0
:
1
][
s
->
qscale
],
64
)
<
0
)
return
-
1
;
}
else
for
(
i4x4
=
0
;
i4x4
<
4
;
i4x4
++
)
{
...
...
libavcodec/i386/dsputil_mmx.c
View file @
548a1c8a
...
...
@@ -2734,6 +2734,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c
->
h264_idct_dc_add
=
c
->
h264_idct_add
=
ff_h264_idct_add_mmx
;
c
->
h264_idct8_dc_add
=
c
->
h264_idct8_add
=
ff_h264_idct8_add_mmx
;
if
(
mm_flags
&
MM_MMXEXT
)
{
c
->
put_pixels_tab
[
0
][
1
]
=
put_pixels16_x2_mmx2
;
...
...
libavcodec/i386/h264dsp_mmx.c
View file @
548a1c8a
...
...
@@ -104,6 +104,133 @@ static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
);
}
/**
 * One 1-D pass of the 8-point H.264 inverse transform, done with MMX on
 * four 16-bit lanes at once.
 *
 * Reads eight 64-bit quantities at byte offsets 0, 16, 32, ... 112 from
 * @p block (i.e. four adjacent int16_t values from each of eight
 * consecutive 16-byte rows) and leaves the eight results in %mm0..%mm7.
 * Nothing is written back to memory here: the caller's next asm statement
 * is expected to pick the results up directly from the MMX registers.
 *
 * NOTE(review): the MMX registers are deliberately not listed as clobbers
 * because they carry the results out of this statement; this relies on the
 * compiler not touching %mm0..%mm7 between consecutive asm statements.
 * NOTE(review): which register ends up holding which output row depends on
 * the SUMSUB_BA macro, which is defined outside this block.
 *
 * @param block pointer to the first of the four columns to transform
 */
static inline void h264_idct8_1d(int16_t *block)
{
    asm volatile(
        /* odd-numbered input rows (byte offsets 16, 48, 80, 112) */
        "movq 112(%0), %%mm7 \n\t"
        "movq  80(%0), %%mm5 \n\t"
        "movq  48(%0), %%mm3 \n\t"
        "movq  16(%0), %%mm1 \n\t"

        "movq %%mm7, %%mm4 \n\t"
        "movq %%mm3, %%mm6 \n\t"
        "movq %%mm5, %%mm0 \n\t"
        "movq %%mm7, %%mm2 \n\t"
        "psraw $1, %%mm4 \n\t"
        "psraw $1, %%mm6 \n\t"
        "psubw %%mm7, %%mm0 \n\t"
        "psubw %%mm6, %%mm2 \n\t"
        "psubw %%mm4, %%mm0 \n\t"
        "psubw %%mm3, %%mm2 \n\t"
        "psubw %%mm3, %%mm0 \n\t"
        "paddw %%mm1, %%mm2 \n\t"

        "movq %%mm5, %%mm4 \n\t"
        "movq %%mm1, %%mm6 \n\t"
        "psraw $1, %%mm4 \n\t"
        "psraw $1, %%mm6 \n\t"
        "paddw %%mm5, %%mm4 \n\t"
        "paddw %%mm1, %%mm6 \n\t"
        "paddw %%mm7, %%mm4 \n\t"
        "paddw %%mm5, %%mm6 \n\t"
        "psubw %%mm1, %%mm4 \n\t"
        "paddw %%mm3, %%mm6 \n\t"

        "movq %%mm0, %%mm1 \n\t"
        "movq %%mm4, %%mm3 \n\t"
        "movq %%mm2, %%mm5 \n\t"
        "movq %%mm6, %%mm7 \n\t"
        "psraw $2, %%mm6 \n\t"
        "psraw $2, %%mm3 \n\t"
        "psraw $2, %%mm5 \n\t"
        "psraw $2, %%mm0 \n\t"
        "paddw %%mm6, %%mm1 \n\t"
        "paddw %%mm2, %%mm3 \n\t"
        "psubw %%mm4, %%mm5 \n\t"
        "psubw %%mm0, %%mm7 \n\t"

        /* even-numbered input rows (byte offsets 32, 96, then 0, 64) */
        "movq 32(%0), %%mm2 \n\t"
        "movq 96(%0), %%mm6 \n\t"
        "movq %%mm2, %%mm4 \n\t"
        "movq %%mm6, %%mm0 \n\t"
        "psraw $1, %%mm4 \n\t"
        "psraw $1, %%mm6 \n\t"
        "psubw %%mm0, %%mm4 \n\t"
        "paddw %%mm2, %%mm6 \n\t"

        "movq   (%0), %%mm2 \n\t"
        "movq 64(%0), %%mm0 \n\t"
        SUMSUB_BA( %%mm0, %%mm2 )
        SUMSUB_BA( %%mm6, %%mm0 )
        SUMSUB_BA( %%mm4, %%mm2 )
        SUMSUB_BA( %%mm7, %%mm6 )
        SUMSUB_BA( %%mm5, %%mm4 )
        SUMSUB_BA( %%mm3, %%mm2 )
        SUMSUB_BA( %%mm1, %%mm0 )
        :
        : "r"(block)
        /* The template dereferences block[], which the "r" operand alone
         * does not tell the compiler. Without this clobber, stores the
         * caller made to the block just before the call may be reordered
         * past the asm. */
        : "memory"
    );
}
/**
 * 8x8 H.264 inverse transform (MMX) followed by adding the result to dst.
 *
 * Works in two passes over an aligned scratch buffer:
 *  1. for each 4-column half of @p block, run the 1-D transform, transpose
 *     the two resulting 4x4 groups of 16-bit values and store them into the
 *     scratch buffer;
 *  2. for each 4-column half of the scratch buffer, run the 1-D transform
 *     again, arithmetic-shift every value right by 6 and store it back in
 *     place.
 * The scratch buffer is then handed to add_pixels_clamped_mmx() together
 * with @p dst and @p stride.
 *
 * Note that block[0] is modified (32 is added to it before the transform;
 * presumably the rounding bias for the final >>6).
 *
 * @param dst    destination pixels, passed through to add_pixels_clamped_mmx()
 * @param block  64 input coefficients; element 0 is written
 * @param stride passed through to add_pixels_clamped_mmx()
 */
static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
{
    int half;
    int16_t __attribute__ ((aligned(8))) b2[64];

    block[0] += 32;

    /* pass 1: transform 4 columns at a time, write transposed into b2 */
    for(half=0; half<2; half++){
        uint64_t spill;                   /* temporary home for %mm7 */
        int16_t *src4 = block + 4*half;
        int16_t *out  = b2 + 32*half;

        h264_idct8_1d(src4);
        asm volatile(
            /* TRANSPOSE4 needs a scratch register, so park %mm7 first */
            "movq %%mm7, %0 \n\t"
            TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
            "movq %%mm0,  8(%1) \n\t"
            "movq %%mm6, 24(%1) \n\t"
            "movq %%mm7, 40(%1) \n\t"
            "movq %%mm4, 56(%1) \n\t"
            "movq %0, %%mm7 \n\t"
            TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
            "movq %%mm7,   (%1) \n\t"
            "movq %%mm1, 16(%1) \n\t"
            "movq %%mm0, 32(%1) \n\t"
            "movq %%mm3, 48(%1) \n\t"
            : "=m"(spill)
            : "r"(out)
            : "memory"
        );
    }

    /* pass 2: transform b2 in place, scaling every result by >>6 */
    for(half=0; half<2; half++){
        int16_t *cols = b2 + 4*half;

        h264_idct8_1d(cols);
        asm volatile(
            "psraw $6, %%mm7 \n\t"
            "psraw $6, %%mm6 \n\t"
            "psraw $6, %%mm5 \n\t"
            "psraw $6, %%mm4 \n\t"
            "psraw $6, %%mm3 \n\t"
            "psraw $6, %%mm2 \n\t"
            "psraw $6, %%mm1 \n\t"
            "psraw $6, %%mm0 \n\t"
            "movq %%mm7,    (%0) \n\t"
            "movq %%mm5,  16(%0) \n\t"
            "movq %%mm3,  32(%0) \n\t"
            "movq %%mm1,  48(%0) \n\t"
            "movq %%mm0,  64(%0) \n\t"
            "movq %%mm2,  80(%0) \n\t"
            "movq %%mm4,  96(%0) \n\t"
            "movq %%mm6, 112(%0) \n\t"
            :
            : "r"(cols)
            : "memory"
        );
    }

    add_pixels_clamped_mmx(b2, dst, stride);
}
static
void
ff_h264_idct_dc_add_mmx2
(
uint8_t
*
dst
,
int16_t
*
block
,
int
stride
)
{
int
dc
=
(
block
[
0
]
+
32
)
>>
6
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment