Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
357f45d9
Commit
357f45d9
authored
Oct 17, 2008
by
David Conrad
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
MMX VP3 Loop Filter
Originally committed as revision 15630 to
svn://svn.ffmpeg.org/ffmpeg/trunk
parent
75083a88
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
131 additions
and
1 deletion
+131
-1
dsputil_mmx.c
libavcodec/i386/dsputil_mmx.c
+7
-0
dsputil_mmx.h
libavcodec/i386/dsputil_mmx.h
+18
-0
vp3dsp_mmx.c
libavcodec/i386/vp3dsp_mmx.c
+101
-0
vp3dsp_mmx.h
libavcodec/i386/vp3dsp_mmx.h
+3
-0
vp3.c
libavcodec/vp3.c
+2
-1
No files found.
libavcodec/i386/dsputil_mmx.c
View file @
357f45d9
...
...
@@ -63,7 +63,9 @@ DECLARE_ALIGNED_8 (const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
/* Packed-byte constants: each 64-bit value repeats one byte value in all
 * eight byte positions; the ff_pb_ suffix is that byte value in hex. */
DECLARE_ALIGNED_8(const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL;
DECLARE_ALIGNED_8(const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL;
DECLARE_ALIGNED_8(const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;
DECLARE_ALIGNED_8(const uint64_t, ff_pb_1F) = 0x1F1F1F1F1F1F1F1FULL;
DECLARE_ALIGNED_8(const uint64_t, ff_pb_3F) = 0x3F3F3F3F3F3F3F3FULL;
DECLARE_ALIGNED_8(const uint64_t, ff_pb_81) = 0x8181818181818181ULL;
DECLARE_ALIGNED_8(const uint64_t, ff_pb_A1) = 0xA1A1A1A1A1A1A1A1ULL;
DECLARE_ALIGNED_8(const uint64_t, ff_pb_FC) = 0xFCFCFCFCFCFCFCFCULL;
...
...
@@ -2591,6 +2593,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c
->
h263_v_loop_filter
=
h263_v_loop_filter_mmx
;
c
->
h263_h_loop_filter
=
h263_h_loop_filter_mmx
;
}
if
((
ENABLE_VP3_DECODER
||
ENABLE_THEORA_DECODER
)
&&
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
))
{
c
->
vp3_v_loop_filter
=
ff_vp3_v_loop_filter_mmx
;
c
->
vp3_h_loop_filter
=
ff_vp3_h_loop_filter_mmx
;
}
c
->
put_h264_chroma_pixels_tab
[
0
]
=
put_h264_chroma_mc8_mmx_rnd
;
c
->
put_h264_chroma_pixels_tab
[
1
]
=
put_h264_chroma_mc4_mmx
;
c
->
put_no_rnd_h264_chroma_pixels_tab
[
0
]
=
put_h264_chroma_mc8_mmx_nornd
;
...
...
libavcodec/i386/dsputil_mmx.h
View file @
357f45d9
...
...
@@ -50,7 +50,9 @@ extern const uint64_t ff_pw_255;
extern
const
uint64_t
ff_pb_1
;
extern
const
uint64_t
ff_pb_3
;
extern
const
uint64_t
ff_pb_7
;
extern
const
uint64_t
ff_pb_1F
;
extern
const
uint64_t
ff_pb_3F
;
extern
const
uint64_t
ff_pb_81
;
extern
const
uint64_t
ff_pb_A1
;
extern
const
uint64_t
ff_pb_FC
;
...
...
@@ -86,6 +88,22 @@ extern const double ff_pd_2[2];
SBUTTERFLY(a,c,d,dq,q)
/* a=aeim d=bfjn */
\
SBUTTERFLY(t,b,c,dq,q)
/* t=cgko c=dhlp */
// Interleaves the low 4 bytes of eight rows a..h so that each output holds
// one byte column of all eight rows (see the lane comments on each step).
// in:  a,b,c,d are registers holding rows a-d
// e,f,g,h can be memory
// t is a scratch register that receives one of the results
// out: a,d,t,c (byte columns 0,1,2,3 respectively); b is clobbered
#define TRANSPOSE8x4(a,b,c,d,e,f,g,h,t)\
    "punpcklbw " #e ", " #a " \n\t" /* a0 e0 a1 e1 a2 e2 a3 e3 */\
    "punpcklbw " #f ", " #b " \n\t" /* b0 f0 b1 f1 b2 f2 b3 f3 */\
    "punpcklbw " #g ", " #c " \n\t" /* c0 g0 c1 g1 c2 g2 c3 g3 */\
    "punpcklbw " #h ", " #d " \n\t" /* d0 h0 d1 h1 d2 h2 d3 h3 */\
    SBUTTERFLY(a, b, t, bw, q) /* a= a0 b0 e0 f0 a1 b1 e1 f1 */\
                               /* t= a2 b2 e2 f2 a3 b3 e3 f3 */\
    SBUTTERFLY(c, d, b, bw, q) /* c= c0 d0 g0 h0 c1 d1 g1 h1 */\
                               /* b= c2 d2 g2 h2 c3 d3 g3 h3 */\
    SBUTTERFLY(a, c, d, wd, q) /* a= a0 b0 c0 d0 e0 f0 g0 h0 */\
                               /* d= a1 b1 c1 d1 e1 f1 g1 h1 */\
    SBUTTERFLY(t, b, c, wd, q) /* t= a2 b2 c2 d2 e2 f2 g2 h2 */\
                               /* c= a3 b3 c3 d3 e3 f3 g3 h3 */
#ifdef ARCH_X86_64
// permutes 01234567 -> 05736421
#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\
...
...
libavcodec/i386/vp3dsp_mmx.c
View file @
357f45d9
...
...
@@ -23,11 +23,112 @@
* MMX-optimized functions cribbed from the original VP3 source code.
*/
#include "libavutil/x86_cpu.h"
#include "libavcodec/dsputil.h"
#include "dsputil_mmx.h"
extern
const
uint16_t
ff_vp3_idct_data
[];
// Applies the VP3 loop filter to eight pixels at once (one per byte lane).
// this is off by one or two for some cases when filter_limit is greater than 63
// in: p0 in mm6, p1 in mm4, p2 in mm2, p3 in mm1
// out: p1 in mm4, p2 in mm3
// flim: asm operand holding the per-byte limit used to bound the adjustment
// All of mm0-mm7 are overwritten.
// NOTE(review): pavgb and pminub are not baseline MMX instructions (they came
// with SSE / the AMD MMX extensions) -- confirm the caller only installs this
// code on CPUs that support them.
#define VP3_LOOP_FILTER(flim) \
    "movq %%mm6, %%mm7 \n\t" \
    "pand "MANGLE(ff_pb_7 )", %%mm6 \n\t" /* p0&7 */ \
    "psrlw $3, %%mm7 \n\t" \
    "pand "MANGLE(ff_pb_1F)", %%mm7 \n\t" /* p0>>3 */ \
    "movq %%mm2, %%mm3 \n\t" /* mm3 = p2 */ \
    "pxor %%mm4, %%mm2 \n\t" \
    "pand "MANGLE(ff_pb_1 )", %%mm2 \n\t" /* (p2^p1)&1 */ \
    "movq %%mm2, %%mm5 \n\t" \
    "paddb %%mm2, %%mm2 \n\t" \
    "paddb %%mm5, %%mm2 \n\t" /* 3*(p2^p1)&1 */ \
    "paddb %%mm6, %%mm2 \n\t" /* extra bits lost in shifts */ \
    "pcmpeqb %%mm0, %%mm0 \n\t" /* mm0 = all ones (0xFF in every byte) */ \
    "pxor %%mm0, %%mm1 \n\t" /* 255 - p3 */ \
    "pavgb %%mm2, %%mm1 \n\t" /* (256 - p3 + extrabits) >> 1 */ \
    "pxor %%mm4, %%mm0 \n\t" /* 255 - p1 */ \
    "pavgb %%mm3, %%mm0 \n\t" /* (256 + p2-p1) >> 1 */ \
    "paddb "MANGLE(ff_pb_3 )", %%mm1 \n\t" \
    "pavgb %%mm0, %%mm1 \n\t" /* 128+2+( p2-p1 - p3) >> 2 */ \
    "pavgb %%mm0, %%mm1 \n\t" /* 128+1+(3*(p2-p1) - p3) >> 3 */ \
    "paddusb %%mm1, %%mm7 \n\t" /* d+128+1 */ \
    "movq "MANGLE(ff_pb_81)", %%mm6 \n\t" /* mm6 = 0x81 in every byte */ \
    "psubusb %%mm7, %%mm6 \n\t" /* mm6 = max(0, 0x81 - mm7): magnitude of d where d < 0 */ \
    "psubusb "MANGLE(ff_pb_81)", %%mm7 \n\t" /* mm7 = max(0, mm7 - 0x81): magnitude of d where d > 0 */ \
    \
    "movq "#flim", %%mm5 \n\t" /* mm5 = flim */ \
    "pminub %%mm5, %%mm6 \n\t" /* m_neg = min(mm6, flim) */ \
    "pminub %%mm5, %%mm7 \n\t" /* m_pos = min(mm7, flim) */ \
    "movq %%mm6, %%mm0 \n\t" /* keep m_neg */ \
    "movq %%mm7, %%mm1 \n\t" /* keep m_pos */ \
    "paddb %%mm6, %%mm6 \n\t" /* 2*m_neg (wraps per byte, not saturating) */ \
    "paddb %%mm7, %%mm7 \n\t" /* 2*m_pos (wraps per byte, not saturating) */ \
    "pminub %%mm5, %%mm6 \n\t" /* min(2*m_neg, flim) */ \
    "pminub %%mm5, %%mm7 \n\t" /* min(2*m_pos, flim) */ \
    "psubb %%mm0, %%mm6 \n\t" /* neg adjust = min(2*m_neg, flim) - m_neg */ \
    "psubb %%mm1, %%mm7 \n\t" /* pos adjust = min(2*m_pos, flim) - m_pos */ \
    "paddusb %%mm7, %%mm4 \n\t" /* p1 += pos adjust (saturating) */ \
    "psubusb %%mm6, %%mm4 \n\t" /* p1 -= neg adjust (saturating) */ \
    "psubusb %%mm7, %%mm3 \n\t" /* p2 -= pos adjust (saturating) */ \
    "paddusb %%mm6, %%mm3 \n\t" /* p2 += neg adjust (saturating) */
// Stores the four 16-bit words of MMX register mm, lowest word first, to
// dst0..dst3. Each dst is an address expression and is emitted with a -1
// displacement, so every 2-byte store starts one byte before that address.
// Uses asm operand %0 as a general-purpose scratch register; mm is shifted
// right by 32 bits as a side effect.
#define STORE_4_WORDS(dst0, dst1, dst2, dst3, mm) \
    "movd "#mm", %0 \n\t" /* %0 = low 32 bits of mm (words 0 and 1) */ \
    "movw %w0, -1"#dst0" \n\t" /* store word 0 */ \
    "psrlq $32, "#mm" \n\t" /* bring words 2 and 3 down into the low half */ \
    "shr $16, %0 \n\t" /* word 1 -> low 16 bits of %0 */ \
    "movw %w0, -1"#dst1" \n\t" /* store word 1 */ \
    "movd "#mm", %0 \n\t" /* %0 = words 2 and 3 */ \
    "movw %w0, -1"#dst2" \n\t" /* store word 2 */ \
    "shr $16, %0 \n\t" /* word 3 -> low 16 bits of %0 */ \
    "movw %w0, -1"#dst3" \n\t" /* store word 3 */
void
ff_vp3_v_loop_filter_mmx
(
uint8_t
*
src
,
int
stride
,
int
*
bounding_values
)
{
__asm__
volatile
(
"movq %0, %%mm6
\n\t
"
"movq %1, %%mm4
\n\t
"
"movq %2, %%mm2
\n\t
"
"movq %3, %%mm1
\n\t
"
VP3_LOOP_FILTER
(
%
4
)
"movq %%mm4, %1
\n\t
"
"movq %%mm3, %2
\n\t
"
:
"+m"
(
*
(
uint64_t
*
)(
src
-
2
*
stride
)),
"+m"
(
*
(
uint64_t
*
)(
src
-
1
*
stride
)),
"+m"
(
*
(
uint64_t
*
)(
src
+
0
*
stride
)),
"+m"
(
*
(
uint64_t
*
)(
src
+
1
*
stride
))
:
"m"
(
*
(
uint64_t
*
)(
bounding_values
+
129
))
);
}
void
ff_vp3_h_loop_filter_mmx
(
uint8_t
*
src
,
int
stride
,
int
*
bounding_values
)
{
x86_reg
tmp
;
__asm__
volatile
(
"movd -2(%1), %%mm6
\n\t
"
"movd -2(%1,%3), %%mm0
\n\t
"
"movd -2(%1,%3,2), %%mm1
\n\t
"
"movd -2(%1,%4), %%mm4
\n\t
"
TRANSPOSE8x4
(
%%
mm6
,
%%
mm0
,
%%
mm1
,
%%
mm4
,
-
2
(
%
2
),
-
2
(
%
2
,
%
3
),
-
2
(
%
2
,
%
3
,
2
),
-
2
(
%
2
,
%
4
),
%%
mm2
)
VP3_LOOP_FILTER
(
%
5
)
SBUTTERFLY
(
%%
mm4
,
%%
mm3
,
%%
mm5
,
bw
,
q
)
STORE_4_WORDS
((
%
1
),
(
%
1
,
%
3
),
(
%
1
,
%
3
,
2
),
(
%
1
,
%
4
),
%%
mm4
)
STORE_4_WORDS
((
%
2
),
(
%
2
,
%
3
),
(
%
2
,
%
3
,
2
),
(
%
2
,
%
4
),
%%
mm5
)
:
"=&r"
(
tmp
)
:
"r"
(
src
),
"r"
(
src
+
4
*
stride
),
"r"
((
x86_reg
)
stride
),
"r"
((
x86_reg
)
3
*
stride
),
"m"
(
*
(
uint64_t
*
)(
bounding_values
+
129
))
:
"memory"
);
}
/* from original comments: The Macro does IDct on 4 1-D Dcts */
#define BeginIDCT() \
"movq "I(3)", %%mm2 \n\t" \
...
...
libavcodec/i386/vp3dsp_mmx.h
View file @
357f45d9
...
...
@@ -29,4 +29,7 @@ void ff_vp3_idct_mmx(int16_t *data);
void
ff_vp3_idct_put_mmx
(
uint8_t
*
dest
,
int
line_size
,
DCTELEM
*
block
);
void
ff_vp3_idct_add_mmx
(
uint8_t
*
dest
,
int
line_size
,
DCTELEM
*
block
);
void
ff_vp3_v_loop_filter_mmx
(
uint8_t
*
src
,
int
stride
,
int
*
bounding_values
);
void
ff_vp3_h_loop_filter_mmx
(
uint8_t
*
src
,
int
stride
,
int
*
bounding_values
);
#endif
/* AVCODEC_I386_VP3DSP_MMX_H */
libavcodec/vp3.c
View file @
357f45d9
...
...
@@ -229,7 +229,7 @@ typedef struct Vp3DecodeContext {
uint16_t
huffman_table
[
80
][
32
][
2
];
uint8_t
filter_limit_values
[
64
];
int
bounding_values_array
[
256
]
;
DECLARE_ALIGNED_8
(
int
,
bounding_values_array
[
256
+
2
])
;
}
Vp3DecodeContext
;
/************************************************************************
...
...
@@ -533,6 +533,7 @@ static void init_loop_filter(Vp3DecodeContext *s)
bounding_values
[
x
]
=
x
;
bounding_values
[
x
+
filter_limit
]
=
filter_limit
-
x
;
}
bounding_values
[
129
]
=
bounding_values
[
130
]
=
filter_limit
*
0x02020202
;
}
/*
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment