Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
bcc22352
Commit
bcc22352
authored
Feb 14, 2016
by
Timothy Gu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86/vc1dsp: Port vc1_*_hor_16b_shift2 to NASM format
Reviewed-by:
Christophe Gisquet
<
christophe.gisquet@gmail.com
>
parent
ebf648d4
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
98 additions
and
53 deletions
+98
-53
vc1dsp.asm
libavcodec/x86/vc1dsp.asm
+90
-0
vc1dsp_mmx.c
libavcodec/x86/vc1dsp_mmx.c
+8
-53
No files found.
libavcodec/x86/vc1dsp.asm
View file @
bcc22352
...
...
@@ -25,6 +25,7 @@
cextern
pw_4
cextern
pw_5
cextern
pw_9
cextern
pw_128
section
.
text
...
...
@@ -319,6 +320,44 @@ cglobal vc1_h_loop_filter8, 3,5,8
RET
%if
HAVE_MMX_INLINE
; XXX some of these macros are not used right now, but they will be in the
; future, when more functions are ported.
%macro
OP_PUT
2
; dst, src
%endmacro
%macro
OP_AVG
2
; dst, src
pavgb
%1
,
%2
%endmacro
%macro
NORMALIZE_MMX
1
; shift
paddw
m3
,
m7
; +bias-r
paddw
m4
,
m7
; +bias-r
psraw
m3
,
%1
psraw
m4
,
%1
%endmacro
%macro
TRANSFER_DO_PACK
2
; op, dst
packuswb
m3
,
m4
%1
m3
,
[
%2
]
mova
[
%2
]
,
m3
%endmacro
; Combine the two result registers with the data already at dst using <op>,
; then store both registers back without packing.
;   %1 = op  : two-operand macro taking (reg, mem), e.g. OP_PUT (expands to
;              nothing) or OP_AVG (pavgb reg, mem)
;   %2 = dst : address expression of the destination
; m3 pairs with [dst] and m4 with [dst + mmsize]; the op must therefore be
; applied to m4 for the second half, matching the stores below.
%macro TRANSFER_DONT_PACK 2 ; op, dst
    %1         m3, [%2]
    %1         m4, [%2 + mmsize]
    mova       [%2], m3
    mova       [mmsize + %2], m4
%endmacro
; see MSPEL_FILTER13_CORE for use as UNPACK macro
%macro
DO_UNPACK
1
; reg
punpcklbw
%1
,
m0
%endmacro
%macro
DONT_UNPACK
1
; reg
%endmacro
; Compute the rounder 32-r or 8-r and unpack it to m7
%macro
LOAD_ROUNDER_MMX
1
; round
movd
m7
,
%1
...
...
@@ -394,6 +433,57 @@ cglobal vc1_put_ver_16b_shift2, 4,7,0, dst, src, stride
dec
i
jnz
.
loop
REP_RET
%undef
rnd
%undef
shift
%undef
stride_neg2
%undef
stride_9minus4
%undef
i
; void ff_vc1_*_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,
; const int16_t *src, int rnd);
; Data is already unpacked, so some operations can be performed directly on
; memory operands.
%macro
HOR_16B_SHIFT2
2
; op, opname
cglobal
vc1_
%2
_hor_16b_shift2
,
4
,
5
,
0
,
dst
,
stride
,
src
,
rnd
,
h
mov
hq
,
8
sub
srcq
,
2
sub
rndd
,
(
-
1
+
9
+
9
-
1
)
*
1024
; add -1024 bias
LOAD_ROUNDER_MMX
rndq
mova
m5
,
[
pw_9
]
mova
m6
,
[
pw_128
]
pxor
m0
,
m0
.
loop
:
mova
m1
,
[
srcq
+
2
*
0
]
mova
m2
,
[
srcq
+
2
*
0
+
mmsize
]
mova
m3
,
[
srcq
+
2
*
1
]
mova
m4
,
[
srcq
+
2
*
1
+
mmsize
]
paddw
m3
,
[
srcq
+
2
*
2
]
paddw
m4
,
[
srcq
+
2
*
2
+
mmsize
]
paddw
m1
,
[
srcq
+
2
*
3
]
paddw
m2
,
[
srcq
+
2
*
3
+
mmsize
]
pmullw
m3
,
m5
pmullw
m4
,
m5
psubw
m3
,
m1
psubw
m4
,
m2
NORMALIZE_MMX
7
; remove bias
paddw
m3
,
m6
paddw
m4
,
m6
TRANSFER_DO_PACK
%1
,
dstq
add
srcq
,
24
add
dstq
,
strideq
dec
hq
jnz
.
loop
RET
%endmacro
INIT_MMX
mmx
HOR_16B_SHIFT2
OP_PUT
,
put
INIT_MMX
mmxext
HOR_16B_SHIFT2
OP_AVG
,
avg
%endif
; HAVE_MMX_INLINE
%macro
INV_TRANS_INIT
0
...
...
libavcodec/x86/vc1dsp_mmx.c
View file @
bcc22352
...
...
@@ -38,6 +38,10 @@
void
ff_vc1_put_ver_16b_shift2_mmx
(
int16_t
*
dst
,
const
uint8_t
*
src
,
x86_reg
stride
,
int
rnd
,
int64_t
shift
);
void
ff_vc1_put_hor_16b_shift2_mmx
(
uint8_t
*
dst
,
x86_reg
stride
,
const
int16_t
*
src
,
int
rnd
);
void
ff_vc1_avg_hor_16b_shift2_mmxext
(
uint8_t
*
dst
,
x86_reg
stride
,
const
int16_t
*
src
,
int
rnd
);
#define OP_PUT(S,D)
#define OP_AVG(S,D) "pavgb " #S ", " #D " \n\t"
...
...
@@ -70,55 +74,6 @@ void ff_vc1_put_ver_16b_shift2_mmx(int16_t *dst,
"punpcklwd %%mm7, %%mm7 \n\t" \
"punpckldq %%mm7, %%mm7 \n\t"
/**
* Data is already unpacked, so some operations can directly be made from
* memory.
*/
#define VC1_HOR_16b_SHIFT2(OP, OPNAME)\
static void OPNAME ## vc1_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride,\
const int16_t *src, int rnd)\
{\
int h = 8;\
\
src -= 1;\
rnd -= (-1+9+9-1)*1024;
/* Add -1024 bias */
\
__asm__ volatile(\
LOAD_ROUNDER_MMX("%4")\
"movq "MANGLE(ff_pw_128)", %%mm6\n\t"\
"movq "MANGLE(ff_pw_9)", %%mm5 \n\t"\
"1: \n\t"\
"movq 2*0+0(%1), %%mm1 \n\t"\
"movq 2*0+8(%1), %%mm2 \n\t"\
"movq 2*1+0(%1), %%mm3 \n\t"\
"movq 2*1+8(%1), %%mm4 \n\t"\
"paddw 2*3+0(%1), %%mm1 \n\t"\
"paddw 2*3+8(%1), %%mm2 \n\t"\
"paddw 2*2+0(%1), %%mm3 \n\t"\
"paddw 2*2+8(%1), %%mm4 \n\t"\
"pmullw %%mm5, %%mm3 \n\t"\
"pmullw %%mm5, %%mm4 \n\t"\
"psubw %%mm1, %%mm3 \n\t"\
"psubw %%mm2, %%mm4 \n\t"\
NORMALIZE_MMX("$7")\
/* Remove bias */
\
"paddw %%mm6, %%mm3 \n\t"\
"paddw %%mm6, %%mm4 \n\t"\
TRANSFER_DO_PACK(OP)\
"add $24, %1 \n\t"\
"add %3, %2 \n\t"\
"decl %0 \n\t"\
"jnz 1b \n\t"\
: "+r"(h), "+r" (src), "+r" (dst)\
: "r"(stride), "m"(rnd)\
NAMED_CONSTRAINTS_ADD(ff_pw_128,ff_pw_9)\
: "memory"\
);\
}
VC1_HOR_16b_SHIFT2
(
OP_PUT
,
put_
)
VC1_HOR_16b_SHIFT2
(
OP_AVG
,
avg_
)
/**
* Purely vertical or horizontal 1/2 shift interpolation.
* Sacrifice mm6 for the *9 factor.
...
...
@@ -380,14 +335,14 @@ typedef void (*vc1_mspel_mc_filter_8bits)(uint8_t *dst, const uint8_t *src, x86_
* @param vmode Vertical filter.
* @param rnd Rounding bias.
*/
#define VC1_MSPEL_MC(OP)\
#define VC1_MSPEL_MC(OP
, INSTR
)\
static void OP ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride,\
int hmode, int vmode, int rnd)\
{\
static const vc1_mspel_mc_filter_ver_16bits vc1_put_shift_ver_16bits[] =\
{ NULL, vc1_put_ver_16b_shift1_mmx, ff_vc1_put_ver_16b_shift2_mmx, vc1_put_ver_16b_shift3_mmx };\
static const vc1_mspel_mc_filter_hor_16bits vc1_put_shift_hor_16bits[] =\
{ NULL, OP ## vc1_hor_16b_shift1_mmx,
OP ## vc1_hor_16b_shift2_mmx
, OP ## vc1_hor_16b_shift3_mmx };\
{ NULL, OP ## vc1_hor_16b_shift1_mmx,
ff_vc1_ ## OP ## hor_16b_shift2_ ## INSTR
, OP ## vc1_hor_16b_shift3_mmx };\
static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] =\
{ NULL, OP ## vc1_shift1_mmx, OP ## vc1_shift2_mmx, OP ## vc1_shift3_mmx };\
\
...
...
@@ -428,8 +383,8 @@ static void OP ## vc1_mspel_mc_16(uint8_t *dst, const uint8_t *src, \
OP ## vc1_mspel_mc(dst + 8, src + 8, stride, hmode, vmode, rnd); \
}
VC1_MSPEL_MC
(
put_
)
VC1_MSPEL_MC
(
avg_
)
VC1_MSPEL_MC
(
put_
,
mmx
)
VC1_MSPEL_MC
(
avg_
,
mmxext
)
/** Macro to ease bicubic filter interpolation functions declarations */
#define DECLARE_FUNCTION(a, b) \
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment