Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
0e730494
Commit
0e730494
authored
Oct 22, 2013
by
Daniel Kang
Committed by
Diego Biurrun
Oct 23, 2013
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avfilter: x86: Port gradfun filter optimizations to yasm
Signed-off-by:
Diego Biurrun
<
diego@biurrun.de
>
parent
2c993e8b
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
207 additions
and
218 deletions
+207
-218
Makefile
libavfilter/x86/Makefile
+2
-1
vf_gradfun.asm
libavfilter/x86/vf_gradfun.asm
+110
-0
vf_gradfun.c
libavfilter/x86/vf_gradfun.c
+0
-217
vf_gradfun_init.c
libavfilter/x86/vf_gradfun_init.c
+95
-0
No files found.
libavfilter/x86/Makefile
View file @
0e730494
# Objects added to OBJS for each enabled filter.
# NOTE(review): the first gradfun entry below looks like the pre-change line
# of the diff this text was taken from (the following line is its
# replacement) -- confirm which one belongs in the final Makefile.
OBJS-$(CONFIG_GRADFUN_FILTER)                += x86/vf_gradfun.o
OBJS-$(CONFIG_GRADFUN_FILTER)                += x86/vf_gradfun_init.o
OBJS-$(CONFIG_HQDN3D_FILTER)                 += x86/vf_hqdn3d_init.o
OBJS-$(CONFIG_VOLUME_FILTER)                 += x86/af_volume_init.o
OBJS-$(CONFIG_YADIF_FILTER)                  += x86/vf_yadif_init.o

# Objects added to YASM-OBJS for each enabled filter.
YASM-OBJS-$(CONFIG_GRADFUN_FILTER)           += x86/vf_gradfun.o
YASM-OBJS-$(CONFIG_HQDN3D_FILTER)            += x86/vf_hqdn3d.o
YASM-OBJS-$(CONFIG_VOLUME_FILTER)            += x86/af_volume.o
YASM-OBJS-$(CONFIG_YADIF_FILTER)             += x86/vf_yadif.o
libavfilter/x86/vf_gradfun.asm
0 → 100644
View file @
0e730494
;******************************************************************************
;* x86-optimized functions for gradfun filter
;*
;* This file is part of Libav.
;*
;* Libav is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* Libav is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with Libav; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include
"libavutil/x86/x86util.asm"
SECTION_RODATA

; Eight 16-bit words of 0x7F: subtracted from the scaled |delta| in FILTER_LINE.
pw_7f: times 8 dw 0x7F
; Eight 16-bit words of 0xFF: mask selecting the low byte of each word in BLUR_LINE.
pw_ff: times 8 dw 0xFF

SECTION .text
; FILTER_LINE dither
;   Filters one register's worth of pixels at byte offset r0.
;   %1 = register holding the dither words to add.
;   In:  r0 = offset, r1 = dst, r2 = src, r3 = dc
;        m5 = thresh in every word, m6 = pw_7f, m7 = 0
;   Clobbers m0-m2.
%macro FILTER_LINE 1
    movh       m0, [r2+r0]          ; source pixels (bytes)
    movh       m1, [r3+r0]          ; dc words
    punpcklbw  m0, m7               ; pixels -> words
    punpcklwd  m1, m1               ; duplicate each dc word for a pixel pair
    psllw      m0, 7                ; pix <<= 7
    psubw      m1, m0               ; delta = dc - pix
    PABSW      m2, m1
    pmulhuw    m2, m5               ; m = abs(delta) * thresh >> 16
    psubw      m2, m6
    pminsw     m2, m7               ; m = -max(0, 127-m)
    pmullw     m2, m2
    psllw      m1, 2
    paddw      m0, %1               ; pix += dither
    pmulhw     m1, m2               ; m = m*m*delta >> 14
    paddw      m0, m1               ; pix += m
    psraw      m0, 7
    packuswb   m0, m0
    movh       [r1+r0], m0          ; dst = clip(pix>>7)
%endmacro
; MMXEXT version of gradfun_filter_line.
; Register arguments follow the C prototype in vf_gradfun_init.c:
;   r0 = x (negative offset, advanced by 4 per step until it reaches zero)
;   r1 = dst, r2 = src, r3 = dc, r4d = thresh, r5 = dithers
; The loop body runs before the first test of r0, so at least 4 pixels are
; always processed.
INIT_MMX mmxext
cglobal gradfun_filter_line, 6, 6
    movh       m5, r4d
    pxor       m7, m7               ; m7 = 0
    pshufw     m5, m5, 0            ; broadcast thresh to all four words
    mova       m6, [pw_7f]
    mova       m3, [r5]             ; dither words 0-3
    mova       m4, [r5+8]           ; dither words 4-7
.loop:
    FILTER_LINE m3
    add        r0, 4
    jge .end
    FILTER_LINE m4
    add        r0, 4
    jl .loop
.end:
    REP_RET
; SSSE3 version of gradfun_filter_line.
; Register arguments follow the C prototype in vf_gradfun_init.c:
;   r0 = x (negative offset, advanced by 8 per step until it reaches zero)
;   r1 = dst, r2 = src, r3 = dc, r4d = thresh, r5 = dithers
; The loop body runs before the first test of r0, so at least 8 pixels are
; always processed.
INIT_XMM ssse3
cglobal gradfun_filter_line, 6, 6, 8
    movd       m5, r4d
    pxor       m7, m7               ; m7 = 0
    pshuflw    m5, m5, 0            ; broadcast thresh across the low four words
    mova       m6, [pw_7f]
    punpcklqdq m5, m5               ; ... and into the high four words
    mova       m4, [r5]             ; all eight dither words (aligned load)
.loop:
    FILTER_LINE m4
    add        r0, 8
    jl .loop
    REP_RET
; BLUR_LINE load_insn
;   Emits gradfun_blur_line_<load_insn>; %1 is the instruction used to load
;   the two source rows (movdqa or movdqu).
;   Register arguments follow the C prototypes in vf_gradfun_init.c:
;     r0 = x (negative byte offset, advanced by 16 until it reaches zero)
;     r1 = buf, r2 = buf1, r3 = dc, r4 = src1, r5 = src2
;   Per 16-bit lane: sum = the two bytes of the src1 word + the two bytes of
;   the src2 word + buf1; dc = sum - old buf; buf = sum.
%macro BLUR_LINE 1
cglobal gradfun_blur_line_%1, 6, 6, 8
    mova       m7, [pw_ff]
.loop:
    %1         m0, [r4+r0]
    %1         m1, [r5+r0]
    mova       m2, m0
    mova       m3, m1
    psrlw      m0, 8                ; high byte of each word
    psrlw      m1, 8
    pand       m2, m7               ; low byte of each word
    pand       m3, m7
    paddw      m0, m1
    paddw      m2, m3
    paddw      m0, m2
    paddw      m0, [r2+r0]
    mova       m1, [r1+r0]          ; previous buf contents
    mova       [r1+r0], m0
    psubw      m0, m1
    mova       [r3+r0], m0
    add        r0, 16
    jl .loop
    REP_RET
%endmacro

INIT_XMM sse2
BLUR_LINE movdqa
BLUR_LINE movdqu
libavfilter/x86/vf_gradfun.c
deleted
100644 → 0
View file @
2c993e8b
/*
* Copyright (C) 2009 Loren Merritt <lorenm@u.washington.edu>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/mem.h"
#include "libavutil/x86/asm.h"
#include "libavfilter/gradfun.h"
#if HAVE_INLINE_ASM
/* Eight 16-bit words of 0x7F, 16-byte aligned; loaded whole by the
 * filter_line inline assembly. */
DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {
    0x7F, 0x7F, 0x7F, 0x7F,
    0x7F, 0x7F, 0x7F, 0x7F
};

/* Eight 16-bit words of 0xFF, 16-byte aligned; used by the blur_line inline
 * assembly as a low-byte mask. */
DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {
    0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF
};
#if HAVE_MMXEXT_INLINE
static
void
gradfun_filter_line_mmxext
(
uint8_t
*
dst
,
uint8_t
*
src
,
uint16_t
*
dc
,
int
width
,
int
thresh
,
const
uint16_t
*
dithers
)
{
intptr_t
x
;
if
(
width
&
3
)
{
x
=
width
&
~
3
;
ff_gradfun_filter_line_c
(
dst
+
x
,
src
+
x
,
dc
+
x
/
2
,
width
-
x
,
thresh
,
dithers
);
width
=
x
;
}
x
=
-
width
;
__asm__
volatile
(
"movd %4, %%mm5
\n
"
"pxor %%mm7, %%mm7
\n
"
"pshufw $0, %%mm5, %%mm5
\n
"
"movq %6, %%mm6
\n
"
"movq (%5), %%mm3
\n
"
"movq 8(%5), %%mm4
\n
"
"1:
\n
"
"movd (%2,%0), %%mm0
\n
"
"movd (%3,%0), %%mm1
\n
"
"punpcklbw %%mm7, %%mm0
\n
"
"punpcklwd %%mm1, %%mm1
\n
"
"psllw $7, %%mm0
\n
"
"pxor %%mm2, %%mm2
\n
"
"psubw %%mm0, %%mm1
\n
"
// delta = dc - pix
"psubw %%mm1, %%mm2
\n
"
"pmaxsw %%mm1, %%mm2
\n
"
"pmulhuw %%mm5, %%mm2
\n
"
// m = abs(delta) * thresh >> 16
"psubw %%mm6, %%mm2
\n
"
"pminsw %%mm7, %%mm2
\n
"
// m = -max(0, 127-m)
"pmullw %%mm2, %%mm2
\n
"
"paddw %%mm3, %%mm0
\n
"
// pix += dither
"psllw $2, %%mm1
\n
"
// m = m*m*delta >> 14
"pmulhw %%mm2, %%mm1
\n
"
"paddw %%mm1, %%mm0
\n
"
// pix += m
"psraw $7, %%mm0
\n
"
"packuswb %%mm0, %%mm0
\n
"
"movd %%mm0, (%1,%0)
\n
"
// dst = clip(pix>>7)
"add $4, %0
\n
"
"jnl 2f
\n
"
"movd (%2,%0), %%mm0
\n
"
"movd (%3,%0), %%mm1
\n
"
"punpcklbw %%mm7, %%mm0
\n
"
"punpcklwd %%mm1, %%mm1
\n
"
"psllw $7, %%mm0
\n
"
"pxor %%mm2, %%mm2
\n
"
"psubw %%mm0, %%mm1
\n
"
// delta = dc - pix
"psubw %%mm1, %%mm2
\n
"
"pmaxsw %%mm1, %%mm2
\n
"
"pmulhuw %%mm5, %%mm2
\n
"
// m = abs(delta) * thresh >> 16
"psubw %%mm6, %%mm2
\n
"
"pminsw %%mm7, %%mm2
\n
"
// m = -max(0, 127-m)
"pmullw %%mm2, %%mm2
\n
"
"paddw %%mm4, %%mm0
\n
"
// pix += dither
"psllw $2, %%mm1
\n
"
// m = m*m*delta >> 14
"pmulhw %%mm2, %%mm1
\n
"
"paddw %%mm1, %%mm0
\n
"
// pix += m
"psraw $7, %%mm0
\n
"
"packuswb %%mm0, %%mm0
\n
"
"movd %%mm0, (%1,%0)
\n
"
// dst = clip(pix>>7)
"add $4, %0
\n
"
"jl 1b
\n
"
"2:
\n
"
"emms
\n
"
:
"+r"
(
x
)
:
"r"
(
dst
+
width
),
"r"
(
src
+
width
),
"r"
(
dc
+
width
/
2
),
"rm"
(
thresh
),
"r"
(
dithers
),
"m"
(
*
pw_7f
)
:
"memory"
);
}
#endif
#if HAVE_SSSE3_INLINE
static
void
gradfun_filter_line_ssse3
(
uint8_t
*
dst
,
uint8_t
*
src
,
uint16_t
*
dc
,
int
width
,
int
thresh
,
const
uint16_t
*
dithers
)
{
intptr_t
x
;
if
(
width
&
7
)
{
// could be 10% faster if I somehow eliminated this
x
=
width
&
~
7
;
ff_gradfun_filter_line_c
(
dst
+
x
,
src
+
x
,
dc
+
x
/
2
,
width
-
x
,
thresh
,
dithers
);
width
=
x
;
}
x
=
-
width
;
__asm__
volatile
(
"movd %4, %%xmm5
\n
"
"pxor %%xmm7, %%xmm7
\n
"
"pshuflw $0,%%xmm5, %%xmm5
\n
"
"movdqa %6, %%xmm6
\n
"
"punpcklqdq %%xmm5, %%xmm5
\n
"
"movdqa %5, %%xmm4
\n
"
"1:
\n
"
"movq (%2,%0), %%xmm0
\n
"
"movq (%3,%0), %%xmm1
\n
"
"punpcklbw %%xmm7, %%xmm0
\n
"
"punpcklwd %%xmm1, %%xmm1
\n
"
"psllw $7, %%xmm0
\n
"
"psubw %%xmm0, %%xmm1
\n
"
// delta = dc - pix
"pabsw %%xmm1, %%xmm2
\n
"
"pmulhuw %%xmm5, %%xmm2
\n
"
// m = abs(delta) * thresh >> 16
"psubw %%xmm6, %%xmm2
\n
"
"pminsw %%xmm7, %%xmm2
\n
"
// m = -max(0, 127-m)
"pmullw %%xmm2, %%xmm2
\n
"
"psllw $2, %%xmm1
\n
"
"paddw %%xmm4, %%xmm0
\n
"
// pix += dither
"pmulhw %%xmm2, %%xmm1
\n
"
// m = m*m*delta >> 14
"paddw %%xmm1, %%xmm0
\n
"
// pix += m
"psraw $7, %%xmm0
\n
"
"packuswb %%xmm0, %%xmm0
\n
"
"movq %%xmm0, (%1,%0)
\n
"
// dst = clip(pix>>7)
"add $8, %0
\n
"
"jl 1b
\n
"
:
"+&r"
(
x
)
:
"r"
(
dst
+
width
),
"r"
(
src
+
width
),
"r"
(
dc
+
width
/
2
),
"rm"
(
thresh
),
"m"
(
*
dithers
),
"m"
(
*
pw_7f
)
:
"memory"
);
}
#endif
/* HAVE_SSSE3_INLINE */
#if HAVE_SSE2_INLINE
/**
 * SSE2 inline-assembly implementation of the blur_line callback.
 *
 * Walks 16 bytes at a time over two source rows (src and src + src_linesize).
 * For every 16-bit lane it adds the two bytes of each row's word plus the
 * matching buf1 word, stores that sum to buf and stores (sum - previous buf)
 * to dc.
 *
 * Aligned loads are used when both src and src_linesize are multiples of 16,
 * unaligned loads otherwise. buf, buf1 and dc are always accessed with
 * aligned instructions.
 */
static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
                                   uint8_t *src, int src_linesize, int width)
{
/* BLURV(load): the whole kernel, parameterised on the instruction used to
 * fetch the two source rows. x is a negative byte offset counting up to 0. */
#define BLURV(load)\
    intptr_t x = -2*width;\
    __asm__ volatile(\
        "movdqa %6, %%xmm7 \n"\
        "1: \n"\
        load" (%4,%0), %%xmm0 \n"\
        load" (%5,%0), %%xmm1 \n"\
        "movdqa %%xmm0, %%xmm2 \n"\
        "movdqa %%xmm1, %%xmm3 \n"\
        "psrlw $8, %%xmm0 \n"\
        "psrlw $8, %%xmm1 \n"\
        "pand %%xmm7, %%xmm2 \n"\
        "pand %%xmm7, %%xmm3 \n"\
        "paddw %%xmm1, %%xmm0 \n"\
        "paddw %%xmm3, %%xmm2 \n"\
        "paddw %%xmm2, %%xmm0 \n"\
        "paddw (%2,%0), %%xmm0 \n"\
        "movdqa (%1,%0), %%xmm1 \n"\
        "movdqa %%xmm0, (%1,%0) \n"\
        "psubw %%xmm1, %%xmm0 \n"\
        "movdqa %%xmm0, (%3,%0) \n"\
        "add $16, %0 \n"\
        "jl 1b \n"\
        :"+&r"(x)\
        :"r"(buf+width),\
         "r"(buf1+width),\
         "r"(dc+width),\
         "r"(src+width*2),\
         "r"(src+width*2+src_linesize),\
         "m"(*pw_ff)\
        :"memory"\
    );
    const int rows_aligned = !(((intptr_t) src | src_linesize) & 15);

    if (rows_aligned) {
        BLURV("movdqa");
    } else {
        BLURV("movdqu");
    }
}
#endif
/* HAVE_SSE2_INLINE */
#endif
/* HAVE_INLINE_ASM */
/**
 * Install the x86 inline-assembly callbacks supported by the running CPU.
 *
 * filter_line is set to the MMXEXT version and then overridden by the SSSE3
 * version when available; blur_line is set to the SSE2 version. Callbacks
 * whose CPU flag is absent, or whose implementation was compiled out, are
 * left untouched.
 *
 * @param gf filter context whose function pointers are updated
 */
av_cold void ff_gradfun_init_x86(GradFunContext *gf)
{
    /* Declared whenever any user below is compiled in, instead of only
     * under HAVE_MMXEXT_INLINE, so each section builds on its own. */
#if HAVE_MMXEXT_INLINE || HAVE_SSSE3_INLINE || HAVE_SSE2_INLINE
    int cpu_flags = av_get_cpu_flags();
#endif

#if HAVE_MMXEXT_INLINE
    if (cpu_flags & AV_CPU_FLAG_MMXEXT)
        gf->filter_line = gradfun_filter_line_mmxext;
#endif
#if HAVE_SSSE3_INLINE
    if (cpu_flags & AV_CPU_FLAG_SSSE3)
        gf->filter_line = gradfun_filter_line_ssse3;
#endif
#if HAVE_SSE2_INLINE
    if (cpu_flags & AV_CPU_FLAG_SSE2)
        gf->blur_line = gradfun_blur_line_sse2;
#endif
}
libavfilter/x86/vf_gradfun_init.c
0 → 100644
View file @
0e730494
/*
* Copyright (C) 2009 Loren Merritt <lorenm@u.washington.edu>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/mem.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavfilter/gradfun.h"
#if HAVE_YASM
/* Implemented in libavfilter/x86/vf_gradfun.asm. x is a negative offset that
 * is advanced by 4 until it reaches zero; dst, src and dc are addressed
 * relative to it. */
void ff_gradfun_filter_line_mmxext(intptr_t x, uint8_t *dst, uint8_t *src,
                                   uint16_t *dc, int thresh,
                                   const uint16_t *dithers);

/**
 * filter_line callback backed by the MMXEXT assembly kernel.
 *
 * The kernel handles 4 pixels per step. A trailing remainder of (width & 3)
 * pixels is delegated to ff_gradfun_filter_line_c().
 *
 * @param dst     destination pixels
 * @param src     source pixels
 * @param dc      16-bit dc values; indexed at half the pixel offset
 * @param width   number of pixels to process
 * @param thresh  passed through to the kernel unchanged
 * @param dithers passed through to the kernel unchanged
 */
static void gradfun_filter_line_mmxext(uint8_t *dst, uint8_t *src, uint16_t *dc,
                                       int width, int thresh,
                                       const uint16_t *dithers)
{
    intptr_t x;
    if (width & 3) {
        x = width & ~3;
        ff_gradfun_filter_line_c(dst + x, src + x, dc + x / 2,
                                 width - x, thresh, dithers);
        width = x;
    }
    /* The kernel's loop tests its counter only after a 4-pixel step, so
     * calling it with nothing left would still read and write 4 bytes. */
    if (width <= 0)
        return;
    x = -width;
    ff_gradfun_filter_line_mmxext(x, dst + width, src + width, dc + width / 2,
                                  thresh, dithers);
}
/* Implemented in libavfilter/x86/vf_gradfun.asm. x is a negative offset that
 * is advanced by 8 until it reaches zero; dst, src and dc are addressed
 * relative to it. */
void ff_gradfun_filter_line_ssse3(intptr_t x, uint8_t *dst, uint8_t *src,
                                  uint16_t *dc, int thresh,
                                  const uint16_t *dithers);

/**
 * filter_line callback backed by the SSSE3 assembly kernel.
 *
 * The kernel handles 8 pixels per step. A trailing remainder of (width & 7)
 * pixels is delegated to ff_gradfun_filter_line_c().
 *
 * @param dst     destination pixels
 * @param src     source pixels
 * @param dc      16-bit dc values; indexed at half the pixel offset
 * @param width   number of pixels to process
 * @param thresh  passed through to the kernel unchanged
 * @param dithers passed through to the kernel unchanged
 */
static void gradfun_filter_line_ssse3(uint8_t *dst, uint8_t *src, uint16_t *dc,
                                      int width, int thresh,
                                      const uint16_t *dithers)
{
    intptr_t x;
    if (width & 7) {
        // could be 10% faster if I somehow eliminated this
        x = width & ~7;
        ff_gradfun_filter_line_c(dst + x, src + x, dc + x / 2,
                                 width - x, thresh, dithers);
        width = x;
    }
    /* The kernel's loop tests its counter only after an 8-pixel step, so
     * calling it with nothing left would still read and write 8 bytes. */
    if (width <= 0)
        return;
    x = -width;
    ff_gradfun_filter_line_ssse3(x, dst + width, src + width, dc + width / 2,
                                 thresh, dithers);
}
/* Implemented in libavfilter/x86/vf_gradfun.asm; the two variants differ only
 * in the instruction used to load from src1/src2 (movdqa vs. movdqu). x is a
 * negative byte offset advanced by 16 until it reaches zero. */
void ff_gradfun_blur_line_movdqa_sse2(intptr_t x, uint16_t *buf,
                                      uint16_t *buf1, uint16_t *dc,
                                      uint8_t *src1, uint8_t *src2);
void ff_gradfun_blur_line_movdqu_sse2(intptr_t x, uint16_t *buf,
                                      uint16_t *buf1, uint16_t *dc,
                                      uint8_t *src1, uint8_t *src2);

/**
 * blur_line callback backed by the SSE2 assembly kernels.
 *
 * Passes the kernels end-relative pointers plus a negative byte offset, and
 * picks the aligned-load variant only when both src and src_linesize are
 * multiples of 16.
 */
static void gradfun_blur_line_sse2(uint16_t *dc, uint16_t *buf, uint16_t *buf1,
                                   uint8_t *src, int src_linesize, int width)
{
    intptr_t x          = -2 * width;
    uint8_t *row_top    = src + width * 2;
    uint8_t *row_bottom = src + width * 2 + src_linesize;
    const int unaligned = ((intptr_t) src | src_linesize) & 15;

    if (unaligned)
        ff_gradfun_blur_line_movdqu_sse2(x, buf + width, buf1 + width,
                                         dc + width, row_top, row_bottom);
    else
        ff_gradfun_blur_line_movdqa_sse2(x, buf + width, buf1 + width,
                                         dc + width, row_top, row_bottom);
}
#endif
/* HAVE_YASM */
/**
 * Install the yasm-backed x86 callbacks supported by the running CPU.
 *
 * filter_line prefers the SSSE3 version and falls back to MMXEXT; blur_line
 * uses the SSE2 version. Pointers are left untouched when the corresponding
 * check fails or when the build has no yasm support.
 *
 * @param gf filter context whose function pointers are updated
 */
av_cold void ff_gradfun_init_x86(GradFunContext *gf)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_SSE2(cpu_flags))
        gf->blur_line = gradfun_blur_line_sse2;

    if (EXTERNAL_SSSE3(cpu_flags))
        gf->filter_line = gradfun_filter_line_ssse3;
    else if (EXTERNAL_MMXEXT(cpu_flags))
        gf->filter_line = gradfun_filter_line_mmxext;
#endif /* HAVE_YASM */
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment