Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
5740dc27
Commit
5740dc27
authored
Oct 07, 2015
by
Paul B Mahol
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avfilter/vf_w3fdif: add x86 SIMD
Signed-off-by:
Paul B Mahol
<
onemda@gmail.com
>
parent
0281ef90
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
440 additions
and
37 deletions
+440
-37
vf_w3fdif.c
libavfilter/vf_w3fdif.c
+95
-37
w3fdif.h
libavfilter/w3fdif.h
+47
-0
Makefile
libavfilter/x86/Makefile
+2
-0
vf_w3fdif.asm
libavfilter/x86/vf_w3fdif.asm
+234
-0
vf_w3fdif_init.c
libavfilter/x86/vf_w3fdif_init.c
+62
-0
No files found.
libavfilter/vf_w3fdif.c
View file @
5740dc27
...
@@ -29,6 +29,7 @@
...
@@ -29,6 +29,7 @@
#include "formats.h"
#include "formats.h"
#include "internal.h"
#include "internal.h"
#include "video.h"
#include "video.h"
#include "w3fdif.h"
typedef
struct
W3FDIFContext
{
typedef
struct
W3FDIFContext
{
const
AVClass
*
class
;
const
AVClass
*
class
;
...
@@ -42,6 +43,8 @@ typedef struct W3FDIFContext {
...
@@ -42,6 +43,8 @@ typedef struct W3FDIFContext {
AVFrame
*
prev
,
*
cur
,
*
next
;
///< previous, current, next frames
AVFrame
*
prev
,
*
cur
,
*
next
;
///< previous, current, next frames
int32_t
**
work_line
;
///< lines we are calculating
int32_t
**
work_line
;
///< lines we are calculating
int
nb_threads
;
int
nb_threads
;
W3FDIFDSPContext
dsp
;
}
W3FDIFContext
;
}
W3FDIFContext
;
#define OFFSET(x) offsetof(W3FDIFContext, x)
#define OFFSET(x) offsetof(W3FDIFContext, x)
...
@@ -81,6 +84,78 @@ static int query_formats(AVFilterContext *ctx)
...
@@ -81,6 +84,78 @@ static int query_formats(AVFilterContext *ctx)
return
ff_set_common_formats
(
ctx
,
fmts_list
);
return
ff_set_common_formats
(
ctx
,
fmts_list
);
}
}
/**
 * Two-tap low-frequency pass that starts a fresh work line.
 *
 * For every x in [0, linesize) the work value is overwritten with
 *     in_lines_cur[0][x] * coef[0] + in_lines_cur[1][x] * coef[1].
 *
 * @param work_line     destination accumulators, one int32_t per sample
 * @param in_lines_cur  two source line pointers; on return each entry has
 *                      been advanced by the number of samples consumed
 * @param coef          two filter coefficients
 * @param linesize      number of samples to process; nothing is read or
 *                      written when it is <= 0
 */
static void filter_simple_low(int32_t *work_line,
                              uint8_t *in_lines_cur[2],
                              const int16_t *coef, int linesize)
{
    uint8_t *const src0 = in_lines_cur[0];
    uint8_t *const src1 = in_lines_cur[1];
    int x;

    for (x = 0; x < linesize; x++) {
        work_line[x]  = src0[x] * coef[0];
        work_line[x] += src1[x] * coef[1];
    }

    /* x is now the count of processed samples (0 if the loop never ran);
     * hand the advanced pointers back through the caller's array. */
    in_lines_cur[0] = src0 + x;
    in_lines_cur[1] = src1 + x;
}
/**
 * Four-tap low-frequency pass that starts a fresh work line.
 *
 * For every x in [0, linesize) the work value is overwritten with the sum
 * over k = 0..3 of in_lines_cur[k][x] * coef[k].
 *
 * @param work_line     destination accumulators, one int32_t per sample
 * @param in_lines_cur  four source line pointers; on return each entry has
 *                      been advanced by the number of samples consumed
 * @param coef          four filter coefficients
 * @param linesize      number of samples to process; nothing is read or
 *                      written when it is <= 0
 */
static void filter_complex_low(int32_t *work_line,
                               uint8_t *in_lines_cur[4],
                               const int16_t *coef, int linesize)
{
    uint8_t *src[4];
    int x, k;

    for (k = 0; k < 4; k++)
        src[k] = in_lines_cur[k];

    for (x = 0; x < linesize; x++) {
        /* The first tap overwrites, the remaining taps accumulate. */
        work_line[x] = src[0][x] * coef[0];
        for (k = 1; k < 4; k++)
            work_line[x] += src[k][x] * coef[k];
    }

    /* x equals the number of samples processed; publish advanced pointers. */
    for (k = 0; k < 4; k++)
        in_lines_cur[k] = src[k] + x;
}
/**
 * Three-tap high-frequency pass added on top of an existing work line.
 *
 * For every x in [0, linesize) and every tap k = 0..2, both
 * in_lines_cur[k][x] * coef[k] and in_lines_adj[k][x] * coef[k] are added
 * to work_line[x]; the previous contents of work_line are kept.
 *
 * @param work_line     accumulators to add into, one int32_t per sample
 * @param in_lines_cur  three source line pointers; advanced on return by the
 *                      number of samples consumed
 * @param in_lines_adj  three further source line pointers sharing the same
 *                      coefficients; advanced on return likewise
 * @param coef          three filter coefficients
 * @param linesize      number of samples to process; nothing is read or
 *                      written when it is <= 0
 */
static void filter_simple_high(int32_t *work_line,
                               uint8_t *in_lines_cur[3],
                               uint8_t *in_lines_adj[3],
                               const int16_t *coef, int linesize)
{
    uint8_t *cur[3], *adj[3];
    int x, k;

    for (k = 0; k < 3; k++) {
        cur[k] = in_lines_cur[k];
        adj[k] = in_lines_adj[k];
    }

    for (x = 0; x < linesize; x++) {
        for (k = 0; k < 3; k++) {
            work_line[x] += cur[k][x] * coef[k];
            work_line[x] += adj[k][x] * coef[k];
        }
    }

    /* x equals the number of samples processed; publish advanced pointers. */
    for (k = 0; k < 3; k++) {
        in_lines_cur[k] = cur[k] + x;
        in_lines_adj[k] = adj[k] + x;
    }
}
/**
 * Five-tap high-frequency pass added on top of an existing work line.
 *
 * For every x in [0, linesize) and every tap k = 0..4, both
 * in_lines_cur[k][x] * coef[k] and in_lines_adj[k][x] * coef[k] are added
 * to work_line[x]; the previous contents of work_line are kept.
 *
 * @param work_line     accumulators to add into, one int32_t per sample
 * @param in_lines_cur  five source line pointers; advanced on return by the
 *                      number of samples consumed
 * @param in_lines_adj  five further source line pointers sharing the same
 *                      coefficients; advanced on return likewise
 * @param coef          five filter coefficients
 * @param linesize      number of samples to process; nothing is read or
 *                      written when it is <= 0
 */
static void filter_complex_high(int32_t *work_line,
                                uint8_t *in_lines_cur[5],
                                uint8_t *in_lines_adj[5],
                                const int16_t *coef, int linesize)
{
    uint8_t *cur[5], *adj[5];
    int x, k;

    for (k = 0; k < 5; k++) {
        cur[k] = in_lines_cur[k];
        adj[k] = in_lines_adj[k];
    }

    for (x = 0; x < linesize; x++) {
        for (k = 0; k < 5; k++) {
            work_line[x] += cur[k][x] * coef[k];
            work_line[x] += adj[k][x] * coef[k];
        }
    }

    /* x equals the number of samples processed; publish advanced pointers. */
    for (k = 0; k < 5; k++) {
        in_lines_cur[k] = cur[k] + x;
        in_lines_adj[k] = adj[k] + x;
    }
}
/**
 * Convert a finished work line to 8-bit output samples.
 *
 * Each work value is clipped to [0, 255 * 256 * 128] and then shifted right
 * by 15 bits before being stored as a byte.
 *
 * @param out_pixel   destination bytes, one per sample
 * @param work_pixel  source work values, one int32_t per sample
 * @param linesize    number of samples to convert; nothing is done when <= 0
 */
static void filter_scale(uint8_t *out_pixel, const int32_t *work_pixel,
                         int linesize)
{
    int x;

    for (x = 0; x < linesize; x++)
        out_pixel[x] = av_clip(work_pixel[x], 0, 255 * 256 * 128) >> 15;
}
static
int
config_input
(
AVFilterLink
*
inlink
)
static
int
config_input
(
AVFilterLink
*
inlink
)
{
{
AVFilterContext
*
ctx
=
inlink
->
dst
;
AVFilterContext
*
ctx
=
inlink
->
dst
;
...
@@ -101,11 +176,20 @@ static int config_input(AVFilterLink *inlink)
...
@@ -101,11 +176,20 @@ static int config_input(AVFilterLink *inlink)
return
AVERROR
(
ENOMEM
);
return
AVERROR
(
ENOMEM
);
for
(
i
=
0
;
i
<
s
->
nb_threads
;
i
++
)
{
for
(
i
=
0
;
i
<
s
->
nb_threads
;
i
++
)
{
s
->
work_line
[
i
]
=
av_calloc
(
s
->
linesize
[
0
]
,
sizeof
(
*
s
->
work_line
[
0
]));
s
->
work_line
[
i
]
=
av_calloc
(
FFALIGN
(
s
->
linesize
[
0
],
32
)
,
sizeof
(
*
s
->
work_line
[
0
]));
if
(
!
s
->
work_line
[
i
])
if
(
!
s
->
work_line
[
i
])
return
AVERROR
(
ENOMEM
);
return
AVERROR
(
ENOMEM
);
}
}
s
->
dsp
.
filter_simple_low
=
filter_simple_low
;
s
->
dsp
.
filter_complex_low
=
filter_complex_low
;
s
->
dsp
.
filter_simple_high
=
filter_simple_high
;
s
->
dsp
.
filter_complex_high
=
filter_complex_high
;
s
->
dsp
.
filter_scale
=
filter_scale
;
if
(
ARCH_X86
)
ff_w3fdif_init_x86
(
&
s
->
dsp
);
return
0
;
return
0
;
}
}
...
@@ -163,7 +247,7 @@ static int deinterlace_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_
...
@@ -163,7 +247,7 @@ static int deinterlace_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_
const
int
dst_line_stride
=
out
->
linesize
[
plane
];
const
int
dst_line_stride
=
out
->
linesize
[
plane
];
const
int
start
=
(
height
*
jobnr
)
/
nb_jobs
;
const
int
start
=
(
height
*
jobnr
)
/
nb_jobs
;
const
int
end
=
(
height
*
(
jobnr
+
1
))
/
nb_jobs
;
const
int
end
=
(
height
*
(
jobnr
+
1
))
/
nb_jobs
;
int
i
,
j
,
y_in
,
y_out
;
int
j
,
y_in
,
y_out
;
/* copy unchanged the lines of the field */
/* copy unchanged the lines of the field */
y_out
=
start
+
(
s
->
field
==
cur
->
top_field_first
)
-
(
start
&
1
);
y_out
=
start
+
(
s
->
field
==
cur
->
top_field_first
)
-
(
start
&
1
);
...
@@ -184,9 +268,6 @@ static int deinterlace_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_
...
@@ -184,9 +268,6 @@ static int deinterlace_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_
out_line
=
dst_data
+
(
y_out
*
dst_line_stride
);
out_line
=
dst_data
+
(
y_out
*
dst_line_stride
);
while
(
y_out
<
end
)
{
while
(
y_out
<
end
)
{
/* clear workspace */
memset
(
s
->
work_line
[
jobnr
],
0
,
sizeof
(
*
s
->
work_line
[
jobnr
])
*
linesize
);
/* get low vertical frequencies from current field */
/* get low vertical frequencies from current field */
for
(
j
=
0
;
j
<
n_coef_lf
[
filter
];
j
++
)
{
for
(
j
=
0
;
j
<
n_coef_lf
[
filter
];
j
++
)
{
y_in
=
(
y_out
+
1
)
+
(
j
*
2
)
-
n_coef_lf
[
filter
];
y_in
=
(
y_out
+
1
)
+
(
j
*
2
)
-
n_coef_lf
[
filter
];
...
@@ -202,18 +283,12 @@ static int deinterlace_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_
...
@@ -202,18 +283,12 @@ static int deinterlace_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_
work_line
=
s
->
work_line
[
jobnr
];
work_line
=
s
->
work_line
[
jobnr
];
switch
(
n_coef_lf
[
filter
])
{
switch
(
n_coef_lf
[
filter
])
{
case
2
:
case
2
:
for
(
i
=
0
;
i
<
linesize
;
i
++
)
{
s
->
dsp
.
filter_simple_low
(
work_line
,
in_lines_cur
,
*
work_line
+=
*
in_lines_cur
[
0
]
++
*
coef_lf
[
filter
][
0
];
coef_lf
[
filter
],
linesize
);
*
work_line
++
+=
*
in_lines_cur
[
1
]
++
*
coef_lf
[
filter
][
1
];
}
break
;
break
;
case
4
:
case
4
:
for
(
i
=
0
;
i
<
linesize
;
i
++
)
{
s
->
dsp
.
filter_complex_low
(
work_line
,
in_lines_cur
,
*
work_line
+=
*
in_lines_cur
[
0
]
++
*
coef_lf
[
filter
][
0
];
coef_lf
[
filter
],
linesize
);
*
work_line
+=
*
in_lines_cur
[
1
]
++
*
coef_lf
[
filter
][
1
];
*
work_line
+=
*
in_lines_cur
[
2
]
++
*
coef_lf
[
filter
][
2
];
*
work_line
++
+=
*
in_lines_cur
[
3
]
++
*
coef_lf
[
filter
][
3
];
}
}
}
/* get high vertical frequencies from adjacent fields */
/* get high vertical frequencies from adjacent fields */
...
@@ -232,36 +307,19 @@ static int deinterlace_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_
...
@@ -232,36 +307,19 @@ static int deinterlace_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_
work_line
=
s
->
work_line
[
jobnr
];
work_line
=
s
->
work_line
[
jobnr
];
switch
(
n_coef_hf
[
filter
])
{
switch
(
n_coef_hf
[
filter
])
{
case
3
:
case
3
:
for
(
i
=
0
;
i
<
linesize
;
i
++
)
{
s
->
dsp
.
filter_simple_high
(
work_line
,
in_lines_cur
,
in_lines_adj
,
*
work_line
+=
*
in_lines_cur
[
0
]
++
*
coef_hf
[
filter
][
0
];
coef_hf
[
filter
],
linesize
);
*
work_line
+=
*
in_lines_adj
[
0
]
++
*
coef_hf
[
filter
][
0
];
*
work_line
+=
*
in_lines_cur
[
1
]
++
*
coef_hf
[
filter
][
1
];
*
work_line
+=
*
in_lines_adj
[
1
]
++
*
coef_hf
[
filter
][
1
];
*
work_line
+=
*
in_lines_cur
[
2
]
++
*
coef_hf
[
filter
][
2
];
*
work_line
++
+=
*
in_lines_adj
[
2
]
++
*
coef_hf
[
filter
][
2
];
}
break
;
break
;
case
5
:
case
5
:
for
(
i
=
0
;
i
<
linesize
;
i
++
)
{
s
->
dsp
.
filter_complex_high
(
work_line
,
in_lines_cur
,
in_lines_adj
,
*
work_line
+=
*
in_lines_cur
[
0
]
++
*
coef_hf
[
filter
][
0
];
coef_hf
[
filter
],
linesize
);
*
work_line
+=
*
in_lines_adj
[
0
]
++
*
coef_hf
[
filter
][
0
];
*
work_line
+=
*
in_lines_cur
[
1
]
++
*
coef_hf
[
filter
][
1
];
*
work_line
+=
*
in_lines_adj
[
1
]
++
*
coef_hf
[
filter
][
1
];
*
work_line
+=
*
in_lines_cur
[
2
]
++
*
coef_hf
[
filter
][
2
];
*
work_line
+=
*
in_lines_adj
[
2
]
++
*
coef_hf
[
filter
][
2
];
*
work_line
+=
*
in_lines_cur
[
3
]
++
*
coef_hf
[
filter
][
3
];
*
work_line
+=
*
in_lines_adj
[
3
]
++
*
coef_hf
[
filter
][
3
];
*
work_line
+=
*
in_lines_cur
[
4
]
++
*
coef_hf
[
filter
][
4
];
*
work_line
++
+=
*
in_lines_adj
[
4
]
++
*
coef_hf
[
filter
][
4
];
}
}
}
/* save scaled result to the output frame, scaling down by 256 * 128 */
/* save scaled result to the output frame, scaling down by 256 * 128 */
work_pixel
=
s
->
work_line
[
jobnr
];
work_pixel
=
s
->
work_line
[
jobnr
];
out_pixel
=
out_line
;
out_pixel
=
out_line
;
for
(
j
=
0
;
j
<
linesize
;
j
++
,
out_pixel
++
,
work_pixel
++
)
s
->
dsp
.
filter_scale
(
out_pixel
,
work_pixel
,
linesize
);
*
out_pixel
=
av_clip
(
*
work_pixel
,
0
,
255
*
256
*
128
)
>>
15
;
/* move on to next line */
/* move on to next line */
y_out
+=
2
;
y_out
+=
2
;
...
...
libavfilter/w3fdif.h
0 → 100644
View file @
5740dc27
/*
* Copyright (c) 2015 Paul B Mahol
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVFILTER_W3FDIF_H
#define AVFILTER_W3FDIF_H
#include <stddef.h>
#include <stdint.h>
/**
 * Table of per-line routines used by the w3fdif filter.
 *
 * vf_w3fdif.c fills every member with a C implementation and then calls
 * ff_w3fdif_init_x86() (when ARCH_X86 is set), which may replace members.
 * In the C implementations the "low" routines overwrite work_line and the
 * "high" routines add to it; the "simple"/"complex" variants differ in the
 * number of input lines and coefficients they take (2/4 for low, 3/5 for
 * high), as given by the array sizes below.
 */
typedef struct W3FDIFDSPContext {
    /** Low-frequency pass over 2 input lines with 2 coefficients. */
    void (*filter_simple_low)(int32_t *work_line,
                              uint8_t *in_lines_cur[2],
                              const int16_t *coef, int linesize);
    /** Low-frequency pass over 4 input lines with 4 coefficients. */
    void (*filter_complex_low)(int32_t *work_line,
                               uint8_t *in_lines_cur[4],
                               const int16_t *coef, int linesize);
    /** High-frequency pass over 3 + 3 input lines with 3 coefficients. */
    void (*filter_simple_high)(int32_t *work_line,
                               uint8_t *in_lines_cur[3],
                               uint8_t *in_lines_adj[3],
                               const int16_t *coef, int linesize);
    /** High-frequency pass over 5 + 5 input lines with 5 coefficients. */
    void (*filter_complex_high)(int32_t *work_line,
                                uint8_t *in_lines_cur[5],
                                uint8_t *in_lines_adj[5],
                                const int16_t *coef, int linesize);
    /** Convert linesize work values to output bytes. */
    void (*filter_scale)(uint8_t *out_pixel, const int32_t *work_pixel,
                         int linesize);
} W3FDIFDSPContext;
/**
 * Replace members of dsp with x86 SIMD implementations where the running
 * CPU supports them; members without a usable SIMD version are left as is.
 *
 * @param dsp  function table that has already been filled with defaults
 */
void ff_w3fdif_init_x86(W3FDIFDSPContext *dsp);
#endif
/* AVFILTER_W3FDIF_H */
libavfilter/x86/Makefile
View file @
5740dc27
...
@@ -17,6 +17,7 @@ OBJS-$(CONFIG_STEREO3D_FILTER) += x86/vf_stereo3d_init.o
...
@@ -17,6 +17,7 @@ OBJS-$(CONFIG_STEREO3D_FILTER) += x86/vf_stereo3d_init.o
OBJS-$(CONFIG_TBLEND_FILTER)
+=
x86/vf_blend_init.o
OBJS-$(CONFIG_TBLEND_FILTER)
+=
x86/vf_blend_init.o
OBJS-$(CONFIG_TINTERLACE_FILTER)
+=
x86/vf_tinterlace_init.o
OBJS-$(CONFIG_TINTERLACE_FILTER)
+=
x86/vf_tinterlace_init.o
OBJS-$(CONFIG_VOLUME_FILTER)
+=
x86/af_volume_init.o
OBJS-$(CONFIG_VOLUME_FILTER)
+=
x86/af_volume_init.o
OBJS-$(CONFIG_W3FDIF_FILTER)
+=
x86/vf_w3fdif_init.o
OBJS-$(CONFIG_YADIF_FILTER)
+=
x86/vf_yadif_init.o
OBJS-$(CONFIG_YADIF_FILTER)
+=
x86/vf_yadif_init.o
YASM-OBJS-$(CONFIG_BLEND_FILTER)
+=
x86/vf_blend.o
YASM-OBJS-$(CONFIG_BLEND_FILTER)
+=
x86/vf_blend.o
...
@@ -37,4 +38,5 @@ YASM-OBJS-$(CONFIG_STEREO3D_FILTER) += x86/vf_stereo3d.o
...
@@ -37,4 +38,5 @@ YASM-OBJS-$(CONFIG_STEREO3D_FILTER) += x86/vf_stereo3d.o
YASM-OBJS-$(CONFIG_TBLEND_FILTER)
+=
x86/vf_blend.o
YASM-OBJS-$(CONFIG_TBLEND_FILTER)
+=
x86/vf_blend.o
YASM-OBJS-$(CONFIG_TINTERLACE_FILTER)
+=
x86/vf_interlace.o
YASM-OBJS-$(CONFIG_TINTERLACE_FILTER)
+=
x86/vf_interlace.o
YASM-OBJS-$(CONFIG_VOLUME_FILTER)
+=
x86/af_volume.o
YASM-OBJS-$(CONFIG_VOLUME_FILTER)
+=
x86/af_volume.o
YASM-OBJS-$(CONFIG_W3FDIF_FILTER)
+=
x86/vf_w3fdif.o
YASM-OBJS-$(CONFIG_YADIF_FILTER)
+=
x86/vf_yadif.o
x86/yadif-16.o
x86/yadif-10.o
YASM-OBJS-$(CONFIG_YADIF_FILTER)
+=
x86/vf_yadif.o
x86/yadif-16.o
x86/yadif-10.o
libavfilter/x86/vf_w3fdif.asm
0 → 100644
View file @
5740dc27
;*****************************************************************************
;* x86-optimized functions for w3fdif filter
;*
;* Copyright (c) 2015 Paul B Mahol
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include
"libavutil/x86/x86util.asm"
SECTION
.
text
INIT_XMM
sse2
; w3fdif_scale(out_pixel, work_pixel, linesize)
;
; Narrows 32-bit work values to bytes. Every dword is arithmetically shifted
; right by 15, packed to a signed word with saturation (packssdw) and then to
; an unsigned byte with saturation (packuswb).
;
; Each iteration reads 2*mmsize bytes from work_pixel and stores mmsize/2
; bytes to out_pixel. The loop body runs at least once and repeats while the
; decremented linesize is still > 0, so a full group is processed even when
; fewer than mmsize/2 samples remain.
; NOTE(review): mova is presumably the aligned move, i.e. work_pixel is
; expected to be mmsize-aligned - confirm against x86inc.
cglobal w3fdif_scale, 3, 3, 2, 0, out_pixel, work_pixel, linesize
.loop
    mova                         m0, [work_pixelq]
    mova                         m1, [work_pixelq+mmsize]
    psrad                        m0, 15
    psrad                        m1, 15
    packssdw                     m0, m1         ; 2 x dwords -> words, signed saturation
    packuswb                     m0, m0         ; words -> bytes, unsigned saturation
    movh               [out_pixelq], m0         ; store the low half only
    add                  out_pixelq, mmsize/2
    add                 work_pixelq, mmsize*2
    sub                   linesized, mmsize/2
    jg .loop
REP_RET
; w3fdif_simple_low(work_line, in_lines_cur, coef, linesize)
;
; Two-tap low-frequency pass: for each sample x,
;     work_line[x] = cur0[x] * coef[0] + cur1[x] * coef[1]
; where cur0/cur1 are the two line pointers read from the in_lines_cur array.
; The array itself is only read, never written back.
;
; Each iteration loads mmsize/2 bytes from each line and stores 2*mmsize bytes
; of dword results. The loop body runs at least once and repeats while the
; decremented linesize is still > 0, so a full group is processed even when
; fewer than mmsize/2 samples remain.
;
; Fix: the two coefficient splats are interleaved into (coef[0], coef[1]) word
; pairs before the loop, in the same way w3fdif_simple_high does. Previously
; the interleaved (cur0, cur1) pairs were multiplied by splat(coef[0]) for the
; first half of the group and by splat(coef[1]) for the second half, which is
; only correct when coef[0] == coef[1].
cglobal w3fdif_simple_low, 4, 5, 6, 0, work_line, in_lines_cur0, coef, linesize, offset
    movd                  m1, [coefq]
    DEFINE_ARGS    work_line, in_lines_cur0, in_lines_cur1, linesize, offset
    SPLATW                m0, m1, 0
    SPLATW                m1, m1, 1
    SBUTTERFLY            wd, 0, 1, 5           ; m0 = m1 = (coef[0], coef[1]) word pairs
    pxor                  m4, m4                ; zero, used to widen bytes to words
    mov              offsetq, 0
    mov       in_lines_cur1q, [in_lines_cur0q + gprsize]
    mov       in_lines_cur0q, [in_lines_cur0q]
.loop
    movh                                   m2, [in_lines_cur0q+offsetq]
    movh                                   m3, [in_lines_cur1q+offsetq]
    punpcklbw                              m2, m4
    punpcklbw                              m3, m4
    SBUTTERFLY                             wd, 2, 3, 5      ; (cur0[x], cur1[x]) word pairs
    pmaddwd                                m2, m0           ; cur0*coef[0] + cur1*coef[1]
    pmaddwd                                m3, m1
    mova               [work_lineq+offsetq*4], m2
    mova        [work_lineq+offsetq*4+mmsize], m3
    add                               offsetq, mmsize/2
    sub                             linesized, mmsize/2
    jg .loop
REP_RET
; w3fdif_complex_low(work_line, in_lines_cur, coef, linesize)
;
; Four-tap low-frequency pass: for each sample x, work_line[x] is overwritten
; with the sum over k = 0..3 of cur_k[x] * coef[k], where cur_0..cur_3 are the
; line pointers read from the in_lines_cur array (the array is only read).
;
; Each iteration loads mmsize/2 bytes from each line and stores 2*mmsize bytes
; of dword results. The loop body runs at least once and repeats while the
; decremented linesize is still > 0.
cglobal w3fdif_complex_low, 4, 7, 7, 0, work_line, in_lines_cur0, coef, linesize
    movq                  m0, [coefq]           ; coef[0..3]
    DEFINE_ARGS    work_line, in_lines_cur0, in_lines_cur1, linesize, offset, in_lines_cur2, in_lines_cur3
    pshufd                m2, m0, q1111         ; (coef[2], coef[3]) in every dword
    SPLATD                m0                    ; presumably (coef[0], coef[1]) in every dword - see x86util
    mov              offsetq, 0
    mov       in_lines_cur3q, [in_lines_cur0q+gprsize*3]
    mov       in_lines_cur2q, [in_lines_cur0q+gprsize*2]
    mov       in_lines_cur1q, [in_lines_cur0q+gprsize]
    mov       in_lines_cur0q, [in_lines_cur0q]  ; last: overwrites the array base
.loop
    movh                                   m4, [in_lines_cur0q+offsetq]
    movh                                   m5, [in_lines_cur1q+offsetq]
    pxor                                   m1, m1           ; zero is rebuilt every iteration (m1 is reused as scratch below)
    punpcklbw                              m4, m1
    punpcklbw                              m5, m1
    SBUTTERFLY                             wd, 4, 5, 3      ; (cur0[x], cur1[x]) word pairs
    pmaddwd                                m4, m0
    pmaddwd                                m5, m0
    movh                                   m6, [in_lines_cur2q+offsetq]
    movh                                   m3, [in_lines_cur3q+offsetq]
    punpcklbw                              m6, m1
    punpcklbw                              m3, m1
    SBUTTERFLY                             wd, 6, 3, 1      ; (cur2[x], cur3[x]) word pairs
    pmaddwd                                m6, m2
    pmaddwd                                m3, m2
    paddd                                  m4, m6
    paddd                                  m5, m3
    mova               [work_lineq+offsetq*4], m4
    mova        [work_lineq+offsetq*4+mmsize], m5
    add                               offsetq, mmsize/2
    sub                             linesized, mmsize/2
    jg .loop
REP_RET
%if
ARCH_X86_64
; w3fdif_simple_high(work_line, in_lines_cur, in_lines_adj, coef, linesize)
;
; Three-tap high-frequency pass: for each sample x, the sum over k = 0..2 of
; (cur_k[x] + adj_k[x]) * coef[k] is added to the existing work_line[x].
; cur_k / adj_k are the line pointers read from the two pointer arrays (the
; arrays are only read). Built only when ARCH_X86_64 is set (uses 9 GPRs and
; 9 vector registers).
;
; Each iteration loads mmsize/2 bytes from each line and updates 2*mmsize
; bytes of work_line. The loop body runs at least once and repeats while the
; decremented linesize is still > 0.
cglobal w3fdif_simple_high, 5, 9, 9, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
    movq                  m2, [coefq]
    DEFINE_ARGS    work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, linesize, offset, in_lines_cur2, in_lines_adj1, in_lines_adj2
    SPLATW                m0, m2, 0
    SPLATW                m1, m2, 1
    SPLATW                m2, m2, 2
    SBUTTERFLY            wd, 0, 1, 7           ; m0 = m1 = (coef[0], coef[1]) word pairs
    pxor                  m7, m7                ; zero, used to widen bytes to words
    mov              offsetq, 0
    mov       in_lines_cur2q, [in_lines_cur0q+gprsize*2]
    mov       in_lines_cur1q, [in_lines_cur0q+gprsize]
    mov       in_lines_cur0q, [in_lines_cur0q]  ; last: overwrites the array base
    mov       in_lines_adj2q, [in_lines_adj0q+gprsize*2]
    mov       in_lines_adj1q, [in_lines_adj0q+gprsize]
    mov       in_lines_adj0q, [in_lines_adj0q]  ; last: overwrites the array base
.loop
    ; cur0*coef[0] + cur1*coef[1]
    movh                                   m3, [in_lines_cur0q+offsetq]
    movh                                   m4, [in_lines_cur1q+offsetq]
    punpcklbw                              m3, m7
    punpcklbw                              m4, m7
    SBUTTERFLY                             wd, 3, 4, 8
    pmaddwd                                m3, m0
    pmaddwd                                m4, m1
    ; + adj0*coef[0] + adj1*coef[1]
    movh                                   m5, [in_lines_adj0q+offsetq]
    movh                                   m6, [in_lines_adj1q+offsetq]
    punpcklbw                              m5, m7
    punpcklbw                              m6, m7
    SBUTTERFLY                             wd, 5, 6, 8
    pmaddwd                                m5, m0
    pmaddwd                                m6, m1
    paddd                                  m3, m5
    paddd                                  m4, m6
    ; + (cur2 + adj2)*coef[2]
    movh                                   m5, [in_lines_cur2q+offsetq]
    movh                                   m6, [in_lines_adj2q+offsetq]
    punpcklbw                              m5, m7
    punpcklbw                              m6, m7
    SBUTTERFLY                             wd, 5, 6, 8
    pmaddwd                                m5, m2
    pmaddwd                                m6, m2
    paddd                                  m3, m5
    paddd                                  m4, m6
    ; accumulate onto the values already in work_line
    movu                                   m5, [work_lineq+offsetq*4]
    movu                                   m6, [work_lineq+offsetq*4+mmsize]
    paddd                                  m3, m5
    paddd                                  m4, m6
    mova               [work_lineq+offsetq*4], m3
    mova        [work_lineq+offsetq*4+mmsize], m4
    add                               offsetq, mmsize/2
    sub                             linesized, mmsize/2
    jg .loop
REP_RET
; w3fdif_complex_high(work_line, in_lines_cur, in_lines_adj, coef, linesize)
;
; Five-tap high-frequency pass: for each sample x, the sum over k = 0..4 of
; (cur_k[x] + adj_k[x]) * coef[k] is added to the existing work_line[x].
; cur_k / adj_k are the line pointers read from the two pointer arrays (the
; arrays are only read). Built only when ARCH_X86_64 is set (uses 13 GPRs and
; 10 vector registers).
;
; Each iteration loads mmsize/2 bytes from each line and updates 2*mmsize
; bytes of work_line. The loop body runs at least once and repeats while the
; decremented linesize is still > 0.
cglobal w3fdif_complex_high, 5, 13, 10, 0, work_line, in_lines_cur0, in_lines_adj0, coef, linesize
    movq                  m0, [coefq+0]         ; coef[0..3]
    movd                  m4, [coefq+8]         ; coef[4] in the low word
    DEFINE_ARGS    work_line, in_lines_cur0, in_lines_adj0, in_lines_cur1, linesize, offset, in_lines_cur2, in_lines_cur3, in_lines_cur4, in_lines_adj1, in_lines_adj2, in_lines_adj3, in_lines_adj4
    pshufd                m1, m0, q1111         ; (coef[2], coef[3]) in every dword
    SPLATD                m0                    ; presumably (coef[0], coef[1]) in every dword - see x86util
    SPLATW                m4, m4                ; presumably coef[4] in every word - see x86util
    pxor                  m3, m3                ; zero, used to widen bytes to words
    mov              offsetq, 0
    mov       in_lines_cur4q, [in_lines_cur0q+gprsize*4]
    mov       in_lines_cur3q, [in_lines_cur0q+gprsize*3]
    mov       in_lines_cur2q, [in_lines_cur0q+gprsize*2]
    mov       in_lines_cur1q, [in_lines_cur0q+gprsize]
    mov       in_lines_cur0q, [in_lines_cur0q]  ; last: overwrites the array base
    mov       in_lines_adj4q, [in_lines_adj0q+gprsize*4]
    mov       in_lines_adj3q, [in_lines_adj0q+gprsize*3]
    mov       in_lines_adj2q, [in_lines_adj0q+gprsize*2]
    mov       in_lines_adj1q, [in_lines_adj0q+gprsize]
    mov       in_lines_adj0q, [in_lines_adj0q]  ; last: overwrites the array base
.loop
    ; cur0*coef[0] + cur1*coef[1]
    movh                                   m5, [in_lines_cur0q+offsetq]
    movh                                   m6, [in_lines_cur1q+offsetq]
    punpcklbw                              m5, m3
    punpcklbw                              m6, m3
    SBUTTERFLY                             wd, 5, 6, 2
    pmaddwd                                m5, m0
    pmaddwd                                m6, m0
    ; + cur2*coef[2] + cur3*coef[3]
    movh                                   m8, [in_lines_cur2q+offsetq]
    movh                                   m9, [in_lines_cur3q+offsetq]
    punpcklbw                              m8, m3
    punpcklbw                              m9, m3
    SBUTTERFLY                             wd, 8, 9, 2
    pmaddwd                                m8, m1
    pmaddwd                                m9, m1
    paddd                                  m5, m8
    paddd                                  m6, m9
    ; + adj0*coef[0] + adj1*coef[1]
    movh                                   m8, [in_lines_adj0q+offsetq]
    movh                                   m9, [in_lines_adj1q+offsetq]
    punpcklbw                              m8, m3
    punpcklbw                              m9, m3
    SBUTTERFLY                             wd, 8, 9, 2
    pmaddwd                                m8, m0
    pmaddwd                                m9, m0
    paddd                                  m5, m8
    paddd                                  m6, m9
    ; + adj2*coef[2] + adj3*coef[3]
    movh                                   m8, [in_lines_adj2q+offsetq]
    movh                                   m9, [in_lines_adj3q+offsetq]
    punpcklbw                              m8, m3
    punpcklbw                              m9, m3
    SBUTTERFLY                             wd, 8, 9, 2
    pmaddwd                                m8, m1
    pmaddwd                                m9, m1
    paddd                                  m5, m8
    paddd                                  m6, m9
    ; + (cur4 + adj4)*coef[4]
    movh                                   m8, [in_lines_cur4q+offsetq]
    movh                                   m9, [in_lines_adj4q+offsetq]
    punpcklbw                              m8, m3
    punpcklbw                              m9, m3
    SBUTTERFLY                             wd, 8, 9, 2
    pmaddwd                                m8, m4
    pmaddwd                                m9, m4
    paddd                                  m5, m8
    paddd                                  m6, m9
    ; accumulate onto the values already in work_line
    movu                                   m8, [work_lineq+offsetq*4]
    movu                                   m9, [work_lineq+offsetq*4+mmsize]
    paddd                                  m5, m8
    paddd                                  m6, m9
    mova               [work_lineq+offsetq*4], m5
    mova        [work_lineq+offsetq*4+mmsize], m6
    add                               offsetq, mmsize/2
    sub                             linesized, mmsize/2
    jg .loop
REP_RET
%endif
libavfilter/x86/vf_w3fdif_init.c
0 → 100644
View file @
5740dc27
/*
* Copyright (C) 2015 Paul B Mahol
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/mem.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavfilter/w3fdif.h"
/*
 * SSE2 routines installed by ff_w3fdif_init_x86() below. Their signatures
 * mirror the corresponding W3FDIFDSPContext members.
 * NOTE(review): presumably these are the cglobal w3fdif_* functions from
 * x86/vf_w3fdif.asm - confirm the symbol naming against x86inc.
 */

/* Low-frequency pass, 2 input lines / 2 coefficients. */
void ff_w3fdif_simple_low_sse2(int32_t *work_line,
                               uint8_t *in_lines_cur[2],
                               const int16_t *coef, int linesize);

/* High-frequency pass, 3 + 3 input lines / 3 coefficients. */
void ff_w3fdif_simple_high_sse2(int32_t *work_line,
                                uint8_t *in_lines_cur[3],
                                uint8_t *in_lines_adj[3],
                                const int16_t *coef, int linesize);

/* Low-frequency pass, 4 input lines / 4 coefficients. */
void ff_w3fdif_complex_low_sse2(int32_t *work_line,
                                uint8_t *in_lines_cur[4],
                                const int16_t *coef, int linesize);

/* High-frequency pass, 5 + 5 input lines / 5 coefficients. */
void ff_w3fdif_complex_high_sse2(int32_t *work_line,
                                 uint8_t *in_lines_cur[5],
                                 uint8_t *in_lines_adj[5],
                                 const int16_t *coef, int linesize);

/* Conversion of work values to output bytes. */
void ff_w3fdif_scale_sse2(uint8_t *out_pixel, const int32_t *work_pixel,
                          int linesize);
/**
 * Install the SSE2 routines into dsp when EXTERNAL_SSE2() accepts the
 * current CPU flags.
 *
 * The low-frequency and scale routines are installed on any such CPU; the
 * two high-frequency routines are installed only when ARCH_X86_64 is also
 * set. Members are left untouched otherwise.
 *
 * @param dsp  function table to update in place
 */
av_cold void ff_w3fdif_init_x86(W3FDIFDSPContext *dsp)
{
    int cpu_flags = av_get_cpu_flags();

    /* The macro tests stay directly in the conditions, as in the original. */
    if (EXTERNAL_SSE2(cpu_flags)) {
        dsp->filter_simple_low  = ff_w3fdif_simple_low_sse2;
        dsp->filter_complex_low = ff_w3fdif_complex_low_sse2;
        dsp->filter_scale       = ff_w3fdif_scale_sse2;

        if (ARCH_X86_64) {
            dsp->filter_simple_high  = ff_w3fdif_simple_high_sse2;
            dsp->filter_complex_high = ff_w3fdif_complex_high_sse2;
        }
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment