Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
7b19e76a
Commit
7b19e76a
authored
Aug 15, 2017
by
Paul B Mahol
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avfilter/vf_transpose: rewrite for x86 SIMD
Transpose first in chunks of 8x8 blocks. 15% faster overall.
parent
cbd8e070
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
141 additions
and
41 deletions
+141
-41
vf_transpose.c
libavfilter/vf_transpose.c
+141
-41
No files found.
libavfilter/vf_transpose.c
View file @
7b19e76a
...
...
@@ -58,6 +58,12 @@ typedef struct TransContext {
int
passthrough
;
///< PassthroughType, landscape passthrough mode enabled
int
dir
;
///< TransposeDir
void
(
*
transpose_8x8
)(
uint8_t
*
src
,
ptrdiff_t
src_linesize
,
uint8_t
*
dst
,
ptrdiff_t
dst_linesize
);
void
(
*
transpose_block
)(
uint8_t
*
src
,
ptrdiff_t
src_linesize
,
uint8_t
*
dst
,
ptrdiff_t
dst_linesize
,
int
w
,
int
h
);
}
TransContext
;
static
int
query_formats
(
AVFilterContext
*
ctx
)
...
...
@@ -79,6 +85,109 @@ static int query_formats(AVFilterContext *ctx)
return
ff_set_common_formats
(
ctx
,
pix_fmts
);
}
/**
 * Transpose a block of 1-byte elements.
 *
 * Output element (x, y) is read from source line x, byte offset y, i.e.
 * dst[y * dst_linesize + x] = src[x * src_linesize + y].
 *
 * @param src          first source byte of the block
 * @param src_linesize distance in bytes between consecutive source lines
 * @param dst          first destination byte of the block
 * @param dst_linesize distance in bytes between consecutive destination lines
 * @param w            number of elements written per destination line
 * @param h            number of destination lines written
 */
static inline void transpose_block_8_c(uint8_t *src, ptrdiff_t src_linesize,
                                       uint8_t *dst, ptrdiff_t dst_linesize,
                                       int w, int h)
{
    int row, col;

    for (row = 0; row < h; row++) {
        /* Destination line `row` gathers source column `row`. */
        const uint8_t *in  = src + row;
        uint8_t       *out = dst + row * dst_linesize;

        for (col = 0; col < w; col++)
            out[col] = in[col * src_linesize];
    }
}
/**
 * Transpose one 8x8 block of 1-byte elements by forwarding to
 * transpose_block_8_c() with both dimensions fixed to 8.
 */
static void transpose_8x8_8_c(uint8_t *src, ptrdiff_t src_linesize,
                              uint8_t *dst, ptrdiff_t dst_linesize)
{
    enum { BLOCK_DIM = 8 };

    transpose_block_8_c(src, src_linesize, dst, dst_linesize,
                        BLOCK_DIM, BLOCK_DIM);
}
/**
 * Transpose a block of 2-byte elements.
 *
 * Output element (x, y) is read from source line x, byte offset 2 * y.
 * Elements are copied as whole uint16_t values.
 *
 * NOTE(review): loads and stores go through uint16_t pointers, so callers
 * presumably guarantee 2-byte aligned addresses and linesizes - confirm.
 *
 * @param src          first source byte of the block
 * @param src_linesize distance in bytes between consecutive source lines
 * @param dst          first destination byte of the block
 * @param dst_linesize distance in bytes between consecutive destination lines
 * @param w            number of elements written per destination line
 * @param h            number of destination lines written
 */
static inline void transpose_block_16_c(uint8_t *src, ptrdiff_t src_linesize,
                                        uint8_t *dst, ptrdiff_t dst_linesize,
                                        int w, int h)
{
    int row, col;

    for (row = 0; row < h; row++) {
        uint8_t *in  = src + 2 * row;
        uint8_t *out = dst + row * dst_linesize;

        for (col = 0; col < w; col++) {
            uint16_t *from = (uint16_t *)(in + col * src_linesize);
            uint16_t *to   = (uint16_t *)(out + 2 * col);

            *to = *from;
        }
    }
}
/**
 * Transpose one 8x8 block of 2-byte elements by forwarding to
 * transpose_block_16_c() with both dimensions fixed to 8.
 */
static void transpose_8x8_16_c(uint8_t *src, ptrdiff_t src_linesize,
                               uint8_t *dst, ptrdiff_t dst_linesize)
{
    enum { BLOCK_DIM = 8 };

    transpose_block_16_c(src, src_linesize, dst, dst_linesize,
                         BLOCK_DIM, BLOCK_DIM);
}
/**
 * Transpose a block of 3-byte elements.
 *
 * Output element (x, y) is read from source line x, byte offset 3 * y, and
 * copied with the AV_RB24 / AV_WB24 macro pair (presumably a 24-bit
 * big-endian read and write - defined outside this view).
 *
 * @param src          first source byte of the block
 * @param src_linesize distance in bytes between consecutive source lines
 * @param dst          first destination byte of the block
 * @param dst_linesize distance in bytes between consecutive destination lines
 * @param w            number of elements written per destination line
 * @param h            number of destination lines written
 */
static inline void transpose_block_24_c(uint8_t *src, ptrdiff_t src_linesize,
                                        uint8_t *dst, ptrdiff_t dst_linesize,
                                        int w, int h)
{
    int row, col;

    for (row = 0; row < h; row++) {
        uint8_t *in  = src + 3 * row;
        uint8_t *out = dst + row * dst_linesize;

        for (col = 0; col < w; col++) {
            int32_t pixel = AV_RB24(in + col * src_linesize);

            AV_WB24(out + 3 * col, pixel);
        }
    }
}
/**
 * Transpose one 8x8 block of 3-byte elements by forwarding to
 * transpose_block_24_c() with both dimensions fixed to 8.
 */
static void transpose_8x8_24_c(uint8_t *src, ptrdiff_t src_linesize,
                               uint8_t *dst, ptrdiff_t dst_linesize)
{
    enum { BLOCK_DIM = 8 };

    transpose_block_24_c(src, src_linesize, dst, dst_linesize,
                         BLOCK_DIM, BLOCK_DIM);
}
/**
 * Transpose a block of 4-byte elements.
 *
 * Output element (x, y) is read from source line x, byte offset 4 * y.
 * Elements are copied as whole uint32_t values.
 *
 * NOTE(review): loads and stores go through uint32_t pointers, so callers
 * presumably guarantee 4-byte aligned addresses and linesizes - confirm.
 *
 * @param src          first source byte of the block
 * @param src_linesize distance in bytes between consecutive source lines
 * @param dst          first destination byte of the block
 * @param dst_linesize distance in bytes between consecutive destination lines
 * @param w            number of elements written per destination line
 * @param h            number of destination lines written
 */
static inline void transpose_block_32_c(uint8_t *src, ptrdiff_t src_linesize,
                                        uint8_t *dst, ptrdiff_t dst_linesize,
                                        int w, int h)
{
    int row, col;

    for (row = 0; row < h; row++) {
        uint8_t *in  = src + 4 * row;
        uint8_t *out = dst + row * dst_linesize;

        for (col = 0; col < w; col++) {
            uint32_t *from = (uint32_t *)(in + col * src_linesize);
            uint32_t *to   = (uint32_t *)(out + 4 * col);

            *to = *from;
        }
    }
}
/**
 * Transpose one 8x8 block of 4-byte elements by forwarding to
 * transpose_block_32_c() with both dimensions fixed to 8.
 */
static void transpose_8x8_32_c(uint8_t *src, ptrdiff_t src_linesize,
                               uint8_t *dst, ptrdiff_t dst_linesize)
{
    enum { BLOCK_DIM = 8 };

    transpose_block_32_c(src, src_linesize, dst, dst_linesize,
                         BLOCK_DIM, BLOCK_DIM);
}
/**
 * Transpose a block of 6-byte elements.
 *
 * Output element (x, y) is read from source line x, byte offset 6 * y, and
 * copied with the AV_RB48 / AV_WB48 macro pair (presumably a 48-bit
 * big-endian read and write - defined outside this view).
 *
 * @param src          first source byte of the block
 * @param src_linesize distance in bytes between consecutive source lines
 * @param dst          first destination byte of the block
 * @param dst_linesize distance in bytes between consecutive destination lines
 * @param w            number of elements written per destination line
 * @param h            number of destination lines written
 */
static inline void transpose_block_48_c(uint8_t *src, ptrdiff_t src_linesize,
                                        uint8_t *dst, ptrdiff_t dst_linesize,
                                        int w, int h)
{
    int row, col;

    for (row = 0; row < h; row++) {
        uint8_t *in  = src + 6 * row;
        uint8_t *out = dst + row * dst_linesize;

        for (col = 0; col < w; col++) {
            int64_t pixel = AV_RB48(in + col * src_linesize);

            AV_WB48(out + 6 * col, pixel);
        }
    }
}
/**
 * Transpose one 8x8 block of 6-byte elements by forwarding to
 * transpose_block_48_c() with both dimensions fixed to 8.
 */
static void transpose_8x8_48_c(uint8_t *src, ptrdiff_t src_linesize,
                               uint8_t *dst, ptrdiff_t dst_linesize)
{
    enum { BLOCK_DIM = 8 };

    transpose_block_48_c(src, src_linesize, dst, dst_linesize,
                         BLOCK_DIM, BLOCK_DIM);
}
/**
 * Transpose a block of 8-byte elements.
 *
 * Output element (x, y) is read from source line x, byte offset 8 * y.
 * Elements are copied as whole uint64_t values.
 *
 * NOTE(review): loads and stores go through uint64_t pointers, so callers
 * presumably guarantee 8-byte aligned addresses and linesizes - confirm.
 *
 * @param src          first source byte of the block
 * @param src_linesize distance in bytes between consecutive source lines
 * @param dst          first destination byte of the block
 * @param dst_linesize distance in bytes between consecutive destination lines
 * @param w            number of elements written per destination line
 * @param h            number of destination lines written
 */
static inline void transpose_block_64_c(uint8_t *src, ptrdiff_t src_linesize,
                                        uint8_t *dst, ptrdiff_t dst_linesize,
                                        int w, int h)
{
    int row, col;

    for (row = 0; row < h; row++) {
        uint8_t *in  = src + 8 * row;
        uint8_t *out = dst + row * dst_linesize;

        for (col = 0; col < w; col++) {
            uint64_t *from = (uint64_t *)(in + col * src_linesize);
            uint64_t *to   = (uint64_t *)(out + 8 * col);

            *to = *from;
        }
    }
}
/**
 * Transpose one 8x8 block of 8-byte elements by forwarding to
 * transpose_block_64_c() with both dimensions fixed to 8.
 */
static void transpose_8x8_64_c(uint8_t *src, ptrdiff_t src_linesize,
                               uint8_t *dst, ptrdiff_t dst_linesize)
{
    enum { BLOCK_DIM = 8 };

    transpose_block_64_c(src, src_linesize, dst, dst_linesize,
                         BLOCK_DIM, BLOCK_DIM);
}
static
int
config_props_output
(
AVFilterLink
*
outlink
)
{
AVFilterContext
*
ctx
=
outlink
->
src
;
...
...
@@ -118,6 +227,21 @@ static int config_props_output(AVFilterLink *outlink)
else
outlink
->
sample_aspect_ratio
=
inlink
->
sample_aspect_ratio
;
switch
(
s
->
pixsteps
[
0
])
{
case
1
:
s
->
transpose_block
=
transpose_block_8_c
;
s
->
transpose_8x8
=
transpose_8x8_8_c
;
break
;
case
2
:
s
->
transpose_block
=
transpose_block_16_c
;
s
->
transpose_8x8
=
transpose_8x8_16_c
;
break
;
case
3
:
s
->
transpose_block
=
transpose_block_24_c
;
s
->
transpose_8x8
=
transpose_8x8_24_c
;
break
;
case
4
:
s
->
transpose_block
=
transpose_block_32_c
;
s
->
transpose_8x8
=
transpose_8x8_32_c
;
break
;
case
6
:
s
->
transpose_block
=
transpose_block_48_c
;
s
->
transpose_8x8
=
transpose_8x8_48_c
;
break
;
case
8
:
s
->
transpose_block
=
transpose_block_64_c
;
s
->
transpose_8x8
=
transpose_8x8_64_c
;
break
;
}
av_log
(
ctx
,
AV_LOG_VERBOSE
,
"w:%d h:%d dir:%d -> w:%d h:%d rotation:%s vflip:%d
\n
"
,
inlink
->
w
,
inlink
->
h
,
s
->
dir
,
outlink
->
w
,
outlink
->
h
,
...
...
@@ -176,49 +300,25 @@ static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr,
dstlinesize
*=
-
1
;
}
switch
(
pixstep
)
{
case
1
:
for
(
y
=
start
;
y
<
end
;
y
++
,
dst
+=
dstlinesize
)
for
(
x
=
0
;
x
<
outw
;
x
++
)
dst
[
x
]
=
src
[
x
*
srclinesize
+
y
];
break
;
case
2
:
for
(
y
=
start
;
y
<
end
;
y
++
,
dst
+=
dstlinesize
)
{
for
(
x
=
0
;
x
<
outw
;
x
++
)
*
((
uint16_t
*
)(
dst
+
2
*
x
))
=
*
((
uint16_t
*
)(
src
+
x
*
srclinesize
+
y
*
2
));
}
break
;
case
3
:
for
(
y
=
start
;
y
<
end
;
y
++
,
dst
+=
dstlinesize
)
{
for
(
x
=
0
;
x
<
outw
;
x
++
)
{
int32_t
v
=
AV_RB24
(
src
+
x
*
srclinesize
+
y
*
3
);
AV_WB24
(
dst
+
3
*
x
,
v
);
}
}
break
;
case
4
:
for
(
y
=
start
;
y
<
end
;
y
++
,
dst
+=
dstlinesize
)
{
for
(
x
=
0
;
x
<
outw
;
x
++
)
*
((
uint32_t
*
)(
dst
+
4
*
x
))
=
*
((
uint32_t
*
)(
src
+
x
*
srclinesize
+
y
*
4
));
for
(
y
=
start
;
y
<
end
-
7
;
y
+=
8
)
{
for
(
x
=
0
;
x
<
outw
-
7
;
x
+=
8
)
{
s
->
transpose_8x8
(
src
+
x
*
srclinesize
+
y
*
pixstep
,
srclinesize
,
dst
+
(
y
-
start
)
*
dstlinesize
+
x
*
pixstep
,
dstlinesize
);
}
break
;
case
6
:
for
(
y
=
start
;
y
<
end
;
y
++
,
dst
+=
dstlinesize
)
{
for
(
x
=
0
;
x
<
outw
;
x
++
)
{
int64_t
v
=
AV_RB48
(
src
+
x
*
srclinesize
+
y
*
6
);
AV_WB48
(
dst
+
6
*
x
,
v
);
}
}
break
;
case
8
:
for
(
y
=
start
;
y
<
end
;
y
++
,
dst
+=
dstlinesize
)
{
for
(
x
=
0
;
x
<
outw
;
x
++
)
*
((
uint64_t
*
)(
dst
+
8
*
x
))
=
*
((
uint64_t
*
)(
src
+
x
*
srclinesize
+
y
*
8
));
}
break
;
if
(
outw
-
x
>
0
&&
end
-
y
>
0
)
s
->
transpose_block
(
src
+
x
*
srclinesize
+
y
*
pixstep
,
srclinesize
,
dst
+
(
y
-
start
)
*
dstlinesize
+
x
*
pixstep
,
dstlinesize
,
outw
-
x
,
end
-
y
);
}
if
(
end
-
y
>
0
)
s
->
transpose_block
(
src
+
0
*
srclinesize
+
y
*
pixstep
,
srclinesize
,
dst
+
(
y
-
start
)
*
dstlinesize
+
0
*
pixstep
,
dstlinesize
,
outw
,
end
-
y
);
}
return
0
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment