Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
486f76f0
Commit
486f76f0
authored
Apr 19, 2013
by
Martin Storsjö
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: Get rid of duplication between *_rnd_template.c
Signed-off-by:
Martin Storsjö
<
martin@martin.st
>
parent
feec9349
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
5 additions
and
197 deletions
+5
-197
dsputil_rnd_template.c
libavcodec/x86/dsputil_rnd_template.c
+3
-0
hpeldsp_rnd_template.c
libavcodec/x86/hpeldsp_rnd_template.c
+2
-197
No files found.
libavcodec/x86/dsputil_rnd_template.c
View file @
486f76f0
...
...
@@ -91,6 +91,8 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff
:
REG_a
,
"memory"
);
}
// avg_pixels
#ifndef NO_RND
// in case more speed is needed - unrolling would certainly help
static
void
DEF
(
avg
,
pixels8
)(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
...
...
@@ -110,6 +112,7 @@ static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, ptrdiff_t l
}
while
(
--
h
);
}
#endif
/* NO_RND */
static
void
DEF
(
avg
,
pixels16
)(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
...
...
libavcodec/x86/hpeldsp_rnd_template.c
View file @
486f76f0
...
...
@@ -24,6 +24,8 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "dsputil_rnd_template.c"
// put_pixels
static
void
DEF
(
put
,
pixels8_x2
)(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
...
...
@@ -132,118 +134,6 @@ static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_
:
REG_a
,
"memory"
);
}
/*
 * DEF(put, pixels8_xy2): for each output row, writes 8 bytes to block.
 * Every output byte is computed in 16-bit lanes as
 *     (p[x] + p[x+1] + q[x] + q[x+1] + rnd) >> 2
 * where p and q are two consecutive source rows (line_size bytes apart)
 * and rnd is whatever SET_RND placed in mm6.
 * Two rows are produced per loop iteration and h is decremented by 2 each
 * time; the loop exits only when h reaches exactly zero, so h is expected
 * to be a non-zero even number.
 * NOTE(review): MOVQ_ZERO and SET_RND are defined elsewhere; mm7 is presumed
 * to be all-zero since it is used as the unpack partner that widens bytes
 * to words - confirm against the macro definitions.
 */
static
void
DEF
(
put
,
pixels8_xy2
)(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
MOVQ_ZERO
(
mm7
);
SET_RND
(
mm6
);
// =2 for rnd and =1 for no_rnd version
__asm__
volatile
(
// Prologue: load 8 bytes at pixels and at pixels+1, widen both to words
// (low half / high half) and keep the pairwise sums in mm4 / mm5.
"movq (%1), %%mm0
\n\t
"
"movq 1(%1), %%mm4
\n\t
"
"movq %%mm0, %%mm1
\n\t
"
"movq %%mm4, %%mm5
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
"punpcklbw %%mm7, %%mm4
\n\t
"
"punpckhbw %%mm7, %%mm1
\n\t
"
"punpckhbw %%mm7, %%mm5
\n\t
"
"paddusw %%mm0, %%mm4
\n\t
"
"paddusw %%mm1, %%mm5
\n\t
"
// REG_a = 0: running byte offset used to index both source and destination.
"xor %%"
REG_a
", %%"
REG_a
"
\n\t
"
// Advance pixels by line_size so (%1, REG_a) addresses the row after the
// one whose sums are currently held.
"add %3, %1
\n\t
"
// Loop head.
".p2align 3
\n\t
"
"1:
\n\t
"
// First row of the pair: horizontal sums of the new source row go to
// mm0 (low half) / mm1 (high half).
"movq (%1, %%"
REG_a
"), %%mm0
\n\t
"
"movq 1(%1, %%"
REG_a
"), %%mm2
\n\t
"
"movq %%mm0, %%mm1
\n\t
"
"movq %%mm2, %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
"punpckhbw %%mm7, %%mm1
\n\t
"
"punpckhbw %%mm7, %%mm3
\n\t
"
"paddusw %%mm2, %%mm0
\n\t
"
"paddusw %%mm3, %%mm1
\n\t
"
// Add the mm6 constant and the new row's sums to the previous row's sums,
// shift right by 2, pack the words back to bytes and store 8 bytes to block.
"paddusw %%mm6, %%mm4
\n\t
"
"paddusw %%mm6, %%mm5
\n\t
"
"paddusw %%mm0, %%mm4
\n\t
"
"paddusw %%mm1, %%mm5
\n\t
"
"psrlw $2, %%mm4
\n\t
"
"psrlw $2, %%mm5
\n\t
"
"packuswb %%mm5, %%mm4
\n\t
"
"movq %%mm4, (%2, %%"
REG_a
")
\n\t
"
// Step the offset to the next row.
"add %3, %%"
REG_a
"
\n\t
"
// Second row of the pair: same computation with the register roles swapped,
// so the sums kept in mm0/mm1 act as the "previous row" and mm4/mm5 receive
// the new row's sums (ready for the next iteration).
"movq (%1, %%"
REG_a
"), %%mm2
\n\t
"
// 0 <-> 2 1 <-> 3
"movq 1(%1, %%"
REG_a
"), %%mm4
\n\t
"
"movq %%mm2, %%mm3
\n\t
"
"movq %%mm4, %%mm5
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
"punpcklbw %%mm7, %%mm4
\n\t
"
"punpckhbw %%mm7, %%mm3
\n\t
"
"punpckhbw %%mm7, %%mm5
\n\t
"
"paddusw %%mm2, %%mm4
\n\t
"
"paddusw %%mm3, %%mm5
\n\t
"
// Combine, shift, pack and store the second output row.
"paddusw %%mm6, %%mm0
\n\t
"
"paddusw %%mm6, %%mm1
\n\t
"
"paddusw %%mm4, %%mm0
\n\t
"
"paddusw %%mm5, %%mm1
\n\t
"
"psrlw $2, %%mm0
\n\t
"
"psrlw $2, %%mm1
\n\t
"
"packuswb %%mm1, %%mm0
\n\t
"
"movq %%mm0, (%2, %%"
REG_a
")
\n\t
"
"add %3, %%"
REG_a
"
\n\t
"
// Two rows done: h -= 2, loop while h != 0.
"subl $2, %0
\n\t
"
"jnz 1b
\n\t
"
// Operands: %0 = h (read/write), %1 = pixels (read/write, "S" constraint),
// %2 = block ("D" constraint), %3 = line_size cast to x86_reg in a register.
// REG_a and memory are declared clobbered.
:
"+g"
(
h
),
"+S"
(
pixels
)
:
"D"
(
block
),
"r"
((
x86_reg
)
line_size
)
:
REG_a
,
"memory"
);
}
// avg_pixels
#ifndef NO_RND
// in case more speed is needed - unrolling would certainly help
/*
 * DEF(avg, pixels8): for each row, loads 8 bytes from block and 8 bytes from
 * pixels, combines them with OP_AVG and stores the 8-byte result back to
 * block; both pointers then advance by line_size.
 * The do/while with a pre-decrement of h runs the body before testing, so
 * the loop processes h rows for h >= 1.
 * NOTE(review): MOVQ_BFE, JUMPALIGN and OP_AVG are defined elsewhere;
 * OP_AVG presumably produces a byte-wise average of its first two arguments
 * in its third, using the constant MOVQ_BFE set up in mm6 - confirm.
 */
static
void
DEF
(
avg
,
pixels8
)(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
// Set up mm6 once, outside the row loop; it is passed to OP_AVG below.
MOVQ_BFE
(
mm6
);
JUMPALIGN
();
do
{
__asm__
volatile
(
// mm0 = 8 bytes currently in block, mm1 = 8 bytes from pixels.
"movq %0, %%mm0
\n\t
"
"movq %1, %%mm1
\n\t
"
// Combine mm0 and mm1; the result is taken from mm2.
OP_AVG
(
%%
mm0
,
%%
mm1
,
%%
mm2
,
%%
mm6
)
// Write the combined row back to block.
"movq %%mm2, %0
\n\t
"
// %0 = *block (read/write memory operand), %1 = *pixels (input memory operand).
:
"+m"
(
*
block
)
:
"m"
(
*
pixels
)
:
"memory"
);
// Step both pointers to the next row.
pixels
+=
line_size
;
block
+=
line_size
;
}
while
(
--
h
);
}
#endif // NO_RND
/*
 * DEF(avg, pixels16): 16-byte-wide counterpart of the 8-byte routine. For
 * each row it processes two 8-byte halves: bytes 0..7 and bytes 8..15 of
 * block are each loaded, combined via OP_AVG with the matching 8 bytes from
 * pixels, and stored back to block. Both pointers advance by line_size per
 * row; the do/while with pre-decrement processes h rows for h >= 1.
 * NOTE(review): MOVQ_BFE, JUMPALIGN and OP_AVG are defined elsewhere;
 * OP_AVG presumably produces a byte-wise average of its first two arguments
 * in its third, using the constant held in mm6 - confirm.
 */
static
void
DEF
(
avg
,
pixels16
)(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
// Set up mm6 once, outside the row loop; it is passed to OP_AVG below.
MOVQ_BFE
(
mm6
);
JUMPALIGN
();
do
{
__asm__
volatile
(
// Left half: mm0 = block[0..7], mm1 = pixels[0..7].
"movq %0, %%mm0
\n\t
"
"movq %1, %%mm1
\n\t
"
OP_AVG
(
%%
mm0
,
%%
mm1
,
%%
mm2
,
%%
mm6
)
"movq %%mm2, %0
\n\t
"
// Right half: "8%0" / "8%1" prefix a displacement of 8 to the memory
// operands, i.e. block[8..15] and pixels[8..15].
"movq 8%0, %%mm0
\n\t
"
"movq 8%1, %%mm1
\n\t
"
OP_AVG
(
%%
mm0
,
%%
mm1
,
%%
mm2
,
%%
mm6
)
"movq %%mm2, 8%0
\n\t
"
// %0 = *block (read/write memory operand), %1 = *pixels (input memory operand).
:
"+m"
(
*
block
)
:
"m"
(
*
pixels
)
:
"memory"
);
// Step both pointers to the next row.
pixels
+=
line_size
;
block
+=
line_size
;
}
while
(
--
h
);
}
#ifndef NO_RND
static
void
DEF
(
avg
,
pixels8_x2
)(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
...
...
@@ -331,98 +221,13 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_
:
REG_a
,
"memory"
);
}
// this routine is 'slightly' suboptimal but mostly unused
/*
 * DEF(avg, pixels8_xy2): computes the same 2x2 value as the put variant,
 *     (p[x] + p[x+1] + q[x] + q[x+1] + rnd) >> 2
 * for 8 bytes per row (p, q = consecutive source rows, rnd = contents of
 * mm6 after SET_RND), but instead of storing it directly it combines it via
 * OP_AVG with the 8 bytes already present in block and stores that result.
 * Two rows are produced per loop iteration and h is decremented by 2 each
 * time; the loop exits only when h reaches exactly zero, so h is expected
 * to be a non-zero even number.
 * NOTE(review): MOVQ_ZERO, SET_RND and OP_AVG are defined elsewhere; mm7 is
 * presumed to be all-zero (it is the unpack partner that widens bytes to
 * words) and OP_AVG is presumed to be a byte-wise average - confirm.
 */
static
void
DEF
(
avg
,
pixels8_xy2
)(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
MOVQ_ZERO
(
mm7
);
SET_RND
(
mm6
);
// =2 for rnd and =1 for no_rnd version
__asm__
volatile
(
// Prologue: load 8 bytes at pixels and at pixels+1, widen both to words
// (low half / high half) and keep the pairwise sums in mm4 / mm5.
"movq (%1), %%mm0
\n\t
"
"movq 1(%1), %%mm4
\n\t
"
"movq %%mm0, %%mm1
\n\t
"
"movq %%mm4, %%mm5
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
"punpcklbw %%mm7, %%mm4
\n\t
"
"punpckhbw %%mm7, %%mm1
\n\t
"
"punpckhbw %%mm7, %%mm5
\n\t
"
"paddusw %%mm0, %%mm4
\n\t
"
"paddusw %%mm1, %%mm5
\n\t
"
// REG_a = 0: running byte offset used to index both source and destination.
"xor %%"
REG_a
", %%"
REG_a
"
\n\t
"
// Advance pixels by line_size so (%1, REG_a) addresses the row after the
// one whose sums are currently held.
"add %3, %1
\n\t
"
// Loop head.
".p2align 3
\n\t
"
"1:
\n\t
"
// First row of the pair: horizontal sums of the new source row go to
// mm0 (low half) / mm1 (high half).
"movq (%1, %%"
REG_a
"), %%mm0
\n\t
"
"movq 1(%1, %%"
REG_a
"), %%mm2
\n\t
"
"movq %%mm0, %%mm1
\n\t
"
"movq %%mm2, %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
"punpckhbw %%mm7, %%mm1
\n\t
"
"punpckhbw %%mm7, %%mm3
\n\t
"
"paddusw %%mm2, %%mm0
\n\t
"
"paddusw %%mm3, %%mm1
\n\t
"
// Add the mm6 constant and the new row's sums to the previous row's sums
// and shift right by 2.
"paddusw %%mm6, %%mm4
\n\t
"
"paddusw %%mm6, %%mm5
\n\t
"
"paddusw %%mm0, %%mm4
\n\t
"
"paddusw %%mm1, %%mm5
\n\t
"
"psrlw $2, %%mm4
\n\t
"
"psrlw $2, %%mm5
\n\t
"
// mm3 = the 8 bytes currently in block for this row; mm4 = packed result.
"movq (%2, %%"
REG_a
"), %%mm3
\n\t
"
"packuswb %%mm5, %%mm4
\n\t
"
// Build the OP_AVG constant in mm2: pcmpeqd sets all bits, then the
// byte-wise self-add turns every 0xFF byte into 0xFE.
"pcmpeqd %%mm2, %%mm2
\n\t
"
"paddb %%mm2, %%mm2
\n\t
"
// Combine the existing destination bytes (mm3) with the new value (mm4);
// the result is taken from mm5 and stored to block.
OP_AVG
(
%%
mm3
,
%%
mm4
,
%%
mm5
,
%%
mm2
)
"movq %%mm5, (%2, %%"
REG_a
")
\n\t
"
// Step the offset to the next row.
"add %3, %%"
REG_a
"
\n\t
"
// Second row of the pair: same computation with the register roles swapped,
// so the sums kept in mm0/mm1 act as the "previous row" and mm4/mm5 receive
// the new row's sums (ready for the next iteration).
"movq (%1, %%"
REG_a
"), %%mm2
\n\t
"
// 0 <-> 2 1 <-> 3
"movq 1(%1, %%"
REG_a
"), %%mm4
\n\t
"
"movq %%mm2, %%mm3
\n\t
"
"movq %%mm4, %%mm5
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
"punpcklbw %%mm7, %%mm4
\n\t
"
"punpckhbw %%mm7, %%mm3
\n\t
"
"punpckhbw %%mm7, %%mm5
\n\t
"
"paddusw %%mm2, %%mm4
\n\t
"
"paddusw %%mm3, %%mm5
\n\t
"
// Combine with the previous row's sums and shift right by 2.
"paddusw %%mm6, %%mm0
\n\t
"
"paddusw %%mm6, %%mm1
\n\t
"
"paddusw %%mm4, %%mm0
\n\t
"
"paddusw %%mm5, %%mm1
\n\t
"
"psrlw $2, %%mm0
\n\t
"
"psrlw $2, %%mm1
\n\t
"
// mm3 = existing destination bytes for this row; mm0 = packed result.
"movq (%2, %%"
REG_a
"), %%mm3
\n\t
"
"packuswb %%mm1, %%mm0
\n\t
"
// Rebuild the 0xFE-per-byte constant in mm2 (mm2 was reused above).
"pcmpeqd %%mm2, %%mm2
\n\t
"
"paddb %%mm2, %%mm2
\n\t
"
// Combine destination bytes (mm3) with the new value (mm0); the result is
// taken from mm1 and stored to block.
OP_AVG
(
%%
mm3
,
%%
mm0
,
%%
mm1
,
%%
mm2
)
"movq %%mm1, (%2, %%"
REG_a
")
\n\t
"
"add %3, %%"
REG_a
"
\n\t
"
// Two rows done: h -= 2, loop while h != 0.
"subl $2, %0
\n\t
"
"jnz 1b
\n\t
"
// Operands: %0 = h (read/write), %1 = pixels (read/write, "S" constraint),
// %2 = block ("D" constraint), %3 = line_size cast to x86_reg in a register.
// REG_a and memory are declared clobbered.
:
"+g"
(
h
),
"+S"
(
pixels
)
:
"D"
(
block
),
"r"
((
x86_reg
)
line_size
)
:
REG_a
,
"memory"
);
}
//FIXME optimize
/*
 * 16-byte-wide put/y2 routine built from the 8-byte one: runs
 * DEF(put, pixels8_y2) on the left 8 columns, then again on the
 * 8 columns starting at offset 8, with the same line_size and h.
 */
static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels,
                                  ptrdiff_t line_size, int h)
{
    uint8_t *const right_dst       = block + 8;
    const uint8_t *const right_src = pixels + 8;

    DEF(put, pixels8_y2)(block, pixels, line_size, h);
    DEF(put, pixels8_y2)(right_dst, right_src, line_size, h);
}
/*
 * 16-byte-wide put/xy2 routine built from the 8-byte one: runs
 * DEF(put, pixels8_xy2) on the left 8 columns, then again on the
 * 8 columns starting at offset 8, with the same line_size and h.
 */
static void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels,
                                   ptrdiff_t line_size, int h)
{
    uint8_t *const right_dst       = block + 8;
    const uint8_t *const right_src = pixels + 8;

    DEF(put, pixels8_xy2)(block, pixels, line_size, h);
    DEF(put, pixels8_xy2)(right_dst, right_src, line_size, h);
}
/*
 * 16-byte-wide avg/y2 routine built from the 8-byte one: runs
 * DEF(avg, pixels8_y2) on the left 8 columns, then again on the
 * 8 columns starting at offset 8, with the same line_size and h.
 */
static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels,
                                  ptrdiff_t line_size, int h)
{
    uint8_t *const right_dst       = block + 8;
    const uint8_t *const right_src = pixels + 8;

    DEF(avg, pixels8_y2)(block, pixels, line_size, h);
    DEF(avg, pixels8_y2)(right_dst, right_src, line_size, h);
}
/*
 * 16-byte-wide avg/xy2 routine built from the 8-byte one: runs
 * DEF(avg, pixels8_xy2) on the left 8 columns, then again on the
 * 8 columns starting at offset 8, with the same line_size and h.
 */
static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels,
                                   ptrdiff_t line_size, int h)
{
    uint8_t *const right_dst       = block + 8;
    const uint8_t *const right_src = pixels + 8;

    DEF(avg, pixels8_xy2)(block, pixels, line_size, h);
    DEF(avg, pixels8_xy2)(right_dst, right_src, line_size, h);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment