Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
8e0a3db7
Commit
8e0a3db7
authored
Mar 26, 2003
by
Michael Niedermayer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
faster hadamard transform
Originally committed as revision 1707 to
svn://svn.ffmpeg.org/ffmpeg/trunk
parent
3b0da3f9
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
115 additions
and
20 deletions
+115
-20
dsputil_mmx.c
libavcodec/i386/dsputil_mmx.c
+115
-20
No files found.
libavcodec/i386/dsputil_mmx.c
View file @
8e0a3db7
...
@@ -583,26 +583,21 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
...
@@ -583,26 +583,21 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
for
(;
i
<
w
;
i
++
)
for
(;
i
<
w
;
i
++
)
dst
[
i
+
0
]
=
src1
[
i
+
0
]
-
src2
[
i
+
0
];
dst
[
i
+
0
]
=
src1
[
i
+
0
]
-
src2
[
i
+
0
];
}
}
/*
 * LBUTTERFLY(a,b): in-place butterfly on the packed 16-bit words of two MMX
 * registers. Operands are in AT&T order (source, destination), so with the
 * incoming values a0 and b0:
 *   paddw b, a   ->  a = a0 + b0
 *   paddw b, b   ->  b = 2*b0
 *   psubw a, b   ->  b = 2*b0 - (a0 + b0) = b0 - a0
 */
#define LBUTTERFLY(a,b)\
    "paddw " #b ", " #a " \n\t"\
    "paddw " #b ", " #b " \n\t"\
    "psubw " #a ", " #b " \n\t"

/*
 * LBUTTERFLY2(a1,b1,a2,b2): the same butterfly applied to two independent
 * register pairs, (a1,b1) and (a2,b2), with the instructions of both pairs
 * interleaved.
 *
 * The last instruction must subtract a2 from b2. Subtracting it from b1
 * instead would damage the first pair's difference and leave b2 holding
 * 2*b2 rather than b2 - a2.
 */
#define LBUTTERFLY2(a1,b1,a2,b2)\
    "paddw " #b1 ", " #a1 " \n\t"\
    "paddw " #b2 ", " #a2 " \n\t"\
    "paddw " #b1 ", " #b1 " \n\t"\
    "paddw " #b2 ", " #b2 " \n\t"\
    "psubw " #a1 ", " #b1 " \n\t"\
    "psubw " #a2 ", " #b2 " \n\t"
#define HADAMARD48\
#define HADAMARD48\
LBUTTERFLY(%%mm0, %%mm1)\
LBUTTERFLY2(%%mm0, %%mm1, %%mm2, %%mm3)\
LBUTTERFLY(%%mm2, %%mm3)\
LBUTTERFLY2(%%mm4, %%mm5, %%mm6, %%mm7)\
LBUTTERFLY(%%mm4, %%mm5)\
LBUTTERFLY2(%%mm0, %%mm2, %%mm1, %%mm3)\
LBUTTERFLY(%%mm6, %%mm7)\
LBUTTERFLY2(%%mm4, %%mm6, %%mm5, %%mm7)\
\
LBUTTERFLY2(%%mm0, %%mm4, %%mm1, %%mm5)\
LBUTTERFLY(%%mm0, %%mm2)\
LBUTTERFLY2(%%mm2, %%mm6, %%mm3, %%mm7)\
LBUTTERFLY(%%mm1, %%mm3)\
LBUTTERFLY(%%mm4, %%mm6)\
LBUTTERFLY(%%mm5, %%mm7)\
\
LBUTTERFLY(%%mm0, %%mm4)\
LBUTTERFLY(%%mm1, %%mm5)\
LBUTTERFLY(%%mm2, %%mm6)\
LBUTTERFLY(%%mm3, %%mm7)
#define MMABS(a,z)\
#define MMABS(a,z)\
"pxor " #z ", " #z " \n\t"\
"pxor " #z ", " #z " \n\t"\
...
@@ -617,12 +612,22 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
...
@@ -617,12 +612,22 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
"psubw " #z ", " #a " \n\t"\
"psubw " #z ", " #a " \n\t"\
"paddusw " #a ", " #sum " \n\t"
"paddusw " #a ", " #sum " \n\t"
/*
 * MMABS_MMX2(reg, scratch): replace each packed signed 16-bit word of `reg`
 * with its absolute value, using `scratch` as a temporary (it is overwritten).
 *
 *   pxor   scratch, scratch   ->  scratch = 0
 *   psubw  reg, scratch       ->  scratch = -reg
 *   pmaxsw scratch, reg       ->  reg = max(reg, -reg)
 *
 * Both arguments are pasted verbatim into the asm text.
 */
#define MMABS_MMX2(reg, scratch)\
    "pxor "   #scratch ", " #scratch " \n\t"\
    "psubw "  #reg     ", " #scratch " \n\t"\
    "pmaxsw " #scratch ", " #reg     " \n\t"
/*
 * MMABS_SUM_MMX2(reg, scratch, acc): take the per-word absolute value of
 * `reg` (same pxor / psubw / pmaxsw sequence as MMABS_MMX2, clobbering
 * `scratch`) and then add it into `acc` with an unsigned saturating word add
 * (paddusw). `reg` is left holding the absolute values.
 */
#define MMABS_SUM_MMX2(reg, scratch, acc)\
    "pxor "    #scratch ", " #scratch " \n\t"\
    "psubw "   #reg     ", " #scratch " \n\t"\
    "pmaxsw "  #scratch ", " #reg     " \n\t"\
    "paddusw " #reg     ", " #acc     " \n\t"
#define SBUTTERFLY(a,b,t,n)\
#define SBUTTERFLY(a,b,t,n)\
"movq " #a ", " #t " \n\t"
/* abcd */
\
"movq " #a ", " #t " \n\t"
/* abcd */
\
"punpckl" #n " " #b ", " #a " \n\t"
/* aebf */
\
"punpckl" #n " " #b ", " #a " \n\t"
/* aebf */
\
"punpckh" #n " " #b ", " #t " \n\t"
/* cgdh */
\
"punpckh" #n " " #b ", " #t " \n\t"
/* cgdh */
\
#define TRANSPOSE4(a,b,c,d,t)\
#define TRANSPOSE4(a,b,c,d,t)\
SBUTTERFLY(a,b,t,wd)
/* a=aebf t=cgdh */
\
SBUTTERFLY(a,b,t,wd)
/* a=aebf t=cgdh */
\
SBUTTERFLY(c,d,b,wd)
/* c=imjn b=kolp */
\
SBUTTERFLY(c,d,b,wd)
/* c=imjn b=kolp */
\
...
@@ -726,7 +731,94 @@ static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride)
...
@@ -726,7 +731,94 @@ static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride)
return
sum
&
0xFFFF
;
return
sum
&
0xFFFF
;
}
}
/**
 * Hadamard-transform based difference metric between two pixel blocks, using
 * the pmaxsw-based MMABS_MMX2 / MMABS_SUM_MMX2 macros for the absolute-value
 * accumulation.
 *
 * @param s       not referenced inside this function
 * @param src1    first source block, forwarded to diff_pixels_mmx()
 * @param src2    second source block, forwarded to diff_pixels_mmx()
 * @param stride  forwarded to diff_pixels_mmx()
 * @return        the low 16 bits of the saturating sum built in mm0
 *
 * NOTE(review): diff_pixels_mmx(), LOAD4, STORE4 and __align8 are defined
 * outside this view. Presumably diff_pixels_mmx() fills temp with the 8x8
 * block of 16-bit differences src1 - src2, and LOAD4/STORE4 move four MMX
 * registers from/to temp starting at the given byte offset -- confirm.
 *
 * NOTE(review): the asm statement lists one output and one input but no
 * clobbers (neither "memory" nor the MMX registers), although it stores
 * through %1 and overwrites mm0-mm7.
 */
static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride){
    uint64_t temp[16] __align8;   /* 128-byte scratch buffer, addressed as %1 in the asm */
    int sum=0;

    diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride);

    asm volatile(
        /* ---- first pass, data at offsets 0 and 64 ---- */
        LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
        LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7)

        HADAMARD48

        /* mm7 is passed as the scratch register of the next TRANSPOSE4, so
           its value is parked in temp first. */
        "movq %%mm7, 112(%1) \n\t"

        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
        STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2)

        "movq 112(%1), %%mm7 \n\t"
        TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
        STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6)

        /* ---- first pass, data at offsets 8 and 72 ---- */
        LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3)
        LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)

        HADAMARD48

        "movq %%mm7, 120(%1) \n\t"

        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
        STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2)

        "movq 120(%1), %%mm7 \n\t"
        TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
        /* The transposed result is not stored (see the commented-out STORE4
           below). The three moves instead put the registers that STORE4 would
           have written (mm4, mm7, mm0, mm6) into mm4..mm7, so the matching
           LOAD4(72, ...) can be skipped as well. */
        "movq %%mm7, %%mm5 \n\t"//FIXME remove
        "movq %%mm6, %%mm7 \n\t"
        "movq %%mm0, %%mm6 \n\t"
//        STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove

        /* ---- second pass, data at offset 64 plus the registers kept above ---- */
        LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3)
//        LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)

        HADAMARD48
        /* mm7 doubles as the scratch register of the MMABS macros, so its
           transform output is saved and folded in last. */
        "movq %%mm7, 64(%1) \n\t"
        MMABS_MMX2(%%mm0, %%mm7)
        MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm2, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm3, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm4, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm5, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm6, %%mm7, %%mm0)
        "movq 64(%1), %%mm1 \n\t"
        MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
        /* Park this partial sum in temp; it is added back at the end. */
        "movq %%mm0, 64(%1) \n\t"

        /* ---- second pass, data at offsets 0 and 8 ---- */
        LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
        LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7)

        HADAMARD48
        "movq %%mm7, (%1) \n\t"
        MMABS_MMX2(%%mm0, %%mm7)
        MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm2, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm3, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm4, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm5, %%mm7, %%mm0)
        MMABS_SUM_MMX2(%%mm6, %%mm7, %%mm0)
        "movq (%1), %%mm1 \n\t"
        MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
        /* Reload the partial sum saved at 64(%1) and add it in. It goes
           through the abs step of MMABS_SUM_MMX2 like any other operand.
           NOTE(review): that presumably relies on each word of the partial
           sum being below 0x8000 -- confirm. */
        "movq 64(%1), %%mm1 \n\t"
        MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)

        /* Horizontal reduction of the four 16-bit words of mm0 into its low
           word, using saturating adds: fold by 32 bits, then by 16 bits. */
        "movq %%mm0, %%mm1 \n\t"
        "psrlq $32, %%mm0 \n\t"
        "paddusw %%mm1, %%mm0 \n\t"
        "movq %%mm0, %%mm1 \n\t"
        "psrlq $16, %%mm0 \n\t"
        "paddusw %%mm1, %%mm0 \n\t"
        "movd %%mm0, %0 \n\t"

        : "=r" (sum)
        : "r"(temp)
    );
    /* Only the low word of the moved 32 bits holds the full sum. */
    return sum&0xFFFF;
}
WARPER88_1616
(
hadamard8_diff_mmx
,
hadamard8_diff16_mmx
)
WARPER88_1616
(
hadamard8_diff_mmx
,
hadamard8_diff16_mmx
)
WARPER88_1616
(
hadamard8_diff_mmx2
,
hadamard8_diff16_mmx2
)
#define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d)
#define put_no_rnd_pixels8_mmx(a,b,c,d) put_pixels8_mmx(a,b,c,d)
#define put_no_rnd_pixels16_mmx(a,b,c,d) put_pixels16_mmx(a,b,c,d)
#define put_no_rnd_pixels16_mmx(a,b,c,d) put_pixels16_mmx(a,b,c,d)
...
@@ -1556,6 +1648,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
...
@@ -1556,6 +1648,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c
->
avg_pixels_tab
[
1
][
1
]
=
avg_pixels8_x2_mmx2
;
c
->
avg_pixels_tab
[
1
][
1
]
=
avg_pixels8_x2_mmx2
;
c
->
avg_pixels_tab
[
1
][
2
]
=
avg_pixels8_y2_mmx2
;
c
->
avg_pixels_tab
[
1
][
2
]
=
avg_pixels8_y2_mmx2
;
c
->
hadamard8_diff
[
0
]
=
hadamard8_diff16_mmx2
;
c
->
hadamard8_diff
[
1
]
=
hadamard8_diff_mmx2
;
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
)){
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
)){
c
->
put_no_rnd_pixels_tab
[
0
][
1
]
=
put_no_rnd_pixels16_x2_mmx2
;
c
->
put_no_rnd_pixels_tab
[
0
][
1
]
=
put_no_rnd_pixels16_x2_mmx2
;
c
->
put_no_rnd_pixels_tab
[
0
][
2
]
=
put_no_rnd_pixels16_y2_mmx2
;
c
->
put_no_rnd_pixels_tab
[
0
][
2
]
=
put_no_rnd_pixels16_y2_mmx2
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment