Project: ffmpeg.wasm-core (Linshizhi)

Commit 9c12c6ff authored Nov 21, 2014 by Vittorio Giovara

motion_est: convert stride to ptrdiff_t

CC: libav-stable@libav.org
Bug-Id: CID 700556 / CID 700557 / CID 700558

Parent: f856d9c2
Showing 5 changed files with 162 additions and 155 deletions (+162 / -155):

  libavcodec/arm/me_cmp_init_arm.c   +5   -5
  libavcodec/me_cmp.c                +59  -57
  libavcodec/me_cmp.h                +2   -1
  libavcodec/ppc/me_cmp.c            +28  -28
  libavcodec/x86/me_cmp_init.c       +68  -64
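The rationale behind the type change, illustrated by the minimal sketch below (hypothetical helper names, not code from this commit): a stride is added to pixel pointers once per row, and with an int stride that addition mixes 32-bit arithmetic with pointer-sized arithmetic, forcing an implicit sign extension on 64-bit targets, which is what the Coverity reports listed above flag. With ptrdiff_t the stride is already pointer-sized, so negative strides (bottom-up picture layouts) stay exact and no conversion is needed.

#include <stddef.h>
#include <stdint.h>

/* Illustrative only, not part of the commit. */
static uint8_t *row_int(uint8_t *base, int stride, int y)
{
    /* stride * y is evaluated as 32-bit int, then sign-extended to pointer
     * width when added to base; this is the pattern Coverity complains about. */
    return base + stride * y;
}

static uint8_t *row_ptrdiff(uint8_t *base, ptrdiff_t stride, int y)
{
    /* every term is pointer-sized from the start, so negative strides and
     * large offsets need no implicit conversion */
    return base + stride * y;
}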
libavcodec/arm/me_cmp_init_arm.c

@@ -26,17 +26,17 @@
 #include "libavcodec/mpegvideo.h"

 int ff_pix_abs16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
-                       int line_size, int h);
+                       ptrdiff_t stride, int h);
 int ff_pix_abs16_x2_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
-                          int line_size, int h);
+                          ptrdiff_t stride, int h);
 int ff_pix_abs16_y2_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
-                          int line_size, int h);
+                          ptrdiff_t stride, int h);
 int ff_pix_abs8_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
-                      int line_size, int h);
+                      ptrdiff_t stride, int h);
 int ff_sse16_armv6(MpegEncContext *s, uint8_t *blk1, uint8_t *blk2,
-                   int line_size, int h);
+                   ptrdiff_t stride, int h);

 av_cold void ff_me_cmp_init_arm(MECmpContext *c, AVCodecContext *avctx)
 {
libavcodec/me_cmp.c

@@ -27,7 +27,7 @@
 uint32_t ff_square_tab[512] = { 0, };

 static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                  int line_size, int h)
+                  ptrdiff_t stride, int h)
 {
     int s = 0, i;
     uint32_t *sq = ff_square_tab + 256;
@@ -37,14 +37,14 @@ static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         s += sq[pix1[1] - pix2[1]];
         s += sq[pix1[2] - pix2[2]];
         s += sq[pix1[3] - pix2[3]];
-        pix1 += line_size;
-        pix2 += line_size;
+        pix1 += stride;
+        pix2 += stride;
     }
     return s;
 }

 static int sse8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                  int line_size, int h)
+                  ptrdiff_t stride, int h)
 {
     int s = 0, i;
     uint32_t *sq = ff_square_tab + 256;
@@ -58,14 +58,14 @@ static int sse8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         s += sq[pix1[5] - pix2[5]];
         s += sq[pix1[6] - pix2[6]];
         s += sq[pix1[7] - pix2[7]];
-        pix1 += line_size;
-        pix2 += line_size;
+        pix1 += stride;
+        pix2 += stride;
     }
     return s;
 }

 static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                   int line_size, int h)
+                   ptrdiff_t stride, int h)
 {
     int s = 0, i;
     uint32_t *sq = ff_square_tab + 256;
@@ -88,8 +88,8 @@ static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         s += sq[pix1[14] - pix2[14]];
         s += sq[pix1[15] - pix2[15]];
-        pix1 += line_size;
-        pix2 += line_size;
+        pix1 += stride;
+        pix2 += stride;
     }
     return s;
 }
@@ -107,7 +107,7 @@ static int sum_abs_dctelem_c(int16_t *block)
 #define avg4(a, b, c, d) ((a + b + c + d + 2) >> 2)

 static inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                              int line_size, int h)
+                              ptrdiff_t stride, int h)
 {
     int s = 0, i;
@@ -128,14 +128,14 @@ static inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         s += abs(pix1[13] - pix2[13]);
         s += abs(pix1[14] - pix2[14]);
         s += abs(pix1[15] - pix2[15]);
-        pix1 += line_size;
-        pix2 += line_size;
+        pix1 += stride;
+        pix2 += stride;
     }
     return s;
 }

 static int pix_abs16_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                          int line_size, int h)
+                          ptrdiff_t stride, int h)
 {
     int s = 0, i;
@@ -156,17 +156,17 @@ static int pix_abs16_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
         s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
         s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
-        pix1 += line_size;
-        pix2 += line_size;
+        pix1 += stride;
+        pix2 += stride;
     }
     return s;
 }

 static int pix_abs16_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                          int line_size, int h)
+                          ptrdiff_t stride, int h)
 {
     int s = 0, i;
-    uint8_t *pix3 = pix2 + line_size;
+    uint8_t *pix3 = pix2 + stride;

     for (i = 0; i < h; i++) {
         s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
@@ -185,18 +185,18 @@ static int pix_abs16_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
         s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
         s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
-        pix1 += line_size;
-        pix2 += line_size;
-        pix3 += line_size;
+        pix1 += stride;
+        pix2 += stride;
+        pix3 += stride;
     }
     return s;
 }

 static int pix_abs16_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                           int line_size, int h)
+                           ptrdiff_t stride, int h)
 {
     int s = 0, i;
-    uint8_t *pix3 = pix2 + line_size;
+    uint8_t *pix3 = pix2 + stride;

     for (i = 0; i < h; i++) {
         s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
@@ -215,15 +215,15 @@ static int pix_abs16_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
         s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
         s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
-        pix1 += line_size;
-        pix2 += line_size;
-        pix3 += line_size;
+        pix1 += stride;
+        pix2 += stride;
+        pix3 += stride;
     }
     return s;
 }

 static inline int pix_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                             int line_size, int h)
+                             ptrdiff_t stride, int h)
 {
     int s = 0, i;
@@ -236,14 +236,14 @@ static inline int pix_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         s += abs(pix1[5] - pix2[5]);
         s += abs(pix1[6] - pix2[6]);
         s += abs(pix1[7] - pix2[7]);
-        pix1 += line_size;
-        pix2 += line_size;
+        pix1 += stride;
+        pix2 += stride;
     }
     return s;
 }

 static int pix_abs8_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                         int line_size, int h)
+                         ptrdiff_t stride, int h)
 {
     int s = 0, i;
@@ -256,17 +256,17 @@ static int pix_abs8_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
         s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
         s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
-        pix1 += line_size;
-        pix2 += line_size;
+        pix1 += stride;
+        pix2 += stride;
     }
     return s;
 }

 static int pix_abs8_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                         int line_size, int h)
+                         ptrdiff_t stride, int h)
 {
     int s = 0, i;
-    uint8_t *pix3 = pix2 + line_size;
+    uint8_t *pix3 = pix2 + stride;

     for (i = 0; i < h; i++) {
         s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
@@ -277,18 +277,18 @@ static int pix_abs8_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
         s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
         s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
-        pix1 += line_size;
-        pix2 += line_size;
-        pix3 += line_size;
+        pix1 += stride;
+        pix2 += stride;
+        pix3 += stride;
     }
     return s;
 }

 static int pix_abs8_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                          int line_size, int h)
+                          ptrdiff_t stride, int h)
 {
     int s = 0, i;
-    uint8_t *pix3 = pix2 + line_size;
+    uint8_t *pix3 = pix2 + stride;

     for (i = 0; i < h; i++) {
         s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
@@ -299,14 +299,15 @@ static int pix_abs8_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
         s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
         s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
-        pix1 += line_size;
-        pix2 += line_size;
-        pix3 += line_size;
+        pix1 += stride;
+        pix2 += stride;
+        pix3 += stride;
     }
     return s;
 }

-static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int h)
+static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
+                    ptrdiff_t stride, int h)
 {
     int score1 = 0, score2 = 0, x, y;
@@ -330,7 +331,8 @@ static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int
     return score1 + FFABS(score2) * 8;
 }

-static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int h)
+static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
+                   ptrdiff_t stride, int h)
 {
     int score1 = 0, score2 = 0, x, y;
@@ -355,7 +357,7 @@ static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int
 }

 static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b,
-                    int stride, int h)
+                    ptrdiff_t stride, int h)
 {
     return 0;
 }
@@ -430,7 +432,7 @@ void ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type)
 #define BUTTERFLYA(x, y) (FFABS((x) + (y)) + FFABS((x) - (y)))

 static int hadamard8_diff8x8_c(MpegEncContext *s, uint8_t *dst,
-                               uint8_t *src, int stride, int h)
+                               uint8_t *src, ptrdiff_t stride, int h)
 {
     int i, temp[64], sum = 0;
@@ -482,7 +484,7 @@ static int hadamard8_diff8x8_c(MpegEncContext *s, uint8_t *dst,
 }

 static int hadamard8_intra8x8_c(MpegEncContext *s, uint8_t *src,
-                                uint8_t *dummy, int stride, int h)
+                                uint8_t *dummy, ptrdiff_t stride, int h)
 {
     int i, temp[64], sum = 0;
@@ -534,7 +536,7 @@ static int hadamard8_intra8x8_c(MpegEncContext *s, uint8_t *src,
 }

 static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1,
-                        uint8_t *src2, int stride, int h)
+                        uint8_t *src2, ptrdiff_t stride, int h)
 {
     LOCAL_ALIGNED_16(int16_t, temp, [64]);
@@ -575,7 +577,7 @@ static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1,
 }

 static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1,
-                           uint8_t *src2, int stride, int h)
+                           uint8_t *src2, ptrdiff_t stride, int h)
 {
     int16_t dct[8][8];
     int i, sum = 0;
@@ -600,7 +602,7 @@ static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1,
 #endif

 static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1,
-                        uint8_t *src2, int stride, int h)
+                        uint8_t *src2, ptrdiff_t stride, int h)
 {
     LOCAL_ALIGNED_16(int16_t, temp, [64]);
     int sum = 0, i;
@@ -617,7 +619,7 @@ static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1,
 }

 static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1,
-                           uint8_t *src2, int stride, int h)
+                           uint8_t *src2, ptrdiff_t stride, int h)
 {
     LOCAL_ALIGNED_16(int16_t, temp, [64 * 2]);
     int16_t *const bak = temp + 64;
@@ -642,7 +644,7 @@ static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1,
 }

 static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
-                   int stride, int h)
+                   ptrdiff_t stride, int h)
 {
     const uint8_t *scantable = s->intra_scantable.permutated;
     LOCAL_ALIGNED_16(int16_t, temp, [64]);
@@ -719,7 +721,7 @@ static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
 }

 static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
-                    int stride, int h)
+                    ptrdiff_t stride, int h)
 {
     const uint8_t *scantable = s->intra_scantable.permutated;
     LOCAL_ALIGNED_16(int16_t, temp, [64]);
@@ -782,7 +784,7 @@ static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
 #define VSAD_INTRA(size)                                                \
 static int vsad_intra ## size ## _c(MpegEncContext *c,                  \
                                     uint8_t *s, uint8_t *dummy,         \
-                                    int stride, int h)                  \
+                                    ptrdiff_t stride, int h)            \
 {                                                                       \
     int score = 0, x, y;                                                \
                                                                         \
@@ -802,7 +804,7 @@ VSAD_INTRA(8)
 VSAD_INTRA(16)

 static int vsad16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
-                    int stride, int h)
+                    ptrdiff_t stride, int h)
 {
     int score = 0, x, y;
@@ -820,7 +822,7 @@ static int vsad16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
 #define VSSE_INTRA(size)                                                \
 static int vsse_intra ## size ## _c(MpegEncContext *c,                  \
                                     uint8_t *s, uint8_t *dummy,         \
-                                    int stride, int h)                  \
+                                    ptrdiff_t stride, int h)            \
 {                                                                       \
     int score = 0, x, y;                                                \
                                                                         \
@@ -840,7 +842,7 @@ VSSE_INTRA(8)
 VSSE_INTRA(16)

 static int vsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
-                    int stride, int h)
+                    ptrdiff_t stride, int h)
 {
     int score = 0, x, y;
@@ -856,7 +858,7 @@ static int vsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
 #define WRAPPER8_16_SQ(name8, name16)                                   \
 static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src,        \
-                  int stride, int h)                                    \
+                  ptrdiff_t stride, int h)                              \
 {                                                                       \
     int score = 0;                                                      \
                                                                         \
libavcodec/me_cmp.h

@@ -33,7 +33,8 @@ struct MpegEncContext;
  * width < 8 are neither used nor implemented. */
 typedef int (*me_cmp_func)(struct MpegEncContext *c,
                            uint8_t *blk1 /* align width (8 or 16) */,
-                           uint8_t *blk2 /* align 1 */, int line_size, int h);
+                           uint8_t *blk2 /* align 1 */,
+                           ptrdiff_t stride, int h);

 typedef struct MECmpContext {
     int (*sum_abs_dctelem)(int16_t *block /* align 16 */);
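Every comparison function stored in MECmpContext has to match this updated me_cmp_func signature. A minimal sketch of a conforming function follows; the name and body are hypothetical, not code from the commit:

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

struct MpegEncContext;                  /* only used as an opaque pointer here */

/* Hypothetical 8-pixel-wide SAD with the post-commit me_cmp_func signature. */
static int example_sad8_c(struct MpegEncContext *c, uint8_t *blk1,
                          uint8_t *blk2, ptrdiff_t stride, int h)
{
    int sum = 0;
    (void) c;                           /* context unused in this sketch */
    for (int y = 0; y < h; y++) {
        for (int x = 0; x < 8; x++)
            sum += abs(blk1[x] - blk2[x]);
        blk1 += stride;                 /* stride may be negative; ptrdiff_t keeps it exact */
        blk2 += stride;
    }
    return sum;
}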
libavcodec/ppc/me_cmp.c

@@ -36,7 +36,7 @@
 #if HAVE_ALTIVEC

 static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                            int line_size, int h)
+                            ptrdiff_t stride, int h)
 {
     int i, s = 0;
     const vector unsigned char zero =
@@ -66,8 +66,8 @@ static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         /* Add each 4 pixel group together and put 4 results into sad. */
         sad = vec_sum4s(t5, sad);
-        pix1 += line_size;
-        pix2 += line_size;
+        pix1 += stride;
+        pix2 += stride;
     }
     /* Sum up the four partial sums, and put the result into s. */
     sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
@@ -78,7 +78,7 @@ static int sad16_x2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
 }

 static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                            int line_size, int h)
+                            ptrdiff_t stride, int h)
 {
     int i, s = 0;
     const vector unsigned char zero =
@@ -87,9 +87,9 @@ static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     vector unsigned char pix1v, pix3v, avgv, t5;
     vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
     vector signed int sumdiffs;
-    uint8_t *pix3 = pix2 + line_size;
+    uint8_t *pix3 = pix2 + stride;

-    /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one
+    /* Due to the fact that pix3 = pix2 + stride, the pix3 of one
      * iteration becomes pix2 in the next iteration. We can use this
      * fact to avoid a potentially expensive unaligned read, each
      * time around the loop.
@@ -119,9 +119,9 @@ static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         /* Add each 4 pixel group together and put 4 results into sad. */
         sad = vec_sum4s(t5, sad);
-        pix1 += line_size;
+        pix1 += stride;
         pix2v = pix3v;
-        pix3 += line_size;
+        pix3 += stride;
     }
     /* Sum up the four partial sums, and put the result into s. */
@@ -132,10 +132,10 @@ static int sad16_y2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
 }

 static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                             int line_size, int h)
+                             ptrdiff_t stride, int h)
 {
     int i, s = 0;
-    uint8_t *pix3 = pix2 + line_size;
+    uint8_t *pix3 = pix2 + stride;
     const vector unsigned char zero =
         (const vector unsigned char) vec_splat_u8(0);
     const vector unsigned short two =
@@ -149,7 +149,7 @@ static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
     vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
     vector signed int sumdiffs;

-    /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one
+    /* Due to the fact that pix3 = pix2 + stride, the pix3 of one
      * iteration becomes pix2 in the next iteration. We can use this
      * fact to avoid a potentially expensive unaligned read, as well
      * as some splitting, and vector addition each time around the loop.
@@ -212,8 +212,8 @@ static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         /* Add each 4 pixel group together and put 4 results into sad. */
         sad = vec_sum4s(t5, sad);
-        pix1 += line_size;
-        pix3 += line_size;
+        pix1 += stride;
+        pix3 += stride;
         /* Transfer the calculated values for pix3 into pix2. */
         t1 = t3;
         t2 = t4;
@@ -227,7 +227,7 @@ static int sad16_xy2_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
 }

 static int sad16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                         int line_size, int h)
+                         ptrdiff_t stride, int h)
 {
     int i, s;
     const vector unsigned int zero =
@@ -251,8 +251,8 @@ static int sad16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         /* Add each 4 pixel group together and put 4 results into sad. */
         sad = vec_sum4s(t5, sad);
-        pix1 += line_size;
-        pix2 += line_size;
+        pix1 += stride;
+        pix2 += stride;
     }
     /* Sum up the four partial sums, and put the result into s. */
@@ -264,7 +264,7 @@ static int sad16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
 }

 static int sad8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                        int line_size, int h)
+                        ptrdiff_t stride, int h)
 {
     int i, s;
     const vector unsigned int zero =
@@ -298,8 +298,8 @@ static int sad8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         /* Add each 4 pixel group together and put 4 results into sad. */
         sad = vec_sum4s(t5, sad);
-        pix1 += line_size;
-        pix2 += line_size;
+        pix1 += stride;
+        pix2 += stride;
     }
     /* Sum up the four partial sums, and put the result into s. */
@@ -313,7 +313,7 @@ static int sad8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
 /* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced.
  * It's the sad8_altivec code above w/ squaring added. */
 static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                        int line_size, int h)
+                        ptrdiff_t stride, int h)
 {
     int i, s;
     const vector unsigned int zero =
@@ -350,8 +350,8 @@ static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         /* Square the values and add them to our sum. */
         sum = vec_msum(t5, t5, sum);
-        pix1 += line_size;
-        pix2 += line_size;
+        pix1 += stride;
+        pix2 += stride;
     }
     /* Sum up the four partial sums, and put the result into s. */
@@ -365,7 +365,7 @@ static int sse8_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
 /* Sum of Squared Errors for a 16x16 block, AltiVec-enhanced.
  * It's the sad16_altivec code above w/ squaring added. */
 static int sse16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                         int line_size, int h)
+                         ptrdiff_t stride, int h)
 {
     int i, s;
     const vector unsigned int zero =
@@ -392,8 +392,8 @@ static int sse16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         /* Square the values and add them to our sum. */
         sum = vec_msum(t5, t5, sum);
-        pix1 += line_size;
-        pix2 += line_size;
+        pix1 += stride;
+        pix2 += stride;
     }
     /* Sum up the four partial sums, and put the result into s. */
@@ -405,7 +405,7 @@ static int sse16_altivec(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
 }

 static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst,
-                                     uint8_t *src, int stride, int h)
+                                     uint8_t *src, ptrdiff_t stride, int h)
 {
     int sum;
     register const vector unsigned char vzero =
@@ -534,7 +534,7 @@ static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst,
  * but xlc goes to around 660 on the regular C code...
  */
 static int hadamard8_diff16x8_altivec(MpegEncContext *s, uint8_t *dst,
-                                      uint8_t *src, int stride, int h)
+                                      uint8_t *src, ptrdiff_t stride, int h)
 {
     int sum;
     register vector signed short
@@ -731,7 +731,7 @@ static int hadamard8_diff16x8_altivec(MpegEncContext *s, uint8_t *dst,
 }

 static int hadamard8_diff16_altivec(MpegEncContext *s, uint8_t *dst,
-                                    uint8_t *src, int stride, int h)
+                                    uint8_t *src, ptrdiff_t stride, int h)
 {
     int score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
libavcodec/x86/me_cmp_init.c

@@ -32,7 +32,7 @@
 #if HAVE_INLINE_ASM

 static int sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                    int line_size, int h)
+                    ptrdiff_t stride, int h)
 {
     int tmp;
@@ -74,8 +74,8 @@ static int sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         "pmaddwd %%mm1, %%mm1          \n"
         "pmaddwd %%mm3, %%mm3          \n"
-        "lea (%0, %3, 2), %0           \n"  /* pix1 += 2 * line_size */
-        "lea (%1, %3, 2), %1           \n"  /* pix2 += 2 * line_size */
+        "lea (%0, %3, 2), %0           \n"  /* pix1 += 2 * stride */
+        "lea (%1, %3, 2), %1           \n"  /* pix2 += 2 * stride */
         "paddd %%mm2, %%mm1            \n"
         "paddd %%mm4, %%mm3            \n"
@@ -90,14 +90,14 @@ static int sse8_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         "paddd %%mm7, %%mm1            \n"
         "movd %%mm1, %2                \n"
         : "+r" (pix1), "+r" (pix2), "=r" (tmp)
-        : "r" ((x86_reg) line_size), "m" (h)
+        : "r" (stride), "m" (h)
         : "%ecx");
     return tmp;
 }

 static int sse16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                     int line_size, int h)
+                     ptrdiff_t stride, int h)
 {
     int tmp;
@@ -154,13 +154,13 @@ static int sse16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         "paddd %%mm7, %%mm1            \n"
         "movd %%mm1, %2                \n"
         : "+r" (pix1), "+r" (pix2), "=r" (tmp)
-        : "r" ((x86_reg) line_size), "m" (h)
+        : "r" (stride), "m" (h)
         : "%ecx");
     return tmp;
 }

-static int hf_noise8_mmx(uint8_t *pix1, int line_size, int h)
+static int hf_noise8_mmx(uint8_t *pix1, ptrdiff_t stride, int h)
 {
     int tmp;
@@ -282,13 +282,13 @@ static int hf_noise8_mmx(uint8_t *pix1, int line_size, int h)
         "paddd %%mm6, %%mm0            \n"
         "movd %%mm0, %1                \n"
         : "+r" (pix1), "=r" (tmp)
-        : "r" ((x86_reg) line_size), "g" (h - 2)
+        : "r" (stride), "g" (h - 2)
         : "%ecx");
     return tmp;
 }

-static int hf_noise16_mmx(uint8_t *pix1, int line_size, int h)
+static int hf_noise16_mmx(uint8_t *pix1, ptrdiff_t stride, int h)
 {
     int tmp;
     uint8_t *pix = pix1;
@@ -399,23 +399,23 @@ static int hf_noise16_mmx(uint8_t *pix1, int line_size, int h)
         "paddd %%mm6, %%mm0            \n"
         "movd %%mm0, %1                \n"
         : "+r" (pix1), "=r" (tmp)
-        : "r" ((x86_reg) line_size), "g" (h - 2)
+        : "r" (stride), "g" (h - 2)
         : "%ecx");
-    return tmp + hf_noise8_mmx(pix + 8, line_size, h);
+    return tmp + hf_noise8_mmx(pix + 8, stride, h);
 }

 static int nsse16_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2,
-                      int line_size, int h)
+                      ptrdiff_t stride, int h)
 {
     int score1, score2;
     if (c)
-        score1 = c->mecc.sse[0](c, pix1, pix2, line_size, h);
+        score1 = c->mecc.sse[0](c, pix1, pix2, stride, h);
     else
-        score1 = sse16_mmx(c, pix1, pix2, line_size, h);
-    score2 = hf_noise16_mmx(pix1, line_size, h) -
-             hf_noise16_mmx(pix2, line_size, h);
+        score1 = sse16_mmx(c, pix1, pix2, stride, h);
+    score2 = hf_noise16_mmx(pix1, stride, h) -
+             hf_noise16_mmx(pix2, stride, h);
     if (c)
         return score1 + FFABS(score2) * c->avctx->nsse_weight;
@@ -424,11 +424,11 @@ static int nsse16_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2,
 }

 static int nsse8_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2,
-                     int line_size, int h)
+                     ptrdiff_t stride, int h)
 {
-    int score1 = sse8_mmx(c, pix1, pix2, line_size, h);
-    int score2 = hf_noise8_mmx(pix1, line_size, h) -
-                 hf_noise8_mmx(pix2, line_size, h);
+    int score1 = sse8_mmx(c, pix1, pix2, stride, h);
+    int score2 = hf_noise8_mmx(pix1, stride, h) -
+                 hf_noise8_mmx(pix2, stride, h);
     if (c)
         return score1 + FFABS(score2) * c->avctx->nsse_weight;
@@ -437,12 +437,12 @@ static int nsse8_mmx(MpegEncContext *c, uint8_t *pix1, uint8_t *pix2,
 }

 static int vsad_intra16_mmx(MpegEncContext *v, uint8_t *pix, uint8_t *dummy,
-                            int line_size, int h)
+                            ptrdiff_t stride, int h)
 {
     int tmp;
     assert((((int) pix) & 7) == 0);
-    assert((line_size & 7) == 0);
+    assert((stride & 7) == 0);
 #define SUM(in0, in1, out0, out1)            \
     "movq (%0), %%mm2\n"                     \
@@ -493,7 +493,7 @@ static int vsad_intra16_mmx(MpegEncContext *v, uint8_t *pix, uint8_t *dummy,
         "paddw %%mm6, %%mm0            \n"
         "movd %%mm0, %1                \n"
         : "+r" (pix), "=r" (tmp)
-        : "r" ((x86_reg) line_size), "m" (h)
+        : "r" (stride), "m" (h)
         : "%ecx");
     return tmp & 0xFFFF;
@@ -501,12 +501,12 @@ static int vsad_intra16_mmx(MpegEncContext *v, uint8_t *pix, uint8_t *dummy,
 #undef SUM

 static int vsad_intra16_mmxext(MpegEncContext *v, uint8_t *pix, uint8_t *dummy,
-                               int line_size, int h)
+                               ptrdiff_t stride, int h)
 {
     int tmp;
     assert((((int) pix) & 7) == 0);
-    assert((line_size & 7) == 0);
+    assert((stride & 7) == 0);
 #define SUM(in0, in1, out0, out1)            \
     "movq (%0), " #out0 "\n"                 \
@@ -536,7 +536,7 @@ static int vsad_intra16_mmxext(MpegEncContext *v, uint8_t *pix, uint8_t *dummy,
         "movd %%mm6, %1                \n"
         : "+r" (pix), "=r" (tmp)
-        : "r" ((x86_reg) line_size), "m" (h)
+        : "r" (stride), "m" (h)
         : "%ecx");
     return tmp;
@@ -544,13 +544,13 @@ static int vsad_intra16_mmxext(MpegEncContext *v, uint8_t *pix, uint8_t *dummy,
 #undef SUM

 static int vsad16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                      int line_size, int h)
+                      ptrdiff_t stride, int h)
 {
     int tmp;
     assert((((int) pix1) & 7) == 0);
     assert((((int) pix2) & 7) == 0);
-    assert((line_size & 7) == 0);
+    assert((stride & 7) == 0);
 #define SUM(in0, in1, out0, out1)            \
     "movq (%0), %%mm2\n"                     \
@@ -617,7 +617,7 @@ static int vsad16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         "paddw %%mm6, %%mm0            \n"
         "movd %%mm0, %2                \n"
         : "+r" (pix1), "+r" (pix2), "=r" (tmp)
-        : "r" ((x86_reg) line_size), "m" (h)
+        : "r" (stride), "m" (h)
         : "%ecx");
     return tmp & 0x7FFF;
@@ -625,13 +625,13 @@ static int vsad16_mmx(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
 #undef SUM

 static int vsad16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                         int line_size, int h)
+                         ptrdiff_t stride, int h)
 {
     int tmp;
     assert((((int) pix1) & 7) == 0);
     assert((((int) pix2) & 7) == 0);
-    assert((line_size & 7) == 0);
+    assert((stride & 7) == 0);
 #define SUM(in0, in1, out0, out1)            \
     "movq (%0), " #out0 "\n"                 \
@@ -677,7 +677,7 @@ static int vsad16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
         "movd %%mm6, %2                \n"
         : "+r" (pix1), "+r" (pix2), "=r" (tmp)
-        : "r" ((x86_reg) line_size), "m" (h)
+        : "r" (stride), "m" (h)
         : "%ecx");
     return tmp;
@@ -805,7 +805,8 @@ DECLARE_ASM_CONST(8, uint64_t, round_tab)[3] = {
 DECLARE_ASM_CONST(8, uint64_t, bone) = 0x0101010101010101LL;

-static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
+static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2,
+                              ptrdiff_t stride, int h)
 {
     x86_reg len = -(stride * h);
     __asm__ volatile (
@@ -837,11 +838,11 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
         "add %3, %%"REG_a"              \n\t"
         " js 1b                         \n\t"
         : "+a" (len)
-        : "r" (blk1 - len), "r" (blk2 - len), "r" ((x86_reg) stride));
+        : "r" (blk1 - len), "r" (blk2 - len), "r" (stride));
 }

 static inline void sad8_1_mmxext(uint8_t *blk1, uint8_t *blk2,
-                                 int stride, int h)
+                                 ptrdiff_t stride, int h)
 {
     __asm__ volatile (
         ".p2align 4                     \n\t"
@@ -857,11 +858,11 @@ static inline void sad8_1_mmxext(uint8_t *blk1, uint8_t *blk2,
         "sub $2, %0                     \n\t"
         " jg 1b                         \n\t"
         : "+r" (h), "+r" (blk1), "+r" (blk2)
-        : "r" ((x86_reg) stride));
+        : "r" (stride));
 }

 static int sad16_sse2(MpegEncContext *v, uint8_t *blk2, uint8_t *blk1,
-                      int stride, int h)
+                      ptrdiff_t stride, int h)
 {
     int ret;
     __asm__ volatile (
@@ -882,12 +883,12 @@ static int sad16_sse2(MpegEncContext *v, uint8_t *blk2, uint8_t *blk1,
         "paddw %%xmm0, %%xmm2           \n\t"
         "movd %%xmm2, %3                \n\t"
         : "+r" (h), "+r" (blk1), "+r" (blk2), "=r" (ret)
-        : "r" ((x86_reg) stride));
+        : "r" (stride));
     return ret;
 }

 static inline void sad8_x2a_mmxext(uint8_t *blk1, uint8_t *blk2,
-                                   int stride, int h)
+                                   ptrdiff_t stride, int h)
 {
     __asm__ volatile (
         ".p2align 4                     \n\t"
@@ -905,11 +906,11 @@ static inline void sad8_x2a_mmxext(uint8_t *blk1, uint8_t *blk2,
         "sub $2, %0                     \n\t"
         " jg 1b                         \n\t"
         : "+r" (h), "+r" (blk1), "+r" (blk2)
-        : "r" ((x86_reg) stride));
+        : "r" (stride));
 }

 static inline void sad8_y2a_mmxext(uint8_t *blk1, uint8_t *blk2,
-                                   int stride, int h)
+                                   ptrdiff_t stride, int h)
 {
     __asm__ volatile (
         "movq (%1), %%mm0               \n\t"
@@ -930,11 +931,11 @@ static inline void sad8_y2a_mmxext(uint8_t *blk1, uint8_t *blk2,
         "sub $2, %0                     \n\t"
         " jg 1b                         \n\t"
         : "+r" (h), "+r" (blk1), "+r" (blk2)
-        : "r" ((x86_reg) stride));
+        : "r" (stride));
 }

 static inline void sad8_4_mmxext(uint8_t *blk1, uint8_t *blk2,
-                                 int stride, int h)
+                                 ptrdiff_t stride, int h)
 {
     __asm__ volatile (
         "movq "MANGLE(bone)", %%mm5     \n\t"
@@ -960,11 +961,11 @@ static inline void sad8_4_mmxext(uint8_t *blk1, uint8_t *blk2,
         "sub $2, %0                     \n\t"
         " jg 1b                         \n\t"
         : "+r" (h), "+r" (blk1), "+r" (blk2)
-        : "r" ((x86_reg) stride));
+        : "r" (stride));
 }

 static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
-                              int stride, int h)
+                              ptrdiff_t stride, int h)
 {
     x86_reg len = -(stride * h);
     __asm__ volatile (
@@ -999,10 +1000,11 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
         " js 1b                         \n\t"
         : "+a" (len)
         : "r" (blk1a - len), "r" (blk1b - len), "r" (blk2 - len),
-          "r" ((x86_reg) stride));
+          "r" (stride));
 }

-static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
+static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
+                              ptrdiff_t stride, int h)
 {
     x86_reg len = -(stride * h);
     __asm__ volatile (
@@ -1052,7 +1054,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
         " js 1b                         \n\t"
         : "+a" (len)
         : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len),
-          "r" ((x86_reg) stride));
+          "r" (stride));
 }

 static inline int sum_mmx(void)
@@ -1079,19 +1081,21 @@ static inline int sum_mmxext(void)
     return ret;
 }

-static inline void sad8_x2a_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
+static inline void sad8_x2a_mmx(uint8_t *blk1, uint8_t *blk2,
+                                ptrdiff_t stride, int h)
 {
     sad8_2_mmx(blk1, blk1 + 1, blk2, stride, h);
 }

-static inline void sad8_y2a_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
+static inline void sad8_y2a_mmx(uint8_t *blk1, uint8_t *blk2,
+                                ptrdiff_t stride, int h)
 {
     sad8_2_mmx(blk1, blk1 + stride, blk2, stride, h);
 }

 #define PIX_SAD(suf)                                                    \
 static int sad8_ ## suf(MpegEncContext *v, uint8_t *blk2,               \
-                        uint8_t *blk1, int stride, int h)               \
+                        uint8_t *blk1, ptrdiff_t stride, int h)         \
 {                                                                       \
     assert(h == 8);                                                     \
     __asm__ volatile (                                                  \
@@ -1105,7 +1109,7 @@ static int sad8_ ## suf(MpegEncContext *v, uint8_t *blk2,               \
 }                                                                       \
                                                                         \
 static int sad8_x2_ ## suf(MpegEncContext *v, uint8_t *blk2,            \
-                           uint8_t *blk1, int stride, int h)            \
+                           uint8_t *blk1, ptrdiff_t stride, int h)      \
 {                                                                       \
     assert(h == 8);                                                     \
     __asm__ volatile (                                                  \
@@ -1120,7 +1124,7 @@ static int sad8_x2_ ## suf(MpegEncContext *v, uint8_t *blk2,            \
 }                                                                       \
                                                                         \
 static int sad8_y2_ ## suf(MpegEncContext *v, uint8_t *blk2,            \
-                           uint8_t *blk1, int stride, int h)            \
+                           uint8_t *blk1, ptrdiff_t stride, int h)      \
 {                                                                       \
     assert(h == 8);                                                     \
     __asm__ volatile (                                                  \
@@ -1135,7 +1139,7 @@ static int sad8_y2_ ## suf(MpegEncContext *v, uint8_t *blk2,            \
 }                                                                       \
                                                                         \
 static int sad8_xy2_ ## suf(MpegEncContext *v, uint8_t *blk2,           \
-                            uint8_t *blk1, int stride, int h)           \
+                            uint8_t *blk1, ptrdiff_t stride, int h)     \
 {                                                                       \
     assert(h == 8);                                                     \
     __asm__ volatile (                                                  \
@@ -1149,7 +1153,7 @@ static int sad8_xy2_ ## suf(MpegEncContext *v, uint8_t *blk2,           \
 }                                                                       \
                                                                         \
 static int sad16_ ## suf(MpegEncContext *v, uint8_t *blk2,              \
-                         uint8_t *blk1, int stride, int h)              \
+                         uint8_t *blk1, ptrdiff_t stride, int h)        \
 {                                                                       \
     __asm__ volatile (                                                  \
         "pxor %%mm7, %%mm7     \n\t"                                    \
@@ -1163,7 +1167,7 @@ static int sad16_ ## suf(MpegEncContext *v, uint8_t *blk2,              \
 }                                                                       \
                                                                         \
 static int sad16_x2_ ## suf(MpegEncContext *v, uint8_t *blk2,           \
-                            uint8_t *blk1, int stride, int h)           \
+                            uint8_t *blk1, ptrdiff_t stride, int h)     \
 {                                                                       \
     __asm__ volatile (                                                  \
         "pxor %%mm7, %%mm7     \n\t"                                    \
@@ -1178,7 +1182,7 @@ static int sad16_x2_ ## suf(MpegEncContext *v, uint8_t *blk2,           \
 }                                                                       \
                                                                         \
 static int sad16_y2_ ## suf(MpegEncContext *v, uint8_t *blk2,           \
-                            uint8_t *blk1, int stride, int h)           \
+                            uint8_t *blk1, ptrdiff_t stride, int h)     \
 {                                                                       \
     __asm__ volatile (                                                  \
         "pxor %%mm7, %%mm7     \n\t"                                    \
@@ -1193,7 +1197,7 @@ static int sad16_y2_ ## suf(MpegEncContext *v, uint8_t *blk2,           \
 }                                                                       \
                                                                         \
 static int sad16_xy2_ ## suf(MpegEncContext *v, uint8_t *blk2,          \
-                             uint8_t *blk1, int stride, int h)          \
+                             uint8_t *blk1, ptrdiff_t stride, int h)    \
 {                                                                       \
     __asm__ volatile (                                                  \
         "pxor %%mm7, %%mm7     \n\t"                                    \
@@ -1212,13 +1216,13 @@ PIX_SAD(mmxext)
 #endif /* HAVE_INLINE_ASM */

 int ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
-                  int line_size, int h);
+                  ptrdiff_t stride, int h);

 #define hadamard_func(cpu)                                              \
 int ff_hadamard8_diff_ ## cpu(MpegEncContext *s, uint8_t *src1,         \
-                              uint8_t *src2, int stride, int h);        \
+                              uint8_t *src2, ptrdiff_t stride, int h);  \
 int ff_hadamard8_diff16_ ## cpu(MpegEncContext *s, uint8_t *src1,       \
-                                uint8_t *src2, int stride, int h);
+                                uint8_t *src2, ptrdiff_t stride, int h);

 hadamard_func(mmx)
 hadamard_func(mmxext)
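A recurring detail in this file: the (x86_reg) casts in the inline-asm input constraints disappear along with the type change, because a ptrdiff_t operand already has register width on both 32-bit and 64-bit x86, so an "r" constraint binds a full-width register without help. A rough standalone sketch of that pattern (hypothetical helper, not code from the commit):

#include <stddef.h>
#include <stdint.h>

/* Advance a row pointer by two strides, the same job the
 * "lea (%0, %3, 2), %0" lines above perform inside the SSE loops. */
static void skip_two_rows(uint8_t **pix, ptrdiff_t stride)
{
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
    /* stride is pointer-sized, so "r" picks a full-width register directly */
    __asm__ ("lea (%0, %1, 2), %0"
             : "+r" (*pix)
             : "r" (stride));
#else
    *pix += 2 * stride;                 /* portable fallback */
#endif
}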