Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
59fe111e
Commit
59fe111e
authored
May 16, 2002
by
Michael Niedermayer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
new hopefully faster MC
Originally committed as revision 505 to
svn://svn.ffmpeg.org/ffmpeg/trunk
parent
02734c6a
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
344 additions
and
8 deletions
+344
-8
dsputil.c
libavcodec/dsputil.c
+344
-8
No files found.
libavcodec/dsputil.c
View file @
59fe111e
...
@@ -16,7 +16,7 @@
...
@@ -16,7 +16,7 @@
* along with this program; if not, write to the Free Software
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
*
* gmc & q-pel
support
by Michael Niedermayer <michaelni@gmx.at>
* gmc & q-pel
& 32/64 bit based MC
by Michael Niedermayer <michaelni@gmx.at>
*/
*/
#include <stdlib.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdio.h>
...
@@ -252,6 +252,347 @@ void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
...
@@ -252,6 +252,347 @@ void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
}
}
}
}
//FIXME someone with an alignment-picky CPU should change these
#define LD32(a) (*((uint32_t*)(a)))
#define LD64(a) (*((uint64_t*)(a)))

/*
 * 8 pixel wide motion compensation primitives that work on whole machine
 * words instead of single bytes: every byte of a word is one pixel and the
 * arithmetic below is arranged so that no carry ever crosses a byte boundary.
 *
 * The word width is selected here; everything below is written once in terms
 * of pixop_word so the two variants cannot drift apart.
 *
 * NOTE: source rows are loaded at arbitrary byte offsets (e.g. pixels+1) and
 * destination rows are stored through a word pointer, see the FIXME above.
 */
#if 0
// 64 bit variant: a single word covers the whole 8 pixel row
typedef uint64_t pixop_word;
#define PIXOP_LD(a) LD64(a)
#else
// 32 bit variant: two words per 8 pixel row
typedef uint32_t pixop_word;
#define PIXOP_LD(a) LD32(a)
#endif

/* replicate the byte value b into every byte of a pixop_word */
#define PIXOP_REP(b) ((pixop_word)(~(pixop_word)0 / 0xFF * (b)))

/* per byte (a+b+1)>>1, i.e. the average rounded up */
static inline pixop_word pixop_rnd_avg(pixop_word a, pixop_word b)
{
    return (a | b) - (((a ^ b) & PIXOP_REP(0xFE)) >> 1);
}

/* per byte (a+b)>>1, i.e. the average rounded down */
static inline pixop_word pixop_no_rnd_avg(pixop_word a, pixop_word b)
{
    return (a & b) + (((a ^ b) & PIXOP_REP(0xFE)) >> 1);
}

/*
 * Defines NAME(block, pixels, line_size, h): for each of the h rows, combine
 * the 8 source pixels with the 8 pixels OFFSET bytes further on using AVG and
 * hand the result to OP(destination word, value).
 * OFFSET is 1 for horizontal and line_size for vertical half-pel positions.
 */
#define PIXOP2_L2(NAME, OP, OFFSET, AVG) \
void NAME(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        int x;\
        for(x=0; x<8; x+=(int)sizeof(pixop_word)){\
            const pixop_word a= PIXOP_LD(pixels+x);\
            const pixop_word b= PIXOP_LD(pixels+x+(OFFSET));\
            OP(*((pixop_word*)(block+x)), AVG(a, b));\
        }\
        pixels+=line_size;\
        block +=line_size;\
    }\
}

/*
 * Defines NAME(block, pixels, line_size, h): per byte
 * (p[x] + p[x+1] + p[x+line_size] + p[x+line_size+1] + ROUND)>>2
 * handed to OP(destination word, value). ROUND is 2 for the rounding and 1
 * for the no_rnd flavour.
 *
 * Each pixel is split into its high 6 bits (pre-shifted by 2) and its low
 * 2 bits so that the four-term sum fits into a byte. The horizontal sums of
 * a source row are kept and reused for the next output row, so rows 0..h of
 * the source are read exactly once per word column and exactly h rows are
 * written, for odd h as well as even h.
 */
#define PIXOP2_XY2(NAME, OP, ROUND) \
void NAME(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int x;\
    for(x=0; x<8; x+=(int)sizeof(pixop_word)){\
        const uint8_t *src= pixels+x;\
        uint8_t *dst= block+x;\
        pixop_word a= PIXOP_LD(src  );\
        pixop_word b= PIXOP_LD(src+1);\
        pixop_word l0= (a&PIXOP_REP(0x03))\
                     + (b&PIXOP_REP(0x03));\
        pixop_word h0= ((a&PIXOP_REP(0xFC))>>2)\
                     + ((b&PIXOP_REP(0xFC))>>2);\
        int i;\
        for(i=0; i<h; i++){\
            pixop_word l1, h1;\
            src+=line_size;\
            a= PIXOP_LD(src  );\
            b= PIXOP_LD(src+1);\
            l1= (a&PIXOP_REP(0x03))\
              + (b&PIXOP_REP(0x03));\
            h1= ((a&PIXOP_REP(0xFC))>>2)\
              + ((b&PIXOP_REP(0xFC))>>2);\
            /* the mask drops the bits shifted in from the neighbouring byte */\
            OP(*((pixop_word*)dst), h0+h1+(((l0+l1+PIXOP_REP(ROUND))>>2)&PIXOP_REP(0x0F)));\
            dst+=line_size;\
            l0= l1;\
            h0= h1;\
        }\
    }\
}

/*
 * Instantiates the full set of 8xh primitives for one store operation:
 *   OPNAME_pixels                        plain copy through OP
 *   OPNAME_pixels_x2 / _y2 / _xy2        half-pel, rounding up
 *   OPNAME_no_rnd_pixels_x2 / _y2 / _xy2 half-pel, rounding down
 * plus the two dispatch tables indexed 0: full-pel, 1: x2, 2: y2, 3: xy2.
 * OP(a, b) receives the destination word lvalue a and the computed word b.
 */
#define PIXOP2(OPNAME, OP) \
void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        int x;\
        for(x=0; x<8; x+=(int)sizeof(pixop_word)){\
            OP(*((pixop_word*)(block+x)), PIXOP_LD(pixels+x));\
        }\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
PIXOP2_L2(OPNAME ## _no_rnd_pixels_x2, OP, 1, pixop_no_rnd_avg)\
PIXOP2_L2(OPNAME ## _pixels_x2, OP, 1, pixop_rnd_avg)\
PIXOP2_L2(OPNAME ## _no_rnd_pixels_y2, OP, line_size, pixop_no_rnd_avg)\
PIXOP2_L2(OPNAME ## _pixels_y2, OP, line_size, pixop_rnd_avg)\
PIXOP2_XY2(OPNAME ## _pixels_xy2, OP, 0x02)\
PIXOP2_XY2(OPNAME ## _no_rnd_pixels_xy2, OP, 0x01)\
\
void (*OPNAME ## _pixels_tab[4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
    OPNAME ## _pixels,\
    OPNAME ## _pixels_x2,\
    OPNAME ## _pixels_y2,\
    OPNAME ## _pixels_xy2,\
};\
\
void (*OPNAME ## _no_rnd_pixels_tab[4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
    OPNAME ## _pixels,\
    OPNAME ## _no_rnd_pixels_x2,\
    OPNAME ## _no_rnd_pixels_y2,\
    OPNAME ## _no_rnd_pixels_xy2,\
};

/* avg: blend the new value into the destination, always rounding up */
#define op_avg(a, b) a = pixop_rnd_avg(a, b)
/* put: overwrite the destination */
#define op_put(a, b) a = b

PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put
/* FIXME this stuff could be removed as it's not really used anymore */
#define PIXOP(BTYPE, OPNAME, OP, INCR) \
#define PIXOP(BTYPE, OPNAME, OP, INCR) \
\
\
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
...
@@ -356,13 +697,9 @@ void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_siz
...
@@ -356,13 +697,9 @@ void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_siz
#define avg2(a,b) ((a+b+1)>>1)
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
#define op_put(a, b) a = b
#define op_avg(a, b) a = avg2(a, b)
#define op_avg(a, b) a = avg2(a, b)
#define op_sub(a, b) a -= b
#define op_sub(a, b) a -= b
PIXOP
(
UINT8
,
put
,
op_put
,
line_size
)
PIXOP
(
UINT8
,
avg
,
op_avg
,
line_size
)
PIXOP
(
DCTELEM
,
sub
,
op_sub
,
8
)
PIXOP
(
DCTELEM
,
sub
,
op_sub
,
8
)
/* not rounding primitives */
/* not rounding primitives */
...
@@ -371,9 +708,6 @@ PIXOP(DCTELEM, sub, op_sub, 8)
...
@@ -371,9 +708,6 @@ PIXOP(DCTELEM, sub, op_sub, 8)
#define avg2(a,b) ((a+b)>>1)
#define avg2(a,b) ((a+b)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+1)>>2)
#define avg4(a,b,c,d) ((a+b+c+d+1)>>2)
PIXOP
(
UINT8
,
put_no_rnd
,
op_put
,
line_size
)
PIXOP
(
UINT8
,
avg_no_rnd
,
op_avg
,
line_size
)
/* motion estimation */
/* motion estimation */
#undef avg2
#undef avg2
...
@@ -381,6 +715,8 @@ PIXOP(UINT8, avg_no_rnd, op_avg, line_size)
...
@@ -381,6 +715,8 @@ PIXOP(UINT8, avg_no_rnd, op_avg, line_size)
#define avg2(a,b) ((a+b+1)>>1)
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
/* end of removable stuff */
static
void
gmc1_c
(
UINT8
*
dst
,
UINT8
*
src
,
int
srcStride
,
int
h
,
int
x16
,
int
y16
,
int
rounder
)
static
void
gmc1_c
(
UINT8
*
dst
,
UINT8
*
src
,
int
srcStride
,
int
h
,
int
x16
,
int
y16
,
int
rounder
)
{
{
const
int
A
=
(
16
-
x16
)
*
(
16
-
y16
);
const
int
A
=
(
16
-
x16
)
*
(
16
-
y16
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment