Commit 8f2b21a8 authored by Michael Niedermayer

fixing memalign

Originally committed as revision 949 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 5596c60c
...@@ -29,8 +29,8 @@ ...@@ -29,8 +29,8 @@
void *av_malloc(int size) void *av_malloc(int size)
{ {
void *ptr; void *ptr;
#if defined ( ARCH_X86 ) && defined ( HAVE_MEMALIGN ) && 0 #if defined (HAVE_MEMALIGN)
ptr = memalign(64,size); ptr = memalign(16,size);
/* Why 64? /* Why 64?
Indeed, we should align it: Indeed, we should align it:
on 4 for 386 on 4 for 386
...@@ -40,11 +40,29 @@ void *av_malloc(int size) ...@@ -40,11 +40,29 @@ void *av_malloc(int size)
Because L1 and L2 caches are aligned on those values. Because L1 and L2 caches are aligned on those values.
But I don't want to code such logic here! But I don't want to code such logic here!
*/ */
/* Why 16?
Because some CPUs require aligned memory accesses, for example SSE2 on the P4 and most RISC CPUs.
On an unaligned access they either trigger an exception, with the unaligned load then being done in the
exception handler, or they simply segfault (SSE2 on P4).
Why not larger? Because I didn't see a difference in benchmarks ...
*/
/* benchmarks with p3
memalign(64)+1 3071,3051,3032
memalign(64)+2 3051,3032,3041
memalign(64)+4 2911,2896,2915
memalign(64)+8 2545,2554,2550
memalign(64)+16 2543,2572,2563
memalign(64)+32 2546,2545,2571
memalign(64)+64 2570,2533,2558
btw, malloc seems to do 8 byte alignment by default here
*/
#else #else
ptr = malloc(size); ptr = malloc(size);
#endif #endif
if (!ptr) if (!ptr)
return NULL; return NULL;
//fprintf(stderr, "%X %d\n", (int)ptr, size);
/* NOTE: this memset should not be present */ /* NOTE: this memset should not be present */
memset(ptr, 0, size); memset(ptr, 0, size);
return ptr; return ptr;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment