Commit 36157009 authored by Nick Kurshev

Aligned malloc. Another 10% of speedup.


Originally committed as revision 111 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent a74127c0
......@@ -20,6 +20,11 @@
#include <stdio.h>
#include <string.h>
#include <errno.h>
/*
 * On glibc, <limits.h> pulls in <features.h>, which is what actually defines
 * __GLIBC__ and __GLIBC_MINOR__.  memalign() is taken to be usable from
 * glibc 2.1 onwards (FIXME: glibc 2.0 has not been checked).
 */
#include <limits.h>
/*
 * Compare (major, minor) as a pair: testing the two numbers independently
 * would reject any later major release whose minor number is 0.  The
 * defined() checks keep non-glibc systems on the plain malloc() path without
 * evaluating undefined macros.
 */
#if defined(__GLIBC__) && defined(__GLIBC_MINOR__) && \
    (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 1))
#define HAVE_MEMALIGN 1
#include <malloc.h>
#endif
#include "common.h"
#include "dsputil.h"
#include "avcodec.h"
......@@ -28,7 +33,33 @@
void *av_mallocz(int size)
{
void *ptr;
#if defined ( ARCH_X86 ) && defined ( HAVE_MEMALIGN )
/*
From glibc-2.1.x manuals:
-------------------------
The address of a block returned by `malloc' or `realloc' in the GNU
system is always a multiple of eight (or sixteen on 64-bit systems).
If you need a block whose address is a multiple of a higher power of
two than that, use `memalign' or `valloc'. These functions are
declared in `stdlib.h'.
With the GNU library, you can use `free' to free the blocks that
`memalign' and `valloc' return. That does not work in BSD,
however--BSD does not provide any way to free such blocks.
*/
ptr = memalign(64,size);
/* Why 64?
Ideally the alignment would match the CPU:
 4 for 386
16 for 486
32 for 586, PPro through K6-III
64 for K7 (maybe for P3 too),
because the L1 and L2 caches are aligned on those values.
Selecting it per CPU is deliberately not implemented here, so the
largest value is used for all of them.
*/
#else
ptr = malloc(size);
#endif
if (!ptr)
return NULL;
memset(ptr, 0, size);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment