mem.c 8.27 KB
Newer Older
1
/*
2
 * default memory allocator for libavutil
3
 * Copyright (c) 2002 Fabrice Bellard
4
 *
5 6 7
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
8 9
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13 14 15 16 17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21

Michael Niedermayer's avatar
Michael Niedermayer committed
22
/**
23
 * @file
24
 * default memory allocator for libavutil
Michael Niedermayer's avatar
Michael Niedermayer committed
25
 */
26

27 28
#define _XOPEN_SOURCE 600

29
#include "config.h"
30

31
#include <limits.h>
32
#include <stdint.h>
33
#include <stdlib.h>
34
#include <string.h>
35
#if HAVE_MALLOC_H
36 37 38
#include <malloc.h>
#endif

39
#include "avutil.h"
40
#include "intreadwrite.h"
41 42
#include "mem.h"

43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
/*
 * Optional allocator renaming.
 *
 * When the build defines MALLOC_PREFIX, every use of malloc, memalign,
 * posix_memalign, realloc and free in the rest of this file is rewritten
 * to AV_JOIN(MALLOC_PREFIX, <name>). AV_JOIN is defined outside this file;
 * presumably it pastes the two tokens together -- confirm in its header.
 * Because the renamed symbols are not declared by the system headers, they
 * are prototyped here; after macro expansion these prototypes declare the
 * prefixed functions.
 */
#ifdef MALLOC_PREFIX

#define malloc         AV_JOIN(MALLOC_PREFIX, malloc)
#define memalign       AV_JOIN(MALLOC_PREFIX, memalign)
#define posix_memalign AV_JOIN(MALLOC_PREFIX, posix_memalign)
#define realloc        AV_JOIN(MALLOC_PREFIX, realloc)
#define free           AV_JOIN(MALLOC_PREFIX, free)

void *malloc(size_t size);
void *memalign(size_t align, size_t size);
int   posix_memalign(void **ptr, size_t align, size_t size);
void *realloc(void *ptr, size_t size);
void  free(void *ptr);

#endif /* MALLOC_PREFIX */

59 60
/* Alignment, in bytes, requested by the allocation code in this file:
 * 32 when HAVE_AVX is nonzero, 16 otherwise. */
#define ALIGN (HAVE_AVX ? 32 : 16)

61 62 63 64
/* NOTE: if you want to override these functions with your own
 * implementations (not recommended) you have to link libav* as
 * dynamic libraries and remove -Wl,-Bsymbolic from the linker flags.
 * Note that this will cost performance. */
65

66 67 68 69 70
/* Size limit, in bytes, that av_malloc() and av_realloc() compare each
 * request against (they refuse anything above this value minus 32). */
static size_t max_alloc_size = INT_MAX;

/**
 * Replace the allocation size limit.
 *
 * @param max new limit in bytes; stored as-is, no validation is done here
 */
void av_max_alloc(size_t max)
{
    max_alloc_size = max;
}
71

72
void *av_malloc(size_t size)
73
{
74
    void *ptr = NULL;
75
#if CONFIG_MEMALIGN_HACK
76
    long diff;
77
#endif
78

79
    /* let's disallow possible ambiguous cases */
80
    if (size > (max_alloc_size - 32))
81
        return NULL;
82

83
#if CONFIG_MEMALIGN_HACK
84
    ptr = malloc(size + ALIGN);
85
    if (!ptr)
86
        return ptr;
87
    diff              = ((~(long)ptr)&(ALIGN - 1)) + 1;
88 89
    ptr               = (char *)ptr + diff;
    ((char *)ptr)[-1] = diff;
90
#elif HAVE_POSIX_MEMALIGN
91
    if (size) //OS X on SDK 10.6 has a broken posix_memalign implementation
92
    if (posix_memalign(&ptr, ALIGN, size))
93
        ptr = NULL;
94
#elif HAVE_ALIGNED_MALLOC
95
    ptr = _aligned_malloc(size, ALIGN);
96
#elif HAVE_MEMALIGN
97
    ptr = memalign(ALIGN, size);
98
    /* Why 64?
99 100 101 102 103 104 105
     * Indeed, we should align it:
     *   on  4 for 386
     *   on 16 for 486
     *   on 32 for 586, PPro - K6-III
     *   on 64 for K7 (maybe for P3 too).
     * Because L1 and L2 caches are aligned on those values.
     * But I don't want to code such logic here!
106
     */
107 108 109
    /* Why 32?
     * For AVX ASM. SSE / NEON needs only 16.
     * Why not larger? Because I did not see a difference in benchmarks ...
Michael Niedermayer's avatar
Michael Niedermayer committed
110
     */
111 112 113 114 115 116 117 118 119 120
    /* benchmarks with P3
     * memalign(64) + 1          3071, 3051, 3032
     * memalign(64) + 2          3051, 3032, 3041
     * memalign(64) + 4          2911, 2896, 2915
     * memalign(64) + 8          2545, 2554, 2550
     * memalign(64) + 16         2543, 2572, 2563
     * memalign(64) + 32         2546, 2545, 2571
     * memalign(64) + 64         2570, 2533, 2558
     *
     * BTW, malloc seems to do 8-byte alignment by default here.
Michael Niedermayer's avatar
Michael Niedermayer committed
121
     */
122 123 124
#else
    ptr = malloc(size);
#endif
125 126
    if(!ptr && !size) {
        size = 1;
127
        ptr= av_malloc(1);
128 129 130 131 132
    }
#if CONFIG_MEMORY_POISONING
    if (ptr)
        memset(ptr, 0x2a, size);
#endif
133 134 135
    return ptr;
}

136
void *av_realloc(void *ptr, size_t size)
137
{
138
#if CONFIG_MEMALIGN_HACK
139 140
    int diff;
#endif
141

142
    /* let's disallow possible ambiguous cases */
143
    if (size > (max_alloc_size - 32))
144 145
        return NULL;

146
#if CONFIG_MEMALIGN_HACK
147
    //FIXME this isn't aligned correctly, though it probably isn't needed
148 149 150
    if (!ptr)
        return av_malloc(size);
    diff = ((char *)ptr)[-1];
151 152 153
    ptr = realloc((char *)ptr - diff, size + diff);
    if (ptr)
        ptr = (char *)ptr + diff;
154
    return ptr;
155
#elif HAVE_ALIGNED_MALLOC
156
    return _aligned_realloc(ptr, size + !size, ALIGN);
157
#else
158
    return realloc(ptr, size + !size);
159
#endif
Michael Niedermayer's avatar
Michael Niedermayer committed
160 161
}

Nicolas George's avatar
Nicolas George committed
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
/**
 * Resize ptr to nelem * elsize bytes, releasing ptr when that is not possible.
 *
 * @param ptr    block to resize (may be NULL)
 * @param nelem  number of elements
 * @param elsize size of one element in bytes
 * @return the resized block; NULL if av_size_mult() reports an error for
 *         the product or if av_realloc() fails, in which case ptr has
 *         been passed to av_free()
 */
void *av_realloc_f(void *ptr, size_t nelem, size_t elsize)
{
    size_t total;
    void *resized;

    /* nonzero from av_size_mult() means the byte count is unusable */
    if (av_size_mult(elsize, nelem, &total)) {
        av_free(ptr);
        return NULL;
    }

    resized = av_realloc(ptr, total);
    if (resized || !total)
        return resized;

    /* reallocation failed for a nonzero size: drop the old block as well */
    av_free(ptr);
    return NULL;
}

177 178
/**
 * Release a block obtained from av_malloc() or av_realloc().
 *
 * @param ptr block to release; NULL is accepted and ignored
 */
void av_free(void *ptr)
{
#if CONFIG_MEMALIGN_HACK
    /* the byte before the block holds the offset back to the address that
     * malloc() returned; the NULL check is required because of that read */
    if (ptr)
        free((char *)ptr - ((char *)ptr)[-1]);
#elif HAVE_ALIGNED_MALLOC
    _aligned_free(ptr);
#else
    free(ptr);
#endif
}

189 190
/**
 * Free the block referenced by a pointer variable and set that variable
 * to NULL.
 *
 * @param arg address of the pointer variable (declared void * so that a
 *            pointer to any object-pointer type can be passed without a
 *            cast); must not be NULL
 */
void av_freep(void *arg)
{
    void *val;
    void *cleared = NULL;

    /* Copy the pointer value in and out with memcpy() instead of
     * dereferencing arg as void **: the caller's variable usually has a
     * different pointer type, and accessing it through a void * lvalue
     * would violate the aliasing rules. */
    memcpy(&val, arg, sizeof(val));
    /* clear the caller's pointer before freeing so it never dangles */
    memcpy(arg, &cleared, sizeof(cleared));
    av_free(val);
}

196
/**
 * Allocate a block with av_malloc() and fill it with zero bytes.
 *
 * @param size number of bytes to allocate
 * @return pointer to the zeroed block, or NULL if av_malloc() fails
 */
void *av_mallocz(size_t size)
{
    void *ptr = av_malloc(size);

    if (ptr)
        memset(ptr, 0, size);
    return ptr;
}

Laurent Aimar's avatar
Laurent Aimar committed
204 205 206 207 208 209 210
/**
 * Allocate a zeroed array of nmemb elements of size bytes each.
 *
 * @param nmemb number of elements
 * @param size  size of one element in bytes
 * @return pointer to the zeroed array from av_mallocz(); NULL if size is 0,
 *         if nmemb is not below INT_MAX / size, or if the allocation fails
 */
void *av_calloc(size_t nmemb, size_t size)
{
    /* size is unsigned, so zero is the only non-positive value */
    if (!size)
        return NULL;

    /* keep the total byte count below INT_MAX */
    if (nmemb >= INT_MAX / size)
        return NULL;

    return av_mallocz(nmemb * size);
}

211 212
/**
 * Duplicate a NUL-terminated string into memory from av_malloc().
 *
 * @param s string to copy, or NULL
 * @return newly allocated copy of s, or NULL if s is NULL or the
 *         allocation fails
 */
char *av_strdup(const char *s)
{
    char *ptr = NULL;

    if (s) {
        /* size_t, not int: strlen() returns size_t and the value is passed
         * straight on to av_malloc() and memcpy() */
        size_t len = strlen(s) + 1;

        ptr = av_malloc(len);
        if (ptr)
            memcpy(ptr, s, len);
    }
    return ptr;
}

223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242
/**
 * Add one element to a dynamic array of pointer-sized entries.
 *
 * The array is reallocated whenever the current element count is zero or
 * a power of two: to one entry when it is empty, otherwise to twice the
 * current count.
 *
 * If growing is impossible (the new size would overflow, or av_realloc()
 * fails) the array is released with av_freep(), which sets the caller's
 * array pointer to NULL, and *nb_ptr is set to 0. The function returns
 * void, so that state is the only failure indication.
 *
 * @param tab_ptr address of the array pointer
 * @param nb_ptr  address of the element count; incremented on success
 * @param elem    value to append
 */
void av_dynarray_add(void *tab_ptr, int *nb_ptr, void *elem)
{
    /* see similar ffmpeg.c:grow_array() */
    int nb, nb_alloc;
    intptr_t *tab;

    nb  = *nb_ptr;
    tab = *(intptr_t**)tab_ptr;
    if ((nb & (nb - 1)) == 0) {
        if (nb == 0) {
            nb_alloc = 1;
        } else {
            /* neither nb * 2 nor the byte count below may overflow */
            if ((size_t)nb > INT_MAX / (2 * sizeof(intptr_t)))
                goto fail;
            nb_alloc = nb * 2;
        }
        tab = av_realloc(tab, nb_alloc * sizeof(intptr_t));
        if (!tab)
            goto fail;
        *(intptr_t**)tab_ptr = tab;
    }
    tab[nb++] = (intptr_t)elem;
    *nb_ptr   = nb;
    return;

fail:
    /* *tab_ptr still refers to the old array here: it is only overwritten
     * after a successful reallocation */
    av_freep(tab_ptr);
    *nb_ptr = 0;
}
243

244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317
/**
 * Fill len bytes at dst by repeating the two bytes that precede dst.
 *
 * @param dst first byte to write; dst[-2] and dst[-1] are read as the pattern
 * @param len number of bytes to write
 */
static void fill16(uint8_t *dst, int len)
{
    uint32_t pair = AV_RN16(dst - 2);
    uint32_t word = pair | pair << 16;  /* the pattern twice in one 32-bit store */

    for (; len >= 4; len -= 4, dst += 4)
        AV_WN32(dst, word);

    /* remaining bytes: copy one at a time from two bytes back */
    for (; len; len--, dst++)
        *dst = dst[-2];
}

/**
 * Fill len bytes at dst by repeating the three bytes that precede dst.
 *
 * Twelve bytes hold four repetitions of a 3-byte pattern, so three 32-bit
 * words (w0, w1, w2) built from the pattern are written in rotation.
 *
 * @param dst first byte to write; dst[-3] .. dst[-1] are read as the pattern
 * @param len number of bytes to write
 */
static void fill24(uint8_t *dst, int len)
{
#if HAVE_BIGENDIAN
    uint32_t px = AV_RB24(dst - 3);
    uint32_t w0 = px << 8  | px >> 16;
    uint32_t w1 = px << 16 | px >> 8;
    uint32_t w2 = px << 24 | px;
#else
    uint32_t px = AV_RL24(dst - 3);
    uint32_t w0 = px       | px << 24;
    uint32_t w1 = px >> 8  | px << 16;
    uint32_t w2 = px >> 16 | px << 8;
#endif

    /* whole 12-byte groups */
    for (; len >= 12; len -= 12, dst += 12) {
        AV_WN32(dst,     w0);
        AV_WN32(dst + 4, w1);
        AV_WN32(dst + 8, w2);
    }

    /* at most two more whole words fit before the byte-wise tail */
    if (len >= 4) {
        AV_WN32(dst, w0);
        dst += 4;
        len -= 4;

        if (len >= 4) {
            AV_WN32(dst, w1);
            dst += 4;
            len -= 4;
        }
    }

    /* remaining bytes: copy one at a time from three bytes back */
    for (; len; len--, dst++)
        *dst = dst[-3];
}

/**
 * Fill len bytes at dst by repeating the four bytes that precede dst.
 *
 * @param dst first byte to write; dst[-4] .. dst[-1] are read as the pattern
 * @param len number of bytes to write
 */
static void fill32(uint8_t *dst, int len)
{
    uint32_t word = AV_RN32(dst - 4);

    for (; len >= 4; len -= 4, dst += 4)
        AV_WN32(dst, word);

    /* remaining bytes: copy one at a time from four bytes back */
    for (; len; len--, dst++)
        *dst = dst[-4];
}

318 319 320
/**
 * Deliberately overlapping copy: write cnt bytes at dst, each taken from
 * back bytes earlier in the same buffer, so that the back bytes preceding
 * dst repeat as a pattern.
 *
 * @param dst  first byte to write
 * @param back distance in bytes from dst back to the start of the pattern;
 *             values <= 0 make the call a no-op
 * @param cnt  number of bytes to write; values <= 0 make the call a no-op
 */
void av_memcpy_backptr(uint8_t *dst, int back, int cnt)
{
    const uint8_t *src;

    /* With back <= 0 there is no earlier data to repeat (the source would
     * be dst itself or memory after it). A negative cnt would be turned
     * into a huge size by memset() and the fill helpers, and would still
     * write one stray byte in the tail code below. */
    if (back <= 0 || cnt <= 0)
        return;

    src = &dst[-back];

    if (back == 1) {
        memset(dst, *src, cnt);
    } else if (back == 2) {
        fill16(dst, cnt);
    } else if (back == 3) {
        fill24(dst, cnt);
    } else if (back == 4) {
        fill32(dst, cnt);
    } else {
        if (cnt >= 16) {
            /* src stays fixed while dst advances, so the amount of valid
             * data between them doubles after every copy; each memcpy()
             * copies exactly that much and therefore never overlaps */
            int blocklen = back;
            while (cnt > blocklen) {
                memcpy(dst, src, blocklen);
                dst       += blocklen;
                cnt       -= blocklen;
                blocklen <<= 1;
            }
            memcpy(dst, src, cnt);
            return;
        }
        /* short copies (cnt < 16): peel off 8, 4, 2 and 1 bytes */
        if (cnt >= 8) {
            AV_COPY32U(dst,     src);
            AV_COPY32U(dst + 4, src + 4);
            src += 8;
            dst += 8;
            cnt -= 8;
        }
        if (cnt >= 4) {
            AV_COPY32U(dst, src);
            src += 4;
            dst += 4;
            cnt -= 4;
        }
        if (cnt >= 2) {
            AV_COPY16U(dst, src);
            src += 2;
            dst += 2;
            cnt -= 2;
        }
        if (cnt)
            *dst = *src;
    }
}
364