cabac.h 23.9 KB
Newer Older
Michael Niedermayer's avatar
Michael Niedermayer committed
1 2 3 4
/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
5 6 7
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
Michael Niedermayer's avatar
Michael Niedermayer committed
8 9
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
Michael Niedermayer's avatar
Michael Niedermayer committed
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
Michael Niedermayer's avatar
Michael Niedermayer committed
13 14 15 16 17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Michael Niedermayer's avatar
Michael Niedermayer committed
20
 */
21

Michael Niedermayer's avatar
Michael Niedermayer committed
22
/**
23
 * @file libavcodec/cabac.h
Michael Niedermayer's avatar
Michael Niedermayer committed
24 25 26
 * Context Adaptive Binary Arithmetic Coder.
 */

27 28
#ifndef AVCODEC_CABAC_H
#define AVCODEC_CABAC_H
29

30
#include "put_bits.h"
Michael Niedermayer's avatar
Michael Niedermayer committed
31

32
//#undef NDEBUG
Michael Niedermayer's avatar
Michael Niedermayer committed
33
#include <assert.h>
34
#include "libavutil/x86_cpu.h"
Michael Niedermayer's avatar
Michael Niedermayer committed
35

36
/**
 * Number of bitstream bits pulled in per decoder refill.
 * The refill code reads two bytes when this is 16 and one byte otherwise.
 */
#define CABAC_BITS 16
/** Mask covering the low CABAC_BITS bits of the decoder's low register. */
#define CABAC_MASK ((1<<CABAC_BITS)-1)
/** When defined, get_cabac_inline() uses its branchless code paths. */
#define BRANCHLESS_CABAC_DECODER 1
//#define ARCH_X86_DISABLED 1
Michael Niedermayer's avatar
Michael Niedermayer committed
40

Michael Niedermayer's avatar
Michael Niedermayer committed
41 42 43 44 45 46 47
typedef struct CABACContext{
    int low;
    int range;
    int outstanding_count;
#ifdef STRICT_LIMITS
    int symCount;
#endif
48 49
    const uint8_t *bytestream_start;
    const uint8_t *bytestream;
Michael Niedermayer's avatar
Michael Niedermayer committed
50
    const uint8_t *bytestream_end;
Michael Niedermayer's avatar
Michael Niedermayer committed
51 52 53
    PutBitContext pb;
}CABACContext;

54
extern uint8_t ff_h264_mlps_state[4*64];    ///< state transitions, read by the decoder as (ff_h264_mlps_state+128)[s]
extern uint8_t ff_h264_lps_range[4*2*64];  ///< rangeTabLPS
extern uint8_t ff_h264_mps_state[2*64];     ///< transIdxMPS
extern uint8_t ff_h264_lps_state[2*64];     ///< transIdxLPS
extern const uint8_t ff_h264_norm_shift[512]; ///< shift counts looked up during renormalisation
Michael Niedermayer's avatar
Michael Niedermayer committed
59

Michael Niedermayer's avatar
Michael Niedermayer committed
60 61

/*
 * Initialisers implemented outside this header.
 * NOTE(review): the roles below are inferred from the names and
 * signatures only -- confirm against the implementation file.
 */
/** Presumably prepares @p c to encode into @p buf of @p buf_size bytes. */
void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
/** Presumably prepares @p c to decode from @p buf of @p buf_size bytes. */
void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
/** Presumably fills the ff_h264_* state tables. */
void ff_init_cabac_states(CABACContext *c);
Michael Niedermayer's avatar
Michael Niedermayer committed
64 65 66


/**
 * Write one resolved bit followed by every pending outstanding bit.
 *
 * @param c encoder context; outstanding_count is zero on return
 * @param b bit to write; each outstanding bit is written as 1-b
 */
static inline void put_cabac_bit(CABACContext *c, int b){
    put_bits(&c->pb, 1, b);
    for(;c->outstanding_count; c->outstanding_count--){
        put_bits(&c->pb, 1, 1-b);
    }
}

/**
 * Renormalise the encoder: double range and low until range >= 0x100,
 * emitting or deferring one bit per doubling.
 */
static inline void renorm_cabac_encoder(CABACContext *c){
    while(c->range < 0x100){
        //FIXME optimize
        if(c->low<0x100){
            put_cabac_bit(c, 0);
        }else if(c->low<0x200){
            /* bit value not yet decided: defer it */
            c->outstanding_count++;
            c->low -= 0x100;
        }else{
            put_cabac_bit(c, 1);
            c->low -= 0x200;
        }

        c->range+= c->range;
        c->low += c->low;
    }
}

91
#ifdef TEST
92
/**
 * Encode one bin using an adaptive context.
 *
 * @param c     encoder context
 * @param state context state byte; bit 0 holds the current most probable
 *              symbol. Updated in place through the MPS/LPS transition tables.
 * @param bit   bin value to encode
 */
static void put_cabac(CABACContext *c, uint8_t * const state, int bit){
    int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + *state];

    if(bit == ((*state)&1)){
        /* most probable symbol */
        c->range -= RangeLPS;
        *state= ff_h264_mps_state[*state];
    }else{
        /* least probable symbol */
        c->low += c->range - RangeLPS;
        c->range = RangeLPS;
        *state= ff_h264_lps_state[*state];
    }

    renorm_cabac_encoder(c);

#ifdef STRICT_LIMITS
    c->symCount++;
#endif
}

111
/**
 * Encode one bin with a caller-supplied, non-adaptive LPS range.
 *
 * @param c        encoder context
 * @param RangeLPS sub-range assigned to bit!=0; must be smaller than c->range
 * @param bit      0 keeps the upper part of the range, !=0 selects RangeLPS
 */
static void put_cabac_static(CABACContext *c, int RangeLPS, int bit){
    assert(c->range > RangeLPS);

    if(!bit){
        c->range -= RangeLPS;
    }else{
        c->low += c->range - RangeLPS;
        c->range = RangeLPS;
    }

    renorm_cabac_encoder(c);

#ifdef STRICT_LIMITS
    c->symCount++;
#endif
}

128 129 130
/**
 * @param bit 0 -> write zero bit, !=0 write one bit
 */
131
static void put_cabac_bypass(CABACContext *c, int bit){
Michael Niedermayer's avatar
Michael Niedermayer committed
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
    c->low += c->low;

    if(bit){
        c->low += c->range;
    }
//FIXME optimize
    if(c->low<0x200){
        put_cabac_bit(c, 0);
    }else if(c->low<0x400){
        c->outstanding_count++;
        c->low -= 0x200;
    }else{
        put_cabac_bit(c, 1);
        c->low -= 0x400;
    }
147

Michael Niedermayer's avatar
Michael Niedermayer committed
148 149 150 151 152
#ifdef STRICT_LIMITS
    c->symCount++;
#endif
}

153 154 155 156
/**
 *
 * @return the number of bytes written
 */
157
static int put_cabac_terminate(CABACContext *c, int bit){
Michael Niedermayer's avatar
Michael Niedermayer committed
158 159 160 161 162 163 164
    c->range -= 2;

    if(!bit){
        renorm_cabac_encoder(c);
    }else{
        c->low += c->range;
        c->range= 2;
165

Michael Niedermayer's avatar
Michael Niedermayer committed
166 167 168 169 170
        renorm_cabac_encoder(c);

        assert(c->low <= 0x1FF);
        put_cabac_bit(c, c->low>>9);
        put_bits(&c->pb, 2, ((c->low>>7)&3)|1);
171

Michael Niedermayer's avatar
Michael Niedermayer committed
172 173
        flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
    }
174

Michael Niedermayer's avatar
Michael Niedermayer committed
175 176 177
#ifdef STRICT_LIMITS
    c->symCount++;
#endif
178

179
    return (put_bits_count(&c->pb)+7)>>3;
Michael Niedermayer's avatar
Michael Niedermayer committed
180 181
}

182 183 184
/**
 * put (truncated) unary binarization.
 */
185
static void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){
186
    int i;
187

188
    assert(v <= max);
189

190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
#if 1
    for(i=0; i<v; i++){
        put_cabac(c, state, 1);
        if(i < max_index) state++;
    }
    if(truncated==0 || v<max)
        put_cabac(c, state, 0);
#else
    if(v <= max_index){
        for(i=0; i<v; i++){
            put_cabac(c, state+i, 1);
        }
        if(truncated==0 || v<max)
            put_cabac(c, state+i, 0);
    }else{
        for(i=0; i<=max_index; i++){
            put_cabac(c, state+i, 1);
        }
        for(; i<v; i++){
            put_cabac(c, state+max_index, 1);
        }
        if(truncated==0 || v<max)
            put_cabac(c, state+max_index, 0);
    }
#endif
}

/**
 * put unary exp golomb k-th order binarization.
 */
220
static void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int max, int is_signed, int k, int max_index){
221
    int i;
222

223 224 225
    if(v==0)
        put_cabac(c, state, 0);
    else{
Michael Niedermayer's avatar
Michael Niedermayer committed
226
        const int sign= v < 0;
227

228
        if(is_signed) v= FFABS(v);
229

230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
        if(v<max){
            for(i=0; i<v; i++){
                put_cabac(c, state, 1);
                if(i < max_index) state++;
            }

            put_cabac(c, state, 0);
        }else{
            int m= 1<<k;

            for(i=0; i<max; i++){
                put_cabac(c, state, 1);
                if(i < max_index) state++;
            }

            v -= max;
            while(v >= m){ //FIXME optimize
                put_cabac_bypass(c, 1);
                v-= m;
                m+= m;
            }
            put_cabac_bypass(c, 0);
            while(m>>=1){
                put_cabac_bypass(c, v&m);
            }
        }

        if(is_signed)
            put_cabac_bypass(c, sign);
    }
}
261
#endif /* TEST */
262

Michael Niedermayer's avatar
Michael Niedermayer committed
263 264
/**
 * Feed CABAC_BITS fresh bitstream bits into the low register.
 *
 * Called when the low CABAC_BITS bits of c->low have become zero.
 * The read position is only advanced while it lies before bytestream_end,
 * so a corrupt or truncated stream cannot walk the pointer arbitrarily far
 * past the buffer. The byte loads themselves are unconditional.
 * NOTE(review): this still assumes the input buffer has a few readable
 * padding bytes after bytestream_end -- confirm against the allocator.
 */
static void refill(CABACContext *c){
#if CABAC_BITS == 16
    c->low+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
#else
    c->low+= c->bytestream[0]<<1;
#endif
    c->low -= CABAC_MASK;
    if(c->bytestream < c->bytestream_end)
        c->bytestream+= CABAC_BITS/8;
}

273
#if ! ( ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) )
Michael Niedermayer's avatar
Michael Niedermayer committed
274 275 276 277
/**
 * Variant of refill() for the case where c->low was shifted by more than
 * one bit: the new bits are shifted left by a count derived from the
 * lowest set bit of c->low before being added.
 *
 * x is unsigned so that shifting the (possibly "negative") refill value
 * left is well defined; the arithmetic wraps modulo 2^32 exactly as the
 * previous signed code did on two's complement targets.
 * As in refill(), the read position is only advanced while it lies before
 * bytestream_end.
 */
static void refill2(CABACContext *c){
    int i;
    unsigned x;

    /* isolate the bits up to and including the lowest set bit of low */
    x= c->low ^ (c->low-1);
    i= 7 - ff_h264_norm_shift[x>>(CABAC_BITS-1)];

    x= -CABAC_MASK;

#if CABAC_BITS == 16
    x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
#else
    x+= c->bytestream[0]<<1;
#endif

    c->low += x<<i;
    if(c->bytestream < c->bytestream_end)
        c->bytestream+= CABAC_BITS/8;
}
291
#endif
Michael Niedermayer's avatar
Michael Niedermayer committed
292

Michael Niedermayer's avatar
Michael Niedermayer committed
293
/**
 * Renormalise the decoder: double range and low until range >= 0x100,
 * refilling whenever the low CABAC_BITS bits of low run empty.
 */
static inline void renorm_cabac_decoder(CABACContext *c){
    while(c->range < 0x100){
        c->range+= c->range;
        c->low+= c->low;
        if(!(c->low & CABAC_MASK))
            refill(c);
    }
}

Michael Niedermayer's avatar
Michael Niedermayer committed
302
/**
 * Renormalise the decoder by at most one bit: range and low are doubled
 * when range < 0x100 and left untouched otherwise, then low is refilled
 * if its low CABAC_BITS bits are zero.
 *
 * The x86 assembly alternatives are compiled only when ARCH_X86_DISABLED
 * is defined; the comments in front of each carry the author's timing
 * notes. Otherwise the portable shift-based version is used.
 */
static inline void renorm_cabac_decoder_once(CABACContext *c){
#ifdef ARCH_X86_DISABLED
    int temp;
#if 0
    //P3:683    athlon:475
    __asm__(
        "lea -0x100(%0), %2         \n\t"
        "shr $31, %2                \n\t"  //FIXME 31->63 for x86-64
        "shl %%cl, %0               \n\t"
        "shl %%cl, %1               \n\t"
        : "+r"(c->range), "+r"(c->low), "+c"(temp)
    );
#elif 0
    //P3:680    athlon:474
    __asm__(
        "cmp $0x100, %0             \n\t"
        "setb %%cl                  \n\t"  //FIXME 31->63 for x86-64
        "shl %%cl, %0               \n\t"
        "shl %%cl, %1               \n\t"
        : "+r"(c->range), "+r"(c->low), "+c"(temp)
    );
#elif 1
    int temp2;
    //P3:665    athlon:517
    __asm__(
        "lea -0x100(%0), %%eax      \n\t"
        "cltd                       \n\t"
        "mov %0, %%eax              \n\t"
        "and %%edx, %0              \n\t"
        "and %1, %%edx              \n\t"
        "add %%eax, %0              \n\t"
        "add %%edx, %1              \n\t"
        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
    );
#elif 0
    int temp2;
    //P3:673    athlon:509
    __asm__(
        "cmp $0x100, %0             \n\t"
        "sbb %%edx, %%edx           \n\t"
        "mov %0, %%eax              \n\t"
        "and %%edx, %0              \n\t"
        "and %1, %%edx              \n\t"
        "add %%eax, %0              \n\t"
        "add %%edx, %1              \n\t"
        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
    );
#else
    int temp2;
    //P3:677    athlon:511
    __asm__(
        "cmp $0x100, %0             \n\t"
        "lea (%0, %0), %%eax        \n\t"
        "lea (%1, %1), %%edx        \n\t"
        "cmovb %%eax, %0            \n\t"
        "cmovb %%edx, %1            \n\t"
        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
    );
#endif
#else
    //P3:675    athlon:476
    /* shift is 1 when range < 0x100 (the subtraction wraps negative), else 0 */
    int shift= (uint32_t)(c->range - 0x100)>>31;
    c->range<<= shift;
    c->low  <<= shift;
#endif
    if(!(c->low & CABAC_MASK))
        refill(c);
}

371
/**
 * Decode one context-coded bin.
 *
 * Three implementations are selected at compile time: x86 assembly with
 * branches, branchless x86 assembly (BRANCHLESS_CABAC_DECODER), and
 * portable C in both flavours.
 *
 * The LOW, RANGE, BYTESTART, BYTE and BYTEEND macros defined here are the
 * byte offsets of the corresponding CABACContext fields as used by the
 * assembly. They remain defined after this function and are reused by the
 * other asm blocks in this file.
 *
 * @param c     decoder context
 * @param state context state byte, updated in place
 * @return the decoded bin (0 or 1)
 */
static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const state){
    //FIXME gcc generates duplicate load/stores for c->low and c->range
#define LOW          "0"
#define RANGE        "4"
#if ARCH_X86_64
#define BYTESTART   "16"
#define BYTE        "24"
#define BYTEEND     "32"
#else
#define BYTESTART   "12"
#define BYTE        "16"
#define BYTEEND     "20"
#endif
#if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
    int bit;

#ifndef BRANCHLESS_CABAC_DECODER
    __asm__ volatile(
        "movzbl (%1), %0                        \n\t"
        "movl "RANGE    "(%2), %%ebx            \n\t"
        "movl "RANGE    "(%2), %%edx            \n\t"
        "andl $0xC0, %%ebx                      \n\t"
        "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%ebx, 2), %%esi\n\t"
        "movl "LOW      "(%2), %%ebx            \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
        "subl %%esi, %%edx                      \n\t"
        "movl %%edx, %%ecx                      \n\t"
        "shll $17, %%ecx                        \n\t"
        "cmpl %%ecx, %%ebx                      \n\t"
        " ja 1f                                 \n\t"

#if 1
        //athlon:4067 P3:4110
        "lea -0x100(%%edx), %%ecx               \n\t"
        "shr $31, %%ecx                         \n\t"
        "shl %%cl, %%edx                        \n\t"
        "shl %%cl, %%ebx                        \n\t"
#else
        //athlon:4057 P3:4130
        "cmp $0x100, %%edx                      \n\t" //FIXME avoidable
        "setb %%cl                              \n\t"
        "shl %%cl, %%edx                        \n\t"
        "shl %%cl, %%ebx                        \n\t"
#endif
        "movzbl "MANGLE(ff_h264_mps_state)"(%0), %%ecx   \n\t"
        "movb %%cl, (%1)                        \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
        "test %%bx, %%bx                        \n\t"
        " jnz 2f                                \n\t"
        "mov  "BYTE     "(%2), %%"REG_S"        \n\t"
        "subl $0xFFFF, %%ebx                    \n\t"
        "movzwl (%%"REG_S"), %%ecx              \n\t"
        "bswap %%ecx                            \n\t"
        "shrl $15, %%ecx                        \n\t"
        "add  $2, %%"REG_S"                     \n\t"
        "addl %%ecx, %%ebx                      \n\t"
        "mov  %%"REG_S", "BYTE    "(%2)         \n\t"
        "jmp 2f                                 \n\t"
        "1:                                     \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
        "subl %%ecx, %%ebx                      \n\t"
        "movl %%esi, %%edx                      \n\t"
        "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx   \n\t"
        "shll %%cl, %%ebx                       \n\t"
        "shll %%cl, %%edx                       \n\t"
        "movzbl "MANGLE(ff_h264_lps_state)"(%0), %%ecx   \n\t"
        "movb %%cl, (%1)                        \n\t"
        "add  $1, %0                            \n\t"
        "test %%bx, %%bx                        \n\t"
        " jnz 2f                                \n\t"

        "mov  "BYTE     "(%2), %%"REG_c"        \n\t"
        "movzwl (%%"REG_c"), %%esi              \n\t"
        "bswap %%esi                            \n\t"
        "shrl $15, %%esi                        \n\t"
        "subl $0xFFFF, %%esi                    \n\t"
        "add  $2, %%"REG_c"                     \n\t"
        "mov  %%"REG_c", "BYTE    "(%2)         \n\t"

        "leal -1(%%ebx), %%ecx                  \n\t"
        "xorl %%ebx, %%ecx                      \n\t"
        "shrl $15, %%ecx                        \n\t"
        "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx   \n\t"
        "neg %%ecx                              \n\t"
        "add $7, %%ecx                          \n\t"

        "shll %%cl , %%esi                      \n\t"
        "addl %%esi, %%ebx                      \n\t"
        "2:                                     \n\t"
        "movl %%edx, "RANGE    "(%2)            \n\t"
        "movl %%ebx, "LOW      "(%2)            \n\t"
        :"=&a"(bit) //FIXME this is fragile gcc either runs out of registers or miscompiles it (for example if "+a"(bit) or "+m"(*state) is used
        :"r"(state), "r"(c)
        : "%"REG_c, "%ebx", "%edx", "%"REG_S, "memory"
    );
    bit&=1;
#else /* BRANCHLESS_CABAC_DECODER */


#if HAVE_FAST_CMOV
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "mov    "tmp"       , %%ecx                                     \n\t"\
        "shl    $17         , "tmp"                                     \n\t"\
        "cmp    "low"       , "tmp"                                     \n\t"\
        "cmova  %%ecx       , "range"                                   \n\t"\
        "sbb    %%ecx       , %%ecx                                     \n\t"\
        "and    %%ecx       , "tmp"                                     \n\t"\
        "sub    "tmp"       , "low"                                     \n\t"\
        "xor    %%ecx       , "ret"                                     \n\t"
#else /* HAVE_FAST_CMOV */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "mov    "tmp"       , %%ecx                                     \n\t"\
        "shl    $17         , "tmp"                                     \n\t"\
        "sub    "low"       , "tmp"                                     \n\t"\
        "sar    $31         , "tmp"                                     \n\t" /*lps_mask*/\
        "sub    %%ecx       , "range"                                   \n\t" /*RangeLPS - range*/\
        "and    "tmp"       , "range"                                   \n\t" /*(RangeLPS - range)&lps_mask*/\
        "add    %%ecx       , "range"                                   \n\t" /*new range*/\
        "shl    $17         , %%ecx                                     \n\t"\
        "and    "tmp"       , %%ecx                                     \n\t"\
        "sub    %%ecx       , "low"                                     \n\t"\
        "xor    "tmp"       , "ret"                                     \n\t"
#endif /* HAVE_FAST_CMOV */


#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "movzbl "statep"    , "ret"                                     \n\t"\
        "mov    "range"     , "tmp"                                     \n\t"\
        "and    $0xC0       , "range"                                   \n\t"\
        "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
        "sub    "range"     , "tmp"                                     \n\t"\
        BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx          \n\t"\
        "shl    %%cl        , "range"                                   \n\t"\
        "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp"          \n\t"\
        "mov    "tmpbyte"   , "statep"                                  \n\t"\
        "shl    %%cl        , "low"                                     \n\t"\
        "test   "lowword"   , "lowword"                                 \n\t"\
        " jnz   1f                                                      \n\t"\
        "mov "BYTE"("cabac"), %%"REG_c"                                 \n\t"\
        "movzwl (%%"REG_c")     , "tmp"                                 \n\t"\
        "bswap  "tmp"                                                   \n\t"\
        "shr    $15         , "tmp"                                     \n\t"\
        "sub    $0xFFFF     , "tmp"                                     \n\t"\
        "add    $2          , %%"REG_c"                                 \n\t"\
        "mov    %%"REG_c"   , "BYTE    "("cabac")                       \n\t"\
        "lea    -1("low")   , %%ecx                                     \n\t"\
        "xor    "low"       , %%ecx                                     \n\t"\
        "shr    $15         , %%ecx                                     \n\t"\
        "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx            \n\t"\
        "neg    %%ecx                                                   \n\t"\
        "add    $7          , %%ecx                                     \n\t"\
        "shl    %%cl        , "tmp"                                     \n\t"\
        "add    "tmp"       , "low"                                     \n\t"\
        "1:                                                             \n\t"

    __asm__ volatile(
        "movl "RANGE    "(%2), %%esi            \n\t"
        "movl "LOW      "(%2), %%ebx            \n\t"
        BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
        "movl %%esi, "RANGE    "(%2)            \n\t"
        "movl %%ebx, "LOW      "(%2)            \n\t"

        :"=&a"(bit)
        :"r"(state), "r"(c)
        : "%"REG_c, "%ebx", "%edx", "%esi", "memory"
    );
    bit&=1;
#endif /* BRANCHLESS_CABAC_DECODER */
#else /* ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) */
    int s = *state;
    int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + s];
    int bit, lps_mask av_unused;

    c->range -= RangeLPS;
#ifndef BRANCHLESS_CABAC_DECODER
    if(c->low < (c->range<<(CABAC_BITS+1))){
        /* most probable symbol */
        bit= s&1;
        *state= ff_h264_mps_state[s];
        renorm_cabac_decoder_once(c);
    }else{
        /* least probable symbol; bit temporarily holds the renorm shift */
        bit= ff_h264_norm_shift[RangeLPS];
        c->low -= (c->range<<(CABAC_BITS+1));
        *state= ff_h264_lps_state[s];
        c->range = RangeLPS<<bit;
        c->low <<= bit;
        bit= (s&1)^1;

        if(!(c->low & CABAC_MASK)){
            refill2(c);
        }
    }
#else /* BRANCHLESS_CABAC_DECODER */
    /* all ones when the LPS was coded, zero for the MPS */
    lps_mask= ((c->range<<(CABAC_BITS+1)) - c->low)>>31;

    c->low -= (c->range<<(CABAC_BITS+1)) & lps_mask;
    c->range += (RangeLPS - c->range) & lps_mask;

    s^=lps_mask;
    *state= (ff_h264_mlps_state+128)[s];
    bit= s&1;

    /* lps_mask is reused as the renormalisation shift count */
    lps_mask= ff_h264_norm_shift[c->range];
    c->range<<= lps_mask;
    c->low  <<= lps_mask;
    if(!(c->low & CABAC_MASK))
        refill2(c);
#endif /* BRANCHLESS_CABAC_DECODER */
#endif /* ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) */
    return bit;
}

583
/**
 * Out-of-line wrapper around get_cabac_inline().
 *
 * @param c     decoder context
 * @param state context state byte, updated in place
 * @return the decoded bin
 */
static int av_noinline av_unused get_cabac_noinline(CABACContext *c, uint8_t * const state){
    return get_cabac_inline(c,state);
}

587
/**
 * Decode one context-coded bin; inlining is left to the compiler.
 *
 * @param c     decoder context
 * @param state context state byte, updated in place
 * @return the decoded bin
 */
static int av_unused get_cabac(CABACContext *c, uint8_t * const state){
    return get_cabac_inline(c,state);
}

591
/**
 * Decode one bin in bypass mode (no context, range left unchanged).
 *
 * The assembly variant is disabled ("not faster") and kept for reference.
 *
 * @param c decoder context
 * @return the decoded bin (0 or 1)
 */
static int av_unused get_cabac_bypass(CABACContext *c){
#if 0 //not faster
    int bit;
    __asm__ volatile(
        "movl "RANGE    "(%1), %%ebx            \n\t"
        "movl "LOW      "(%1), %%eax            \n\t"
        "shl $17, %%ebx                         \n\t"
        "add %%eax, %%eax                       \n\t"
        "sub %%ebx, %%eax                       \n\t"
        "cltd                                   \n\t"
        "and %%edx, %%ebx                       \n\t"
        "add %%ebx, %%eax                       \n\t"
        "test %%ax, %%ax                        \n\t"
        " jnz 1f                                \n\t"
        "movl "BYTE     "(%1), %%"REG_b"        \n\t"
        "subl $0xFFFF, %%eax                    \n\t"
        "movzwl (%%"REG_b"), %%ecx              \n\t"
        "bswap %%ecx                            \n\t"
        "shrl $15, %%ecx                        \n\t"
        "addl $2, %%"REG_b"                     \n\t"
        "addl %%ecx, %%eax                      \n\t"
        "movl %%"REG_b", "BYTE     "(%1)        \n\t"
        "1:                                     \n\t"
        "movl %%eax, "LOW      "(%1)            \n\t"

        :"=&d"(bit)
        :"r"(c)
        : "%eax", "%"REG_b, "%ecx", "memory"
    );
    return bit+1;
#else
    int range;
    c->low += c->low;

    if(!(c->low & CABAC_MASK))
        refill(c);

    /* range scaled to the same fixed-point position as low */
    range= c->range<<(CABAC_BITS+1);
    if(c->low < range){
        return 0;
    }else{
        c->low -= range;
        return 1;
    }
#endif
}


639
/**
 * Decode one bypass bin and apply it to @p val as a sign.
 *
 * @param c   decoder context
 * @param val value whose sign is to be set
 * @return -val when the decoded bypass bin is 0 (low below the scaled
 *         range), val otherwise
 */
static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
#if ARCH_X86 && HAVE_EBX_AVAILABLE
    __asm__ volatile(
        "movl "RANGE    "(%1), %%ebx            \n\t"
        "movl "LOW      "(%1), %%eax            \n\t"
        "shl $17, %%ebx                         \n\t"
        "add %%eax, %%eax                       \n\t"
        "sub %%ebx, %%eax                       \n\t"
        "cltd                                   \n\t"
        "and %%edx, %%ebx                       \n\t"
        "add %%ebx, %%eax                       \n\t"
        "xor %%edx, %%ecx                       \n\t"
        "sub %%edx, %%ecx                       \n\t"
        "test %%ax, %%ax                        \n\t"
        " jnz 1f                                \n\t"
        "mov  "BYTE     "(%1), %%"REG_b"        \n\t"
        "subl $0xFFFF, %%eax                    \n\t"
        "movzwl (%%"REG_b"), %%edx              \n\t"
        "bswap %%edx                            \n\t"
        "shrl $15, %%edx                        \n\t"
        "add  $2, %%"REG_b"                     \n\t"
        "addl %%edx, %%eax                      \n\t"
        "mov  %%"REG_b", "BYTE     "(%1)        \n\t"
        "1:                                     \n\t"
        "movl %%eax, "LOW      "(%1)            \n\t"

        :"+c"(val)
        :"r"(c)
        : "%eax", "%"REG_b, "%edx", "memory"
    );
    return val;
#else
    int range, mask;
    c->low += c->low;

    if(!(c->low & CABAC_MASK))
        refill(c);

    range= c->range<<(CABAC_BITS+1);
    c->low -= range;
    /* all ones if the subtraction went negative (bin 0), else zero */
    mask= c->low >> 31;
    range &= mask;
    c->low += range;   /* undo the subtraction for bin 0 */
    return (val^mask)-mask;
#endif
}
685

686 687 688 689
/**
 *
 * @return the number of bytes read or 0 if no end
 */
690
static int av_unused get_cabac_terminate(CABACContext *c){
691
    c->range -= 2;
692
    if(c->low < c->range<<(CABAC_BITS+1)){
Michael Niedermayer's avatar
Michael Niedermayer committed
693
        renorm_cabac_decoder_once(c);
Michael Niedermayer's avatar
Michael Niedermayer committed
694 695
        return 0;
    }else{
696
        return c->bytestream - c->bytestream_start;
697
    }
Michael Niedermayer's avatar
Michael Niedermayer committed
698 699
}

700
#if 0
701
/**
Diego Biurrun's avatar
Diego Biurrun committed
702
 * Get (truncated) unary binarization.
703
 */
704
static int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max_index, int truncated){
705
    int i;
706 707

    for(i=0; i<max; i++){
708 709
        if(get_cabac(c, state)==0)
            return i;
710

711 712 713 714 715 716 717 718 719
        if(i< max_index) state++;
    }

    return truncated ? max : -1;
}

/**
 * get unary exp golomb k-th order binarization.
 */
720
static int get_cabac_ueg(CABACContext *c, uint8_t * state, int max, int is_signed, int k, int max_index){
721 722
    int i, v;
    int m= 1<<k;
723 724

    if(get_cabac(c, state)==0)
725
        return 0;
726

727
    if(0 < max_index) state++;
728 729

    for(i=1; i<max; i++){
730 731 732 733 734 735 736 737 738
        if(get_cabac(c, state)==0){
            if(is_signed && get_cabac_bypass(c)){
                return -i;
            }else
                return i;
        }

        if(i < max_index) state++;
    }
739

740 741 742 743
    while(get_cabac_bypass(c)){
        i+= m;
        m+= m;
    }
744

745 746 747 748 749 750 751 752 753 754 755
    v=0;
    while(m>>=1){
        v+= v + get_cabac_bypass(c);
    }
    i += v;

    if(is_signed && get_cabac_bypass(c)){
        return -i;
    }else
        return i;
}
756
#endif /* 0 */
757

758
#endif /* AVCODEC_CABAC_H */