Commit 7f668cd2, authored by Roland Scheidegger and committed by Michael Niedermayer

h264: use one table instead of several for cabac functions

The reason is that a single table is easier for PIC code (in particular on Darwin...).
Keep the old names as pointers (static in cabac_functions.h so gcc
knows these are just immediate offsets) so the C code can stay the same
(alternatively, the offsets could be used directly in the functions needing the
tables). This should produce the same code as before for non-PIC builds and better
code (confirmed) for PIC builds.

The assembly uses the new table but still won't work for the PIC case.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent e52b9618
...@@ -31,6 +31,29 @@ ...@@ -31,6 +31,29 @@
#include "cabac.h" #include "cabac.h"
#include "cabac_functions.h" #include "cabac_functions.h"
/*
 * Single combined lookup table shared by the CABAC functions.
 *
 * Layout, as given by the declared size and the H264_*_OFFSET macros:
 *   [   0,  512)  norm shift values (512 bytes, the only region with a
 *                 static initializer below)
 *   [ 512, 1024)  LPS range table (4*2*64 bytes)
 *   [1024, 1280)  MLPS state table (4*64 bytes)
 *   [1280, 1343)  last_coeff_flag_offset_8x8 (63 bytes)
 *
 * Everything past the first 512 entries has no initializer and therefore
 * starts out as zero. ff_init_cabac_states() writes the MLPS state and
 * last_coeff_flag_offset_8x8 regions at runtime; the LPS range region is
 * presumably filled there as well (not visible in this excerpt).
 */
uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63] = {
9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
static const uint8_t lps_range[64][4]= { static const uint8_t lps_range[64][4]= {
{128,176,208,240}, {128,167,197,227}, {128,158,187,216}, {123,150,178,205}, {128,176,208,240}, {128,167,197,227}, {128,158,187,216}, {123,150,178,205},
{116,142,169,195}, {111,135,160,185}, {105,128,152,175}, {100,122,144,166}, {116,142,169,195}, {111,135,160,185}, {105,128,152,175}, {100,122,144,166},
...@@ -50,8 +73,6 @@ static const uint8_t lps_range[64][4]= { ...@@ -50,8 +73,6 @@ static const uint8_t lps_range[64][4]= {
{ 6, 8, 9, 11}, { 6, 7, 9, 10}, { 6, 7, 8, 9}, { 2, 2, 2, 2}, { 6, 8, 9, 11}, { 6, 7, 9, 10}, { 6, 7, 8, 9}, { 2, 2, 2, 2},
}; };
uint8_t ff_h264_mlps_state[4*64];
uint8_t ff_h264_lps_range[4*2*64];
static uint8_t h264_lps_state[2*64]; static uint8_t h264_lps_state[2*64];
static uint8_t h264_mps_state[2*64]; static uint8_t h264_mps_state[2*64];
...@@ -77,27 +98,11 @@ static const uint8_t lps_state[64]= { ...@@ -77,27 +98,11 @@ static const uint8_t lps_state[64]= {
36,36,37,37,37,38,38,63, 36,36,37,37,37,38,38,63,
}; };
const uint8_t ff_h264_norm_shift[512]= { static const uint8_t last_coeff_flag_offset_8x8[63] = {
9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
}; };
/** /**
...@@ -158,6 +163,9 @@ void ff_init_cabac_states(CABACContext *c){ ...@@ -158,6 +163,9 @@ void ff_init_cabac_states(CABACContext *c){
ff_h264_mlps_state[128-2*i-2]= 0; ff_h264_mlps_state[128-2*i-2]= 0;
} }
} }
for(i=0; i< 63; i++){
ff_h264_last_coeff_flag_offset_8x8[i] = last_coeff_flag_offset_8x8[i];
}
} }
#ifdef TEST #ifdef TEST
......
...@@ -31,6 +31,11 @@ ...@@ -31,6 +31,11 @@
#include "put_bits.h" #include "put_bits.h"
/*
 * Byte offsets of the individual sub-tables inside ff_h264_cabac_tables.
 * They are plain integer constants so they can be used both for the C-side
 * pointer aliases and as immediate ("i") operands in the x86 inline asm.
 */
#define H264_NORM_SHIFT_OFFSET 0
#define H264_LPS_RANGE_OFFSET 512
#define H264_MLPS_STATE_OFFSET 1024
#define H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET 1280
#define CABAC_BITS 16 #define CABAC_BITS 16
#define CABAC_MASK ((1<<CABAC_BITS)-1) #define CABAC_MASK ((1<<CABAC_BITS)-1)
......
...@@ -36,9 +36,11 @@ ...@@ -36,9 +36,11 @@
# include "x86/cabac.h" # include "x86/cabac.h"
#endif #endif
extern const uint8_t ff_h264_norm_shift[512]; extern uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63];
extern uint8_t ff_h264_mlps_state[4*64]; static uint8_t * const ff_h264_norm_shift = ff_h264_cabac_tables + H264_NORM_SHIFT_OFFSET;
extern uint8_t ff_h264_lps_range[4*2*64]; ///< rangeTabLPS static uint8_t * const ff_h264_lps_range = ff_h264_cabac_tables + H264_LPS_RANGE_OFFSET;
static uint8_t * const ff_h264_mlps_state = ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET;
static uint8_t * const ff_h264_last_coeff_flag_offset_8x8 = ff_h264_cabac_tables + H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET;
static void refill(CABACContext *c){ static void refill(CABACContext *c){
#if CABAC_BITS == 16 #if CABAC_BITS == 16
......
...@@ -1561,13 +1561,6 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, ...@@ -1561,13 +1561,6 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx,
return base_ctx[cat] + ctx; return base_ctx[cat] + ctx;
} }
DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
};
static av_always_inline void static av_always_inline void
decode_cabac_residual_internal(H264Context *h, DCTELEM *block, decode_cabac_residual_internal(H264Context *h, DCTELEM *block,
int cat, int n, const uint8_t *scantable, int cat, int n, const uint8_t *scantable,
...@@ -1670,7 +1663,7 @@ decode_cabac_residual_internal(H264Context *h, DCTELEM *block, ...@@ -1670,7 +1663,7 @@ decode_cabac_residual_internal(H264Context *h, DCTELEM *block,
last_coeff_ctx_base-significant_coeff_ctx_base); last_coeff_ctx_base-significant_coeff_ctx_base);
} }
#else #else
DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] ); DECODE_SIGNIFICANCE( 63, sig_off[last], ff_h264_last_coeff_flag_offset_8x8[last] );
} else { } else {
if (is_dc && chroma422) { // dc 422 if (is_dc && chroma422) { // dc 422
DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]); DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]);
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "libavcodec/cabac.h" #include "libavcodec/cabac.h"
#include "libavutil/attributes.h" #include "libavutil/attributes.h"
#include "libavutil/x86_cpu.h" #include "libavutil/x86_cpu.h"
#include "libavutil/internal.h"
#include "config.h" #include "config.h"
#if HAVE_FAST_CMOV #if HAVE_FAST_CMOV
...@@ -51,16 +52,16 @@ ...@@ -51,16 +52,16 @@
"xor "tmp" , "ret" \n\t" "xor "tmp" , "ret" \n\t"
#endif /* HAVE_FAST_CMOV */ #endif /* HAVE_FAST_CMOV */
#define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte, end) \ #define BRANCHLESS_GET_CABAC(ret, statep, low, lowword, range, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off) \
"movzbl "statep" , "ret" \n\t"\ "movzbl "statep" , "ret" \n\t"\
"mov "range" , "tmp" \n\t"\ "mov "range" , "tmp" \n\t"\
"and $0xC0 , "range" \n\t"\ "and $0xC0 , "range" \n\t"\
"movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\ "movzbl "MANGLE(ff_h264_cabac_tables)"+"lps_off"("ret", "range", 2), "range" \n\t"\
"sub "range" , "tmp" \n\t"\ "sub "range" , "tmp" \n\t"\
BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp) \ BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp) \
"movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\ "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"("range"), %%ecx \n\t"\
"shl %%cl , "range" \n\t"\ "shl %%cl , "range" \n\t"\
"movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\ "movzbl "MANGLE(ff_h264_cabac_tables)"+"mlps_off"+128("ret"), "tmp" \n\t"\
"shl %%cl , "low" \n\t"\ "shl %%cl , "low" \n\t"\
"mov "tmpbyte" , "statep" \n\t"\ "mov "tmpbyte" , "statep" \n\t"\
"test "lowword" , "lowword" \n\t"\ "test "lowword" , "lowword" \n\t"\
...@@ -73,7 +74,7 @@ ...@@ -73,7 +74,7 @@
"shr $15 , %%ecx \n\t"\ "shr $15 , %%ecx \n\t"\
"bswap "tmp" \n\t"\ "bswap "tmp" \n\t"\
"shr $15 , "tmp" \n\t"\ "shr $15 , "tmp" \n\t"\
"movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\ "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"(%%ecx), %%ecx \n\t"\
"sub $0xFFFF , "tmp" \n\t"\ "sub $0xFFFF , "tmp" \n\t"\
"neg %%ecx \n\t"\ "neg %%ecx \n\t"\
"add $7 , %%ecx \n\t"\ "add $7 , %%ecx \n\t"\
...@@ -93,11 +94,14 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c, ...@@ -93,11 +94,14 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c,
__asm__ volatile( __asm__ volatile(
BRANCHLESS_GET_CABAC("%0", "(%4)", "%1", "%w1", BRANCHLESS_GET_CABAC("%0", "(%4)", "%1", "%w1",
"%2", "%3", "%b3", "%2", "%3", "%b3",
"%a6(%5)", "%a7(%5)") "%a6(%5)", "%a7(%5)", "%a8", "%a9", "%a10")
: "=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp) : "=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp)
: "r"(state), "r"(c), : "r"(state), "r"(c),
"i"(offsetof(CABACContext, bytestream)), "i"(offsetof(CABACContext, bytestream)),
"i"(offsetof(CABACContext, bytestream_end)) "i"(offsetof(CABACContext, bytestream_end)),
"i"(H264_NORM_SHIFT_OFFSET),
"i"(H264_LPS_RANGE_OFFSET),
"i"(H264_MLPS_STATE_OFFSET)
: "%"REG_c, "memory" : "%"REG_c, "memory"
); );
return bit & 1; return bit & 1;
......
...@@ -45,12 +45,13 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, ...@@ -45,12 +45,13 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
int minusindex= 4-(intptr_t)index; int minusindex= 4-(intptr_t)index;
int bit; int bit;
x86_reg coeff_count; x86_reg coeff_count;
__asm__ volatile( __asm__ volatile(
"3: \n\t" "3: \n\t"
BRANCHLESS_GET_CABAC("%4", "(%1)", "%3", "%w3", BRANCHLESS_GET_CABAC("%4", "(%1)", "%3", "%w3",
"%5", "%k0", "%b0", "%5", "%k0", "%b0",
"%a11(%6)", "%a12(%6)") "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15")
"test $1, %4 \n\t" "test $1, %4 \n\t"
" jz 4f \n\t" " jz 4f \n\t"
...@@ -58,7 +59,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, ...@@ -58,7 +59,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
BRANCHLESS_GET_CABAC("%4", "(%1)", "%3", "%w3", BRANCHLESS_GET_CABAC("%4", "(%1)", "%3", "%w3",
"%5", "%k0", "%b0", "%5", "%k0", "%b0",
"%a11(%6)", "%a12(%6)") "%a11(%6)", "%a12(%6)", "%a13", "%a14", "%a15")
"sub %10, %1 \n\t" "sub %10, %1 \n\t"
"mov %2, %0 \n\t" "mov %2, %0 \n\t"
...@@ -86,7 +87,10 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, ...@@ -86,7 +87,10 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
"+&r"(c->low), "=&r"(bit), "+&r"(c->range) "+&r"(c->low), "=&r"(bit), "+&r"(c->range)
: "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off), : "r"(c), "m"(minusstart), "m"(end), "m"(minusindex), "m"(last_off),
"i"(offsetof(CABACContext, bytestream)), "i"(offsetof(CABACContext, bytestream)),
"i"(offsetof(CABACContext, bytestream_end)) "i"(offsetof(CABACContext, bytestream_end)),
"i"(H264_NORM_SHIFT_OFFSET),
"i"(H264_LPS_RANGE_OFFSET),
"i"(H264_MLPS_STATE_OFFSET)
: "%"REG_c, "memory" : "%"REG_c, "memory"
); );
return coeff_count; return coeff_count;
...@@ -100,6 +104,7 @@ static int decode_significance_8x8_x86(CABACContext *c, ...@@ -100,6 +104,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
x86_reg coeff_count; x86_reg coeff_count;
x86_reg last=0; x86_reg last=0;
x86_reg state; x86_reg state;
__asm__ volatile( __asm__ volatile(
"mov %1, %6 \n\t" "mov %1, %6 \n\t"
"3: \n\t" "3: \n\t"
...@@ -110,18 +115,19 @@ static int decode_significance_8x8_x86(CABACContext *c, ...@@ -110,18 +115,19 @@ static int decode_significance_8x8_x86(CABACContext *c,
BRANCHLESS_GET_CABAC("%4", "(%6)", "%3", "%w3", BRANCHLESS_GET_CABAC("%4", "(%6)", "%3", "%w3",
"%5", "%k0", "%b0", "%5", "%k0", "%b0",
"%a12(%7)", "%a13(%7)") "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16")
"mov %1, %k6 \n\t" "mov %1, %k6 \n\t"
"test $1, %4 \n\t" "test $1, %4 \n\t"
" jz 4f \n\t" " jz 4f \n\t"
"movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%k6), %k6\n\t" "movzbl "MANGLE(ff_h264_cabac_tables)"+%a17(%k6), %k6\n\t"
"add %11, %6 \n\t" "add %11, %6 \n\t"
BRANCHLESS_GET_CABAC("%4", "(%6)", "%3", "%w3", BRANCHLESS_GET_CABAC("%4", "(%6)", "%3", "%w3",
"%5", "%k0", "%b0", "%5", "%k0", "%b0",
"%a12(%7)", "%a13(%7)") "%a12(%7)", "%a13(%7)", "%a14", "%a15", "%a16")
"mov %2, %0 \n\t" "mov %2, %0 \n\t"
"mov %1, %k6 \n\t" "mov %1, %k6 \n\t"
...@@ -147,7 +153,11 @@ static int decode_significance_8x8_x86(CABACContext *c, ...@@ -147,7 +153,11 @@ static int decode_significance_8x8_x86(CABACContext *c,
: "r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), : "r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base),
"m"(sig_off), "m"(last_coeff_ctx_base), "m"(sig_off), "m"(last_coeff_ctx_base),
"i"(offsetof(CABACContext, bytestream)), "i"(offsetof(CABACContext, bytestream)),
"i"(offsetof(CABACContext, bytestream_end)) "i"(offsetof(CABACContext, bytestream_end)),
"i"(H264_NORM_SHIFT_OFFSET),
"i"(H264_LPS_RANGE_OFFSET),
"i"(H264_MLPS_STATE_OFFSET),
"i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET)
: "%"REG_c, "memory" : "%"REG_c, "memory"
); );
return coeff_count; return coeff_count;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment