/*
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef AVCODEC_X86_CABAC_H
#define AVCODEC_X86_CABAC_H

#include "libavcodec/cabac.h"
#include "libavutil/attributes.h"
#include "libavutil/macros.h"
#include "libavutil/x86/asm.h"
#include "config.h"

#if HAVE_INLINE_ASM

#ifdef BROKEN_RELOCATIONS
/* BROKEN_RELOCATIONS build: one extra asm input operand holding the `tables`
 * pointer. It is appended after the last fixed input operand of
 * get_cabac_inline_x86(), where the asm template refers to it as %8. */
#define TABLES_ARG , "r"(tables)

/*
 * BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp)
 * Variant for BROKEN_RELOCATIONS builds; it uses 64-bit registers
 * (%%rcx, retq, movslq).
 *
 * Asm text fragment spliced into BRANCHLESS_GET_CABAC. Every argument is a
 * string naming an asm operand; retq is the 64-bit name of ret.
 *
 * State on entry, as prepared by BRANCHLESS_GET_CABAC:
 *   range = RangeLPS, %%ecx = old range - RangeLPS, tmp = %%ecx << 17.
 *
 * Without branching, the fragment compares tmp with low and
 *   - leaves in range either %%ecx or RangeLPS,
 *   - subtracts the scaled range from low in the lps_mask case,
 *   - inverts every bit of ret in that same case.
 * %%ecx / %%rcx is used as scratch.
 */
#if HAVE_FAST_CMOV
/* cmov/sbb form: the mask is derived from the carry flag of the compare. */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
        "cmp    "low"       , "tmp"                        \n\t"\
        "cmova  %%ecx       , "range"                      \n\t"\
        "sbb    %%rcx       , %%rcx                        \n\t"\
        "and    %%ecx       , "tmp"                        \n\t"\
        "xor    %%rcx       , "retq"                       \n\t"\
        "sub    "tmp"       , "low"                        \n\t"
#else /* HAVE_FAST_CMOV */
/* sub/sar form: the mask is the sign of (tmp - low); ret is sign-extended
 * into retq at the end because only its 32-bit name is modified. */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp) \
/* P4 Prescott has crappy cmov,sbb,64-bit shift so avoid them */ \
        "sub    "low"       , "tmp"                        \n\t"\
        "sar    $31         , "tmp"                        \n\t"\
        "sub    %%ecx       , "range"                      \n\t"\
        "and    "tmp"       , "range"                      \n\t"\
        "add    %%ecx       , "range"                      \n\t"\
        "shl    $17         , %%ecx                        \n\t"\
        "and    "tmp"       , %%ecx                        \n\t"\
        "sub    %%ecx       , "low"                        \n\t"\
        "xor    "tmp"       , "ret"                        \n\t"\
        "movslq "ret"       , "retq"                       \n\t"
#endif /* HAVE_FAST_CMOV */

/*
 * BRANCHLESS_GET_CABAC(...) -- BROKEN_RELOCATIONS variant: every table
 * lookup is addressed relative to the register named by `tables`.
 *
 * Expands to the asm template text for one CABAC bin decode. All arguments
 * are strings naming asm operands or numeric offsets:
 *   ret / retq       result register (32-bit / 64-bit name)
 *   statep           memory operand of the context state byte
 *   low / lowword    `low` register (32-bit / 16-bit name)
 *   range / rangeq   `range` register (32-bit / 64-bit name)
 *   tmp / tmpbyte    scratch register (32-bit / 8-bit name)
 *   byte, end        memory operands of the bytestream pointer and its end
 *   norm_off, lps_off, mlps_off
 *                    byte offsets of the three lookup tables inside `tables`
 *   tables           register holding the table base address
 *
 * Sequence:
 *   1. ret = state byte; range = table[lps_off + ret + 2 * (range & 0xC0)];
 *      tmp = (old range - range) << 17, with the unshifted value in %%ecx.
 *   2. BRANCHLESS_GET_CABAC_UPDATE picks the new range/low and may invert ret.
 *   3. %%cl = table[norm_off + range]; range and low are shifted left by it;
 *      the byte at table[mlps_off + 128 + ret] is stored back to statep.
 *   4. If the low 16 bits of low are now zero, 16 more bits are read from the
 *      bytestream pointer (byte-swapped, shifted, added into low). The
 *      pointer is advanced by 2 unless it already compares >= end; the
 *      16-bit load itself is done either way.
 *
 * %%ecx / %%rcx is clobbered; local labels 1 and 2 are used.
 */
#define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
        "movzbl "statep"    , "ret"                                     \n\t"\
        "mov    "range"     , "tmp"                                     \n\t"\
        "and    $0xC0       , "range"                                   \n\t"\
        "lea    ("ret", "range", 2), %%ecx                              \n\t"\
        "movzbl "lps_off"("tables", %%rcx), "range"                     \n\t"\
        "sub    "range"     , "tmp"                                     \n\t"\
        "mov    "tmp"       , %%ecx                                     \n\t"\
        "shl    $17         , "tmp"                                     \n\t"\
        BRANCHLESS_GET_CABAC_UPDATE(ret, retq, low, range, tmp)              \
        "movzbl "norm_off"("tables", "rangeq"), %%ecx                   \n\t"\
        "shl    %%cl        , "range"                                   \n\t"\
        "movzbl "mlps_off"+128("tables", "retq"), "tmp"                 \n\t"\
        "shl    %%cl        , "low"                                     \n\t"\
        "mov    "tmpbyte"   , "statep"                                  \n\t"\
        "test   "lowword"   , "lowword"                                 \n\t"\
        "jnz    2f                                                      \n\t"\
        "mov    "byte"      , %%"FF_REG_c"                              \n\t"\
        "cmp    "end"       , %%"FF_REG_c"                              \n\t"\
        "jge    1f                                                      \n\t"\
        "add"FF_OPSIZE" $2  , "byte"                                    \n\t"\
        "1:                                                             \n\t"\
        "movzwl (%%"FF_REG_c"), "tmp"                                   \n\t"\
        "lea    -1("low")   , %%ecx                                     \n\t"\
        "xor    "low"       , %%ecx                                     \n\t"\
        "shr    $15         , %%ecx                                     \n\t"\
        "bswap  "tmp"                                                   \n\t"\
        "shr    $15         , "tmp"                                     \n\t"\
        "movzbl "norm_off"("tables", %%rcx), %%ecx                      \n\t"\
        "sub    $0xFFFF     , "tmp"                                     \n\t"\
        "neg    %%ecx                                                   \n\t"\
        "add    $7          , %%ecx                                     \n\t"\
        "shl    %%cl        , "tmp"                                     \n\t"\
        "add    "tmp"       , "low"                                     \n\t"\
        "2:                                                             \n\t"

#else /* BROKEN_RELOCATIONS */
/* Without BROKEN_RELOCATIONS the tables are addressed directly by symbol, so
 * no extra asm input operand is appended. */
#define TABLES_ARG

/*
 * BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)
 * Variant for builds without BROKEN_RELOCATIONS; 32-bit registers only.
 *
 * Asm text fragment spliced into BRANCHLESS_GET_CABAC. Every argument is a
 * string naming an asm operand.
 *
 * State on entry: range = RangeLPS, tmp = old range - RangeLPS.
 * The fragment first copies tmp to %%ecx and shifts tmp left by 17, then,
 * without branching, selects the new range, subtracts the scaled range from
 * low in the lps_mask case and inverts every bit of ret in that same case.
 * %%ecx is used as scratch.
 */
#if HAVE_FAST_CMOV
/* cmov/sbb form: the mask is derived from the carry flag of the compare. */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
        "mov    "tmp"       , %%ecx     \n\t"\
        "shl    $17         , "tmp"     \n\t"\
        "cmp    "low"       , "tmp"     \n\t"\
        "cmova  %%ecx       , "range"   \n\t"\
        "sbb    %%ecx       , %%ecx     \n\t"\
        "and    %%ecx       , "tmp"     \n\t"\
        "xor    %%ecx       , "ret"     \n\t"\
        "sub    "tmp"       , "low"     \n\t"
#else /* HAVE_FAST_CMOV */
/* sub/sar form: the mask is the sign of (tmp - low). */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
        "mov    "tmp"       , %%ecx     \n\t"\
        "shl    $17         , "tmp"     \n\t"\
        "sub    "low"       , "tmp"     \n\t"\
        "sar    $31         , "tmp"     \n\t" /*lps_mask*/\
        "sub    %%ecx       , "range"   \n\t" /*RangeLPS - range*/\
        "and    "tmp"       , "range"   \n\t" /*(RangeLPS - range)&lps_mask*/\
        "add    %%ecx       , "range"   \n\t" /*new range*/\
        "shl    $17         , %%ecx     \n\t"\
        "and    "tmp"       , %%ecx     \n\t"\
        "sub    %%ecx       , "low"     \n\t"\
        "xor    "tmp"       , "ret"     \n\t"
#endif /* HAVE_FAST_CMOV */

/*
 * BRANCHLESS_GET_CABAC(...) -- variant without BROKEN_RELOCATIONS: the
 * lookup tables are addressed directly through
 * MANGLE(ff_h264_cabac_tables) plus a constant offset.
 *
 * Expands to the asm template text for one CABAC bin decode. All arguments
 * are strings naming asm operands or numeric offsets:
 *   ret              result register
 *   statep           memory operand of the context state byte
 *   low / lowword    `low` register (32-bit / 16-bit name)
 *   range            `range` register
 *   tmp / tmpbyte    scratch register (32-bit / 8-bit name)
 *   byte, end        memory operands of the bytestream pointer and its end
 *   norm_off, lps_off, mlps_off
 *                    byte offsets of the three lookup tables
 *   retq, rangeq, tables
 *                    accepted so both variants share one parameter list;
 *                    not referenced in this expansion
 *
 * Sequence:
 *   1. ret = state byte; range = table[lps_off + ret + 2 * (range & 0xC0)];
 *      tmp = old range - range.
 *   2. BRANCHLESS_GET_CABAC_UPDATE picks the new range/low and may invert ret.
 *   3. %%cl = table[norm_off + range]; range and low are shifted left by it;
 *      the byte at table[mlps_off + 128 + ret] is stored back to statep.
 *   4. If the low 16 bits of low are now zero, 16 more bits are read from the
 *      bytestream pointer (byte-swapped, shifted, added into low). The
 *      pointer is advanced by 2 unless it already compares >= end; the
 *      16-bit load itself is done either way.
 *
 * The FF_REG_c register is clobbered; local labels 1 and 2 are used.
 */
#define BRANCHLESS_GET_CABAC(ret, retq, statep, low, lowword, range, rangeq, tmp, tmpbyte, byte, end, norm_off, lps_off, mlps_off, tables) \
        "movzbl "statep"    , "ret"                                     \n\t"\
        "mov    "range"     , "tmp"                                     \n\t"\
        "and    $0xC0       , "range"                                   \n\t"\
        "movzbl "MANGLE(ff_h264_cabac_tables)"+"lps_off"("ret", "range", 2), "range" \n\t"\
        "sub    "range"     , "tmp"                                     \n\t"\
        BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)                    \
        "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"("range"), %%ecx    \n\t"\
        "shl    %%cl        , "range"                                   \n\t"\
        "movzbl "MANGLE(ff_h264_cabac_tables)"+"mlps_off"+128("ret"), "tmp"  \n\t"\
        "shl    %%cl        , "low"                                     \n\t"\
        "mov    "tmpbyte"   , "statep"                                  \n\t"\
        "test   "lowword"   , "lowword"                                 \n\t"\
        " jnz   2f                                                      \n\t"\
        "mov    "byte"      , %%"FF_REG_c"                              \n\t"\
        "cmp    "end"       , %%"FF_REG_c"                              \n\t"\
        "jge    1f                                                      \n\t"\
        "add"FF_OPSIZE" $2  , "byte"                                    \n\t"\
        "1:                                                             \n\t"\
        "movzwl (%%"FF_REG_c") , "tmp"                                  \n\t"\
        "lea    -1("low")   , %%ecx                                     \n\t"\
        "xor    "low"       , %%ecx                                     \n\t"\
        "shr    $15         , %%ecx                                     \n\t"\
        "bswap  "tmp"                                                   \n\t"\
        "shr    $15         , "tmp"                                     \n\t"\
        "movzbl "MANGLE(ff_h264_cabac_tables)"+"norm_off"(%%ecx), %%ecx \n\t"\
        "sub    $0xFFFF     , "tmp"                                     \n\t"\
        "neg    %%ecx                                                   \n\t"\
        "add    $7          , %%ecx                                     \n\t"\
        "shl    %%cl        , "tmp"                                     \n\t"\
        "add    "tmp"       , "low"                                     \n\t"\
        "2:                                                             \n\t"

#endif /* BROKEN_RELOCATIONS */


#if HAVE_7REGS
/**
 * x86 inline-asm implementation selected for get_cabac_inline().
 *
 * Runs the BRANCHLESS_GET_CABAC sequence with c->low and c->range as
 * read-write register operands and the context byte at *state accessed
 * through memory (read, then rewritten). c->bytestream and
 * c->bytestream_end are addressed as constant offsets from c; the asm may
 * advance c->bytestream by 2.
 *
 * @param c     CABAC decoder context
 * @param state pointer to the context state byte; updated in place
 * @return      the low bit of the value the asm leaves in `bit` (0 or 1)
 */
#define get_cabac_inline get_cabac_inline_x86
static av_always_inline int get_cabac_inline_x86(CABACContext *c,
                                                 uint8_t *const state)
{
    int bit, tmp;
#ifdef BROKEN_RELOCATIONS
    void *tables;

    /* Load the table base address into a register; the main asm statement
     * receives it as operand %8 through TABLES_ARG. */
    __asm__ volatile(
        "lea    "MANGLE(ff_h264_cabac_tables)", %0      \n\t"
        : "=&r"(tables)
    );
#endif

    /* Operands: %0 bit, %1 c->low, %2 c->range, %3 tmp, %4 state, %5 c,
     * %6/%7 offsets of bytestream / bytestream_end in CABACContext.
     * tmp uses the "q" constraint because its 8-bit name (%b3) is used. */
    __asm__ volatile(
        BRANCHLESS_GET_CABAC("%0", "%q0", "(%4)", "%1", "%w1",
                             "%2", "%q2", "%3", "%b3",
                             "%c6(%5)", "%c7(%5)",
                             AV_STRINGIFY(H264_NORM_SHIFT_OFFSET),
                             AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
                             AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
                             "%8")
        : "=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp)
        : "r"(state), "r"(c),
          "i"(offsetof(CABACContext, bytestream)),
          "i"(offsetof(CABACContext, bytestream_end))
          TABLES_ARG
        : "%"FF_REG_c, "memory"
    );
    return bit & 1;
}
#endif /* HAVE_7REGS */

/**
 * x86 inline-asm implementation selected for get_cabac_bypass_sign().
 *
 * Doubles c->low, subtracts c->range << 17 and uses the sign of the result
 * as a mask: when the result is negative the subtraction is undone and val
 * is negated, otherwise val is returned unchanged. If the low 16 bits of
 * the new low are zero, 16 more bits are read from c->bytestream and added
 * in, and c->bytestream is advanced by 2 unless it already compares
 * >= c->bytestream_end. The new low is stored back to c->low.
 *
 * @param c   CABAC decoder context
 * @param val value whose sign is to be decided
 * @return    val or -val
 */
#define get_cabac_bypass_sign get_cabac_bypass_sign_x86
static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
{
    x86_reg tmp;
    /* Operands: %0 val (in ecx), %1 tmp, %2 c, %3..%6 offsets of
     * low, bytestream, bytestream_end and range in CABACContext. */
    __asm__ volatile(
        /* eax = 2 * c->low - (c->range << 17) */
        "movl        %c6(%2), %k1       \n\t"
        "movl        %c3(%2), %%eax     \n\t"
        "shl             $17, %k1       \n\t"
        "add           %%eax, %%eax     \n\t"
        "sub             %k1, %%eax     \n\t"
        /* edx = sign mask of eax; add the scaled range back if negative */
        "cltd                           \n\t"
        "and           %%edx, %k1       \n\t"
        "add             %k1, %%eax     \n\t"
        /* val = (val ^ mask) - mask */
        "xor           %%edx, %%ecx     \n\t"
        "sub           %%edx, %%ecx     \n\t"
        /* refill only when the low 16 bits of eax are zero */
        "test           %%ax, %%ax      \n\t"
        "jnz              1f            \n\t"
        "mov         %c4(%2), %1        \n\t"
        "subl        $0xFFFF, %%eax     \n\t"
        "movzwl         (%1), %%edx     \n\t"
        "bswap         %%edx            \n\t"
        "shrl            $15, %%edx     \n\t"
        "addl          %%edx, %%eax     \n\t"
        /* advance c->bytestream unless it is already at/after the end */
        "cmp         %c5(%2), %1        \n\t"
        "jge              1f            \n\t"
        "add"FF_OPSIZE"   $2, %c4(%2)   \n\t"
        "1:                             \n\t"
        "movl          %%eax, %c3(%2)   \n\t"

        : "+c"(val), "=&r"(tmp)
        : "r"(c),
          "i"(offsetof(CABACContext, low)),
          "i"(offsetof(CABACContext, bytestream)),
          "i"(offsetof(CABACContext, bytestream_end)),
          "i"(offsetof(CABACContext, range))
        : "%eax", "%edx", "memory"
    );
    return val;
}

/**
 * x86 inline-asm implementation selected for get_cabac_bypass().
 *
 * Doubles c->low, subtracts c->range << 17 and takes the sign of the result
 * as a mask (0 or -1): when the result is negative the subtraction is
 * undone. The returned bit is mask + 1, i.e. 0 in the negative case and 1
 * otherwise. If the low 16 bits of the new low are zero, 16 more bits are
 * read from c->bytestream and added in, and c->bytestream is advanced by 2
 * unless it already compares >= c->bytestream_end. The new low is stored
 * back to c->low.
 *
 * @param c CABAC decoder context
 * @return  the decoded bypass bit (0 or 1)
 */
#define get_cabac_bypass get_cabac_bypass_x86
static av_always_inline int get_cabac_bypass_x86(CABACContext *c)
{
    x86_reg tmp;
    int res;
    /* Operands: %0 res (in edx), %1 tmp, %2 c, %3..%6 offsets of
     * low, bytestream, bytestream_end and range in CABACContext. */
    __asm__ volatile(
        /* eax = 2 * c->low - (c->range << 17) */
        "movl        %c6(%2), %k1       \n\t"
        "movl        %c3(%2), %%eax     \n\t"
        "shl             $17, %k1       \n\t"
        "add           %%eax, %%eax     \n\t"
        "sub             %k1, %%eax     \n\t"
        /* edx = sign mask of eax; add the scaled range back if negative */
        "cltd                           \n\t"
        "and           %%edx, %k1       \n\t"
        "add             %k1, %%eax     \n\t"
        /* res = mask + 1 */
        "inc           %%edx            \n\t"
        /* refill only when the low 16 bits of eax are zero */
        "test           %%ax, %%ax      \n\t"
        "jnz              1f            \n\t"
        "mov         %c4(%2), %1        \n\t"
        "subl        $0xFFFF, %%eax     \n\t"
        "movzwl         (%1), %%ecx     \n\t"
        "bswap         %%ecx            \n\t"
        "shrl            $15, %%ecx     \n\t"
        "addl          %%ecx, %%eax     \n\t"
        /* advance c->bytestream unless it is already at/after the end */
        "cmp         %c5(%2), %1        \n\t"
        "jge              1f            \n\t"
        "add"FF_OPSIZE"   $2, %c4(%2)   \n\t"
        "1:                             \n\t"
        "movl          %%eax, %c3(%2)   \n\t"

        : "=&d"(res), "=&r"(tmp)
        : "r"(c),
          "i"(offsetof(CABACContext, low)),
          "i"(offsetof(CABACContext, bytestream)),
          "i"(offsetof(CABACContext, bytestream_end)),
          "i"(offsetof(CABACContext, range))
        : "%eax", "%ecx", "memory"
    );
    return res;
}

#endif /* HAVE_INLINE_ASM */
#endif /* AVCODEC_X86_CABAC_H */