/*
 * aligned/packed access motion
 *
 * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <stdint.h>

#include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h"
#define         LP(p)           *(uint32_t*)(p)
29 30


31 32 33 34
#define         UNPACK(ph,pl,tt0,tt1) do { \
        uint32_t t0,t1; t0=tt0;t1=tt1; \
        ph = ( (t0 & ~BYTE_VEC32(0x03))>>2) + ( (t1 & ~BYTE_VEC32(0x03))>>2); \
        pl = (t0 & BYTE_VEC32(0x03)) + (t1 & BYTE_VEC32(0x03)); } while(0)
35

36 37
#define         rnd_PACK(ph,pl,nph,npl) ph + nph + (((pl + npl + BYTE_VEC32(0x02))>>2) & BYTE_VEC32(0x03))
#define         no_rnd_PACK(ph,pl,nph,npl)      ph + nph + (((pl + npl + BYTE_VEC32(0x01))>>2) & BYTE_VEC32(0x03))
38 39

/* little endian */
40 41
#define         MERGE1(a,b,ofs) (ofs==0)?a:( ((a)>>(8*ofs))|((b)<<(32-8*ofs)) )
#define         MERGE2(a,b,ofs) (ofs==3)?b:( ((a)>>(8*(ofs+1)))|((b)<<(32-8*(ofs+1))) )
42
/* big
43 44
#define         MERGE1(a,b,ofs) (ofs==0)?a:( ((a)<<(8*ofs))|((b)>>(32-8*ofs)) )
#define         MERGE2(a,b,ofs) (ofs==3)?b:( ((a)<<(8+8*ofs))|((b)>>(32-8-8*ofs)) )
45 46 47
*/


48 49
#define         put(d,s)        d = s
#define         avg(d,s)        d = rnd_avg32(s,d)
50

51 52 53 54 55 56 57
#define         OP_C4(ofs) \
        ref-=ofs; \
        do { \
                OP(LP(dest),MERGE1(LP(ref),LP(ref+4),ofs)); \
                ref+=stride; \
                dest+=stride; \
        } while(--height)
58

59 60 61 62 63 64
#define        OP_C40() \
        do { \
                OP(LP(dest),LP(ref)); \
                ref+=stride; \
                dest+=stride; \
        } while(--height)
65 66


67
#define         OP      put
68 69 70

static void put_pixels4_c(uint8_t *dest,const uint8_t *ref, const int stride,int height)
{
71 72 73 74 75 76
        switch((int)ref&3){
        case 0: OP_C40(); return;
        case 1: OP_C4(1); return;
        case 2: OP_C4(2); return;
        case 3: OP_C4(3); return;
        }
77 78
}

79 80
#undef          OP
#define         OP      avg
81 82 83

static void avg_pixels4_c(uint8_t *dest,const uint8_t *ref, const int stride,int height)
{
84 85 86 87 88 89
        switch((int)ref&3){
        case 0: OP_C40(); return;
        case 1: OP_C4(1); return;
        case 2: OP_C4(2); return;
        case 3: OP_C4(3); return;
        }
90 91
}

92
#undef          OP
93

94
#define         OP_C(ofs,sz,avg2) \
95
{ \
96 97 98 99 100 101 102 103
        ref-=ofs; \
        do { \
                uint32_t        t0,t1; \
                t0 = LP(ref+0); \
                t1 = LP(ref+4); \
                OP(LP(dest+0), MERGE1(t0,t1,ofs)); \
                t0 = LP(ref+8); \
                OP(LP(dest+4), MERGE1(t1,t0,ofs)); \
104
if (sz==16) { \
105 106 107 108
                t1 = LP(ref+12); \
                OP(LP(dest+8), MERGE1(t0,t1,ofs)); \
                t0 = LP(ref+16); \
                OP(LP(dest+12), MERGE1(t1,t0,ofs)); \
109
} \
110 111 112
                ref+=stride; \
                dest+= stride; \
        } while(--height); \
113 114 115
}

/* aligned */
116
#define         OP_C0(sz,avg2) \
117
{ \
118 119 120
        do { \
                OP(LP(dest+0), LP(ref+0)); \
                OP(LP(dest+4), LP(ref+4)); \
121
if (sz==16) { \
122 123
                OP(LP(dest+8), LP(ref+8)); \
                OP(LP(dest+12), LP(ref+12)); \
124
} \
125 126 127
                ref+=stride; \
                dest+= stride; \
        } while(--height); \
128 129
}

130
#define         OP_X(ofs,sz,avg2) \
131
{ \
132 133 134 135 136 137 138 139
        ref-=ofs; \
        do { \
                uint32_t        t0,t1; \
                t0 = LP(ref+0); \
                t1 = LP(ref+4); \
                OP(LP(dest+0), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \
                t0 = LP(ref+8); \
                OP(LP(dest+4), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \
140
if (sz==16) { \
141 142 143 144
                t1 = LP(ref+12); \
                OP(LP(dest+8), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \
                t0 = LP(ref+16); \
                OP(LP(dest+12), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \
145
} \
146 147 148
                ref+=stride; \
                dest+= stride; \
        } while(--height); \
149 150 151
}

/* aligned */
152
#define         OP_Y0(sz,avg2) \
153
{ \
154
        uint32_t t0,t1,t2,t3,t; \
155
\
156 157
        t0 = LP(ref+0); \
        t1 = LP(ref+4); \
158
if (sz==16) { \
159 160
        t2 = LP(ref+8); \
        t3 = LP(ref+12); \
161
} \
162 163
        do { \
                ref += stride; \
164
\
165 166 167 168
                t = LP(ref+0); \
                OP(LP(dest+0), avg2(t0,t)); t0 = t; \
                t = LP(ref+4); \
                OP(LP(dest+4), avg2(t1,t)); t1 = t; \
169
if (sz==16) { \
170 171 172 173
                t = LP(ref+8); \
                OP(LP(dest+8), avg2(t2,t)); t2 = t; \
                t = LP(ref+12); \
                OP(LP(dest+12), avg2(t3,t)); t3 = t; \
174
} \
175 176
                dest+= stride; \
        } while(--height); \
177 178
}

179
#define         OP_Y(ofs,sz,avg2) \
180
{ \
181
        uint32_t t0,t1,t2,t3,t,w0,w1; \
182
\
183 184 185 186 187 188
        ref-=ofs; \
        w0 = LP(ref+0); \
        w1 = LP(ref+4); \
        t0 = MERGE1(w0,w1,ofs); \
        w0 = LP(ref+8); \
        t1 = MERGE1(w1,w0,ofs); \
189
if (sz==16) { \
190 191 192 193
        w1 = LP(ref+12); \
        t2 = MERGE1(w0,w1,ofs); \
        w0 = LP(ref+16); \
        t3 = MERGE1(w1,w0,ofs); \
194
} \
195 196
        do { \
                ref += stride; \
197
\
198 199 200 201 202 203 204
                w0 = LP(ref+0); \
                w1 = LP(ref+4); \
                t = MERGE1(w0,w1,ofs); \
                OP(LP(dest+0), avg2(t0,t)); t0 = t; \
                w0 = LP(ref+8); \
                t = MERGE1(w1,w0,ofs); \
                OP(LP(dest+4), avg2(t1,t)); t1 = t; \
205
if (sz==16) { \
206 207 208 209 210 211
                w1 = LP(ref+12); \
                t = MERGE1(w0,w1,ofs); \
                OP(LP(dest+8), avg2(t2,t)); t2 = t; \
                w0 = LP(ref+16); \
                t = MERGE1(w1,w0,ofs); \
                OP(LP(dest+12), avg2(t3,t)); t3 = t; \
212
} \
213 214
                dest+=stride; \
        } while(--height); \
215 216 217 218
}

#define OP_X0(sz,avg2) OP_X(0,sz,avg2)
#define OP_XY0(sz,PACK) OP_XY(0,sz,PACK)
219
#define         OP_XY(ofs,sz,PACK) \
220
{ \
221 222
        uint32_t        t2,t3,w0,w1; \
        uint32_t        a0,a1,a2,a3,a4,a5,a6,a7; \
223
\
224 225 226 227 228 229
        ref -= ofs; \
        w0 = LP(ref+0); \
        w1 = LP(ref+4); \
        UNPACK(a0,a1,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
        w0 = LP(ref+8); \
        UNPACK(a2,a3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
230
if (sz==16) { \
231 232 233 234
        w1 = LP(ref+12); \
        UNPACK(a4,a5,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
        w0 = LP(ref+16); \
        UNPACK(a6,a7,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
235
} \
236 237 238 239 240 241 242 243 244 245 246
        do { \
                ref+=stride; \
                w0 = LP(ref+0); \
                w1 = LP(ref+4); \
                UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
                OP(LP(dest+0),PACK(a0,a1,t2,t3)); \
                a0 = t2; a1 = t3; \
                w0 = LP(ref+8); \
                UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
                OP(LP(dest+4),PACK(a2,a3,t2,t3)); \
                a2 = t2; a3 = t3; \
247
if (sz==16) { \
248 249 250 251 252 253 254 255
                w1 = LP(ref+12); \
                UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
                OP(LP(dest+8),PACK(a4,a5,t2,t3)); \
                a4 = t2; a5 = t3; \
                w0 = LP(ref+16); \
                UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
                OP(LP(dest+12),PACK(a6,a7,t2,t3)); \
                a6 = t2; a7 = t3; \
256
} \
257 258
                dest+=stride; \
        } while(--height); \
259 260
}

261 262 263
#define         DEFFUNC(op,rnd,xy,sz,OP_N,avgfunc) \
static void op##_##rnd##_pixels##sz##_##xy (uint8_t * dest, const uint8_t * ref, \
                                const int stride, int height) \
264
{ \
265 266 267 268 269 270
        switch((int)ref&3) { \
        case 0:OP_N##0(sz,rnd##_##avgfunc); return; \
        case 1:OP_N(1,sz,rnd##_##avgfunc); return; \
        case 2:OP_N(2,sz,rnd##_##avgfunc); return; \
        case 3:OP_N(3,sz,rnd##_##avgfunc); return; \
        } \
271 272 273 274
}

#define OP put

275 276 277 278 279
DEFFUNC(put,   rnd,o,8,OP_C,avg32)
DEFFUNC(put,   rnd,x,8,OP_X,avg32)
DEFFUNC(put,no_rnd,x,8,OP_X,avg32)
DEFFUNC(put,   rnd,y,8,OP_Y,avg32)
DEFFUNC(put,no_rnd,y,8,OP_Y,avg32)
280 281
DEFFUNC(put,   rnd,xy,8,OP_XY,PACK)
DEFFUNC(put,no_rnd,xy,8,OP_XY,PACK)
282 283 284 285 286
DEFFUNC(put,   rnd,o,16,OP_C,avg32)
DEFFUNC(put,   rnd,x,16,OP_X,avg32)
DEFFUNC(put,no_rnd,x,16,OP_X,avg32)
DEFFUNC(put,   rnd,y,16,OP_Y,avg32)
DEFFUNC(put,no_rnd,y,16,OP_Y,avg32)
287 288 289 290 291 292
DEFFUNC(put,   rnd,xy,16,OP_XY,PACK)
DEFFUNC(put,no_rnd,xy,16,OP_XY,PACK)

#undef OP
#define OP avg

293 294 295 296 297
DEFFUNC(avg,   rnd,o,8,OP_C,avg32)
DEFFUNC(avg,   rnd,x,8,OP_X,avg32)
DEFFUNC(avg,no_rnd,x,8,OP_X,avg32)
DEFFUNC(avg,   rnd,y,8,OP_Y,avg32)
DEFFUNC(avg,no_rnd,y,8,OP_Y,avg32)
298 299
DEFFUNC(avg,   rnd,xy,8,OP_XY,PACK)
DEFFUNC(avg,no_rnd,xy,8,OP_XY,PACK)
300 301 302 303 304
DEFFUNC(avg,   rnd,o,16,OP_C,avg32)
DEFFUNC(avg,   rnd,x,16,OP_X,avg32)
DEFFUNC(avg,no_rnd,x,16,OP_X,avg32)
DEFFUNC(avg,   rnd,y,16,OP_Y,avg32)
DEFFUNC(avg,no_rnd,y,16,OP_Y,avg32)
305 306 307 308 309
DEFFUNC(avg,   rnd,xy,16,OP_XY,PACK)
DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK)

#undef OP

310 311 312 313
#define         put_no_rnd_pixels8_o     put_rnd_pixels8_o
#define         put_no_rnd_pixels16_o    put_rnd_pixels16_o
#define         avg_no_rnd_pixels8_o     avg_rnd_pixels8_o
#define         avg_no_rnd_pixels16_o    avg_rnd_pixels16_o
314

315 316 317 318 319 320 321 322
#define         put_pixels8_c            put_rnd_pixels8_o
#define         put_pixels16_c           put_rnd_pixels16_o
#define         avg_pixels8_c            avg_rnd_pixels8_o
#define         avg_pixels16_c           avg_rnd_pixels16_o
#define         put_no_rnd_pixels8_c     put_rnd_pixels8_o
#define         put_no_rnd_pixels16_c    put_rnd_pixels16_o
#define         avg_no_rnd_pixels8_c     avg_rnd_pixels8_o
#define         avg_no_rnd_pixels16_c    avg_rnd_pixels16_o
323

324
#define         QPEL
325 326 327 328 329 330 331 332 333

#ifdef QPEL

#include "qpel.c"

#endif

void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
{
334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368
        c->put_pixels_tab[0][0] = put_rnd_pixels16_o;
        c->put_pixels_tab[0][1] = put_rnd_pixels16_x;
        c->put_pixels_tab[0][2] = put_rnd_pixels16_y;
        c->put_pixels_tab[0][3] = put_rnd_pixels16_xy;
        c->put_pixels_tab[1][0] = put_rnd_pixels8_o;
        c->put_pixels_tab[1][1] = put_rnd_pixels8_x;
        c->put_pixels_tab[1][2] = put_rnd_pixels8_y;
        c->put_pixels_tab[1][3] = put_rnd_pixels8_xy;

        c->put_no_rnd_pixels_tab[0][0] = put_no_rnd_pixels16_o;
        c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x;
        c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y;
        c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy;
        c->put_no_rnd_pixels_tab[1][0] = put_no_rnd_pixels8_o;
        c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x;
        c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y;
        c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy;

        c->avg_pixels_tab[0][0] = avg_rnd_pixels16_o;
        c->avg_pixels_tab[0][1] = avg_rnd_pixels16_x;
        c->avg_pixels_tab[0][2] = avg_rnd_pixels16_y;
        c->avg_pixels_tab[0][3] = avg_rnd_pixels16_xy;
        c->avg_pixels_tab[1][0] = avg_rnd_pixels8_o;
        c->avg_pixels_tab[1][1] = avg_rnd_pixels8_x;
        c->avg_pixels_tab[1][2] = avg_rnd_pixels8_y;
        c->avg_pixels_tab[1][3] = avg_rnd_pixels8_xy;

        c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_o;
        c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x;
        c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y;
        c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy;
        c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_o;
        c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x;
        c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y;
        c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy;
369 370 371 372

#ifdef QPEL

#define dspfunc(PFX, IDX, NUM) \
373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_sh4; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_sh4; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_sh4; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_sh4; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_sh4; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_sh4; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_sh4; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_sh4; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_sh4; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_sh4; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_sh4; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_sh4; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_sh4; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_sh4; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_sh4; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_sh4
389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

    dspfunc(put_h264_qpel, 0, 16);
    dspfunc(put_h264_qpel, 1, 8);
    dspfunc(put_h264_qpel, 2, 4);
    dspfunc(avg_h264_qpel, 0, 16);
    dspfunc(avg_h264_qpel, 1, 8);
    dspfunc(avg_h264_qpel, 2, 4);

#undef dspfunc
410 411 412 413 414 415 416 417 418 419 420 421 422 423 424
    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4;
    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4;
    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4;
    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_sh4;
    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_sh4;
    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_sh4;

    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_sh4;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_sh4;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_sh4;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_sh4;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_sh4;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_sh4;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_sh4;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_sh4;
425 426 427 428 429 430

    c->gmc1 = gmc1_c;
    c->gmc = gmc_c;

#endif
}