/*
 * aligned/packed access motion
 *
 * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */


#include <stdint.h>

#include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h"
#include "dsputil_sh4.h"

/*
 * 32-bit word accessors: reinterpret the byte pointer p as a pointer to
 * uint32_t and dereference it.  LP yields a writable lvalue, LPC a
 * read-only one.  Both expansions are fully parenthesized so that they
 * compose safely with neighbouring operators.
 * NOTE(review): callers presumably guarantee p is 4-byte aligned - confirm.
 */
#define         LP(p)           (*(uint32_t*)(p))
#define         LPC(p)          (*(const uint32_t*)(p))

/*
 * Helpers for averaging four packed-byte words without letting carries
 * cross byte lanes.
 * NOTE(review): BYTE_VEC32(c) is defined outside this file; it is assumed to
 * replicate the byte c into every byte lane of a 32-bit value - confirm.
 *
 * UNPACK(ph,pl,tt0,tt1)
 *   Evaluates tt0 and tt1 once each, then stores
 *     ph = sum of both inputs with the low 2 bits of every byte cleared,
 *          each shifted right by 2,
 *     pl = sum of the low 2 bits of every byte of both inputs.
 *   ph and pl must be assignable uint32_t lvalues.
 *
 * rnd_PACK / no_rnd_PACK(ph,pl,nph,npl)
 *   Combine two UNPACK results: the high parts are added directly, the low
 *   parts are added together with a per-byte bias (2 for rnd, 1 for no_rnd),
 *   shifted right by 2 and masked back to 2 bits per byte.
 *   The expansions are fully parenthesized so they can be used as operands.
 */
#define         UNPACK(ph,pl,tt0,tt1) do { \
        const uint32_t unpack_a_ = (tt0); \
        const uint32_t unpack_b_ = (tt1); \
        (ph) = ((unpack_a_ & ~BYTE_VEC32(0x03)) >> 2) + ((unpack_b_ & ~BYTE_VEC32(0x03)) >> 2); \
        (pl) = (unpack_a_ & BYTE_VEC32(0x03)) + (unpack_b_ & BYTE_VEC32(0x03)); \
} while(0)

#define         rnd_PACK(ph,pl,nph,npl)         ((ph) + (nph) + ((((pl) + (npl) + BYTE_VEC32(0x02)) >> 2) & BYTE_VEC32(0x03)))
#define         no_rnd_PACK(ph,pl,nph,npl)      ((ph) + (nph) + ((((pl) + (npl) + BYTE_VEC32(0x01)) >> 2) & BYTE_VEC32(0x03)))

/*
 * Merge two consecutive 32-bit words a (lower address) and b into one word.
 *
 * little endian:
 *   MERGE1(a,b,ofs) - a shifted right by ofs bytes, refilled from b
 *   MERGE2(a,b,ofs) - a shifted right by ofs+1 bytes, refilled from b
 *
 * The ofs==0 (MERGE1) and ofs==3 (MERGE2) cases are special-cased because
 * the general formula would shift a 32-bit value by 32 bits, which is
 * undefined in C.  Expansions and arguments are fully parenthesized so the
 * macros can be used as operands of any expression.
 */
#define         MERGE1(a,b,ofs) (((ofs)==0)?(a):( ((a)>>(8*(ofs)))|((b)<<(32-8*(ofs))) ))
#define         MERGE2(a,b,ofs) (((ofs)==3)?(b):( ((a)>>(8*((ofs)+1)))|((b)<<(32-8*((ofs)+1))) ))
/* big endian variant, kept for reference:
#define         MERGE1(a,b,ofs) (ofs==0)?a:( ((a)<<(8*ofs))|((b)>>(32-8*ofs)) )
#define         MERGE2(a,b,ofs) (ofs==3)?b:( ((a)<<(8+8*ofs))|((b)>>(32-8-8*ofs)) )
*/

/*
 * Store operations selected through the OP macro:
 *   put(d,s) - overwrite the lvalue d with s
 *   avg(d,s) - replace d with rnd_avg32(s,d)
 *              (rnd_avg32 is defined outside this file)
 * Both expand to a single parenthesized assignment expression.
 */
#define         put(d,s)        ((d) = (s))
#define         avg(d,s)        ((d) = rnd_avg32((s),(d)))

/*
 * Row loops for 4-byte-wide blocks.  Both macros operate on the variables
 * dest, ref, stride and height of the enclosing function and apply the
 * currently selected OP to one 32-bit word per row.  The loop body always
 * runs once and stops when --height reaches 0, so height must be >= 1.
 *
 * OP_C4(ofs): ref is ofs bytes past a 32-bit boundary.  ref is first moved
 *             back by ofs bytes, then each row is built from the two words
 *             at ref and ref+4 with MERGE1.  Wrapped in do/while(0) so the
 *             whole expansion is a single statement.
 * OP_C40():   ref is already on a 32-bit boundary; one word is read per row.
 */
#define         OP_C4(ofs) \
do { \
        ref -= (ofs); \
        do { \
                OP(LP(dest),MERGE1(LPC(ref),LPC(ref+4),ofs)); \
                ref+=stride; \
                dest+=stride; \
        } while(--height); \
} while(0)

#define        OP_C40() \
        do { \
                OP(LP(dest),LPC(ref)); \
                ref+=stride; \
                dest+=stride; \
        } while(--height)

#define         OP      put

/**
 * Copy a 4-byte-wide block of height rows from ref to dest.
 *
 * The low two bits of the ref address select the row loop: the aligned loop
 * for 0, otherwise the merging loop for that byte offset.
 *
 * @param dest   destination; written with 32-bit stores
 *               (presumably 4-byte aligned - confirm with callers)
 * @param ref    source, any byte alignment
 * @param stride byte distance between rows, applied to both dest and ref
 * @param height number of rows, must be >= 1
 */
static void put_pixels4_c(uint8_t *dest,const uint8_t *ref, const int stride,int height)
{
        /* uintptr_t keeps the full pointer value; a cast to int may truncate it */
        switch((uintptr_t)ref&3){
        case 0: OP_C40(); return;
        case 1: OP_C4(1); return;
        case 2: OP_C4(2); return;
        case 3: OP_C4(3); return;
        }
}

#undef          OP
#define         OP      avg

/**
 * Average a 4-byte-wide block of height rows from ref into dest, i.e. each
 * destination word is replaced through the avg() store operation.
 *
 * The low two bits of the ref address select the row loop: the aligned loop
 * for 0, otherwise the merging loop for that byte offset.
 *
 * @param dest   destination; read and written with 32-bit accesses
 *               (presumably 4-byte aligned - confirm with callers)
 * @param ref    source, any byte alignment
 * @param stride byte distance between rows, applied to both dest and ref
 * @param height number of rows, must be >= 1
 */
static void avg_pixels4_c(uint8_t *dest,const uint8_t *ref, const int stride,int height)
{
        /* uintptr_t keeps the full pointer value; a cast to int may truncate it */
        switch((uintptr_t)ref&3){
        case 0: OP_C40(); return;
        case 1: OP_C4(1); return;
        case 2: OP_C4(2); return;
        case 3: OP_C4(3); return;
        }
}

#undef          OP

/*
 * Plain (no interpolation) row loop for a source that is ofs bytes past a
 * 32-bit boundary.  Works on dest, ref, stride and height of the enclosing
 * function and stores through the currently selected OP.
 *
 *   ofs  - byte misalignment of ref; ref is first moved back by ofs bytes
 *   sz   - block width in bytes; the extra words for columns 8..15 are only
 *          processed when sz == 16
 *   avg2 - unused here; accepted so all OP_* macros share one signature
 *
 * Each row reads consecutive aligned words and rebuilds the misaligned
 * output words with MERGE1, reusing the previously loaded word.
 * The loop runs until --height reaches 0, so height must be >= 1.
 */
#define         OP_C(ofs,sz,avg2) \
{ \
        ref -= (ofs); \
        do { \
                uint32_t        t0,t1; \
                t0 = LPC(ref+0); \
                t1 = LPC(ref+4); \
                OP(LP(dest+0), MERGE1(t0,t1,ofs)); \
                t0 = LPC(ref+8); \
                OP(LP(dest+4), MERGE1(t1,t0,ofs)); \
                if ((sz)==16) { \
                        t1 = LPC(ref+12); \
                        OP(LP(dest+8), MERGE1(t0,t1,ofs)); \
                        t0 = LPC(ref+16); \
                        OP(LP(dest+12), MERGE1(t1,t0,ofs)); \
                } \
                ref+=stride; \
                dest+= stride; \
        } while(--height); \
}

/*
 * aligned
 *
 * Plain row loop for a source that already sits on a 32-bit boundary: every
 * output word is read directly from the same offset in ref.  Works on dest,
 * ref, stride and height of the enclosing function.
 *
 *   sz   - block width in bytes; columns 8..15 are only processed when
 *          sz == 16
 *   avg2 - unused here; accepted so all OP_* macros share one signature
 *
 * The loop runs until --height reaches 0, so height must be >= 1.
 */
#define         OP_C0(sz,avg2) \
{ \
        do { \
                OP(LP(dest+0), LPC(ref+0)); \
                OP(LP(dest+4), LPC(ref+4)); \
                if ((sz)==16) { \
                        OP(LP(dest+8), LPC(ref+8)); \
                        OP(LP(dest+12), LPC(ref+12)); \
                } \
                ref+=stride; \
                dest+= stride; \
        } while(--height); \
}

/*
 * Horizontal interpolation row loop.  Works on dest, ref, stride and height
 * of the enclosing function and stores through the currently selected OP.
 *
 *   ofs  - byte misalignment of ref; ref is first moved back by ofs bytes
 *   sz   - block width in bytes; columns 8..15 are only processed when
 *          sz == 16
 *   avg2 - two-argument averaging function applied to each word pair
 *
 * For every output word the macro combines, via avg2, the word starting at
 * byte offset ofs (MERGE1) with the word starting one byte later (MERGE2).
 * One word beyond the block width is read per row to supply that extra byte.
 * The loop runs until --height reaches 0, so height must be >= 1.
 */
#define         OP_X(ofs,sz,avg2) \
{ \
        ref -= (ofs); \
        do { \
                uint32_t        t0,t1; \
                t0 = LPC(ref+0); \
                t1 = LPC(ref+4); \
                OP(LP(dest+0), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \
                t0 = LPC(ref+8); \
                OP(LP(dest+4), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \
                if ((sz)==16) { \
                        t1 = LPC(ref+12); \
                        OP(LP(dest+8), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \
                        t0 = LPC(ref+16); \
                        OP(LP(dest+12), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \
                } \
                ref+=stride; \
                dest+= stride; \
        } while(--height); \
}

/*
 * aligned
 *
 * Vertical interpolation row loop for a source on a 32-bit boundary.  Works
 * on dest, ref, stride and height of the enclosing function and stores
 * through the currently selected OP.
 *
 *   sz   - block width in bytes; columns 8..15 (t2, t3) are only processed
 *          when sz == 16
 *   avg2 - two-argument averaging function applied to each word pair
 *
 * t0..t3 hold the words of the previous row.  Each iteration advances ref
 * by one row, combines every new word with the one above it via avg2, and
 * keeps the new word for the next iteration, so height+1 source rows are
 * read in total.  The loop runs until --height reaches 0 (height >= 1).
 */
#define         OP_Y0(sz,avg2) \
{ \
        uint32_t t0,t1,t2,t3,t; \
\
        t0 = LPC(ref+0); \
        t1 = LPC(ref+4); \
        if ((sz)==16) { \
                t2 = LPC(ref+8); \
                t3 = LPC(ref+12); \
        } \
        do { \
                ref += stride; \
\
                t = LPC(ref+0); \
                OP(LP(dest+0), avg2(t0,t)); t0 = t; \
                t = LPC(ref+4); \
                OP(LP(dest+4), avg2(t1,t)); t1 = t; \
                if ((sz)==16) { \
                        t = LPC(ref+8); \
                        OP(LP(dest+8), avg2(t2,t)); t2 = t; \
                        t = LPC(ref+12); \
                        OP(LP(dest+12), avg2(t3,t)); t3 = t; \
                } \
                dest+= stride; \
        } while(--height); \
}

/*
 * Vertical interpolation row loop for a source that is ofs bytes past a
 * 32-bit boundary.  Works on dest, ref, stride and height of the enclosing
 * function and stores through the currently selected OP.
 *
 *   ofs  - byte misalignment of ref; ref is first moved back by ofs bytes
 *   sz   - block width in bytes; columns 8..15 (t2, t3) are only processed
 *          when sz == 16
 *   avg2 - two-argument averaging function applied to each word pair
 *
 * w0/w1 are the raw aligned words, MERGE1 rebuilds the misaligned words.
 * t0..t3 hold the rebuilt words of the previous row; each iteration advances
 * ref by one row, combines every new word with the one above it via avg2 and
 * keeps the new word for the next iteration, so height+1 source rows are
 * read in total.  The loop runs until --height reaches 0 (height >= 1).
 */
#define         OP_Y(ofs,sz,avg2) \
{ \
        uint32_t t0,t1,t2,t3,t,w0,w1; \
\
        ref -= (ofs); \
        w0 = LPC(ref+0); \
        w1 = LPC(ref+4); \
        t0 = MERGE1(w0,w1,ofs); \
        w0 = LPC(ref+8); \
        t1 = MERGE1(w1,w0,ofs); \
        if ((sz)==16) { \
                w1 = LPC(ref+12); \
                t2 = MERGE1(w0,w1,ofs); \
                w0 = LPC(ref+16); \
                t3 = MERGE1(w1,w0,ofs); \
        } \
        do { \
                ref += stride; \
\
                w0 = LPC(ref+0); \
                w1 = LPC(ref+4); \
                t = MERGE1(w0,w1,ofs); \
                OP(LP(dest+0), avg2(t0,t)); t0 = t; \
                w0 = LPC(ref+8); \
                t = MERGE1(w1,w0,ofs); \
                OP(LP(dest+4), avg2(t1,t)); t1 = t; \
                if ((sz)==16) { \
                        w1 = LPC(ref+12); \
                        t = MERGE1(w0,w1,ofs); \
                        OP(LP(dest+8), avg2(t2,t)); t2 = t; \
                        w0 = LPC(ref+16); \
                        t = MERGE1(w1,w0,ofs); \
                        OP(LP(dest+12), avg2(t3,t)); t3 = t; \
                } \
                dest+=stride; \
        } while(--height); \
}

/* The offset-0 x and xy cases reuse the generic macros with ofs = 0. */
#define OP_X0(sz,avg2) OP_X(0,sz,avg2)
#define OP_XY0(sz,PACK) OP_XY(0,sz,PACK)

/*
 * Combined horizontal + vertical interpolation row loop.  Works on dest,
 * ref, stride and height of the enclosing function and stores through the
 * currently selected OP.
 *
 *   ofs  - byte misalignment of ref; ref is first moved back by ofs bytes
 *   sz   - block width in bytes; columns 8..15 (a4..a7) are only processed
 *          when sz == 16
 *   PACK - macro that combines two UNPACK results (two high/low pairs)
 *          into one output word
 *
 * For each output word, UNPACK splits the horizontally adjacent pair
 * (MERGE1, MERGE2) of a row into a high and a low partial sum.  a0..a7 hold
 * these partial sums for the previous row; every iteration advances ref by
 * one row, unpacks the new row into t2/t3, lets PACK merge it with the
 * stored pair and then keeps t2/t3 for the next iteration.  height+1 source
 * rows are read in total.  The loop runs until --height reaches 0, so
 * height must be >= 1.
 */
#define         OP_XY(ofs,sz,PACK) \
{ \
        uint32_t        t2,t3,w0,w1; \
        uint32_t        a0,a1,a2,a3,a4,a5,a6,a7; \
\
        ref -= (ofs); \
        w0 = LPC(ref+0); \
        w1 = LPC(ref+4); \
        UNPACK(a0,a1,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
        w0 = LPC(ref+8); \
        UNPACK(a2,a3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
        if ((sz)==16) { \
                w1 = LPC(ref+12); \
                UNPACK(a4,a5,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
                w0 = LPC(ref+16); \
                UNPACK(a6,a7,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
        } \
        do { \
                ref+=stride; \
                w0 = LPC(ref+0); \
                w1 = LPC(ref+4); \
                UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
                OP(LP(dest+0),PACK(a0,a1,t2,t3)); \
                a0 = t2; a1 = t3; \
                w0 = LPC(ref+8); \
                UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
                OP(LP(dest+4),PACK(a2,a3,t2,t3)); \
                a2 = t2; a3 = t3; \
                if ((sz)==16) { \
                        w1 = LPC(ref+12); \
                        UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
                        OP(LP(dest+8),PACK(a4,a5,t2,t3)); \
                        a4 = t2; a5 = t3; \
                        w0 = LPC(ref+16); \
                        UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
                        OP(LP(dest+12),PACK(a6,a7,t2,t3)); \
                        a6 = t2; a7 = t3; \
                } \
                dest+=stride; \
        } while(--height); \
}

/*
 * Define one static pixel function named <op>_<rnd>_pixels<sz>_<xy>.
 *
 *   op      - name prefix (put / avg); the store actually performed is the
 *             one selected by the OP macro at the point of instantiation
 *   rnd     - rnd or no_rnd; pasted in front of avgfunc
 *   xy      - name suffix (o, x, y, xy)
 *   sz      - block width in bytes (8 or 16)
 *   OP_N    - row-loop macro family; OP_N##0 is used for an aligned ref,
 *             OP_N(ofs,...) for byte offsets 1..3
 *   avgfunc - suffix of the combiner handed to the row loop, giving
 *             <rnd>_<avgfunc>
 *
 * The generated function dispatches on the low two bits of the ref address.
 * uintptr_t is used for that test because a cast to int may truncate the
 * pointer on targets where pointers are wider than int.
 */
#define         DEFFUNC(op,rnd,xy,sz,OP_N,avgfunc) \
static void op##_##rnd##_pixels##sz##_##xy (uint8_t * dest, const uint8_t * ref, \
                                const int stride, int height) \
{ \
        switch((uintptr_t)ref&3) { \
        case 0:OP_N##0(sz,rnd##_##avgfunc); return; \
        case 1:OP_N(1,sz,rnd##_##avgfunc); return; \
        case 2:OP_N(2,sz,rnd##_##avgfunc); return; \
        case 3:OP_N(3,sz,rnd##_##avgfunc); return; \
        } \
}

#define OP put

277 278 279 280 281
DEFFUNC(put,   rnd,o,8,OP_C,avg32)
DEFFUNC(put,   rnd,x,8,OP_X,avg32)
DEFFUNC(put,no_rnd,x,8,OP_X,avg32)
DEFFUNC(put,   rnd,y,8,OP_Y,avg32)
DEFFUNC(put,no_rnd,y,8,OP_Y,avg32)
282 283
DEFFUNC(put,   rnd,xy,8,OP_XY,PACK)
DEFFUNC(put,no_rnd,xy,8,OP_XY,PACK)
284 285 286 287 288
DEFFUNC(put,   rnd,o,16,OP_C,avg32)
DEFFUNC(put,   rnd,x,16,OP_X,avg32)
DEFFUNC(put,no_rnd,x,16,OP_X,avg32)
DEFFUNC(put,   rnd,y,16,OP_Y,avg32)
DEFFUNC(put,no_rnd,y,16,OP_Y,avg32)
289 290 291 292 293 294
DEFFUNC(put,   rnd,xy,16,OP_XY,PACK)
DEFFUNC(put,no_rnd,xy,16,OP_XY,PACK)

#undef OP
#define OP avg

295 296 297 298 299
DEFFUNC(avg,   rnd,o,8,OP_C,avg32)
DEFFUNC(avg,   rnd,x,8,OP_X,avg32)
DEFFUNC(avg,no_rnd,x,8,OP_X,avg32)
DEFFUNC(avg,   rnd,y,8,OP_Y,avg32)
DEFFUNC(avg,no_rnd,y,8,OP_Y,avg32)
300 301
DEFFUNC(avg,   rnd,xy,8,OP_XY,PACK)
DEFFUNC(avg,no_rnd,xy,8,OP_XY,PACK)
302 303 304 305 306
DEFFUNC(avg,   rnd,o,16,OP_C,avg32)
DEFFUNC(avg,   rnd,x,16,OP_X,avg32)
DEFFUNC(avg,no_rnd,x,16,OP_X,avg32)
DEFFUNC(avg,   rnd,y,16,OP_Y,avg32)
DEFFUNC(avg,no_rnd,y,16,OP_Y,avg32)
307 308 309 310 311
DEFFUNC(avg,   rnd,xy,16,OP_XY,PACK)
DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK)

#undef OP

312 313 314 315
/*
 * The plain "o" row loops (OP_C / OP_C0) never use their averaging argument,
 * so the no_rnd variants are simply aliases of the rnd functions.
 */
#define         put_no_rnd_pixels8_o     put_rnd_pixels8_o
#define         put_no_rnd_pixels16_o    put_rnd_pixels16_o
#define         avg_no_rnd_pixels8_o     avg_rnd_pixels8_o
#define         avg_no_rnd_pixels16_o    avg_rnd_pixels16_o

/*
 * Generic *_c names mapped onto the functions generated in this file.
 * NOTE(review): presumably these are the names qpel.c expects - confirm.
 */
#define         put_pixels8_c            put_rnd_pixels8_o
#define         put_pixels16_c           put_rnd_pixels16_o
#define         avg_pixels8_c            avg_rnd_pixels8_o
#define         avg_pixels16_c           avg_rnd_pixels16_o
#define         put_no_rnd_pixels8_c     put_rnd_pixels8_o
#define         put_no_rnd_pixels16_c    put_rnd_pixels16_o
#define         avg_no_rnd_pixels8_c     avg_rnd_pixels8_o
#define         avg_no_rnd_pixels16_c    avg_rnd_pixels16_o

/* QPEL is defined unconditionally, so qpel.c is always compiled into this unit. */
#define         QPEL

#ifdef QPEL

#include "qpel.c"

#endif

/**
 * Install the pixel functions of this file into a DSP context.
 *
 * The put/avg (and no_rnd) pixel tables are only filled when the stream is
 * not high bit depth (bits_per_raw_sample <= 8).  In the tables, row [0]
 * receives the 16-byte-wide functions and row [1] the 8-byte-wide ones;
 * columns 0..3 receive the o, x, y and xy variants.
 *
 * When QPEL is defined, the qpel, H.264 qpel/chroma (again only for
 * non-high-bit-depth streams), mspel and gmc entries are set as well.
 *
 * @param c     DSP context whose function pointers are overwritten
 * @param avctx codec context; only bits_per_raw_sample is read
 */
void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
{
    const int high_bit_depth = avctx->bits_per_raw_sample > 8;

    if (!high_bit_depth) {
        c->put_pixels_tab[0][0] = put_rnd_pixels16_o;
        c->put_pixels_tab[0][1] = put_rnd_pixels16_x;
        c->put_pixels_tab[0][2] = put_rnd_pixels16_y;
        c->put_pixels_tab[0][3] = put_rnd_pixels16_xy;
        c->put_pixels_tab[1][0] = put_rnd_pixels8_o;
        c->put_pixels_tab[1][1] = put_rnd_pixels8_x;
        c->put_pixels_tab[1][2] = put_rnd_pixels8_y;
        c->put_pixels_tab[1][3] = put_rnd_pixels8_xy;

        c->put_no_rnd_pixels_tab[0][0] = put_no_rnd_pixels16_o;
        c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x;
        c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y;
        c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy;
        c->put_no_rnd_pixels_tab[1][0] = put_no_rnd_pixels8_o;
        c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x;
        c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y;
        c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy;

        c->avg_pixels_tab[0][0] = avg_rnd_pixels16_o;
        c->avg_pixels_tab[0][1] = avg_rnd_pixels16_x;
        c->avg_pixels_tab[0][2] = avg_rnd_pixels16_y;
        c->avg_pixels_tab[0][3] = avg_rnd_pixels16_xy;
        c->avg_pixels_tab[1][0] = avg_rnd_pixels8_o;
        c->avg_pixels_tab[1][1] = avg_rnd_pixels8_x;
        c->avg_pixels_tab[1][2] = avg_rnd_pixels8_y;
        c->avg_pixels_tab[1][3] = avg_rnd_pixels8_xy;

        c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_o;
        c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x;
        c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y;
        c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy;
        c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_o;
        c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x;
        c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y;
        c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy;
    }

#ifdef QPEL

/*
 * Fill the 16 entries of c-><PFX>_pixels_tab[IDX] with the functions
 * <PFX><NUM>_mcXY_sh4, X being the low two bits of the entry index and Y the
 * high two bits.  Wrapped in do/while(0) so it is a single statement.
 */
#define dspfunc(PFX, IDX, NUM) \
    do { \
        c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_sh4; \
        c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_sh4; \
        c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_sh4; \
        c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_sh4; \
        c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_sh4; \
        c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_sh4; \
        c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_sh4; \
        c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_sh4; \
        c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_sh4; \
        c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_sh4; \
        c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_sh4; \
        c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_sh4; \
        c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_sh4; \
        c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_sh4; \
        c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_sh4; \
        c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_sh4; \
    } while (0)

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

    /* The H.264 qpel and chroma functions are only installed for <= 8 bit streams. */
    if (!high_bit_depth) {
        dspfunc(put_h264_qpel, 0, 16);
        dspfunc(put_h264_qpel, 1, 8);
        dspfunc(put_h264_qpel, 2, 4);
        dspfunc(avg_h264_qpel, 0, 16);
        dspfunc(avg_h264_qpel, 1, 8);
        dspfunc(avg_h264_qpel, 2, 4);

        c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4;
        c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4;
        c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4;
        c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_sh4;
        c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_sh4;
        c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_sh4;
    }

#undef dspfunc

    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_sh4;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_sh4;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_sh4;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_sh4;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_sh4;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_sh4;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_sh4;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_sh4;

    c->gmc1 = gmc1_c;
    c->gmc = gmc_c;

#endif
}