pixblockdsp_alpha.c 2.26 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
/*
 * SIMD-optimized pixel operations
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/attributes.h"
#include "libavcodec/pixblockdsp.h"
#include "asm.h"

static void get_pixels_mvi(int16_t *restrict block,
26
                           const uint8_t *restrict pixels, ptrdiff_t line_size)
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
{
    int h = 8;

    do {
        uint64_t p;

        p = ldq(pixels);
        stq(unpkbw(p),       block);
        stq(unpkbw(p >> 32), block + 4);

        pixels += line_size;
        block += 8;
    } while (--h);
}

static void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                            int stride) {
    int h = 8;
    uint64_t mask = 0x4040;

    mask |= mask << 16;
    mask |= mask << 32;
    do {
        uint64_t x, y, c, d, a;
        uint64_t signs;

        x = ldq(s1);
        y = ldq(s2);
        c = cmpbge(x, y);
        d = x - y;
        a = zap(mask, c);       /* We use 0x4040404040404040 here...  */
        d += 4 * a;             /* ...so we can use s4addq here.      */
        signs = zap(-1, c);

        stq(unpkbw(d)       | (unpkbw(signs)       << 8), block);
        stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4);

        s1 += stride;
        s2 += stride;
        block += 8;
    } while (--h);
}

av_cold void ff_pixblockdsp_init_alpha(PixblockDSPContext *c, AVCodecContext *avctx,
                                       unsigned high_bit_depth)
{
    if (amask(AMASK_MVI) == 0) {
        if (!high_bit_depth)
            c->get_pixels = get_pixels_mvi;
        c->diff_pixels = diff_pixels_mvi;
    }
}