blockdsp_mmi.c 6.71 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
/*
 * Loongson SIMD optimized blockdsp
 *
 * Copyright (c) 2015 Loongson Technology Corporation Limited
 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "blockdsp_mips.h"
25
#include "libavutil/mips/mmiutils.h"
26

27
void ff_fill_block16_mmi(uint8_t *block, uint8_t value, ptrdiff_t line_size, int h)
28
{
29
    double ftmp[1];
30
    DECLARE_VAR_ALL64;
31

32
    __asm__ volatile (
33 34 35 36 37
        "mtc1       %[value],   %[ftmp0]                                \n\t"
        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
        "1:                                                             \n\t"
38 39 40
        MMI_SDC1(%[ftmp0], %[block], 0x00)
        PTR_ADDI   "%[h],       %[h],           -0x01                   \n\t"
        MMI_SDC1(%[ftmp0], %[block], 0x08)
41 42
        PTR_ADDU   "%[block],   %[block],       %[line_size]            \n\t"
        "bnez       %[h],       1b                                      \n\t"
43 44 45
        : [ftmp0]"=&f"(ftmp[0]),
          RESTRICT_ASM_ALL64
          [block]"+&r"(block),              [h]"+&r"(h)
46 47
        : [value]"r"(value),                [line_size]"r"((mips_reg)line_size)
        : "memory"
48 49 50
    );
}

51
void ff_fill_block8_mmi(uint8_t *block, uint8_t value, ptrdiff_t line_size, int h)
52
{
53
    double ftmp0;
54
    DECLARE_VAR_ALL64;
55

56
    __asm__ volatile (
57 58 59 60 61
        "mtc1       %[value],   %[ftmp0]                                \n\t"
        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
        "1:                                                             \n\t"
62
        MMI_SDC1(%[ftmp0], %[block], 0x00)
63 64 65
        PTR_ADDI   "%[h],       %[h],           -0x01                   \n\t"
        PTR_ADDU   "%[block],   %[block],       %[line_size]            \n\t"
        "bnez       %[h],       1b                                      \n\t"
66 67 68
        : [ftmp0]"=&f"(ftmp0),
          RESTRICT_ASM_ALL64
          [block]"+&r"(block),              [h]"+&r"(h)
69 70
        : [value]"r"(value),                [line_size]"r"((mips_reg)line_size)
        : "memory"
71 72 73 74 75
    );
}

void ff_clear_block_mmi(int16_t *block)
{
76 77
    double ftmp[2];

78
    __asm__ volatile (
79 80
        "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
        "xor        %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
81 82 83 84 85 86 87 88
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x00)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x10)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x20)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x30)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x40)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x50)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x60)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x70)
89 90
        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1])
        : [block]"r"(block)
91 92 93 94 95 96
        : "memory"
    );
}

void ff_clear_blocks_mmi(int16_t *block)
{
97 98
    double ftmp[2];

99
    __asm__ volatile (
100 101
        "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
        "xor        %[ftmp1],   %[ftmp1],       %[ftmp1]                \n\t"
102 103 104 105 106 107 108 109
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x00)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x10)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x20)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x30)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x40)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x50)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x60)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x70)
110

111 112 113 114 115 116 117 118
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x80)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x90)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0xa0)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0xb0)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0xc0)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0xd0)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0xe0)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0xf0)
119

120 121 122 123 124 125 126 127
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x100)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x110)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x120)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x130)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x140)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x150)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x160)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x170)
128

129 130 131 132 133 134 135 136
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x180)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x190)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x1a0)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x1b0)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x1c0)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x1d0)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x1e0)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x1f0)
137

138 139 140 141 142 143 144 145
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x200)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x210)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x220)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x230)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x240)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x250)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x260)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x270)
146

147 148 149 150 151 152 153 154
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x280)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x290)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x2a0)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x2b0)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x2c0)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x2d0)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x2e0)
        MMI_SQC1(%[ftmp0], %[ftmp1], %[block], 0x2f0)
155
        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1])
156
        : [block]"r"((uint64_t *)block)
157 158 159
        : "memory"
    );
}