;******************************************************************************
;* V210 SIMD unpack
;* Copyright (c) 2011 Loren Merritt <lorenm@u.washington.edu>
;* Copyright (c) 2011 Kieran Kunhya <kieran@kunhya.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

; Read-only lookup constants used by v210_planar_unpack below.
SECTION_RODATA

; 10-bit mask replicated in every dword. Applied after a per-dword right
; shift by 10 to isolate the middle 10-bit component of each packed dword.
v210_mask: times 4 dd 0x3ff
; Per-word multipliers for pmullw. Combined with the following 16-bit
; "psrlw 6", the even words (x64) yield bits 0..9 of each source dword and
; the odd words (x4) yield bits 20..29 of each source dword.
v210_mult: dw 64,4,64,4,64,4,64,4
; pshufb byte indices that reorder the words "y1 y2 y4 y5 y0 __ y3 __" into
; "y0 y1 y2 y3 y4 y5 0 0"; an index of -1 makes pshufb write a zero byte.
v210_luma_shuf: db 8,9,0,1,2,3,12,13,4,5,6,7,-1,-1,-1,-1
; pshufb byte indices that reorder the words "u0 v0 v1 u2 u1 __ v2 __" into
; "u0 u1 u2 0 | v0 v1 v2 0" (U in the low qword, V in the high qword).
v210_chroma_shuf: db 0,1,8,9,6,7,-1,-1,2,3,4,5,12,13,-1,-1

; Executable code follows.
SECTION .text

;------------------------------------------------------------------------------
; v210_planar_unpack <aligned|unaligned>
;
; Emits one function, v210_planar_unpack_<%1>, for the instruction set chosen
; by the preceding INIT_XMM. %1 selects how the packed source is loaded:
;   unaligned -> movu
;   aligned   -> mova (src must then be mmsize-aligned)
;
; Register roles (x86inc argument registers, 5 args / 5 GPRs / 7 vector regs):
;   r0 = src   packed input, consumed mmsize (16) bytes per iteration
;   r1 = y     luma output,       6 words per iteration
;   r2 = u     chroma (U) output, 3 words per iteration
;   r3 = v     chroma (V) output, 3 words per iteration
;   r4 = width number of luma samples; turned into a negative index that
;              counts up towards zero
;   m3..m6     loop-invariant constants (mult, mask, luma/chroma shuffles)
;
; Each source dword carries three 10-bit components at bits 0..9, 10..19 and
; 20..29. Four dwords therefore hold 6 luma and 3+3 chroma samples.
;
; NOTE(review): the loop body always runs at least once and advances 6 luma
; samples per pass, so width is presumably expected to be a positive multiple
; of 6 -- confirm against the caller.
; NOTE(review): every pass stores a full 16 bytes to y and 8 bytes to each of
; u and v, although only 12 resp. 6 bytes are samples (the rest are zeros
; from the shuffle). The destination buffers presumably need padding for the
; final pass -- confirm against the caller.
;------------------------------------------------------------------------------
%macro v210_planar_unpack 1

; v210_planar_unpack(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width)
cglobal v210_planar_unpack_%1, 5, 5, 7
    movsxdifnidn r4, r4d        ; widen the int width to a full register
    lea    r1, [r1+2*r4]        ; y += width words  -> end of luma row
    add    r2, r4               ; u += width bytes  (width/2 words)
    add    r3, r4               ; v += width bytes  (width/2 words)
    neg    r4                   ; r4 = -width, index runs up to 0

    mova   m3, [v210_mult]
    mova   m4, [v210_mask]
    mova   m5, [v210_luma_shuf]
    mova   m6, [v210_chroma_shuf]
.loop:
%ifidn %1, unaligned
    movu   m0, [r0]
%else
    mova   m0, [r0]
%endif

    ; m1 <- components at bits 0..9 and 20..29 of each dword (one per word)
    ; m0 <- component at bits 10..19 of each dword
    pmullw m1, m0, m3
    psrld  m0, 10
    psrlw  m1, 6  ; u0 v0 y1 y2 v1 u2 y4 y5
    pand   m0, m4 ; y0 __ u1 __ y3 __ v2 __

    ; gather the six luma words and store them in order
    shufps m2, m1, m0, 0x8d ; y1 y2 y4 y5 y0 __ y3 __
    pshufb m2, m5 ; y0 y1 y2 y3 y4 y5 __ __
    movu   [r1+2*r4], m2

    ; gather the chroma words: U into the low qword, V into the high qword
    shufps m1, m0, 0xd8 ; u0 v0 v1 u2 u1 __ v2 __
    pshufb m1, m6 ; u0 u1 u2 __ v0 v1 v2 __
    movq   [r2+r4], m1
    movhps [r3+r4], m1

    add r0, mmsize              ; next 16 bytes of packed input
    add r4, 6                   ; 6 luma samples done; sets flags for jl
    jl  .loop                   ; continue while the index is still negative

    REP_RET
%endmacro

; Instantiate v210_planar_unpack for each instruction set / load-alignment
; combination. The AVX variants are only assembled when the toolchain
; supports AVX (HAVE_AVX_EXTERNAL).

; Variants that load the packed source with movu.
INIT_XMM ssse3
v210_planar_unpack unaligned

%if HAVE_AVX_EXTERNAL
INIT_XMM avx
v210_planar_unpack unaligned
%endif

; Variants that load the packed source with mova.
INIT_XMM ssse3
v210_planar_unpack aligned

%if HAVE_AVX_EXTERNAL
INIT_XMM avx
v210_planar_unpack aligned
%endif