;******************************************************************************
;* Vorbis x86 optimizations
;* Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"

SECTION_RODATA

; Four dwords, each with only bit 31 set (the sign bit of a 32-bit float).
; The SSE vorbis_inverse_coupling loads this as a per-lane sign-bit mask.
pdw_80000000: times 4 dd 0x80000000

SECTION .text

%if ARCH_X86_32
;------------------------------------------------------------------------------
; vorbis_inverse_coupling(mag, ang, block_size) -- 3DNow! version
; Only assembled when ARCH_X86_32 is set.
;
; Rewrites both arrays in place, two packed single-precision floats per
; iteration. With m = mag[i] and a = ang[i] as loaded:
;     t      = (m >= 0.0) ? (a ^ 0x80000000) : a   (sign of a flipped if m >= 0)
;     ang[i] = m + ((a >= 0.0) ? t : 0)
;     mag[i] = m - ((a >= 0.0) ? 0 : t)
;
; Both pointers are advanced to the end of their arrays and block_size is
; negated, so one register serves as a negative index that counts up to zero.
;
; The loop test is at the bottom and the index steps by 2, so block_size is
; presumably a positive multiple of 2 -- TODO confirm against the callers.
; NOTE(review): the cglobal line declares 6 vector registers while m7 is also
; used; presumably harmless for MMX registers -- confirm against x86inc.
;------------------------------------------------------------------------------
INIT_MMX 3dnow
cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
    pxor                     m7, m7     ; m7 = 0.0 in both lanes (compare reference)
    lea                    magq, [magq+block_sizeq*4] ; magq = &mag[block_size]
    lea                    angq, [angq+block_sizeq*4] ; angq = &ang[block_size]
    neg             block_sizeq         ; index now runs from -block_size up to 0
.loop:
    mova                     m0, [magq+block_sizeq*4] ; m0 = m (2 floats)
    mova                     m1, [angq+block_sizeq*4] ; m1 = a (2 floats)
    mova                     m2, m0     ; copies: pfcmpge overwrites its destination
    mova                     m3, m1
    pfcmpge                  m2, m7     ; m2 = (m >= 0.0) ? all-ones : 0
    pfcmpge                  m3, m7     ; m3 = (a >= 0.0) ? all-ones : 0
    pslld                    m2, 31     ; keep only the sign bit: 0x80000000 where m >= 0.0
    pxor                     m1, m2     ; m1 = t: a with its sign flipped where m >= 0.0
    mova                     m4, m3     ; second copy of the (a >= 0.0) mask
    pand                     m3, m1     ; m3 = t where a >= 0.0, else 0
    pandn                    m4, m1     ; m4 = t where the a >= 0.0 test failed, else 0
    pfadd                    m3, m0     ; new ang = m + ((a >= 0.0) & t)
    pfsub                    m0, m4     ; new mag = m - (~(a >= 0.0) & t)
    mova   [angq+block_sizeq*4], m3     ; store new ang
    mova   [magq+block_sizeq*4], m0     ; store new mag
    add             block_sizeq, 2      ; advance past the 2 floats just processed
    jl .loop                            ; continue while the index is still negative
    femms                               ; leave MMX/3DNow! state before returning
    RET
%endif

;------------------------------------------------------------------------------
; vorbis_inverse_coupling(mag, ang, block_size) -- SSE version
;
; Rewrites both arrays in place, four packed single-precision floats per
; iteration. With m = mag[i] and a = ang[i] as loaded:
;     t      = (0.0 <= m) ? (a ^ 0x80000000) : a   (sign of a flipped if m >= 0)
;     ang[i] = m + ((0.0 <= a) ? t : 0)
;     mag[i] = m - ((0.0 <= a) ? 0 : t)
;
; cntr is a fourth, scratch register used as the element index; it counts up
; from 0 in steps of 4 until it is no longer below block_size.
;
; The loop test is at the bottom and the index steps by 4, so block_size is
; presumably a positive multiple of 4 -- TODO confirm against the callers.
; mova is the aligned move, so mag and ang are presumably 16-byte aligned --
; TODO confirm.
; NOTE(review): the loop compares the full-width cntrq with block_sizeq, which
; relies on block_size being passed as a pointer-sized integer -- confirm
; against the C prototype.
;------------------------------------------------------------------------------
INIT_XMM sse
cglobal vorbis_inverse_coupling, 3, 4, 6, mag, ang, block_size, cntr
    mova                     m5, [pdw_80000000] ; m5 = sign-bit mask in all 4 lanes
    xor                   cntrq, cntrq  ; cntr = 0 (element index)
align 16
.loop:
    mova                     m0, [magq+cntrq*4] ; m0 = m (4 floats)
    mova                     m1, [angq+cntrq*4] ; m1 = a (4 floats)
    xorps                    m2, m2     ; m2 = 0.0
    xorps                    m3, m3     ; m3 = 0.0
    cmpleps                  m2, m0     ; m2 = (0.0 <= m) ? all-ones : 0
    cmpleps                  m3, m1     ; m3 = (0.0 <= a) ? all-ones : 0
    andps                    m2, m5     ; keep only the sign bit: 0x80000000 where 0.0 <= m
    xorps                    m1, m2     ; m1 = t: a with its sign flipped where 0.0 <= m
    mova                     m4, m3     ; second copy of the (0.0 <= a) mask
    andps                    m3, m1     ; m3 = t where 0.0 <= a, else 0
    andnps                   m4, m1     ; m4 = t where the 0.0 <= a test failed, else 0
    addps                    m3, m0     ; new ang = m + ((0.0 <= a) & t)
    subps                    m0, m4     ; new mag = m - (~(0.0 <= a) & t)
    mova         [angq+cntrq*4], m3     ; store new ang
    mova         [magq+cntrq*4], m0     ; store new mag
    add                   cntrq, 4      ; advance past the 4 floats just processed
    cmp                   cntrq, block_sizeq
    jl .loop                            ; continue while cntr < block_size (signed)
    RET