Commit 516566a8 authored by Måns Rullgård

SH4: fix IDCT

Originally committed as revision 15897 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 4d382e20
......@@ -21,6 +21,8 @@
*/
#include "libavcodec/dsputil.h"
#include "sh4.h"
#define c1 1.38703984532214752434 /* sqrt(2)*cos(1*pi/16) */
#define c2 1.30656296487637657577 /* sqrt(2)*cos(2*pi/16) */
#define c3 1.17587560241935884520 /* sqrt(2)*cos(3*pi/16) */
......@@ -51,9 +53,11 @@ static const float odd_table[] __attribute__ ((aligned(8))) = {
#undef c6
#undef c7
#if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
#if 1
#define load_matrix(table) \
do { \
const float *t = table; \
__asm__ volatile( \
" fschg\n" \
" fmov @%0+,xd0\n" \
......@@ -65,15 +69,13 @@ static const float odd_table[] __attribute__ ((aligned(8))) = {
" fmov @%0+,xd12\n" \
" fmov @%0+,xd14\n" \
" fschg\n" \
:\
: "r"(table)\
: "0" \
)
: "+r"(t) \
); \
} while (0)
/*
 * ftrv(): issue the SH4 "ftrv xmtrx,fv0" instruction on the register
 * variables fr0..fr3.  All four are operands that the asm statement both
 * reads and writes.
 *
 * NOTE(review): this excerpt comes from a diff view, so two spellings of the
 * operand list appear below: the older one ("=f" outputs tied to matching
 * "0".."3" inputs) and its replacement ("+f" read-write operands).  Only one
 * of them belongs in the real file -- confirm against the repository.
 */
#define ftrv() \
__asm__ volatile("ftrv xmtrx,fv0" \
: "=f"(fr0),"=f"(fr1),"=f"(fr2),"=f"(fr3) \
: "0"(fr0), "1"(fr1), "2"(fr2), "3"(fr3) );
: "+f"(fr0),"+f"(fr1),"+f"(fr2),"+f"(fr3));
#define DEFREG \
register float fr0 __asm__("fr0"); \
......@@ -136,10 +138,9 @@ void idct_sh4(DCTELEM *block)
int i;
float tblock[8*8],*fblock;
int ofs1,ofs2,ofs3;
int fpscr;
#if defined(__SH4__)
__asm__ ("fschg");
#endif
fp_single_enter(fpscr);
/* row */
......@@ -248,9 +249,7 @@ void idct_sh4(DCTELEM *block)
block++;
} while(--i);
#if defined(__SH4__)
__asm__ ("fschg");
#endif
fp_single_leave(fpscr);
}
#else
void idct_sh4(DCTELEM *block)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment