Commit b662e839 authored by Måns Rullgård

PPC: simplify loading some values into altivec registers

Instead of filling a local array with the desired value and loading it,
load a single element and vec_splat() it to fill the vector.

Originally committed as revision 19691 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 1feec476
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "dsputil_ppc.h" #include "dsputil_ppc.h"
#include "util_altivec.h" #include "util_altivec.h"
#include "types_altivec.h"
/* /*
altivec-enhanced gmc1. ATM this code assume stride is a multiple of 8, altivec-enhanced gmc1. ATM this code assume stride is a multiple of 8,
...@@ -32,9 +33,7 @@ ...@@ -32,9 +33,7 @@
void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder) void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
{ {
POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND); POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
const DECLARE_ALIGNED_16(unsigned short, rounder_a[8]) = const DECLARE_ALIGNED_16(unsigned short, rounder_a) = rounder;
{rounder, rounder, rounder, rounder,
rounder, rounder, rounder, rounder};
const DECLARE_ALIGNED_16(unsigned short, ABCD[8]) = const DECLARE_ALIGNED_16(unsigned short, ABCD[8]) =
{ {
(16-x16)*(16-y16), /* A */ (16-x16)*(16-y16), /* A */
...@@ -60,7 +59,7 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND); ...@@ -60,7 +59,7 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
Cv = vec_splat(tempA, 2); Cv = vec_splat(tempA, 2);
Dv = vec_splat(tempA, 3); Dv = vec_splat(tempA, 3);
rounderV = vec_ld(0, (unsigned short*)rounder_a); rounderV = vec_splat((vec_u16)vec_lde(0, &rounder_a), 0);
// we'll be able to pick-up our 9 char elements // we'll be able to pick-up our 9 char elements
// at src from those 32 bytes // at src from those 32 bytes
......
...@@ -28,6 +28,8 @@ ...@@ -28,6 +28,8 @@
#include "dsputil_ppc.h" #include "dsputil_ppc.h"
#include "util_altivec.h" #include "util_altivec.h"
#include "types_altivec.h"
// Swaps two variables (used for altivec registers) // Swaps two variables (used for altivec registers)
#define SWAP(a,b) \ #define SWAP(a,b) \
do { \ do { \
...@@ -504,29 +506,16 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1); ...@@ -504,29 +506,16 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);
{ {
register const vector signed short vczero = (const vector signed short)vec_splat_s16(0); register const vector signed short vczero = (const vector signed short)vec_splat_s16(0);
DECLARE_ALIGNED_16(short, qmul8[]) = DECLARE_ALIGNED_16(short, qmul8) = qmul;
{ DECLARE_ALIGNED_16(short, qadd8) = qadd;
qmul, qmul, qmul, qmul,
qmul, qmul, qmul, qmul
};
DECLARE_ALIGNED_16(short, qadd8[]) =
{
qadd, qadd, qadd, qadd,
qadd, qadd, qadd, qadd
};
DECLARE_ALIGNED_16(short, nqadd8[]) =
{
-qadd, -qadd, -qadd, -qadd,
-qadd, -qadd, -qadd, -qadd
};
register vector signed short blockv, qmulv, qaddv, nqaddv, temp1; register vector signed short blockv, qmulv, qaddv, nqaddv, temp1;
register vector bool short blockv_null, blockv_neg; register vector bool short blockv_null, blockv_neg;
register short backup_0 = block[0]; register short backup_0 = block[0];
register int j = 0; register int j = 0;
qmulv = vec_ld(0, qmul8); qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0);
qaddv = vec_ld(0, qadd8); qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0);
nqaddv = vec_ld(0, nqadd8); nqaddv = vec_sub(vczero, qaddv);
#if 0 // block *is* 16 bytes-aligned, it seems. #if 0 // block *is* 16 bytes-aligned, it seems.
// first make sure block[j] is 16 bytes-aligned // first make sure block[j] is 16 bytes-aligned
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment