Commit d3f3eea9, authored by Marc Hoffman, committed by Guillaume Poirier

Blackfin optimized YUV420 to RGB CSC Color Space Converters.

YUV420 -> RGB and BGR conversion for 565, 555 and 888 (a.k.a. 24-bit color).
Speed-up compared to the C version compiled with -O3: 187.28%
Patch by Marc Hoffman %mmh A pleasantst P com%
Original thread:
Date: May 9, 2007 2:46 AM
Subject: [FFmpeg-devel] PATCH BlackFin yuv2rgb color space conversion

Originally committed as revision 23307 to svn://svn.mplayerhq.hu/mplayer/trunk/libswscale
parent 79d4c96a
......@@ -12,6 +12,9 @@ OBJS= swscale.o rgb2rgb.o
OBJS-$(TARGET_ALTIVEC) += yuv2rgb_altivec.o
OBJS-$(CONFIG_GPL) += yuv2rgb.o
OBJS-$(TARGET_ARCH_BFIN) += yuv2rgb_bfin.o
ASM_OBJS-$(TARGET_ARCH_BFIN) += internal_bfin.o
HEADERS = swscale.h rgb2rgb.h
include ../common.mak
......
This diff is collapsed.
......@@ -1992,7 +1992,7 @@ SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH
#endif
#if !defined(RUNTIME_CPUDETECT) || !defined (CONFIG_GPL) //ensure that the flags match the compiled variant if cpudetect is off
flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC);
flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN);
#ifdef HAVE_MMX2
flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
#elif defined (HAVE_3DNOW)
......@@ -2001,6 +2001,8 @@ SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH
flags |= SWS_CPU_CAPS_MMX;
#elif defined (HAVE_ALTIVEC)
flags |= SWS_CPU_CAPS_ALTIVEC;
#elif defined (ARCH_BFIN)
flags |= SWS_CPU_CAPS_BFIN;
#endif
#endif /* RUNTIME_CPUDETECT */
if (clip_table[512] != 255) globalInit();
......
......@@ -74,6 +74,7 @@ extern "C" {
#define SWS_CPU_CAPS_MMX2 0x20000000
#define SWS_CPU_CAPS_3DNOW 0x40000000
#define SWS_CPU_CAPS_ALTIVEC 0x10000000
#define SWS_CPU_CAPS_BFIN 0x01000000
#define SWS_MAX_REDUCE_CUTOFF 0.002
......
......@@ -162,6 +162,22 @@ typedef struct SwsContext{
#endif
#ifdef ARCH_BFIN
uint32_t oy __attribute__((aligned(4)));
uint32_t oc __attribute__((aligned(4)));
uint32_t zero __attribute__((aligned(4)));
uint32_t cy __attribute__((aligned(4)));
uint32_t crv __attribute__((aligned(4)));
uint32_t rmask __attribute__((aligned(4)));
uint32_t cbu __attribute__((aligned(4)));
uint32_t bmask __attribute__((aligned(4)));
uint32_t cgu __attribute__((aligned(4)));
uint32_t cgv __attribute__((aligned(4)));
uint32_t gmask __attribute__((aligned(4)));
#endif
} SwsContext;
//FIXME check init (where 0)
......
......@@ -611,6 +611,14 @@ SwsFunc yuv2rgb_get_func_ptr (SwsContext *c)
}
#endif
#ifdef ARCH_BFIN
if (c->flags & SWS_CPU_CAPS_BFIN)
{
SwsFunc t = ff_bfin_yuv2rgb_get_func_ptr (c);
if (t) return t;
}
#endif
av_log(c, AV_LOG_WARNING, "No accelerated colorspace conversion found\n");
switch(c->dstFormat){
......
/*
* Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
* April 20, 2007
*
* Blackfin video color space converter operations:
* convert I420/YV12 to RGB in various formats.
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <assert.h>
#include "config.h"
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
#include <unistd.h>
#include <bits/bfin_sram.h>
#include "rgb2rgb.h"
#include "swscale.h"
#include "swscale_internal.h"
/* Function attribute appended to the line-converter prototypes below.
 * NOTE(review): l1_text is a GCC Blackfin attribute; per the GCC manual it
 * requests placement of the function in L1 instruction SRAM -- confirm. */
#define L1CODE __attribute__ ((l1_text))
/* Per-line converters, defined outside this file.  Each takes pointers to
 * one row of Y, U and V input, an output pointer, a width argument w and a
 * pointer to a table of 32-bit coefficients (callers in this file pass the
 * address of SwsContext.oy). */
extern void ff_bfin_yuv2rgb555_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
int w, uint32_t *coeffs) L1CODE;
extern void ff_bfin_yuv2rgb565_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
int w, uint32_t *coeffs) L1CODE;
extern void ff_bfin_yuv2rgb24_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
int w, uint32_t *coeffs) L1CODE;
/* Pointer type matching the signature of the line converters above, so that
 * one of them can be handed to core_yuv420_rgb as a parameter. */
typedef void (* ltransform_t)(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
int w, uint32_t *coeffs);
/**
 * Fill the Blackfin-specific 32-bit coefficient fields of the context.
 *
 * @param c     scaler context; reads yOffset, yCoeff, vrCoeff, ubCoeff,
 *              ugCoeff and vgCoeff, writes oy, oc, zero, cy, crv, cbu, cgu,
 *              cgv and (for 555/565) rmask, gmask, bmask
 * @param rgb   non-zero: crv/cbu/cgu/cgv are taken from vr/ub/ug/vg;
 *              zero: the red/blue and the two green coefficients are swapped
 * @param masks 555 or 565 selects the matching packed-pixel masks; any other
 *              value leaves the mask fields untouched
 */
static void bfin_prepare_coefficients (SwsContext *c, int rgb, int masks)
{
    /* low 16 bits of the luma offset, shifted down to keep everything U8.0
     * for the offset calculation */
    const int luma_offset = (c->yOffset & 0xffff) >> 3;

    /* replicate the byte-sized offsets into all four bytes of the word */
    c->oy   = luma_offset * 0x01010101U;
    c->oc   = 128 * 0x01010101U;
    c->zero = 0;

    /* copy 64bit vector coeffs down to 32bit vector coeffs */
    c->cy  = c->yCoeff;
    c->crv = rgb ? c->vrCoeff : c->ubCoeff;
    c->cbu = rgb ? c->ubCoeff : c->vrCoeff;
    c->cgu = rgb ? c->ugCoeff : c->vgCoeff;
    c->cgv = rgb ? c->vgCoeff : c->ugCoeff;

    /* each 16-bit mask is duplicated into both halves of the 32-bit word */
    switch (masks) {
    case 555:
        c->rmask = 0x001f * 0x00010001U;
        c->gmask = 0x03e0 * 0x00010001U;
        c->bmask = 0x7c00 * 0x00010001U;
        break;
    case 565:
        c->rmask = 0x001f * 0x00010001U;
        c->gmask = 0x07e0 * 0x00010001U;
        c->bmask = 0xf800 * 0x00010001U;
        break;
    default:
        /* no masks are set up for other layouts */
        break;
    }
}
/**
 * Shared slice converter behind all bfin_yuv420_* entry points.
 *
 * Prepares the coefficient table in the context, then walks the slice two
 * luma rows at a time; both rows of a pair are converted with the same
 * chroma row pointers, after which the chroma pointers advance by one row.
 *
 * @param c          scaler context; its coefficient fields are rewritten on
 *                   every call and &c->oy is handed to the line converter
 * @param in         source plane pointers (index 0 luma, 1 and 2 chroma)
 * @param instrides  per-plane source strides, indexed like in
 * @param srcSliceY  first output row of this slice
 * @param srcSliceH  number of rows in the slice; rows are processed in
 *                   pairs (srcSliceH >> 1 iterations)
 * @param oplanes    destination plane pointers; only oplanes[0] is written
 * @param outstrides destination strides; only outstrides[0] is used
 * @param lcscf      line converter to invoke for every row
 * @param rgb        1 or 0; selects which chroma plane is passed in the U
 *                   and V argument slots and is forwarded to
 *                   bfin_prepare_coefficients
 * @param masks      mask layout forwarded to bfin_prepare_coefficients
 * @return srcSliceH, unchanged
 */
static int core_yuv420_rgb (SwsContext *c,
                            uint8_t **in, int *instrides,
                            int srcSliceY, int srcSliceH,
                            uint8_t **oplanes, int *outstrides,
                            ltransform_t lcscf, int rgb, int masks)
{
    /* Plane indices for the two chroma arguments.  They are kept so that
     * each pointer is later advanced by the stride of the plane it really
     * points into, even when the planes are swapped (rgb == 0). */
    const int u_plane = 1 + (1 ^ rgb);
    const int v_plane = 1 + (0 ^ rgb);
    uint8_t *py, *pu, *pv, *op;
    /* NOTE(review): the width handed to the line converter is the luma
     * stride, not a picture width -- confirm this is intended. */
    int w  = instrides[0];
    int h2 = srcSliceH >> 1;
    int i;

    bfin_prepare_coefficients (c, rgb, masks);

    py = in[0];
    pu = in[u_plane];
    pv = in[v_plane];

    op = oplanes[0] + srcSliceY * outstrides[0];

    for (i = 0; i < h2; i++) {
        /* first luma row of the pair */
        lcscf (py, pu, pv, op, w, &c->oy);
        py += instrides[0];
        op += outstrides[0];

        /* second luma row of the pair, same chroma row */
        lcscf (py, pu, pv, op, w, &c->oy);
        py += instrides[0];
        op += outstrides[0];

        /* step each chroma pointer by its own plane's stride */
        pu += instrides[u_plane];
        pv += instrides[v_plane];
    }

    return srcSliceH;
}
/**
 * Slice converter installed for PIX_FMT_RGB555 output: forwards to
 * core_yuv420_rgb with the 555 line routine, rgb flag 1 and the 555 masks.
 *
 * @return whatever core_yuv420_rgb returns
 */
static int bfin_yuv420_rgb555 (SwsContext *c,
                               uint8_t **in, int *instrides,
                               int srcSliceY, int srcSliceH,
                               uint8_t **oplanes, int *outstrides)
{
    ltransform_t line_fn = ff_bfin_yuv2rgb555_line;
    const int rgb_flag   = 1;
    const int mask_set   = 555;

    return core_yuv420_rgb (c, in, instrides, srcSliceY, srcSliceH,
                            oplanes, outstrides,
                            line_fn, rgb_flag, mask_set);
}
/**
 * Slice converter installed for PIX_FMT_BGR555 output: forwards to
 * core_yuv420_rgb with the 555 line routine, rgb flag 0 and the 555 masks.
 *
 * @return whatever core_yuv420_rgb returns
 */
static int bfin_yuv420_bgr555 (SwsContext *c,
                               uint8_t **in, int *instrides,
                               int srcSliceY, int srcSliceH,
                               uint8_t **oplanes, int *outstrides)
{
    ltransform_t line_fn = ff_bfin_yuv2rgb555_line;
    const int rgb_flag   = 0;
    const int mask_set   = 555;

    return core_yuv420_rgb (c, in, instrides, srcSliceY, srcSliceH,
                            oplanes, outstrides,
                            line_fn, rgb_flag, mask_set);
}
/**
 * Slice converter installed for PIX_FMT_RGB24 output: forwards to
 * core_yuv420_rgb with the 24-bit line routine, rgb flag 1 and mask
 * selector 888.
 *
 * @return whatever core_yuv420_rgb returns
 */
static int bfin_yuv420_rgb24 (SwsContext *c,
                              uint8_t **in, int *instrides,
                              int srcSliceY, int srcSliceH,
                              uint8_t **oplanes, int *outstrides)
{
    ltransform_t line_fn = ff_bfin_yuv2rgb24_line;
    const int rgb_flag   = 1;
    const int mask_set   = 888;

    return core_yuv420_rgb (c, in, instrides, srcSliceY, srcSliceH,
                            oplanes, outstrides,
                            line_fn, rgb_flag, mask_set);
}
/**
 * Slice converter installed for PIX_FMT_BGR24 output: forwards to
 * core_yuv420_rgb with the 24-bit line routine, rgb flag 0 and mask
 * selector 888.
 *
 * @return whatever core_yuv420_rgb returns
 */
static int bfin_yuv420_bgr24 (SwsContext *c,
                              uint8_t **in, int *instrides,
                              int srcSliceY, int srcSliceH,
                              uint8_t **oplanes, int *outstrides)
{
    ltransform_t line_fn = ff_bfin_yuv2rgb24_line;
    const int rgb_flag   = 0;
    const int mask_set   = 888;

    return core_yuv420_rgb (c, in, instrides, srcSliceY, srcSliceH,
                            oplanes, outstrides,
                            line_fn, rgb_flag, mask_set);
}
/**
 * Slice converter installed for PIX_FMT_RGB565 output: forwards to
 * core_yuv420_rgb with the 565 line routine, rgb flag 1 and the 565 masks.
 *
 * @return whatever core_yuv420_rgb returns
 */
static int bfin_yuv420_rgb565 (SwsContext *c,
                               uint8_t **in, int *instrides,
                               int srcSliceY, int srcSliceH,
                               uint8_t **oplanes, int *outstrides)
{
    ltransform_t line_fn = ff_bfin_yuv2rgb565_line;
    const int rgb_flag   = 1;
    const int mask_set   = 565;

    return core_yuv420_rgb (c, in, instrides, srcSliceY, srcSliceH,
                            oplanes, outstrides,
                            line_fn, rgb_flag, mask_set);
}
/**
 * Slice converter installed for PIX_FMT_BGR565 output: forwards to
 * core_yuv420_rgb with the 565 line routine, rgb flag 0 and the 565 masks.
 *
 * @return whatever core_yuv420_rgb returns
 */
static int bfin_yuv420_bgr565 (SwsContext *c,
                               uint8_t **in, int *instrides,
                               int srcSliceY, int srcSliceH,
                               uint8_t **oplanes, int *outstrides)
{
    ltransform_t line_fn = ff_bfin_yuv2rgb565_line;
    const int rgb_flag   = 0;
    const int mask_set   = 565;

    return core_yuv420_rgb (c, in, instrides, srcSliceY, srcSliceH,
                            oplanes, outstrides,
                            line_fn, rgb_flag, mask_set);
}
/**
 * Pick the Blackfin slice converter matching the context's destination
 * pixel format.
 *
 * @param c scaler context; only dstFormat is inspected (c is also passed to
 *          av_log as the logging context)
 * @return the converter for RGB555/BGR555/RGB565/BGR565/RGB24/BGR24, or 0
 *         for any other destination format.  An informational log line is
 *         emitted only when a converter is returned.
 */
SwsFunc ff_bfin_yuv2rgb_get_func_ptr (SwsContext *c)
{
    SwsFunc converter = 0;

    switch (c->dstFormat) {
    case PIX_FMT_RGB555:
        converter = bfin_yuv420_rgb555;
        break;
    case PIX_FMT_BGR555:
        converter = bfin_yuv420_bgr555;
        break;
    case PIX_FMT_RGB565:
        converter = bfin_yuv420_rgb565;
        break;
    case PIX_FMT_BGR565:
        converter = bfin_yuv420_bgr565;
        break;
    case PIX_FMT_RGB24:
        converter = bfin_yuv420_rgb24;
        break;
    case PIX_FMT_BGR24:
        converter = bfin_yuv420_bgr24;
        break;
    default:
        /* unsupported destination format: stay silent, report nothing */
        break;
    }

    if (!converter)
        return 0;

    av_log(c, AV_LOG_INFO, "BlackFin Accelerated Color Space Converter %s\n",
           sws_format_name (c->dstFormat));

    return converter;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment