Commit 034fc7bf authored by Michael Niedermayer

Merge remote-tracking branch 'qatar/master'

* qatar/master: (22 commits)
  configure: enable memalign_hack automatically when needed
  swscale: unbreak the build on non-x86 systems.
  swscale: remove if(bitexact) branch from functions.
  swscale: remove if(canMMX2BeUsed) conditional.
  swscale: remove swScale_{c,MMX,MMX2} duplication.
  swscale: use emms_c().
  Move emms_c() from libavcodec to libavutil.
  tiff: set palette in the context when specified in TIFF_PAL tag
  rtsp: use strtoul to parse rtptime and seq values.
  pgssubdec: fix incorrect colors.
  dvdsubdec: fix incorrect colors.
  ape: Allow demuxing of files with metadata tags.
  swscale: remove dead macro WRITEBGR24OLD.
  swscale: remove AMD3DNOW "optimizations".
  swscale: remove duplicate code in ppc/ subdirectory.
  swscale: remove duplicated x86/ functions.
  swscale: force --enable-runtime-cpudetect and remove SWS_CPU_CAPS_*.
  vsrc_buffer.h: add file doxy
  vsrc_buffer: tweak error message in init()
  msmpeg4: reindent.
  ...
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents d1adad3c 9bbd6a4c
......@@ -2859,11 +2859,6 @@ check_header X11/extensions/XvMClib.h
check_struct dxva2api.h DXVA_PictureParameters wDecodedPictureIndex
if ! enabled_any memalign memalign_hack posix_memalign malloc_aligned &&
enabled_any $need_memalign ; then
die "Error, no aligned memory allocator but SSE enabled, disable it or use --enable-memalign-hack."
fi
disabled zlib || check_lib zlib.h zlibVersion -lz || disable zlib
disabled bzlib || check_lib2 bzlib.h BZ2_bzlibVersion -lbz2 || disable bzlib
......@@ -3156,6 +3151,9 @@ check_deps $CONFIG_LIST \
enabled asm || { arch=c; disable $ARCH_LIST $ARCH_EXT_LIST; }
! enabled_any memalign posix_memalign malloc_aligned &&
enabled_any $need_memalign && enable memalign_hack
echo "install prefix $prefix"
echo "source path $source_path"
echo "C compiler $cc"
......
......@@ -433,3 +433,49 @@ For more information about libx264 and the supported options see:
@url{http://www.videolan.org/developers/x264.html}
@c man end VIDEO ENCODERS
@subheading Floating-Point-Only AC-3 Encoding Options
These options are only valid for the floating-point encoder; they do not exist
for the fixed-point encoder because the corresponding features are not
implemented there. An example invocation is shown after the option list below.
@table @option
@item -channel_coupling @var{boolean}
Enables/Disables use of channel coupling, which is an optional AC-3 feature
that increases quality by combining high frequency information from multiple
channels into a single channel. The per-channel high frequency information is
sent with less accuracy in both the frequency and time domains. This allows
more bits to be used for lower frequencies while preserving enough information
to reconstruct the high frequencies. This option is enabled by default for the
floating-point encoder and should generally be left enabled, except for testing
or to increase encoding speed.
@table @option
@item -1
@itemx auto
Selected by Encoder (default)
@item 0
@itemx off
Disable Channel Coupling
@item 1
@itemx on
Enable Channel Coupling
@end table
@item -cpl_start_band @var{number}
Coupling Start Band. Sets the channel coupling start band, from 1 to 15. If a
value higher than the bandwidth is used, it will be reduced to 1 less than the
coupling end band. If @var{auto} is used, the start band will be determined by
the encoder based on the bit rate, sample rate, and channel layout. This option
has no effect if channel coupling is disabled.
@table @option
@item -1
@itemx auto
Selected by Encoder (default)
@end table
@end table
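
For illustration (a hypothetical invocation; the exact command-line syntax may
differ between ffmpeg versions), the following would disable channel coupling
when encoding with the floating-point AC-3 encoder:

@example
ffmpeg -i input.wav -acodec ac3 -ab 192k -channel_coupling 0 output.ac3
@end example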
@c man end ENCODERS
......@@ -269,8 +269,6 @@ OBJS-$(CONFIG_MPEG2VIDEO_ENCODER) += mpeg12enc.o mpegvideo_enc.o \
mpegvideo.o error_resilience.o
OBJS-$(CONFIG_MPEG4_VAAPI_HWACCEL) += vaapi_mpeg4.o
OBJS-$(CONFIG_MSMPEG4V1_DECODER) += msmpeg4.o msmpeg4data.o
OBJS-$(CONFIG_MSMPEG4V1_ENCODER) += msmpeg4.o msmpeg4data.o h263dec.o \
h263.o ituh263dec.o mpeg4videodec.o
OBJS-$(CONFIG_MSMPEG4V2_DECODER) += msmpeg4.o msmpeg4data.o h263dec.o \
h263.o ituh263dec.o mpeg4videodec.o
OBJS-$(CONFIG_MSMPEG4V2_ENCODER) += msmpeg4.o msmpeg4data.o h263dec.o \
......
......@@ -28,7 +28,8 @@
#define AVCODEC_AC3_H
#define AC3_MAX_CODED_FRAME_SIZE 3840 /* in bytes */
#define AC3_MAX_CHANNELS 6 /* including LFE channel */
#define AC3_MAX_CHANNELS 7 /**< maximum number of channels, including coupling channel */
#define CPL_CH 0 /**< coupling channel index */
#define AC3_MAX_COEFS 256
#define AC3_BLOCK_SIZE 256
......@@ -158,7 +159,9 @@ typedef struct AC3EncOptions {
/* other encoding options */
int allow_per_frame_metadata;
int stereo_rematrixing;
int stereo_rematrixing;
int channel_coupling;
int cpl_start;
} AC3EncOptions;
......
......@@ -58,11 +58,6 @@
#include "fft.h"
#include "fmtconvert.h"
/* override ac3.h to include coupling channel */
#undef AC3_MAX_CHANNELS
#define AC3_MAX_CHANNELS 7
#define CPL_CH 0
#define AC3_OUTPUT_LFEON 8
#define SPX_MAX_BANDS 17
......
......@@ -53,12 +53,6 @@ const uint8_t ff_eac3_hebap_tab[64] = {
19, 19, 19, 19,
};
/**
* Table E2.16 Default Coupling Banding Structure
*/
const uint8_t ff_eac3_default_cpl_band_struct[18] =
{ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1 };
/**
* Table E2.15 Default Spectral Extension Banding Structure
*/
......
......@@ -27,7 +27,6 @@
extern const uint8_t ff_ac3_ungroup_3_in_5_bits_tab[32][3];
extern const uint8_t ff_eac3_hebap_tab[64];
extern const uint8_t ff_eac3_default_cpl_band_struct[18];
extern const uint8_t ff_eac3_default_spx_band_struct[17];
#endif /* AVCODEC_AC3DEC_DATA_H */
......@@ -70,6 +70,7 @@ typedef struct AC3MDCTContext {
FFTContext fft; ///< FFT context for MDCT calculation
} AC3MDCTContext;
/**
* Data for a single audio block.
*/
......@@ -83,10 +84,22 @@ typedef struct AC3Block {
int16_t **band_psd; ///< psd per critical band
int16_t **mask; ///< masking curve
uint16_t **qmant; ///< quantized mantissas
uint8_t **cpl_coord_exp; ///< coupling coord exponents (cplcoexp)
uint8_t **cpl_coord_mant; ///< coupling coord mantissas (cplcomant)
uint8_t coeff_shift[AC3_MAX_CHANNELS]; ///< fixed-point coefficient shift values
uint8_t new_rematrixing_strategy; ///< send new rematrixing flags in this block
int num_rematrixing_bands; ///< number of rematrixing bands
uint8_t rematrixing_flags[4]; ///< rematrixing flags
struct AC3Block *exp_ref_block[AC3_MAX_CHANNELS]; ///< reference blocks for EXP_REUSE
int new_cpl_strategy; ///< send new coupling strategy
int cpl_in_use; ///< coupling in use for this block (cplinu)
uint8_t channel_in_cpl[AC3_MAX_CHANNELS]; ///< channel in coupling (chincpl)
int num_cpl_channels; ///< number of channels in coupling
uint8_t new_cpl_coords; ///< send new coupling coordinates (cplcoe)
uint8_t cpl_master_exp[AC3_MAX_CHANNELS]; ///< coupling coord master exponents (mstrcplco)
int new_snr_offsets; ///< send new SNR offsets
int new_cpl_leak; ///< send new coupling leak info
int end_freq[AC3_MAX_CHANNELS]; ///< end frequency bin (endmant)
} AC3Block;
/**
......@@ -133,10 +146,16 @@ typedef struct AC3EncodeContext {
int cutoff; ///< user-specified cutoff frequency, in Hz
int bandwidth_code; ///< bandwidth code (0 to 60) (chbwcod)
int nb_coefs[AC3_MAX_CHANNELS];
int start_freq[AC3_MAX_CHANNELS]; ///< start frequency bin (strtmant)
int cpl_end_freq; ///< coupling channel end frequency bin
int cpl_on; ///< coupling turned on for this frame
int cpl_enabled; ///< coupling enabled for all frames
int num_cpl_subbands; ///< number of coupling subbands (ncplsubnd)
int num_cpl_bands; ///< number of coupling bands (ncplbnd)
uint8_t cpl_band_sizes[AC3_MAX_CPL_BANDS]; ///< number of coeffs in each coupling band
int rematrixing_enabled; ///< stereo rematrixing enabled
int num_rematrixing_bands; ///< number of rematrixing bands
/* bitrate allocation control */
int slow_gain_code; ///< slow gain code (sgaincod)
......@@ -163,6 +182,8 @@ typedef struct AC3EncodeContext {
int16_t *band_psd_buffer;
int16_t *mask_buffer;
uint16_t *qmant_buffer;
uint8_t *cpl_coord_exp_buffer;
uint8_t *cpl_coord_mant_buffer;
uint8_t exp_strategy[AC3_MAX_CHANNELS][AC3_MAX_BLOCKS]; ///< exponent strategies
......@@ -237,6 +258,12 @@ const AVOption ff_ac3_options[] = {
{"hdcd", "HDCD", 0, FF_OPT_TYPE_CONST, {.dbl = 1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "ad_conv_type"},
/* Other Encoding Options */
{"stereo_rematrixing", "Stereo Rematrixing", OFFSET(stereo_rematrixing), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM},
#if CONFIG_AC3ENC_FLOAT
{"channel_coupling", "Channel Coupling", OFFSET(channel_coupling), FF_OPT_TYPE_INT, {.dbl = 1 }, 0, 1, AC3ENC_PARAM, "channel_coupling"},
{"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "channel_coupling"},
{"cpl_start_band", "Coupling Start Band", OFFSET(cpl_start), FF_OPT_TYPE_INT, {.dbl = -1 }, -1, 15, AC3ENC_PARAM, "cpl_start_band"},
{"auto", "Selected by the Encoder", 0, FF_OPT_TYPE_CONST, {.dbl = -1 }, INT_MIN, INT_MAX, AC3ENC_PARAM, "cpl_start_band"},
#endif
{NULL}
};
#endif
......@@ -267,9 +294,9 @@ static void scale_coefficients(AC3EncodeContext *s);
/**
* LUT for number of exponent groups.
* exponent_group_tab[exponent strategy-1][number of coefficients]
* exponent_group_tab[coupling][exponent strategy-1][number of coefficients]
*/
static uint8_t exponent_group_tab[3][256];
static uint8_t exponent_group_tab[2][3][256];
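/* Worked example (a sketch based on the formulas in exponent_init() below):
 * with the EXP_D45 strategy (groups of 12 exponents), a full-bandwidth channel
 * at maximum bandwidth has 253 coefficients, giving
 * exponent_group_tab[0][2][253] = (253 + 12 - 4) / 12 = 21 groups, while a
 * coupling channel spanning 18 subbands (216 coefficients) gives
 * exponent_group_tab[1][2][216] = 216 / 12 = 18 groups. */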
/**
......@@ -330,6 +357,49 @@ static const uint8_t ac3_bandwidth_tab[5][3][19] = {
};
/**
* LUT to select the coupling start band based on the bit rate, sample rate, and
* number of full-bandwidth channels. -1 = coupling off
* ac3_coupling_start_tab[channel_mode-2][sample rate code][bit rate code]
*
* TODO: more testing for optimal parameters.
* multi-channel tests at 44.1kHz and 32kHz.
*/
static const int8_t ac3_coupling_start_tab[6][3][19] = {
// 32 40 48 56 64 80 96 112 128 160 192 224 256 320 384 448 512 576 640
// 2/0
{ { 0, 0, 0, 0, 0, 0, 0, 1, 1, 7, 8, 11, 12, -1, -1, -1, -1, -1, -1 },
{ 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 10, 12, 13, -1, -1, -1, -1, -1, -1 },
{ 0, 0, 0, 0, 1, 2, 2, 9, 13, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1 } },
// 3/0
{ { 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 6, 9, 11, 12, 13, -1, -1, -1, -1 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 6, 9, 11, 12, 13, -1, -1, -1, -1 },
{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } },
// 2/1 - untested
{ { 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 6, 9, 11, 12, 13, -1, -1, -1, -1 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 6, 9, 11, 12, 13, -1, -1, -1, -1 },
{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } },
// 3/1
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 10, 11, 11, 12, 12, 14, -1 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 10, 11, 11, 12, 12, 14, -1 },
{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } },
// 2/2 - untested
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 10, 11, 11, 12, 12, 14, -1 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 10, 11, 11, 12, 12, 14, -1 },
{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } },
// 3/2
{ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 6, 8, 11, 12, 12, -1, -1 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 6, 8, 11, 12, 12, -1, -1 },
{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 } },
};
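/* Worked lookup (illustrative, taken from the table above): for 2/0 stereo
 * (channel_mode 2), a 48 kHz sample rate code of 0 and a 192 kbit/s bit rate
 * code of 10, ac3_coupling_start_tab[0][0][10] is 8, so coupling would start
 * at band 8, i.e. bin 8 * 12 + 37 = 133. */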
/**
* Adjust the frame size to make the average bit rate match the target bit rate.
* This is only needed for 11025, 22050, and 44100 sample rates.
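* (Rough worked example: at 44100 Hz and 128 kbit/s a frame carries about
* 128000 * 1536 / 44100 ≈ 4458 bits, which is not a whole number of 16-bit
* words, so frame sizes must alternate between two adjacent sizes to reach the
* target bit rate on average.)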
......@@ -392,15 +462,297 @@ static void apply_mdct(AC3EncodeContext *s)
apply_window(&s->dsp, s->windowed_samples, input_samples, s->mdct.window, AC3_WINDOW_SIZE);
block->coeff_shift[ch] = normalize_samples(s);
block->coeff_shift[ch+1] = normalize_samples(s);
s->mdct.fft.mdct_calcw(&s->mdct.fft, block->mdct_coef[ch],
s->mdct.fft.mdct_calcw(&s->mdct.fft, block->mdct_coef[ch+1],
s->windowed_samples);
}
}
}
static void compute_coupling_strategy(AC3EncodeContext *s)
{
int blk, ch;
int got_cpl_snr;
/* set coupling use flags for each block/channel */
/* TODO: turn coupling on/off and adjust start band based on bit usage */
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = 1; ch <= s->fbw_channels; ch++)
block->channel_in_cpl[ch] = s->cpl_on;
}
/* enable coupling for each block if at least 2 channels have coupling
enabled for that block */
got_cpl_snr = 0;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
block->num_cpl_channels = 0;
for (ch = 1; ch <= s->fbw_channels; ch++)
block->num_cpl_channels += block->channel_in_cpl[ch];
block->cpl_in_use = block->num_cpl_channels > 1;
if (!block->cpl_in_use) {
block->num_cpl_channels = 0;
for (ch = 1; ch <= s->fbw_channels; ch++)
block->channel_in_cpl[ch] = 0;
}
block->new_cpl_strategy = !blk;
if (blk) {
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (block->channel_in_cpl[ch] != s->blocks[blk-1].channel_in_cpl[ch]) {
block->new_cpl_strategy = 1;
break;
}
}
}
block->new_cpl_leak = block->new_cpl_strategy;
if (!blk || (block->cpl_in_use && !got_cpl_snr)) {
block->new_snr_offsets = 1;
if (block->cpl_in_use)
got_cpl_snr = 1;
} else {
block->new_snr_offsets = 0;
}
}
/* set bandwidth for each channel */
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (block->channel_in_cpl[ch])
block->end_freq[ch] = s->start_freq[CPL_CH];
else
block->end_freq[ch] = s->bandwidth_code * 3 + 73;
}
}
}
/**
* Calculate a single coupling coordinate.
*/
static inline float calc_cpl_coord(float energy_ch, float energy_cpl)
{
float coord = 0.125;
if (energy_cpl > 0)
coord *= sqrtf(energy_ch / energy_cpl);
return coord;
}
/**
* Calculate coupling channel and coupling coordinates.
* TODO: Currently this is only used for the floating-point encoder. I was
* able to make it work for the fixed-point encoder, but quality was
* generally lower than without coupling. If a more adaptive coupling
* strategy is implemented, it may become worthwhile to use coupling for
* the fixed-point encoder as well.
*/
static void apply_channel_coupling(AC3EncodeContext *s)
{
#if CONFIG_AC3ENC_FLOAT
DECLARE_ALIGNED(16, float, cpl_coords) [AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16];
DECLARE_ALIGNED(16, int32_t, fixed_cpl_coords)[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16];
int blk, ch, bnd, i, j;
CoefSumType energy[AC3_MAX_BLOCKS][AC3_MAX_CHANNELS][16] = {{{0}}};
int num_cpl_coefs = s->num_cpl_subbands * 12;
/* calculate coupling channel from fbw channels */
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
CoefType *cpl_coef = &block->mdct_coef[CPL_CH][s->start_freq[CPL_CH]];
if (!block->cpl_in_use)
continue;
memset(cpl_coef-1, 0, (num_cpl_coefs+4) * sizeof(*cpl_coef));
for (ch = 1; ch <= s->fbw_channels; ch++) {
CoefType *ch_coef = &block->mdct_coef[ch][s->start_freq[CPL_CH]];
if (!block->channel_in_cpl[ch])
continue;
for (i = 0; i < num_cpl_coefs; i++)
cpl_coef[i] += ch_coef[i];
}
/* note: coupling start bin % 4 will always be 1 and num_cpl_coefs
will always be a multiple of 12, so we need to subtract 1 from
the start and add 4 to the length when using optimized
functions which require 16-byte alignment. */
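/* worked example (assuming coupling start band 0): the start bin is
   0 * 12 + 37 = 37 and 37 % 4 == 1, so processing num_cpl_coefs + 4 values
   starting one bin earlier (bin 36) keeps both the start offset and the
   length a multiple of 4 floats (16 bytes) while still covering every
   coupling bin */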
/* coefficients must be clipped to +/- 1.0 in order to be encoded */
s->dsp.vector_clipf(cpl_coef-1, cpl_coef-1, -1.0f, 1.0f, num_cpl_coefs+4);
/* scale coupling coefficients from float to 24-bit fixed-point */
s->ac3dsp.float_to_fixed24(&block->fixed_coef[CPL_CH][s->start_freq[CPL_CH]-1],
cpl_coef-1, num_cpl_coefs+4);
}
/* calculate energy in each band in coupling channel and each fbw channel */
/* TODO: possibly use SIMD to speed up energy calculation */
bnd = 0;
i = s->start_freq[CPL_CH];
while (i < s->cpl_end_freq) {
int band_size = s->cpl_band_sizes[bnd];
for (ch = CPL_CH; ch <= s->fbw_channels; ch++) {
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
if (!block->cpl_in_use || (ch > CPL_CH && !block->channel_in_cpl[ch]))
continue;
for (j = 0; j < band_size; j++) {
CoefType v = block->mdct_coef[ch][i+j];
MAC_COEF(energy[blk][ch][bnd], v, v);
}
}
}
i += band_size;
bnd++;
}
/* determine which blocks to send new coupling coordinates for */
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
AC3Block *block0 = blk ? &s->blocks[blk-1] : NULL;
int new_coords = 0;
CoefSumType coord_diff[AC3_MAX_CHANNELS] = {0,};
if (block->cpl_in_use) {
/* calculate coupling coordinates for all blocks and calculate the
average difference between coordinates in successive blocks */
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (!block->channel_in_cpl[ch])
continue;
for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
cpl_coords[blk][ch][bnd] = calc_cpl_coord(energy[blk][ch][bnd],
energy[blk][CPL_CH][bnd]);
if (blk > 0 && block0->cpl_in_use &&
block0->channel_in_cpl[ch]) {
coord_diff[ch] += fabs(cpl_coords[blk-1][ch][bnd] -
cpl_coords[blk ][ch][bnd]);
}
}
coord_diff[ch] /= s->num_cpl_bands;
}
/* send new coordinates if this is the first block, if the previous
* block did not use coupling but this block does, if the set of
* channels using coupling has changed from the previous block, or if
* the coordinate difference from the last block for any channel is
* greater than a threshold value. */
if (blk == 0) {
new_coords = 1;
} else if (!block0->cpl_in_use) {
new_coords = 1;
} else {
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (block->channel_in_cpl[ch] && !block0->channel_in_cpl[ch]) {
new_coords = 1;
break;
}
}
if (!new_coords) {
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (block->channel_in_cpl[ch] && coord_diff[ch] > 0.04) {
new_coords = 1;
break;
}
}
}
}
}
block->new_cpl_coords = new_coords;
}
/* calculate final coupling coordinates, taking into account the reuse of
coordinates in successive blocks */
for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
blk = 0;
while (blk < AC3_MAX_BLOCKS) {
int blk1;
CoefSumType energy_cpl;
AC3Block *block = &s->blocks[blk];
if (!block->cpl_in_use) {
blk++;
continue;
}
energy_cpl = energy[blk][CPL_CH][bnd];
blk1 = blk+1;
while (!s->blocks[blk1].new_cpl_coords && blk1 < AC3_MAX_BLOCKS) {
if (s->blocks[blk1].cpl_in_use)
energy_cpl += energy[blk1][CPL_CH][bnd];
blk1++;
}
for (ch = 1; ch <= s->fbw_channels; ch++) {
CoefType energy_ch;
if (!block->channel_in_cpl[ch])
continue;
energy_ch = energy[blk][ch][bnd];
blk1 = blk+1;
while (!s->blocks[blk1].new_cpl_coords && blk1 < AC3_MAX_BLOCKS) {
if (s->blocks[blk1].cpl_in_use)
energy_ch += energy[blk1][ch][bnd];
blk1++;
}
cpl_coords[blk][ch][bnd] = calc_cpl_coord(energy_ch, energy_cpl);
}
blk = blk1;
}
}
/* calculate exponents/mantissas for coupling coordinates */
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
if (!block->cpl_in_use || !block->new_cpl_coords)
continue;
s->ac3dsp.float_to_fixed24(fixed_cpl_coords[blk][1],
cpl_coords[blk][1],
s->fbw_channels * 16);
s->ac3dsp.extract_exponents(block->cpl_coord_exp[1],
fixed_cpl_coords[blk][1],
s->fbw_channels * 16);
for (ch = 1; ch <= s->fbw_channels; ch++) {
int bnd, min_exp, max_exp, master_exp;
/* determine master exponent */
min_exp = max_exp = block->cpl_coord_exp[ch][0];
for (bnd = 1; bnd < s->num_cpl_bands; bnd++) {
int exp = block->cpl_coord_exp[ch][bnd];
min_exp = FFMIN(exp, min_exp);
max_exp = FFMAX(exp, max_exp);
}
master_exp = ((max_exp - 15) + 2) / 3;
master_exp = FFMAX(master_exp, 0);
while (min_exp < master_exp * 3)
master_exp--;
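/* worked example (illustrative values): with max_exp = 20 and min_exp = 18,
   master_exp = ((20 - 15) + 2) / 3 = 2; min_exp >= 3 * 2, so it is kept and
   each band exponent is then stored relative to 3 * master_exp = 6 */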
for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
block->cpl_coord_exp[ch][bnd] = av_clip(block->cpl_coord_exp[ch][bnd] -
master_exp * 3, 0, 15);
}
block->cpl_master_exp[ch] = master_exp;
/* quantize mantissas */
for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
int cpl_exp = block->cpl_coord_exp[ch][bnd];
int cpl_mant = (fixed_cpl_coords[blk][ch][bnd] << (5 + cpl_exp + master_exp * 3)) >> 24;
if (cpl_exp == 15)
cpl_mant >>= 1;
else
cpl_mant -= 16;
block->cpl_coord_mant[ch][bnd] = cpl_mant;
}
}
}
#endif /* CONFIG_AC3ENC_FLOAT */
}
/**
* Determine rematrixing flags for each block and band.
*/
......@@ -413,23 +765,32 @@ static void compute_rematrixing_strategy(AC3EncodeContext *s)
if (s->channel_mode != AC3_CHMODE_STEREO)
return;
s->num_rematrixing_bands = 4;
nb_coefs = FFMIN(s->nb_coefs[0], s->nb_coefs[1]);
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
block = &s->blocks[blk];
block->new_rematrixing_strategy = !blk;
if (!s->rematrixing_enabled)
if (!s->rematrixing_enabled) {
block0 = block;
continue;
for (bnd = 0; bnd < s->num_rematrixing_bands; bnd++) {
}
block->num_rematrixing_bands = 4;
if (block->cpl_in_use) {
block->num_rematrixing_bands -= (s->start_freq[CPL_CH] <= 61);
block->num_rematrixing_bands -= (s->start_freq[CPL_CH] == 37);
if (blk && block->num_rematrixing_bands != block0->num_rematrixing_bands)
block->new_rematrixing_strategy = 1;
}
nb_coefs = FFMIN(block->end_freq[1], block->end_freq[2]);
for (bnd = 0; bnd < block->num_rematrixing_bands; bnd++) {
/* calculate the sum of squared coeffs for one band in one block */
int start = ff_ac3_rematrix_band_tab[bnd];
int end = FFMIN(nb_coefs, ff_ac3_rematrix_band_tab[bnd+1]);
CoefSumType sum[4] = {0,};
for (i = start; i < end; i++) {
CoefType lt = block->mdct_coef[0][i];
CoefType rt = block->mdct_coef[1][i];
CoefType lt = block->mdct_coef[1][i];
CoefType rt = block->mdct_coef[2][i];
CoefType md = lt + rt;
CoefType sd = lt - rt;
MAC_COEF(sum[0], lt, lt);
......@@ -468,21 +829,20 @@ static void apply_rematrixing(AC3EncodeContext *s)
if (!s->rematrixing_enabled)
return;
nb_coefs = FFMIN(s->nb_coefs[0], s->nb_coefs[1]);
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
if (block->new_rematrixing_strategy)
flags = block->rematrixing_flags;
for (bnd = 0; bnd < s->num_rematrixing_bands; bnd++) {
nb_coefs = FFMIN(block->end_freq[1], block->end_freq[2]);
for (bnd = 0; bnd < block->num_rematrixing_bands; bnd++) {
if (flags[bnd]) {
start = ff_ac3_rematrix_band_tab[bnd];
end = FFMIN(nb_coefs, ff_ac3_rematrix_band_tab[bnd+1]);
for (i = start; i < end; i++) {
int32_t lt = block->fixed_coef[0][i];
int32_t rt = block->fixed_coef[1][i];
block->fixed_coef[0][i] = (lt + rt) >> 1;
block->fixed_coef[1][i] = (lt - rt) >> 1;
int32_t lt = block->fixed_coef[1][i];
int32_t rt = block->fixed_coef[2][i];
block->fixed_coef[1][i] = (lt + rt) >> 1;
block->fixed_coef[2][i] = (lt - rt) >> 1;
}
}
}
......@@ -499,12 +859,13 @@ static av_cold void exponent_init(AC3EncodeContext *s)
for (expstr = EXP_D15-1; expstr <= EXP_D45-1; expstr++) {
grpsize = 3 << expstr;
for (i = 73; i < 256; i++) {
exponent_group_tab[expstr][i] = (i + grpsize - 4) / grpsize;
for (i = 12; i < 256; i++) {
exponent_group_tab[0][expstr][i] = (i + grpsize - 4) / grpsize;
exponent_group_tab[1][expstr][i] = (i ) / grpsize;
}
}
/* LFE */
exponent_group_tab[0][7] = 2;
exponent_group_tab[0][0][7] = 2;
}
......@@ -517,7 +878,7 @@ static void extract_exponents(AC3EncodeContext *s)
{
int blk, ch;
for (ch = 0; ch < s->channels; ch++) {
for (ch = !s->cpl_on; ch <= s->channels; ch++) {
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
s->ac3dsp.extract_exponents(block->exp[ch], block->fixed_coef[ch],
......@@ -542,7 +903,7 @@ static void compute_exp_strategy(AC3EncodeContext *s)
{
int ch, blk, blk1;
for (ch = 0; ch < s->fbw_channels; ch++) {
for (ch = !s->cpl_on; ch <= s->fbw_channels; ch++) {
uint8_t *exp_strategy = s->exp_strategy[ch];
uint8_t *exp = s->blocks[0].exp[ch];
int exp_diff;
......@@ -551,13 +912,18 @@ static void compute_exp_strategy(AC3EncodeContext *s)
reused in the next frame */
exp_strategy[0] = EXP_NEW;
exp += AC3_MAX_COEFS;
for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 1; blk < AC3_MAX_BLOCKS; blk++, exp += AC3_MAX_COEFS) {
if ((ch == CPL_CH && (!s->blocks[blk].cpl_in_use || !s->blocks[blk-1].cpl_in_use)) ||
(ch > CPL_CH && (s->blocks[blk].channel_in_cpl[ch] != s->blocks[blk-1].channel_in_cpl[ch]))) {
exp_strategy[blk] = EXP_NEW;
continue;
}
exp_diff = s->dsp.sad[0](NULL, exp, exp - AC3_MAX_COEFS, 16, 16);
if (exp_diff > EXP_DIFF_THRESHOLD)
exp_strategy[blk] = EXP_REUSE;
if (ch == CPL_CH && exp_diff > (EXP_DIFF_THRESHOLD * (s->blocks[blk].end_freq[ch] - s->start_freq[ch]) / AC3_MAX_COEFS))
exp_strategy[blk] = EXP_NEW;
else if (ch > CPL_CH && exp_diff > EXP_DIFF_THRESHOLD)
exp_strategy[blk] = EXP_NEW;
else
exp_strategy[blk] = EXP_REUSE;
exp += AC3_MAX_COEFS;
}
/* now select the encoding strategy type : if exponents are often
......@@ -588,25 +954,26 @@ static void compute_exp_strategy(AC3EncodeContext *s)
/**
* Update the exponents so that they are the ones the decoder will decode.
*/
static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy)
static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy,
int cpl)
{
int nb_groups, i, k;
nb_groups = exponent_group_tab[exp_strategy-1][nb_exps] * 3;
nb_groups = exponent_group_tab[cpl][exp_strategy-1][nb_exps] * 3;
/* for each group, compute the minimum exponent */
switch(exp_strategy) {
case EXP_D25:
for (i = 1, k = 1; i <= nb_groups; i++) {
for (i = 1, k = 1-cpl; i <= nb_groups; i++) {
uint8_t exp_min = exp[k];
if (exp[k+1] < exp_min)
exp_min = exp[k+1];
exp[i] = exp_min;
exp[i-cpl] = exp_min;
k += 2;
}
break;
case EXP_D45:
for (i = 1, k = 1; i <= nb_groups; i++) {
for (i = 1, k = 1-cpl; i <= nb_groups; i++) {
uint8_t exp_min = exp[k];
if (exp[k+1] < exp_min)
exp_min = exp[k+1];
......@@ -614,14 +981,14 @@ static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy)
exp_min = exp[k+2];
if (exp[k+3] < exp_min)
exp_min = exp[k+3];
exp[i] = exp_min;
exp[i-cpl] = exp_min;
k += 4;
}
break;
}
/* constraint for DC exponent */
if (exp[0] > 15)
if (!cpl && exp[0] > 15)
exp[0] = 15;
/* decrease the delta between each groups to within 2 so that they can be
......@@ -632,18 +999,21 @@ static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy)
while (--i >= 0)
exp[i] = FFMIN(exp[i], exp[i+1] + 2);
if (cpl)
exp[-1] = exp[0] & ~1;
/* now we have the exponent values the decoder will see */
switch (exp_strategy) {
case EXP_D25:
for (i = nb_groups, k = nb_groups * 2; i > 0; i--) {
uint8_t exp1 = exp[i];
for (i = nb_groups, k = (nb_groups * 2)-cpl; i > 0; i--) {
uint8_t exp1 = exp[i-cpl];
exp[k--] = exp1;
exp[k--] = exp1;
}
break;
case EXP_D45:
for (i = nb_groups, k = nb_groups * 4; i > 0; i--) {
exp[k] = exp[k-1] = exp[k-2] = exp[k-3] = exp[i];
for (i = nb_groups, k = (nb_groups * 4)-cpl; i > 0; i--) {
exp[k] = exp[k-1] = exp[k-2] = exp[k-3] = exp[i-cpl];
k -= 4;
}
break;
......@@ -659,32 +1029,40 @@ static void encode_exponents_blk_ch(uint8_t *exp, int nb_exps, int exp_strategy)
*/
static void encode_exponents(AC3EncodeContext *s)
{
int blk, blk1, ch;
int blk, blk1, ch, cpl;
uint8_t *exp, *exp_strategy;
int nb_coefs, num_reuse_blocks;
for (ch = 0; ch < s->channels; ch++) {
exp = s->blocks[0].exp[ch];
for (ch = !s->cpl_on; ch <= s->channels; ch++) {
exp = s->blocks[0].exp[ch] + s->start_freq[ch];
exp_strategy = s->exp_strategy[ch];
nb_coefs = s->nb_coefs[ch];
cpl = (ch == CPL_CH);
blk = 0;
while (blk < AC3_MAX_BLOCKS) {
AC3Block *block = &s->blocks[blk];
if (cpl && !block->cpl_in_use) {
exp += AC3_MAX_COEFS;
blk++;
continue;
}
nb_coefs = block->end_freq[ch] - s->start_freq[ch];
blk1 = blk + 1;
/* count the number of EXP_REUSE blocks after the current block
and set exponent reference block pointers */
s->blocks[blk].exp_ref_block[ch] = &s->blocks[blk];
block->exp_ref_block[ch] = block;
while (blk1 < AC3_MAX_BLOCKS && exp_strategy[blk1] == EXP_REUSE) {
s->blocks[blk1].exp_ref_block[ch] = &s->blocks[blk];
s->blocks[blk1].exp_ref_block[ch] = block;
blk1++;
}
num_reuse_blocks = blk1 - blk - 1;
/* for the EXP_REUSE case we select the min of the exponents */
s->ac3dsp.ac3_exponent_min(exp, num_reuse_blocks, nb_coefs);
s->ac3dsp.ac3_exponent_min(exp-s->start_freq[ch], num_reuse_blocks,
AC3_MAX_COEFS);
encode_exponents_blk_ch(exp, nb_coefs, exp_strategy[blk]);
encode_exponents_blk_ch(exp, nb_coefs, exp_strategy[blk], cpl);
exp += AC3_MAX_COEFS * (num_reuse_blocks + 1);
blk = blk1;
......@@ -700,7 +1078,7 @@ static void encode_exponents(AC3EncodeContext *s)
*/
static void group_exponents(AC3EncodeContext *s)
{
int blk, ch, i;
int blk, ch, i, cpl;
int group_size, nb_groups, bit_count;
uint8_t *p;
int delta0, delta1, delta2;
......@@ -709,14 +1087,15 @@ static void group_exponents(AC3EncodeContext *s)
bit_count = 0;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = 0; ch < s->channels; ch++) {
for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
int exp_strategy = s->exp_strategy[ch][blk];
if (exp_strategy == EXP_REUSE)
continue;
cpl = (ch == CPL_CH);
group_size = exp_strategy + (exp_strategy == EXP_D45);
nb_groups = exponent_group_tab[exp_strategy-1][s->nb_coefs[ch]];
nb_groups = exponent_group_tab[cpl][exp_strategy-1][block->end_freq[ch]-s->start_freq[ch]];
bit_count += 4 + (nb_groups * 7);
p = block->exp[ch];
p = block->exp[ch] + s->start_freq[ch] - cpl;
/* DC exponent */
exp1 = *p++;
......@@ -783,9 +1162,7 @@ static void count_frame_bits_fixed(AC3EncodeContext *s)
/* assumptions:
* no dynamic range codes
* no channel coupling
* bit allocation parameters do not change between blocks
* SNR offsets do not change between blocks
* no delta bit allocation
* no skipped data
* no auxiliary data
......@@ -806,11 +1183,6 @@ static void count_frame_bits_fixed(AC3EncodeContext *s)
/* dynamic range */
frame_bits++;
/* coupling strategy */
frame_bits++;
if (!blk)
frame_bits++;
/* exponent strategy */
frame_bits += 2 * s->fbw_channels;
if (s->lfe_on)
......@@ -821,11 +1193,6 @@ static void count_frame_bits_fixed(AC3EncodeContext *s)
if (!blk)
frame_bits += 2 + 2 + 2 + 2 + 3;
/* snr offsets and fast gain codes */
frame_bits++;
if (!blk)
frame_bits += 6 + s->channels * (4 + 3);
/* delta bit allocation */
frame_bits++;
......@@ -857,7 +1224,7 @@ static void bit_alloc_init(AC3EncodeContext *s)
s->slow_gain_code = 1;
s->db_per_bit_code = 3;
s->floor_code = 7;
for (ch = 0; ch < s->channels; ch++)
for (ch = 0; ch <= s->channels; ch++)
s->fast_gain_code[ch] = 4;
/* initial snr offset */
......@@ -871,6 +1238,8 @@ static void bit_alloc_init(AC3EncodeContext *s)
s->bit_alloc.slow_gain = ff_ac3_slow_gain_tab[s->slow_gain_code];
s->bit_alloc.db_per_bit = ff_ac3_db_per_bit_tab[s->db_per_bit_code];
s->bit_alloc.floor = ff_ac3_floor_tab[s->floor_code];
s->bit_alloc.cpl_fast_leak = 0;
s->bit_alloc.cpl_slow_leak = 0;
count_frame_bits_fixed(s);
}
......@@ -899,17 +1268,64 @@ static void count_frame_bits(AC3EncodeContext *s)
/* audio blocks */
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
/* coupling strategy */
frame_bits++;
if (block->new_cpl_strategy) {
frame_bits++;
if (block->cpl_in_use) {
frame_bits += s->fbw_channels;
if (s->channel_mode == AC3_CHMODE_STEREO)
frame_bits++;
frame_bits += 4 + 4;
frame_bits += s->num_cpl_subbands - 1;
}
}
/* coupling coordinates */
if (block->cpl_in_use) {
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (block->channel_in_cpl[ch]) {
frame_bits++;
if (block->new_cpl_coords) {
frame_bits += 2;
frame_bits += (4 + 4) * s->num_cpl_bands;
}
}
}
}
/* stereo rematrixing */
if (s->channel_mode == AC3_CHMODE_STEREO) {
frame_bits++;
if (s->blocks[blk].new_rematrixing_strategy)
frame_bits += s->num_rematrixing_bands;
frame_bits += block->num_rematrixing_bands;
}
/* bandwidth codes & gain range */
for (ch = 0; ch < s->fbw_channels; ch++) {
if (s->exp_strategy[ch][blk] != EXP_REUSE)
frame_bits += 6 + 2;
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (s->exp_strategy[ch][blk] != EXP_REUSE) {
if (!block->channel_in_cpl[ch])
frame_bits += 6;
frame_bits += 2;
}
}
/* coupling exponent strategy */
if (block->cpl_in_use)
frame_bits += 2;
/* snr offsets and fast gain codes */
frame_bits++;
if (block->new_snr_offsets)
frame_bits += 6 + (s->channels + block->cpl_in_use) * (4 + 3);
/* coupling leak info */
if (block->cpl_in_use) {
frame_bits++;
if (block->new_cpl_leak)
frame_bits += 3 + 3;
}
}
......@@ -943,16 +1359,16 @@ static void bit_alloc_masking(AC3EncodeContext *s)
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = 0; ch < s->channels; ch++) {
for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
/* We only need psd and mask for calculating bap.
Since we currently do not calculate bap when exponent
strategy is EXP_REUSE we do not need to calculate psd or mask. */
if (s->exp_strategy[ch][blk] != EXP_REUSE) {
ff_ac3_bit_alloc_calc_psd(block->exp[ch], 0,
s->nb_coefs[ch],
block->psd[ch], block->band_psd[ch]);
ff_ac3_bit_alloc_calc_psd(block->exp[ch], s->start_freq[ch],
block->end_freq[ch], block->psd[ch],
block->band_psd[ch]);
ff_ac3_bit_alloc_calc_mask(&s->bit_alloc, block->band_psd[ch],
0, s->nb_coefs[ch],
s->start_freq[ch], block->end_freq[ch],
ff_ac3_fast_gain_tab[s->fast_gain_code[ch]],
ch == s->lfe_channel,
DBA_NONE, 0, NULL, NULL, NULL,
......@@ -970,11 +1386,12 @@ static void bit_alloc_masking(AC3EncodeContext *s)
static void reset_block_bap(AC3EncodeContext *s)
{
int blk, ch;
int channels = s->channels + 1;
if (s->blocks[0].bap[0] == s->bap_buffer)
return;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (ch = 0; ch < s->channels; ch++) {
s->blocks[blk].bap[ch] = &s->bap_buffer[AC3_MAX_COEFS * (blk * s->channels + ch)];
for (ch = 0; ch < channels; ch++) {
s->blocks[blk].bap[ch] = &s->bap_buffer[AC3_MAX_COEFS * (blk * channels + ch)];
}
}
}
......@@ -1000,28 +1417,37 @@ static int bit_alloc(AC3EncodeContext *s, int snr_offset)
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
AC3Block *ref_block;
int av_uninit(ch0);
int got_cpl = !block->cpl_in_use;
// initialize grouped mantissa counts. these are set so that they are
// padded to the next whole group size when bits are counted in
// compute_mantissa_size_final
mant_cnt[0] = mant_cnt[3] = 0;
mant_cnt[1] = mant_cnt[2] = 2;
mant_cnt[4] = 1;
for (ch = 0; ch < s->channels; ch++) {
for (ch = 1; ch <= s->channels; ch++) {
if (!got_cpl && ch > 1 && block->channel_in_cpl[ch-1]) {
ch0 = ch - 1;
ch = CPL_CH;
got_cpl = 1;
}
/* Currently the only bit allocation parameters which vary across
blocks within a frame are the exponent values. We can take
advantage of that by reusing the bit allocation pointers
whenever we reuse exponents. */
ref_block = block->exp_ref_block[ch];
if (s->exp_strategy[ch][blk] != EXP_REUSE) {
s->ac3dsp.bit_alloc_calc_bap(ref_block->mask[ch],
ref_block->psd[ch], 0,
s->nb_coefs[ch], snr_offset,
s->bit_alloc.floor, ff_ac3_bap_tab,
ref_block->bap[ch]);
s->ac3dsp.bit_alloc_calc_bap(ref_block->mask[ch], ref_block->psd[ch],
s->start_freq[ch], block->end_freq[ch],
snr_offset, s->bit_alloc.floor,
ff_ac3_bap_tab, ref_block->bap[ch]);
}
mantissa_bits += s->ac3dsp.compute_mantissa_size(mant_cnt,
ref_block->bap[ch],
s->nb_coefs[ch]);
ref_block->bap[ch]+s->start_freq[ch],
block->end_freq[ch]-s->start_freq[ch]);
if (ch == CPL_CH)
ch = ch0;
}
mantissa_bits += compute_mantissa_size_final(mant_cnt);
}
......@@ -1047,7 +1473,7 @@ static int cbr_bit_allocation(AC3EncodeContext *s)
/* if previous frame SNR offset was 1023, check if current frame can also
use SNR offset of 1023. if so, skip the search. */
if ((snr_offset | s->fine_snr_offset[0]) == 1023) {
if ((snr_offset | s->fine_snr_offset[1]) == 1023) {
if (bit_alloc(s, 1023) <= bits_left)
return 0;
}
......@@ -1071,7 +1497,7 @@ static int cbr_bit_allocation(AC3EncodeContext *s)
reset_block_bap(s);
s->coarse_snr_offset = snr_offset >> 4;
for (ch = 0; ch < s->channels; ch++)
for (ch = !s->cpl_on; ch <= s->channels; ch++)
s->fine_snr_offset[ch] = snr_offset & 0xF;
return 0;
......@@ -1089,26 +1515,26 @@ static int downgrade_exponents(AC3EncodeContext *s)
{
int ch, blk;
for (ch = 0; ch < s->fbw_channels; ch++) {
for (blk = AC3_MAX_BLOCKS-1; blk >= 0; blk--) {
for (blk = AC3_MAX_BLOCKS-1; blk >= 0; blk--) {
for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++) {
if (s->exp_strategy[ch][blk] == EXP_D15) {
s->exp_strategy[ch][blk] = EXP_D25;
return 0;
}
}
}
for (ch = 0; ch < s->fbw_channels; ch++) {
for (blk = AC3_MAX_BLOCKS-1; blk >= 0; blk--) {
for (blk = AC3_MAX_BLOCKS-1; blk >= 0; blk--) {
for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++) {
if (s->exp_strategy[ch][blk] == EXP_D25) {
s->exp_strategy[ch][blk] = EXP_D45;
return 0;
}
}
}
for (ch = 0; ch < s->fbw_channels; ch++) {
/* block 0 cannot reuse exponents, so only downgrade D45 to REUSE if
the block number > 0 */
for (blk = AC3_MAX_BLOCKS-1; blk > 0; blk--) {
/* block 0 cannot reuse exponents, so only downgrade D45 to REUSE if
the block number > 0 */
for (blk = AC3_MAX_BLOCKS-1; blk > 0; blk--) {
for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++) {
if (s->exp_strategy[ch][blk] > EXP_REUSE) {
s->exp_strategy[ch][blk] = EXP_REUSE;
return 0;
......@@ -1135,7 +1561,18 @@ static int compute_bit_allocation(AC3EncodeContext *s)
ret = cbr_bit_allocation(s);
while (ret) {
/* fallback 1: downgrade exponents */
/* fallback 1: disable channel coupling */
if (s->cpl_on) {
s->cpl_on = 0;
compute_coupling_strategy(s);
compute_rematrixing_strategy(s);
apply_rematrixing(s);
process_exponents(s);
ret = compute_bit_allocation(s);
continue;
}
/* fallback 2: downgrade exponents */
if (!downgrade_exponents(s)) {
extract_exponents(s);
encode_exponents(s);
......@@ -1189,12 +1626,13 @@ static inline int asym_quant(int c, int e, int qbits)
* Quantize a set of mantissas for a single channel in a single block.
*/
static void quantize_mantissas_blk_ch(AC3Mant *s, int32_t *fixed_coef,
uint8_t *exp,
uint8_t *bap, uint16_t *qmant, int n)
uint8_t *exp, uint8_t *bap,
uint16_t *qmant, int start_freq,
int end_freq)
{
int i;
for (i = 0; i < n; i++) {
for (i = start_freq; i < end_freq; i++) {
int v;
int c = fixed_coef[i];
int e = exp[i];
......@@ -1284,19 +1722,27 @@ static void quantize_mantissas_blk_ch(AC3Mant *s, int32_t *fixed_coef,
*/
static void quantize_mantissas(AC3EncodeContext *s)
{
int blk, ch;
int blk, ch, ch0=0, got_cpl;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
AC3Block *ref_block;
AC3Mant m = { 0 };
for (ch = 0; ch < s->channels; ch++) {
got_cpl = !block->cpl_in_use;
for (ch = 1; ch <= s->channels; ch++) {
if (!got_cpl && ch > 1 && block->channel_in_cpl[ch-1]) {
ch0 = ch - 1;
ch = CPL_CH;
got_cpl = 1;
}
ref_block = block->exp_ref_block[ch];
quantize_mantissas_blk_ch(&m, block->fixed_coef[ch],
ref_block->exp[ch], ref_block->bap[ch],
block->qmant[ch], s->nb_coefs[ch]);
ref_block->exp[ch],
ref_block->bap[ch], block->qmant[ch],
s->start_freq[ch], block->end_freq[ch]);
if (ch == CPL_CH)
ch = ch0;
}
}
}
......@@ -1363,7 +1809,8 @@ static void output_frame_header(AC3EncodeContext *s)
*/
static void output_audio_block(AC3EncodeContext *s, int blk)
{
int ch, i, baie, rbnd;
int ch, i, baie, bnd, got_cpl;
int av_uninit(ch0);
AC3Block *block = &s->blocks[blk];
/* block switching */
......@@ -1378,11 +1825,38 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
put_bits(&s->pb, 1, 0);
/* channel coupling */
if (!blk) {
put_bits(&s->pb, 1, 1); /* coupling strategy present */
put_bits(&s->pb, 1, 0); /* no coupling strategy */
} else {
put_bits(&s->pb, 1, 0); /* no new coupling strategy */
put_bits(&s->pb, 1, block->new_cpl_strategy);
if (block->new_cpl_strategy) {
put_bits(&s->pb, 1, block->cpl_in_use);
if (block->cpl_in_use) {
int start_sub, end_sub;
for (ch = 1; ch <= s->fbw_channels; ch++)
put_bits(&s->pb, 1, block->channel_in_cpl[ch]);
if (s->channel_mode == AC3_CHMODE_STEREO)
put_bits(&s->pb, 1, 0); /* phase flags in use */
start_sub = (s->start_freq[CPL_CH] - 37) / 12;
end_sub = (s->cpl_end_freq - 37) / 12;
put_bits(&s->pb, 4, start_sub);
put_bits(&s->pb, 4, end_sub - 3);
for (bnd = start_sub+1; bnd < end_sub; bnd++)
put_bits(&s->pb, 1, ff_eac3_default_cpl_band_struct[bnd]);
}
}
/* coupling coordinates */
if (block->cpl_in_use) {
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (block->channel_in_cpl[ch]) {
put_bits(&s->pb, 1, block->new_cpl_coords);
if (block->new_cpl_coords) {
put_bits(&s->pb, 2, block->cpl_master_exp[ch]);
for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
put_bits(&s->pb, 4, block->cpl_coord_exp [ch][bnd]);
put_bits(&s->pb, 4, block->cpl_coord_mant[ch][bnd]);
}
}
}
}
}
/* stereo rematrixing */
......@@ -1390,40 +1864,41 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
put_bits(&s->pb, 1, block->new_rematrixing_strategy);
if (block->new_rematrixing_strategy) {
/* rematrixing flags */
for (rbnd = 0; rbnd < s->num_rematrixing_bands; rbnd++)
put_bits(&s->pb, 1, block->rematrixing_flags[rbnd]);
for (bnd = 0; bnd < block->num_rematrixing_bands; bnd++)
put_bits(&s->pb, 1, block->rematrixing_flags[bnd]);
}
}
/* exponent strategy */
for (ch = 0; ch < s->fbw_channels; ch++)
for (ch = !block->cpl_in_use; ch <= s->fbw_channels; ch++)
put_bits(&s->pb, 2, s->exp_strategy[ch][blk]);
if (s->lfe_on)
put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]);
/* bandwidth */
for (ch = 0; ch < s->fbw_channels; ch++) {
if (s->exp_strategy[ch][blk] != EXP_REUSE)
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (s->exp_strategy[ch][blk] != EXP_REUSE && !block->channel_in_cpl[ch])
put_bits(&s->pb, 6, s->bandwidth_code);
}
/* exponents */
for (ch = 0; ch < s->channels; ch++) {
for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
int nb_groups;
int cpl = (ch == CPL_CH);
if (s->exp_strategy[ch][blk] == EXP_REUSE)
continue;
/* DC exponent */
put_bits(&s->pb, 4, block->grouped_exp[ch][0]);
put_bits(&s->pb, 4, block->grouped_exp[ch][0] >> cpl);
/* exponent groups */
nb_groups = exponent_group_tab[s->exp_strategy[ch][blk]-1][s->nb_coefs[ch]];
nb_groups = exponent_group_tab[cpl][s->exp_strategy[ch][blk]-1][block->end_freq[ch]-s->start_freq[ch]];
for (i = 1; i <= nb_groups; i++)
put_bits(&s->pb, 7, block->grouped_exp[ch][i]);
/* gain range info */
if (ch != s->lfe_channel)
if (ch != s->lfe_channel && !cpl)
put_bits(&s->pb, 2, 0);
}
......@@ -1439,23 +1914,40 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
}
/* snr offset */
put_bits(&s->pb, 1, baie);
if (baie) {
put_bits(&s->pb, 1, block->new_snr_offsets);
if (block->new_snr_offsets) {
put_bits(&s->pb, 6, s->coarse_snr_offset);
for (ch = 0; ch < s->channels; ch++) {
for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
put_bits(&s->pb, 4, s->fine_snr_offset[ch]);
put_bits(&s->pb, 3, s->fast_gain_code[ch]);
}
}
/* coupling leak */
if (block->cpl_in_use) {
put_bits(&s->pb, 1, block->new_cpl_leak);
if (block->new_cpl_leak) {
put_bits(&s->pb, 3, s->bit_alloc.cpl_fast_leak);
put_bits(&s->pb, 3, s->bit_alloc.cpl_slow_leak);
}
}
put_bits(&s->pb, 1, 0); /* no delta bit allocation */
put_bits(&s->pb, 1, 0); /* no data to skip */
/* mantissas */
for (ch = 0; ch < s->channels; ch++) {
got_cpl = !block->cpl_in_use;
for (ch = 1; ch <= s->channels; ch++) {
int b, q;
AC3Block *ref_block = block->exp_ref_block[ch];
for (i = 0; i < s->nb_coefs[ch]; i++) {
AC3Block *ref_block;
if (!got_cpl && ch > 1 && block->channel_in_cpl[ch-1]) {
ch0 = ch - 1;
ch = CPL_CH;
got_cpl = 1;
}
ref_block = block->exp_ref_block[ch];
for (i = s->start_freq[ch]; i < block->end_freq[ch]; i++) {
q = block->qmant[ch][i];
b = ref_block->bap[ch][i];
switch (b) {
......@@ -1469,6 +1961,8 @@ static void output_audio_block(AC3EncodeContext *s, int blk)
default: put_bits(&s->pb, b-1, q); break;
}
}
if (ch == CPL_CH)
ch = ch0;
}
}
......@@ -1854,6 +2348,12 @@ static int ac3_encode_frame(AVCodecContext *avctx, unsigned char *frame,
scale_coefficients(s);
s->cpl_on = s->cpl_enabled;
compute_coupling_strategy(s);
if (s->cpl_on)
apply_channel_coupling(s);
compute_rematrixing_strategy(s);
apply_rematrixing(s);
......@@ -1934,7 +2434,7 @@ static av_cold int set_channel_info(AC3EncodeContext *s, int channels,
s->lfe_on = !!(ch_layout & AV_CH_LOW_FREQUENCY);
s->channels = channels;
s->fbw_channels = channels - s->lfe_on;
s->lfe_channel = s->lfe_on ? s->fbw_channels : -1;
s->lfe_channel = s->lfe_on ? s->fbw_channels + 1 : -1;
if (s->lfe_on)
ch_layout -= AV_CH_LOW_FREQUENCY;
......@@ -2033,6 +2533,10 @@ static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s)
s->rematrixing_enabled = s->options.stereo_rematrixing &&
(s->channel_mode == AC3_CHMODE_STEREO);
s->cpl_enabled = s->options.channel_coupling &&
s->channel_mode >= AC3_CHMODE_STEREO &&
CONFIG_AC3ENC_FLOAT;
return 0;
}
......@@ -2044,7 +2548,8 @@ static av_cold int validate_options(AVCodecContext *avctx, AC3EncodeContext *s)
*/
static av_cold void set_bandwidth(AC3EncodeContext *s)
{
int ch;
int blk, ch;
int av_uninit(cpl_start);
if (s->cutoff) {
/* calculate bandwidth based on user-specified cutoff frequency */
......@@ -2057,11 +2562,54 @@ static av_cold void set_bandwidth(AC3EncodeContext *s)
}
/* set number of coefficients for each channel */
for (ch = 0; ch < s->fbw_channels; ch++) {
s->nb_coefs[ch] = s->bandwidth_code * 3 + 73;
for (ch = 1; ch <= s->fbw_channels; ch++) {
s->start_freq[ch] = 0;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
s->blocks[blk].end_freq[ch] = s->bandwidth_code * 3 + 73;
}
/* LFE channel always has 7 coefs */
if (s->lfe_on) {
s->start_freq[s->lfe_channel] = 0;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
s->blocks[blk].end_freq[ch] = 7;
}
/* initialize coupling strategy */
if (s->cpl_enabled) {
if (s->options.cpl_start >= 0) {
cpl_start = s->options.cpl_start;
} else {
cpl_start = ac3_coupling_start_tab[s->channel_mode-2][s->bit_alloc.sr_code][s->frame_size_code/2];
if (cpl_start < 0)
s->cpl_enabled = 0;
}
}
if (s->cpl_enabled) {
int i, cpl_start_band, cpl_end_band;
uint8_t *cpl_band_sizes = s->cpl_band_sizes;
cpl_end_band = s->bandwidth_code / 4 + 3;
cpl_start_band = av_clip(cpl_start, 0, FFMIN(cpl_end_band-1, 15));
s->num_cpl_subbands = cpl_end_band - cpl_start_band;
s->num_cpl_bands = 1;
*cpl_band_sizes = 12;
for (i = cpl_start_band + 1; i < cpl_end_band; i++) {
if (ff_eac3_default_cpl_band_struct[i]) {
*cpl_band_sizes += 12;
} else {
s->num_cpl_bands++;
cpl_band_sizes++;
*cpl_band_sizes = 12;
}
}
s->start_freq[CPL_CH] = cpl_start_band * 12 + 37;
s->cpl_end_freq = cpl_end_band * 12 + 37;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
s->blocks[blk].end_freq[CPL_CH] = s->cpl_end_freq;
}
if (s->lfe_on)
s->nb_coefs[s->lfe_channel] = 7; /* LFE channel always has 7 coefs */
}
......@@ -2069,6 +2617,7 @@ static av_cold int allocate_buffers(AVCodecContext *avctx)
{
int blk, ch;
AC3EncodeContext *s = avctx->priv_data;
int channels = s->channels + 1; /* includes coupling channel */
FF_ALLOC_OR_GOTO(avctx, s->planar_samples, s->channels * sizeof(*s->planar_samples),
alloc_fail);
......@@ -2077,74 +2626,90 @@ static av_cold int allocate_buffers(AVCodecContext *avctx)
(AC3_FRAME_SIZE+AC3_BLOCK_SIZE) * sizeof(**s->planar_samples),
alloc_fail);
}
FF_ALLOC_OR_GOTO(avctx, s->bap_buffer, AC3_MAX_BLOCKS * s->channels *
FF_ALLOC_OR_GOTO(avctx, s->bap_buffer, AC3_MAX_BLOCKS * channels *
AC3_MAX_COEFS * sizeof(*s->bap_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, AC3_MAX_BLOCKS * s->channels *
FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, AC3_MAX_BLOCKS * channels *
AC3_MAX_COEFS * sizeof(*s->bap1_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->mdct_coef_buffer, AC3_MAX_BLOCKS * s->channels *
FF_ALLOC_OR_GOTO(avctx, s->mdct_coef_buffer, AC3_MAX_BLOCKS * channels *
AC3_MAX_COEFS * sizeof(*s->mdct_coef_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->exp_buffer, AC3_MAX_BLOCKS * s->channels *
FF_ALLOC_OR_GOTO(avctx, s->exp_buffer, AC3_MAX_BLOCKS * channels *
AC3_MAX_COEFS * sizeof(*s->exp_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->grouped_exp_buffer, AC3_MAX_BLOCKS * s->channels *
FF_ALLOC_OR_GOTO(avctx, s->grouped_exp_buffer, AC3_MAX_BLOCKS * channels *
128 * sizeof(*s->grouped_exp_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->psd_buffer, AC3_MAX_BLOCKS * s->channels *
FF_ALLOC_OR_GOTO(avctx, s->psd_buffer, AC3_MAX_BLOCKS * channels *
AC3_MAX_COEFS * sizeof(*s->psd_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->band_psd_buffer, AC3_MAX_BLOCKS * s->channels *
FF_ALLOC_OR_GOTO(avctx, s->band_psd_buffer, AC3_MAX_BLOCKS * channels *
64 * sizeof(*s->band_psd_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->mask_buffer, AC3_MAX_BLOCKS * s->channels *
FF_ALLOC_OR_GOTO(avctx, s->mask_buffer, AC3_MAX_BLOCKS * channels *
64 * sizeof(*s->mask_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->qmant_buffer, AC3_MAX_BLOCKS * s->channels *
FF_ALLOC_OR_GOTO(avctx, s->qmant_buffer, AC3_MAX_BLOCKS * channels *
AC3_MAX_COEFS * sizeof(*s->qmant_buffer), alloc_fail);
if (s->cpl_enabled) {
FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_exp_buffer, AC3_MAX_BLOCKS * channels *
16 * sizeof(*s->cpl_coord_exp_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_mant_buffer, AC3_MAX_BLOCKS * channels *
16 * sizeof(*s->cpl_coord_mant_buffer), alloc_fail);
}
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
FF_ALLOC_OR_GOTO(avctx, block->bap, s->channels * sizeof(*block->bap),
FF_ALLOC_OR_GOTO(avctx, block->bap, channels * sizeof(*block->bap),
alloc_fail);
FF_ALLOCZ_OR_GOTO(avctx, block->mdct_coef, s->channels * sizeof(*block->mdct_coef),
FF_ALLOCZ_OR_GOTO(avctx, block->mdct_coef, channels * sizeof(*block->mdct_coef),
alloc_fail);
FF_ALLOCZ_OR_GOTO(avctx, block->exp, s->channels * sizeof(*block->exp),
FF_ALLOCZ_OR_GOTO(avctx, block->exp, channels * sizeof(*block->exp),
alloc_fail);
FF_ALLOCZ_OR_GOTO(avctx, block->grouped_exp, s->channels * sizeof(*block->grouped_exp),
FF_ALLOCZ_OR_GOTO(avctx, block->grouped_exp, channels * sizeof(*block->grouped_exp),
alloc_fail);
FF_ALLOCZ_OR_GOTO(avctx, block->psd, s->channels * sizeof(*block->psd),
FF_ALLOCZ_OR_GOTO(avctx, block->psd, channels * sizeof(*block->psd),
alloc_fail);
FF_ALLOCZ_OR_GOTO(avctx, block->band_psd, s->channels * sizeof(*block->band_psd),
FF_ALLOCZ_OR_GOTO(avctx, block->band_psd, channels * sizeof(*block->band_psd),
alloc_fail);
FF_ALLOCZ_OR_GOTO(avctx, block->mask, s->channels * sizeof(*block->mask),
FF_ALLOCZ_OR_GOTO(avctx, block->mask, channels * sizeof(*block->mask),
alloc_fail);
FF_ALLOCZ_OR_GOTO(avctx, block->qmant, s->channels * sizeof(*block->qmant),
FF_ALLOCZ_OR_GOTO(avctx, block->qmant, channels * sizeof(*block->qmant),
alloc_fail);
if (s->cpl_enabled) {
FF_ALLOCZ_OR_GOTO(avctx, block->cpl_coord_exp, channels * sizeof(*block->cpl_coord_exp),
alloc_fail);
FF_ALLOCZ_OR_GOTO(avctx, block->cpl_coord_mant, channels * sizeof(*block->cpl_coord_mant),
alloc_fail);
}
for (ch = 0; ch < s->channels; ch++) {
for (ch = 0; ch < channels; ch++) {
/* arrangement: block, channel, coeff */
block->bap[ch] = &s->bap_buffer [AC3_MAX_COEFS * (blk * s->channels + ch)];
block->mdct_coef[ch] = &s->mdct_coef_buffer [AC3_MAX_COEFS * (blk * s->channels + ch)];
block->grouped_exp[ch] = &s->grouped_exp_buffer[128 * (blk * s->channels + ch)];
block->psd[ch] = &s->psd_buffer [AC3_MAX_COEFS * (blk * s->channels + ch)];
block->band_psd[ch] = &s->band_psd_buffer [64 * (blk * s->channels + ch)];
block->mask[ch] = &s->mask_buffer [64 * (blk * s->channels + ch)];
block->qmant[ch] = &s->qmant_buffer [AC3_MAX_COEFS * (blk * s->channels + ch)];
block->bap[ch] = &s->bap_buffer [AC3_MAX_COEFS * (blk * channels + ch)];
block->grouped_exp[ch] = &s->grouped_exp_buffer[128 * (blk * channels + ch)];
block->psd[ch] = &s->psd_buffer [AC3_MAX_COEFS * (blk * channels + ch)];
block->band_psd[ch] = &s->band_psd_buffer [64 * (blk * channels + ch)];
block->mask[ch] = &s->mask_buffer [64 * (blk * channels + ch)];
block->qmant[ch] = &s->qmant_buffer [AC3_MAX_COEFS * (blk * channels + ch)];
if (s->cpl_enabled) {
block->cpl_coord_exp[ch] = &s->cpl_coord_exp_buffer [16 * (blk * channels + ch)];
block->cpl_coord_mant[ch] = &s->cpl_coord_mant_buffer[16 * (blk * channels + ch)];
}
/* arrangement: channel, block, coeff */
block->exp[ch] = &s->exp_buffer [AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)];
block->mdct_coef[ch] = &s->mdct_coef_buffer [AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)];
}
}
if (CONFIG_AC3ENC_FLOAT) {
FF_ALLOC_OR_GOTO(avctx, s->fixed_coef_buffer, AC3_MAX_BLOCKS * s->channels *
FF_ALLOC_OR_GOTO(avctx, s->fixed_coef_buffer, AC3_MAX_BLOCKS * channels *
AC3_MAX_COEFS * sizeof(*s->fixed_coef_buffer), alloc_fail);
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, s->channels *
FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, channels *
sizeof(*block->fixed_coef), alloc_fail);
for (ch = 0; ch < s->channels; ch++)
block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (blk * s->channels + ch)];
for (ch = 0; ch < channels; ch++)
block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)];
}
} else {
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, s->channels *
FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, channels *
sizeof(*block->fixed_coef), alloc_fail);
for (ch = 0; ch < s->channels; ch++)
for (ch = 0; ch < channels; ch++)
block->fixed_coef[ch] = (int32_t *)block->mdct_coef[ch];
}
}
......
......@@ -101,7 +101,7 @@ static void scale_coefficients(AC3EncodeContext *s)
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = 0; ch < s->channels; ch++) {
for (ch = 1; ch <= s->channels; ch++) {
s->ac3dsp.ac3_rshift_int32(block->mdct_coef[ch], AC3_MAX_COEFS,
block->coeff_shift[ch]);
}
......
......@@ -93,8 +93,10 @@ static int normalize_samples(AC3EncodeContext *s)
*/
static void scale_coefficients(AC3EncodeContext *s)
{
s->ac3dsp.float_to_fixed24(s->fixed_coef_buffer, s->mdct_coef_buffer,
AC3_MAX_COEFS * AC3_MAX_BLOCKS * s->channels);
int chan_size = AC3_MAX_COEFS * AC3_MAX_BLOCKS;
s->ac3dsp.float_to_fixed24(s->fixed_coef_buffer + chan_size,
s->mdct_coef_buffer + chan_size,
chan_size * s->channels);
}
......
......@@ -138,6 +138,13 @@ const uint16_t ff_ac3_bitrate_tab[19] = {
*/
const uint8_t ff_ac3_rematrix_band_tab[5] = { 13, 25, 37, 61, 253 };
/**
* Table E2.16 Default Coupling Banding Structure
*/
const uint8_t ff_eac3_default_cpl_band_struct[18] = {
0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1
};
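/* Illustration: a value of 1 at index i merges coupling subband i into the
 * preceding band, so if coupling covered all 18 subbands this default
 * structure would yield 10 bands of 12, 12, 12, 12, 12, 12, 12, 24, 36 and
 * 72 coefficients. */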
/* AC-3 MDCT window */
/* MDCT window */
......
......@@ -39,6 +39,7 @@ extern const uint8_t ff_ac3_dec_channel_map[8][2][6];
extern const uint16_t ff_ac3_sample_rate_tab[3];
extern const uint16_t ff_ac3_bitrate_tab[19];
extern const uint8_t ff_ac3_rematrix_band_tab[5];
extern const uint8_t ff_eac3_default_cpl_band_struct[18];
extern const int16_t ff_ac3_window[AC3_WINDOW_SIZE/2];
extern const uint8_t ff_ac3_log_add_tab[260];
extern const uint16_t ff_ac3_hearing_threshold_tab[AC3_CRITICAL_BANDS][3];
......
......@@ -156,7 +156,7 @@ void avcodec_register_all(void)
REGISTER_DECODER (MPEG1_VDPAU, mpeg1_vdpau);
REGISTER_DECODER (MPEG2_CRYSTALHD, mpeg2_crystalhd);
REGISTER_DECODER (MSMPEG4_CRYSTALHD, msmpeg4_crystalhd);
REGISTER_ENCDEC (MSMPEG4V1, msmpeg4v1);
REGISTER_DECODER (MSMPEG4V1, msmpeg4v1);
REGISTER_ENCDEC (MSMPEG4V2, msmpeg4v2);
REGISTER_ENCDEC (MSMPEG4V3, msmpeg4v3);
REGISTER_DECODER (MSRLE, msrle);
......
......@@ -628,13 +628,6 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){
}
}
/**
* Empty mmx state.
* this must be called between any dsp function and float/double code.
* for example sin(); dsp->idct_put(); emms_c(); cos()
*/
#define emms_c()
void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
......@@ -652,22 +645,9 @@ void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
void ff_mlp_init(DSPContext* c, AVCodecContext *avctx);
void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx);
#if HAVE_MMX
#undef emms_c
static inline void emms(void)
{
__asm__ volatile ("emms;":::"memory");
}
#define emms_c() \
{\
if(av_get_cpu_flags() & AV_CPU_FLAG_MMX)\
emms();\
}
#if ARCH_ARM
#elif ARCH_ARM
#if HAVE_NEON
# define STRIDE_ALIGN 16
......
......@@ -638,15 +638,6 @@ av_cold int MPV_encode_init(AVCodecContext *avctx)
s->low_delay= s->max_b_frames ? 0 : 1;
avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
break;
case CODEC_ID_MSMPEG4V1:
s->out_format = FMT_H263;
s->h263_msmpeg4 = 1;
s->h263_pred = 1;
s->unrestricted_mv = 1;
s->msmpeg4_version= 1;
avctx->delay=0;
s->low_delay=1;
break;
case CODEC_ID_MSMPEG4V2:
s->out_format = FMT_H263;
s->h263_msmpeg4 = 1;
......@@ -3807,18 +3798,6 @@ AVCodec ff_h263p_encoder = {
.long_name= NULL_IF_CONFIG_SMALL("H.263+ / H.263-1998 / H.263 version 2"),
};
AVCodec ff_msmpeg4v1_encoder = {
"msmpeg4v1",
AVMEDIA_TYPE_VIDEO,
CODEC_ID_MSMPEG4V1,
sizeof(MpegEncContext),
MPV_encode_init,
MPV_encode_picture,
MPV_encode_end,
.pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_NONE},
.long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2 Microsoft variant version 1"),
};
AVCodec ff_msmpeg4v2_encoder = {
"msmpeg4v2",
AVMEDIA_TYPE_VIDEO,
......
......@@ -846,22 +846,14 @@ static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr
int pred, extquant;
int extrabits = 0;
if(s->msmpeg4_version==1){
int32_t *dc_val;
pred = msmpeg4v1_pred_dc(s, n, &dc_val);
/* update predictor */
*dc_val= level;
}else{
int16_t *dc_val;
pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr);
int16_t *dc_val;
pred = msmpeg4_pred_dc(s, n, &dc_val, dir_ptr);
/* update predictor */
if (n < 4) {
*dc_val = level * s->y_dc_scale;
} else {
*dc_val = level * s->c_dc_scale;
}
/* update predictor */
if (n < 4) {
*dc_val = level * s->y_dc_scale;
} else {
*dc_val = level * s->c_dc_scale;
}
/* do the prediction */
......
......@@ -54,8 +54,7 @@ int ff_wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
CONFIG_MSMPEG4V3_DECODER || \
CONFIG_WMV2_DECODER || \
CONFIG_VC1_DECODER)
#define CONFIG_MSMPEG4_ENCODER (CONFIG_MSMPEG4V1_ENCODER || \
CONFIG_MSMPEG4V2_ENCODER || \
#define CONFIG_MSMPEG4_ENCODER (CONFIG_MSMPEG4V2_ENCODER || \
CONFIG_MSMPEG4V3_ENCODER || \
CONFIG_WMV2_ENCODER)
......
......@@ -39,6 +39,8 @@ typedef struct TiffContext {
int width, height;
unsigned int bpp, bppcount;
uint32_t palette[256];
int palette_is_set;
int le;
enum TiffCompr compr;
int invert;
......@@ -255,11 +257,15 @@ static int init_image(TiffContext *s)
av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
return ret;
}
if (s->bpp == 8 && s->picture.data[1]){
/* make default grayscale pal */
pal = (uint32_t *) s->picture.data[1];
for (i = 0; i < 256; i++)
pal[i] = i * 0x010101;
if (s->avctx->pix_fmt == PIX_FMT_PAL8) {
if (s->palette_is_set) {
memcpy(s->picture.data[1], s->palette, sizeof(s->palette));
} else {
/* make default grayscale pal */
pal = (uint32_t *) s->picture.data[1];
for (i = 0; i < 256; i++)
pal[i] = i * 0x010101;
}
}
return 0;
}
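In short, the TIFF_PAL handler further down now stages the palette in the context and this init path publishes it once the frame buffer exists, so tag order no longer matters. A hedged, self-contained restatement of that flow (the struct and function names are placeholders, not the decoder's own helpers):

#include <stdint.h>
#include <string.h>

typedef struct {
    uint32_t palette[256];   /* staged by the TIFF_PAL tag handler */
    int      palette_is_set;
} PaletteState;               /* placeholder for the relevant TiffContext fields */

static void publish_palette(const PaletteState *s, uint32_t *frame_pal /* data[1] */)
{
    int i;
    if (s->palette_is_set) {
        memcpy(frame_pal, s->palette, sizeof(s->palette));
    } else {
        for (i = 0; i < 256; i++)
            frame_pal[i] = i * 0x010101;   /* default grayscale ramp */
    }
}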
......@@ -442,11 +448,7 @@ static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t *
s->fill_order = value - 1;
break;
case TIFF_PAL:
if(s->avctx->pix_fmt != PIX_FMT_PAL8){
av_log(s->avctx, AV_LOG_ERROR, "Palette met but this is not palettized format\n");
return -1;
}
pal = (uint32_t *) s->picture.data[1];
pal = (uint32_t *) s->palette;
off = type_sizes[type];
rp = buf;
gp = buf + count / 3 * off;
......@@ -459,6 +461,7 @@ static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t *
j |= tget(&bp, type, s->le) >> off;
pal[i] = j;
}
s->palette_is_set = 1;
break;
case TIFF_PLANAR:
if(value == 2){
......
......@@ -287,13 +287,8 @@ zrmjpeg
CpuCaps gCpuCaps; //FIXME initialize this so optims work
//exact copy from vf_scale.c
int get_sws_cpuflags(void){
return
(gCpuCaps.hasMMX ? SWS_CPU_CAPS_MMX : 0)
| (gCpuCaps.hasMMX2 ? SWS_CPU_CAPS_MMX2 : 0)
| (gCpuCaps.has3DNow ? SWS_CPU_CAPS_3DNOW : 0)
| (gCpuCaps.hasAltiVec ? SWS_CPU_CAPS_ALTIVEC : 0);
return 0;
}
static void sws_getFlagsAndFilterFromCmdLine(int *flags, SwsFilter **srcFilterParam, SwsFilter **dstFilterParam)
......@@ -348,7 +343,7 @@ struct SwsContext *sws_getContextFromCmdLine(int srcW, int srcH, int srcFormat,
if (srcFormat == IMGFMT_RGB8 || srcFormat == IMGFMT_BGR8) sfmt = PIX_FMT_PAL8;
sws_getFlagsAndFilterFromCmdLine(&flags, &srcFilterParam, &dstFilterParam);
return sws_getContext(srcW, srcH, sfmt, dstW, dstH, dfmt, flags | get_sws_cpuflags(), srcFilterParam, dstFilterParam, NULL);
return sws_getContext(srcW, srcH, sfmt, dstW, dstH, dfmt, flags , srcFilterParam, dstFilterParam, NULL);
}
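A hedged usage sketch of what this change means for callers (sizes and pixel formats below are placeholders): CPU capabilities are now detected inside libswscale at runtime, so only algorithm/quality flags are passed in.

#include "libswscale/swscale.h"

static struct SwsContext *make_scaler(int srcW, int srcH, int dstW, int dstH)
{
    /* No SWS_CPU_CAPS_* bits are ORed into the flags anymore. */
    return sws_getContext(srcW, srcH, PIX_FMT_YUV420P,   /* placeholder formats */
                          dstW, dstH, PIX_FMT_RGB24,
                          SWS_BICUBIC,                    /* algorithm flags only */
                          NULL, NULL, NULL);              /* no filters/params */
}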
typedef struct {
......
......@@ -27,6 +27,7 @@
* memory buffer source API for video
*/
#include "avfilter.h"
/**
......
......@@ -276,7 +276,7 @@ static int ape_read_header(AVFormatContext * s, AVFormatParameters * ap)
ape->frames[0].nblocks = ape->blocksperframe;
ape->frames[0].skip = 0;
for (i = 1; i < ape->totalframes; i++) {
ape->frames[i].pos = ape->seektable[i] + ape->junklength; //ape->frames[i-1].pos + ape->blocksperframe;
ape->frames[i].pos = ape->seektable[i] + ape->junklength;
ape->frames[i].nblocks = ape->blocksperframe;
ape->frames[i - 1].size = ape->frames[i].pos - ape->frames[i - 1].pos;
ape->frames[i].skip = (ape->frames[i].pos - ape->frames[0].pos) & 3;
......
......@@ -37,6 +37,7 @@
#include "config.h"
#include "attributes.h"
#include "timer.h"
#include "cpu.h"
#ifndef attribute_align_arg
#if ARCH_X86_32 && AV_GCC_VERSION_AT_LEAST(4,2)
......@@ -222,4 +223,19 @@
# define ONLY_IF_THREADS_ENABLED(x) NULL
#endif
#if HAVE_MMX
/**
* Empty the MMX state.
* This must be called between any DSP function and float/double code,
* for example: sin(); dsp->idct_put(); emms_c(); cos();
*/
static av_always_inline void emms_c(void)
{
if(av_get_cpu_flags() & AV_CPU_FLAG_MMX)
__asm__ volatile ("emms" ::: "memory");
}
#else /* HAVE_MMX */
#define emms_c()
#endif /* HAVE_MMX */
#endif /* AVUTIL_INTERNAL_H */
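A minimal usage sketch of the relocated helper (the MMX kernel is a stand-in, only emms_c() comes from this header, and this is in-tree usage since internal.h is not an installed header):

#include <math.h>
#include "libavutil/internal.h"   /* emms_c() now lives here */

static double mixed_simd_and_float(void (*mmx_kernel)(void))
{
    mmx_kernel();    /* stand-in for any routine that may clobber mm0..mm7 */
    emms_c();        /* restore x87 state; expands to nothing without MMX */
    return sin(0.5); /* float/double math is safe again */
}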
......@@ -79,15 +79,13 @@ static int yuyvtoyv12_unscaled(SwsContext *c, uint8_t* src[], int srcStride[], i
void ff_bfin_get_unscaled_swscale(SwsContext *c)
{
SwsFunc swScale = c->swScale;
if (c->flags & SWS_CPU_CAPS_BFIN)
if (c->dstFormat == PIX_FMT_YUV420P)
if (c->srcFormat == PIX_FMT_UYVY422) {
av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized uyvytoyv12_unscaled\n");
c->swScale = uyvytoyv12_unscaled;
}
if (c->dstFormat == PIX_FMT_YUV420P)
if (c->srcFormat == PIX_FMT_YUYV422) {
av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized yuyvtoyv12_unscaled\n");
c->swScale = yuyvtoyv12_unscaled;
}
if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_UYVY422) {
av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized uyvytoyv12_unscaled\n");
c->swScale = uyvytoyv12_unscaled;
}
if (c->dstFormat == PIX_FMT_YUV420P && c->srcFormat == PIX_FMT_YUYV422) {
av_log (NULL, AV_LOG_VERBOSE, "selecting Blackfin optimized yuyvtoyv12_unscaled\n");
c->swScale = yuyvtoyv12_unscaled;
}
}
......@@ -33,31 +33,6 @@
#define FUNC(s,d,n) {s,d,#n,n}
static int cpu_caps;
static char *args_parse(int argc, char *argv[])
{
int o;
while ((o = getopt(argc, argv, "m23")) != -1) {
switch (o) {
case 'm':
cpu_caps |= SWS_CPU_CAPS_MMX;
break;
case '2':
cpu_caps |= SWS_CPU_CAPS_MMX2;
break;
case '3':
cpu_caps |= SWS_CPU_CAPS_3DNOW;
break;
default:
av_log(NULL, AV_LOG_ERROR, "Unknown option %c\n", o);
}
}
return argv[optind];
}
int main(int argc, char **argv)
{
int i, funcNum;
......@@ -70,9 +45,7 @@ int main(int argc, char **argv)
return -1;
av_log(NULL, AV_LOG_INFO, "memory corruption test ...\n");
args_parse(argc, argv);
av_log(NULL, AV_LOG_INFO, "CPU capabilities forced to %x\n", cpu_caps);
sws_rgb2rgb_init(cpu_caps);
sws_rgb2rgb_init();
for(funcNum=0; ; funcNum++) {
struct func_info_s {
......
......@@ -48,12 +48,6 @@ static const AVOption options[] = {
{ "spline", "natural bicubic spline", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_SPLINE }, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "print_info", "print info", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_PRINT_INFO }, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "accurate_rnd", "accurate rounding", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_ACCURATE_RND }, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "mmx", "MMX SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_MMX }, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "mmx2", "MMX2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_MMX2 }, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "sse2", "SSE2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_SSE2 }, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "3dnow", "3DNOW SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_3DNOW }, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "altivec", "AltiVec SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_ALTIVEC }, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "bfin", "Blackfin SIMD acceleration", 0, FF_OPT_TYPE_CONST, {.dbl = SWS_CPU_CAPS_BFIN }, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "full_chroma_int", "full chroma interpolation", 0 , FF_OPT_TYPE_CONST, {.dbl = SWS_FULL_CHR_H_INT }, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "full_chroma_inp", "full chroma input", 0 , FF_OPT_TYPE_CONST, {.dbl = SWS_FULL_CHR_H_INP }, INT_MIN, INT_MAX, VE, "sws_flags" },
{ "bitexact", "", 0 , FF_OPT_TYPE_CONST, {.dbl = SWS_BITEXACT }, INT_MIN, INT_MAX, VE, "sws_flags" },
......
......@@ -23,69 +23,16 @@
#include "swscale_altivec_template.c"
#endif
#if COMPILE_TEMPLATE_ALTIVEC
static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
{
#if COMPILE_TEMPLATE_ALTIVEC
yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
chrFilter, chrSrc, chrFilterSize,
dest, uDest, vDest, dstW, chrDstW);
#else //COMPILE_TEMPLATE_ALTIVEC
yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
chrFilter, chrSrc, chrFilterSize,
alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
#endif //!COMPILE_TEMPLATE_ALTIVEC
}
static inline void RENAME(yuv2nv12X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, enum PixelFormat dstFormat)
{
yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
chrFilter, chrSrc, chrFilterSize,
dest, uDest, dstW, chrDstW, dstFormat);
}
static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
{
int i;
for (i=0; i<dstW; i++) {
int val= (lumSrc[i]+64)>>7;
if (val&256) {
if (val<0) val=0;
else val=255;
}
dest[i]= val;
}
if (uDest)
for (i=0; i<chrDstW; i++) {
int u=(chrSrc[i ]+64)>>7;
int v=(chrSrc[i + VOFW]+64)>>7;
if ((u|v)&256) {
if (u<0) u=0;
else if (u>255) u=255;
if (v<0) v=0;
else if (v>255) v=255;
}
uDest[i]= u;
vDest[i]= v;
}
if (CONFIG_SWSCALE_ALPHA && aDest)
for (i=0; i<dstW; i++) {
int val= (alpSrc[i]+64)>>7;
aDest[i]= av_clip_uint8(val);
}
}
/**
* vertical scale YV12 to RGB
*/
......@@ -93,7 +40,6 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
{
#if COMPILE_TEMPLATE_ALTIVEC
/* The following list of supported dstFormat values should
match what's found in the body of ff_yuv2packedX_altivec() */
if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf &&
......@@ -104,815 +50,17 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
chrFilter, chrSrc, chrFilterSize,
dest, dstW, dstY);
else
#endif
yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
chrFilter, chrSrc, chrFilterSize,
alpSrc, dest, dstW, dstY);
}
#endif
/**
* vertical bilinear scale YV12 to RGB
*/
static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
{
int yalpha1=4095- yalpha;
int uvalpha1=4095-uvalpha;
int i;
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
}
/**
* YV12 to RGB without scaling or interpolating
*/
static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
{
const int yalpha1=0;
int i;
const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
const int yalpha= 4096; //FIXME ...
if (flags&SWS_FULL_CHR_H_INT) {
c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
return;
}
if (uvalpha < 2048) {
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
} else {
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
}
}
//FIXME yuy2* can read up to 7 samples too much
static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
int i;
for (i=0; i<width; i++)
dst[i]= src[2*i];
}
static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
int i;
for (i=0; i<width; i++) {
dstU[i]= src1[4*i + 1];
dstV[i]= src1[4*i + 3];
}
assert(src1 == src2);
}
static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
int i;
for (i=0; i<width; i++) {
dstU[i]= src1[2*i + 1];
dstV[i]= src2[2*i + 1];
}
}
/* This is almost identical to the previous one, and exists only because
 * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
int i;
for (i=0; i<width; i++)
dst[i]= src[2*i+1];
}
static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
int i;
for (i=0; i<width; i++) {
dstU[i]= src1[4*i + 0];
dstV[i]= src1[4*i + 2];
}
assert(src1 == src2);
}
static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
int i;
for (i=0; i<width; i++) {
dstU[i]= src1[2*i];
dstV[i]= src2[2*i];
}
}
static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
const uint8_t *src, long width)
{
int i;
for (i = 0; i < width; i++) {
dst1[i] = src[2*i+0];
dst2[i] = src[2*i+1];
}
}
static inline void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
const uint8_t *src1, const uint8_t *src2,
long width, uint32_t *unused)
{
RENAME(nvXXtoUV)(dstU, dstV, src1, width);
}
static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
const uint8_t *src1, const uint8_t *src2,
long width, uint32_t *unused)
{
RENAME(nvXXtoUV)(dstV, dstU, src1, width);
}
static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
int i;
for (i=0; i<width; i++) {
int b= src[i*3+0];
int g= src[i*3+1];
int r= src[i*3+2];
dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
}
}
static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
int i;
for (i=0; i<width; i++) {
int b= src1[3*i + 0];
int g= src1[3*i + 1];
int r= src1[3*i + 2];
dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
}
assert(src1 == src2);
}
static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
int i;
for (i=0; i<width; i++) {
int b= src1[6*i + 0] + src1[6*i + 3];
int g= src1[6*i + 1] + src1[6*i + 4];
int r= src1[6*i + 2] + src1[6*i + 5];
dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
}
assert(src1 == src2);
}
static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
int i;
for (i=0; i<width; i++) {
int r= src[i*3+0];
int g= src[i*3+1];
int b= src[i*3+2];
dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
}
}
static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
int i;
assert(src1==src2);
for (i=0; i<width; i++) {
int r= src1[3*i + 0];
int g= src1[3*i + 1];
int b= src1[3*i + 2];
dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
}
}
static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
int i;
assert(src1==src2);
for (i=0; i<width; i++) {
int r= src1[6*i + 0] + src1[6*i + 3];
int g= src1[6*i + 1] + src1[6*i + 4];
int b= src1[6*i + 2] + src1[6*i + 5];
dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
}
}
// bilinear / bicubic scaling
static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc,
const int16_t *filter, const int16_t *filterPos, long filterSize)
{
#if COMPILE_TEMPLATE_ALTIVEC
hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
#else
int i;
for (i=0; i<dstW; i++) {
int j;
int srcPos= filterPos[i];
int val=0;
//printf("filterPos: %d\n", filterPos[i]);
for (j=0; j<filterSize; j++) {
//printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
val += ((int)src[srcPos + j])*filter[filterSize*i + j];
}
//filter += hFilterSize;
dst[i] = FFMIN(val>>7, (1<<15)-1); // the cubic equation does overflow ...
//dst[i] = val>>7;
}
#endif /* COMPILE_TEMPLATE_ALTIVEC */
}
//FIXME all pal and rgb srcFormats could do this conversion as well
//FIXME all scalers more complex than bilinear could do half of this transform
static void RENAME(chrRangeToJpeg)(uint16_t *dst, int width)
{
int i;
for (i = 0; i < width; i++) {
dst[i ] = (FFMIN(dst[i ],30775)*4663 - 9289992)>>12; //-264
dst[i+VOFW] = (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264
}
}
static void RENAME(chrRangeFromJpeg)(uint16_t *dst, int width)
{
int i;
for (i = 0; i < width; i++) {
dst[i ] = (dst[i ]*1799 + 4081085)>>11; //1469
dst[i+VOFW] = (dst[i+VOFW]*1799 + 4081085)>>11; //1469
}
}
static void RENAME(lumRangeToJpeg)(uint16_t *dst, int width)
{
int i;
for (i = 0; i < width; i++)
dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
}
static void RENAME(lumRangeFromJpeg)(uint16_t *dst, int width)
{
int i;
for (i = 0; i < width; i++)
dst[i] = (dst[i]*14071 + 33561947)>>14;
}
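The magic constants in the four range helpers above are fixed-point encodings of the standard full/limited range mapping; a hedged reference version for the luma case (samples are carried as luma<<7, and chroma works the same way around its 128 centre):

/* Sketch: y_limited = y_full * 219/255 + 16, in the (luma << 7) domain.
 * 14071/16384  ~= 219/255   (gain)
 * 33561947>>14 ~= 16 << 7   (offset)
 * The ToJpeg direction inverts this with 19077/16384 ~= 255/219. */
static int lum_range_from_jpeg_reference(int x)   /* x = full-range luma << 7 */
{
    return (x * 14071 + 33561947) >> 14;
}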
static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
long dstWidth, const uint8_t *src, int srcW,
int xInc)
{
int i;
unsigned int xpos=0;
for (i=0;i<dstWidth;i++) {
register unsigned int xx=xpos>>16;
register unsigned int xalpha=(xpos&0xFFFF)>>9;
dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
xpos+=xInc;
}
}
// *** horizontal scale Y line to temp buffer
static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src, int srcW, int xInc,
const int16_t *hLumFilter,
const int16_t *hLumFilterPos, int hLumFilterSize,
uint8_t *formatConvBuffer,
uint32_t *pal, int isAlpha)
{
void (*toYV12)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
if (toYV12) {
toYV12(formatConvBuffer, src, srcW, pal);
src= formatConvBuffer;
}
if (c->hScale16) {
c->hScale16(dst, dstWidth, (uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
} else if (!c->hyscale_fast) {
c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
} else { // fast bilinear upscale / crap downscale
c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
}
if (convertRange)
convertRange(dst, dstWidth);
}
static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
long dstWidth, const uint8_t *src1,
const uint8_t *src2, int srcW, int xInc)
{
int i;
unsigned int xpos=0;
for (i=0;i<dstWidth;i++) {
register unsigned int xx=xpos>>16;
register unsigned int xalpha=(xpos&0xFFFF)>>9;
dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
/* slower
dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
*/
xpos+=xInc;
}
}
inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src1, const uint8_t *src2,
int srcW, int xInc, const int16_t *hChrFilter,
const int16_t *hChrFilterPos, int hChrFilterSize,
uint8_t *formatConvBuffer,
uint32_t *pal)
{
src1 += c->chrSrcOffset;
src2 += c->chrSrcOffset;
if (c->chrToYV12) {
c->chrToYV12(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
src1= formatConvBuffer;
src2= formatConvBuffer+VOFW;
}
if (c->hScale16) {
c->hScale16(dst , dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
c->hScale16(dst+VOFW, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
} else if (!c->hcscale_fast) {
c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
} else { // fast bilinear upscale / crap downscale
c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc);
}
if (c->chrConvertRange)
c->chrConvertRange(dst, dstWidth);
}
#define DEBUG_SWSCALE_BUFFERS 0
#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[])
{
/* load a few things into local vars to make the code more readable and faster */
const int srcW= c->srcW;
const int dstW= c->dstW;
const int dstH= c->dstH;
const int chrDstW= c->chrDstW;
const int chrSrcW= c->chrSrcW;
const int lumXInc= c->lumXInc;
const int chrXInc= c->chrXInc;
const enum PixelFormat dstFormat= c->dstFormat;
const int flags= c->flags;
int16_t *vLumFilterPos= c->vLumFilterPos;
int16_t *vChrFilterPos= c->vChrFilterPos;
int16_t *hLumFilterPos= c->hLumFilterPos;
int16_t *hChrFilterPos= c->hChrFilterPos;
int16_t *vLumFilter= c->vLumFilter;
int16_t *vChrFilter= c->vChrFilter;
int16_t *hLumFilter= c->hLumFilter;
int16_t *hChrFilter= c->hChrFilter;
int32_t *lumMmxFilter= c->lumMmxFilter;
int32_t *chrMmxFilter= c->chrMmxFilter;
int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
const int vLumFilterSize= c->vLumFilterSize;
const int vChrFilterSize= c->vChrFilterSize;
const int hLumFilterSize= c->hLumFilterSize;
const int hChrFilterSize= c->hChrFilterSize;
int16_t **lumPixBuf= c->lumPixBuf;
int16_t **chrPixBuf= c->chrPixBuf;
int16_t **alpPixBuf= c->alpPixBuf;
const int vLumBufSize= c->vLumBufSize;
const int vChrBufSize= c->vChrBufSize;
uint8_t *formatConvBuffer= c->formatConvBuffer;
const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
int lastDstY;
uint32_t *pal=c->pal_yuv;
/* vars which will change and which we need to store back in the context */
int dstY= c->dstY;
int lumBufIndex= c->lumBufIndex;
int chrBufIndex= c->chrBufIndex;
int lastInLumBuf= c->lastInLumBuf;
int lastInChrBuf= c->lastInChrBuf;
if (isPacked(c->srcFormat)) {
src[0]=
src[1]=
src[2]=
src[3]= src[0];
srcStride[0]=
srcStride[1]=
srcStride[2]=
srcStride[3]= srcStride[0];
}
srcStride[1]<<= c->vChrDrop;
srcStride[2]<<= c->vChrDrop;
DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
srcSliceY, srcSliceH, dstY, dstH);
DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
static int warnedAlready=0; //FIXME move this into the context perhaps
if (flags & SWS_PRINT_INFO && !warnedAlready) {
av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
" ->cannot do aligned memory accesses anymore\n");
warnedAlready=1;
}
}
/* Note that the user might start scaling in the middle of the picture, so
   this will not get executed. This is not really intended, but it currently
   works, so people might do it. */
if (srcSliceY ==0) {
lumBufIndex=-1;
chrBufIndex=-1;
dstY=0;
lastInLumBuf= -1;
lastInChrBuf= -1;
}
lastDstY= dstY;
for (;dstY < dstH; dstY++) {
unsigned char *dest =dst[0]+dstStride[0]*dstY;
const int chrDstY= dstY>>c->chrDstVSubSample;
unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
int enough_lines;
//handle holes (FAST_BILINEAR & weird filters)
if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
DEBUG_BUFFERS("dstY: %d\n", dstY);
DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
firstLumSrcY, lastLumSrcY, lastInLumBuf);
DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
firstChrSrcY, lastChrSrcY, lastInChrBuf);
// Do we have enough lines in this slice to output the dstY line
enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
if (!enough_lines) {
lastLumSrcY = srcSliceY + srcSliceH - 1;
lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
lastLumSrcY, lastChrSrcY);
}
//Do horizontal scaling
while(lastInLumBuf < lastLumSrcY) {
const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
lumBufIndex++;
assert(lumBufIndex < 2*vLumBufSize);
assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
assert(lastInLumBuf + 1 - srcSliceY >= 0);
RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
hLumFilter, hLumFilterPos, hLumFilterSize,
formatConvBuffer,
pal, 0);
if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc,
hLumFilter, hLumFilterPos, hLumFilterSize,
formatConvBuffer,
pal, 1);
lastInLumBuf++;
DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
lumBufIndex, lastInLumBuf);
}
while(lastInChrBuf < lastChrSrcY) {
const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
chrBufIndex++;
assert(chrBufIndex < 2*vChrBufSize);
assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
//FIXME replace parameters through context struct (some at least)
if (c->needs_hcscale)
RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
hChrFilter, hChrFilterPos, hChrFilterSize,
formatConvBuffer,
pal);
lastInChrBuf++;
DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
chrBufIndex, lastInChrBuf);
}
//wrap buf index around to stay inside the ring buffer
if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
if (!enough_lines)
break; //we can't output a dstY line so let's try with the next slice
if (dstY < dstH-2) {
const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
c->yuv2nv12X(c,
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
dest, uDest, dstW, chrDstW, dstFormat);
} else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
yuv2yuvX16inC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
dstFormat);
} else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
const int16_t *lumBuf = lumSrcPtr[0];
const int16_t *chrBuf= chrSrcPtr[0];
const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
} else { //General YV12
c->yuv2yuvX(c,
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
}
} else {
assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
int chrAlpha= vChrFilter[2*dstY+1];
if(flags & SWS_FULL_CHR_H_INT) {
yuv2rgbXinC_full(c, //FIXME write a packed1_full function
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY);
} else {
c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
alpPixBuf ? *alpSrcPtr : NULL,
dest, dstW, chrAlpha, dstFormat, flags, dstY);
}
} else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
int lumAlpha= vLumFilter[2*dstY+1];
int chrAlpha= vChrFilter[2*dstY+1];
lumMmxFilter[2]=
lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
chrMmxFilter[2]=
chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
if(flags & SWS_FULL_CHR_H_INT) {
yuv2rgbXinC_full(c, //FIXME write a packed2_full function
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY);
} else {
c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
dest, dstW, lumAlpha, chrAlpha, dstY);
}
} else { //general RGB
if(flags & SWS_FULL_CHR_H_INT) {
yuv2rgbXinC_full(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY);
} else {
c->yuv2packedX(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY);
}
}
}
} else { // hmm looks like we can't use MMX here without overwriting this array's tail
const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
yuv2nv12XinC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
dest, uDest, dstW, chrDstW, dstFormat);
} else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
yuv2yuvX16inC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
dstFormat);
} else {
yuv2yuvXinC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
}
} else {
assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
if(flags & SWS_FULL_CHR_H_INT) {
yuv2rgbXinC_full(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY);
} else {
yuv2packedXinC(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY);
}
}
}
}
if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
/* store changed local vars back in the context */
c->dstY= dstY;
c->lumBufIndex= lumBufIndex;
c->chrBufIndex= chrBufIndex;
c->lastInLumBuf= lastInLumBuf;
c->lastInChrBuf= lastInChrBuf;
return dstY - lastDstY;
}
static void RENAME(sws_init_swScale)(SwsContext *c)
{
enum PixelFormat srcFormat = c->srcFormat;
c->yuv2nv12X = RENAME(yuv2nv12X );
c->yuv2yuv1 = RENAME(yuv2yuv1 );
c->yuv2yuvX = RENAME(yuv2yuvX );
c->yuv2packed1 = RENAME(yuv2packed1 );
c->yuv2packed2 = RENAME(yuv2packed2 );
c->yuv2packedX = RENAME(yuv2packedX );
c->hScale = RENAME(hScale );
if (c->flags & SWS_FAST_BILINEAR)
{
c->hyscale_fast = RENAME(hyscale_fast);
c->hcscale_fast = RENAME(hcscale_fast);
}
c->chrToYV12 = NULL;
switch(srcFormat) {
case PIX_FMT_YUYV422 : c->chrToYV12 = RENAME(yuy2ToUV); break;
case PIX_FMT_UYVY422 : c->chrToYV12 = RENAME(uyvyToUV); break;
case PIX_FMT_NV12 : c->chrToYV12 = RENAME(nv12ToUV); break;
case PIX_FMT_NV21 : c->chrToYV12 = RENAME(nv21ToUV); break;
case PIX_FMT_RGB8 :
case PIX_FMT_BGR8 :
case PIX_FMT_PAL8 :
case PIX_FMT_BGR4_BYTE:
case PIX_FMT_RGB4_BYTE: c->chrToYV12 = palToUV; break;
case PIX_FMT_GRAY16BE :
case PIX_FMT_YUV420P9BE:
case PIX_FMT_YUV422P10BE:
case PIX_FMT_YUV420P10BE:
case PIX_FMT_YUV420P16BE:
case PIX_FMT_YUV422P16BE:
case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? RENAME(hScale16) : RENAME(hScale16X); break;
case PIX_FMT_GRAY16LE :
case PIX_FMT_YUV420P9LE:
case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV420P10LE:
case PIX_FMT_YUV420P16LE:
case PIX_FMT_YUV422P16LE:
case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? RENAME(hScale16X) : RENAME(hScale16); break;
}
if (c->chrSrcHSubSample) {
switch(srcFormat) {
case PIX_FMT_RGB48BE:
case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV_half; break;
case PIX_FMT_BGR48BE:
case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV_half; break;
case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV_half; break;
case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV_half; break;
case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV_half); break;
case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV_half; break;
case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV_half; break;
case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV_half; break;
case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV_half; break;
case PIX_FMT_RGB24 : c->chrToYV12 = RENAME(rgb24ToUV_half); break;
case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV_half; break;
case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV_half; break;
}
} else {
switch(srcFormat) {
case PIX_FMT_RGB48BE:
case PIX_FMT_RGB48LE: c->chrToYV12 = rgb48ToUV; break;
case PIX_FMT_BGR48BE:
case PIX_FMT_BGR48LE: c->chrToYV12 = bgr48ToUV; break;
case PIX_FMT_RGB32 : c->chrToYV12 = bgr32ToUV; break;
case PIX_FMT_RGB32_1: c->chrToYV12 = bgr321ToUV; break;
case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV); break;
case PIX_FMT_BGR565 : c->chrToYV12 = bgr16ToUV; break;
case PIX_FMT_BGR555 : c->chrToYV12 = bgr15ToUV; break;
case PIX_FMT_BGR32 : c->chrToYV12 = rgb32ToUV; break;
case PIX_FMT_BGR32_1: c->chrToYV12 = rgb321ToUV; break;
case PIX_FMT_RGB24 : c->chrToYV12 = RENAME(rgb24ToUV); break;
case PIX_FMT_RGB565 : c->chrToYV12 = rgb16ToUV; break;
case PIX_FMT_RGB555 : c->chrToYV12 = rgb15ToUV; break;
}
}
c->lumToYV12 = NULL;
c->alpToYV12 = NULL;
switch (srcFormat) {
case PIX_FMT_YUYV422 :
case PIX_FMT_GRAY8A :
c->lumToYV12 = RENAME(yuy2ToY); break;
case PIX_FMT_UYVY422 :
c->lumToYV12 = RENAME(uyvyToY); break;
case PIX_FMT_BGR24 : c->lumToYV12 = RENAME(bgr24ToY); break;
case PIX_FMT_BGR565 : c->lumToYV12 = bgr16ToY; break;
case PIX_FMT_BGR555 : c->lumToYV12 = bgr15ToY; break;
case PIX_FMT_RGB24 : c->lumToYV12 = RENAME(rgb24ToY); break;
case PIX_FMT_RGB565 : c->lumToYV12 = rgb16ToY; break;
case PIX_FMT_RGB555 : c->lumToYV12 = rgb15ToY; break;
case PIX_FMT_RGB8 :
case PIX_FMT_BGR8 :
case PIX_FMT_PAL8 :
case PIX_FMT_BGR4_BYTE:
case PIX_FMT_RGB4_BYTE: c->lumToYV12 = palToY; break;
case PIX_FMT_MONOBLACK: c->lumToYV12 = monoblack2Y; break;
case PIX_FMT_MONOWHITE: c->lumToYV12 = monowhite2Y; break;
case PIX_FMT_RGB32 : c->lumToYV12 = bgr32ToY; break;
case PIX_FMT_RGB32_1: c->lumToYV12 = bgr321ToY; break;
case PIX_FMT_BGR32 : c->lumToYV12 = rgb32ToY; break;
case PIX_FMT_BGR32_1: c->lumToYV12 = rgb321ToY; break;
case PIX_FMT_RGB48BE:
case PIX_FMT_RGB48LE: c->lumToYV12 = rgb48ToY; break;
case PIX_FMT_BGR48BE:
case PIX_FMT_BGR48LE: c->lumToYV12 = bgr48ToY; break;
}
if (c->alpPixBuf) {
switch (srcFormat) {
case PIX_FMT_RGB32 :
case PIX_FMT_RGB32_1:
case PIX_FMT_BGR32 :
case PIX_FMT_BGR32_1: c->alpToYV12 = abgrToA; break;
case PIX_FMT_GRAY8A : c->alpToYV12 = RENAME(yuy2ToY); break;
case PIX_FMT_PAL8 : c->alpToYV12 = palToA; break;
}
}
switch (srcFormat) {
case PIX_FMT_GRAY8A :
c->alpSrcOffset = 1;
break;
case PIX_FMT_RGB32 :
case PIX_FMT_BGR32 :
c->alpSrcOffset = 3;
break;
case PIX_FMT_RGB48LE:
case PIX_FMT_BGR48LE:
c->lumSrcOffset = 1;
c->chrSrcOffset = 1;
c->alpSrcOffset = 1;
break;
}
if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
if (c->srcRange) {
c->lumConvertRange = RENAME(lumRangeFromJpeg);
c->chrConvertRange = RENAME(chrRangeFromJpeg);
} else {
c->lumConvertRange = RENAME(lumRangeToJpeg);
c->chrConvertRange = RENAME(chrRangeToJpeg);
}
}
if (!(isGray(srcFormat) || isGray(c->dstFormat) ||
srcFormat == PIX_FMT_MONOBLACK || srcFormat == PIX_FMT_MONOWHITE))
c->needs_hcscale = 1;
}
......@@ -94,6 +94,7 @@ adjustment.
#include "libswscale/rgb2rgb.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/cpu.h"
#undef PROFILE_THE_BEAST
#undef INC_SCALING
......@@ -692,7 +693,7 @@ static int altivec_uyvy_rgb32 (SwsContext *c,
*/
SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
{
if (!(c->flags & SWS_CPU_CAPS_ALTIVEC))
if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
return NULL;
/*
......
......@@ -116,12 +116,11 @@ void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t
32-bit C version, and and&add trick by Michael Niedermayer
*/
void sws_rgb2rgb_init(int flags)
void sws_rgb2rgb_init(void)
{
rgb2rgb_init_c();
#if HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX
rgb2rgb_init_x86(flags);
#endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
if (HAVE_MMX)
rgb2rgb_init_x86();
}
#if LIBSWSCALE_VERSION_MAJOR < 1
......
......@@ -166,8 +166,8 @@ extern void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const u
long width, long height,
long lumStride, long chromStride, long srcStride);
void sws_rgb2rgb_init(int flags);
void sws_rgb2rgb_init(void);
void rgb2rgb_init_x86(int flags);
void rgb2rgb_init_x86(void);
#endif /* SWSCALE_RGB2RGB_H */
......@@ -278,25 +278,6 @@ static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, long src_siz
}
}
/*
* mm0 = 00 B3 00 B2 00 B1 00 B0
* mm1 = 00 G3 00 G2 00 G1 00 G0
* mm2 = 00 R3 00 R2 00 R1 00 R0
* mm6 = FF FF FF FF FF FF FF FF
* mm7 = 00 00 00 00 00 00 00 00
*/
#define PACK_RGB32 \
"packuswb %%mm7, %%mm0 \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \
"packuswb %%mm7, %%mm1 \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \
"packuswb %%mm7, %%mm2 \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \
"punpcklbw %%mm1, %%mm0 \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \
"punpcklbw %%mm6, %%mm2 \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \
"movq %%mm0, %%mm3 \n\t" \
"punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
"punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
MOVNTQ" %%mm0, %0 \n\t" \
MOVNTQ" %%mm3, 8%0 \n\t" \
static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, long src_size)
{
const uint16_t *end;
......
......@@ -63,6 +63,7 @@ untested special converters
#include "libavutil/avassert.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/x86_cpu.h"
#include "libavutil/cpu.h"
#include "libavutil/avutil.h"
#include "libavutil/mathematics.h"
#include "libavutil/bswap.h"
......@@ -71,10 +72,6 @@ untested special converters
#undef MOVNTQ
#undef PAVGB
//#undef HAVE_MMX2
//#define HAVE_AMD3DNOW
//#undef HAVE_MMX
//#undef ARCH_X86
#define DITHER1XBPP
#define isPacked(x) ( \
......@@ -1262,57 +1259,13 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
//Note: we have C, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
//Plain C versions
#if CONFIG_RUNTIME_CPUDETECT
# define COMPILE_C 1
# if ARCH_X86
# define COMPILE_MMX 1
# define COMPILE_MMX2 1
# define COMPILE_3DNOW 1
# elif ARCH_PPC
# define COMPILE_ALTIVEC HAVE_ALTIVEC
# endif
#else /* CONFIG_RUNTIME_CPUDETECT */
# if ARCH_X86
# if HAVE_MMX2
# define COMPILE_MMX2 1
# elif HAVE_AMD3DNOW
# define COMPILE_3DNOW 1
# elif HAVE_MMX
# define COMPILE_MMX 1
# else
# define COMPILE_C 1
# endif
# elif ARCH_PPC && HAVE_ALTIVEC
# define COMPILE_ALTIVEC 1
# else
# define COMPILE_C 1
# endif
#endif
#ifndef COMPILE_C
# define COMPILE_C 0
#endif
#ifndef COMPILE_MMX
# define COMPILE_MMX 0
#endif
#ifndef COMPILE_MMX2
# define COMPILE_MMX2 0
#endif
#ifndef COMPILE_3DNOW
# define COMPILE_3DNOW 0
#endif
#ifndef COMPILE_ALTIVEC
# define COMPILE_ALTIVEC 0
#endif
#define COMPILE_TEMPLATE_MMX 0
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define COMPILE_TEMPLATE_ALTIVEC 0
#include "swscale_template.c"
#if COMPILE_ALTIVEC
#if HAVE_ALTIVEC
#undef RENAME
#undef COMPILE_TEMPLATE_ALTIVEC
#define COMPILE_TEMPLATE_ALTIVEC 1
......@@ -1320,90 +1273,42 @@ static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uin
#include "ppc/swscale_template.c"
#endif
#if ARCH_X86
//MMX versions
#if COMPILE_MMX
#if HAVE_MMX
#undef RENAME
#undef COMPILE_TEMPLATE_MMX
#undef COMPILE_TEMPLATE_MMX2
#undef COMPILE_TEMPLATE_AMD3DNOW
#define COMPILE_TEMPLATE_MMX 1
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define RENAME(a) a ## _MMX
#include "x86/swscale_template.c"
#endif
//MMX2 versions
#if COMPILE_MMX2
#if HAVE_MMX2
#undef RENAME
#undef COMPILE_TEMPLATE_MMX
#undef COMPILE_TEMPLATE_MMX2
#undef COMPILE_TEMPLATE_AMD3DNOW
#define COMPILE_TEMPLATE_MMX 1
#define COMPILE_TEMPLATE_MMX2 1
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define RENAME(a) a ## _MMX2
#include "x86/swscale_template.c"
#endif
//3DNOW versions
#if COMPILE_3DNOW
#undef RENAME
#undef COMPILE_TEMPLATE_MMX
#undef COMPILE_TEMPLATE_MMX2
#undef COMPILE_TEMPLATE_AMD3DNOW
#define COMPILE_TEMPLATE_MMX 1
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_AMD3DNOW 1
#define RENAME(a) a ## _3DNow
#include "x86/swscale_template.c"
#endif
#endif //ARCH_X86
SwsFunc ff_getSwsFunc(SwsContext *c)
{
int cpu_flags = av_get_cpu_flags();
sws_init_swScale_c(c);
#if CONFIG_RUNTIME_CPUDETECT
#if ARCH_X86
// ordered per speed fastest first
if (c->flags & SWS_CPU_CAPS_MMX2) {
sws_init_swScale_MMX2(c);
return swScale_MMX2;
} else if (c->flags & SWS_CPU_CAPS_3DNOW) {
sws_init_swScale_3DNow(c);
return swScale_3DNow;
} else if (c->flags & SWS_CPU_CAPS_MMX) {
#if HAVE_MMX
if (cpu_flags & AV_CPU_FLAG_MMX)
sws_init_swScale_MMX(c);
return swScale_MMX;
}
#else
#if COMPILE_ALTIVEC
if (c->flags & SWS_CPU_CAPS_ALTIVEC) {
sws_init_swScale_altivec(c);
return swScale_altivec;
}
#endif
#endif /* ARCH_X86 */
#else //CONFIG_RUNTIME_CPUDETECT
#if COMPILE_TEMPLATE_MMX2
sws_init_swScale_MMX2(c);
return swScale_MMX2;
#elif COMPILE_TEMPLATE_AMD3DNOW
sws_init_swScale_3DNow(c);
return swScale_3DNow;
#elif COMPILE_TEMPLATE_MMX
sws_init_swScale_MMX(c);
return swScale_MMX;
#elif COMPILE_TEMPLATE_ALTIVEC
sws_init_swScale_altivec(c);
return swScale_altivec;
#if HAVE_MMX2
if (cpu_flags & AV_CPU_FLAG_MMX2)
sws_init_swScale_MMX2(c);
#endif
#if HAVE_ALTIVEC
if (cpu_flags & AV_CPU_FLAG_ALTIVEC)
sws_init_swScale_altivec(c);
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
return swScale_c;
}
......@@ -1900,23 +1805,6 @@ static int planarCopyWrapper(SwsContext *c, const uint8_t* src[], int srcStride[
return srcSliceH;
}
int ff_hardcodedcpuflags(void)
{
int flags = 0;
#if COMPILE_TEMPLATE_MMX2
flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
#elif COMPILE_TEMPLATE_AMD3DNOW
flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
#elif COMPILE_TEMPLATE_MMX
flags |= SWS_CPU_CAPS_MMX;
#elif COMPILE_TEMPLATE_ALTIVEC
flags |= SWS_CPU_CAPS_ALTIVEC;
#elif ARCH_BFIN
flags |= SWS_CPU_CAPS_BFIN;
#endif
return flags;
}
void ff_get_unscaled_swscale(SwsContext *c)
{
const enum PixelFormat srcFormat = c->srcFormat;
......@@ -2000,8 +1888,8 @@ void ff_get_unscaled_swscale(SwsContext *c)
if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P)
c->swScale= uyvyToYuv422Wrapper;
#if COMPILE_ALTIVEC
if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
#if HAVE_ALTIVEC
if ((av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) &&
!(c->flags & SWS_BITEXACT) &&
srcFormat == PIX_FMT_YUV420P) {
// unscaled YV12 -> packed YUV, we want speed
......@@ -2031,8 +1919,7 @@ void ff_get_unscaled_swscale(SwsContext *c)
c->swScale= planarCopyWrapper;
}
#if ARCH_BFIN
if (flags & SWS_CPU_CAPS_BFIN)
ff_bfin_get_unscaled_swscale (c);
ff_bfin_get_unscaled_swscale (c);
#endif
}
......
......@@ -95,13 +95,6 @@ const char *swscale_license(void);
#define SWS_ACCURATE_RND 0x40000
#define SWS_BITEXACT 0x80000
#define SWS_CPU_CAPS_MMX 0x80000000
#define SWS_CPU_CAPS_MMX2 0x20000000
#define SWS_CPU_CAPS_3DNOW 0x40000000
#define SWS_CPU_CAPS_ALTIVEC 0x10000000
#define SWS_CPU_CAPS_BFIN 0x01000000
#define SWS_CPU_CAPS_SSE2 0x02000000
#define SWS_MAX_REDUCE_CUTOFF 0.002
#define SWS_CS_ITU709 1
......
......@@ -481,11 +481,6 @@ extern const AVClass sws_context_class;
*/
void ff_get_unscaled_swscale(SwsContext *c);
/**
* Returns the SWS_CPU_CAPS for the optimized code compiled into swscale.
*/
int ff_hardcodedcpuflags(void);
/**
* Returns function pointer to fastest main scaler path function depending
* on architecture and available optimizations.
......
......@@ -363,153 +363,11 @@ static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
}
}
static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
static inline void hScale16_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
{
int i, j;
#if COMPILE_TEMPLATE_MMX
assert(filterSize % 4 == 0 && filterSize>0);
if (filterSize==4 && shift<15) { // Always true for upscaling, sometimes for down, too.
x86_reg counter= -2*dstW;
filter-= counter*2;
filterPos-= counter/2;
dst-= counter/2;
__asm__ volatile(
"movd %5, %%mm7 \n\t"
#if defined(PIC)
"push %%"REG_b" \n\t"
#endif
"push %%"REG_BP" \n\t" // we use 7 regs here ...
"mov %%"REG_a", %%"REG_BP" \n\t"
".p2align 4 \n\t"
"1: \n\t"
"movzwl (%2, %%"REG_BP"), %%eax \n\t"
"movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
"movq (%1, %%"REG_BP", 4), %%mm1 \n\t"
"movq 8(%1, %%"REG_BP", 4), %%mm3 \n\t"
"movq (%3, %%"REG_a", 2), %%mm0 \n\t"
"movq (%3, %%"REG_b", 2), %%mm2 \n\t"
"pmaddwd %%mm1, %%mm0 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"movq %%mm0, %%mm4 \n\t"
"punpckldq %%mm3, %%mm0 \n\t"
"punpckhdq %%mm3, %%mm4 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"psrad %%mm7, %%mm0 \n\t"
"packssdw %%mm0, %%mm0 \n\t"
"movd %%mm0, (%4, %%"REG_BP") \n\t"
"add $4, %%"REG_BP" \n\t"
" jnc 1b \n\t"
"pop %%"REG_BP" \n\t"
#if defined(PIC)
"pop %%"REG_b" \n\t"
#endif
: "+a" (counter)
: "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m"(shift)
#if !defined(PIC)
: "%"REG_b
#endif
);
} else if (filterSize==8 && shift<15) {
x86_reg counter= -2*dstW;
filter-= counter*4;
filterPos-= counter/2;
dst-= counter/2;
__asm__ volatile(
"movd %5, %%mm7 \n\t"
#if defined(PIC)
"push %%"REG_b" \n\t"
#endif
"push %%"REG_BP" \n\t" // we use 7 regs here ...
"mov %%"REG_a", %%"REG_BP" \n\t"
".p2align 4 \n\t"
"1: \n\t"
"movzwl (%2, %%"REG_BP"), %%eax \n\t"
"movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
"movq (%1, %%"REG_BP", 8), %%mm1 \n\t"
"movq 16(%1, %%"REG_BP", 8), %%mm3 \n\t"
"movq (%3, %%"REG_a", 2), %%mm0 \n\t"
"movq (%3, %%"REG_b", 2), %%mm2 \n\t"
"pmaddwd %%mm1, %%mm0 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"movq 8(%1, %%"REG_BP", 8), %%mm1 \n\t"
"movq 24(%1, %%"REG_BP", 8), %%mm5 \n\t"
"movq 8(%3, %%"REG_a", 2), %%mm4 \n\t"
"movq 8(%3, %%"REG_b", 2), %%mm2 \n\t"
"pmaddwd %%mm1, %%mm4 \n\t"
"pmaddwd %%mm2, %%mm5 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"paddd %%mm5, %%mm3 \n\t"
"movq %%mm0, %%mm4 \n\t"
"punpckldq %%mm3, %%mm0 \n\t"
"punpckhdq %%mm3, %%mm4 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"psrad %%mm7, %%mm0 \n\t"
"packssdw %%mm0, %%mm0 \n\t"
"movd %%mm0, (%4, %%"REG_BP") \n\t"
"add $4, %%"REG_BP" \n\t"
" jnc 1b \n\t"
"pop %%"REG_BP" \n\t"
#if defined(PIC)
"pop %%"REG_b" \n\t"
#endif
: "+a" (counter)
: "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m"(shift)
#if !defined(PIC)
: "%"REG_b
#endif
);
} else if (shift<15){
const uint16_t *offset = src+filterSize;
x86_reg counter= -2*dstW;
//filter-= counter*filterSize/2;
filterPos-= counter/2;
dst-= counter/2;
__asm__ volatile(
"movd %7, %%mm7 \n\t"
".p2align 4 \n\t"
"1: \n\t"
"mov %2, %%"REG_c" \n\t"
"movzwl (%%"REG_c", %0), %%eax \n\t"
"movzwl 2(%%"REG_c", %0), %%edx \n\t"
"mov %5, %%"REG_c" \n\t"
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm5, %%mm5 \n\t"
"2: \n\t"
"movq (%1), %%mm1 \n\t"
"movq (%1, %6), %%mm3 \n\t"
"movq (%%"REG_c", %%"REG_a", 2), %%mm0 \n\t"
"movq (%%"REG_c", %%"REG_d", 2), %%mm2 \n\t"
"pmaddwd %%mm1, %%mm0 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"paddd %%mm3, %%mm5 \n\t"
"paddd %%mm0, %%mm4 \n\t"
"add $8, %1 \n\t"
"add $8, %%"REG_c" \n\t"
"cmp %4, %%"REG_c" \n\t"
" jb 2b \n\t"
"add %6, %1 \n\t"
"movq %%mm4, %%mm0 \n\t"
"punpckldq %%mm5, %%mm4 \n\t"
"punpckhdq %%mm5, %%mm0 \n\t"
"paddd %%mm0, %%mm4 \n\t"
"psrad %%mm7, %%mm4 \n\t"
"packssdw %%mm4, %%mm4 \n\t"
"mov %3, %%"REG_a" \n\t"
"movd %%mm4, (%%"REG_a", %0) \n\t"
"add $4, %0 \n\t"
" jnc 1b \n\t"
: "+r" (counter), "+r" (filter)
: "m" (filterPos), "m" (dst), "m"(offset),
"m" (src), "r" ((x86_reg)filterSize*2), "m"(shift)
: "%"REG_a, "%"REG_c, "%"REG_d
);
} else
#endif
for (i=0; i<dstW; i++) {
int srcPos= filterPos[i];
int val=0;
......@@ -520,7 +378,7 @@ static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src,
}
}
static inline void RENAME(hScale16X)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
static inline void hScale16X_c(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
{
int i, j;
......@@ -660,6 +518,11 @@ inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
#define DEBUG_SWSCALE_BUFFERS 0
#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
#if HAVE_MMX
static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
int lastInLumBuf, int lastInChrBuf);
#endif
static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[])
{
......@@ -831,6 +694,9 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
if (!enough_lines)
break; //we can't output a dstY line so let's try with the next slice
#if HAVE_MMX
updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex, lastInLumBuf, lastInChrBuf);
#endif
if (dstY < dstH-2) {
const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
......@@ -955,6 +821,12 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
#if HAVE_MMX2
if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
__asm__ volatile("sfence":::"memory");
#endif
emms_c();
/* store changed local vars back in the context */
c->dstY= dstY;
c->lumBufIndex= lumBufIndex;
......@@ -1001,14 +873,14 @@ static void sws_init_swScale_c(SwsContext *c)
case PIX_FMT_YUV420P10BE:
case PIX_FMT_YUV420P16BE:
case PIX_FMT_YUV422P16BE:
case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? RENAME(hScale16) : RENAME(hScale16X); break;
case PIX_FMT_YUV444P16BE: c->hScale16= HAVE_BIGENDIAN ? hScale16_c : hScale16X_c; break;
case PIX_FMT_GRAY16LE :
case PIX_FMT_YUV420P9LE:
case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV420P10LE:
case PIX_FMT_YUV420P16LE:
case PIX_FMT_YUV422P16LE:
case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? RENAME(hScale16X) : RENAME(hScale16); break;
case PIX_FMT_YUV444P16LE: c->hScale16= HAVE_BIGENDIAN ? hScale16X_c : hScale16_c; break;
}
if (c->chrSrcHSubSample) {
switch(srcFormat) {
......
......@@ -185,7 +185,7 @@ static double getSplineCoeff(double a, double b, double c, double d, double dist
}
static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
int srcW, int dstW, int filterAlign, int one, int flags,
int srcW, int dstW, int filterAlign, int one, int flags, int cpu_flags,
SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
{
int i;
......@@ -196,10 +196,8 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
int64_t *filter2=NULL;
const int64_t fone= 1LL<<54;
int ret= -1;
#if ARCH_X86
if (flags & SWS_CPU_CAPS_MMX)
__asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
#endif
emms_c(); //FIXME this should not be required but it IS (even for non-MMX versions)
// NOTE: the +1 is for the MMX scaler which reads over the end
FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+1)*sizeof(int16_t), fail);
......@@ -416,7 +414,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
if (min>minFilterSize) minFilterSize= min;
}
if (flags & SWS_CPU_CAPS_ALTIVEC) {
if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) {
// we can handle the special case 4,
// so we don't want to go to the full 8
if (minFilterSize < 5)
......@@ -431,7 +429,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi
filterAlign = 1;
}
if (flags & SWS_CPU_CAPS_MMX) {
if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
// special case for unscaled vertical filtering
if (minFilterSize == 1 && filterAlign == 2)
filterAlign= 1;
......@@ -521,7 +519,7 @@ fail:
return ret;
}
#if ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT)
#if HAVE_MMX2
static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *filter, int32_t *filterPos, int numSplits)
{
uint8_t *fragmentA;
......@@ -679,7 +677,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *fil
return fragmentPos + 1;
}
#endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) */
#endif /* HAVE_MMX2 */
static void getSubSampleFactors(int *h, int *v, enum PixelFormat format)
{
......@@ -687,8 +685,6 @@ static void getSubSampleFactors(int *h, int *v, enum PixelFormat format)
*v = av_pix_fmt_descriptors[format].log2_chroma_h;
}
static int update_flags_cpu(int flags);
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation)
{
memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
......@@ -703,15 +699,12 @@ int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange
c->dstFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[c->dstFormat]);
c->srcFormatBpp = av_get_bits_per_pixel(&av_pix_fmt_descriptors[c->srcFormat]);
c->flags = update_flags_cpu(c->flags);
ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
//FIXME factorize
#if HAVE_ALTIVEC
if (c->flags & SWS_CPU_CAPS_ALTIVEC)
if (HAVE_ALTIVEC && av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)
ff_yuv2rgb_init_tables_altivec(c, inv_table, brightness, contrast, saturation);
#endif
return 0;
}
......@@ -741,27 +734,6 @@ static int handle_jpeg(enum PixelFormat *format)
}
}
static int update_flags_cpu(int flags)
{
#if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
flags &= ~( SWS_CPU_CAPS_MMX
|SWS_CPU_CAPS_MMX2
|SWS_CPU_CAPS_3DNOW
|SWS_CPU_CAPS_SSE2
|SWS_CPU_CAPS_ALTIVEC
|SWS_CPU_CAPS_BFIN);
flags |= ff_hardcodedcpuflags();
#else /* !CONFIG_RUNTIME_CPUDETECT */
int cpuflags = av_get_cpu_flags();
flags |= (cpuflags & AV_CPU_FLAG_SSE2 ? SWS_CPU_CAPS_SSE2 : 0);
flags |= (cpuflags & AV_CPU_FLAG_MMX ? SWS_CPU_CAPS_MMX : 0);
flags |= (cpuflags & AV_CPU_FLAG_MMX2 ? SWS_CPU_CAPS_MMX2 : 0);
flags |= (cpuflags & AV_CPU_FLAG_3DNOW ? SWS_CPU_CAPS_3DNOW : 0);
#endif /* CONFIG_RUNTIME_CPUDETECT */
return flags;
}
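Throughout this patch the removed SWS_CPU_CAPS_* bits are replaced by the same two-part test; a minimal sketch of the pattern, where has_mmx2() is an illustrative helper and not anything in the tree:
#include "libavutil/cpu.h"
/* Illustrative only: an accelerated path is taken when the code was
 * compiled in (HAVE_MMX2 is a 0/1 config macro) AND the running CPU
 * reports the feature via av_get_cpu_flags(). */
static int has_mmx2(void)
{
    return HAVE_MMX2 && (av_get_cpu_flags() & AV_CPU_FLAG_MMX2);
}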
SwsContext *sws_alloc_context(void)
{
SwsContext *c= av_mallocz(sizeof(SwsContext));
......@@ -782,16 +754,14 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
int srcH= c->srcH;
int dstW= c->dstW;
int dstH= c->dstH;
int flags;
int flags, cpu_flags;
enum PixelFormat srcFormat= c->srcFormat;
enum PixelFormat dstFormat= c->dstFormat;
flags= c->flags = update_flags_cpu(c->flags);
#if ARCH_X86
if (flags & SWS_CPU_CAPS_MMX)
__asm__ volatile("emms\n\t"::: "memory");
#endif
if (!rgb15to16) sws_rgb2rgb_init(flags);
cpu_flags = av_get_cpu_flags();
flags = c->flags;
emms_c();
if (!rgb15to16) sws_rgb2rgb_init();
unscaled = (srcW == dstW && srcH == dstH);
......@@ -884,7 +854,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
}
}
if (flags & SWS_CPU_CAPS_MMX2) {
if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) {
c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) {
if (flags&SWS_PRINT_INFO)
......@@ -910,7 +880,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
c->chrXInc+= 20;
}
//we don't use the x86 asm scaler if MMX is available
else if (flags & SWS_CPU_CAPS_MMX) {
else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
}
......@@ -918,7 +888,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
/* precalculate horizontal scaler filter coefficients */
{
#if ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT)
#if HAVE_MMX2
// can't downscale !!!
if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) {
c->lumMmx2FilterCodeSize = initMMX2HScaler( dstW, c->lumXInc, NULL, NULL, NULL, 8);
......@@ -954,21 +924,21 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
mprotect(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize, PROT_EXEC | PROT_READ);
#endif
} else
#endif /* ARCH_X86 && (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) */
#endif /* HAVE_MMX2 */
{
const int filterAlign=
(flags & SWS_CPU_CAPS_MMX) ? 4 :
(flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
(HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 4 :
(HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 :
1;
if (initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
srcW , dstW, filterAlign, 1<<14,
(flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags,
(flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags, cpu_flags,
srcFilter->lumH, dstFilter->lumH, c->param) < 0)
goto fail;
if (initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, cpu_flags,
srcFilter->chrH, dstFilter->chrH, c->param) < 0)
goto fail;
}
......@@ -977,18 +947,18 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
/* precalculate vertical scaler filter coefficients */
{
const int filterAlign=
(flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
(flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
(HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
(HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) ? 8 :
1;
if (initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
srcH , dstH, filterAlign, (1<<12),
(flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags,
(flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags, cpu_flags,
srcFilter->lumV, dstFilter->lumV, c->param) < 0)
goto fail;
if (initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
c->chrSrcH, c->chrDstH, filterAlign, (1<<12),
(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
(flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, cpu_flags,
srcFilter->chrV, dstFilter->chrV, c->param) < 0)
goto fail;
......@@ -1082,13 +1052,13 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
#endif
sws_format_name(dstFormat));
if (flags & SWS_CPU_CAPS_MMX2) av_log(c, AV_LOG_INFO, "using MMX2\n");
else if (flags & SWS_CPU_CAPS_3DNOW) av_log(c, AV_LOG_INFO, "using 3DNOW\n");
else if (flags & SWS_CPU_CAPS_MMX) av_log(c, AV_LOG_INFO, "using MMX\n");
else if (flags & SWS_CPU_CAPS_ALTIVEC) av_log(c, AV_LOG_INFO, "using AltiVec\n");
if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) av_log(c, AV_LOG_INFO, "using MMX2\n");
else if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW) av_log(c, AV_LOG_INFO, "using 3DNOW\n");
else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) av_log(c, AV_LOG_INFO, "using MMX\n");
else if (HAVE_ALTIVEC && cpu_flags & AV_CPU_FLAG_ALTIVEC) av_log(c, AV_LOG_INFO, "using AltiVec\n");
else av_log(c, AV_LOG_INFO, "using C\n");
if (flags & SWS_CPU_CAPS_MMX) {
if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
else {
......@@ -1107,7 +1077,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
av_log(c, AV_LOG_VERBOSE, "using n-tap MMX scaler for horizontal chrominance scaling\n");
}
} else {
#if ARCH_X86
#if HAVE_MMX
av_log(c, AV_LOG_VERBOSE, "using x86 asm scaler for horizontal scaling\n");
#else
if (flags & SWS_FAST_BILINEAR)
......@@ -1118,31 +1088,41 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
}
if (isPlanarYUV(dstFormat)) {
if (c->vLumFilterSize==1)
av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n",
(HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C");
else
av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (YV12 like)\n",
(HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C");
} else {
if (c->vLumFilterSize==1 && c->vChrFilterSize==2)
av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
" 2-tap scaler for vertical chrominance scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
" 2-tap scaler for vertical chrominance scaling (BGR)\n",
(HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C");
else if (c->vLumFilterSize==2 && c->vChrFilterSize==2)
av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for vertical scaling (BGR)\n",
(HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C");
else
av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (BGR)\n",
(HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C");
}
if (dstFormat==PIX_FMT_BGR24)
av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 converter\n",
(flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
(HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) ? "MMX2" :
((HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C"));
else if (dstFormat==PIX_FMT_RGB32)
av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n",
(HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C");
else if (dstFormat==PIX_FMT_BGR565)
av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n",
(HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C");
else if (dstFormat==PIX_FMT_BGR555)
av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n",
(HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C");
else if (dstFormat == PIX_FMT_RGB444BE || dstFormat == PIX_FMT_RGB444LE ||
dstFormat == PIX_FMT_BGR444BE || dstFormat == PIX_FMT_BGR444LE)
av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR12 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR12 converter\n",
(HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? "MMX" : "C");
av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
......@@ -1527,7 +1507,7 @@ void sws_freeContext(SwsContext *c)
av_freep(&c->hLumFilterPos);
av_freep(&c->hChrFilterPos);
#if ARCH_X86
#if HAVE_MMX
#ifdef MAP_ANONYMOUS
if (c->lumMmx2FilterCode) munmap(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize);
if (c->chrMmx2FilterCode) munmap(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize);
......@@ -1540,7 +1520,7 @@ void sws_freeContext(SwsContext *c)
#endif
c->lumMmx2FilterCode=NULL;
c->chrMmx2FilterCode=NULL;
#endif /* ARCH_X86 */
#endif /* HAVE_MMX */
av_freep(&c->yuvTable);
......@@ -1557,8 +1537,6 @@ struct SwsContext *sws_getCachedContext(struct SwsContext *context,
if (!param)
param = default_param;
flags = update_flags_cpu(flags);
if (context &&
(context->srcW != srcW ||
context->srcH != srcH ||
......
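Since the context no longer stores CPU capability bits, callers pass only the scaling algorithm in flags; a hedged usage sketch of the public API after this change (convert_frame(), the sizes and pixel formats are arbitrary placeholders):
#include "libswscale/swscale.h"
/* Sketch only: CPU features are detected at runtime inside libswscale,
 * so flags selects just the algorithm (SWS_BILINEAR here). */
static int convert_frame(const uint8_t *src[4], int srcStride[4],
                         uint8_t *dst[4], int dstStride[4])
{
    struct SwsContext *ctx = sws_getContext(1920, 1080, PIX_FMT_YUV420P,
                                            1280,  720, PIX_FMT_RGB24,
                                            SWS_BILINEAR, NULL, NULL, NULL);
    int ret;
    if (!ctx)
        return -1;
    ret = sws_scale(ctx, src, srcStride, 0, 1080, dst, dstStride);
    sws_freeContext(ctx);
    return ret;
}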
......@@ -27,6 +27,7 @@
#include "config.h"
#include "libavutil/x86_cpu.h"
#include "libavutil/cpu.h"
#include "libavutil/bswap.h"
#include "libswscale/rgb2rgb.h"
#include "libswscale/swscale.h"
......@@ -122,16 +123,16 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
32-bit C version, and and&add trick by Michael Niedermayer
*/
void rgb2rgb_init_x86(int flags)
void rgb2rgb_init_x86(void)
{
#if HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX
if (flags & SWS_CPU_CAPS_SSE2)
rgb2rgb_init_SSE2();
else if (flags & SWS_CPU_CAPS_MMX2)
rgb2rgb_init_MMX2();
else if (flags & SWS_CPU_CAPS_3DNOW)
rgb2rgb_init_3DNOW();
else if (flags & SWS_CPU_CAPS_MMX)
int cpu_flags = av_get_cpu_flags();
if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX)
rgb2rgb_init_MMX();
#endif /* HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX */
if (HAVE_AMD3DNOW && cpu_flags & AV_CPU_FLAG_3DNOW)
rgb2rgb_init_3DNOW();
if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2)
rgb2rgb_init_MMX2();
if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2)
rgb2rgb_init_SSE2();
}
......@@ -22,23 +22,14 @@
#undef REAL_MOVNTQ
#undef MOVNTQ
#undef PAVGB
#undef PREFETCH
#if COMPILE_TEMPLATE_AMD3DNOW
#define PREFETCH "prefetch"
#elif COMPILE_TEMPLATE_MMX2
#if COMPILE_TEMPLATE_MMX2
#define PREFETCH "prefetchnta"
#else
#define PREFETCH " # nop"
#endif
#if COMPILE_TEMPLATE_MMX2
#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
#elif COMPILE_TEMPLATE_AMD3DNOW
#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
#endif
#if COMPILE_TEMPLATE_MMX2
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
#else
......@@ -709,62 +700,6 @@
" jb 1b \n\t"
#define WRITERGB15(dst, dstw, index) REAL_WRITERGB15(dst, dstw, index)
#define WRITEBGR24OLD(dst, dstw, index) \
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
"movq %%mm2, %%mm1 \n\t" /* B */\
"movq %%mm5, %%mm6 \n\t" /* R */\
"punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
"punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
"punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
"punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
"movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
"movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
"punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
"punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
"punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
"punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
\
"movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
"psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\
"pand "MANGLE(bm00000111)", %%mm4 \n\t" /* 00000RGB 0 */\
"pand "MANGLE(bm11111000)", %%mm0 \n\t" /* 00RGB000 0.5 */\
"por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\
"movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\
"psllq $48, %%mm2 \n\t" /* GB000000 1 */\
"por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
\
"movq %%mm4, %%mm2 \n\t" /* 0RGB0RGB 1 */\
"psrld $16, %%mm4 \n\t" /* 000R000R 1 */\
"psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\
"por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\
"pand "MANGLE(bm00001111)", %%mm2 \n\t" /* 0000RGBR 1 */\
"movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\
"psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\
"pand "MANGLE(bm00000111)", %%mm4 \n\t" /* 00000RGB 2 */\
"pand "MANGLE(bm11111000)", %%mm1 \n\t" /* 00RGB000 2.5 */\
"por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\
"movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\
"psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\
"por %%mm1, %%mm2 \n\t" /* BRGBRGBR 1 */\
\
"psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\
"movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\
"psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\
"pand "MANGLE(bm00000111)", %%mm5 \n\t" /* 00000RGB 3 */\
"pand "MANGLE(bm11111000)", %%mm3 \n\t" /* 00RGB000 3.5 */\
"por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\
"psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\
"por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\
\
MOVNTQ(%%mm0, (dst))\
MOVNTQ(%%mm2, 8(dst))\
MOVNTQ(%%mm3, 16(dst))\
"add $24, "#dst" \n\t"\
\
"add $8, "#index" \n\t"\
"cmp "#dstw", "#index" \n\t"\
" jb 1b \n\t"
#define WRITEBGR24MMX(dst, dstw, index) \
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
"movq %%mm2, %%mm1 \n\t" /* B */\
......@@ -896,7 +831,6 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, con
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
{
if(!(c->flags & SWS_BITEXACT)) {
if (c->flags & SWS_ACCURATE_RND) {
if (uDest) {
YSCALEYUV2YV12X_ACCURATE( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
......@@ -918,27 +852,11 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, con
YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
}
return;
}
yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
chrFilter, chrSrc, chrFilterSize,
alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
}
static inline void RENAME(yuv2nv12X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, enum PixelFormat dstFormat)
{
yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
chrFilter, chrSrc, chrFilterSize,
dest, uDest, dstW, chrDstW, dstFormat);
}
static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
{
int i;
if(!(c->flags & SWS_BITEXACT)) {
long p= 4;
const int16_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
uint8_t *dst[4]= {aDest, dest, uDest, vDest};
......@@ -967,40 +885,6 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
}
}
}
return;
}
for (i=0; i<dstW; i++) {
int val= (lumSrc[i]+64)>>7;
if (val&256) {
if (val<0) val=0;
else val=255;
}
dest[i]= val;
}
if (uDest)
for (i=0; i<chrDstW; i++) {
int u=(chrSrc[i ]+64)>>7;
int v=(chrSrc[i + VOFW]+64)>>7;
if ((u|v)&256) {
if (u<0) u=0;
else if (u>255) u=255;
if (v<0) v=0;
else if (v>255) v=255;
}
uDest[i]= u;
vDest[i]= v;
}
if (CONFIG_SWSCALE_ALPHA && aDest)
for (i=0; i<dstW; i++) {
int val= (alpSrc[i]+64)>>7;
aDest[i]= av_clip_uint8(val);
}
}
......@@ -1013,7 +897,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
if(!(c->flags & SWS_BITEXACT)) {
if (c->flags & SWS_ACCURATE_RND) {
switch(c->dstFormat) {
case PIX_FMT_RGB32:
......@@ -1170,7 +1054,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
return;
}
}
}
yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
chrFilter, chrSrc, chrFilterSize,
alpSrc, dest, dstW, dstY);
......@@ -1182,11 +1066,6 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
{
int yalpha1=4095- yalpha;
int uvalpha1=4095-uvalpha;
int i;
if(!(c->flags & SWS_BITEXACT)) {
switch(c->dstFormat) {
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
case PIX_FMT_RGB32:
......@@ -1317,10 +1196,10 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
"a" (&c->redDither)
);
return;
default: break;
}
}
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
yuv2packed2_c(c, buf0, buf1, uvbuf0, uvbuf1, abuf0, abuf1,
dest, dstW, yalpha, uvalpha, y);
}
/**
......@@ -1329,18 +1208,13 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
{
const int yalpha1=0;
int i;
const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
const int yalpha= 4096; //FIXME ...
const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
if (flags&SWS_FULL_CHR_H_INT) {
c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
return;
}
if (flags&SWS_FULL_CHR_H_INT) {
c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
return;
}
if(!(flags & SWS_BITEXACT)) {
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
switch(dstFormat) {
case PIX_FMT_RGB32:
......@@ -1554,12 +1428,9 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
return;
}
}
}
if (uvalpha < 2048) {
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
} else {
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
}
yuv2packed1_c(c, buf0, uvbuf0, uvbuf1, abuf0, dest,
dstW, uvalpha, dstFormat, flags, y);
}
//FIXME yuy2* can read up to 7 samples too much
......@@ -1866,20 +1737,6 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
assert(src1 == src2);
}
static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
int i;
for (i=0; i<width; i++) {
int b= src1[6*i + 0] + src1[6*i + 3];
int g= src1[6*i + 1] + src1[6*i + 4];
int r= src1[6*i + 2] + src1[6*i + 5];
dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
}
assert(src1 == src2);
}
static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
......@@ -1891,20 +1748,6 @@ static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
}
static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
int i;
assert(src1==src2);
for (i=0; i<width; i++) {
int r= src1[6*i + 0] + src1[6*i + 3];
int g= src1[6*i + 1] + src1[6*i + 4];
int b= src1[6*i + 2] + src1[6*i + 5];
dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
}
}
// bilinear / bicubic scaling
static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, int srcW, int xInc,
......@@ -2061,50 +1904,168 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
}
}
//FIXME all pal and rgb srcFormats could do this conversion as well
//FIXME all scalers more complex than bilinear could do half of this transform
static void RENAME(chrRangeToJpeg)(int16_t *dst, int width)
static inline void RENAME(hScale16)(int16_t *dst, int dstW, const uint16_t *src, int srcW, int xInc,
const int16_t *filter, const int16_t *filterPos, long filterSize, int shift)
{
int i;
for (i = 0; i < width; i++) {
dst[i ] = (FFMIN(dst[i ],30775)*4663 - 9289992)>>12; //-264
dst[i+VOFW] = (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264
}
}
static void RENAME(chrRangeFromJpeg)(int16_t *dst, int width)
{
int i;
for (i = 0; i < width; i++) {
dst[i ] = (dst[i ]*1799 + 4081085)>>11; //1469
dst[i+VOFW] = (dst[i+VOFW]*1799 + 4081085)>>11; //1469
int i, j;
assert(filterSize % 4 == 0 && filterSize>0);
if (filterSize==4 && shift<15) { // Always true for upscaling, sometimes for down, too.
x86_reg counter= -2*dstW;
filter-= counter*2;
filterPos-= counter/2;
dst-= counter/2;
__asm__ volatile(
"movd %5, %%mm7 \n\t"
#if defined(PIC)
"push %%"REG_b" \n\t"
#endif
"push %%"REG_BP" \n\t" // we use 7 regs here ...
"mov %%"REG_a", %%"REG_BP" \n\t"
".p2align 4 \n\t"
"1: \n\t"
"movzwl (%2, %%"REG_BP"), %%eax \n\t"
"movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
"movq (%1, %%"REG_BP", 4), %%mm1 \n\t"
"movq 8(%1, %%"REG_BP", 4), %%mm3 \n\t"
"movq (%3, %%"REG_a", 2), %%mm0 \n\t"
"movq (%3, %%"REG_b", 2), %%mm2 \n\t"
"pmaddwd %%mm1, %%mm0 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"movq %%mm0, %%mm4 \n\t"
"punpckldq %%mm3, %%mm0 \n\t"
"punpckhdq %%mm3, %%mm4 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"psrad %%mm7, %%mm0 \n\t"
"packssdw %%mm0, %%mm0 \n\t"
"movd %%mm0, (%4, %%"REG_BP") \n\t"
"add $4, %%"REG_BP" \n\t"
" jnc 1b \n\t"
"pop %%"REG_BP" \n\t"
#if defined(PIC)
"pop %%"REG_b" \n\t"
#endif
: "+a" (counter)
: "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m"(shift)
#if !defined(PIC)
: "%"REG_b
#endif
);
} else if (filterSize==8 && shift<15) {
x86_reg counter= -2*dstW;
filter-= counter*4;
filterPos-= counter/2;
dst-= counter/2;
__asm__ volatile(
"movd %5, %%mm7 \n\t"
#if defined(PIC)
"push %%"REG_b" \n\t"
#endif
"push %%"REG_BP" \n\t" // we use 7 regs here ...
"mov %%"REG_a", %%"REG_BP" \n\t"
".p2align 4 \n\t"
"1: \n\t"
"movzwl (%2, %%"REG_BP"), %%eax \n\t"
"movzwl 2(%2, %%"REG_BP"), %%ebx \n\t"
"movq (%1, %%"REG_BP", 8), %%mm1 \n\t"
"movq 16(%1, %%"REG_BP", 8), %%mm3 \n\t"
"movq (%3, %%"REG_a", 2), %%mm0 \n\t"
"movq (%3, %%"REG_b", 2), %%mm2 \n\t"
"pmaddwd %%mm1, %%mm0 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"movq 8(%1, %%"REG_BP", 8), %%mm1 \n\t"
"movq 24(%1, %%"REG_BP", 8), %%mm5 \n\t"
"movq 8(%3, %%"REG_a", 2), %%mm4 \n\t"
"movq 8(%3, %%"REG_b", 2), %%mm2 \n\t"
"pmaddwd %%mm1, %%mm4 \n\t"
"pmaddwd %%mm2, %%mm5 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"paddd %%mm5, %%mm3 \n\t"
"movq %%mm0, %%mm4 \n\t"
"punpckldq %%mm3, %%mm0 \n\t"
"punpckhdq %%mm3, %%mm4 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"psrad %%mm7, %%mm0 \n\t"
"packssdw %%mm0, %%mm0 \n\t"
"movd %%mm0, (%4, %%"REG_BP") \n\t"
"add $4, %%"REG_BP" \n\t"
" jnc 1b \n\t"
"pop %%"REG_BP" \n\t"
#if defined(PIC)
"pop %%"REG_b" \n\t"
#endif
: "+a" (counter)
: "c" (filter), "d" (filterPos), "S" (src), "D" (dst), "m"(shift)
#if !defined(PIC)
: "%"REG_b
#endif
);
} else if (shift<15){
const uint16_t *offset = src+filterSize;
x86_reg counter= -2*dstW;
//filter-= counter*filterSize/2;
filterPos-= counter/2;
dst-= counter/2;
__asm__ volatile(
"movd %7, %%mm7 \n\t"
".p2align 4 \n\t"
"1: \n\t"
"mov %2, %%"REG_c" \n\t"
"movzwl (%%"REG_c", %0), %%eax \n\t"
"movzwl 2(%%"REG_c", %0), %%edx \n\t"
"mov %5, %%"REG_c" \n\t"
"pxor %%mm4, %%mm4 \n\t"
"pxor %%mm5, %%mm5 \n\t"
"2: \n\t"
"movq (%1), %%mm1 \n\t"
"movq (%1, %6), %%mm3 \n\t"
"movq (%%"REG_c", %%"REG_a", 2), %%mm0 \n\t"
"movq (%%"REG_c", %%"REG_d", 2), %%mm2 \n\t"
"pmaddwd %%mm1, %%mm0 \n\t"
"pmaddwd %%mm2, %%mm3 \n\t"
"paddd %%mm3, %%mm5 \n\t"
"paddd %%mm0, %%mm4 \n\t"
"add $8, %1 \n\t"
"add $8, %%"REG_c" \n\t"
"cmp %4, %%"REG_c" \n\t"
" jb 2b \n\t"
"add %6, %1 \n\t"
"movq %%mm4, %%mm0 \n\t"
"punpckldq %%mm5, %%mm4 \n\t"
"punpckhdq %%mm5, %%mm0 \n\t"
"paddd %%mm0, %%mm4 \n\t"
"psrad %%mm7, %%mm4 \n\t"
"packssdw %%mm4, %%mm4 \n\t"
"mov %3, %%"REG_a" \n\t"
"movd %%mm4, (%%"REG_a", %0) \n\t"
"add $4, %0 \n\t"
" jnc 1b \n\t"
: "+r" (counter), "+r" (filter)
: "m" (filterPos), "m" (dst), "m"(offset),
"m" (src), "r" ((x86_reg)filterSize*2), "m"(shift)
: "%"REG_a, "%"REG_c, "%"REG_d
);
} else
for (i=0; i<dstW; i++) {
int srcPos= filterPos[i];
int val=0;
for (j=0; j<filterSize; j++) {
val += ((int)src[srcPos + j])*filter[filterSize*i + j];
}
dst[i] = FFMIN(val>>shift, (1<<15)-1); // the cubic equation does overflow ...
}
}
static void RENAME(lumRangeToJpeg)(int16_t *dst, int width)
{
int i;
for (i = 0; i < width; i++)
dst[i] = (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
}
static void RENAME(lumRangeFromJpeg)(int16_t *dst, int width)
{
int i;
for (i = 0; i < width; i++)
dst[i] = (dst[i]*14071 + 33561947)>>14;
}
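The fixed-point constants in the two luma range helpers above encode the standard limited/full range mapping on samples stored as 8-bit values times 128 (19077/2^14 is roughly 255/219, 14071/2^14 is roughly 219/255, and the offsets correspond to the 16*128 black level); a floating-point reference, as an illustration only and not bit-exact:
#include <stdint.h>
/* Illustration only (not the code in the tree): limited range -> full
 * range on value*128 samples; the clamp mirrors FFMIN(dst[i],30189),
 * which keeps the int16_t result from overflowing. */
static void lumRangeToJpeg_ref(int16_t *dst, int width)
{
    int i;
    for (i = 0; i < width; i++) {
        double y = dst[i] < 30189 ? dst[i] : 30189;
        dst[i] = (int16_t)((y - 16 * 128) * 255.0 / 219.0);
    }
}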
#define FAST_BILINEAR_X86 \
"subl %%edi, %%esi \n\t" /* src[xx+1] - src[xx] */ \
"imull %%ecx, %%esi \n\t" /* (src[xx+1] - src[xx])*xalpha */ \
"shll $16, %%edi \n\t" \
"addl %%edi, %%esi \n\t" /* src[xx+1]*xalpha + src[xx]*(1-xalpha) */ \
"mov %1, %%"REG_D"\n\t" \
"shrl $9, %%esi \n\t" \
#if COMPILE_TEMPLATE_MMX2
static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
long dstWidth, const uint8_t *src, int srcW,
int xInc)
{
#if COMPILE_TEMPLATE_MMX2
int32_t *filterPos = c->hLumFilterPos;
int16_t *filter = c->hLumFilter;
int canMMX2BeUsed = c->canMMX2BeUsed;
......@@ -2113,7 +2074,7 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
#if defined(PIC)
DECLARE_ALIGNED(8, uint64_t, ebxsave);
#endif
if (canMMX2BeUsed) {
__asm__ volatile(
#if defined(PIC)
"mov %%"REG_b", %5 \n\t"
......@@ -2172,80 +2133,12 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
#endif
);
for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
} else {
#endif /* COMPILE_TEMPLATE_MMX2 */
x86_reg xInc_shr16 = xInc >> 16;
uint16_t xInc_mask = xInc & 0xffff;
x86_reg dstWidth_reg = dstWidth;
//NO MMX just normal asm ...
__asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t" // i
"xor %%"REG_d", %%"REG_d" \n\t" // xx
"xorl %%ecx, %%ecx \n\t" // xalpha
".p2align 4 \n\t"
"1: \n\t"
"movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
"movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
FAST_BILINEAR_X86
"movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t"
"addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF
"adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry
"movzbl (%0, %%"REG_d"), %%edi \n\t" //src[xx]
"movzbl 1(%0, %%"REG_d"), %%esi \n\t" //src[xx+1]
FAST_BILINEAR_X86
"movw %%si, 2(%%"REG_D", %%"REG_a", 2) \n\t"
"addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF
"adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry
"add $2, %%"REG_a" \n\t"
"cmp %2, %%"REG_a" \n\t"
" jb 1b \n\t"
:: "r" (src), "m" (dst), "m" (dstWidth_reg), "m" (xInc_shr16), "m" (xInc_mask)
: "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
);
#if COMPILE_TEMPLATE_MMX2
} //if MMX2 can't be used
#endif
}
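The FAST_BILINEAR_X86 macro used above implements the usual fixed-point linear interpolation with a 16-bit fractional position; a C reference of the same arithmetic, for illustration only:
/* C reference for FAST_BILINEAR_X86: a = src[xx], b = src[xx+1],
 * xalpha is the 16-bit fraction; the >> 9 keeps the output as an
 * 8-bit sample scaled by 128 (16 - 9 == 7 extra bits). */
static inline int fast_bilinear_ref(int a, int b, int xalpha)
{
    return ((a << 16) + (b - a) * xalpha) >> 9;
}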
// *** horizontal scale Y line to temp buffer
static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src, int srcW, int xInc,
const int16_t *hLumFilter,
const int16_t *hLumFilterPos, int hLumFilterSize,
uint8_t *formatConvBuffer,
uint32_t *pal, int isAlpha)
{
void (*toYV12)(uint8_t *, const uint8_t *, long, uint32_t *) = isAlpha ? c->alpToYV12 : c->lumToYV12;
void (*convertRange)(int16_t *, int) = isAlpha ? NULL : c->lumConvertRange;
src += isAlpha ? c->alpSrcOffset : c->lumSrcOffset;
if (toYV12) {
toYV12(formatConvBuffer, src, srcW, pal);
src= formatConvBuffer;
}
if (c->hScale16) {
c->hScale16(dst, dstWidth, (uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
} else if (!c->hyscale_fast) {
c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
} else { // fast bilinear upscale / crap downscale
c->hyscale_fast(c, dst, dstWidth, src, srcW, xInc);
}
if (convertRange)
convertRange(dst, dstWidth);
}
static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
long dstWidth, const uint8_t *src1,
const uint8_t *src2, int srcW, int xInc)
{
#if COMPILE_TEMPLATE_MMX2
int32_t *filterPos = c->hChrFilterPos;
int16_t *filter = c->hChrFilter;
int canMMX2BeUsed = c->canMMX2BeUsed;
......@@ -2254,7 +2147,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
#if defined(PIC)
DECLARE_ALIGNED(8, uint64_t, ebxsave);
#endif
if (canMMX2BeUsed) {
__asm__ volatile(
#if defined(PIC)
"mov %%"REG_b", %6 \n\t"
......@@ -2304,252 +2197,32 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
dst[i] = src1[srcW-1]*128;
dst[i+VOFW] = src2[srcW-1]*128;
}
} else {
#endif /* COMPILE_TEMPLATE_MMX2 */
x86_reg xInc_shr16 = (x86_reg) (xInc >> 16);
uint16_t xInc_mask = xInc & 0xffff;
x86_reg dstWidth_reg = dstWidth;
__asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t" // i
"xor %%"REG_d", %%"REG_d" \n\t" // xx
"xorl %%ecx, %%ecx \n\t" // xalpha
".p2align 4 \n\t"
"1: \n\t"
"mov %0, %%"REG_S" \n\t"
"movzbl (%%"REG_S", %%"REG_d"), %%edi \n\t" //src[xx]
"movzbl 1(%%"REG_S", %%"REG_d"), %%esi \n\t" //src[xx+1]
FAST_BILINEAR_X86
"movw %%si, (%%"REG_D", %%"REG_a", 2) \n\t"
"movzbl (%5, %%"REG_d"), %%edi \n\t" //src[xx]
"movzbl 1(%5, %%"REG_d"), %%esi \n\t" //src[xx+1]
FAST_BILINEAR_X86
"movw %%si, "AV_STRINGIFY(VOF)"(%%"REG_D", %%"REG_a", 2) \n\t"
"addw %4, %%cx \n\t" //xalpha += xInc&0xFFFF
"adc %3, %%"REG_d" \n\t" //xx+= xInc>>16 + carry
"add $1, %%"REG_a" \n\t"
"cmp %2, %%"REG_a" \n\t"
" jb 1b \n\t"
/* GCC 3.3 makes MPlayer crash on IA-32 machines when using "g" operand here,
which is needed to support GCC 4.0. */
#if ARCH_X86_64 && AV_GCC_VERSION_AT_LEAST(3,4)
:: "m" (src1), "m" (dst), "g" (dstWidth_reg), "m" (xInc_shr16), "m" (xInc_mask),
#else
:: "m" (src1), "m" (dst), "m" (dstWidth_reg), "m" (xInc_shr16), "m" (xInc_mask),
#endif
"r" (src2)
: "%"REG_a, "%"REG_d, "%ecx", "%"REG_D, "%esi"
);
#if COMPILE_TEMPLATE_MMX2
} //if MMX2 can't be used
#endif
}
inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth, const uint8_t *src1, const uint8_t *src2,
int srcW, int xInc, const int16_t *hChrFilter,
const int16_t *hChrFilterPos, int hChrFilterSize,
uint8_t *formatConvBuffer,
uint32_t *pal)
{
src1 += c->chrSrcOffset;
src2 += c->chrSrcOffset;
if (c->chrToYV12) {
c->chrToYV12(formatConvBuffer, formatConvBuffer+VOFW, src1, src2, srcW, pal);
src1= formatConvBuffer;
src2= formatConvBuffer+VOFW;
}
if (c->hScale16) {
c->hScale16(dst , dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
c->hScale16(dst+VOFW, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
} else if (!c->hcscale_fast) {
c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
} else { // fast bilinear upscale / crap downscale
c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc);
}
if (c->chrConvertRange)
c->chrConvertRange(dst, dstWidth);
}
#endif /* COMPILE_TEMPLATE_MMX2 */
#define DEBUG_SWSCALE_BUFFERS 0
#define DEBUG_BUFFERS(...) if (DEBUG_SWSCALE_BUFFERS) av_log(c, AV_LOG_DEBUG, __VA_ARGS__)
static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[])
#if !COMPILE_TEMPLATE_MMX2
static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufIndex,
int lastInLumBuf, int lastInChrBuf)
{
/* load a few things into local vars to make the code more readable? and faster */
const int srcW= c->srcW;
const int dstW= c->dstW;
const int dstH= c->dstH;
const int chrDstW= c->chrDstW;
const int chrSrcW= c->chrSrcW;
const int lumXInc= c->lumXInc;
const int chrXInc= c->chrXInc;
const enum PixelFormat dstFormat= c->dstFormat;
const int flags= c->flags;
int16_t **lumPixBuf= c->lumPixBuf;
int16_t **chrPixBuf= c->chrPixBuf;
int16_t **alpPixBuf= c->alpPixBuf;
const int vLumBufSize= c->vLumBufSize;
const int vChrBufSize= c->vChrBufSize;
int16_t *vLumFilterPos= c->vLumFilterPos;
int16_t *vChrFilterPos= c->vChrFilterPos;
int16_t *hLumFilterPos= c->hLumFilterPos;
int16_t *hChrFilterPos= c->hChrFilterPos;
int16_t *vLumFilter= c->vLumFilter;
int16_t *vChrFilter= c->vChrFilter;
int16_t *hLumFilter= c->hLumFilter;
int16_t *hChrFilter= c->hChrFilter;
int32_t *lumMmxFilter= c->lumMmxFilter;
int32_t *chrMmxFilter= c->chrMmxFilter;
int32_t av_unused *alpMmxFilter= c->alpMmxFilter;
const int vLumFilterSize= c->vLumFilterSize;
const int vChrFilterSize= c->vChrFilterSize;
const int hLumFilterSize= c->hLumFilterSize;
const int hChrFilterSize= c->hChrFilterSize;
int16_t **lumPixBuf= c->lumPixBuf;
int16_t **chrPixBuf= c->chrPixBuf;
int16_t **alpPixBuf= c->alpPixBuf;
const int vLumBufSize= c->vLumBufSize;
const int vChrBufSize= c->vChrBufSize;
uint8_t *formatConvBuffer= c->formatConvBuffer;
const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
int lastDstY;
uint32_t *pal=c->pal_yuv;
/* vars which will change and which we need to store back in the context */
int dstY= c->dstY;
int lumBufIndex= c->lumBufIndex;
int chrBufIndex= c->chrBufIndex;
int lastInLumBuf= c->lastInLumBuf;
int lastInChrBuf= c->lastInChrBuf;
if (isPacked(c->srcFormat)) {
src[0]=
src[1]=
src[2]=
src[3]= src[0];
srcStride[0]=
srcStride[1]=
srcStride[2]=
srcStride[3]= srcStride[0];
}
srcStride[1]<<= c->vChrDrop;
srcStride[2]<<= c->vChrDrop;
DEBUG_BUFFERS("swScale() %p[%d] %p[%d] %p[%d] %p[%d] -> %p[%d] %p[%d] %p[%d] %p[%d]\n",
src[0], srcStride[0], src[1], srcStride[1], src[2], srcStride[2], src[3], srcStride[3],
dst[0], dstStride[0], dst[1], dstStride[1], dst[2], dstStride[2], dst[3], dstStride[3]);
DEBUG_BUFFERS("srcSliceY: %d srcSliceH: %d dstY: %d dstH: %d\n",
srcSliceY, srcSliceH, dstY, dstH);
DEBUG_BUFFERS("vLumFilterSize: %d vLumBufSize: %d vChrFilterSize: %d vChrBufSize: %d\n",
vLumFilterSize, vLumBufSize, vChrFilterSize, vChrBufSize);
if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
static int warnedAlready=0; //FIXME move this into the context perhaps
if (flags & SWS_PRINT_INFO && !warnedAlready) {
av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
" ->cannot do aligned memory accesses anymore\n");
warnedAlready=1;
}
}
/* Note the user might start scaling the picture in the middle so this
will not get executed. This is not really intended but works
currently, so people might do it. */
if (srcSliceY ==0) {
lumBufIndex=-1;
chrBufIndex=-1;
dstY=0;
lastInLumBuf= -1;
lastInChrBuf= -1;
}
lastDstY= dstY;
for (;dstY < dstH; dstY++) {
unsigned char *dest =dst[0]+dstStride[0]*dstY;
const int chrDstY= dstY>>c->chrDstVSubSample;
unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)];
const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input
int lastLumSrcY2=firstLumSrcY2+ vLumFilterSize -1; // Last line needed as input
int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input
int enough_lines;
//handle holes (FAST_BILINEAR & weird filters)
if (firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1;
if (firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1;
assert(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1);
assert(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1);
DEBUG_BUFFERS("dstY: %d\n", dstY);
DEBUG_BUFFERS("\tfirstLumSrcY: %d lastLumSrcY: %d lastInLumBuf: %d\n",
firstLumSrcY, lastLumSrcY, lastInLumBuf);
DEBUG_BUFFERS("\tfirstChrSrcY: %d lastChrSrcY: %d lastInChrBuf: %d\n",
firstChrSrcY, lastChrSrcY, lastInChrBuf);
// Do we have enough lines in this slice to output the dstY line
enough_lines = lastLumSrcY2 < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample);
if (!enough_lines) {
lastLumSrcY = srcSliceY + srcSliceH - 1;
lastChrSrcY = chrSrcSliceY + chrSrcSliceH - 1;
DEBUG_BUFFERS("buffering slice: lastLumSrcY %d lastChrSrcY %d\n",
lastLumSrcY, lastChrSrcY);
}
//Do horizontal scaling
while(lastInLumBuf < lastLumSrcY) {
const uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
const uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
lumBufIndex++;
assert(lumBufIndex < 2*vLumBufSize);
assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
assert(lastInLumBuf + 1 - srcSliceY >= 0);
RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
hLumFilter, hLumFilterPos, hLumFilterSize,
formatConvBuffer,
pal, 0);
if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc,
hLumFilter, hLumFilterPos, hLumFilterSize,
formatConvBuffer,
pal, 1);
lastInLumBuf++;
DEBUG_BUFFERS("\t\tlumBufIndex %d: lastInLumBuf: %d\n",
lumBufIndex, lastInLumBuf);
}
while(lastInChrBuf < lastChrSrcY) {
const uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
const uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
chrBufIndex++;
assert(chrBufIndex < 2*vChrBufSize);
assert(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH));
assert(lastInChrBuf + 1 - chrSrcSliceY >= 0);
//FIXME replace parameters through context struct (some at least)
if (c->needs_hcscale)
RENAME(hcscale)(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
hChrFilter, hChrFilterPos, hChrFilterSize,
formatConvBuffer,
pal);
lastInChrBuf++;
DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n",
chrBufIndex, lastInChrBuf);
}
//wrap buf index around to stay inside the ring buffer
if (lumBufIndex >= vLumBufSize) lumBufIndex-= vLumBufSize;
if (chrBufIndex >= vChrBufSize) chrBufIndex-= vChrBufSize;
if (!enough_lines)
break; //we can't output a dstY line so let's try with the next slice
const int chrDstY= dstY>>c->chrDstVSubSample;
const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
c->blueDither= ff_dither8[dstY&1];
if (c->dstFormat == PIX_FMT_RGB555 || c->dstFormat == PIX_FMT_BGR555)
......@@ -2557,7 +2230,7 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
else
c->greenDither= ff_dither4[dstY&1];
c->redDither= ff_dither8[(dstY+1)&1];
if (dstY < dstH-2) {
if (dstY < dstH - 2) {
const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
......@@ -2606,183 +2279,52 @@ static int RENAME(swScale)(SwsContext *c, const uint8_t* src[], int srcStride[],
((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
}
}
if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
c->yuv2nv12X(c,
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
dest, uDest, dstW, chrDstW, dstFormat);
} else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
yuv2yuvX16inC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
dstFormat);
} else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
const int16_t *lumBuf = lumSrcPtr[0];
const int16_t *chrBuf= chrSrcPtr[0];
const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL;
c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
} else { //General YV12
c->yuv2yuvX(c,
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
}
} else {
assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
int chrAlpha= vChrFilter[2*dstY+1];
if(flags & SWS_FULL_CHR_H_INT) {
yuv2rgbXinC_full(c, //FIXME write a packed1_full function
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY);
} else {
c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
alpPixBuf ? *alpSrcPtr : NULL,
dest, dstW, chrAlpha, dstFormat, flags, dstY);
}
} else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
int lumAlpha= vLumFilter[2*dstY+1];
int chrAlpha= vChrFilter[2*dstY+1];
lumMmxFilter[2]=
lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
chrMmxFilter[2]=
chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
if(flags & SWS_FULL_CHR_H_INT) {
yuv2rgbXinC_full(c, //FIXME write a packed2_full function
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY);
} else {
c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
dest, dstW, lumAlpha, chrAlpha, dstY);
}
} else { //general RGB
if(flags & SWS_FULL_CHR_H_INT) {
yuv2rgbXinC_full(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY);
} else {
c->yuv2packedX(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY);
}
}
}
} else { // hmm looks like we can't use MMX here without overwriting this array's tail
const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
yuv2nv12XinC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
dest, uDest, dstW, chrDstW, dstFormat);
} else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
if (is16BPS(dstFormat) || isNBPS(dstFormat)) {
yuv2yuvX16inC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
dstFormat);
} else {
yuv2yuvXinC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
}
} else {
assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
if(flags & SWS_FULL_CHR_H_INT) {
yuv2rgbXinC_full(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY);
} else {
yuv2packedXinC(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY);
}
}
}
}
if ((dstFormat == PIX_FMT_YUVA420P) && !alpPixBuf)
fillPlane(dst[3], dstStride[3], dstW, dstY-lastDstY, lastDstY, 255);
if (flags & SWS_CPU_CAPS_MMX2 ) __asm__ volatile("sfence":::"memory");
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
if (flags & SWS_CPU_CAPS_3DNOW) __asm__ volatile("femms" :::"memory");
else __asm__ volatile("emms" :::"memory");
/* store changed local vars back in the context */
c->dstY= dstY;
c->lumBufIndex= lumBufIndex;
c->chrBufIndex= chrBufIndex;
c->lastInLumBuf= lastInLumBuf;
c->lastInChrBuf= lastInChrBuf;
return dstY - lastDstY;
}
#endif /* !COMPILE_TEMPLATE_MMX2 */
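The pointer arithmetic in the removed swScale() above (lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize) works without a modulo because the line-pointer arrays are allocated elsewhere with 2*vLumBufSize entries whose second half mirrors the first; a schematic sketch with illustrative names only:
/* Schematic only: ring[] holds 2*size pointers with ring[i + size] ==
 * ring[i], so picking an element at (head + wanted - newest + size)
 * always lands in range while |wanted - newest| < size. */
static int16_t *pick_line(int16_t *ring[], int size, int head,
                          int newest, int wanted)
{
    return ring[head + wanted - newest + size];
}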
static void RENAME(sws_init_swScale)(SwsContext *c)
{
enum PixelFormat srcFormat = c->srcFormat;
c->yuv2nv12X = RENAME(yuv2nv12X );
c->yuv2yuv1 = RENAME(yuv2yuv1 );
c->yuv2yuvX = RENAME(yuv2yuvX );
c->yuv2packed1 = RENAME(yuv2packed1 );
c->yuv2packed2 = RENAME(yuv2packed2 );
c->yuv2packedX = RENAME(yuv2packedX );
if (!(c->flags & SWS_BITEXACT)) {
c->yuv2yuv1 = RENAME(yuv2yuv1 );
c->yuv2yuvX = RENAME(yuv2yuvX );
c->yuv2packed1 = RENAME(yuv2packed1 );
c->yuv2packed2 = RENAME(yuv2packed2 );
c->yuv2packedX = RENAME(yuv2packedX );
}
c->hScale = RENAME(hScale );
// Use the new MMX scaler if the MMX2 one can't be used (it is faster than the x86 ASM one).
#if COMPILE_TEMPLATE_MMX2
if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed)
{
c->hyscale_fast = RENAME(hyscale_fast);
c->hcscale_fast = RENAME(hcscale_fast);
} else {
#endif /* COMPILE_TEMPLATE_MMX2 */
c->hyscale_fast = NULL;
c->hcscale_fast = NULL;
#if COMPILE_TEMPLATE_MMX2
}
#endif /* COMPILE_TEMPLATE_MMX2 */
switch(srcFormat) {
switch(srcFormat) {
case PIX_FMT_YUYV422 : c->chrToYV12 = RENAME(yuy2ToUV); break;
case PIX_FMT_UYVY422 : c->chrToYV12 = RENAME(uyvyToUV); break;
case PIX_FMT_NV12 : c->chrToYV12 = RENAME(nv12ToUV); break;
case PIX_FMT_NV21 : c->chrToYV12 = RENAME(nv21ToUV); break;
case PIX_FMT_YUV420P16BE:
case PIX_FMT_YUV422P16BE:
case PIX_FMT_YUV444P16BE: c->chrToYV12 = RENAME(BEToUV); break;
case PIX_FMT_GRAY16LE :
case PIX_FMT_YUV420P9LE:
case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV420P10LE:
case PIX_FMT_YUV420P16LE:
case PIX_FMT_YUV422P16LE:
case PIX_FMT_YUV444P16LE: c->chrToYV12 = RENAME(LEToUV); break;
default: break;
}
if (c->chrSrcHSubSample) {
switch(srcFormat) {
case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV_half); break;
case PIX_FMT_RGB24 : c->chrToYV12 = RENAME(rgb24ToUV_half); break;
default: break;
}
} else {
case PIX_FMT_YUV444P16LE: c->hScale16= RENAME(hScale16); break;
}
if (!c->chrSrcHSubSample) {
switch(srcFormat) {
case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV); break;
case PIX_FMT_RGB24 : c->chrToYV12 = RENAME(rgb24ToUV); break;
......@@ -2792,16 +2334,10 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
switch (srcFormat) {
case PIX_FMT_YUYV422 :
case PIX_FMT_YUV420P16BE:
case PIX_FMT_YUV422P16BE:
case PIX_FMT_YUV444P16BE:
case PIX_FMT_Y400A :
case PIX_FMT_GRAY16BE : c->lumToYV12 = RENAME(yuy2ToY); break;
c->lumToYV12 = RENAME(yuy2ToY); break;
case PIX_FMT_UYVY422 :
case PIX_FMT_YUV420P16LE:
case PIX_FMT_YUV422P16LE:
case PIX_FMT_YUV444P16LE:
case PIX_FMT_GRAY16LE : c->lumToYV12 = RENAME(uyvyToY); break;
c->lumToYV12 = RENAME(uyvyToY); break;
case PIX_FMT_BGR24 : c->lumToYV12 = RENAME(bgr24ToY); break;
case PIX_FMT_RGB24 : c->lumToYV12 = RENAME(rgb24ToY); break;
default: break;
......@@ -2812,14 +2348,4 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
default: break;
}
}
if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
if (c->srcRange) {
c->lumConvertRange = RENAME(lumRangeFromJpeg);
c->chrConvertRange = RENAME(chrRangeFromJpeg);
} else {
c->lumConvertRange = RENAME(lumRangeToJpeg);
c->chrConvertRange = RENAME(chrRangeToJpeg);
}
}
}
......@@ -34,6 +34,7 @@
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/x86_cpu.h"
#include "libavutil/cpu.h"
#define DITHER1XBPP // only for MMX
......@@ -46,57 +47,58 @@ DECLARE_ASM_CONST(8, uint64_t, pb_03) = 0x0303030303030303ULL;
DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL;
//MMX versions
#if HAVE_MMX
#undef RENAME
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 0
#undef COMPILE_TEMPLATE_MMX2
#define COMPILE_TEMPLATE_MMX2 0
#define RENAME(a) a ## _MMX
#include "yuv2rgb_template.c"
#endif /* HAVE_MMX */
//MMX2 versions
#if HAVE_MMX2
#undef RENAME
#undef HAVE_MMX2
#define HAVE_MMX2 1
#undef COMPILE_TEMPLATE_MMX2
#define COMPILE_TEMPLATE_MMX2 1
#define RENAME(a) a ## _MMX2
#include "yuv2rgb_template.c"
#endif /* HAVE_MMX2 */
SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
{
if (c->flags & SWS_CPU_CAPS_MMX2) {
int cpu_flags = av_get_cpu_flags();
if (c->srcFormat != PIX_FMT_YUV420P &&
c->srcFormat != PIX_FMT_YUVA420P)
return NULL;
if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) {
switch (c->dstFormat) {
case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
if (HAVE_7REGS) return yuva420_rgb32_MMX2;
break;
} else return yuv420_rgb32_MMX2;
case PIX_FMT_BGR32:
if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
if (HAVE_7REGS) return yuva420_bgr32_MMX2;
break;
} else return yuv420_bgr32_MMX2;
case PIX_FMT_RGB24: return yuv420_rgb24_MMX2;
case PIX_FMT_BGR24: return yuv420_bgr24_MMX2;
case PIX_FMT_RGB565: return yuv420_rgb16_MMX2;
case PIX_FMT_RGB555: return yuv420_rgb15_MMX2;
}
}
if (c->flags & SWS_CPU_CAPS_MMX) {
if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) {
switch (c->dstFormat) {
case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
if (HAVE_7REGS) return yuva420_rgb32_MMX;
break;
} else return yuv420_rgb32_MMX;
case PIX_FMT_BGR32:
if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
if (HAVE_7REGS) return yuva420_bgr32_MMX;
break;
} else return yuv420_bgr32_MMX;
case PIX_FMT_RGB24: return yuv420_rgb24_MMX;
case PIX_FMT_BGR24: return yuv420_bgr24_MMX;
case PIX_FMT_RGB565: return yuv420_rgb16_MMX;
case PIX_FMT_RGB555: return yuv420_rgb15_MMX;
case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
#if HAVE_7REGS
return yuva420_rgb32_MMX;
#endif
break;
} else return yuv420_rgb32_MMX;
case PIX_FMT_BGR32:
if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
#if HAVE_7REGS
return yuva420_bgr32_MMX;
#endif
break;
} else return yuv420_bgr32_MMX;
case PIX_FMT_RGB24: return yuv420_rgb24_MMX;
case PIX_FMT_BGR24: return yuv420_bgr24_MMX;
case PIX_FMT_RGB565: return yuv420_rgb16_MMX;
case PIX_FMT_RGB555: return yuv420_rgb15_MMX;
}
}
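All converters returned above share the SwsFunc signature of the yuv420_* functions in this file; a brief illustrative sketch of how such a pointer gets called (the real caller is ff_yuv2rgb_get_func_ptr(), shown further down, and convert_slice() here is a placeholder):
/* Sketch only: convert one full slice with whatever converter the
 * dispatcher picked; a NULL return means "no accelerated converter,
 * fall back to the generic C path". */
static int convert_slice(SwsContext *c, const uint8_t *src[], int srcStride[],
                         int srcSliceH, uint8_t *dst[], int dstStride[])
{
    SwsFunc f = ff_yuv2rgb_init_mmx(c);
    if (!f)
        return -1;
    return f(c, src, srcStride, 0, srcSliceH, dst, dstStride);
}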
......
......@@ -25,14 +25,7 @@
#undef EMMS
#undef SFENCE
#if HAVE_AMD3DNOW
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
#define EMMS "femms"
#else
#define EMMS "emms"
#endif
#if HAVE_MMX2
#if COMPILE_TEMPLATE_MMX2
#define MOVNTQ "movntq"
#define SFENCE "sfence"
#else
......@@ -159,7 +152,8 @@
} \
#define YUV2RGB_ENDFUNC \
__asm__ volatile (SFENCE"\n\t"EMMS); \
__asm__ volatile (SFENCE"\n\t" \
"emms \n\t"); \
return srcSliceH; \
#define IF0(x)
......@@ -188,6 +182,7 @@
"paddusb "GREEN_DITHER"(%4), %%mm2\n\t" \
"paddusb "RED_DITHER"(%4), %%mm1\n\t" \
#if !COMPILE_TEMPLATE_MMX2
static inline int RENAME(yuv420_rgb15)(SwsContext *c, const uint8_t *src[],
int srcStride[],
int srcSliceY, int srcSliceH,
......@@ -243,6 +238,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
YUV2RGB_OPERANDS
YUV2RGB_ENDFUNC
}
#endif /* !COMPILE_TEMPLATE_MMX2 */
#define RGB_PACK24(blue, red)\
"packuswb %%mm3, %%mm0 \n" /* R0 R2 R4 R6 R1 R3 R5 R7 */\
......@@ -259,7 +255,7 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, const uint8_t *src[],
"punpckhwd %%mm6, %%mm5 \n" /* R4 G4 B4 R5 R6 G6 B6 R7 */\
RGB_PACK24_B
#if HAVE_MMX2
#if COMPILE_TEMPLATE_MMX2
DECLARE_ASM_CONST(8, int16_t, mask1101[4]) = {-1,-1, 0,-1};
DECLARE_ASM_CONST(8, int16_t, mask0010[4]) = { 0, 0,-1, 0};
DECLARE_ASM_CONST(8, int16_t, mask0110[4]) = { 0,-1,-1, 0};
......@@ -366,6 +362,7 @@ static inline int RENAME(yuv420_bgr24)(SwsContext *c, const uint8_t *src[],
MOVNTQ " %%mm5, 16(%1)\n\t" \
MOVNTQ " %%mm"alpha", 24(%1)\n\t" \
#if !COMPILE_TEMPLATE_MMX2
static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[],
int srcStride[],
int srcSliceY, int srcSliceH,
......@@ -386,12 +383,12 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, const uint8_t *src[],
YUV2RGB_ENDFUNC
}
#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[],
int srcStride[],
int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
#if HAVE_7REGS
int y, h_size;
YUV2RGB_LOOP(4)
......@@ -406,9 +403,8 @@ static inline int RENAME(yuva420_rgb32)(SwsContext *c, const uint8_t *src[],
YUV2RGB_ENDLOOP(4)
YUV2RGB_OPERANDS_ALPHA
YUV2RGB_ENDFUNC
#endif
return 0;
}
#endif
static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[],
int srcStride[],
......@@ -430,12 +426,12 @@ static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t *src[],
YUV2RGB_ENDFUNC
}
#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA
static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
int srcStride[],
int srcSliceY, int srcSliceH,
uint8_t *dst[], int dstStride[])
{
#if HAVE_7REGS
int y, h_size;
YUV2RGB_LOOP(4)
......@@ -450,6 +446,7 @@ static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t *src[],
YUV2RGB_ENDLOOP(4)
YUV2RGB_OPERANDS_ALPHA
YUV2RGB_ENDFUNC
#endif
return 0;
}
#endif
#endif /* !COMPILE_TEMPLATE_MMX2 */
......@@ -32,7 +32,7 @@
#include "rgb2rgb.h"
#include "swscale.h"
#include "swscale_internal.h"
#include "libavutil/x86_cpu.h"
#include "libavutil/cpu.h"
#include "libavutil/bswap.h"
extern const uint8_t dither_4x4_16[4][8];
......@@ -579,24 +579,18 @@ CLOSEYUV2RGBFUNC(1)
SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
{
SwsFunc t = NULL;
#if HAVE_MMX
t = ff_yuv2rgb_init_mmx(c);
#endif
#if HAVE_VIS
t = ff_yuv2rgb_init_vis(c);
#endif
#if CONFIG_MLIB
t = ff_yuv2rgb_init_mlib(c);
#endif
#if HAVE_ALTIVEC
if (c->flags & SWS_CPU_CAPS_ALTIVEC)
t = ff_yuv2rgb_init_altivec(c);
#endif
#if ARCH_BFIN
if (c->flags & SWS_CPU_CAPS_BFIN)
if (HAVE_MMX) {
t = ff_yuv2rgb_init_mmx(c);
} else if (HAVE_VIS) {
t = ff_yuv2rgb_init_vis(c);
} else if (CONFIG_MLIB) {
t = ff_yuv2rgb_init_mlib(c);
} else if (HAVE_ALTIVEC) {
t = ff_yuv2rgb_init_altivec(c);
} else if (ARCH_BFIN) {
t = ff_yuv2rgb_get_func_ptr_bfin(c);
#endif
}
if (t)
return t;
......