Commit 52c522c7 authored by Michael Niedermayer

Merge remote-tracking branch 'qatar/master'

* qatar/master: (27 commits)
  asfdec: add side data to ASFStream packet instead of output packet.
  idroqdec: set AVFMTCTX_NOHEADER and create streams as they occur.
  nellymoserdec: Indicate that the decoder can handle changed parameters
  libavcodec: Apply parameter change side data when decoding audio
  flvdec: Add param change side data if the sample rate or channels have changed
  libavformat: Add a utility function for adding parameter change side data
  libavcodec: Define a side data type for parameter changes
  aacdec: Handle new extradata passed as side data
  flvdec: Export new AAC/H.264 extradata as side data on the next packet
  libavcodec: Define a side data type for new extradata
  flacdec: skip all track indices at once instead of looping.
  mxf: Add PictureEssenceCoding UL for V210.
  mxfdec: consider QuantizationBits between 17 and 24 to be pcm_s24*
  mxfenc: Add support for MPEG-2 MP@HL-14 in mxf container.
  mxf: H.264/MPEG-4 AVC Intra support
  configure: Show whether the safe bitstream reader is enabled
  x86: Tighten register constraints for decode_significance*_x86.
  Replace Subversion revisions in comments by Git hashes.
  h264_cabac: synchronize decode_significance_*_x86 conditionals
  w32threads: wait for the waked thread in pthread_cond_signal.
  ...

Conflicts:
	libavcodec/avcodec.h
	libavcodec/version.h
	libavformat/flvdec.c
	libavformat/utils.c
	tests/ref/lavfi/pixdesc
	tests/ref/lavfi/pixfmts_copy
	tests/ref/lavfi/pixfmts_null
	tests/ref/lavfi/pixfmts_scale
	tests/ref/lavfi/pixfmts_vflip
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents a40f43db e9dc9201
...@@ -3311,6 +3311,7 @@ elif enabled gcc; then ...@@ -3311,6 +3311,7 @@ elif enabled gcc; then
check_cflags -fno-tree-vectorize check_cflags -fno-tree-vectorize
check_cflags -Werror=implicit-function-declaration check_cflags -Werror=implicit-function-declaration
check_cflags -Werror=missing-prototypes check_cflags -Werror=missing-prototypes
check_cflags -Werror=declaration-after-statement
elif enabled llvm_gcc; then elif enabled llvm_gcc; then
check_cflags -mllvm -stack-alignment=16 check_cflags -mllvm -stack-alignment=16
elif enabled clang; then elif enabled clang; then
...@@ -3406,6 +3407,7 @@ echo "postprocessing support ${postproc-no}" ...@@ -3406,6 +3407,7 @@ echo "postprocessing support ${postproc-no}"
echo "new filter support ${avfilter-no}" echo "new filter support ${avfilter-no}"
echo "network support ${network-no}" echo "network support ${network-no}"
echo "threading support ${thread_type-no}" echo "threading support ${thread_type-no}"
echo "safe bitstream reader ${safe_bitstream_reader-no}"
echo "SDL support ${sdl-no}" echo "SDL support ${sdl-no}"
echo "Sun medialib support ${mlib-no}" echo "Sun medialib support ${mlib-no}"
echo "libdxva2 enabled ${dxva2-no}" echo "libdxva2 enabled ${dxva2-no}"
......
...@@ -722,16 +722,13 @@ static void decode_ltp(AACContext *ac, LongTermPrediction *ltp, ...@@ -722,16 +722,13 @@ static void decode_ltp(AACContext *ac, LongTermPrediction *ltp,
/** /**
* Decode Individual Channel Stream info; reference: table 4.6. * Decode Individual Channel Stream info; reference: table 4.6.
*
* @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
*/ */
static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics, static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
GetBitContext *gb, int common_window) GetBitContext *gb)
{ {
if (get_bits1(gb)) { if (get_bits1(gb)) {
av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n"); av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n");
memset(ics, 0, sizeof(IndividualChannelStream)); return AVERROR_INVALIDDATA;
return -1;
} }
ics->window_sequence[1] = ics->window_sequence[0]; ics->window_sequence[1] = ics->window_sequence[0];
ics->window_sequence[0] = get_bits(gb, 2); ics->window_sequence[0] = get_bits(gb, 2);
...@@ -766,13 +763,11 @@ static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics, ...@@ -766,13 +763,11 @@ static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
if (ics->predictor_present) { if (ics->predictor_present) {
if (ac->m4ac.object_type == AOT_AAC_MAIN) { if (ac->m4ac.object_type == AOT_AAC_MAIN) {
if (decode_prediction(ac, ics, gb)) { if (decode_prediction(ac, ics, gb)) {
memset(ics, 0, sizeof(IndividualChannelStream)); return AVERROR_INVALIDDATA;
return -1;
} }
} else if (ac->m4ac.object_type == AOT_AAC_LC) { } else if (ac->m4ac.object_type == AOT_AAC_LC) {
av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n"); av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
memset(ics, 0, sizeof(IndividualChannelStream)); return AVERROR_INVALIDDATA;
return -1;
} else { } else {
if ((ics->ltp.present = get_bits(gb, 1))) if ((ics->ltp.present = get_bits(gb, 1)))
decode_ltp(ac, &ics->ltp, gb, ics->max_sfb); decode_ltp(ac, &ics->ltp, gb, ics->max_sfb);
...@@ -784,8 +779,7 @@ static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics, ...@@ -784,8 +779,7 @@ static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics,
av_log(ac->avctx, AV_LOG_ERROR, av_log(ac->avctx, AV_LOG_ERROR,
"Number of scalefactor bands in group (%d) exceeds limit (%d).\n", "Number of scalefactor bands in group (%d) exceeds limit (%d).\n",
ics->max_sfb, ics->num_swb); ics->max_sfb, ics->num_swb);
memset(ics, 0, sizeof(IndividualChannelStream)); return AVERROR_INVALIDDATA;
return -1;
} }
return 0; return 0;
...@@ -1390,8 +1384,8 @@ static int decode_ics(AACContext *ac, SingleChannelElement *sce, ...@@ -1390,8 +1384,8 @@ static int decode_ics(AACContext *ac, SingleChannelElement *sce,
global_gain = get_bits(gb, 8); global_gain = get_bits(gb, 8);
if (!common_window && !scale_flag) { if (!common_window && !scale_flag) {
if (decode_ics_info(ac, ics, gb, 0) < 0) if (decode_ics_info(ac, ics, gb) < 0)
return -1; return AVERROR_INVALIDDATA;
} }
if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0) if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0)
...@@ -1507,8 +1501,8 @@ static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe) ...@@ -1507,8 +1501,8 @@ static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe)
common_window = get_bits1(gb); common_window = get_bits1(gb);
if (common_window) { if (common_window) {
if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1)) if (decode_ics_info(ac, &cpe->ch[0].ics, gb))
return -1; return AVERROR_INVALIDDATA;
i = cpe->ch[1].ics.use_kb_window[0]; i = cpe->ch[1].ics.use_kb_window[0];
cpe->ch[1].ics = cpe->ch[0].ics; cpe->ch[1].ics = cpe->ch[0].ics;
cpe->ch[1].ics.use_kb_window[1] = i; cpe->ch[1].ics.use_kb_window[1] = i;
...@@ -2282,12 +2276,31 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data, ...@@ -2282,12 +2276,31 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data,
static int aac_decode_frame(AVCodecContext *avctx, void *data, static int aac_decode_frame(AVCodecContext *avctx, void *data,
int *got_frame_ptr, AVPacket *avpkt) int *got_frame_ptr, AVPacket *avpkt)
{ {
AACContext *ac = avctx->priv_data;
const uint8_t *buf = avpkt->data; const uint8_t *buf = avpkt->data;
int buf_size = avpkt->size; int buf_size = avpkt->size;
GetBitContext gb; GetBitContext gb;
int buf_consumed; int buf_consumed;
int buf_offset; int buf_offset;
int err; int err;
int new_extradata_size;
const uint8_t *new_extradata = av_packet_get_side_data(avpkt,
AV_PKT_DATA_NEW_EXTRADATA,
&new_extradata_size);
if (new_extradata) {
av_free(avctx->extradata);
avctx->extradata = av_mallocz(new_extradata_size +
FF_INPUT_BUFFER_PADDING_SIZE);
if (!avctx->extradata)
return AVERROR(ENOMEM);
avctx->extradata_size = new_extradata_size;
memcpy(avctx->extradata, new_extradata, new_extradata_size);
if (decode_audio_specific_config(ac, ac->avctx, &ac->m4ac,
avctx->extradata,
avctx->extradata_size*8, 1) < 0)
return AVERROR_INVALIDDATA;
}
init_get_bits(&gb, buf, buf_size * 8); init_get_bits(&gb, buf, buf_size * 8);
......
...@@ -791,6 +791,10 @@ typedef struct RcOverride{ ...@@ -791,6 +791,10 @@ typedef struct RcOverride{
* Codec supports slice-based (or partition-based) multithreading. * Codec supports slice-based (or partition-based) multithreading.
*/ */
#define CODEC_CAP_SLICE_THREADS 0x2000 #define CODEC_CAP_SLICE_THREADS 0x2000
/**
* Codec supports changed parameters at any point.
*/
#define CODEC_CAP_PARAM_CHANGE 0x4000
/** /**
* Codec is lossless. * Codec is lossless.
*/ */
...@@ -877,6 +881,8 @@ typedef struct AVPanScan{ ...@@ -877,6 +881,8 @@ typedef struct AVPanScan{
enum AVPacketSideDataType { enum AVPacketSideDataType {
AV_PKT_DATA_PALETTE, AV_PKT_DATA_PALETTE,
AV_PKT_DATA_NEW_EXTRADATA,
AV_PKT_DATA_PARAM_CHANGE,
}; };
typedef struct AVPacket { typedef struct AVPacket {
...@@ -945,6 +951,27 @@ typedef struct AVPacket { ...@@ -945,6 +951,27 @@ typedef struct AVPacket {
#define AV_PKT_FLAG_KEY 0x0001 ///< The packet contains a keyframe #define AV_PKT_FLAG_KEY 0x0001 ///< The packet contains a keyframe
#define AV_PKT_FLAG_CORRUPT 0x0002 ///< The packet content is corrupted #define AV_PKT_FLAG_CORRUPT 0x0002 ///< The packet content is corrupted
/**
 * Flags describing which fields follow in an AV_PKT_DATA_PARAM_CHANGE
 * side data packet.
 *
 * The payload starts with a 32-bit little-endian flag word; each optional
 * field is present only when its flag bit is set, and the fields appear in
 * the order shown here:
 *
 * An AV_PKT_DATA_PARAM_CHANGE side data packet is laid out as follows:
 * u32le param_flags
 * if (param_flags & AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT)
 * s32le channel_count
 * if (param_flags & AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_LAYOUT)
 * u64le channel_layout
 * if (param_flags & AV_SIDE_DATA_PARAM_CHANGE_SAMPLE_RATE)
 * s32le sample_rate
 * if (param_flags & AV_SIDE_DATA_PARAM_CHANGE_DIMENSIONS)
 * s32le width
 * s32le height
 */
enum AVSideDataParamChangeFlags {
/* payload carries an s32le channel_count */
AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT = 0x0001,
/* payload carries a u64le channel_layout */
AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_LAYOUT = 0x0002,
/* payload carries an s32le sample_rate */
AV_SIDE_DATA_PARAM_CHANGE_SAMPLE_RATE = 0x0004,
/* payload carries an s32le width followed by an s32le height */
AV_SIDE_DATA_PARAM_CHANGE_DIMENSIONS = 0x0008,
};
/** /**
* Audio Video Frame. * Audio Video Frame.
* New fields can be added to the end of AVFRAME with minor version * New fields can be added to the end of AVFRAME with minor version
......
...@@ -4195,13 +4195,13 @@ int main(void){ ...@@ -4195,13 +4195,13 @@ int main(void){
s= show_bits(&gb, 24); s= show_bits(&gb, 24);
START_TIMER {START_TIMER
j= get_ue_golomb(&gb); j= get_ue_golomb(&gb);
if(j != i){ if(j != i){
printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s); printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
// return -1; // return -1;
} }
STOP_TIMER("get_ue_golomb"); STOP_TIMER("get_ue_golomb");}
} }
...@@ -4220,13 +4220,13 @@ int main(void){ ...@@ -4220,13 +4220,13 @@ int main(void){
s= show_bits(&gb, 24); s= show_bits(&gb, 24);
START_TIMER {START_TIMER
j= get_se_golomb(&gb); j= get_se_golomb(&gb);
if(j != i - COUNT/2){ if(j != i - COUNT/2){
printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s); printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
// return -1; // return -1;
} }
STOP_TIMER("get_se_golomb"); STOP_TIMER("get_se_golomb");}
} }
printf("Testing RBSP\n"); printf("Testing RBSP\n");
......
...@@ -1657,7 +1657,7 @@ decode_cabac_residual_internal(H264Context *h, DCTELEM *block, ...@@ -1657,7 +1657,7 @@ decode_cabac_residual_internal(H264Context *h, DCTELEM *block,
index[coeff_count++] = last;\ index[coeff_count++] = last;\
} }
const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD]; const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
#if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) #if ARCH_X86 && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index,
last_coeff_ctx_base, sig_off); last_coeff_ctx_base, sig_off);
} else { } else {
......
...@@ -35,8 +35,9 @@ static void lpc_apply_welch_window_c(const int32_t *data, int len, ...@@ -35,8 +35,9 @@ static void lpc_apply_welch_window_c(const int32_t *data, int len,
double w; double w;
double c; double c;
assert(!(len&1)); //the optimization in r11881 does not support odd len /* The optimization in commit fa4ed8c does not support odd len.
//if someone wants odd len extend the change in r11881 * If someone wants odd len extend that change. */
assert(!(len & 1));
n2 = (len >> 1); n2 = (len >> 1);
c = 2.0 / (len - 1.0); c = 2.0 / (len - 1.0);
......
...@@ -226,7 +226,7 @@ AVCodec ff_nellymoser_decoder = { ...@@ -226,7 +226,7 @@ AVCodec ff_nellymoser_decoder = {
.init = decode_init, .init = decode_init,
.close = decode_end, .close = decode_end,
.decode = decode_tag, .decode = decode_tag,
.capabilities = CODEC_CAP_DR1, .capabilities = CODEC_CAP_DR1 | CODEC_CAP_PARAM_CHANGE,
.long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"), .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
.sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLT, .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLT,
AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16,
......
...@@ -41,6 +41,7 @@ ...@@ -41,6 +41,7 @@
#include "thread.h" #include "thread.h"
#include "audioconvert.h" #include "audioconvert.h"
#include "internal.h" #include "internal.h"
#include "bytestream.h"
#include <stdlib.h> #include <stdlib.h>
#include <stdarg.h> #include <stdarg.h>
#include <limits.h> #include <limits.h>
...@@ -1028,6 +1029,47 @@ int attribute_align_arg avcodec_decode_audio3(AVCodecContext *avctx, int16_t *sa ...@@ -1028,6 +1029,47 @@ int attribute_align_arg avcodec_decode_audio3(AVCodecContext *avctx, int16_t *sa
} }
#endif #endif
/**
 * Apply AV_PKT_DATA_PARAM_CHANGE side data from a packet to the codec context.
 *
 * Nothing is done unless the codec advertises CODEC_CAP_PARAM_CHANGE and the
 * packet carries a parameter-change payload of at least four bytes (the flag
 * word). The optional fields are then consumed in order: channel count,
 * channel layout, sample rate, dimensions. Parsing stops at the first field
 * that does not fit in the remaining payload; fields consumed before that
 * point stay applied.
 *
 * @param avctx codec context whose channels, channel_layout, sample_rate,
 *              width and height may be overwritten
 * @param avpkt packet to search for the side data
 */
static void apply_param_change(AVCodecContext *avctx, AVPacket *avpkt)
{
    const uint8_t *cursor;
    uint32_t change_mask;
    int remaining = 0;

    if (!(avctx->codec->capabilities & CODEC_CAP_PARAM_CHANGE))
        return;

    cursor = av_packet_get_side_data(avpkt, AV_PKT_DATA_PARAM_CHANGE,
                                     &remaining);
    if (!cursor || remaining < 4)
        return;

    change_mask = bytestream_get_le32(&cursor);
    remaining  -= 4;

    /* Every field is length-checked right before it is read. */
    if (change_mask & AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT) {
        if (remaining < 4)
            return;
        avctx->channels = bytestream_get_le32(&cursor);
        remaining      -= 4;
    }

    if (change_mask & AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_LAYOUT) {
        if (remaining < 8)
            return;
        avctx->channel_layout = bytestream_get_le64(&cursor);
        remaining            -= 8;
    }

    if (change_mask & AV_SIDE_DATA_PARAM_CHANGE_SAMPLE_RATE) {
        if (remaining < 4)
            return;
        avctx->sample_rate = bytestream_get_le32(&cursor);
        remaining         -= 4;
    }

    if (change_mask & AV_SIDE_DATA_PARAM_CHANGE_DIMENSIONS) {
        if (remaining < 8)
            return;
        avctx->width  = bytestream_get_le32(&cursor);
        avctx->height = bytestream_get_le32(&cursor);
        remaining    -= 8;
    }
}
int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx, int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
AVFrame *frame, AVFrame *frame,
int *got_frame_ptr, int *got_frame_ptr,
...@@ -1044,6 +1086,8 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx, ...@@ -1044,6 +1086,8 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
if ((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size) { if ((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size) {
av_packet_split_side_data(avpkt); av_packet_split_side_data(avpkt);
apply_param_change(avctx, avpkt);
avctx->pkt = avpkt; avctx->pkt = avpkt;
ret = avctx->codec->decode(avctx, frame, got_frame_ptr, avpkt); ret = avctx->codec->decode(avctx, frame, got_frame_ptr, avpkt);
if (ret >= 0 && *got_frame_ptr) { if (ret >= 0 && *got_frame_ptr) {
......
...@@ -21,8 +21,8 @@ ...@@ -21,8 +21,8 @@
#define AVCODEC_VERSION_H #define AVCODEC_VERSION_H
#define LIBAVCODEC_VERSION_MAJOR 53 #define LIBAVCODEC_VERSION_MAJOR 53
#define LIBAVCODEC_VERSION_MINOR 46 #define LIBAVCODEC_VERSION_MINOR 47
#define LIBAVCODEC_VERSION_MICRO 1 #define LIBAVCODEC_VERSION_MICRO 0
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
LIBAVCODEC_VERSION_MINOR, \ LIBAVCODEC_VERSION_MINOR, \
......
...@@ -139,7 +139,7 @@ static void pthread_cond_init(pthread_cond_t *cond, const void *unused_attr) ...@@ -139,7 +139,7 @@ static void pthread_cond_init(pthread_cond_t *cond, const void *unused_attr)
win32_cond->semaphore = CreateSemaphore(NULL, 0, 0x7fffffff, NULL); win32_cond->semaphore = CreateSemaphore(NULL, 0, 0x7fffffff, NULL);
if (!win32_cond->semaphore) if (!win32_cond->semaphore)
return; return;
win32_cond->waiters_done = CreateEvent(NULL, FALSE, FALSE, NULL); win32_cond->waiters_done = CreateEvent(NULL, TRUE, FALSE, NULL);
if (!win32_cond->waiters_done) if (!win32_cond->waiters_done)
return; return;
...@@ -204,11 +204,10 @@ static void pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) ...@@ -204,11 +204,10 @@ static void pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
/* non native condition variables */ /* non native condition variables */
pthread_mutex_lock(&win32_cond->mtx_broadcast); pthread_mutex_lock(&win32_cond->mtx_broadcast);
pthread_mutex_unlock(&win32_cond->mtx_broadcast);
pthread_mutex_lock(&win32_cond->mtx_waiter_count); pthread_mutex_lock(&win32_cond->mtx_waiter_count);
win32_cond->waiter_count++; win32_cond->waiter_count++;
pthread_mutex_unlock(&win32_cond->mtx_waiter_count); pthread_mutex_unlock(&win32_cond->mtx_waiter_count);
pthread_mutex_unlock(&win32_cond->mtx_broadcast);
// unlock the external mutex // unlock the external mutex
pthread_mutex_unlock(mutex); pthread_mutex_unlock(mutex);
...@@ -216,7 +215,7 @@ static void pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) ...@@ -216,7 +215,7 @@ static void pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
pthread_mutex_lock(&win32_cond->mtx_waiter_count); pthread_mutex_lock(&win32_cond->mtx_waiter_count);
win32_cond->waiter_count--; win32_cond->waiter_count--;
last_waiter = !win32_cond->waiter_count && win32_cond->is_broadcast; last_waiter = !win32_cond->waiter_count || !win32_cond->is_broadcast;
pthread_mutex_unlock(&win32_cond->mtx_waiter_count); pthread_mutex_unlock(&win32_cond->mtx_waiter_count);
if (last_waiter) if (last_waiter)
...@@ -235,13 +234,20 @@ static void pthread_cond_signal(pthread_cond_t *cond) ...@@ -235,13 +234,20 @@ static void pthread_cond_signal(pthread_cond_t *cond)
return; return;
} }
pthread_mutex_lock(&win32_cond->mtx_broadcast);
/* non-native condition variables */ /* non-native condition variables */
pthread_mutex_lock(&win32_cond->mtx_waiter_count); pthread_mutex_lock(&win32_cond->mtx_waiter_count);
have_waiter = win32_cond->waiter_count; have_waiter = win32_cond->waiter_count;
pthread_mutex_unlock(&win32_cond->mtx_waiter_count); pthread_mutex_unlock(&win32_cond->mtx_waiter_count);
if (have_waiter) if (have_waiter) {
ReleaseSemaphore(win32_cond->semaphore, 1, NULL); ReleaseSemaphore(win32_cond->semaphore, 1, NULL);
WaitForSingleObject(win32_cond->waiters_done, INFINITE);
ResetEvent(win32_cond->waiters_done);
}
pthread_mutex_unlock(&win32_cond->mtx_broadcast);
} }
static void w32thread_init(void) static void w32thread_init(void)
......
...@@ -1338,6 +1338,7 @@ static int decode_packet(AVCodecContext *avctx, ...@@ -1338,6 +1338,7 @@ static int decode_packet(AVCodecContext *avctx,
*data_size = 0; *data_size = 0;
if (s->packet_done || s->packet_loss) { if (s->packet_done || s->packet_loss) {
int seekable_frame_in_packet, spliced_packet;
s->packet_done = 0; s->packet_done = 0;
/** sanity check for the buffer length */ /** sanity check for the buffer length */
...@@ -1351,8 +1352,8 @@ static int decode_packet(AVCodecContext *avctx, ...@@ -1351,8 +1352,8 @@ static int decode_packet(AVCodecContext *avctx,
/** parse packet header */ /** parse packet header */
init_get_bits(gb, buf, s->buf_bit_size); init_get_bits(gb, buf, s->buf_bit_size);
packet_sequence_number = get_bits(gb, 4); packet_sequence_number = get_bits(gb, 4);
int seekable_frame_in_packet = get_bits1(gb); seekable_frame_in_packet = get_bits1(gb);
int spliced_packet = get_bits1(gb); spliced_packet = get_bits1(gb);
/** get number of bits that need to be added to the previous frame */ /** get number of bits that need to be added to the previous frame */
num_bits_prev_frame = get_bits(gb, s->log2_frame_size); num_bits_prev_frame = get_bits(gb, s->log2_frame_size);
......
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
//FIXME use some macros to avoid duplicating get_cabac (cannot be done yet //FIXME use some macros to avoid duplicating get_cabac (cannot be done yet
//as that would make optimization work hard) //as that would make optimization work hard)
#if HAVE_6REGS && !defined(BROKEN_RELOCATIONS) #if HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
static int decode_significance_x86(CABACContext *c, int max_coeff, static int decode_significance_x86(CABACContext *c, int max_coeff,
uint8_t *significant_coeff_ctx_base, uint8_t *significant_coeff_ctx_base,
int *index, x86_reg last_off){ int *index, x86_reg last_off){
...@@ -144,6 +144,6 @@ static int decode_significance_8x8_x86(CABACContext *c, ...@@ -144,6 +144,6 @@ static int decode_significance_8x8_x86(CABACContext *c,
); );
return coeff_count; return coeff_count;
} }
#endif /* HAVE_6REGS && !defined(BROKEN_RELOCATIONS) */ #endif /* HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS) */
#endif /* AVCODEC_X86_H264_I386_H */ #endif /* AVCODEC_X86_H264_I386_H */
...@@ -987,7 +987,7 @@ static int ff_asf_parse_packet(AVFormatContext *s, AVIOContext *pb, AVPacket *pk ...@@ -987,7 +987,7 @@ static int ff_asf_parse_packet(AVFormatContext *s, AVIOContext *pb, AVPacket *pk
asf_st->packet_pos= asf->packet_pos; asf_st->packet_pos= asf->packet_pos;
if (asf_st->pkt.data && asf_st->palette_changed) { if (asf_st->pkt.data && asf_st->palette_changed) {
uint8_t *pal; uint8_t *pal;
pal = av_packet_new_side_data(pkt, AV_PKT_DATA_PALETTE, pal = av_packet_new_side_data(&asf_st->pkt, AV_PKT_DATA_PALETTE,
AVPALETTE_SIZE); AVPALETTE_SIZE);
if (!pal) { if (!pal) {
av_log(s, AV_LOG_ERROR, "Cannot append palette to packet\n"); av_log(s, AV_LOG_ERROR, "Cannot append palette to packet\n");
......
...@@ -102,7 +102,7 @@ static int flac_read_header(AVFormatContext *s, ...@@ -102,7 +102,7 @@ static int flac_read_header(AVFormatContext *s,
uint8_t isrc[13]; uint8_t isrc[13];
uint64_t start; uint64_t start;
const uint8_t *offset; const uint8_t *offset;
int i, j, chapters, track, ti; int i, chapters, track, ti;
if (metadata_size < 431) if (metadata_size < 431)
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
offset = buffer + 395; offset = buffer + 395;
...@@ -119,8 +119,7 @@ static int flac_read_header(AVFormatContext *s, ...@@ -119,8 +119,7 @@ static int flac_read_header(AVFormatContext *s,
offset += 14; offset += 14;
ti = bytestream_get_byte(&offset); ti = bytestream_get_byte(&offset);
if (ti <= 0) return AVERROR_INVALIDDATA; if (ti <= 0) return AVERROR_INVALIDDATA;
for (j = 0; j < ti; j++) offset += ti * 12;
offset += 12;
avpriv_new_chapter(s, track, st->time_base, start, AV_NOPTS_VALUE, isrc); avpriv_new_chapter(s, track, st->time_base, start, AV_NOPTS_VALUE, isrc);
} }
} else { } else {
......
...@@ -66,6 +66,7 @@ enum { ...@@ -66,6 +66,7 @@ enum {
FLV_STREAM_TYPE_VIDEO, FLV_STREAM_TYPE_VIDEO,
FLV_STREAM_TYPE_AUDIO, FLV_STREAM_TYPE_AUDIO,
FLV_STREAM_TYPE_DATA, FLV_STREAM_TYPE_DATA,
FLV_STREAM_TYPE_NB,
}; };
enum { enum {
......
...@@ -37,6 +37,10 @@ ...@@ -37,6 +37,10 @@
typedef struct { typedef struct {
int wrong_dts; ///< wrong dts due to negative cts int wrong_dts; ///< wrong dts due to negative cts
uint8_t *new_extradata[FLV_STREAM_TYPE_NB];
int new_extradata_size[FLV_STREAM_TYPE_NB];
int last_sample_rate;
int last_channels;
} FLVContext; } FLVContext;
static int flv_probe(AVProbeData *p) static int flv_probe(AVProbeData *p)
...@@ -50,8 +54,7 @@ static int flv_probe(AVProbeData *p) ...@@ -50,8 +54,7 @@ static int flv_probe(AVProbeData *p)
return 0; return 0;
} }
static void flv_set_audio_codec(AVFormatContext *s, AVStream *astream, int flv_codecid) { static void flv_set_audio_codec(AVFormatContext *s, AVStream *astream, AVCodecContext *acodec, int flv_codecid) {
AVCodecContext *acodec = astream->codec;
switch(flv_codecid) { switch(flv_codecid) {
//no distinction between S16 and S8 PCM codec flags //no distinction between S16 and S8 PCM codec flags
case FLV_CODECID_PCM: case FLV_CODECID_PCM:
...@@ -411,6 +414,15 @@ static int flv_read_header(AVFormatContext *s, ...@@ -411,6 +414,15 @@ static int flv_read_header(AVFormatContext *s,
return 0; return 0;
} }
/**
 * Demuxer close callback: release any extradata blobs still queued in the
 * FLV private context.
 *
 * @param s format context whose priv_data is the FLVContext
 * @return always 0
 */
static int flv_read_close(AVFormatContext *s)
{
    FLVContext *flv = s->priv_data;
    uint8_t **slot  = flv->new_extradata;
    uint8_t **end   = flv->new_extradata + FLV_STREAM_TYPE_NB;

    /* One queued-extradata slot exists per FLV stream type. */
    for (; slot < end; slot++)
        av_freep(slot);

    return 0;
}
static int flv_get_extradata(AVFormatContext *s, AVStream *st, int size) static int flv_get_extradata(AVFormatContext *s, AVStream *st, int size)
{ {
av_free(st->codec->extradata); av_free(st->codec->extradata);
...@@ -422,6 +434,18 @@ static int flv_get_extradata(AVFormatContext *s, AVStream *st, int size) ...@@ -422,6 +434,18 @@ static int flv_get_extradata(AVFormatContext *s, AVStream *st, int size)
return 0; return 0;
} }
/**
 * Read a new extradata blob from the input and queue it for the given
 * stream type, replacing any blob queued earlier.
 *
 * The blob is stored in flv->new_extradata[stream] with its length in
 * flv->new_extradata_size[stream]; the buffer is allocated with av_mallocz
 * and is FF_INPUT_BUFFER_PADDING_SIZE bytes larger than the payload.
 *
 * On any failure the slot is left empty (NULL pointer, size 0) so that a
 * truncated or partially read blob is never handed on as valid extradata.
 *
 * @param flv    FLV demuxer private context
 * @param pb     I/O context positioned at the start of the extradata
 * @param stream stream type index selecting the slot (FLV_STREAM_TYPE_*)
 * @param size   number of bytes to read; must not be negative
 * @return 0 on success, AVERROR(EINVAL) for a negative size,
 *         AVERROR(ENOMEM) if allocation fails, the avio_read error code if
 *         reading fails, or AVERROR(EIO) on a short read
 */
static int flv_queue_extradata(FLVContext *flv, AVIOContext *pb, int stream,
                               int size)
{
    int got;

    /* Discard the previous blob first so pointer and size never disagree. */
    av_free(flv->new_extradata[stream]);
    flv->new_extradata[stream]      = NULL;
    flv->new_extradata_size[stream] = 0;

    /* The caller derives size by subtraction; a damaged tag can make it
     * negative, which must not reach the allocator or the reader. */
    if (size < 0)
        return AVERROR(EINVAL);

    flv->new_extradata[stream] = av_mallocz(size + FF_INPUT_BUFFER_PADDING_SIZE);
    if (!flv->new_extradata[stream])
        return AVERROR(ENOMEM);

    got = avio_read(pb, flv->new_extradata[stream], size);
    if (got != size) {
        /* Short read or I/O error: do not keep a half-filled blob around. */
        av_free(flv->new_extradata[stream]);
        flv->new_extradata[stream] = NULL;
        return got < 0 ? got : AVERROR(EIO);
    }

    flv->new_extradata_size[stream] = size;
    return 0;
}
static int flv_read_packet(AVFormatContext *s, AVPacket *pkt) static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
{ {
FLVContext *flv = s->priv_data; FLVContext *flv = s->priv_data;
...@@ -429,6 +453,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt) ...@@ -429,6 +453,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
int stream_type=-1; int stream_type=-1;
int64_t next, pos; int64_t next, pos;
int64_t dts, pts = AV_NOPTS_VALUE; int64_t dts, pts = AV_NOPTS_VALUE;
int sample_rate, channels;
AVStream *st = NULL; AVStream *st = NULL;
for(;;avio_skip(s->pb, 4)){ /* pkt size is repeated at end. skip it */ for(;;avio_skip(s->pb, 4)){ /* pkt size is repeated at end. skip it */
...@@ -518,13 +543,24 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt) ...@@ -518,13 +543,24 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
} }
if(stream_type == FLV_STREAM_TYPE_AUDIO){ if(stream_type == FLV_STREAM_TYPE_AUDIO){
int bits_per_coded_sample;
channels = (flags & FLV_AUDIO_CHANNEL_MASK) == FLV_STEREO ? 2 : 1;
sample_rate = (44100 << ((flags & FLV_AUDIO_SAMPLERATE_MASK) >> FLV_AUDIO_SAMPLERATE_OFFSET) >> 3);
bits_per_coded_sample = (flags & FLV_AUDIO_SAMPLESIZE_MASK) ? 16 : 8;
if(!st->codec->channels || !st->codec->sample_rate || !st->codec->bits_per_coded_sample) { if(!st->codec->channels || !st->codec->sample_rate || !st->codec->bits_per_coded_sample) {
st->codec->channels = (flags & FLV_AUDIO_CHANNEL_MASK) == FLV_STEREO ? 2 : 1; st->codec->channels = channels;
st->codec->sample_rate = (44100 << ((flags & FLV_AUDIO_SAMPLERATE_MASK) >> FLV_AUDIO_SAMPLERATE_OFFSET) >> 3); st->codec->sample_rate = sample_rate;
st->codec->bits_per_coded_sample = (flags & FLV_AUDIO_SAMPLESIZE_MASK) ? 16 : 8; st->codec->bits_per_coded_sample = bits_per_coded_sample;
} }
if(!st->codec->codec_id){ if(!st->codec->codec_id){
flv_set_audio_codec(s, st, flags & FLV_AUDIO_CODECID_MASK); flv_set_audio_codec(s, st, st->codec, flags & FLV_AUDIO_CODECID_MASK);
flv->last_sample_rate = st->codec->sample_rate;
flv->last_channels = st->codec->channels;
} else {
AVCodecContext ctx;
ctx.sample_rate = sample_rate;
flv_set_audio_codec(s, st, &ctx, flags & FLV_AUDIO_CODECID_MASK);
sample_rate = ctx.sample_rate;
} }
} else if(stream_type == FLV_STREAM_TYPE_VIDEO) { } else if(stream_type == FLV_STREAM_TYPE_VIDEO) {
size -= flv_set_video_codec(s, st, flags & FLV_VIDEO_CODECID_MASK); size -= flv_set_video_codec(s, st, flags & FLV_VIDEO_CODECID_MASK);
...@@ -545,8 +581,13 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt) ...@@ -545,8 +581,13 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
if (flv->wrong_dts) if (flv->wrong_dts)
dts = AV_NOPTS_VALUE; dts = AV_NOPTS_VALUE;
} }
if (type == 0 && (!st->codec->extradata || st->codec->codec_id != CODEC_ID_H264)) {
if (type == 0 && !st->codec->extradata) { if (st->codec->extradata) {
if ((ret = flv_queue_extradata(flv, s->pb, stream_type, size)) < 0)
return ret;
ret = AVERROR(EAGAIN);
goto leave;
}
if ((ret = flv_get_extradata(s, st, size)) < 0) if ((ret = flv_get_extradata(s, st, size)) < 0)
return ret; return ret;
if (st->codec->codec_id == CODEC_ID_AAC) { if (st->codec->codec_id == CODEC_ID_AAC) {
...@@ -583,8 +624,22 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt) ...@@ -583,8 +624,22 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
pkt->dts = dts; pkt->dts = dts;
pkt->pts = pts == AV_NOPTS_VALUE ? dts : pts; pkt->pts = pts == AV_NOPTS_VALUE ? dts : pts;
pkt->stream_index = st->index; pkt->stream_index = st->index;
if(st->codec->codec_id == CODEC_ID_NELLYMOSER) if (flv->new_extradata[stream_type]) {
av_packet_new_side_data(pkt, 'F', 1)[0]= flags; uint8_t *side = av_packet_new_side_data(pkt, AV_PKT_DATA_NEW_EXTRADATA,
flv->new_extradata_size[stream_type]);
if (side) {
memcpy(side, flv->new_extradata[stream_type],
flv->new_extradata_size[stream_type]);
av_freep(&flv->new_extradata[stream_type]);
flv->new_extradata_size[stream_type] = 0;
}
}
if (stream_type == FLV_STREAM_TYPE_AUDIO && (sample_rate != flv->last_sample_rate ||
channels != flv->last_channels)) {
flv->last_sample_rate = sample_rate;
flv->last_channels = channels;
ff_add_param_change(pkt, channels, 0, sample_rate, 0, 0);
}
if ( stream_type == FLV_STREAM_TYPE_AUDIO || if ( stream_type == FLV_STREAM_TYPE_AUDIO ||
((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY) || ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY) ||
...@@ -640,6 +695,7 @@ AVInputFormat ff_flv_demuxer = { ...@@ -640,6 +695,7 @@ AVInputFormat ff_flv_demuxer = {
#if 0 #if 0
.read_seek2 = flv_read_seek2, .read_seek2 = flv_read_seek2,
#endif #endif
.read_close = flv_read_close,
.extensions = "flv", .extensions = "flv",
.value = CODEC_ID_FLV1, .value = CODEC_ID_FLV1,
}; };
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
typedef struct RoqDemuxContext { typedef struct RoqDemuxContext {
int frame_rate;
int width; int width;
int height; int height;
int audio_channels; int audio_channels;
...@@ -71,29 +72,21 @@ static int roq_read_header(AVFormatContext *s, ...@@ -71,29 +72,21 @@ static int roq_read_header(AVFormatContext *s,
{ {
RoqDemuxContext *roq = s->priv_data; RoqDemuxContext *roq = s->priv_data;
AVIOContext *pb = s->pb; AVIOContext *pb = s->pb;
int framerate;
AVStream *st;
unsigned char preamble[RoQ_CHUNK_PREAMBLE_SIZE]; unsigned char preamble[RoQ_CHUNK_PREAMBLE_SIZE];
/* get the main header */ /* get the main header */
if (avio_read(pb, preamble, RoQ_CHUNK_PREAMBLE_SIZE) != if (avio_read(pb, preamble, RoQ_CHUNK_PREAMBLE_SIZE) !=
RoQ_CHUNK_PREAMBLE_SIZE) RoQ_CHUNK_PREAMBLE_SIZE)
return AVERROR(EIO); return AVERROR(EIO);
framerate = AV_RL16(&preamble[6]); roq->frame_rate = AV_RL16(&preamble[6]);
/* init private context parameters */ /* init private context parameters */
roq->width = roq->height = roq->audio_channels = roq->video_pts = roq->width = roq->height = roq->audio_channels = roq->video_pts =
roq->audio_frame_count = 0; roq->audio_frame_count = 0;
roq->audio_stream_index = -1; roq->audio_stream_index = -1;
roq->video_stream_index = -1;
st = avformat_new_stream(s, NULL); s->ctx_flags |= AVFMTCTX_NOHEADER;
if (!st)
return AVERROR(ENOMEM);
avpriv_set_pts_info(st, 63, 1, framerate);
roq->video_stream_index = st->index;
st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
st->codec->codec_id = CODEC_ID_ROQ;
st->codec->codec_tag = 0; /* no fourcc */
return 0; return 0;
} }
...@@ -131,8 +124,16 @@ static int roq_read_packet(AVFormatContext *s, ...@@ -131,8 +124,16 @@ static int roq_read_packet(AVFormatContext *s,
switch (chunk_type) { switch (chunk_type) {
case RoQ_INFO: case RoQ_INFO:
if (!roq->width || !roq->height) { if (roq->video_stream_index == -1) {
AVStream *st = s->streams[roq->video_stream_index]; AVStream *st = avformat_new_stream(s, NULL);
if (!st)
return AVERROR(ENOMEM);
avpriv_set_pts_info(st, 63, 1, roq->frame_rate);
roq->video_stream_index = st->index;
st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
st->codec->codec_id = CODEC_ID_ROQ;
st->codec->codec_tag = 0; /* no fourcc */
if (avio_read(pb, preamble, RoQ_CHUNK_PREAMBLE_SIZE) != RoQ_CHUNK_PREAMBLE_SIZE) if (avio_read(pb, preamble, RoQ_CHUNK_PREAMBLE_SIZE) != RoQ_CHUNK_PREAMBLE_SIZE)
return AVERROR(EIO); return AVERROR(EIO);
st->codec->width = roq->width = AV_RL16(preamble); st->codec->width = roq->width = AV_RL16(preamble);
......
...@@ -298,4 +298,12 @@ int64_t ff_gen_search(AVFormatContext *s, int stream_index, ...@@ -298,4 +298,12 @@ int64_t ff_gen_search(AVFormatContext *s, int stream_index,
void avpriv_set_pts_info(AVStream *s, int pts_wrap_bits, void avpriv_set_pts_info(AVStream *s, int pts_wrap_bits,
unsigned int pts_num, unsigned int pts_den); unsigned int pts_num, unsigned int pts_den);
/**
* Add side data to a packet for changing parameters to the given values.
* Parameters set to 0 aren't included in the change.
*
* Width and height are handled as a pair: if at least one of them is
* non-zero, both values are included in the change.
*
* @param pkt            packet that receives the side data
* @param channels       new channel count, or 0 to leave it out
* @param channel_layout new channel layout, or 0 to leave it out
* @param sample_rate    new sample rate, or 0 to leave it out
* @param width          new width, see the note on dimensions above
* @param height         new height, see the note on dimensions above
* @return 0 on success, AVERROR(EINVAL) if pkt is NULL, AVERROR(ENOMEM) if
*         the side data could not be added to the packet
*/
int ff_add_param_change(AVPacket *pkt, int32_t channels,
uint64_t channel_layout, int32_t sample_rate,
int32_t width, int32_t height);
#endif /* AVFORMAT_INTERNAL_H */ #endif /* AVFORMAT_INTERNAL_H */
...@@ -124,13 +124,16 @@ int ff_network_inited_globally; ...@@ -124,13 +124,16 @@ int ff_network_inited_globally;
int ff_network_init(void) int ff_network_init(void)
{ {
#if HAVE_WINSOCK2_H
WSADATA wsaData;
#endif
if (!ff_network_inited_globally) if (!ff_network_inited_globally)
av_log(NULL, AV_LOG_WARNING, "Using network protocols without global " av_log(NULL, AV_LOG_WARNING, "Using network protocols without global "
"network initialization. Please use " "network initialization. Please use "
"avformat_network_init(), this will " "avformat_network_init(), this will "
"become mandatory later.\n"); "become mandatory later.\n");
#if HAVE_WINSOCK2_H #if HAVE_WINSOCK2_H
WSADATA wsaData;
if (WSAStartup(MAKEWORD(1,1), &wsaData)) if (WSAStartup(MAKEWORD(1,1), &wsaData))
return 0; return 0;
#endif #endif
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "internal.h" #include "internal.h"
#include "libavcodec/internal.h" #include "libavcodec/internal.h"
#include "libavcodec/raw.h" #include "libavcodec/raw.h"
#include "libavcodec/bytestream.h"
#include "libavutil/opt.h" #include "libavutil/opt.h"
#include "libavutil/dict.h" #include "libavutil/dict.h"
#include "libavutil/pixdesc.h" #include "libavutil/pixdesc.h"
...@@ -4290,3 +4291,45 @@ int avformat_network_deinit(void) ...@@ -4290,3 +4291,45 @@ int avformat_network_deinit(void)
#endif #endif
return 0; return 0;
} }
/**
 * Append an AV_PKT_DATA_PARAM_CHANGE side data entry to a packet.
 *
 * The payload is written little-endian: a 32-bit flags word followed by one
 * field per flagged parameter, in the order channel count (32 bit), channel
 * layout (64 bit), sample rate (32 bit), then width and height (32 bit
 * each). A parameter equal to 0 is left out; width and height are always
 * written as a pair when at least one of them is non-zero.
 *
 * @return 0 on success, AVERROR(EINVAL) when pkt is NULL, AVERROR(ENOMEM)
 *         when av_packet_new_side_data() returns NULL
 */
int ff_add_param_change(AVPacket *pkt, int32_t channels,
                        uint64_t channel_layout, int32_t sample_rate,
                        int32_t width, int32_t height)
{
    const int has_channels   = channels != 0;
    const int has_layout     = channel_layout != 0;
    const int has_rate       = sample_rate != 0;
    const int has_dimensions = width != 0 || height != 0;
    /* Every payload starts with the 32-bit flags word. */
    const int payload_size   = 4 + 4 * has_channels + 8 * has_layout +
                               4 * has_rate + 8 * has_dimensions;
    uint32_t change_mask = 0;
    uint8_t *cursor;

    if (!pkt)
        return AVERROR(EINVAL);

    if (has_channels)
        change_mask |= AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_COUNT;
    if (has_layout)
        change_mask |= AV_SIDE_DATA_PARAM_CHANGE_CHANNEL_LAYOUT;
    if (has_rate)
        change_mask |= AV_SIDE_DATA_PARAM_CHANGE_SAMPLE_RATE;
    if (has_dimensions)
        change_mask |= AV_SIDE_DATA_PARAM_CHANGE_DIMENSIONS;

    cursor = av_packet_new_side_data(pkt, AV_PKT_DATA_PARAM_CHANGE,
                                     payload_size);
    if (!cursor)
        return AVERROR(ENOMEM);

    /* The field order below must match the order of the flag bits above. */
    bytestream_put_le32(&cursor, change_mask);
    if (has_channels)
        bytestream_put_le32(&cursor, channels);
    if (has_layout)
        bytestream_put_le64(&cursor, channel_layout);
    if (has_rate)
        bytestream_put_le32(&cursor, sample_rate);
    if (has_dimensions) {
        bytestream_put_le32(&cursor, width);
        bytestream_put_le32(&cursor, height);
    }

    return 0;
}
...@@ -33,7 +33,7 @@ typedef union { ...@@ -33,7 +33,7 @@ typedef union {
typedef struct AVAES { typedef struct AVAES {
// Note: round_key[16] is accessed in the init code, but this only // Note: round_key[16] is accessed in the init code, but this only
// overwrites state, which does not matter (see also r7471). // overwrites state, which does not matter (see also commit ba554c0).
av_aes_block round_key[15]; av_aes_block round_key[15];
av_aes_block state[2]; av_aes_block state[2];
int rounds; int rounds;
......
...@@ -530,6 +530,39 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext ...@@ -530,6 +530,39 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext
} }
static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
const vector signed int vsint32_8 = vec_splat_s32(8);
const vector unsigned int vuint32_4 = vec_splat_u32(4);
const vector signed char neg1 = vec_splat_s8(-1);
const vector unsigned char permA1 = (vector unsigned char)
{0x00, 0x01, 0x02, 0x10, 0x11, 0x12, 0x1F, 0x1F,
0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F};
const vector unsigned char permA2 = (vector unsigned char)
{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x11,
0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F};
const vector unsigned char permA1inc = (vector unsigned char)
{0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
const vector unsigned char permA2inc = (vector unsigned char)
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
const vector unsigned char magic = (vector unsigned char)
{0x01, 0x02, 0x01, 0x02, 0x04, 0x02, 0x01, 0x02,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
const vector unsigned char extractPerm = (vector unsigned char)
{0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01,
0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01};
const vector unsigned char extractPermInc = (vector unsigned char)
{0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01};
const vector unsigned char identity = vec_lvsl(0,(unsigned char *)0);
const vector unsigned char tenRight = (vector unsigned char)
{0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
const vector unsigned char eightLeft = (vector unsigned char)
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08};
/* /*
this code makes no assumption on src or stride. this code makes no assumption on src or stride.
One could remove the recomputation of the perm One could remove the recomputation of the perm
...@@ -539,11 +572,9 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { ...@@ -539,11 +572,9 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
src & stride :-( src & stride :-(
*/ */
uint8_t *srcCopy = src; uint8_t *srcCopy = src;
DECLARE_ALIGNED(16, uint8_t, dt)[16]; DECLARE_ALIGNED(16, uint8_t, dt)[16] = { deringThreshold };
const vector signed int zero = vec_splat_s32(0); const vector signed int zero = vec_splat_s32(0);
vector unsigned char v_dt; vector unsigned char v_dt = vec_splat(vec_ld(0, dt), 0);
dt[0] = deringThreshold;
v_dt = vec_splat(vec_ld(0, dt), 0);
#define LOAD_LINE(i) \ #define LOAD_LINE(i) \
const vector unsigned char perm##i = \ const vector unsigned char perm##i = \
...@@ -565,6 +596,11 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { ...@@ -565,6 +596,11 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
#undef LOAD_LINE #undef LOAD_LINE
vector unsigned char v_avg; vector unsigned char v_avg;
DECLARE_ALIGNED(16, signed int, S)[8];
DECLARE_ALIGNED(16, int, tQP2)[4] = { c->QP/2 + 1 };
vector signed int vQP2 = vec_ld(0, tQP2);
vQP2 = vec_splat(vQP2, 0);
{ {
const vector unsigned char trunc_perm = (vector unsigned char) const vector unsigned char trunc_perm = (vector unsigned char)
{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
...@@ -575,21 +611,22 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { ...@@ -575,21 +611,22 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
const vector unsigned char trunc_src78 = vec_perm(src7, src8, trunc_perm); const vector unsigned char trunc_src78 = vec_perm(src7, src8, trunc_perm);
#define EXTRACT(op) do { \ #define EXTRACT(op) do { \
const vector unsigned char s##op##_1 = vec_##op(trunc_src12, trunc_src34); \ const vector unsigned char s_1 = vec_##op(trunc_src12, trunc_src34); \
const vector unsigned char s##op##_2 = vec_##op(trunc_src56, trunc_src78); \ const vector unsigned char s_2 = vec_##op(trunc_src56, trunc_src78); \
const vector unsigned char s##op##_6 = vec_##op(s##op##_1, s##op##_2); \ const vector unsigned char s_6 = vec_##op(s_1, s_2); \
const vector unsigned char s##op##_8h = vec_mergeh(s##op##_6, s##op##_6); \ const vector unsigned char s_8h = vec_mergeh(s_6, s_6); \
const vector unsigned char s##op##_8l = vec_mergel(s##op##_6, s##op##_6); \ const vector unsigned char s_8l = vec_mergel(s_6, s_6); \
const vector unsigned char s##op##_9 = vec_##op(s##op##_8h, s##op##_8l); \ const vector unsigned char s_9 = vec_##op(s_8h, s_8l); \
const vector unsigned char s##op##_9h = vec_mergeh(s##op##_9, s##op##_9); \ const vector unsigned char s_9h = vec_mergeh(s_9, s_9); \
const vector unsigned char s##op##_9l = vec_mergel(s##op##_9, s##op##_9); \ const vector unsigned char s_9l = vec_mergel(s_9, s_9); \
const vector unsigned char s##op##_10 = vec_##op(s##op##_9h, s##op##_9l); \ const vector unsigned char s_10 = vec_##op(s_9h, s_9l); \
const vector unsigned char s##op##_10h = vec_mergeh(s##op##_10, s##op##_10); \ const vector unsigned char s_10h = vec_mergeh(s_10, s_10); \
const vector unsigned char s##op##_10l = vec_mergel(s##op##_10, s##op##_10); \ const vector unsigned char s_10l = vec_mergel(s_10, s_10); \
const vector unsigned char s##op##_11 = vec_##op(s##op##_10h, s##op##_10l); \ const vector unsigned char s_11 = vec_##op(s_10h, s_10l); \
const vector unsigned char s##op##_11h = vec_mergeh(s##op##_11, s##op##_11); \ const vector unsigned char s_11h = vec_mergeh(s_11, s_11); \
const vector unsigned char s##op##_11l = vec_mergel(s##op##_11, s##op##_11); \ const vector unsigned char s_11l = vec_mergel(s_11, s_11); \
v_##op = vec_##op(s##op##_11h, s##op##_11l); } while (0) v_##op = vec_##op(s_11h, s_11l); \
} while (0)
vector unsigned char v_min; vector unsigned char v_min;
vector unsigned char v_max; vector unsigned char v_max;
...@@ -603,7 +640,6 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { ...@@ -603,7 +640,6 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
v_avg = vec_avg(v_min, v_max); v_avg = vec_avg(v_min, v_max);
} }
DECLARE_ALIGNED(16, signed int, S)[8];
{ {
const vector unsigned short mask1 = (vector unsigned short) const vector unsigned short mask1 = (vector unsigned short)
{0x0001, 0x0002, 0x0004, 0x0008, {0x0001, 0x0002, 0x0004, 0x0008,
...@@ -615,22 +651,27 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { ...@@ -615,22 +651,27 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
const vector unsigned int vuint32_16 = vec_sl(vec_splat_u32(1), vec_splat_u32(4)); const vector unsigned int vuint32_16 = vec_sl(vec_splat_u32(1), vec_splat_u32(4));
const vector unsigned int vuint32_1 = vec_splat_u32(1); const vector unsigned int vuint32_1 = vec_splat_u32(1);
vector signed int sumA2;
vector signed int sumB2;
vector signed int sum0, sum1, sum2, sum3, sum4;
vector signed int sum5, sum6, sum7, sum8, sum9;
#define COMPARE(i) \ #define COMPARE(i) \
vector signed int sum##i; \
do { \ do { \
const vector unsigned char cmp##i = \ const vector unsigned char cmp = \
(vector unsigned char)vec_cmpgt(src##i, v_avg); \ (vector unsigned char)vec_cmpgt(src##i, v_avg); \
const vector unsigned short cmpHi##i = \ const vector unsigned short cmpHi = \
(vector unsigned short)vec_mergeh(cmp##i, cmp##i); \ (vector unsigned short)vec_mergeh(cmp, cmp); \
const vector unsigned short cmpLi##i = \ const vector unsigned short cmpLi = \
(vector unsigned short)vec_mergel(cmp##i, cmp##i); \ (vector unsigned short)vec_mergel(cmp, cmp); \
const vector signed short cmpHf##i = \ const vector signed short cmpHf = \
(vector signed short)vec_and(cmpHi##i, mask1); \ (vector signed short)vec_and(cmpHi, mask1); \
const vector signed short cmpLf##i = \ const vector signed short cmpLf = \
(vector signed short)vec_and(cmpLi##i, mask2); \ (vector signed short)vec_and(cmpLi, mask2); \
const vector signed int sump##i = vec_sum4s(cmpHf##i, zero); \ const vector signed int sump = vec_sum4s(cmpHf, zero); \
const vector signed int sumq##i = vec_sum4s(cmpLf##i, sump##i); \ const vector signed int sumq = vec_sum4s(cmpLf, sump); \
sum##i = vec_sums(sumq##i, zero); } while (0) sum##i = vec_sums(sumq, zero); \
} while (0)
COMPARE(0); COMPARE(0);
COMPARE(1); COMPARE(1);
...@@ -644,8 +685,6 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { ...@@ -644,8 +685,6 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
COMPARE(9); COMPARE(9);
#undef COMPARE #undef COMPARE
vector signed int sumA2;
vector signed int sumB2;
{ {
const vector signed int sump02 = vec_mergel(sum0, sum2); const vector signed int sump02 = vec_mergel(sum0, sum2);
const vector signed int sump13 = vec_mergel(sum1, sum3); const vector signed int sump13 = vec_mergel(sum1, sum3);
...@@ -699,86 +738,43 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { ...@@ -699,86 +738,43 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
/* I'm not sure the following is actually faster /* I'm not sure the following is actually faster
than straight, unvectorized C code :-( */ than straight, unvectorized C code :-( */
DECLARE_ALIGNED(16, int, tQP2)[4]; #define F_INIT() \
tQP2[0]= c->QP/2 + 1; vector unsigned char tenRightM = tenRight; \
vector signed int vQP2 = vec_ld(0, tQP2); vector unsigned char permA1M = permA1; \
vQP2 = vec_splat(vQP2, 0); vector unsigned char permA2M = permA2; \
const vector signed int vsint32_8 = vec_splat_s32(8); vector unsigned char extractPermM = extractPerm
const vector unsigned int vuint32_4 = vec_splat_u32(4);
const vector unsigned char permA1 = (vector unsigned char)
{0x00, 0x01, 0x02, 0x10, 0x11, 0x12, 0x1F, 0x1F,
0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F};
const vector unsigned char permA2 = (vector unsigned char)
{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x11,
0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F};
const vector unsigned char permA1inc = (vector unsigned char)
{0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
const vector unsigned char permA2inc = (vector unsigned char)
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
const vector unsigned char magic = (vector unsigned char)
{0x01, 0x02, 0x01, 0x02, 0x04, 0x02, 0x01, 0x02,
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
const vector unsigned char extractPerm = (vector unsigned char)
{0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01,
0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01};
const vector unsigned char extractPermInc = (vector unsigned char)
{0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01};
const vector unsigned char identity = vec_lvsl(0,(unsigned char *)0);
const vector unsigned char tenRight = (vector unsigned char)
{0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
const vector unsigned char eightLeft = (vector unsigned char)
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08};
#define F_INIT(i) \
vector unsigned char tenRightM##i = tenRight; \
vector unsigned char permA1M##i = permA1; \
vector unsigned char permA2M##i = permA2; \
vector unsigned char extractPermM##i = extractPerm
#define F2(i, j, k, l) \ #define F2(i, j, k, l) \
if (S[i] & (1 << (l+1))) { \ if (S[i] & (1 << (l+1))) { \
const vector unsigned char a_##j##_A##l = \ const vector unsigned char a_A = vec_perm(src##i, src##j, permA1M); \
vec_perm(src##i, src##j, permA1M##i); \ const vector unsigned char a_B = vec_perm(a_A, src##k, permA2M); \
const vector unsigned char a_##j##_B##l = \ const vector signed int a_sump = \
vec_perm(a_##j##_A##l, src##k, permA2M##i); \ (vector signed int)vec_msum(a_B, magic, (vector unsigned int)zero);\
const vector signed int a_##j##_sump##l = \ vector signed int F = vec_sr(vec_sums(a_sump, vsint32_8), vuint32_4); \
(vector signed int)vec_msum(a_##j##_B##l, magic, \ const vector signed int p = \
(vector unsigned int)zero); \ (vector signed int)vec_perm(src##j, (vector unsigned char)zero, \
vector signed int F_##j##_##l = \ extractPermM); \
vec_sr(vec_sums(a_##j##_sump##l, vsint32_8), vuint32_4); \ const vector signed int sum = vec_add(p, vQP2); \
F_##j##_##l = vec_splat(F_##j##_##l, 3); \ const vector signed int diff = vec_sub(p, vQP2); \
const vector signed int p_##j##_##l = \ vector signed int newpm; \
(vector signed int)vec_perm(src##j, \ vector unsigned char newpm2, mask; \
(vector unsigned char)zero, \ F = vec_splat(F, 3); \
extractPermM##i); \ if (vec_all_lt(sum, F)) \
const vector signed int sum_##j##_##l = vec_add( p_##j##_##l, vQP2);\ newpm = sum; \
const vector signed int diff_##j##_##l = vec_sub( p_##j##_##l, vQP2);\ else if (vec_all_gt(diff, F)) \
vector signed int newpm_##j##_##l; \ newpm = diff; \
if (vec_all_lt(sum_##j##_##l, F_##j##_##l)) \ else newpm = F; \
newpm_##j##_##l = sum_##j##_##l; \ newpm2 = vec_splat((vector unsigned char)newpm, 15); \
else if (vec_all_gt(diff_##j##_##l, F_##j##_##l)) \ mask = vec_add(identity, tenRightM); \
newpm_##j##_##l = diff_##j##_##l; \ src##j = vec_perm(src##j, newpm2, mask); \
else newpm_##j##_##l = F_##j##_##l; \
const vector unsigned char newpm2_##j##_##l = \
vec_splat((vector unsigned char)newpm_##j##_##l, 15); \
const vector unsigned char mask##j##l = vec_add(identity, \
tenRightM##i); \
src##j = vec_perm(src##j, newpm2_##j##_##l, mask##j##l); \
} \ } \
permA1M##i = vec_add(permA1M##i, permA1inc); \ permA1M = vec_add(permA1M, permA1inc); \
permA2M##i = vec_add(permA2M##i, permA2inc); \ permA2M = vec_add(permA2M, permA2inc); \
tenRightM##i = vec_sro(tenRightM##i, eightLeft); \ tenRightM = vec_sro(tenRightM, eightLeft); \
extractPermM##i = vec_add(extractPermM##i, extractPermInc) extractPermM = vec_add(extractPermM, extractPermInc)
#define ITER(i, j, k) \ #define ITER(i, j, k) do { \
F_INIT(i); \ F_INIT(); \
F2(i, j, k, 0); \ F2(i, j, k, 0); \
F2(i, j, k, 1); \ F2(i, j, k, 1); \
F2(i, j, k, 2); \ F2(i, j, k, 2); \
...@@ -786,7 +782,8 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { ...@@ -786,7 +782,8 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
F2(i, j, k, 4); \ F2(i, j, k, 4); \
F2(i, j, k, 5); \ F2(i, j, k, 5); \
F2(i, j, k, 6); \ F2(i, j, k, 6); \
F2(i, j, k, 7) F2(i, j, k, 7); \
} while (0)
ITER(0, 1, 2); ITER(0, 1, 2);
ITER(1, 2, 3); ITER(1, 2, 3);
...@@ -797,19 +794,18 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { ...@@ -797,19 +794,18 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
ITER(6, 7, 8); ITER(6, 7, 8);
ITER(7, 8, 9); ITER(7, 8, 9);
const vector signed char neg1 = vec_splat_s8(-1); #define STORE_LINE(i) do { \
const vector unsigned char permST = \
#define STORE_LINE(i) \
const vector unsigned char permST##i = \
vec_lvsr(i * stride, srcCopy); \ vec_lvsr(i * stride, srcCopy); \
const vector unsigned char maskST##i = \ const vector unsigned char maskST = \
vec_perm((vector unsigned char)zero, \ vec_perm((vector unsigned char)zero, \
(vector unsigned char)neg1, permST##i);\ (vector unsigned char)neg1, permST); \
src##i = vec_perm(src##i ,src##i, permST##i); \ src##i = vec_perm(src##i ,src##i, permST); \
sA##i= vec_sel(sA##i, src##i, maskST##i); \ sA##i= vec_sel(sA##i, src##i, maskST); \
sB##i= vec_sel(src##i, sB##i, maskST##i); \ sB##i= vec_sel(src##i, sB##i, maskST); \
vec_st(sA##i, i * stride, srcCopy); \ vec_st(sA##i, i * stride, srcCopy); \
vec_st(sB##i, i * stride + 16, srcCopy) vec_st(sB##i, i * stride + 16, srcCopy); \
} while (0)
STORE_LINE(1); STORE_LINE(1);
STORE_LINE(2); STORE_LINE(2);
...@@ -832,16 +828,16 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { ...@@ -832,16 +828,16 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise) uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
{ {
const vector signed char neg1 = vec_splat_s8(-1);
const vector unsigned char permHH = (const vector unsigned char){0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F};
const vector signed int zero = vec_splat_s32(0); const vector signed int zero = vec_splat_s32(0);
const vector signed short vsint16_1 = vec_splat_s16(1); const vector signed short vsint16_1 = vec_splat_s16(1);
vector signed int v_dp = zero; vector signed int v_dp = zero;
vector signed int v_sysdp = zero; vector signed int v_sysdp = zero;
int d, sysd, i; int d, sysd, i;
tempBlurredPast[127]= maxNoise[0];
tempBlurredPast[128]= maxNoise[1];
tempBlurredPast[129]= maxNoise[2];
#define LOAD_LINE(src, i) \ #define LOAD_LINE(src, i) \
register int j##src##i = i * stride; \ register int j##src##i = i * stride; \
vector unsigned char perm##src##i = vec_lvsl(j##src##i, src); \ vector unsigned char perm##src##i = vec_lvsl(j##src##i, src); \
...@@ -872,11 +868,12 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, ...@@ -872,11 +868,12 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
LOAD_LINE(tempBlurred, 7); LOAD_LINE(tempBlurred, 7);
#undef LOAD_LINE #undef LOAD_LINE
#define ACCUMULATE_DIFFS(i) \ #define ACCUMULATE_DIFFS(i) do { \
vector signed short v_d##i = vec_sub(v_tempBlurredAss##i, \ vector signed short v_d = vec_sub(v_tempBlurredAss##i, \
v_srcAss##i); \ v_srcAss##i); \
v_dp = vec_msums(v_d##i, v_d##i, v_dp); \ v_dp = vec_msums(v_d, v_d, v_dp); \
v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp) v_sysdp = vec_msums(v_d, vsint16_1, v_sysdp); \
} while (0)
ACCUMULATE_DIFFS(0); ACCUMULATE_DIFFS(0);
ACCUMULATE_DIFFS(1); ACCUMULATE_DIFFS(1);
...@@ -888,6 +885,10 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, ...@@ -888,6 +885,10 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
ACCUMULATE_DIFFS(7); ACCUMULATE_DIFFS(7);
#undef ACCUMULATE_DIFFS #undef ACCUMULATE_DIFFS
tempBlurredPast[127]= maxNoise[0];
tempBlurredPast[128]= maxNoise[1];
tempBlurredPast[129]= maxNoise[2];
v_dp = vec_sums(v_dp, zero); v_dp = vec_sums(v_dp, zero);
v_sysdp = vec_sums(v_sysdp, zero); v_sysdp = vec_sums(v_sysdp, zero);
...@@ -938,13 +939,12 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, ...@@ -938,13 +939,12 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
const vector signed short vsint16_4 = vec_splat_s16(4); const vector signed short vsint16_4 = vec_splat_s16(4);
const vector unsigned short vuint16_3 = vec_splat_u16(3); const vector unsigned short vuint16_3 = vec_splat_u16(3);
#define OP(i) \ #define OP(i) do { \
const vector signed short v_temp##i = \ const vector signed short v_temp = \
vec_mladd(v_tempBlurredAss##i, \ vec_mladd(v_tempBlurredAss##i, vsint16_7, v_srcAss##i); \
vsint16_7, v_srcAss##i); \ const vector signed short v_temp2 = vec_add(v_temp, vsint16_4); \
const vector signed short v_temp2##i = \ v_tempBlurredAss##i = vec_sr(v_temp2, vuint16_3); \
vec_add(v_temp##i, vsint16_4); \ } while (0)
v_tempBlurredAss##i = vec_sr(v_temp2##i, vuint16_3)
OP(0); OP(0);
OP(1); OP(1);
...@@ -959,13 +959,13 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, ...@@ -959,13 +959,13 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
const vector signed short vsint16_3 = vec_splat_s16(3); const vector signed short vsint16_3 = vec_splat_s16(3);
const vector signed short vsint16_2 = vec_splat_s16(2); const vector signed short vsint16_2 = vec_splat_s16(2);
#define OP(i) \ #define OP(i) do { \
const vector signed short v_temp##i = \ const vector signed short v_temp = \
vec_mladd(v_tempBlurredAss##i, \ vec_mladd(v_tempBlurredAss##i, vsint16_3, v_srcAss##i); \
vsint16_3, v_srcAss##i); \ const vector signed short v_temp2 = vec_add(v_temp, vsint16_2); \
const vector signed short v_temp2##i = \ v_tempBlurredAss##i = \
vec_add(v_temp##i, vsint16_2); \ vec_sr(v_temp2, (vector unsigned short)vsint16_2); \
v_tempBlurredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2) } while (0)
OP(0); OP(0);
OP(1); OP(1);
...@@ -979,27 +979,19 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, ...@@ -979,27 +979,19 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
} }
} }
const vector signed char neg1 = vec_splat_s8(-1); #define PACK_AND_STORE(src, i) do { \
const vector unsigned char permHH = (const vector unsigned char){0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, const vector unsigned char perms = vec_lvsr(i * stride, src); \
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F}; const vector unsigned char vf = \
vec_packsu(v_tempBlurredAss##1, (vector signed short)zero); \
#define PACK_AND_STORE(src, i) \ const vector unsigned char vg = vec_perm(vf, v_##src##A##i, permHH); \
const vector unsigned char perms##src##i = \ const vector unsigned char mask = \
vec_lvsr(i * stride, src); \ vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms); \
const vector unsigned char vf##src##i = \ const vector unsigned char vg2 = vec_perm(vg, vg, perms); \
vec_packsu(v_tempBlurredAss##i, (vector signed short)zero); \ const vector unsigned char svA = vec_sel(v_##src##A1##i, vg2, mask); \
const vector unsigned char vg##src##i = \ const vector unsigned char svB = vec_sel(vg2, v_##src##A2##i, mask); \
vec_perm(vf##src##i, v_##src##A##i, permHH); \ vec_st(svA, i * stride, src); \
const vector unsigned char mask##src##i = \ vec_st(svB, i * stride + 16, src); \
vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##src##i); \ } while (0)
const vector unsigned char vg2##src##i = \
vec_perm(vg##src##i, vg##src##i, perms##src##i); \
const vector unsigned char svA##src##i = \
vec_sel(v_##src##A1##i, vg2##src##i, mask##src##i); \
const vector unsigned char svB##src##i = \
vec_sel(vg2##src##i, v_##src##A2##i, mask##src##i); \
vec_st(svA##src##i, i * stride, src); \
vec_st(svB##src##i, i * stride + 16, src)
PACK_AND_STORE(src, 0); PACK_AND_STORE(src, 0);
PACK_AND_STORE(src, 1); PACK_AND_STORE(src, 1);
...@@ -1127,6 +1119,7 @@ static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst, ...@@ -1127,6 +1119,7 @@ static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst,
static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) { static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) {
const vector unsigned char zero = vec_splat_u8(0); const vector unsigned char zero = vec_splat_u8(0);
const vector signed char neg1 = vec_splat_s8(-1);
#define LOAD_DOUBLE_LINE(i, j) \ #define LOAD_DOUBLE_LINE(i, j) \
vector unsigned char src##i = vec_ld(i * 16, src); \ vector unsigned char src##i = vec_ld(i * 16, src); \
...@@ -1187,26 +1180,28 @@ static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* ds ...@@ -1187,26 +1180,28 @@ static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* ds
temp7 = vec_mergel(tempD, tempL); temp7 = vec_mergel(tempD, tempL);
const vector signed char neg1 = vec_splat_s8(-1); #define STORE_DOUBLE_LINE(i, j) do { \
#define STORE_DOUBLE_LINE(i, j) \ vector unsigned char dstAi = vec_ld(i * stride, dst); \
vector unsigned char dstA##i = vec_ld(i * stride, dst); \ vector unsigned char dstBi = vec_ld(i * stride + 16, dst); \
vector unsigned char dstB##i = vec_ld(i * stride + 16, dst); \ vector unsigned char dstAj = vec_ld(j * stride, dst); \
vector unsigned char dstA##j = vec_ld(j * stride, dst); \ vector unsigned char dstBj = vec_ld(j * stride+ 16, dst); \
vector unsigned char dstB##j = vec_ld(j * stride+ 16, dst); \ vector unsigned char aligni = vec_lvsr(i * stride, dst); \
vector unsigned char align##i = vec_lvsr(i * stride, dst); \ vector unsigned char alignj = vec_lvsr(j * stride, dst); \
vector unsigned char align##j = vec_lvsr(j * stride, dst); \ vector unsigned char maski = \
vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i); \ vec_perm(zero, (vector unsigned char)neg1, aligni); \
vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j); \ vector unsigned char maskj = \
vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i);\ vec_perm(zero, (vector unsigned char)neg1, alignj); \
vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j);\ vector unsigned char dstRi = vec_perm(temp##i, temp##i, aligni); \
vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i); \ vector unsigned char dstRj = vec_perm(temp##j, temp##j, alignj); \
vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i); \ vector unsigned char dstAFi = vec_sel(dstAi, dstRi, maski); \
vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j); \ vector unsigned char dstBFi = vec_sel(dstRi, dstBi, maski); \
vector unsigned char dstBF##j = vec_sel(dstR##j, dstB##j, mask##j); \ vector unsigned char dstAFj = vec_sel(dstAj, dstRj, maskj); \
vec_st(dstAF##i, i * stride, dst); \ vector unsigned char dstBFj = vec_sel(dstRj, dstBj, maskj); \
vec_st(dstBF##i, i * stride + 16, dst); \ vec_st(dstAFi, i * stride, dst); \
vec_st(dstAF##j, j * stride, dst); \ vec_st(dstBFi, i * stride + 16, dst); \
vec_st(dstBF##j, j * stride + 16, dst) vec_st(dstAFj, j * stride, dst); \
vec_st(dstBFj, j * stride + 16, dst); \
} while (0)
STORE_DOUBLE_LINE(0,1); STORE_DOUBLE_LINE(0,1);
STORE_DOUBLE_LINE(2,3); STORE_DOUBLE_LINE(2,3);
......
...@@ -3515,9 +3515,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ ...@@ -3515,9 +3515,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
else if(mode & H_DEBLOCK){ else if(mode & H_DEBLOCK){
#if HAVE_ALTIVEC #if HAVE_ALTIVEC
DECLARE_ALIGNED(16, unsigned char, tempBlock)[272]; DECLARE_ALIGNED(16, unsigned char, tempBlock)[272];
int t;
transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride); transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);
const int t=vertClassify_altivec(tempBlock-48, 16, &c); t = vertClassify_altivec(tempBlock-48, 16, &c);
if(t==1) { if(t==1) {
doVertLowPass_altivec(tempBlock-48, 16, &c); doVertLowPass_altivec(tempBlock-48, 16, &c);
transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride); transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment