Commit 27237d52 authored by Steven Walters, committed by Janne Grunau

w32threads: support for frame multithreading

Replace our incomplete w32threads implementation with x264's pthreads
w32threads wrapper.
Relicensed to LGPL with kind permission by Pegasys Inc.
Signed-off-by: Janne Grunau <janne-libav@jannau.net>
parent b4452298
...@@ -645,7 +645,7 @@ OBJS-$(CONFIG_TEXT2MOVSUB_BSF) += movsub_bsf.o ...@@ -645,7 +645,7 @@ OBJS-$(CONFIG_TEXT2MOVSUB_BSF) += movsub_bsf.o
# thread libraries # thread libraries
OBJS-$(HAVE_PTHREADS) += pthread.o OBJS-$(HAVE_PTHREADS) += pthread.o
OBJS-$(HAVE_W32THREADS) += w32thread.o OBJS-$(HAVE_W32THREADS) += pthread.o
OBJS-$(CONFIG_MLIB) += mlib/dsputil_mlib.o \ OBJS-$(CONFIG_MLIB) += mlib/dsputil_mlib.o \
......
...@@ -688,7 +688,7 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t ...@@ -688,7 +688,7 @@ static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t
assert(IS_INTER(mb_type)); assert(IS_INTER(mb_type));
if(HAVE_PTHREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME)) if(HAVE_THREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
await_references(h); await_references(h);
prefetch_motion(h, 0, pixel_shift, chroma444); prefetch_motion(h, 0, pixel_shift, chroma444);
......
...@@ -147,7 +147,7 @@ static void await_reference_mb_row(H264Context * const h, Picture *ref, int mb_y ...@@ -147,7 +147,7 @@ static void await_reference_mb_row(H264Context * const h, Picture *ref, int mb_y
int ref_field_picture = ref->field_picture; int ref_field_picture = ref->field_picture;
int ref_height = 16*h->s.mb_height >> ref_field_picture; int ref_height = 16*h->s.mb_height >> ref_field_picture;
if(!HAVE_PTHREADS || !(h->s.avctx->active_thread_type&FF_THREAD_FRAME)) if(!HAVE_THREADS || !(h->s.avctx->active_thread_type&FF_THREAD_FRAME))
return; return;
//FIXME it can be safe to access mb stuff //FIXME it can be safe to access mb stuff
......
...@@ -1596,7 +1596,7 @@ static int mpeg_field_start(MpegEncContext *s, const uint8_t *buf, int buf_size) ...@@ -1596,7 +1596,7 @@ static int mpeg_field_start(MpegEncContext *s, const uint8_t *buf, int buf_size)
*s->current_picture_ptr->f.pan_scan = s1->pan_scan; *s->current_picture_ptr->f.pan_scan = s1->pan_scan;
if (HAVE_PTHREADS && (avctx->active_thread_type & FF_THREAD_FRAME)) if (HAVE_THREADS && (avctx->active_thread_type & FF_THREAD_FRAME))
ff_thread_finish_setup(avctx); ff_thread_finish_setup(avctx);
} else { // second field } else { // second field
int i; int i;
......
...@@ -2119,7 +2119,7 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], ...@@ -2119,7 +2119,7 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
/* decoding or more than one mb_type (MC was already done otherwise) */ /* decoding or more than one mb_type (MC was already done otherwise) */
if(!s->encoding){ if(!s->encoding){
if(HAVE_PTHREADS && s->avctx->active_thread_type&FF_THREAD_FRAME) { if(HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_FRAME) {
if (s->mv_dir & MV_DIR_FORWARD) { if (s->mv_dir & MV_DIR_FORWARD) {
ff_thread_await_progress((AVFrame*)s->last_picture_ptr, MPV_lowest_referenced_row(s, 0), 0); ff_thread_await_progress((AVFrame*)s->last_picture_ptr, MPV_lowest_referenced_row(s, 0), 0);
} }
......
...@@ -29,11 +29,16 @@ ...@@ -29,11 +29,16 @@
* @see doc/multithreading.txt * @see doc/multithreading.txt
*/ */
#include <pthread.h> #include "config.h"
#include "avcodec.h" #include "avcodec.h"
#include "thread.h" #include "thread.h"
#if HAVE_PTHREADS
#include <pthread.h>
#elif HAVE_W32THREADS
#include "w32pthreads.h"
#endif
typedef int (action_func)(AVCodecContext *c, void *arg); typedef int (action_func)(AVCodecContext *c, void *arg);
typedef int (action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr); typedef int (action_func2)(AVCodecContext *c, void *arg, int jobnr, int threadnr);
...@@ -898,6 +903,10 @@ int ff_thread_init(AVCodecContext *avctx) ...@@ -898,6 +903,10 @@ int ff_thread_init(AVCodecContext *avctx)
return -1; return -1;
} }
#if HAVE_W32THREADS
w32thread_init();
#endif
if (avctx->codec) { if (avctx->codec) {
validate_thread_parameters(avctx); validate_thread_parameters(avctx);
......
...@@ -723,7 +723,7 @@ int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *pi ...@@ -723,7 +723,7 @@ int attribute_align_arg avcodec_decode_video2(AVCodecContext *avctx, AVFrame *pi
avctx->pkt = avpkt; avctx->pkt = avpkt;
if((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size || (avctx->active_thread_type&FF_THREAD_FRAME)){ if((avctx->codec->capabilities & CODEC_CAP_DELAY) || avpkt->size || (avctx->active_thread_type&FF_THREAD_FRAME)){
if (HAVE_PTHREADS && avctx->active_thread_type&FF_THREAD_FRAME) if (HAVE_THREADS && avctx->active_thread_type&FF_THREAD_FRAME)
ret = ff_thread_decode_frame(avctx, picture, got_picture_ptr, ret = ff_thread_decode_frame(avctx, picture, got_picture_ptr,
avpkt); avpkt);
else { else {
...@@ -1089,7 +1089,7 @@ const char *avcodec_license(void) ...@@ -1089,7 +1089,7 @@ const char *avcodec_license(void)
void avcodec_flush_buffers(AVCodecContext *avctx) void avcodec_flush_buffers(AVCodecContext *avctx)
{ {
if(HAVE_PTHREADS && avctx->active_thread_type&FF_THREAD_FRAME) if(HAVE_THREADS && avctx->active_thread_type&FF_THREAD_FRAME)
ff_thread_flush(avctx); ff_thread_flush(avctx);
else if(avctx->codec->flush) else if(avctx->codec->flush)
avctx->codec->flush(avctx); avctx->codec->flush(avctx);
...@@ -1277,7 +1277,7 @@ unsigned int ff_toupper4(unsigned int x) ...@@ -1277,7 +1277,7 @@ unsigned int ff_toupper4(unsigned int x)
+ (toupper((x>>24)&0xFF)<<24); + (toupper((x>>24)&0xFF)<<24);
} }
#if !HAVE_PTHREADS #if !HAVE_THREADS
int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f) int ff_thread_get_buffer(AVCodecContext *avctx, AVFrame *f)
{ {
......
...@@ -1316,7 +1316,7 @@ static void vp3_draw_horiz_band(Vp3DecodeContext *s, int y) ...@@ -1316,7 +1316,7 @@ static void vp3_draw_horiz_band(Vp3DecodeContext *s, int y)
int h, cy; int h, cy;
int offset[4]; int offset[4];
if (HAVE_PTHREADS && s->avctx->active_thread_type&FF_THREAD_FRAME) { if (HAVE_THREADS && s->avctx->active_thread_type&FF_THREAD_FRAME) {
int y_flipped = s->flipped_image ? s->avctx->height-y : y; int y_flipped = s->flipped_image ? s->avctx->height-y : y;
// At the end of the frame, report INT_MAX instead of the height of the frame. // At the end of the frame, report INT_MAX instead of the height of the frame.
...@@ -1400,7 +1400,7 @@ static void render_slice(Vp3DecodeContext *s, int slice) ...@@ -1400,7 +1400,7 @@ static void render_slice(Vp3DecodeContext *s, int slice)
int fragment_width = s->fragment_width[!!plane]; int fragment_width = s->fragment_width[!!plane];
int fragment_height = s->fragment_height[!!plane]; int fragment_height = s->fragment_height[!!plane];
int fragment_start = s->fragment_start[plane]; int fragment_start = s->fragment_start[plane];
int do_await = !plane && HAVE_PTHREADS && (s->avctx->active_thread_type&FF_THREAD_FRAME); int do_await = !plane && HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_FRAME);
if (!s->flipped_image) stride = -stride; if (!s->flipped_image) stride = -stride;
if (CONFIG_GRAY && plane && (s->avctx->flags & CODEC_FLAG_GRAY)) if (CONFIG_GRAY && plane && (s->avctx->flags & CODEC_FLAG_GRAY))
...@@ -1965,7 +1965,7 @@ static int vp3_decode_frame(AVCodecContext *avctx, ...@@ -1965,7 +1965,7 @@ static int vp3_decode_frame(AVCodecContext *avctx,
*data_size=sizeof(AVFrame); *data_size=sizeof(AVFrame);
*(AVFrame*)data= s->current_frame; *(AVFrame*)data= s->current_frame;
if (!HAVE_PTHREADS || !(s->avctx->active_thread_type&FF_THREAD_FRAME)) if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_FRAME))
update_frames(avctx); update_frames(avctx);
return buf_size; return buf_size;
...@@ -1973,7 +1973,7 @@ static int vp3_decode_frame(AVCodecContext *avctx, ...@@ -1973,7 +1973,7 @@ static int vp3_decode_frame(AVCodecContext *avctx,
error: error:
ff_thread_report_progress(&s->current_frame, INT_MAX, 0); ff_thread_report_progress(&s->current_frame, INT_MAX, 0);
if (!HAVE_PTHREADS || !(s->avctx->active_thread_type&FF_THREAD_FRAME)) if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_FRAME))
avctx->release_buffer(avctx, &s->current_frame); avctx->release_buffer(avctx, &s->current_frame);
return -1; return -1;
......
/*
* Copyright (C) 2010-2011 x264 project
*
* Authors: Steven Walters <kemuri9@gmail.com>
* Pegasys Inc. <http://www.pegasys-inc.com>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* w32threads to pthreads wrapper
*/
#ifndef AVCODEC_W32PTHREADS_H
#define AVCODEC_W32PTHREADS_H
/* Build up a pthread-like API using underlying Windows API. Have only static
* methods so as to not conflict with a potentially linked in pthread-win32
* library.
* As most functions here are used without checking return values,
* only implement return values as necessary. */
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <process.h>
/* Thread descriptor standing in for the POSIX pthread_t.
 * pthread_create() fills in func, arg and handle; the worker trampoline
 * stores the start routine's result in ret. */
typedef struct {
void *handle; /* value returned by _beginthreadex(), used to wait on/close the thread */
void *(*func)(void* arg); /* user start routine */
void *arg; /* argument passed to func */
void *ret; /* result of func, written by win32thread_worker() */
} pthread_t;
/* The condition variable API for Windows 6.0+ operates on critical sections
 * and not on mutex objects, so the pthread mutex type is a CRITICAL_SECTION. */
typedef CRITICAL_SECTION pthread_mutex_t;
/* Stand-in for the CONDITION_VARIABLE typedef, used with Windows' native
 * condition variables on kernels 6.0+.
 * MinGW does not currently have this typedef.
 * When the native API is not available, ptr instead holds a pointer to a
 * heap-allocated win32_cond_t set up by pthread_cond_init(). */
typedef struct {
void *ptr;
} pthread_cond_t;
/* Function pointers to the condition variable API on Windows 6.0+ kernels.
 * They are assigned by w32thread_init() from GetProcAddress() lookups in
 * kernel32.dll; a NULL pointer makes the pthread_cond_* wrappers fall back to
 * the semaphore-based emulation.
 * Being static in a header, every translation unit that includes this file
 * gets its own copy of these pointers. */
static void (WINAPI *cond_broadcast)(pthread_cond_t *cond);
static void (WINAPI *cond_init)(pthread_cond_t *cond);
static void (WINAPI *cond_signal)(pthread_cond_t *cond);
static BOOL (WINAPI *cond_wait)(pthread_cond_t *cond, pthread_mutex_t *mutex,
DWORD milliseconds);
/* Thread entry point handed to _beginthreadex(): invokes the start routine
 * stored in the pthread_t descriptor and records its result there.
 * The thread's own exit code is always 0. */
static unsigned __stdcall attribute_align_arg win32thread_worker(void *arg)
{
    pthread_t *thread = arg;
    void *result      = thread->func(thread->arg);

    thread->ret = result;
    return 0;
}
/* Start a new thread running start_routine(arg).
 *
 * thread       descriptor to fill in; the worker keeps a pointer to it, so it
 *              must stay valid while the thread runs
 * unused_attr  ignored
 * Returns 0 on success and 1 when _beginthreadex() did not yield a handle. */
static int pthread_create(pthread_t *thread, const void *unused_attr,
                          void *(*start_routine)(void*), void *arg)
{
    /* func/arg must be in place before the thread starts: the worker reads
     * them through the descriptor pointer. */
    thread->arg    = arg;
    thread->func   = start_routine;
    thread->handle = (void*)_beginthreadex(NULL, 0, win32thread_worker,
                                           thread, 0, NULL);

    return thread->handle == NULL;
}
/* Wait for a thread to finish and release its handle.
 *
 * If the wait does not end with WAIT_OBJECT_0, nothing else is done: the
 * handle is not closed and *value_ptr is left untouched.
 * value_ptr may be NULL when the result is not wanted.
 *
 * NOTE(review): thread is received by value, so thread.ret is the value the
 * descriptor held when the call was made, not necessarily what the worker
 * stored afterwards - confirm no caller relies on *value_ptr. */
static void pthread_join(pthread_t thread, void **value_ptr)
{
    if (WaitForSingleObject(thread.handle, INFINITE) == WAIT_OBJECT_0) {
        if (value_ptr)
            *value_ptr = thread.ret;
        CloseHandle(thread.handle);
    }
}
/* Mutex operations map one-to-one onto the CRITICAL_SECTION API.
 * The attribute argument `a` of pthread_mutex_init() is discarded. */
#define pthread_mutex_init(m, a) InitializeCriticalSection(m)
#define pthread_mutex_destroy(m) DeleteCriticalSection(m)
#define pthread_mutex_lock(m) EnterCriticalSection(m)
#define pthread_mutex_unlock(m) LeaveCriticalSection(m)
/* For pre-Windows 6.0 platforms we need to define and use our own condition
 * variable and API. This is the emulated condition variable; a pointer to it
 * is stored in pthread_cond_t.ptr. */
typedef struct {
pthread_mutex_t mtx_waiter_count; /* guards waiter_count */
volatile int waiter_count; /* waiters registered and not yet released by signal/broadcast */
HANDLE semaphore; /* waiters block on this; released once per woken waiter */
} win32_cond_t;
/* Initialize a condition variable.
 *
 * cond         condition variable to set up
 * unused_attr  ignored
 *
 * Uses the native initializer when w32thread_init() found one. Otherwise a
 * win32_cond_t is allocated and stored in cond->ptr.
 *
 * The function cannot report errors, so failures are left in a well-defined
 * state instead:
 *  - if the allocation fails, cond->ptr is set to NULL (it was previously
 *    left untouched, i.e. possibly indeterminate);
 *  - if the semaphore cannot be created, the object is still published with
 *    a valid critical section and a NULL semaphore handle (previously the
 *    critical section was left uninitialized, yet later locked/deleted). */
static void pthread_cond_init(pthread_cond_t *cond, const void *unused_attr)
{
    win32_cond_t *win32_cond = NULL;

    if (cond_init) {
        cond_init(cond);
        return;
    }

    /* non native condition variables */
    cond->ptr  = NULL;
    win32_cond = av_mallocz(sizeof(*win32_cond));
    if (!win32_cond)
        return;

    /* Set up the critical section before anything that can fail, so every
     * published win32_cond_t has a usable mtx_waiter_count. */
    pthread_mutex_init(&win32_cond->mtx_waiter_count, NULL);
    win32_cond->semaphore = CreateSemaphore(NULL, 0, 0x7fffffff, NULL);
    cond->ptr = win32_cond;
}
/* Release the resources of a condition variable.
 *
 * Native condition variables need no teardown. For the emulation, the
 * semaphore and the critical section are released and the win32_cond_t is
 * freed; cond->ptr is reset to NULL afterwards.
 *
 * A NULL cond->ptr is tolerated: that is the state this function itself
 * leaves behind, so a repeated destroy is a no-op instead of a NULL
 * dereference. */
static void pthread_cond_destroy(pthread_cond_t *cond)
{
    win32_cond_t *win32_cond = cond->ptr;

    /* native condition variables do not destroy */
    if (cond_init)
        return;

    /* non native condition variables */
    if (!win32_cond)
        return;

    /* the handle is NULL if CreateSemaphore() failed during init */
    if (win32_cond->semaphore)
        CloseHandle(win32_cond->semaphore);
    pthread_mutex_destroy(&win32_cond->mtx_waiter_count);
    av_freep(&win32_cond);
    cond->ptr = NULL;
}
/* Wake every thread currently waiting on cond.
 *
 * With the native API this forwards to the looked-up broadcast function.
 * In the emulation, the semaphore is released once per registered waiter and
 * the waiter count is reset, all under mtx_waiter_count. */
static void pthread_cond_broadcast(pthread_cond_t *cond)
{
    if (cond_broadcast) {
        cond_broadcast(cond);
    } else {
        /* non native condition variables */
        win32_cond_t *emulated = cond->ptr;

        pthread_mutex_lock(&emulated->mtx_waiter_count);
        if (emulated->waiter_count != 0) {
            ReleaseSemaphore(emulated->semaphore, emulated->waiter_count,
                             NULL);
            emulated->waiter_count = 0;
        }
        pthread_mutex_unlock(&emulated->mtx_waiter_count);
    }
}
/* Block on cond; mutex is released while waiting and held again on return.
 *
 * With the native API this forwards to the looked-up wait function with an
 * infinite timeout. In the emulation the caller registers itself as a waiter,
 * unlocks mutex, blocks on the semaphore and then re-locks mutex. */
static void pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
{
    if (cond_wait) {
        cond_wait(cond, mutex, INFINITE);
    } else {
        /* non native condition variables */
        win32_cond_t *emulated = cond->ptr;

        /* register as a waiter before giving up the caller's mutex */
        pthread_mutex_lock(&emulated->mtx_waiter_count);
        emulated->waiter_count++;
        pthread_mutex_unlock(&emulated->mtx_waiter_count);

        pthread_mutex_unlock(mutex);
        WaitForSingleObject(emulated->semaphore, INFINITE);
        pthread_mutex_lock(mutex);
    }
}
/* Wake one thread waiting on cond, if there is any.
 *
 * With the native API this forwards to the looked-up signal function.
 * In the emulation, the semaphore is released once and the waiter count is
 * decremented, both under mtx_waiter_count; nothing happens when no waiter
 * is registered. */
static void pthread_cond_signal(pthread_cond_t *cond)
{
    if (cond_signal) {
        cond_signal(cond);
    } else {
        /* non-native condition variables */
        win32_cond_t *emulated = cond->ptr;

        pthread_mutex_lock(&emulated->mtx_waiter_count);
        if (emulated->waiter_count != 0) {
            ReleaseSemaphore(emulated->semaphore, 1, NULL);
            emulated->waiter_count--;
        }
        pthread_mutex_unlock(&emulated->mtx_waiter_count);
    }
}
/* Look up the native condition variable functions in kernel32.dll and store
 * them in the cond_* function pointers. Each pointer receives whatever
 * GetProcAddress() returns for its symbol. */
static void w32thread_init(void)
{
    HANDLE kernel32 = GetModuleHandle(TEXT("kernel32.dll"));

    /* if one is available, then they should all be available */
    cond_init      = (void*)GetProcAddress(kernel32,
                                           "InitializeConditionVariable");
    cond_broadcast = (void*)GetProcAddress(kernel32,
                                           "WakeAllConditionVariable");
    cond_signal    = (void*)GetProcAddress(kernel32,
                                           "WakeConditionVariable");
    cond_wait      = (void*)GetProcAddress(kernel32,
                                           "SleepConditionVariableCS");
}
#endif /* AVCODEC_W32PTHREADS_H */
/*
* Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
//#define DEBUG
#include "avcodec.h"
#include "thread.h"
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <process.h>
/* Per-worker state of the slice-threading pool. ff_thread_init() allocates
 * one entry per thread; the three semaphores are created once and the same
 * handles are copied into every entry. */
typedef struct ThreadContext{
AVCodecContext *avctx; /* codec context passed to the job functions */
HANDLE thread; /* worker thread handle from _beginthreadex() */
HANDLE work_sem; /* released once per job to dispatch (or once per thread to quit) */
HANDLE job_sem; /* created with count 1; held while a worker takes a job number */
HANDLE done_sem; /* released by a worker after each finished job */
int (*func)(AVCodecContext *c, void *arg); /* execute()-style job, preferred when non-NULL */
int (*func2)(AVCodecContext *c, void *arg, int, int); /* execute2()-style job, used when func is NULL */
void *arg; /* job argument base pointer */
int argsize; /* byte stride between per-job arguments for func */
int *jobnr; /* shared counter yielding the next job index */
int *ret; /* optional array receiving one result per job */
int threadnr; /* index of this worker, passed to func2 */
}ThreadContext;
/* Worker thread main loop.
 * Each iteration waits for work_sem, claims the next job index under job_sem,
 * runs the job and signals done_sem. The loop ends when the worker is woken
 * while both func and func2 are NULL. */
static unsigned WINAPI attribute_align_arg thread_func(void *v){
    ThreadContext *ctx = v;

    while (1) {
        int job, result;

        WaitForSingleObject(ctx->work_sem, INFINITE);

        // avoid trying to access jobnr if we should quit
        if (!(ctx->func || ctx->func2))
            break;

        /* take the next job number; job_sem serializes the counter update */
        WaitForSingleObject(ctx->job_sem, INFINITE);
        job = *ctx->jobnr;
        *ctx->jobnr = job + 1;
        ReleaseSemaphore(ctx->job_sem, 1, 0);

        if (ctx->func)
            result = ctx->func(ctx->avctx,
                               (uint8_t *)ctx->arg + job * ctx->argsize);
        else
            result = ctx->func2(ctx->avctx, ctx->arg, job, ctx->threadnr);

        if (ctx->ret)
            ctx->ret[job] = result;

        ReleaseSemaphore(ctx->done_sem, 1, 0);
    }

    return 0;
}
/**
 * Free what has been allocated by ff_thread_init().
 * Must be called after decoding has finished, especially do not call while avcodec_thread_execute() is running.
 *
 * Safe to call when no thread pool exists (thread_opaque is NULL, which is
 * what ff_thread_init() leaves behind when it returns without allocating)
 * and on a partially initialized pool, as reached through ff_thread_init()'s
 * failure path: missing semaphores and thread handles are skipped.
 */
void ff_thread_free(AVCodecContext *s){
    ThreadContext *c= s->thread_opaque;
    int i;

    if (!c)
        return;

    /* clearing both job pointers tells the workers to leave their loop */
    for(i=0; i<s->thread_count; i++){
        c[i].func= NULL;
        c[i].func2= NULL;
    }

    /* wake every worker once so it can observe the quit request */
    if(c[0].work_sem)
        ReleaseSemaphore(c[0].work_sem, s->thread_count, 0);

    for(i=0; i<s->thread_count; i++){
        if(!c[i].thread)
            continue; /* never created */
        WaitForSingleObject(c[i].thread, INFINITE);
        CloseHandle(c[i].thread);
    }

    if(c[0].work_sem) CloseHandle(c[0].work_sem);
    if(c[0].job_sem) CloseHandle(c[0].job_sem);
    if(c[0].done_sem) CloseHandle(c[0].done_sem);

    av_freep(&s->thread_opaque);
}
/* Run `count` jobs on the worker pool and block until all have finished.
 *
 * func   job function; job i receives (uint8_t *)arg + i*size. May be NULL,
 *        in which case the workers use their func2 pointer instead.
 * ret    optional array receiving one result per job
 * Always returns 0. */
static int avcodec_thread_execute(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2),void *arg, int *ret, int count, int size){
    ThreadContext *workers = s->thread_opaque;
    int next_job = 0; /* shared job counter, advanced by the workers */
    int n;

    assert(s == workers->avctx);

    /* note, we can be certain that this is not called with the same AVCodecContext by different threads at the same time */
    for (n = 0; n < s->thread_count; n++) {
        ThreadContext *w = &workers[n];

        w->arg     = arg;
        w->argsize = size;
        w->func    = func;
        w->ret     = ret;
        w->jobnr   = &next_job;
    }

    /* one work_sem unit per job, then collect one done_sem unit per job */
    ReleaseSemaphore(workers[0].work_sem, count, 0);
    for (n = count; n > 0; n--)
        WaitForSingleObject(workers[0].done_sem, INFINITE);

    return 0;
}
/* Run `count` jobs of the execute2() flavour, where each job receives the
 * shared arg plus its job number and the worker's thread number.
 *
 * Installs func as every worker's func2 and then dispatches through
 * avcodec_thread_execute() with a NULL func, which makes the workers pick
 * func2.
 *
 * Returns the result of avcodec_thread_execute(). The original fell off the
 * end of this non-void function, leaving the value seen by callers
 * undefined. */
static int avcodec_thread_execute2(AVCodecContext *s, int (*func)(AVCodecContext *c2, void *arg2, int, int),void *arg, int *ret, int count){
    ThreadContext *c= s->thread_opaque;
    int i;

    for(i=0; i<s->thread_count; i++)
        c[i].func2 = func;

    return avcodec_thread_execute(s, NULL, arg, ret, count, 0);
}
/**
 * Create the slice-threading worker pool for a codec context.
 *
 * Only FF_THREAD_SLICE is supported: if another thread type is requested
 * without it, a warning is logged and 0 is returned without creating threads.
 * With thread_count <= 1 nothing is allocated either.
 *
 * On success s->execute / s->execute2 are pointed at the pool dispatchers.
 *
 * @return 0 on success or when no pool is needed, -1 if the pool could not
 *         be allocated or a semaphore/thread could not be created
 */
int ff_thread_init(AVCodecContext *s){
    int i;
    ThreadContext *c;
    uint32_t threadid;

    if (s->thread_type && !(s->thread_type & FF_THREAD_SLICE)) {
        av_log(s, AV_LOG_WARNING,
               "This thread library only supports FF_THREAD_SLICE"
               " threading algorithm.\n");
        return 0;
    }

    s->active_thread_type= FF_THREAD_SLICE;

    if (s->thread_count <= 1)
        return 0;

    assert(!s->thread_opaque);
    c= av_mallocz(sizeof(ThreadContext)*s->thread_count);
    /* The original dereferenced c[0] without checking the allocation.
     * Nothing has been created yet, so return directly instead of going
     * through the cleanup path. */
    if (!c)
        return -1;
    s->thread_opaque= c;

    /* the semaphores are shared by all workers; entry 0 owns the handles */
    if(!(c[0].work_sem = CreateSemaphore(NULL, 0, INT_MAX, NULL)))
        goto fail;
    if(!(c[0].job_sem = CreateSemaphore(NULL, 1, 1, NULL)))
        goto fail;
    if(!(c[0].done_sem = CreateSemaphore(NULL, 0, INT_MAX, NULL)))
        goto fail;

    for(i=0; i<s->thread_count; i++){
        c[i].avctx= s;
        c[i].work_sem = c[0].work_sem;
        c[i].job_sem = c[0].job_sem;
        c[i].done_sem = c[0].done_sem;
        c[i].threadnr = i;

        c[i].thread = (HANDLE)_beginthreadex(NULL, 0, thread_func, &c[i], 0, &threadid );
        if( !c[i].thread ) goto fail;
    }

    s->execute= avcodec_thread_execute;
    s->execute2= avcodec_thread_execute2;

    return 0;
fail:
    ff_thread_free(s);
    return -1;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment