/*
 * Copyright (C) 2012 Peng  Gao     <peng@multicorewareinc.com>
 * Copyright (C) 2012 Li    Cao     <li@multicorewareinc.com>
 * Copyright (C) 2012 Wei   Gao     <weigao@multicorewareinc.com>
 * Copyright (C) 2013 Lenny Wang    <lwanghpc@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "opencl.h"
#include "avstring.h"
#include "log.h"
#include "avassert.h"
28
#include "opt.h"
29

30
#if HAVE_THREADS
31 32
#if HAVE_PTHREADS
#include <pthread.h>
33 34 35 36 37 38
#elif HAVE_W32THREADS
#include "compat/w32pthreads.h"
#elif HAVE_OS2THREADS
#include "compat/os2threads.h"
#endif
#include "atomic.h"
39

40
/*
 * Lazily created global mutex guarding opencl_ctx.
 *
 * The pointer itself is volatile because it is published with a
 * compare-and-swap in init_opencl_mtx(); the mutex it points to is a plain
 * pthread_mutex_t, so it can be handed to the pthread API without discarding
 * a qualifier.
 */
static pthread_mutex_t * volatile atomic_opencl_lock = NULL;
#define LOCK_OPENCL pthread_mutex_lock(atomic_opencl_lock)
#define UNLOCK_OPENCL pthread_mutex_unlock(atomic_opencl_lock)
#else
44 45 46 47 48 49 50 51 52 53 54 55
#define LOCK_OPENCL
#define UNLOCK_OPENCL
#endif

#define MAX_KERNEL_CODE_NUM 200

/**
 * One registered OpenCL kernel source and its build state.
 */
typedef struct {
    int is_compiled;            /* set to 1 by av_opencl_compile() once this source built successfully */
    const char *kernel_string;  /* source text; the pointer is stored as given, the string is not copied */
} KernelCode;

typedef struct {
56 57 58
    const AVClass *class;
    int log_offset;
    void *log_ctx;
59
    int init_count;
60 61 62 63 64 65
    int opt_init_flag;
     /**
     * if set to 1, the OpenCL environment was created by the user and
     * passed as AVOpenCLExternalEnv when initing ,0:created by opencl wrapper.
     */
    int is_user_created;
66 67
    int platform_idx;
    int device_idx;
68 69 70 71 72
    cl_platform_id platform_id;
    cl_device_type device_type;
    cl_context context;
    cl_device_id device_id;
    cl_command_queue command_queue;
73 74
#if FF_API_OLD_OPENCL
    char *build_options;
75 76
    int program_count;
    cl_program programs[MAX_KERNEL_CODE_NUM];
77 78
    int kernel_count;
#endif
79 80
    int kernel_code_count;
    KernelCode kernel_code[MAX_KERNEL_CODE_NUM];
81
    AVOpenCLDeviceList device_list;
82
} OpenclContext;
83

84
#define OFFSET(x) offsetof(OpenclContext, x)
85 86 87 88

static const AVOption opencl_options[] = {
     { "platform_idx",        "set platform index value",  OFFSET(platform_idx),  AV_OPT_TYPE_INT,    {.i64=-1}, -1, INT_MAX},
     { "device_idx",          "set device index value",    OFFSET(device_idx),    AV_OPT_TYPE_INT,    {.i64=-1}, -1, INT_MAX},
89
#if FF_API_OLD_OPENCL
90
     { "build_options",       "build options of opencl",   OFFSET(build_options), AV_OPT_TYPE_STRING, {.str="-I."},  CHAR_MIN, CHAR_MAX},
91
#endif
92
     { NULL }
93 94
};

95 96
static const AVClass openclutils_class = {
    .class_name                = "OPENCLUTILS",
97
    .option                    = opencl_options,
98 99
    .item_name                 = av_default_item_name,
    .version                   = LIBAVUTIL_VERSION_INT,
100 101
    .log_level_offset_offset   = offsetof(OpenclContext, log_offset),
    .parent_log_context_offset = offsetof(OpenclContext, log_ctx),
102 103
};

104
/* The one wrapper instance of this file; only its first member (the AVClass
 * pointer) is set here, every other field starts out zeroed. */
static OpenclContext opencl_ctx = {&openclutils_class};
105

106
/* Device types queried by get_device_list(), in this order (GPU first, then CPU). */
static const cl_device_type device_type[] = {CL_DEVICE_TYPE_GPU, CL_DEVICE_TYPE_CPU};
107

108 109 110 111 112 113
/** Pairs an OpenCL status code with the text av_opencl_errstr() returns for it. */
typedef struct {
    int err_code;           /* OpenCL status value (a CL_* constant) */
    const char *err_str;    /* human readable description */
} OpenclErrorMsg;

static const OpenclErrorMsg opencl_err_msg[] = {
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
    {CL_DEVICE_NOT_FOUND,                               "DEVICE NOT FOUND"},
    {CL_DEVICE_NOT_AVAILABLE,                           "DEVICE NOT AVAILABLE"},
    {CL_COMPILER_NOT_AVAILABLE,                         "COMPILER NOT AVAILABLE"},
    {CL_MEM_OBJECT_ALLOCATION_FAILURE,                  "MEM OBJECT ALLOCATION FAILURE"},
    {CL_OUT_OF_RESOURCES,                               "OUT OF RESOURCES"},
    {CL_OUT_OF_HOST_MEMORY,                             "OUT OF HOST MEMORY"},
    {CL_PROFILING_INFO_NOT_AVAILABLE,                   "PROFILING INFO NOT AVAILABLE"},
    {CL_MEM_COPY_OVERLAP,                               "MEM COPY OVERLAP"},
    {CL_IMAGE_FORMAT_MISMATCH,                          "IMAGE FORMAT MISMATCH"},
    {CL_IMAGE_FORMAT_NOT_SUPPORTED,                     "IMAGE FORMAT NOT_SUPPORTED"},
    {CL_BUILD_PROGRAM_FAILURE,                          "BUILD PROGRAM FAILURE"},
    {CL_MAP_FAILURE,                                    "MAP FAILURE"},
    {CL_MISALIGNED_SUB_BUFFER_OFFSET,                   "MISALIGNED SUB BUFFER OFFSET"},
    {CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST,      "EXEC STATUS ERROR FOR EVENTS IN WAIT LIST"},
    {CL_COMPILE_PROGRAM_FAILURE,                        "COMPILE PROGRAM FAILURE"},
    {CL_LINKER_NOT_AVAILABLE,                           "LINKER NOT AVAILABLE"},
    {CL_LINK_PROGRAM_FAILURE,                           "LINK PROGRAM FAILURE"},
    {CL_DEVICE_PARTITION_FAILED,                        "DEVICE PARTITION FAILED"},
    {CL_KERNEL_ARG_INFO_NOT_AVAILABLE,                  "KERNEL ARG INFO NOT AVAILABLE"},
    {CL_INVALID_VALUE,                                  "INVALID VALUE"},
    {CL_INVALID_DEVICE_TYPE,                            "INVALID DEVICE TYPE"},
    {CL_INVALID_PLATFORM,                               "INVALID PLATFORM"},
    {CL_INVALID_DEVICE,                                 "INVALID DEVICE"},
    {CL_INVALID_CONTEXT,                                "INVALID CONTEXT"},
    {CL_INVALID_QUEUE_PROPERTIES,                       "INVALID QUEUE PROPERTIES"},
    {CL_INVALID_COMMAND_QUEUE,                          "INVALID COMMAND QUEUE"},
    {CL_INVALID_HOST_PTR,                               "INVALID HOST PTR"},
    {CL_INVALID_MEM_OBJECT,                             "INVALID MEM OBJECT"},
    {CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,                "INVALID IMAGE FORMAT DESCRIPTOR"},
    {CL_INVALID_IMAGE_SIZE,                             "INVALID IMAGE SIZE"},
    {CL_INVALID_SAMPLER,                                "INVALID SAMPLER"},
    {CL_INVALID_BINARY,                                 "INVALID BINARY"},
    {CL_INVALID_BUILD_OPTIONS,                          "INVALID BUILD OPTIONS"},
    {CL_INVALID_PROGRAM,                                "INVALID PROGRAM"},
    {CL_INVALID_PROGRAM_EXECUTABLE,                     "INVALID PROGRAM EXECUTABLE"},
    {CL_INVALID_KERNEL_NAME,                            "INVALID KERNEL NAME"},
    {CL_INVALID_KERNEL_DEFINITION,                      "INVALID KERNEL DEFINITION"},
    {CL_INVALID_KERNEL,                                 "INVALID KERNEL"},
    {CL_INVALID_ARG_INDEX,                              "INVALID ARG INDEX"},
    {CL_INVALID_ARG_VALUE,                              "INVALID ARG VALUE"},
    {CL_INVALID_ARG_SIZE,                               "INVALID ARG_SIZE"},
    {CL_INVALID_KERNEL_ARGS,                            "INVALID KERNEL ARGS"},
    {CL_INVALID_WORK_DIMENSION,                         "INVALID WORK DIMENSION"},
    {CL_INVALID_WORK_GROUP_SIZE,                        "INVALID WORK GROUP SIZE"},
    {CL_INVALID_WORK_ITEM_SIZE,                         "INVALID WORK ITEM SIZE"},
    {CL_INVALID_GLOBAL_OFFSET,                          "INVALID GLOBAL OFFSET"},
    {CL_INVALID_EVENT_WAIT_LIST,                        "INVALID EVENT WAIT LIST"},
    {CL_INVALID_EVENT,                                  "INVALID EVENT"},
    {CL_INVALID_OPERATION,                              "INVALID OPERATION"},
    {CL_INVALID_GL_OBJECT,                              "INVALID GL OBJECT"},
    {CL_INVALID_BUFFER_SIZE,                            "INVALID BUFFER SIZE"},
    {CL_INVALID_MIP_LEVEL,                              "INVALID MIP LEVEL"},
    {CL_INVALID_GLOBAL_WORK_SIZE,                       "INVALID GLOBAL WORK SIZE"},
    {CL_INVALID_PROPERTY,                               "INVALID PROPERTY"},
    {CL_INVALID_IMAGE_DESCRIPTOR,                       "INVALID IMAGE DESCRIPTOR"},
    {CL_INVALID_COMPILER_OPTIONS,                       "INVALID COMPILER OPTIONS"},
    {CL_INVALID_LINKER_OPTIONS,                         "INVALID LINKER OPTIONS"},
    {CL_INVALID_DEVICE_PARTITION_COUNT,                 "INVALID DEVICE PARTITION COUNT"},
172 173
};

174
/**
 * Translate an OpenCL status code into a short description.
 *
 * @param status OpenCL status code
 * @return the matching text from opencl_err_msg, or "unknown error" when the
 *         code is not listed; the returned string is static
 */
const char *av_opencl_errstr(cl_int status)
{
    const OpenclErrorMsg *entry = opencl_err_msg;
    const OpenclErrorMsg *const table_end = opencl_err_msg + FF_ARRAY_ELEMS(opencl_err_msg);

    for (; entry < table_end; entry++)
        if (entry->err_code == status)
            return entry->err_str;

    return "unknown error";
}

184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
/**
 * Release everything owned by a device list and reset it to "empty".
 *
 * The list structure itself is not freed. Safe to call on a partially built
 * list: a NULL platform array, NULL platform entries and NULL device arrays
 * are all tolerated, so error paths can call this unconditionally.
 *
 * @param device_list list to clear, may be NULL
 */
static void free_device_list(AVOpenCLDeviceList *device_list)
{
    int i, j;

    if (!device_list)
        return;

    /* platform_num may be non-zero while the array was never allocated */
    if (device_list->platform_node) {
        for (i = 0; i < device_list->platform_num; i++) {
            AVOpenCLPlatformNode *platform = device_list->platform_node[i];

            if (!platform)
                continue;
            if (platform->device_node) {
                for (j = 0; j < platform->device_num; j++)
                    av_freep(&platform->device_node[j]);
            }
            av_freep(&platform->device_node);
            av_freep(&device_list->platform_node[i]);
        }
    }
    av_freep(&device_list->platform_node);
    device_list->platform_num = 0;
}

/**
 * Enumerate all OpenCL platforms and, per platform, the devices of every
 * type listed in device_type[].
 *
 * Devices whose IDs or name cannot be queried are skipped with a warning;
 * they do not make the whole enumeration fail.
 *
 * @param device_list list to fill; it is only modified once the platform
 *                    array could be allocated, and it is emptied again with
 *                    free_device_list() on failure
 * @return 0 on success, AVERROR_EXTERNAL if the platform query fails,
 *         AVERROR(ENOMEM) on allocation failure
 */
static int get_device_list(AVOpenCLDeviceList *device_list)
{
    cl_int status;
    cl_uint platform_count = 0;
    /* OpenCL reports counts as cl_uint; a fixed array avoids a heap allocation */
    cl_uint devices_num[FF_ARRAY_ELEMS(device_type)];
    cl_uint total_devices_num, k;
    int i, j, device_num, ret = 0;
    cl_platform_id *platform_ids = NULL;
    cl_device_id *device_ids = NULL;
    AVOpenCLPlatformNode *platform_node = NULL;
    AVOpenCLDeviceNode *device_node = NULL;

    status = clGetPlatformIDs(0, NULL, &platform_count);
    if (status != CL_SUCCESS) {
        av_log(&opencl_ctx, AV_LOG_ERROR,
               "Could not get OpenCL platform ids: %s\n", av_opencl_errstr(status));
        return AVERROR_EXTERNAL;
    }
    platform_ids = av_mallocz(platform_count * sizeof(cl_platform_id));
    if (!platform_ids)
        return AVERROR(ENOMEM);
    status = clGetPlatformIDs(platform_count, platform_ids, NULL);
    if (status != CL_SUCCESS) {
        av_log(&opencl_ctx, AV_LOG_ERROR,
                "Could not get OpenCL platform ids: %s\n", av_opencl_errstr(status));
        ret = AVERROR_EXTERNAL;
        goto end;
    }
    device_list->platform_node = av_mallocz(platform_count * sizeof(AVOpenCLPlatformNode *));
    if (!device_list->platform_node) {
        ret = AVERROR(ENOMEM);
        goto end;
    }
    /* publish the count only now, so the cleanup path never walks a NULL array */
    device_list->platform_num = platform_count;

    for (i = 0; i < device_list->platform_num; i++) {
        platform_node = av_mallocz(sizeof(AVOpenCLPlatformNode));
        if (!platform_node) {
            ret = AVERROR(ENOMEM);
            goto end;
        }
        device_list->platform_node[i] = platform_node;
        platform_node->platform_id = platform_ids[i];
        status = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_VENDOR,
                                   sizeof(platform_node->platform_name),
                                   platform_node->platform_name, NULL);
        if (status != CL_SUCCESS) {
            /* not fatal: the name simply stays empty (node is zero-initialised) */
            av_log(&opencl_ctx, AV_LOG_WARNING,
                   "Could not get platform name: %s\n", av_opencl_errstr(status));
        }

        total_devices_num = 0;
        for (j = 0; j < FF_ARRAY_ELEMS(device_type); j++) {
            devices_num[j] = 0;
            status = clGetDeviceIDs(platform_node->platform_id,
                                    device_type[j], 0, NULL, &devices_num[j]);
            /* a platform without devices of this type reports an error; count it as 0 */
            if (status != CL_SUCCESS)
                devices_num[j] = 0;
            total_devices_num += devices_num[j];
        }
        platform_node->device_node = av_mallocz(total_devices_num * sizeof(AVOpenCLDeviceNode *));
        if (!platform_node->device_node) {
            ret = AVERROR(ENOMEM);
            goto end;
        }
        for (j = 0; j < FF_ARRAY_ELEMS(device_type); j++) {
            if (!devices_num[j])
                continue;
            device_ids = av_mallocz(devices_num[j] * sizeof(cl_device_id));
            if (!device_ids) {
                ret = AVERROR(ENOMEM);
                goto end;
            }
            status = clGetDeviceIDs(platform_node->platform_id, device_type[j],
                                    devices_num[j], device_ids, NULL);
            if (status != CL_SUCCESS) {
                av_log(&opencl_ctx, AV_LOG_WARNING,
                        "Could not get device ID: %s:\n", av_opencl_errstr(status));
                av_freep(&device_ids);
                continue;
            }
            for (k = 0; k < devices_num[j]; k++) {
                device_num = platform_node->device_num;
                device_node = av_mallocz(sizeof(AVOpenCLDeviceNode));
                if (!device_node) {
                    ret = AVERROR(ENOMEM);
                    goto end;
                }
                device_node->device_id = device_ids[k];
                device_node->device_type = device_type[j];
                status = clGetDeviceInfo(device_node->device_id, CL_DEVICE_NAME,
                                         sizeof(device_node->device_name), device_node->device_name,
                                         NULL);
                if (status != CL_SUCCESS) {
                    av_log(&opencl_ctx, AV_LOG_WARNING,
                            "Could not get device name: %s\n", av_opencl_errstr(status));
                    /* skipped device: drop its node instead of leaking it */
                    av_freep(&device_node);
                    continue;
                }
                /* only store nodes that are counted, so free_device_list() sees all of them */
                platform_node->device_node[device_num] = device_node;
                platform_node->device_num++;
            }
            av_freep(&device_ids);
        }
    }
end:
    av_freep(&platform_ids);
    av_freep(&device_ids);
    if (ret < 0)
        free_device_list(device_list);
    return ret;
}

/**
 * Allocate a device list and fill it from the OpenCL environment.
 *
 * @param device_list receives the new list; set to NULL on failure
 * @return 0 (or the non-negative result of get_device_list()) on success,
 *         a negative AVERROR code otherwise
 */
int av_opencl_get_device_list(AVOpenCLDeviceList **device_list)
{
    int err;
    AVOpenCLDeviceList *list = av_mallocz(sizeof(AVOpenCLDeviceList));

    *device_list = list;
    if (!list) {
        av_log(&opencl_ctx, AV_LOG_ERROR, "Could not allocate opencl device list\n");
        return AVERROR(ENOMEM);
    }

    err = get_device_list(list);
    if (err >= 0)
        return err;

    av_log(&opencl_ctx, AV_LOG_ERROR, "Could not get device list from environment\n");
    free_device_list(list);
    av_freep(device_list);
    return err;
}

/**
 * Free a list obtained from av_opencl_get_device_list() and set the
 * caller's pointer to NULL.
 *
 * @param device_list address of the list pointer; must not be NULL itself
 */
void av_opencl_free_device_list(AVOpenCLDeviceList **device_list)
{
    AVOpenCLDeviceList *list = *device_list;

    /* first the nodes owned by the list, then the list structure itself */
    free_device_list(list);
    av_freep(device_list);
}

329
/**
 * Make sure the global OpenCL mutex exists.
 *
 * A fresh mutex is created and installed with a compare-and-swap; if the
 * swap reports that a mutex was already installed, the fresh one is
 * destroyed again. Without thread support this is a no-op.
 *
 * @return 0 on success, AVERROR(ENOMEM) or AVERROR(err) from
 *         pthread_mutex_init() on failure
 */
static inline int init_opencl_mtx(void)
{
#if HAVE_THREADS
    if (!atomic_opencl_lock) {
        int err;
        pthread_mutex_t *tmp = av_malloc(sizeof(pthread_mutex_t));
        if (!tmp)
            return AVERROR(ENOMEM);
        if ((err = pthread_mutex_init(tmp, NULL))) {
            av_free(tmp);
            return AVERROR(err);
        }
        /* explicit cast: the CAS helper works on generic void pointers */
        if (avpriv_atomic_ptr_cas((void * volatile *)&atomic_opencl_lock, NULL, tmp)) {
            /* a non-NULL previous value means another mutex is already in place */
            pthread_mutex_destroy(tmp);
            av_free(tmp);
        }
    }
#endif
    return 0;
}

350 351
int av_opencl_set_option(const char *key, const char *val)
{
352 353 354
    int ret = init_opencl_mtx( );
    if (ret < 0)
        return ret;
355
    LOCK_OPENCL;
356 357 358
    if (!opencl_ctx.opt_init_flag) {
        av_opt_set_defaults(&opencl_ctx);
        opencl_ctx.opt_init_flag = 1;
359
    }
360
    ret = av_opt_set(&opencl_ctx, key, val, 0);
361
    UNLOCK_OPENCL;
362 363 364 365 366 367
    return ret;
}

int av_opencl_get_option(const char *key, uint8_t **out_val)
{
    int ret = 0;
368
    LOCK_OPENCL;
369
    ret = av_opt_get(&opencl_ctx, key, 0, out_val);
370
    UNLOCK_OPENCL;
371 372 373 374 375 376
    return ret;
}

void av_opencl_free_option(void)
{
    /*FIXME: free openclutils context*/
377
    LOCK_OPENCL;
378
    av_opt_free(&opencl_ctx);
379
    UNLOCK_OPENCL;
380 381
}

382 383 384 385
AVOpenCLExternalEnv *av_opencl_alloc_external_env(void)
{
    AVOpenCLExternalEnv *ext = av_mallocz(sizeof(AVOpenCLExternalEnv));
    if (!ext) {
386
        av_log(&opencl_ctx, AV_LOG_ERROR,
387
               "Could not malloc external opencl environment data space\n");
388 389 390 391 392 393 394 395 396 397 398
    }
    return ext;
}

/**
 * Free an AVOpenCLExternalEnv and set the caller's pointer to NULL.
 *
 * Only the structure itself is passed to av_freep(); nothing it refers to
 * is released here.
 *
 * @param ext_opencl_env address of the pointer to free
 */
void av_opencl_free_external_env(AVOpenCLExternalEnv **ext_opencl_env)
{
    av_freep(ext_opencl_env);
}

int av_opencl_register_kernel_code(const char *kernel_code)
{
399 400 401
    int i, ret = init_opencl_mtx( );
    if (ret < 0)
        return ret;
402
    LOCK_OPENCL;
403 404
    if (opencl_ctx.kernel_code_count >= MAX_KERNEL_CODE_NUM) {
        av_log(&opencl_ctx, AV_LOG_ERROR,
405 406
               "Could not register kernel code, maximum number of registered kernel code %d already reached\n",
               MAX_KERNEL_CODE_NUM);
407 408 409
        ret = AVERROR(EINVAL);
        goto end;
    }
410 411 412
    for (i = 0; i < opencl_ctx.kernel_code_count; i++) {
        if (opencl_ctx.kernel_code[i].kernel_string == kernel_code) {
            av_log(&opencl_ctx, AV_LOG_WARNING, "Same kernel code has been registered\n");
413 414 415
            goto end;
        }
    }
416 417 418
    opencl_ctx.kernel_code[opencl_ctx.kernel_code_count].kernel_string = kernel_code;
    opencl_ctx.kernel_code[opencl_ctx.kernel_code_count].is_compiled = 0;
    opencl_ctx.kernel_code_count++;
419 420 421 422 423
end:
    UNLOCK_OPENCL;
    return ret;
}

424
cl_program av_opencl_compile(const char *program_name, const char *build_opts)
425
{
426
    int i;
427
    cl_int status;
428 429 430 431 432 433
    int kernel_code_idx = 0;
    const char *kernel_source;
    size_t kernel_code_len;
    char* ptr = NULL;
    cl_program program = NULL;

434
    LOCK_OPENCL;
435 436 437 438 439 440 441 442 443 444 445 446 447
    for (i = 0; i < opencl_ctx.kernel_code_count; i++) {
        // identify a program using a unique name within the kernel source
        ptr = av_stristr(opencl_ctx.kernel_code[i].kernel_string, program_name);
        if (ptr && !opencl_ctx.kernel_code[i].is_compiled) {
            kernel_source = opencl_ctx.kernel_code[i].kernel_string;
            kernel_code_len = strlen(opencl_ctx.kernel_code[i].kernel_string);
            kernel_code_idx = i;
            break;
        }
    }
    if (!kernel_source) {
        av_log(&opencl_ctx, AV_LOG_ERROR,
               "Unable to find OpenCL kernel source '%s'\n", program_name);
448 449
        goto end;
    }
450 451 452 453 454 455 456 457

    /* create a CL program from kernel source */
    program = clCreateProgramWithSource(opencl_ctx.context, 1, &kernel_source, &kernel_code_len, &status);
    if(status != CL_SUCCESS) {
        av_log(&opencl_ctx, AV_LOG_ERROR,
               "Unable to create OpenCL program '%s': %s\n", program_name, av_opencl_errstr(status));
        program = NULL;
        goto end;
458
    }
459 460 461 462 463 464 465 466 467
    status = clBuildProgram(program, 1, &(opencl_ctx.device_id), build_opts, NULL, NULL);
    if (status != CL_SUCCESS) {
        av_log(&opencl_ctx, AV_LOG_ERROR,
               "Compilation failed with OpenCL program: %s\n", program_name);
        program = NULL;
        goto end;
    }

    opencl_ctx.kernel_code[kernel_code_idx].is_compiled = 1;
468 469
end:
    UNLOCK_OPENCL;
470 471 472 473 474 475 476 477 478 479 480 481 482
    return program;
}

/**
 * Return the command queue stored in the wrapper context.
 *
 * The field is read without taking the OpenCL lock.
 *
 * @return opencl_ctx.command_queue as currently stored
 */
cl_command_queue av_opencl_get_command_queue(void)
{
    return opencl_ctx.command_queue;
}

#if FF_API_OLD_OPENCL
/**
 * Stub kept for the old API (FF_API_OLD_OPENCL): never creates a kernel.
 *
 * @param env         unused
 * @param kernel_name kernel name, only used in the error message
 * @return always AVERROR(EINVAL)
 */
int av_opencl_create_kernel(AVOpenCLKernelEnv *env, const char *kernel_name)
{
    const int err = AVERROR(EINVAL);

    av_log(&opencl_ctx, AV_LOG_ERROR, "Could not create OpenCL kernel %s, please update libavfilter.\n", kernel_name);
    return err;
}

/**
 * Stub kept for the old API (FF_API_OLD_OPENCL): releases nothing and only
 * logs an error.
 *
 * @param env unused
 */
void av_opencl_release_kernel(AVOpenCLKernelEnv *env)
{
    av_log(&opencl_ctx, AV_LOG_ERROR,
           "Could not release OpenCL kernel, please update libavfilter.\n");
    return;
}
489
#endif
490

491
/**
 * Set up the OpenCL environment (platform, device, context, command queue)
 * of a wrapper context.
 *
 * With an external environment the handles are copied from it and the
 * context is marked as user created. Without one, the platforms/devices are
 * enumerated if needed, a platform and device are selected (by the
 * platform_idx / device_idx options when they are >= 0, otherwise the first
 * platform that has devices and its device 0), and a context plus command
 * queue are created. Nothing is done if the context is already marked as
 * user created.
 *
 * @param opencl_ctx     wrapper context to initialise (the parameter
 *                       shadows the file-level instance of the same name)
 * @param ext_opencl_env user supplied environment, or NULL
 * @return 0 on success, AVERROR(EINVAL) for an invalid platform/device
 *         index, AVERROR_EXTERNAL when OpenCL calls fail, or the error of
 *         get_device_list()
 */
static int init_opencl_env(OpenclContext *opencl_ctx, AVOpenCLExternalEnv *ext_opencl_env)
{
    cl_int status;
    cl_context_properties cps[3];
    int i, ret = 0;
    AVOpenCLDeviceNode *device_node = NULL;

    if (ext_opencl_env) {
        if (opencl_ctx->is_user_created)
            return 0;
        opencl_ctx->platform_id     = ext_opencl_env->platform_id;
        opencl_ctx->is_user_created = 1;
        opencl_ctx->command_queue   = ext_opencl_env->command_queue;
        opencl_ctx->context         = ext_opencl_env->context;
        opencl_ctx->device_id       = ext_opencl_env->device_id;
        opencl_ctx->device_type     = ext_opencl_env->device_type;
    } else {
        if (!opencl_ctx->is_user_created) {
            if (!opencl_ctx->device_list.platform_num) {
                ret = get_device_list(&opencl_ctx->device_list);
                if (ret < 0) {
                    return ret;
                }
            }
            if (opencl_ctx->platform_idx >= 0) {
                if (opencl_ctx->device_list.platform_num < opencl_ctx->platform_idx + 1) {
                    av_log(opencl_ctx, AV_LOG_ERROR, "User set platform index not exist\n");
                    return AVERROR(EINVAL);
                }
                if (!opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->device_num) {
                    av_log(opencl_ctx, AV_LOG_ERROR, "No devices in user specific platform with index %d\n",
                           opencl_ctx->platform_idx);
                    return AVERROR(EINVAL);
                }
                opencl_ctx->platform_id = opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->platform_id;
            } else {
                /* get a usable platform by default*/
                for (i = 0; i < opencl_ctx->device_list.platform_num; i++) {
                    if (opencl_ctx->device_list.platform_node[i]->device_num) {
                        opencl_ctx->platform_id = opencl_ctx->device_list.platform_node[i]->platform_id;
                        opencl_ctx->platform_idx = i;
                        break;
                    }
                }
            }
            if (!opencl_ctx->platform_id) {
                av_log(opencl_ctx, AV_LOG_ERROR, "Could not get OpenCL platforms\n");
                return AVERROR_EXTERNAL;
            }
            /* get a usable device*/
            if (opencl_ctx->device_idx >= 0) {
                if (opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->device_num < opencl_ctx->device_idx + 1) {
                    /* report the device index that was rejected, not the platform index */
                    av_log(opencl_ctx, AV_LOG_ERROR,
                           "Could not get OpenCL device idx %d in the user set platform\n", opencl_ctx->device_idx);
                    return AVERROR(EINVAL);
                }
            } else {
                opencl_ctx->device_idx = 0;
            }

            device_node = opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->device_node[opencl_ctx->device_idx];
            opencl_ctx->device_id = device_node->device_id;
            opencl_ctx->device_type = device_node->device_type;

            /*
             * Use available platform.
             */
            av_log(opencl_ctx, AV_LOG_VERBOSE, "Platform Name: %s, Device Name: %s\n",
                   opencl_ctx->device_list.platform_node[opencl_ctx->platform_idx]->platform_name,
                   device_node->device_name);
            cps[0] = CL_CONTEXT_PLATFORM;
            cps[1] = (cl_context_properties)opencl_ctx->platform_id;
            cps[2] = 0;

            opencl_ctx->context = clCreateContextFromType(cps, opencl_ctx->device_type,
                                                       NULL, NULL, &status);
            if (status != CL_SUCCESS) {
                av_log(opencl_ctx, AV_LOG_ERROR,
                       "Could not get OpenCL context from device type: %s\n", av_opencl_errstr(status));
                return AVERROR_EXTERNAL;
            }
            opencl_ctx->command_queue = clCreateCommandQueue(opencl_ctx->context, opencl_ctx->device_id,
                                                          0, &status);
            if (status != CL_SUCCESS) {
                av_log(opencl_ctx, AV_LOG_ERROR,
                       "Could not create OpenCL command queue: %s\n", av_opencl_errstr(status));
                /* initialisation failed as a whole: do not keep the half-built context */
                clReleaseContext(opencl_ctx->context);
                opencl_ctx->context       = NULL;
                opencl_ctx->command_queue = NULL;
                return AVERROR_EXTERNAL;
            }
        }
    }
    return ret;
}

584
/**
 * Initialise the OpenCL wrapper (reference counted).
 *
 * Only the first call (init_count == 0) does real work: it applies the
 * option defaults if needed, verifies that at least one kernel source is
 * registered and then sets up the OpenCL environment. The registration
 * check comes first so that a failing call does not leave a freshly created
 * context/command queue or an adopted external environment behind.
 *
 * @param ext_opencl_env user supplied environment, or NULL to let the
 *                       wrapper create one
 * @return 0 on success, AVERROR(EINVAL) if no kernel code is registered,
 *         otherwise the error of init_opencl_mtx() or init_opencl_env()
 */
int av_opencl_init(AVOpenCLExternalEnv *ext_opencl_env)
{
    int ret = init_opencl_mtx( );
    if (ret < 0)
        return ret;
    LOCK_OPENCL;
    if (!opencl_ctx.init_count) {
        if (!opencl_ctx.opt_init_flag) {
            av_opt_set_defaults(&opencl_ctx);
            opencl_ctx.opt_init_flag = 1;
        }
        if (opencl_ctx.kernel_code_count <= 0) {
            av_log(&opencl_ctx, AV_LOG_ERROR,
                   "No kernel code is registered, compile kernel file failed\n");
            ret = AVERROR(EINVAL);
            goto end;
        }
        ret = init_opencl_env(&opencl_ctx, ext_opencl_env);
        if (ret < 0)
            goto end;
    }
    opencl_ctx.init_count++;
end:
    UNLOCK_OPENCL;
    return ret;
}

void av_opencl_uninit(void)
{
    cl_int status;
614
    LOCK_OPENCL;
615 616
    opencl_ctx.init_count--;
    if (opencl_ctx.is_user_created)
617
        goto end;
618
    if (opencl_ctx.init_count > 0)
619
        goto end;
620 621
    if (opencl_ctx.command_queue) {
        status = clReleaseCommandQueue(opencl_ctx.command_queue);
622
        if (status != CL_SUCCESS) {
623
            av_log(&opencl_ctx, AV_LOG_ERROR,
624
                   "Could not release OpenCL command queue: %s\n", av_opencl_errstr(status));
625
        }
626
        opencl_ctx.command_queue = NULL;
627
    }
628 629
    if (opencl_ctx.context) {
        status = clReleaseContext(opencl_ctx.context);
630
        if (status != CL_SUCCESS) {
631
            av_log(&opencl_ctx, AV_LOG_ERROR,
632
                   "Could not release OpenCL context: %s\n", av_opencl_errstr(status));
633
        }
634
        opencl_ctx.context = NULL;
635
    }
636
    free_device_list(&opencl_ctx.device_list);
637
end:
638
    if (opencl_ctx.init_count <= 0)
639
        av_opt_free(&opencl_ctx); //FIXME: free openclutils context
640
    UNLOCK_OPENCL;
641 642 643 644 645
}

/**
 * Create an OpenCL buffer in the wrapper's context.
 *
 * @param cl_buf      receives the result of clCreateBuffer()
 * @param cl_buf_size size of the buffer in bytes
 * @param flags       passed to clCreateBuffer() as its flags argument
 * @param host_ptr    passed to clCreateBuffer() as its host pointer
 * @return 0 on success, AVERROR_EXTERNAL if clCreateBuffer() failed
 */
int av_opencl_buffer_create(cl_mem *cl_buf, size_t cl_buf_size, int flags, void *host_ptr)
{
    cl_int err;

    *cl_buf = clCreateBuffer(opencl_ctx.context, flags, cl_buf_size, host_ptr, &err);
    if (err == CL_SUCCESS)
        return 0;

    av_log(&opencl_ctx, AV_LOG_ERROR, "Could not create OpenCL buffer: %s\n", av_opencl_errstr(err));
    return AVERROR_EXTERNAL;
}

/**
 * Release an OpenCL buffer and clear the caller's handle.
 *
 * A release failure is logged; the handle is zeroed in either case.
 *
 * @param cl_buf address of the buffer handle; nothing happens if NULL
 */
void av_opencl_buffer_release(cl_mem *cl_buf)
{
    cl_int err = 0;

    if (cl_buf) {
        err = clReleaseMemObject(*cl_buf);
        if (err != CL_SUCCESS)
            av_log(&opencl_ctx, AV_LOG_ERROR,
                   "Could not release OpenCL buffer: %s\n", av_opencl_errstr(err));
        memset(cl_buf, 0, sizeof(*cl_buf));
    }
}

/**
 * Copy host memory into an OpenCL buffer.
 *
 * The first buf_size bytes of the buffer are mapped for writing (blocking),
 * filled with memcpy() and unmapped again.
 *
 * @param dst_cl_buf destination OpenCL buffer
 * @param src_buf    source data
 * @param buf_size   number of bytes to copy
 * @return 0 on success, AVERROR_EXTERNAL if mapping or unmapping failed
 */
int av_opencl_buffer_write(cl_mem dst_cl_buf, uint8_t *src_buf, size_t buf_size)
{
    cl_int err;
    void *host_view;

    host_view = clEnqueueMapBuffer(opencl_ctx.command_queue, dst_cl_buf,
                                   CL_TRUE, CL_MAP_WRITE, 0, sizeof(uint8_t) * buf_size,
                                   0, NULL, NULL, &err);
    if (err != CL_SUCCESS) {
        av_log(&opencl_ctx, AV_LOG_ERROR,
               "Could not map OpenCL buffer: %s\n", av_opencl_errstr(err));
        return AVERROR_EXTERNAL;
    }

    memcpy(host_view, src_buf, buf_size);

    err = clEnqueueUnmapMemObject(opencl_ctx.command_queue, dst_cl_buf, host_view, 0, NULL, NULL);
    if (err == CL_SUCCESS)
        return 0;

    av_log(&opencl_ctx, AV_LOG_ERROR,
           "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(err));
    return AVERROR_EXTERNAL;
}

/**
 * Copy the contents of an OpenCL buffer into host memory.
 *
 * The first buf_size bytes of the buffer are mapped for reading (blocking),
 * copied out with memcpy() and unmapped again.
 *
 * @param dst_buf    destination memory
 * @param src_cl_buf source OpenCL buffer
 * @param buf_size   number of bytes to copy
 * @return 0 on success, AVERROR_EXTERNAL if mapping or unmapping failed
 */
int av_opencl_buffer_read(uint8_t *dst_buf, cl_mem src_cl_buf, size_t buf_size)
{
    cl_int err;
    void *host_view;

    host_view = clEnqueueMapBuffer(opencl_ctx.command_queue, src_cl_buf,
                                   CL_TRUE, CL_MAP_READ, 0, buf_size,
                                   0, NULL, NULL, &err);
    if (err != CL_SUCCESS) {
        av_log(&opencl_ctx, AV_LOG_ERROR,
               "Could not map OpenCL buffer: %s\n", av_opencl_errstr(err));
        return AVERROR_EXTERNAL;
    }

    memcpy(dst_buf, host_view, buf_size);

    err = clEnqueueUnmapMemObject(opencl_ctx.command_queue, src_cl_buf, host_view, 0, NULL, NULL);
    if (err == CL_SUCCESS)
        return 0;

    av_log(&opencl_ctx, AV_LOG_ERROR,
           "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(err));
    return AVERROR_EXTERNAL;
}

/**
 * Write up to 8 image planes back to back into an OpenCL buffer, starting
 * at a byte offset.
 *
 * The region [0, dst_cl_offset + sum(plane_size)) is mapped for writing
 * (blocking), so the offset is part of the size check against
 * cl_buffer_size.
 *
 * @param dst_cl_buf     destination OpenCL buffer
 * @param cl_buffer_size size of dst_cl_buf in bytes
 * @param dst_cl_offset  byte offset of the first plane; must be >= 0
 * @param src_data       plane_num source plane pointers
 * @param plane_size     plane_num plane sizes in bytes; each must be >= 0
 * @param plane_num      number of planes, 0..8
 * @return 0 on success, AVERROR(EINVAL) for invalid arguments or when the
 *         data does not fit, AVERROR_EXTERNAL if mapping or unmapping failed
 */
int av_opencl_buffer_write_image(cl_mem dst_cl_buf, size_t cl_buffer_size, int dst_cl_offset,
                                 uint8_t **src_data, int *plane_size, int plane_num)
{
    int i;
    /* 64 bit: offset plus at most 8 int-sized planes cannot overflow */
    uint64_t mapped_size;
    uint8_t *temp;
    cl_int status;
    void *mapped;

    if ((unsigned int)plane_num > 8 || dst_cl_offset < 0) {
        return AVERROR(EINVAL);
    }
    mapped_size = dst_cl_offset;
    for (i = 0; i < plane_num; i++) {
        if (plane_size[i] < 0)
            return AVERROR(EINVAL);
        mapped_size += plane_size[i];
    }
    if (mapped_size > cl_buffer_size) {
        av_log(&opencl_ctx, AV_LOG_ERROR,
               "Cannot write image to OpenCL buffer: buffer too small\n");
        return AVERROR(EINVAL);
    }
    mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, dst_cl_buf,
                                CL_TRUE, CL_MAP_WRITE, 0, (size_t)mapped_size,
                                0, NULL, NULL, &status);
    if (status != CL_SUCCESS) {
        av_log(&opencl_ctx, AV_LOG_ERROR,
               "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status));
        return AVERROR_EXTERNAL;
    }
    temp = mapped;
    temp += dst_cl_offset;
    for (i = 0; i < plane_num; i++) {
        memcpy(temp, src_data[i], plane_size[i]);
        temp += plane_size[i];
    }
    status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, dst_cl_buf, mapped, 0, NULL, NULL);
    if (status != CL_SUCCESS) {
        av_log(&opencl_ctx, AV_LOG_ERROR,
               "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status));
        return AVERROR_EXTERNAL;
    }
    return 0;
}

int av_opencl_buffer_read_image(uint8_t **dst_data, int *plane_size, int plane_num,
755
                                cl_mem src_cl_buf, size_t cl_buffer_size)
756 757 758 759 760 761 762 763
{
    int i,buffer_size = 0,ret = 0;
    uint8_t *temp;
    void *mapped;
    cl_int status;
    if ((unsigned int)plane_num > 8) {
        return AVERROR(EINVAL);
    }
764
    for (i = 0; i < plane_num; i++) {
765 766 767
        buffer_size += plane_size[i];
    }
    if (buffer_size > cl_buffer_size) {
768
        av_log(&opencl_ctx, AV_LOG_ERROR,
769
               "Cannot write image to CPU buffer: OpenCL buffer too small\n");
770 771
        return AVERROR(EINVAL);
    }
772 773
    mapped = clEnqueueMapBuffer(opencl_ctx.command_queue, src_cl_buf,
                                CL_TRUE, CL_MAP_READ, 0, buffer_size,
774
                                0, NULL, NULL, &status);
775 776

    if (status != CL_SUCCESS) {
777
        av_log(&opencl_ctx, AV_LOG_ERROR,
778
               "Could not map OpenCL buffer: %s\n", av_opencl_errstr(status));
779 780 781 782
        return AVERROR_EXTERNAL;
    }
    temp = mapped;
    if (ret >= 0) {
783
        for (i = 0; i < plane_num; i++) {
784 785 786 787
            memcpy(dst_data[i], temp, plane_size[i]);
            temp += plane_size[i];
        }
    }
788
    status = clEnqueueUnmapMemObject(opencl_ctx.command_queue, src_cl_buf, mapped, 0, NULL, NULL);
789
    if (status != CL_SUCCESS) {
790
        av_log(&opencl_ctx, AV_LOG_ERROR,
791
               "Could not unmap OpenCL buffer: %s\n", av_opencl_errstr(status));
792 793 794 795
        return AVERROR_EXTERNAL;
    }
    return 0;
}
796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837

/**
 * Run a user-supplied benchmark on one OpenCL device.
 *
 * A temporary external environment is allocated, a context is created from
 * the device type on the given platform, and a command queue is created on
 * the device. The benchmark callback is then invoked with that environment.
 * Everything created here is released before returning.
 *
 * @param device_node device to benchmark; its device_id, device_type and
 *                    device_name are read
 * @param platform    platform the context is created on
 * @param benchmark   callback that performs the measurement
 * @return the value returned by benchmark, AVERROR(ENOMEM) if the external
 *         environment cannot be allocated, AVERROR_EXTERNAL if the context
 *         or command queue cannot be created
 */
int64_t av_opencl_benchmark(AVOpenCLDeviceNode *device_node, cl_platform_id platform,
                            int64_t (*benchmark)(AVOpenCLExternalEnv *ext_opencl_env))
{
    int64_t ret = 0;
    cl_int status;
    cl_context_properties cps[3];
    AVOpenCLExternalEnv *ext_opencl_env = NULL;

    ext_opencl_env = av_opencl_alloc_external_env();
    if (!ext_opencl_env) {
        return AVERROR(ENOMEM);
    }
    ext_opencl_env->device_id = device_node->device_id;
    ext_opencl_env->device_type = device_node->device_type;
    av_log(&opencl_ctx, AV_LOG_VERBOSE, "Performing test on OpenCL device %s\n",
           device_node->device_name);

    cps[0] = CL_CONTEXT_PLATFORM;
    cps[1] = (cl_context_properties)platform;
    cps[2] = 0;
    ext_opencl_env->context = clCreateContextFromType(cps, ext_opencl_env->device_type,
                                                      NULL, NULL, &status);
    if (status != CL_SUCCESS || !ext_opencl_env->context) {
        ret = AVERROR_EXTERNAL;
        /* No queue has been assigned yet, so skip the queue release. */
        goto release_context;
    }
    ext_opencl_env->command_queue = clCreateCommandQueue(ext_opencl_env->context,
                                                         ext_opencl_env->device_id, 0, &status);
    if (status != CL_SUCCESS || !ext_opencl_env->command_queue) {
        ret = AVERROR_EXTERNAL;
        goto release_queue;
    }
    ret = benchmark(ext_opencl_env);
    if (ret < 0) {
        av_log(&opencl_ctx, AV_LOG_ERROR, "Benchmark failed with OpenCL device %s\n",
               device_node->device_name);
    }

release_queue:
    if (ext_opencl_env->command_queue) {
        clReleaseCommandQueue(ext_opencl_env->command_queue);
    }
release_context:
    if (ext_opencl_env->context) {
        clReleaseContext(ext_opencl_env->context);
    }
    av_opencl_free_external_env(&ext_opencl_env);
    return ret;
}