Commit 6fca61bb authored by Lynne's avatar Lynne

lavfi: add Vulkan filtering framework

This commit adds a Vulkan filtering infrastructure for libavfilter.
It attempts to abstract as much as possible of the Vulkan API from filters.

The design of the hwcontext and the framework permits parallel,
non-CPU-blocking filtering throughout, with the exception of uploading,
downloading and mapping.
parent 88d2ccbe
......@@ -236,6 +236,7 @@ External library support:
--enable-libfontconfig enable libfontconfig, useful for drawtext filter [no]
--enable-libfreetype enable libfreetype, needed for drawtext filter [no]
--enable-libfribidi enable libfribidi, improves drawtext filter [no]
--enable-libglslang enable GLSL->SPIRV compilation via libglslang [no]
--enable-libgme enable Game Music Emu via libgme [no]
--enable-libgsm enable GSM de/encoding via libgsm [no]
--enable-libiec61883 enable iec61883 via libiec61883 [no]
......@@ -1550,11 +1551,11 @@ require_cc(){
}
require_cpp(){
name="$1"
headers="$2"
classes="$3"
shift 3
check_lib_cpp "$headers" "$classes" "$@" || die "ERROR: $name not found"
log require_cpp "$@"
name_version="$1"
name="${1%% *}"
shift
check_lib_cpp "$name" "$@" || die "ERROR: $name_version not found"
}
require_headers(){
......@@ -1771,6 +1772,7 @@ EXTERNAL_LIBRARY_LIST="
libfontconfig
libfreetype
libfribidi
libglslang
libgme
libgsm
libiec61883
......@@ -6261,6 +6263,7 @@ enabled fontconfig && enable libfontconfig
enabled libfontconfig && require_pkg_config libfontconfig fontconfig "fontconfig/fontconfig.h" FcInit
enabled libfreetype && require_pkg_config libfreetype freetype2 "ft2build.h FT_FREETYPE_H" FT_Init_FreeType
enabled libfribidi && require_pkg_config libfribidi fribidi fribidi.h fribidi_version_info
enabled libglslang && require_cpp libglslang glslang/SPIRV/GlslangToSpv.h "glslang::TIntermediate*" -lglslang -lOSDependent -lHLSL -lOGLCompiler -lSPVRemapper -lSPIRV -lSPIRV-Tools -lSPIRV-Tools-opt -lpthread -lstdc++
enabled libgme && { check_pkg_config libgme libgme gme/gme.h gme_new_emu ||
require libgme gme/gme.h gme_new_emu -lgme -lstdc++; }
enabled libgsm && { for gsm_hdr in "gsm.h" "gsm/gsm.h"; do
......
......@@ -510,6 +510,8 @@ SKIPHEADERS-$(CONFIG_QSVVPP) += qsvvpp.h
SKIPHEADERS-$(CONFIG_OPENCL) += opencl.h
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_vpp.h
OBJS-$(CONFIG_LIBGLSLANG) += glslang.o
TOOLS = graph2dot
TESTPROGS = drawutils filtfmts formats integral
......
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <pthread.h>
extern "C" {
#include "libavutil/mem.h"
#include "libavutil/avassert.h"
}
#include <glslang/Include/ResourceLimits.h>
#include <glslang/Include/revision.h>
#include <glslang/Public/ShaderLang.h>
#include <glslang/SPIRV/GlslangToSpv.h>
#include "glslang.h"
using namespace glslang;
/* Serializes glslang_init()/glslang_uninit() and protects glslang_refcount */
static pthread_mutex_t glslang_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Number of glslang_init() calls not yet balanced by glslang_uninit() */
static int glslang_refcount = 0;
/* We require Vulkan 1.1 */
#define GLSL_VERSION EShTargetVulkan_1_1
/* Vulkan 1.1 implementations require SPIR-V 1.3 to be implemented */
#define SPIRV_VERSION EShTargetSpv_1_3
// Resource limits handed to the glslang parser in glslang_compile().
// Taken from glslang's examples, which apparently base most of the choices
// on OpenGL specification limits.
// The leading values are positional: each one is labelled with the field name
// it is meant to fill, so their order has to be kept as-is.
static const TBuiltInResource DefaultTBuiltInResource = {
/* .MaxLights = */ 32,
/* .MaxClipPlanes = */ 6,
/* .MaxTextureUnits = */ 32,
/* .MaxTextureCoords = */ 32,
/* .MaxVertexAttribs = */ 64,
/* .MaxVertexUniformComponents = */ 4096,
/* .MaxVaryingFloats = */ 64,
/* .MaxVertexTextureImageUnits = */ 32,
/* .MaxCombinedTextureImageUnits = */ 80,
/* .MaxTextureImageUnits = */ 32,
/* .MaxFragmentUniformComponents = */ 4096,
/* .MaxDrawBuffers = */ 32,
/* .MaxVertexUniformVectors = */ 128,
/* .MaxVaryingVectors = */ 8,
/* .MaxFragmentUniformVectors = */ 16,
/* .MaxVertexOutputVectors = */ 16,
/* .MaxFragmentInputVectors = */ 15,
/* .MinProgramTexelOffset = */ -8,
/* .MaxProgramTexelOffset = */ 7,
/* .MaxClipDistances = */ 8,
/* .MaxComputeWorkGroupCountX = */ 65535,
/* .MaxComputeWorkGroupCountY = */ 65535,
/* .MaxComputeWorkGroupCountZ = */ 65535,
/* .MaxComputeWorkGroupSizeX = */ 1024,
/* .MaxComputeWorkGroupSizeY = */ 1024,
/* .MaxComputeWorkGroupSizeZ = */ 64,
/* .MaxComputeUniformComponents = */ 1024,
/* .MaxComputeTextureImageUnits = */ 16,
/* .MaxComputeImageUniforms = */ 8,
/* .MaxComputeAtomicCounters = */ 8,
/* .MaxComputeAtomicCounterBuffers = */ 1,
/* .MaxVaryingComponents = */ 60,
/* .MaxVertexOutputComponents = */ 64,
/* .MaxGeometryInputComponents = */ 64,
/* .MaxGeometryOutputComponents = */ 128,
/* .MaxFragmentInputComponents = */ 128,
/* .MaxImageUnits = */ 8,
/* .MaxCombinedImageUnitsAndFragmentOutputs = */ 8,
/* .MaxCombinedShaderOutputResources = */ 8,
/* .MaxImageSamples = */ 0,
/* .MaxVertexImageUniforms = */ 0,
/* .MaxTessControlImageUniforms = */ 0,
/* .MaxTessEvaluationImageUniforms = */ 0,
/* .MaxGeometryImageUniforms = */ 0,
/* .MaxFragmentImageUniforms = */ 8,
/* .MaxCombinedImageUniforms = */ 8,
/* .MaxGeometryTextureImageUnits = */ 16,
/* .MaxGeometryOutputVertices = */ 256,
/* .MaxGeometryTotalOutputComponents = */ 1024,
/* .MaxGeometryUniformComponents = */ 1024,
/* .MaxGeometryVaryingComponents = */ 64,
/* .MaxTessControlInputComponents = */ 128,
/* .MaxTessControlOutputComponents = */ 128,
/* .MaxTessControlTextureImageUnits = */ 16,
/* .MaxTessControlUniformComponents = */ 1024,
/* .MaxTessControlTotalOutputComponents = */ 4096,
/* .MaxTessEvaluationInputComponents = */ 128,
/* .MaxTessEvaluationOutputComponents = */ 128,
/* .MaxTessEvaluationTextureImageUnits = */ 16,
/* .MaxTessEvaluationUniformComponents = */ 1024,
/* .MaxTessPatchComponents = */ 120,
/* .MaxPatchVertices = */ 32,
/* .MaxTessGenLevel = */ 64,
/* .MaxViewports = */ 16,
/* .MaxVertexAtomicCounters = */ 0,
/* .MaxTessControlAtomicCounters = */ 0,
/* .MaxTessEvaluationAtomicCounters = */ 0,
/* .MaxGeometryAtomicCounters = */ 0,
/* .MaxFragmentAtomicCounters = */ 8,
/* .MaxCombinedAtomicCounters = */ 8,
/* .MaxAtomicCounterBindings = */ 1,
/* .MaxVertexAtomicCounterBuffers = */ 0,
/* .MaxTessControlAtomicCounterBuffers = */ 0,
/* .MaxTessEvaluationAtomicCounterBuffers = */ 0,
/* .MaxGeometryAtomicCounterBuffers = */ 0,
/* .MaxFragmentAtomicCounterBuffers = */ 1,
/* .MaxCombinedAtomicCounterBuffers = */ 1,
/* .MaxAtomicCounterBufferSize = */ 16384,
/* .MaxTransformFeedbackBuffers = */ 4,
/* .MaxTransformFeedbackInterleavedComponents = */ 64,
/* .MaxCullDistances = */ 8,
/* .MaxCombinedClipAndCullDistances = */ 8,
/* .MaxSamples = */ 4,
/* .maxMeshOutputVerticesNV = */ 256,
/* .maxMeshOutputPrimitivesNV = */ 512,
/* .maxMeshWorkGroupSizeX_NV = */ 32,
/* .maxMeshWorkGroupSizeY_NV = */ 1,
/* .maxMeshWorkGroupSizeZ_NV = */ 1,
/* .maxTaskWorkGroupSizeX_NV = */ 32,
/* .maxTaskWorkGroupSizeY_NV = */ 1,
/* .maxTaskWorkGroupSizeZ_NV = */ 1,
/* .maxMeshViewCountNV = */ 4,
// Shader language feature toggles; every one is enabled (set to 1) here.
.limits = {
/* .nonInductiveForLoops = */ 1,
/* .whileLoops = */ 1,
/* .doWhileLoops = */ 1,
/* .generalUniformIndexing = */ 1,
/* .generalAttributeMatrixVectorIndexing = */ 1,
/* .generalVaryingIndexing = */ 1,
/* .generalSamplerIndexing = */ 1,
/* .generalVariableIndexing = */ 1,
/* .generalConstantMatrixVectorIndexing = */ 1,
}
};
/**
 * Compile a single GLSL source string into a SPIR-V binary.
 *
 * glslang_init() must have been called successfully beforehand.
 *
 * @param glsl  NUL-terminated GLSL source
 * @param stage pipeline stage the shader is written for
 * @return NULL if the result structure itself could not be allocated.
 *         Otherwise a zero-initialized GLSlangResult in which:
 *         - rval is 0 on success or a negative AVERROR code on failure,
 *         - error_msg holds a copy of the glslang info log when parsing or
 *           linking failed (may be NULL),
 *         - data/size hold the SPIR-V words on success.
 */
GLSlangResult *glslang_compile(const char *glsl, enum GLSlangStage stage)
{
    /* Indexed by enum GLSlangStage. Plain positional initializers are used
     * because C99 array designators are not part of C++. */
    static const EShLanguage lang[] = {
        EShLangVertex,   /* GLSLANG_VERTEX   */
        EShLangFragment, /* GLSLANG_FRAGMENT */
        EShLangCompute,  /* GLSLANG_COMPUTE  */
    };

    GLSlangResult *res = (GLSlangResult *)av_mallocz(sizeof(*res));
    if (!res)
        return NULL;

    av_assert0(glslang_refcount);

    /* Reject stage values that do not map to a glslang language */
    if ((unsigned)stage >= sizeof(lang) / sizeof(lang[0])) {
        res->rval = AVERROR(EINVAL);
        return res;
    }

    /* Both objects have automatic storage, so every return path releases
     * them. The program is declared after the shader and is therefore
     * destroyed first, which is the order glslang asks for. */
    TShader shader(lang[stage]);

    shader.setEnvClient(EShClientVulkan, GLSL_VERSION);
    shader.setEnvTarget(EShTargetSpv, SPIRV_VERSION);
    shader.setStrings(&glsl, 1);

    if (!shader.parse(&DefaultTBuiltInResource, GLSL_VERSION, true, EShMsgDefault)) {
        res->error_msg = av_strdup(shader.getInfoLog());
        res->rval = AVERROR_EXTERNAL;
        return res;
    }

    TProgram prog;

    prog.addShader(&shader);
    if (!prog.link(EShMsgDefault)) {
        res->error_msg = av_strdup(prog.getInfoLog());
        res->rval = AVERROR_EXTERNAL;
        return res;
    }

    std::vector<unsigned int> spirv; /* Result */
    SpvOptions options; /* Options - by default all optimizations are off */

    options.generateDebugInfo = false; /* Makes sense for files but not here */
    options.disassemble = false; /* Will print disassembly on compilation */
    options.validate = false; /* Validates the generated SPIRV, unneeded */
    options.disableOptimizer = false; /* For debugging */
    options.optimizeSize = true; /* It's faster */

    GlslangToSpv(*prog.getIntermediate(lang[stage]), spirv, NULL, &options);

    /* Hand the caller a copy of the SPIR-V words that outlives the vector */
    res->size = spirv.size()*sizeof(unsigned int);
    res->data = av_memdup(spirv.data(), res->size);
    if (!res->data)
        res->rval = AVERROR(ENOMEM);

    return res;
}
/**
 * Take a reference on the process-wide glslang state.
 *
 * The first caller runs glslang's InitializeProcess(); later callers only
 * bump the reference count.
 *
 * @return 0 on success, 1 if InitializeProcess() reported failure
 */
int glslang_init(void)
{
    int failed = 0;

    pthread_mutex_lock(&glslang_mutex);

    const int first_user = (glslang_refcount == 0);
    glslang_refcount++;
    if (first_user)
        failed = InitializeProcess() ? 0 : 1;

    pthread_mutex_unlock(&glslang_mutex);

    return failed;
}
/**
 * Drop a reference on the process-wide glslang state.
 *
 * Aborts through av_assert0() if no reference is held. When the last
 * reference goes away glslang's FinalizeProcess() is run.
 */
void glslang_uninit(void)
{
    pthread_mutex_lock(&glslang_mutex);

    av_assert0(glslang_refcount > 0);
    glslang_refcount--;
    if (!glslang_refcount)
        FinalizeProcess();

    pthread_mutex_unlock(&glslang_mutex);
}
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVFILTER_GLSLANG_H
#define AVFILTER_GLSLANG_H
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Takes a reference on the shared glslang state, initializing glslang on the
 * first call. Returns 0 on success and non-zero if initialization failed. */
int glslang_init(void);
/* Drops a reference taken by glslang_init(); glslang is finalized once the
 * last reference is released. */
void glslang_uninit(void);
/* Outcome of one glslang_compile() call */
typedef struct GLSlangResult {
int rval; /* 0 on success, otherwise an AVERROR code */
char *error_msg; /* Copy of the compiler's info log on failure, may be NULL */
void *data; /* Shader data or NULL */
size_t size; /* Size of data in bytes */
} GLSlangResult;
/* Shader stage a GLSL source is compiled for */
enum GLSlangStage {
GLSLANG_VERTEX, /* Vertex shader */
GLSLANG_FRAGMENT, /* Fragment shader */
GLSLANG_COMPUTE, /* Compute shader */
};
/* Compile GLSL into a SPIRV stream, if possible.
 * Returns NULL only if the result structure could not be allocated; otherwise
 * the outcome is reported through the rval field of the returned struct. */
GLSlangResult *glslang_compile(const char *glsl, enum GLSlangStage stage);
#ifdef __cplusplus
}
#endif
#endif /* AVFILTER_GLSLANG_H */
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "formats.h"
#include "vulkan.h"
#include "glslang.h"
/* Generates a static create_<shortname>() helper.
 *
 * The helper allocates one zeroed `type`, appends a pointer to it to the
 * pointer array `array` (whose length is kept in `num`) inside `ctx`, and
 * returns the new element, or NULL if either allocation fails.
 * Elements are allocated individually so that their addresses stay valid
 * when further elements are created. */
#define FN_CREATING(ctx, type, shortname, array, num)                          \
static av_always_inline type *create_ ##shortname(ctx *dctx)                   \
{                                                                              \
    type **grown;                                                              \
    type *item = av_mallocz(sizeof(*item));                                    \
                                                                               \
    if (!item)                                                                 \
        return NULL;                                                           \
                                                                               \
    grown = av_realloc_array(dctx->array, sizeof(*dctx->array),                \
                             dctx->num + 1);                                   \
    if (!grown) {                                                              \
        av_free(item);                                                         \
        return NULL;                                                           \
    }                                                                          \
                                                                               \
    grown[dctx->num++] = item;                                                 \
    dctx->array = grown;                                                       \
                                                                               \
    return item;                                                               \
}
/* Component mapping which leaves all four channels (r, g, b, a) unswizzled */
const VkComponentMapping ff_comp_identity_map = {
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
};
/* Returns the name of a VkResult enumerator as a string literal, or
 * "Unknown error" for values that are not listed below */
const char *ff_vk_ret2str(VkResult res)
{
#define RESULT_NAME(code) case code: return #code
    switch (res) {
    /* Core success and status codes */
    RESULT_NAME(VK_SUCCESS);
    RESULT_NAME(VK_NOT_READY);
    RESULT_NAME(VK_TIMEOUT);
    RESULT_NAME(VK_EVENT_SET);
    RESULT_NAME(VK_EVENT_RESET);
    RESULT_NAME(VK_INCOMPLETE);
    /* Core error codes */
    RESULT_NAME(VK_ERROR_OUT_OF_HOST_MEMORY);
    RESULT_NAME(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    RESULT_NAME(VK_ERROR_INITIALIZATION_FAILED);
    RESULT_NAME(VK_ERROR_DEVICE_LOST);
    RESULT_NAME(VK_ERROR_MEMORY_MAP_FAILED);
    RESULT_NAME(VK_ERROR_LAYER_NOT_PRESENT);
    RESULT_NAME(VK_ERROR_EXTENSION_NOT_PRESENT);
    RESULT_NAME(VK_ERROR_FEATURE_NOT_PRESENT);
    RESULT_NAME(VK_ERROR_INCOMPATIBLE_DRIVER);
    RESULT_NAME(VK_ERROR_TOO_MANY_OBJECTS);
    RESULT_NAME(VK_ERROR_FORMAT_NOT_SUPPORTED);
    RESULT_NAME(VK_ERROR_FRAGMENTED_POOL);
    /* Extension and later-version codes */
    RESULT_NAME(VK_ERROR_SURFACE_LOST_KHR);
    RESULT_NAME(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    RESULT_NAME(VK_SUBOPTIMAL_KHR);
    RESULT_NAME(VK_ERROR_OUT_OF_DATE_KHR);
    RESULT_NAME(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    RESULT_NAME(VK_ERROR_VALIDATION_FAILED_EXT);
    RESULT_NAME(VK_ERROR_INVALID_SHADER_NV);
    RESULT_NAME(VK_ERROR_OUT_OF_POOL_MEMORY);
    RESULT_NAME(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    RESULT_NAME(VK_ERROR_NOT_PERMITTED_EXT);
    default:
        break;
    }
#undef RESULT_NAME

    return "Unknown error";
}
/**
 * Allocate device memory satisfying both a resource's requirements and the
 * requested property flags.
 *
 * @param avctx           filter context (its priv data is a VulkanFilterContext)
 * @param req             memory requirements of the resource; req->size is
 *                        rounded up in place when host-visible memory is asked for
 * @param req_flags       property flags the chosen memory type must include
 * @param alloc_extension chained into VkMemoryAllocateInfo.pNext, may be NULL
 * @param mem_flags       the property flags of the chosen memory type are
 *                        OR-ed into this value
 * @param mem             receives the allocated memory handle
 * @return 0 on success, AVERROR(EINVAL) if no memory type matches,
 *         AVERROR(ENOMEM) if vkAllocateMemory() fails
 */
static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
                        VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
                        VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    VkPhysicalDeviceProperties props;
    VkPhysicalDeviceMemoryProperties mprops;
    VulkanFilterContext *s = avctx->priv;

    VkMemoryAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = alloc_extension,
    };

    vkGetPhysicalDeviceProperties(s->hwctx->phys_dev, &props);
    vkGetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &mprops);

    /* Align if we need to */
    if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
        req->size = FFALIGN(req->size, props.limits.minMemoryMapAlignment);

    alloc_info.allocationSize = req->size;

    /* The vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (uint32_t i = 0; i < mprops.memoryTypeCount; i++) {
        /* The memory type must be supported by the requirements (bitfield).
         * The shift is done on an unsigned value: bit 31 is a legal memory
         * type index and shifting a signed 1 into it is undefined. */
        if (!(req->memoryTypeBits & (1U << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }

    if (index < 0) {
        av_log(avctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vkAllocateMemory(s->hwctx->act_dev, &alloc_info,
                           s->hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR(ENOMEM);
    }

    *mem_flags |= mprops.memoryTypes[index].propertyFlags;

    return 0;
}
/**
 * Create a Vulkan buffer, allocate memory for it and bind the two together.
 *
 * @param avctx filter context (its priv data is a VulkanFilterContext)
 * @param buf   receives the buffer handle, the memory handle and the
 *              property flags of the memory type that was picked
 * @param size  requested buffer size in bytes
 * @param usage Vulkan buffer usage flags
 * @param flags memory property flags the backing memory must have
 * @return 0 on success, a negative AVERROR code on failure
 */
int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size,
                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
    int err;
    VkResult ret;
    VkMemoryRequirements req;
    VulkanFilterContext *s = avctx->priv;

    VkBufferCreateInfo buf_spawn = {
        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext       = NULL,
        .usage       = usage,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .size        = size, /* Gets FFALIGNED during alloc if host visible
                                but should be ok */
    };

    /* Create with the device's allocation callbacks: ff_vk_free_buf()
     * destroys the buffer with s->hwctx->alloc, and Vulkan requires the
     * allocator used at destruction to match the one used at creation. */
    ret = vkCreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc,
                         &buf->buf);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    vkGetBufferMemoryRequirements(s->hwctx->act_dev, buf->buf, &req);

    err = vk_alloc_mem(avctx, &req, flags, NULL, &buf->flags, &buf->mem);
    if (err)
        return err;

    ret = vkBindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
/**
 * Map the memory of several buffers into host address space.
 *
 * @param avctx      filter context (its priv data is a VulkanFilterContext)
 * @param buf        array of nb_buffers buffers to map
 * @param mem        array of nb_buffers pointers receiving the mappings
 * @param nb_buffers number of entries in buf and mem
 * @param invalidate if non-zero, the mapped ranges of all buffers whose
 *                   memory is not host-coherent are invalidated afterwards
 * @return 0 on success, a negative AVERROR code on failure
 */
int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[],
                      int nb_buffers, int invalidate)
{
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;
    VkMappedMemoryRange *inval_list = NULL;
    int inval_count = 0;

    for (int i = 0; i < nb_buffers; i++) {
        ret = vkMapMemory(s->hwctx->act_dev, buf[i].mem, 0,
                          VK_WHOLE_SIZE, 0, (void **)&mem[i]);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    if (!invalidate)
        return 0;

    for (int i = 0; i < nb_buffers; i++) {
        const VkMappedMemoryRange ival_buf = {
            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = buf[i].mem,
            .size   = VK_WHOLE_SIZE,
        };

        /* Host-coherent memory never needs explicit invalidation */
        if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
            continue;

        inval_list = av_fast_realloc(s->scratch, &s->scratch_size,
                                     (inval_count + 1)*sizeof(*inval_list));
        if (!inval_list)
            return AVERROR(ENOMEM);

        /* Keep the context's scratch pointer in sync with the (possibly
         * moved) allocation; otherwise it would go stale while
         * scratch_size already describes the new block. */
        s->scratch = (void *)inval_list;

        inval_list[inval_count++] = ival_buf;
    }

    if (inval_count) {
        ret = vkInvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
                                             inval_list);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    return 0;
}
/**
 * Unmap the memory of several buffers, optionally flushing host writes first.
 *
 * @param avctx      filter context (its priv data is a VulkanFilterContext)
 * @param buf        array of nb_buffers mapped buffers
 * @param nb_buffers number of entries in buf
 * @param flush      if non-zero, the mapped ranges of all buffers whose
 *                   memory is not host-coherent are flushed before unmapping
 * @return 0 on success; AVERROR(ENOMEM) if the flush list could not be
 *         allocated (nothing is unmapped in that case); AVERROR_EXTERNAL if
 *         flushing failed (the buffers are still unmapped)
 */
int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
                        int flush)
{
    int err = 0;
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;
    VkMappedMemoryRange *flush_list = NULL;
    int flush_count = 0;

    if (flush) {
        for (int i = 0; i < nb_buffers; i++) {
            const VkMappedMemoryRange flush_buf = {
                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
                .memory = buf[i].mem,
                .size   = VK_WHOLE_SIZE,
            };

            /* Host-coherent memory never needs an explicit flush */
            if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
                continue;

            flush_list = av_fast_realloc(s->scratch, &s->scratch_size,
                                         (flush_count + 1)*sizeof(*flush_list));
            if (!flush_list)
                return AVERROR(ENOMEM);

            /* Keep the context's scratch pointer in sync with the (possibly
             * moved) allocation; otherwise it would go stale while
             * scratch_size already describes the new block. */
            s->scratch = (void *)flush_list;

            flush_list[flush_count++] = flush_buf;
        }
    }

    if (flush_count) {
        ret = vkFlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
                                        flush_list);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
        }
    }

    for (int i = 0; i < nb_buffers; i++)
        vkUnmapMemory(s->hwctx->act_dev, buf[i].mem);

    return err;
}
/**
 * Destroy a buffer and release its memory.
 *
 * A NULL buf is accepted and ignored, and so are handles equal to
 * VK_NULL_HANDLE. The handles stored in buf are not reset.
 */
void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf)
{
    VulkanFilterContext *s = avctx->priv;

    if (buf) {
        if (buf->buf != VK_NULL_HANDLE)
            vkDestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);

        if (buf->mem != VK_NULL_HANDLE)
            vkFreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
    }
}
/**
 * Append a push constant range to a pipeline.
 *
 * @param avctx  filter context (unused here)
 * @param pl     pipeline whose push constant list is extended
 * @param offset start offset of the range
 * @param size   size of the range
 * @param stage  shader stage(s) which can access the range
 * @return 0 on success, AVERROR(ENOMEM) if the list could not be grown; the
 *         existing list is left untouched in that case
 */
int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl,
                            int offset, int size, VkShaderStageFlagBits stage)
{
    VkPushConstantRange *pc;
    VkPushConstantRange *ranges;

    /* Grow through a temporary so a failed reallocation neither leaks the
     * current array nor leaves push_consts_num pointing past a NULL array */
    ranges = av_realloc_array(pl->push_consts, pl->push_consts_num + 1,
                              sizeof(*pl->push_consts));
    if (!ranges)
        return AVERROR(ENOMEM);
    pl->push_consts = ranges;

    pc = &pl->push_consts[pl->push_consts_num++];
    memset(pc, 0, sizeof(*pc));

    pc->stageFlags = stage;
    pc->offset     = offset;
    pc->size       = size;

    return 0;
}
FN_CREATING(VulkanFilterContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
/**
 * Create an execution context: a command pool, one primary command buffer,
 * a fence, and the queue to submit to.
 *
 * The context is registered in the filter context's exec_ctx array as soon
 * as it has been allocated, including when a later step fails.
 *
 * @param avctx filter context (its priv data is a VulkanFilterContext)
 * @param ctx   receives the new execution context on success
 * @param queue queue family index; queue 0 of that family is used
 * @return 0 on success, a negative AVERROR code on failure
 */
int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx, int queue)
{
    VkResult ret;
    FFVkExecContext *e;
    VulkanFilterContext *s = avctx->priv;

    VkCommandPoolCreateInfo cqueue_create = {
        .sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags            = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = queue,
    };
    VkCommandBufferAllocateInfo cbuf_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = 1,
    };
    VkFenceCreateInfo fence_spawn = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };

    e = create_exec_ctx(s);
    if (!e)
        return AVERROR(ENOMEM);

    ret = vkCreateCommandPool(s->hwctx->act_dev, &cqueue_create,
                              s->hwctx->alloc, &e->pool);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
               ff_vk_ret2str(ret));
        /* Negative code, like every other failure path in this file */
        return AVERROR_EXTERNAL;
    }

    cbuf_create.commandPool = e->pool;

    ret = vkAllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, &e->buf);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    ret = vkCreateFence(s->hwctx->act_dev, &fence_spawn,
                        s->hwctx->alloc, &e->fence);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    vkGetDeviceQueue(s->hwctx->act_dev, queue, 0, &e->queue);

    *ctx = e;

    return 0;
}
/**
 * Begin recording into an execution context's command buffer.
 *
 * The wait/signal semaphore counters of the context are reset, so any
 * dependencies added for a previous submission are dropped.
 *
 * @return 0 on success, AVERROR_EXTERNAL if vkBeginCommandBuffer() fails
 */
int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
{
    VkResult ret;
    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    e->sem_wait_cnt = 0;
    e->sem_sig_cnt  = 0;

    ret = vkBeginCommandBuffer(e->buf, &cmd_start);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Failed to start command recording: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
/**
 * Make the next submission of an execution context depend on a frame.
 *
 * For every plane of the frame's software format, the plane's semaphore is
 * added both to the list of semaphores to wait on (with in_wait_dst_flag as
 * the destination stage) and to the list of semaphores to signal.
 *
 * @return 0 on success, AVERROR(ENOMEM) if a list could not be grown; the
 *         lists keep their previous contents in that case
 */
int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
                       AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag)
{
    AVVkFrame *f = (AVVkFrame *)frame->data[0];
    AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
    int planes = av_pix_fmt_count_planes(fc->sw_format);

    for (int i = 0; i < planes; i++) {
        /* Each list is grown through a temporary so that a failed
         * reallocation does not overwrite (and leak) the live pointer */
        void *tmp;

        tmp = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
                              (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
        if (!tmp)
            return AVERROR(ENOMEM);
        e->sem_wait = tmp;

        tmp = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
                              (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
        if (!tmp)
            return AVERROR(ENOMEM);
        e->sem_wait_dst = tmp;

        tmp = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
                              (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
        if (!tmp)
            return AVERROR(ENOMEM);
        e->sem_sig = tmp;

        e->sem_wait[e->sem_wait_cnt]     = f->sem[i];
        e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
        e->sem_wait_cnt++;

        e->sem_sig[e->sem_sig_cnt] = f->sem[i];
        e->sem_sig_cnt++;
    }

    return 0;
}
/**
 * Finish recording, submit the command buffer and wait for it to complete.
 *
 * The submission waits on and signals the semaphores collected in the
 * execution context. After the submit, the call waits on the context's fence
 * without a timeout and then resets it.
 *
 * @return 0 on success, AVERROR_EXTERNAL if ending the command buffer or
 *         submitting it fails
 */
int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
{
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;

    VkSubmitInfo s_info = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .commandBufferCount   = 1,
        .pCommandBuffers      = &e->buf,

        .pWaitSemaphores      = e->sem_wait,
        .pWaitDstStageMask    = e->sem_wait_dst,
        .waitSemaphoreCount   = e->sem_wait_cnt,

        .pSignalSemaphores    = e->sem_sig,
        .signalSemaphoreCount = e->sem_sig_cnt,
    };

    /* A command buffer whose recording failed must not be submitted */
    ret = vkEndCommandBuffer(e->buf);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    ret = vkQueueSubmit(e->queue, 1, &s_info, e->fence);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    vkWaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
    vkResetFences(s->hwctx->act_dev, 1, &e->fence);

    return 0;
}
/**
 * Format negotiation callback: advertise AV_PIX_FMT_VULKAN as the only
 * format supported on all links.
 *
 * @return AVERROR(ENOMEM) if the format list cannot be created, otherwise
 *         the result of ff_set_common_formats()
 */
int ff_vk_filter_query_formats(AVFilterContext *avctx)
{
    static const enum AVPixelFormat supported[] = {
        AV_PIX_FMT_VULKAN,
        AV_PIX_FMT_NONE,
    };
    AVFilterFormats *formats = ff_make_format_list(supported);

    return formats ? ff_set_common_formats(avctx, formats)
                   : AVERROR(ENOMEM);
}
/**
 * Replace the filter's device reference with a new reference to `device`
 * and refresh the cached device/hwctx pointers from it.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the reference cannot be created
 */
static int vulkan_filter_set_device(AVFilterContext *avctx,
                                    AVBufferRef *device)
{
    VulkanFilterContext *s = avctx->priv;

    /* Drop whatever device was referenced before taking the new one */
    av_buffer_unref(&s->device_ref);

    s->device_ref = av_buffer_ref(device);
    if (s->device_ref) {
        s->device = (AVHWDeviceContext*)s->device_ref->data;
        s->hwctx  = s->device->hwctx;
        return 0;
    }

    return AVERROR(ENOMEM);
}
/**
 * Replace the filter's frames context reference with a new reference to
 * `frames`.
 *
 * @return 0 on success, AVERROR(ENOMEM) if the reference cannot be created
 */
static int vulkan_filter_set_frames(AVFilterContext *avctx,
                                    AVBufferRef *frames)
{
    VulkanFilterContext *s = avctx->priv;

    /* Drop whatever frames context was referenced before */
    av_buffer_unref(&s->frames_ref);

    s->frames_ref = av_buffer_ref(frames);

    return s->frames_ref ? 0 : AVERROR(ENOMEM);
}
/**
 * Input link configuration callback.
 *
 * Every input must carry a hardware frames context. For the first input
 * only, the device and frames context are adopted by the filter and any
 * output parameter that has not been set yet (format, width, height) is
 * defaulted from the input.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
int ff_vk_filter_config_input(AVFilterLink *inlink)
{
    AVFilterContext *avctx = inlink->dst;
    VulkanFilterContext *s = avctx->priv;
    AVHWFramesContext *in_frames;
    int ret;

    if (!inlink->hw_frames_ctx) {
        av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
               "hardware frames context on the input.\n");
        return AVERROR(EINVAL);
    }

    /* Only the first input supplies the device and the output defaults */
    if (inlink != avctx->inputs[0])
        return 0;

    in_frames = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
    if (in_frames->format != AV_PIX_FMT_VULKAN)
        return AVERROR(EINVAL);

    ret = vulkan_filter_set_device(avctx, in_frames->device_ref);
    if (ret < 0)
        return ret;

    ret = vulkan_filter_set_frames(avctx, inlink->hw_frames_ctx);
    if (ret < 0)
        return ret;

    /* Anything the filter did not choose itself mirrors the input */
    s->input_format = in_frames->sw_format;

    if (s->output_format == AV_PIX_FMT_NONE)
        s->output_format = in_frames->sw_format;

    if (!s->output_width)
        s->output_width = inlink->w;

    if (!s->output_height)
        s->output_height = inlink->h;

    return 0;
}
/**
 * Output link configuration callback for filters whose output shares the
 * frames context stored by the input configuration.
 *
 * If no device has been set yet, the filter's hw_device_ctx is used. The
 * output link gets a new reference to the stored frames context and the
 * configured output dimensions.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
int ff_vk_filter_config_output_inplace(AVFilterLink *outlink)
{
    AVFilterContext *avctx = outlink->src;
    VulkanFilterContext *s = avctx->priv;

    av_buffer_unref(&outlink->hw_frames_ctx);

    /* Fall back to the filter-level device when no input provided one */
    if (!s->device_ref) {
        int ret;

        if (!avctx->hw_device_ctx) {
            av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
                   "Vulkan device.\n");
            return AVERROR(EINVAL);
        }

        ret = vulkan_filter_set_device(avctx, avctx->hw_device_ctx);
        if (ret < 0)
            return ret;
    }

    outlink->hw_frames_ctx = av_buffer_ref(s->frames_ref);
    if (!outlink->hw_frames_ctx)
        return AVERROR(ENOMEM);

    outlink->w = s->output_width;
    outlink->h = s->output_height;

    return 0;
}
/**
 * Output link configuration callback which creates a new frames context.
 *
 * If no device has been set yet, the filter's hw_device_ctx is used. A new
 * Vulkan frames context with the configured output format and dimensions is
 * allocated, initialised and attached to the output link.
 *
 * @return 0 on success, a negative AVERROR code on failure
 */
int ff_vk_filter_config_output(AVFilterLink *outlink)
{
    AVFilterContext *avctx = outlink->src;
    VulkanFilterContext *s = avctx->priv;
    AVBufferRef *frames_ref;
    AVHWFramesContext *frames;
    int ret;

    av_buffer_unref(&outlink->hw_frames_ctx);

    /* Fall back to the filter-level device when no input provided one */
    if (!s->device_ref) {
        if (!avctx->hw_device_ctx) {
            av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
                   "Vulkan device.\n");
            return AVERROR(EINVAL);
        }

        ret = vulkan_filter_set_device(avctx, avctx->hw_device_ctx);
        if (ret < 0)
            return ret;
    }

    frames_ref = av_hwframe_ctx_alloc(s->device_ref);
    if (!frames_ref)
        return AVERROR(ENOMEM);

    frames            = (AVHWFramesContext*)frames_ref->data;
    frames->format    = AV_PIX_FMT_VULKAN;
    frames->sw_format = s->output_format;
    frames->width     = s->output_width;
    frames->height    = s->output_height;

    ret = av_hwframe_ctx_init(frames_ref);
    if (ret < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialise output "
               "frames: %d.\n", ret);
        av_buffer_unref(&frames_ref);
        return ret;
    }

    /* The link takes over the only reference */
    outlink->hw_frames_ctx = frames_ref;
    outlink->w = s->output_width;
    outlink->h = s->output_height;

    return 0;
}
/**
 * Common filter initialisation: mark the output format as not yet chosen
 * and take a reference on the glslang compiler state.
 *
 * @return 0 on success, AVERROR_EXTERNAL if glslang_init() fails
 */
int ff_vk_filter_init(AVFilterContext *avctx)
{
    VulkanFilterContext *s = avctx->priv;

    s->output_format = AV_PIX_FMT_NONE;

    return glslang_init() ? AVERROR_EXTERNAL : 0;
}
FN_CREATING(VulkanFilterContext, VkSampler, sampler, samplers, samplers_num)
/**
 * Create a sampler and register it in the filter context.
 *
 * The same filter is used for magnification and minification, and all three
 * address modes clamp to the edge.
 *
 * @param avctx         filter context (its priv data is a VulkanFilterContext)
 * @param unnorm_coords non-zero to sample with unnormalized coordinates; this
 *                      also selects nearest mipmap mode instead of linear
 * @param filt          filter used for both magnification and minification
 * @return pointer to the new sampler handle, or NULL on failure
 */
VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
                              VkFilter filt)
{
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;

    /* No member is initialized from another member of the same object: the
     * initializers of one list are indeterminately sequenced, so reading
     * sampler_info.* in here could yield an uninitialized value. */
    VkSamplerCreateInfo sampler_info = {
        .sType                   = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
        .magFilter               = filt,
        .minFilter               = filt,
        .mipmapMode              = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
                                                   VK_SAMPLER_MIPMAP_MODE_LINEAR,
        .addressModeU            = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
        .addressModeV            = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
        .addressModeW            = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
        .anisotropyEnable        = VK_FALSE,
        .compareOp               = VK_COMPARE_OP_NEVER,
        .borderColor             = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
        .unnormalizedCoordinates = unnorm_coords,
    };

    VkSampler *sampler = create_sampler(s);
    if (!sampler)
        return NULL;

    ret = vkCreateSampler(s->hwctx->act_dev, &sampler_info,
                          s->hwctx->alloc, sampler);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to init sampler: %s\n",
               ff_vk_ret2str(ret));
        return NULL;
    }

    return sampler;
}
/**
 * Pick the image format name to use for a pixel format: "rgba16f" when the
 * first component is deeper than 8 bits, "rgba8" otherwise.
 */
const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt);

    if (desc->comp[0].depth > 8)
        return "rgba16f";

    return "rgba8";
}
/**
 * Create a 2D colour image view covering mip level 0 and array layer 0.
 *
 * @param avctx filter context (its priv data is a VulkanFilterContext)
 * @param v     receives the image view handle
 * @param img   image to create the view for
 * @param fmt   format of the view
 * @param map   component swizzle to apply
 * @return 0 on success, AVERROR_EXTERNAL if vkCreateImageView() fails
 */
int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, VkImage img,
                           VkFormat fmt, const VkComponentMapping map)
{
    VulkanFilterContext *s = avctx->priv;
    VkImageViewCreateInfo imgview_spawn = {
        .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .pNext      = NULL,
        .image      = img,
        .viewType   = VK_IMAGE_VIEW_TYPE_2D,
        .format     = fmt,
        .components = map,
        .subresourceRange = {
            .aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel   = 0,
            .levelCount     = 1,
            .baseArrayLayer = 0,
            .layerCount     = 1,
        },
    };

    VkResult ret = vkCreateImageView(s->hwctx->act_dev, &imgview_spawn,
                                     s->hwctx->alloc, v);
    if (ret != VK_SUCCESS) {
        /* Log against the filter context, as the rest of this file does */
        av_log(avctx, AV_LOG_ERROR, "Failed to create imageview: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
/**
 * Destroy an image view and reset the caller's handle.
 *
 * A NULL pointer or a handle that is already null is ignored.
 */
void ff_vk_destroy_imageview(AVFilterContext *avctx, VkImageView *v)
{
    VulkanFilterContext *s = avctx->priv;

    if (v && *v != VK_NULL_HANDLE) {
        vkDestroyImageView(s->hwctx->act_dev, *v, s->hwctx->alloc);
        /* VkImageView is a non-dispatchable handle, which is not a pointer
         * type on every platform; VK_NULL_HANDLE is its null value */
        *v = VK_NULL_HANDLE;
    }
}
FN_CREATING(VulkanPipeline, SPIRVShader, shader, shaders, shaders_num)
/* Allocates a new shader inside the pipeline `pl` and prepares it for having
 * GLSL source appended to it.
 * The `name` pointer is stored as-is, not copied.
 * Returns the new shader, or NULL if it could not be allocated. */
SPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl,
const char *name, VkShaderStageFlags stage)
{
SPIRVShader *shd = create_shader(pl);
if (!shd)
return NULL;
/* The GLSL source is accumulated in an unlimited-size AVBPrint buffer */
av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);
shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
shd->shader.stage = stage;
shd->name = name;
/* NOTE(review): GLSLF/GLSLC are defined outside this view; presumably they
 * append their argument as a source line to shd->src - confirm. */
GLSLF(0, #version %i ,460);
GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) );
GLSLC(0, );
return shd;
}
/**
 * Record the local workgroup size of a compute shader and append the
 * matching GLSL layout declaration to the shader source.
 *
 * @param local_size the x, y and z workgroup sizes, in that order
 */
void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, SPIRVShader *shd,
                                    int local_size[3])
{
    for (int axis = 0; axis < 3; axis++)
        shd->local_size[axis] = local_size[axis];

    av_bprintf(&shd->src, "layout (local_size_x = %i, "
               "local_size_y = %i, local_size_z = %i) in;\n\n",
               shd->local_size[0], shd->local_size[1], shd->local_size[2]);
}
/**
 * Log a shader's GLSL source with a line number in front of every line.
 *
 * @param avctx logging context
 * @param shd   shader whose source is printed
 * @param prio  av_log() level to print at
 */
static void print_shader(AVFilterContext *avctx, SPIRVShader *shd, int prio)
{
    int line = 0;
    const char *p = shd->src.str;
    const char *start = p;
    /* Measured once instead of on every loop iteration */
    const size_t len = strlen(p);

    AVBPrint buf;
    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);

    for (size_t i = 0; i < len; i++) {
        if (p[i] == '\n') {
            av_bprintf(&buf, "%i\t", ++line);
            av_bprint_append_data(&buf, start, &p[i] - start + 1);
            start = &p[i + 1];
        }
    }

    /* Source that does not end in a newline still has a last line to show */
    if (*start)
        av_bprintf(&buf, "%i\t%s\n", ++line, start);

    av_log(avctx, prio, "Shader %s: \n%s", shd->name, buf.str);
    av_bprint_finalize(&buf, NULL);
}
/**
 * Compile a shader's accumulated GLSL source to SPIR-V and create the
 * Vulkan shader module from it.
 *
 * On a compilation error the numbered source and the compiler's message are
 * logged at error level; on success the source is logged at verbose level.
 *
 * @param avctx      filter context (its priv data is a VulkanFilterContext)
 * @param shd        shader to compile; its module handle is filled in
 * @param entrypoint name of the entry point, stored in the shader as-is
 * @return 0 on success, a negative AVERROR code on failure
 */
int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd,
                         const char *entrypoint)
{
    int err = 0;
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;
    VkShaderModuleCreateInfo shader_create;
    GLSlangResult *res;

    /* Sparse table indexed by the Vulkan stage bit value */
    static const enum GLSlangStage emap[] = {
        [VK_SHADER_STAGE_VERTEX_BIT]   = GLSLANG_VERTEX,
        [VK_SHADER_STAGE_FRAGMENT_BIT] = GLSLANG_FRAGMENT,
        [VK_SHADER_STAGE_COMPUTE_BIT]  = GLSLANG_COMPUTE,
    };

    shd->shader.pName = entrypoint;

    res = glslang_compile(shd->src.str, emap[shd->shader.stage]);
    if (!res)
        return AVERROR(ENOMEM);

    if (res->rval) {
        av_log(avctx, AV_LOG_ERROR, "Error compiling shader %s: %s!\n",
               shd->name, av_err2str(res->rval));
        print_shader(avctx, shd, AV_LOG_ERROR);
        if (res->error_msg)
            av_log(avctx, AV_LOG_ERROR, "%s", res->error_msg);
        err = res->rval;
        goto end;
    }

    print_shader(avctx, shd, AV_LOG_VERBOSE);

    shader_create.sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    shader_create.pNext    = NULL;
    shader_create.codeSize = res->size;
    shader_create.flags    = 0;
    shader_create.pCode    = res->data;

    ret = vkCreateShaderModule(s->hwctx->act_dev, &shader_create, NULL,
                               &shd->shader.module);
    if (ret != VK_SUCCESS) {
        av_log(avctx, AV_LOG_ERROR, "Unable to create shader module: %s\n",
               ff_vk_ret2str(ret));
        err = AVERROR_EXTERNAL;
        goto end;
    }

    av_log(avctx, AV_LOG_VERBOSE, "Shader %s linked! Size: %zu bytes\n",
           shd->name, shader_create.codeSize);

end:
    /* Release the whole compilation result on every path: the message, the
     * SPIR-V blob (no longer needed once the module exists) and the struct */
    av_free(res->error_msg);
    av_free(res->data);
    av_free(res);

    return err;
}
/*
 * Per-descriptor-type properties, indexed by VkDescriptorType. They drive
 * both the update-template stride and the GLSL declaration emitted by
 * ff_vk_add_descriptor_set().
 */
static const struct descriptor_props {
    size_t struct_size;  /* Size of the opaque which updates the descriptor */
    const char *type;    /* GLSL type keyword, NULL when none is printed */
    int is_uniform;      /* Declaration carries the "uniform" keyword */
    int mem_quali;       /* Can use a memory qualifier */
    int dim_needed;      /* Must indicate dimension */
    int buf_content;     /* Must indicate buffer contents */
} descriptor_props[] = {
    /* Image-like descriptors, updated through VkDescriptorImageInfo */
    [VK_DESCRIPTOR_TYPE_SAMPLER] = {
        .struct_size = sizeof(VkDescriptorImageInfo),
        .type        = "sampler",
        .is_uniform  = 1,
        .mem_quali   = 0,
        .dim_needed  = 0,
        .buf_content = 0,
    },
    [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE] = {
        .struct_size = sizeof(VkDescriptorImageInfo),
        .type        = "texture",
        .is_uniform  = 1,
        .mem_quali   = 0,
        .dim_needed  = 1,
        .buf_content = 0,
    },
    [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE] = {
        .struct_size = sizeof(VkDescriptorImageInfo),
        .type        = "image",
        .is_uniform  = 1,
        .mem_quali   = 1,
        .dim_needed  = 1,
        .buf_content = 0,
    },
    [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT] = {
        .struct_size = sizeof(VkDescriptorImageInfo),
        .type        = "subpassInput",
        .is_uniform  = 1,
        .mem_quali   = 0,
        .dim_needed  = 0,
        .buf_content = 0,
    },
    [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = {
        .struct_size = sizeof(VkDescriptorImageInfo),
        .type        = "sampler",
        .is_uniform  = 1,
        .mem_quali   = 0,
        .dim_needed  = 1,
        .buf_content = 0,
    },

    /* Buffer descriptors, updated through VkDescriptorBufferInfo */
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER] = {
        .struct_size = sizeof(VkDescriptorBufferInfo),
        .type        = NULL,
        .is_uniform  = 1,
        .mem_quali   = 0,
        .dim_needed  = 0,
        .buf_content = 1,
    },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER] = {
        .struct_size = sizeof(VkDescriptorBufferInfo),
        .type        = "buffer",
        .is_uniform  = 0,
        .mem_quali   = 1,
        .dim_needed  = 0,
        .buf_content = 1,
    },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = {
        .struct_size = sizeof(VkDescriptorBufferInfo),
        .type        = NULL,
        .is_uniform  = 1,
        .mem_quali   = 0,
        .dim_needed  = 0,
        .buf_content = 1,
    },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = {
        .struct_size = sizeof(VkDescriptorBufferInfo),
        .type        = "buffer",
        .is_uniform  = 0,
        .mem_quali   = 1,
        .dim_needed  = 0,
        .buf_content = 1,
    },

    /* Texel buffers, updated through a VkBufferView handle */
    [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER] = {
        .struct_size = sizeof(VkBufferView),
        .type        = "samplerBuffer",
        .is_uniform  = 1,
        .mem_quali   = 0,
        .dim_needed  = 0,
        .buf_content = 0,
    },
    [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = {
        .struct_size = sizeof(VkBufferView),
        .type        = "imageBuffer",
        .is_uniform  = 1,
        .mem_quali   = 0,
        .dim_needed  = 0,
        .buf_content = 0,
    },
};
/**
 * Register a descriptor set with a pipeline and append the matching GLSL
 * "layout (set = ..., binding = ...)" declarations to a shader's source.
 *
 * @param avctx  filter context; avctx->priv is used as a VulkanFilterContext
 * @param pl     pipeline the set is added to
 * @param shd    shader that receives the GLSL declarations
 * @param desc   array of num bindings; binding i of the set is desc[i]
 * @param num    number of entries in desc
 * @param only_print_to_shader if non-zero, no Vulkan objects are created and
 *               pl is left untouched; only the GLSL declarations are written,
 *               using the index of the most recently registered set
 * @return 0 on success, a negative AVERROR code on failure
 */
int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
                             SPIRVShader *shd, VulkanDescriptorSetBinding *desc,
                             int num, int only_print_to_shader)
{
    VkResult ret;
    VkDescriptorSetLayout *layout = NULL;
    VulkanFilterContext *s = avctx->priv;

    if (only_print_to_shader)
        goto print;

    { /* Grow the layout array; keep the old array valid if this fails */
        VkDescriptorSetLayout *grown;

        grown = av_realloc_array(pl->desc_layout, pl->descriptor_sets_num + 1,
                                 sizeof(*pl->desc_layout));
        if (!grown)
            return AVERROR(ENOMEM);
        pl->desc_layout = grown;
    }

    layout = &pl->desc_layout[pl->descriptor_sets_num];
    memset(layout, 0, sizeof(*layout));

    { /* Create descriptor set layout descriptions */
        VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
        VkDescriptorSetLayoutBinding *desc_binding;

        desc_binding = av_mallocz(sizeof(*desc_binding)*num);
        if (!desc_binding)
            return AVERROR(ENOMEM);

        for (int i = 0; i < num; i++) {
            desc_binding[i].binding            = i;
            desc_binding[i].descriptorType     = desc[i].type;
            /* elems == 0 means a scalar, which still takes one descriptor */
            desc_binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
            desc_binding[i].stageFlags         = desc[i].stages;
            desc_binding[i].pImmutableSamplers = desc[i].samplers;
        }

        desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
        desc_create_layout.pBindings = desc_binding;
        desc_create_layout.bindingCount = num;

        ret = vkCreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
                                          s->hwctx->alloc, layout);
        av_free(desc_binding);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
                   "layout: %s\n", ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Pool each descriptor by type and update pool counts */
        for (int i = 0; i < num; i++) {
            int j;
            for (j = 0; j < pl->pool_size_desc_num; j++)
                if (pl->pool_size_desc[j].type == desc[i].type)
                    break;
            if (j >= pl->pool_size_desc_num) {
                VkDescriptorPoolSize *grown;

                grown = av_realloc_array(pl->pool_size_desc,
                                         pl->pool_size_desc_num + 1,
                                         sizeof(*pl->pool_size_desc));
                if (!grown)
                    goto fail;
                /* Only commit the new count once the array really grew */
                pl->pool_size_desc = grown;
                pl->pool_size_desc_num++;
                memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
            }
            pl->pool_size_desc[j].type             = desc[i].type;
            pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1);
        }
    }

    { /* Create template creation struct */
        VkDescriptorUpdateTemplateCreateInfo *dt, *grown;
        VkDescriptorUpdateTemplateEntry *des_entries;

        /* Freed after descriptor set initialization */
        des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry));
        if (!des_entries)
            goto fail;

        for (int i = 0; i < num; i++) {
            des_entries[i].dstBinding      = i;
            des_entries[i].descriptorType  = desc[i].type;
            des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
            des_entries[i].dstArrayElement = 0;
            /* Offsets are relative to the filter context, which is what
             * ff_vk_update_descriptor_set() passes as the template data */
            des_entries[i].offset          = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
            des_entries[i].stride          = descriptor_props[desc[i].type].struct_size;
        }

        grown = av_realloc_array(pl->desc_template_info,
                                 pl->descriptor_sets_num + 1,
                                 sizeof(*pl->desc_template_info));
        if (!grown) {
            av_free(des_entries);
            goto fail;
        }
        pl->desc_template_info = grown;

        dt = &pl->desc_template_info[pl->descriptor_sets_num];
        memset(dt, 0, sizeof(*dt));

        dt->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
        dt->templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
        dt->descriptorSetLayout = *layout;
        dt->pDescriptorUpdateEntries = des_entries;
        dt->descriptorUpdateEntryCount = num;
    }

    pl->descriptor_sets_num++;

print:
    /* Write shader info */
    for (int i = 0; i < num; i++) {
        const struct descriptor_props *prop = &descriptor_props[desc[i].type];
        GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i);

        if (desc[i].mem_layout)
            GLSLA(", %s", desc[i].mem_layout);
        GLSLA(")");

        if (prop->is_uniform)
            GLSLA(" uniform");

        if (prop->mem_quali && desc[i].mem_quali)
            GLSLA(" %s", desc[i].mem_quali);

        if (prop->type)
            GLSLA(" %s", prop->type);

        if (prop->dim_needed)
            GLSLA("%iD", desc[i].dimensions);

        GLSLA(" %s", desc[i].name);

        if (prop->buf_content)
            GLSLA(" {\n %s\n}", desc[i].buf_content);
        else if (desc[i].elems > 0)
            GLSLA("[%i]", desc[i].elems);

        GLSLA(";\n");
    }
    GLSLA("\n");

    return 0;

fail:
    /* The set was never counted in descriptor_sets_num, so free_pipeline()
     * would not see this layout; destroy it here. Pool counts already bumped
     * for this set are left as-is, which only over-sizes the pool. */
    vkDestroyDescriptorSetLayout(s->hwctx->act_dev, *layout, s->hwctx->alloc);
    memset(layout, 0, sizeof(*layout));
    return AVERROR(ENOMEM);
}
/**
 * Push the current descriptor data into descriptor set set_id of a pipeline
 * using its update template. The filter context itself is handed to Vulkan
 * as the template's data pointer.
 */
void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
                                 int set_id)
{
    VulkanFilterContext *vkctx = avctx->priv;

    vkUpdateDescriptorSetWithTemplate(vkctx->hwctx->act_dev,
                                      pl->desc_set[set_id],
                                      pl->desc_template[set_id],
                                      vkctx);
}
/**
 * Record a push-constant update into the execution context's command buffer,
 * targeting the layout of the pipeline currently bound to that context.
 */
void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e,
                            VkShaderStageFlagBits stage, int offset,
                            size_t size, void *src)
{
    /* avctx is part of the signature but not needed here */
    (void)avctx;

    vkCmdPushConstants(e->buf,
                       e->bound_pl->pipeline_layout,
                       stage,
                       offset,
                       size,
                       src);
}
/**
 * Create the descriptor pool, descriptor sets, pipeline layout and descriptor
 * update templates for a pipeline from the data collected by earlier
 * ff_vk_add_descriptor_set() calls and the pipeline's push constant ranges.
 *
 * The temporary arrays pool_size_desc, push_consts and desc_template_info are
 * released as soon as they have been consumed.
 *
 * @param avctx filter context; avctx->priv is used as a VulkanFilterContext
 * @param pl    pipeline to finish
 * @return 0 on success, a negative AVERROR code on failure
 */
int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
{
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;

    { /* Init descriptor set pool */
        VkDescriptorPoolCreateInfo pool_create_info = {
            .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
            .poolSizeCount = pl->pool_size_desc_num,
            .pPoolSizes    = pl->pool_size_desc,
            .maxSets       = pl->descriptor_sets_num,
        };

        ret = vkCreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
                                     s->hwctx->alloc, &pl->desc_pool);
        av_freep(&pl->pool_size_desc);
        /* Keep the count in sync with the now-freed array */
        pl->pool_size_desc_num = 0;
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
                   "pool: %s\n", ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Allocate descriptor sets */
        VkDescriptorSetAllocateInfo alloc_info = {
            .sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
            .descriptorPool     = pl->desc_pool,
            .descriptorSetCount = pl->descriptor_sets_num,
            .pSetLayouts        = pl->desc_layout,
        };

        pl->desc_set = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_set));
        if (!pl->desc_set)
            return AVERROR(ENOMEM);

        ret = vkAllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
                                       pl->desc_set);
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Finally create the pipeline layout */
        VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
            .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
            .setLayoutCount         = pl->descriptor_sets_num,
            .pSetLayouts            = pl->desc_layout,
            .pushConstantRangeCount = pl->push_consts_num,
            .pPushConstantRanges    = pl->push_consts,
        };

        ret = vkCreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
                                     s->hwctx->alloc, &pl->pipeline_layout);
        av_freep(&pl->push_consts);
        pl->push_consts_num = 0;
        if (ret != VK_SUCCESS) {
            av_log(avctx, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Descriptor template (for tightly packed descriptors) */
        VkDescriptorUpdateTemplateCreateInfo *desc_template_info;

        /* Zero-initialised so that cleanup after a partial failure only sees
         * valid or null template handles */
        pl->desc_template = av_mallocz(pl->descriptor_sets_num*sizeof(*pl->desc_template));
        if (!pl->desc_template)
            return AVERROR(ENOMEM);

        /* Create update templates for the descriptor sets */
        for (int i = 0; i < pl->descriptor_sets_num; i++) {
            desc_template_info = &pl->desc_template_info[i];
            desc_template_info->pipelineLayout = pl->pipeline_layout;
            ret = vkCreateDescriptorUpdateTemplate(s->hwctx->act_dev,
                                                   desc_template_info,
                                                   s->hwctx->alloc,
                                                   &pl->desc_template[i]);
            av_free((void *)desc_template_info->pDescriptorUpdateEntries);
            /* Clear the pointer: on the error return below the array stays
             * alive and free_pipeline() frees every entry pointer again */
            desc_template_info->pDescriptorUpdateEntries = NULL;
            if (ret != VK_SUCCESS) {
                av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor "
                       "template: %s\n", ff_vk_ret2str(ret));
                return AVERROR_EXTERNAL;
            }
        }

        av_freep(&pl->desc_template_info);
    }

    return 0;
}
/* Expands to the create_pipeline() helper used below; the macro itself is
 * defined elsewhere in this file */
FN_CREATING(VulkanFilterContext, VulkanPipeline, pipeline, pipelines, pipelines_num)
/**
 * Create a new pipeline registered in the filter's Vulkan context.
 * Returns whatever create_pipeline() returns for that context.
 */
VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx)
{
    VulkanFilterContext *s = avctx->priv;

    return create_pipeline(s);
}
/**
 * Build a compute pipeline from the first shader of the pipeline that has the
 * compute stage bit set, and mark the pipeline's bind point as compute.
 *
 * @return 0 on success, AVERROR(EINVAL) if no compute shader is present,
 *         AVERROR_EXTERNAL if Vulkan refuses to create the pipeline
 */
int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl)
{
    int idx = 0;
    VkResult ret;
    VulkanFilterContext *s = avctx->priv;

    VkComputePipelineCreateInfo pipe = {
        .sType  = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
        .layout = pl->pipeline_layout,
    };

    /* Advance to the first compute-stage shader, if any */
    while (idx < pl->shaders_num &&
           !(pl->shaders[idx]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT))
        idx++;

    if (idx == pl->shaders_num) {
        av_log(avctx, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
        return AVERROR(EINVAL);
    }

    pipe.stage = pl->shaders[idx]->shader;

    ret = vkCreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
                                   s->hwctx->alloc, &pl->pipeline);
    if (ret == VK_SUCCESS) {
        pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
        return 0;
    }

    av_log(avctx, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
           ff_vk_ret2str(ret));
    return AVERROR_EXTERNAL;
}
/**
 * Record commands binding a pipeline and all of its descriptor sets into the
 * execution context's command buffer, and remember the pipeline as the one
 * bound to that context.
 */
void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e,
                              VulkanPipeline *pl)
{
    /* avctx is part of the signature but not needed here */
    (void)avctx;

    /* Remembered so later calls can reach the bound pipeline's layout */
    e->bound_pl = pl;

    vkCmdBindPipeline(e->buf, pl->bind_point, pl->pipeline);

    /* All sets starting at set 0, no dynamic offsets */
    vkCmdBindDescriptorSets(e->buf, pl->bind_point, pl->pipeline_layout,
                            0, pl->descriptor_sets_num, pl->desc_set,
                            0, NULL);
}
/*
 * Destroy the Vulkan objects owned by an execution context (fence, command
 * buffer, command pool), release its semaphore arrays and free the context.
 */
static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e)
{
    vkDestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc);

    /* The command buffer goes first, then the pool it came from */
    if (e->buf)
        vkFreeCommandBuffers(s->hwctx->act_dev, e->pool, 1, &e->buf);
    if (e->pool)
        vkDestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);

    av_free(e->sem_sig);
    av_free(e->sem_wait_dst);
    av_free(e->sem_wait);

    av_free(e);
}
/*
 * Tear down a pipeline: its shaders, the Vulkan pipeline and layout objects,
 * per-set templates and layouts, the descriptor pool, every helper array and
 * finally the pipeline structure itself.
 */
static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl)
{
    int idx;

    /* Shaders: source text, module, then the shader wrapper */
    for (idx = 0; idx < pl->shaders_num; idx++) {
        SPIRVShader *cur = pl->shaders[idx];

        av_bprint_finalize(&cur->src, NULL);
        vkDestroyShaderModule(s->hwctx->act_dev, cur->shader.module,
                              s->hwctx->alloc);
        av_free(cur);
    }

    vkDestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
    vkDestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
                            s->hwctx->alloc);

    /* Per-set objects; either array may be missing after a failed init */
    for (idx = 0; idx < pl->descriptor_sets_num; idx++) {
        if (pl->desc_template && pl->desc_template[idx])
            vkDestroyDescriptorUpdateTemplate(s->hwctx->act_dev,
                                              pl->desc_template[idx],
                                              s->hwctx->alloc);
        if (pl->desc_layout && pl->desc_layout[idx])
            vkDestroyDescriptorSetLayout(s->hwctx->act_dev,
                                         pl->desc_layout[idx],
                                         s->hwctx->alloc);
    }

    /* Also frees the descriptor sets */
    if (pl->desc_pool)
        vkDestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
                                s->hwctx->alloc);

    /* Init-time temporaries, only still present in case of failure */
    if (pl->desc_template_info) {
        for (idx = 0; idx < pl->descriptor_sets_num; idx++)
            av_free((void *)pl->desc_template_info[idx].pDescriptorUpdateEntries);
        av_freep(&pl->desc_template_info);
    }
    av_freep(&pl->pool_size_desc);

    av_freep(&pl->push_consts);
    pl->push_consts_num = 0;

    av_freep(&pl->desc_template);
    av_freep(&pl->desc_layout);
    av_freep(&pl->shaders);
    av_freep(&pl->desc_set);

    av_free(pl);
}
/**
 * Release everything owned by the filter's Vulkan context: samplers,
 * pipelines, execution contexts, scratch memory and the device/frames
 * references. Counts are reset alongside their arrays so that no stale
 * count outlives a freed array.
 */
void ff_vk_filter_uninit(AVFilterContext *avctx)
{
    VulkanFilterContext *s = avctx->priv;

    glslang_uninit();

    for (int i = 0; i < s->samplers_num; i++) {
        vkDestroySampler(s->hwctx->act_dev, *s->samplers[i], s->hwctx->alloc);
        /* Each element is its own allocation, like the pipeline and exec
         * context elements below, and must be released individually.
         * NOTE(review): assumes samplers are allocated via the same
         * FN_CREATING helper as pipelines - confirm in ff_vk_init_sampler() */
        av_free(s->samplers[i]);
    }
    av_freep(&s->samplers);
    s->samplers_num = 0;

    for (int i = 0; i < s->pipelines_num; i++)
        free_pipeline(s, s->pipelines[i]);
    av_freep(&s->pipelines);
    s->pipelines_num = 0;

    for (int i = 0; i < s->exec_ctx_num; i++)
        free_exec_ctx(s, s->exec_ctx[i]);
    av_freep(&s->exec_ctx);
    s->exec_ctx_num = 0;

    av_freep(&s->scratch);
    s->scratch_size = 0;

    /* Dropped last: the loops above still need s->hwctx */
    av_buffer_unref(&s->device_ref);
    av_buffer_unref(&s->frames_ref);
}
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVFILTER_VULKAN_H
#define AVFILTER_VULKAN_H
#include "avfilter.h"
#include "libavutil/pixdesc.h"
#include "libavutil/bprint.h"
#include "libavutil/hwcontext.h"
#include "libavutil/hwcontext_vulkan.h"
/*
 * GLSL management macros.
 *
 * All GLSL* macros append to the AVBPrint `src` of a variable named `shd`
 * that must be in scope at the point of use.
 */

/* INDENT(N) expands to the string literal used for indentation level N */
#define INDENT(N) INDENT_##N
#define INDENT_0
#define INDENT_1 INDENT_0 " "
#define INDENT_2 INDENT_1 INDENT_1
#define INDENT_3 INDENT_2 INDENT_1
#define INDENT_4 INDENT_3 INDENT_1
#define INDENT_5 INDENT_4 INDENT_1
#define INDENT_6 INDENT_5 INDENT_1

/* One source line: indentation, the stringified tokens S, and a newline */
#define C(N, S) INDENT(N) #S "\n"

/* Append the line S at indentation level N */
#define GLSLC(N, S) av_bprintf(&shd->src, C(N, S))
/* Append arbitrary printf-style formatted text */
#define GLSLA(...) av_bprintf(&shd->src, __VA_ARGS__)
/* Append the line S at indentation level N, with S used as a format string */
#define GLSLF(N, S, ...) av_bprintf(&shd->src, C(N, S), __VA_ARGS__)

/*
 * Append the string D verbatim, surrounded by empty lines.
 * Wrapped in do/while so it behaves as a single statement (e.g. under an
 * unbraced `if`), and D is evaluated only once.
 */
#define GLSLD(D)                                                        \
    do {                                                                \
        const char *glsld_str_ = (D);                                   \
        GLSLC(0, );                                                     \
        av_bprint_append_data(&shd->src, glsld_str_, strlen(glsld_str_)); \
        GLSLC(0, );                                                     \
    } while (0)
/*
 * Error-propagation helper: evaluates x once, stores the result in a variable
 * named `err` and jumps to a label named `fail` when that result is negative.
 * Both `err` and `fail` must be provided by the enclosing function.
 */
#define RET(x) \
    do { \
        if ((err = (x)) < 0) \
            goto fail; \
    } while (0)
/*
 * Useful for attaching immutable samplers to arrays: expands to a compound
 * literal VkSampler array holding four copies of x. Note that x is evaluated
 * four times.
 */
#define DUP_SAMPLER_ARRAY4(x) (VkSampler []){ x, x, x, x, }
/**
 * A single shader: its GLSL source as it is being built, and the Vulkan
 * stage description that refers to the compiled module.
 */
typedef struct SPIRVShader {
    const char *name; /* Name for id/debugging purposes */
    AVBPrint src; /* GLSL source text; the GLSL* macros append to it */
    int local_size[3]; /* Compute shader workgroup sizes */
    VkPipelineShaderStageCreateInfo shader; /* Stage info; module and pName are filled by ff_vk_compile_shader() */
} SPIRVShader;
/**
 * Description of one binding inside a descriptor set, consumed by
 * ff_vk_add_descriptor_set() both to create the Vulkan layout and to print
 * the matching GLSL declaration.
 */
typedef struct VulkanDescriptorSetBinding {
    const char *name; /* Variable name printed in the GLSL declaration */
    VkDescriptorType type;
    const char *mem_layout; /* Storage images (rgba8, etc.) and buffers (std430, etc.) */
    const char *mem_quali; /* readonly, writeonly, etc. */
    const char *buf_content; /* For buffers */
    uint32_t dimensions; /* Needed for e.g. sampler%iD */
    uint32_t elems; /* 0 - scalar, 1 or more - vector */
    VkShaderStageFlags stages; /* Shader stages that can access the binding */
    const VkSampler *samplers; /* Immutable samplers, length - #elems */
    void *updater; /* Pointer to VkDescriptor*Info; its offset from the VulkanFilterContext is stored, so it must live inside the filter's context */
} VulkanDescriptorSetBinding;
/**
 * A Vulkan buffer handle together with a device memory handle and memory
 * property flags; used by the ff_vk_*_buf() / ff_vk_*_buffers() functions
 * declared below.
 */
typedef struct FFVkBuffer {
    VkBuffer buf;
    VkDeviceMemory mem; /* presumably the memory backing buf - TODO confirm in ff_vk_create_buf() */
    VkMemoryPropertyFlagBits flags;
} FFVkBuffer;
/**
 * A pipeline with its shaders, push constant ranges and descriptor state.
 * Built up in stages: descriptor sets are added first, then
 * ff_vk_init_pipeline_layout() creates the Vulkan objects, then the pipeline
 * itself is created.
 */
typedef struct VulkanPipeline {
    VkPipelineBindPoint bind_point; /* Set when the pipeline is created, used when binding */
    /* Contexts */
    VkPipelineLayout pipeline_layout;
    VkPipeline pipeline;
    /* Shaders */
    SPIRVShader **shaders;
    int shaders_num;
    /* Push consts */
    VkPushConstantRange *push_consts; /* Freed once the pipeline layout has been created */
    int push_consts_num;
    /* Descriptors */
    VkDescriptorSetLayout *desc_layout; /* One per descriptor set */
    VkDescriptorPool desc_pool;
    VkDescriptorSet *desc_set; /* One per descriptor set */
    VkDescriptorUpdateTemplate *desc_template; /* One per descriptor set */
    int descriptor_sets_num;
    int pool_size_desc_num; /* Number of entries in pool_size_desc */
    /* Temporary, used to store data in between initialization stages */
    VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
    VkDescriptorPoolSize *pool_size_desc;
} VulkanPipeline;
/**
 * State for recording and submitting one command buffer: the pool and buffer,
 * the queue and fence, the currently bound pipeline, and the semaphore
 * arrays used for submission.
 */
typedef struct FFVkExecContext {
    VkCommandPool pool;
    VkCommandBuffer buf;
    VkQueue queue;
    VkFence fence;
    VulkanPipeline *bound_pl; /* Set by ff_vk_bind_pipeline_exec(), read by ff_vk_update_push_exec() */
    VkSemaphore *sem_wait;
    int sem_wait_alloc; /* Allocated sem_wait */
    int sem_wait_cnt;
    VkPipelineStageFlagBits *sem_wait_dst;
    int sem_wait_dst_alloc; /* Allocated sem_wait_dst */
    VkSemaphore *sem_sig;
    int sem_sig_alloc; /* Allocated sem_sig */
    int sem_sig_cnt;
} FFVkExecContext;
/**
 * Vulkan state shared by filters built on this framework; used as the
 * filter's private context (avctx->priv). Samplers, pipelines and execution
 * contexts registered here are released by ff_vk_filter_uninit().
 */
typedef struct VulkanFilterContext {
    const AVClass *class;
    AVBufferRef *device_ref;
    AVBufferRef *frames_ref; /* For in-place filtering */
    AVHWDeviceContext *device;
    AVVulkanDeviceContext *hwctx; /* Supplies the device (act_dev) and allocator (alloc) for all Vulkan calls */
    /* Properties */
    int output_width;
    int output_height;
    enum AVPixelFormat output_format;
    enum AVPixelFormat input_format;
    /* Samplers */
    VkSampler **samplers;
    int samplers_num;
    /* Exec contexts */
    FFVkExecContext **exec_ctx;
    int exec_ctx_num;
    /* Pipelines (each can have 1 shader of each type) */
    VulkanPipeline **pipelines;
    int pipelines_num;
    void *scratch; /* Scratch memory used only in functions */
    unsigned int scratch_size;
} VulkanFilterContext;
/* Identity mapping - r = r, b = b, g = g, a = a */
extern const VkComponentMapping ff_comp_identity_map;
/**
* General lavfi IO functions
*/
int ff_vk_filter_query_formats (AVFilterContext *avctx);
int ff_vk_filter_init (AVFilterContext *avctx);
int ff_vk_filter_config_input (AVFilterLink *inlink);
int ff_vk_filter_config_output (AVFilterLink *outlink);
int ff_vk_filter_config_output_inplace(AVFilterLink *outlink);
void ff_vk_filter_uninit (AVFilterContext *avctx);
/**
* Converts Vulkan return values to strings
*/
const char *ff_vk_ret2str(VkResult res);
/**
* Gets the glsl format string for a pixel format
*/
const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
/**
* Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit()
*/
VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
VkFilter filt);
/**
* Create an imageview.
*/
int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, VkImage img,
VkFormat fmt, const VkComponentMapping map);
/**
* Destroy an imageview. Command buffer must have completed executing, which
* ff_vk_submit_exec_queue() will ensure
*/
void ff_vk_destroy_imageview(AVFilterContext *avctx, VkImageView *v);
/**
* Define a push constant for a given stage into a pipeline.
* Must be called before the pipeline layout has been initialized.
*/
int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl,
int offset, int size, VkShaderStageFlagBits stage);
/**
* Inits a pipeline. Everything in it will be auto-freed when calling
* ff_vk_filter_uninit().
*/
VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx);
/**
* Inits a shader for a specific pipeline. Will be auto-freed on uninit.
*/
SPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl,
const char *name, VkShaderStageFlags stage);
/**
* Writes the workgroup size for a shader.
*/
void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, SPIRVShader *shd,
int local_size[3]);
/**
* Adds a descriptor set to the shader and registers them in the pipeline.
*/
int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
SPIRVShader *shd, VulkanDescriptorSetBinding *desc,
int num, int only_print_to_shader);
/**
* Compiles the shader, entrypoint must be set to "main".
*/
int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd,
const char *entrypoint);
/**
* Initializes the pipeline layout after all shaders and descriptor sets have
* been finished.
*/
int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl);
/**
* Initializes a compute pipeline. Will pick the first shader with the
* COMPUTE flag set.
*/
int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl);
/**
* Updates a descriptor set via the updaters defined.
* Can be called immediately after pipeline creation, but must be called
* at least once before queue submission.
*/
void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
int set_id);
/**
* Init an execution context for command recording and queue submission.
* Will be auto-freed on uninit.
*/
int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx, int queue);
/**
* Begin recording to the command buffer. Previous execution must have been
* completed, which ff_vk_submit_exec_queue() will ensure.
*/
int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e);
/**
* Add a command to bind the completed pipeline and its descriptor sets.
* Must be called after ff_vk_start_exec_recording() and before submission.
*/
void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e,
VulkanPipeline *pl);
/**
* Updates push constants.
* Must be called after binding a pipeline if any push constants were defined.
*/
void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e,
VkShaderStageFlagBits stage, int offset,
size_t size, void *src);
/**
* Adds a frame as a queue dependency. This manages semaphore signalling.
* Must be called before submission.
*/
int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag);
/**
* Submits a command buffer to the queue for execution.
* Will block until execution has finished in order to simplify resource
* management.
*/
int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e);
/**
* Create a VkBuffer with the specified parameters.
*/
int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size,
VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
/**
* Maps the buffer to userspace. Set invalidate to 1 if reading the contents
* is necessary.
*/
int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[],
int nb_buffers, int invalidate);
/**
* Unmaps the buffer from userspace. Set flush to 1 to write and sync.
*/
int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
int flush);
/**
* Frees a buffer.
*/
void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf);
#endif /* AVFILTER_VULKAN_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment