cbs_av1.h 12.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef AVCODEC_CBS_AV1_H
#define AVCODEC_CBS_AV1_H

#include <stddef.h>
#include <stdint.h>

#include "av1.h"
#include "cbs.h"


typedef struct AV1RawOBUHeader {
    uint8_t obu_forbidden_bit;
    uint8_t obu_type;
    uint8_t obu_extension_flag;
    uint8_t obu_has_size_field;
    uint8_t obu_reserved_1bit;

    uint8_t temporal_id;
    uint8_t spatial_id;
    uint8_t extension_header_reserved_3bits;
} AV1RawOBUHeader;

typedef struct AV1RawColorConfig {
    uint8_t high_bitdepth;
    uint8_t twelve_bit;
    uint8_t mono_chrome;

    uint8_t color_description_present_flag;
    uint8_t color_primaries;
    uint8_t transfer_characteristics;
    uint8_t matrix_coefficients;

    uint8_t color_range;
    uint8_t subsampling_x;
    uint8_t subsampling_y;
    uint8_t chroma_sample_position;
    uint8_t separate_uv_delta_q;
} AV1RawColorConfig;

typedef struct AV1RawTimingInfo {
    uint32_t num_units_in_display_tick;
    uint32_t time_scale;

    uint8_t equal_picture_interval;
    uint32_t num_ticks_per_picture_minus_1;
} AV1RawTimingInfo;

typedef struct AV1RawDecoderModelInfo {
    uint8_t  buffer_delay_length_minus_1;
    uint32_t num_units_in_decoding_tick;
    uint8_t  buffer_removal_time_length_minus_1;
    uint8_t  frame_presentation_time_length_minus_1;
} AV1RawDecoderModelInfo;

typedef struct AV1RawSequenceHeader {
    uint8_t seq_profile;
    uint8_t still_picture;
    uint8_t reduced_still_picture_header;

    uint8_t timing_info_present_flag;
    uint8_t decoder_model_info_present_flag;
    uint8_t initial_display_delay_present_flag;
    uint8_t operating_points_cnt_minus_1;

    AV1RawTimingInfo       timing_info;
    AV1RawDecoderModelInfo decoder_model_info;

    uint16_t operating_point_idc[AV1_MAX_OPERATING_POINTS];
    uint8_t  seq_level_idx[AV1_MAX_OPERATING_POINTS];
    uint8_t  seq_tier[AV1_MAX_OPERATING_POINTS];
    uint8_t  decoder_model_present_for_this_op[AV1_MAX_OPERATING_POINTS];
90 91
    uint32_t decoder_buffer_delay[AV1_MAX_OPERATING_POINTS];
    uint32_t encoder_buffer_delay[AV1_MAX_OPERATING_POINTS];
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
    uint8_t  low_delay_mode_flag[AV1_MAX_OPERATING_POINTS];
    uint8_t  initial_display_delay_present_for_this_op[AV1_MAX_OPERATING_POINTS];
    uint8_t  initial_display_delay_minus_1[AV1_MAX_OPERATING_POINTS];

    uint8_t  frame_width_bits_minus_1;
    uint8_t  frame_height_bits_minus_1;
    uint16_t max_frame_width_minus_1;
    uint16_t max_frame_height_minus_1;

    uint8_t frame_id_numbers_present_flag;
    uint8_t delta_frame_id_length_minus_2;
    uint8_t additional_frame_id_length_minus_1;

    uint8_t use_128x128_superblock;
    uint8_t enable_filter_intra;
    uint8_t enable_intra_edge_filter;
    uint8_t enable_intraintra_compound;
    uint8_t enable_masked_compound;
    uint8_t enable_warped_motion;
    uint8_t enable_dual_filter;

    uint8_t enable_order_hint;
    uint8_t enable_jnt_comp;
    uint8_t enable_ref_frame_mvs;

    uint8_t seq_choose_screen_content_tools;
    uint8_t seq_force_screen_content_tools;
    uint8_t seq_choose_integer_mv;
    uint8_t seq_force_integer_mv;

    uint8_t order_hint_bits_minus_1;

    uint8_t enable_superres;
    uint8_t enable_cdef;
    uint8_t enable_restoration;

    AV1RawColorConfig color_config;

    uint8_t film_grain_params_present;
} AV1RawSequenceHeader;

typedef struct AV1RawFrameHeader {
    uint8_t  show_existing_frame;
    uint8_t  frame_to_show_map_idx;
    uint32_t frame_presentation_time;
    uint32_t display_frame_id;

    uint8_t frame_type;
    uint8_t show_frame;
    uint8_t showable_frame;

    uint8_t error_resilient_mode;
    uint8_t disable_cdf_update;
    uint8_t allow_screen_content_tools;
    uint8_t force_integer_mv;

    uint32_t current_frame_id;
    uint8_t  frame_size_override_flag;
    uint8_t  order_hint;

    uint8_t  buffer_removal_time_present_flag;
    uint32_t buffer_removal_time[AV1_MAX_OPERATING_POINTS];

    uint8_t  primary_ref_frame;
    uint16_t frame_width_minus_1;
    uint16_t frame_height_minus_1;
    uint8_t  use_superres;
    uint8_t  coded_denom;
    uint8_t  render_and_frame_size_different;
    uint8_t  render_width_minus_1;
    uint8_t  render_height_minus_1;

164
    uint8_t found_ref[AV1_REFS_PER_FRAME];
165 166 167 168 169 170 171 172

    uint8_t refresh_frame_flags;
    uint8_t allow_intrabc;
    uint8_t ref_order_hint[AV1_NUM_REF_FRAMES];
    uint8_t frame_refs_short_signaling;
    uint8_t last_frame_idx;
    uint8_t golden_frame_idx;
    int8_t  ref_frame_idx[AV1_REFS_PER_FRAME];
173
    uint32_t delta_frame_id_minus1[AV1_REFS_PER_FRAME];
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212

    uint8_t allow_high_precision_mv;
    uint8_t is_filter_switchable;
    uint8_t interpolation_filter;
    uint8_t is_motion_mode_switchable;
    uint8_t use_ref_frame_mvs;

    uint8_t disable_frame_end_update_cdf;

    uint8_t uniform_tile_spacing_flag;
    uint8_t tile_cols_log2;
    uint8_t tile_rows_log2;
    uint8_t width_in_sbs_minus_1[AV1_MAX_TILE_COLS];
    uint8_t height_in_sbs_minus_1[AV1_MAX_TILE_ROWS];
    uint16_t context_update_tile_id;
    uint8_t tile_size_bytes_minus1;

    // These are derived values, but it's very unhelpful to have to
    // recalculate them all the time so we store them here.
    uint16_t tile_cols;
    uint16_t tile_rows;

    uint8_t base_q_idx;
    int8_t  delta_q_y_dc;
    uint8_t diff_uv_delta;
    int8_t  delta_q_u_dc;
    int8_t  delta_q_u_ac;
    int8_t  delta_q_v_dc;
    int8_t  delta_q_v_ac;
    uint8_t using_qmatrix;
    uint8_t qm_y;
    uint8_t qm_u;
    uint8_t qm_v;

    uint8_t segmentation_enabled;
    uint8_t segmentation_update_map;
    uint8_t segmentation_temporal_update;
    uint8_t segmentation_update_data;
    uint8_t feature_enabled[AV1_MAX_SEGMENTS][AV1_SEG_LVL_MAX];
213
    int16_t feature_value[AV1_MAX_SEGMENTS][AV1_SEG_LVL_MAX];
214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401

    uint8_t delta_q_present;
    uint8_t delta_q_res;
    uint8_t delta_lf_present;
    uint8_t delta_lf_res;
    uint8_t delta_lf_multi;

    uint8_t loop_filter_level[4];
    uint8_t loop_filter_sharpness;
    uint8_t loop_filter_delta_enabled;
    uint8_t loop_filter_delta_update;
    uint8_t update_ref_delta[AV1_TOTAL_REFS_PER_FRAME];
    int8_t  loop_filter_ref_deltas[AV1_TOTAL_REFS_PER_FRAME];
    uint8_t update_mode_delta[2];
    int8_t  loop_filter_mode_deltas[2];

    uint8_t cdef_damping_minus_3;
    uint8_t cdef_bits;
    uint8_t cdef_y_pri_strength[8];
    uint8_t cdef_y_sec_strength[8];
    uint8_t cdef_uv_pri_strength[8];
    uint8_t cdef_uv_sec_strength[8];

    uint8_t lr_type[3];
    uint8_t lr_unit_shift;
    uint8_t lr_uv_shift;

    uint8_t tx_mode;
    uint8_t reference_select;
    uint8_t skip_mode_present;

    uint8_t allow_warped_motion;
    uint8_t reduced_tx_set;

    uint8_t is_global[AV1_TOTAL_REFS_PER_FRAME];
    uint8_t is_rot_zoom[AV1_TOTAL_REFS_PER_FRAME];
    uint8_t is_translation[AV1_TOTAL_REFS_PER_FRAME];
    //AV1RawSubexp gm_params[AV1_TOTAL_REFS_PER_FRAME][6];
    uint32_t gm_params[AV1_TOTAL_REFS_PER_FRAME][6];

    uint8_t  apply_grain;
    uint16_t grain_seed;
    uint8_t  update_grain;
    uint8_t  film_grain_params_ref_idx;
    uint8_t  num_y_points;
    uint8_t  point_y_value[16];
    uint8_t  point_y_scaling[16];
    uint8_t  chroma_scaling_from_luma;
    uint8_t  num_cb_points;
    uint8_t  point_cb_value[16];
    uint8_t  point_cb_scaling[16];
    uint8_t  num_cr_points;
    uint8_t  point_cr_value[16];
    uint8_t  point_cr_scaling[16];
    uint8_t  grain_scaling_minus_8;
    uint8_t  ar_coeff_lag;
    uint8_t  ar_coeffs_y_plus_128[24];
    uint8_t  ar_coeffs_cb_plus_128[24];
    uint8_t  ar_coeffs_cr_plus_128[24];
    uint8_t  ar_coeff_shift_minus_6;
    uint8_t  grain_scale_shift;
    uint8_t  cb_mult;
    uint8_t  cb_luma_mult;
    uint16_t cb_offset;
    uint8_t  cr_mult;
    uint8_t  cr_luma_mult;
    uint16_t cr_offset;
    uint8_t  overlap_flag;
    uint8_t  clip_to_restricted_range;
} AV1RawFrameHeader;

typedef struct AV1RawTileData {
    uint8_t     *data;
    size_t       data_size;
    AVBufferRef *data_ref;
} AV1RawTileData;

typedef struct AV1RawTileGroup {
    uint8_t  tile_start_and_end_present_flag;
    uint16_t tg_start;
    uint16_t tg_end;

    AV1RawTileData tile_data;
} AV1RawTileGroup;

typedef struct AV1RawFrame {
    AV1RawFrameHeader header;
    AV1RawTileGroup   tile_group;
} AV1RawFrame;

typedef struct AV1RawTileList {
    uint8_t output_frame_width_in_tiles_minus_1;
    uint8_t output_frame_height_in_tiles_minus_1;
    uint16_t tile_count_minus_1;

    AV1RawTileData tile_data;
} AV1RawTileList;

typedef struct AV1RawMetadataHDRCLL {
    uint16_t max_cll;
    uint16_t max_fall;
} AV1RawMetadataHDRCLL;

typedef struct AV1RawMetadataHDRMDCV {
    uint16_t primary_chromaticity_x[3];
    uint16_t primary_chromaticity_y[3];
    uint16_t white_point_chromaticity_x;
    uint16_t white_point_chromaticity_y;
    uint32_t luminance_max;
    uint32_t luminance_min;
} AV1RawMetadataHDRMDCV;

typedef struct AV1RawMetadataScalability {
    uint8_t scalability_mode_idc;
    // TODO: more stuff.
} AV1RawMetadataScalability;

typedef struct AV1RawMetadataITUTT35 {
    uint8_t itu_t_t35_country_code;
    uint8_t itu_t_t35_country_code_extension_byte;

    uint8_t     *payload;
    size_t       payload_size;
    AVBufferRef *payload_ref;
} AV1RawMetadataITUTT35;

typedef struct AV1RawMetadataTimecode {
    uint8_t  counting_type;
    uint8_t  full_timestamp_flag;
    uint8_t  discontinuity_flag;
    uint8_t  cnt_dropped_flag;
    uint16_t n_frames;
    uint8_t  seconds_value;
    uint8_t  minutes_value;
    uint8_t  hours_value;
    uint8_t  seconds_flag;
    uint8_t  minutes_flag;
    uint8_t  hours_flag;
    uint8_t  time_offset_length;
    uint32_t time_offset_value;
} AV1RawMetadataTimecode;

typedef struct AV1RawMetadata {
    uint64_t metadata_type;
    union {
        AV1RawMetadataHDRCLL      hdr_cll;
        AV1RawMetadataHDRMDCV     hdr_mdcv;
        AV1RawMetadataScalability scalability;
        AV1RawMetadataITUTT35     itut_t35;
        AV1RawMetadataTimecode    timecode;
    } metadata;
} AV1RawMetadata;


typedef struct AV1RawOBU {
    AV1RawOBUHeader header;

    size_t obu_size;

    union {
        AV1RawSequenceHeader sequence_header;
        AV1RawFrameHeader    frame_header;
        AV1RawFrame          frame;
        AV1RawTileGroup      tile_group;
        AV1RawTileList       tile_list;
        AV1RawMetadata       metadata;
    } obu;
} AV1RawOBU;

typedef struct AV1ReferenceFrameState {
    int valid;          // RefValid
    int frame_id;       // RefFrameId
    int upscaled_width; // RefUpscaledWidth
    int frame_width;    // RefFrameWidth
    int frame_height;   // RefFrameHeight
    int render_width;   // RefRenderWidth
    int render_height;  // RefRenderHeight
    int frame_type;     // RefFrameType
    int subsampling_x;  // RefSubsamplingX
    int subsampling_y;  // RefSubsamplingY
    int bit_depth;      // RefBitDepth
    int order_hint;     // RefOrderHint
} AV1ReferenceFrameState;

typedef struct CodedBitstreamAV1Context {
    AV1RawSequenceHeader *sequence_header;
    AVBufferRef          *sequence_header_ref;

402 403 404 405
    int     seen_frame_header;
    AVBufferRef *frame_header_ref;
    uint8_t     *frame_header;
    size_t       frame_header_size;
406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432

    int temporal_id;
    int spatial_id;
    int operating_point_idc;

    int bit_depth;
    int frame_width;
    int frame_height;
    int upscaled_width;
    int render_width;
    int render_height;

    int num_planes;
    int coded_lossless;
    int all_lossless;
    int tile_cols;
    int tile_rows;

    AV1ReferenceFrameState ref[AV1_NUM_REF_FRAMES];

    // Write buffer.
    uint8_t *write_buffer;
    size_t   write_buffer_size;
} CodedBitstreamAV1Context;


#endif /* AVCODEC_CBS_AV1_H */