Commit e59da0f7 authored by Ilia Valiakhmetov, committed by Ronald S. Bultje

avcodec/vp9: Add tile threading support

Signed-off-by: Ilia Valiakhmetov <zakne0ne@gmail.com>
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
parent 83c12fef
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -26,8 +26,10 @@ ...@@ -26,8 +26,10 @@
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include <stdatomic.h>
#include "libavutil/buffer.h" #include "libavutil/buffer.h"
#include "libavutil/thread.h"
#include "libavutil/internal.h" #include "libavutil/internal.h"
#include "vp9.h" #include "vp9.h"
...@@ -84,20 +86,21 @@ typedef struct VP9Block { ...@@ -84,20 +86,21 @@ typedef struct VP9Block {
enum BlockPartition bp; enum BlockPartition bp;
} VP9Block; } VP9Block;
typedef struct VP9TileData VP9TileData;
typedef struct VP9Context { typedef struct VP9Context {
VP9TileData *td;
VP9SharedContext s; VP9SharedContext s;
VP9DSPContext dsp; VP9DSPContext dsp;
VideoDSPContext vdsp; VideoDSPContext vdsp;
GetBitContext gb; GetBitContext gb;
VP56RangeCoder c; VP56RangeCoder c;
VP56RangeCoder *c_b; int pass, active_tile_cols;
unsigned c_b_size;
VP9Block *b_base, *b; pthread_mutex_t progress_mutex;
int pass; pthread_cond_t progress_cond;
int row, row7, col, col7; atomic_int *entries;
uint8_t *dst[3];
ptrdiff_t y_stride, uv_stride;
uint8_t ss_h, ss_v; uint8_t ss_h, ss_v;
uint8_t last_bpp, bpp_index, bytesperpixel; uint8_t last_bpp, bpp_index, bytesperpixel;
...@@ -115,7 +118,6 @@ typedef struct VP9Context { ...@@ -115,7 +118,6 @@ typedef struct VP9Context {
uint8_t lim_lut[64]; uint8_t lim_lut[64];
uint8_t mblim_lut[64]; uint8_t mblim_lut[64];
} filter_lut; } filter_lut;
unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
struct { struct {
ProbContext p; ProbContext p;
uint8_t coef[4][2][2][6][6][3]; uint8_t coef[4][2][2][6][6][3];
...@@ -124,6 +126,44 @@ typedef struct VP9Context { ...@@ -124,6 +126,44 @@ typedef struct VP9Context {
ProbContext p; ProbContext p;
uint8_t coef[4][2][2][6][6][11]; uint8_t coef[4][2][2][6][6][11];
} prob; } prob;
// contextual (above) cache
uint8_t *above_partition_ctx;
uint8_t *above_mode_ctx;
// FIXME maybe merge some of the below in a flags field?
uint8_t *above_y_nnz_ctx;
uint8_t *above_uv_nnz_ctx[2];
uint8_t *above_skip_ctx; // 1bit
uint8_t *above_txfm_ctx; // 2bit
uint8_t *above_segpred_ctx; // 1bit
uint8_t *above_intra_ctx; // 1bit
uint8_t *above_comp_ctx; // 1bit
uint8_t *above_ref_ctx; // 2bit
uint8_t *above_filter_ctx;
VP56mv (*above_mv_ctx)[2];
// whole-frame cache
uint8_t *intra_pred_data[3];
VP9Filter *lflvl;
// block reconstruction intermediates
int block_alloc_using_2pass;
uint16_t mvscale[3][2];
uint8_t mvstep[3][2];
} VP9Context;
struct VP9TileData {
//VP9Context should be const, but because of the threading API(generates
//a lot of warnings) it's not.
VP9Context *s;
VP56RangeCoder *c_b;
VP56RangeCoder *c;
int row, row7, col, col7;
uint8_t *dst[3];
ptrdiff_t y_stride, uv_stride;
VP9Block *b_base, *b;
unsigned tile_col_start;
struct { struct {
unsigned y_mode[4][10]; unsigned y_mode[4][10];
unsigned uv_mode[10][10]; unsigned uv_mode[10][10];
...@@ -153,7 +193,10 @@ typedef struct VP9Context { ...@@ -153,7 +193,10 @@ typedef struct VP9Context {
unsigned eob[4][2][2][6][6][2]; unsigned eob[4][2][2][6][6][2];
} counts; } counts;
// contextual (left/above) cache // whole-frame cache
DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];
// contextual (left) cache
DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16]; DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16]; DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2]; DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
...@@ -166,52 +209,30 @@ typedef struct VP9Context { ...@@ -166,52 +209,30 @@ typedef struct VP9Context {
DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8]; DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8]; DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8]; DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
uint8_t *above_partition_ctx;
uint8_t *above_mode_ctx;
// FIXME maybe merge some of the below in a flags field?
uint8_t *above_y_nnz_ctx;
uint8_t *above_uv_nnz_ctx[2];
uint8_t *above_skip_ctx; // 1bit
uint8_t *above_txfm_ctx; // 2bit
uint8_t *above_segpred_ctx; // 1bit
uint8_t *above_intra_ctx; // 1bit
uint8_t *above_comp_ctx; // 1bit
uint8_t *above_ref_ctx; // 2bit
uint8_t *above_filter_ctx;
VP56mv (*above_mv_ctx)[2];
// whole-frame cache
uint8_t *intra_pred_data[3];
VP9Filter *lflvl;
DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];
// block reconstruction intermediates // block reconstruction intermediates
int block_alloc_using_2pass;
int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
struct { int x, y; } min_mv, max_mv;
DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2]; DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2]; DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
uint16_t mvscale[3][2]; struct { int x, y; } min_mv, max_mv;
uint8_t mvstep[3][2]; int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
} VP9Context; uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
};
void ff_vp9_fill_mv(VP9Context *s, VP56mv *mv, int mode, int sb); void ff_vp9_fill_mv(VP9TileData *td, VP56mv *mv, int mode, int sb);
void ff_vp9_adapt_probs(VP9Context *s); void ff_vp9_adapt_probs(VP9Context *s);
void ff_vp9_decode_block(AVCodecContext *ctx, int row, int col, void ff_vp9_decode_block(VP9TileData *td, int row, int col,
VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff, VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl, enum BlockPartition bp); enum BlockLevel bl, enum BlockPartition bp);
void ff_vp9_loopfilter_sb(AVCodecContext *avctx, VP9Filter *lflvl, void ff_vp9_loopfilter_sb(AVCodecContext *avctx, VP9Filter *lflvl,
int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff); int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff);
void ff_vp9_intra_recon_8bpp(AVCodecContext *avctx, void ff_vp9_intra_recon_8bpp(VP9TileData *td,
ptrdiff_t y_off, ptrdiff_t uv_off); ptrdiff_t y_off, ptrdiff_t uv_off);
void ff_vp9_intra_recon_16bpp(AVCodecContext *avctx, void ff_vp9_intra_recon_16bpp(VP9TileData *td,
ptrdiff_t y_off, ptrdiff_t uv_off); ptrdiff_t y_off, ptrdiff_t uv_off);
void ff_vp9_inter_recon_8bpp(AVCodecContext *avctx); void ff_vp9_inter_recon_8bpp(VP9TileData *td);
void ff_vp9_inter_recon_16bpp(AVCodecContext *avctx); void ff_vp9_inter_recon_16bpp(VP9TileData *td);
#endif /* AVCODEC_VP9DEC_H */ #endif /* AVCODEC_VP9DEC_H */
This diff is collapsed.
...@@ -56,8 +56,8 @@ void ff_vp9_adapt_probs(VP9Context *s) ...@@ -56,8 +56,8 @@ void ff_vp9_adapt_probs(VP9Context *s)
for (l = 0; l < 6; l++) for (l = 0; l < 6; l++)
for (m = 0; m < 6; m++) { for (m = 0; m < 6; m++) {
uint8_t *pp = s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m]; uint8_t *pp = s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m];
unsigned *e = s->counts.eob[i][j][k][l][m]; unsigned *e = s->td[0].counts.eob[i][j][k][l][m];
unsigned *c = s->counts.coef[i][j][k][l][m]; unsigned *c = s->td[0].counts.coef[i][j][k][l][m];
if (l == 0 && m >= 3) // dc only has 3 pt if (l == 0 && m >= 3) // dc only has 3 pt
break; break;
...@@ -77,32 +77,32 @@ void ff_vp9_adapt_probs(VP9Context *s) ...@@ -77,32 +77,32 @@ void ff_vp9_adapt_probs(VP9Context *s)
// skip flag // skip flag
for (i = 0; i < 3; i++) for (i = 0; i < 3; i++)
adapt_prob(&p->skip[i], s->counts.skip[i][0], adapt_prob(&p->skip[i], s->td[0].counts.skip[i][0],
s->counts.skip[i][1], 20, 128); s->td[0].counts.skip[i][1], 20, 128);
// intra/inter flag // intra/inter flag
for (i = 0; i < 4; i++) for (i = 0; i < 4; i++)
adapt_prob(&p->intra[i], s->counts.intra[i][0], adapt_prob(&p->intra[i], s->td[0].counts.intra[i][0],
s->counts.intra[i][1], 20, 128); s->td[0].counts.intra[i][1], 20, 128);
// comppred flag // comppred flag
if (s->s.h.comppredmode == PRED_SWITCHABLE) { if (s->s.h.comppredmode == PRED_SWITCHABLE) {
for (i = 0; i < 5; i++) for (i = 0; i < 5; i++)
adapt_prob(&p->comp[i], s->counts.comp[i][0], adapt_prob(&p->comp[i], s->td[0].counts.comp[i][0],
s->counts.comp[i][1], 20, 128); s->td[0].counts.comp[i][1], 20, 128);
} }
// reference frames // reference frames
if (s->s.h.comppredmode != PRED_SINGLEREF) { if (s->s.h.comppredmode != PRED_SINGLEREF) {
for (i = 0; i < 5; i++) for (i = 0; i < 5; i++)
adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0], adapt_prob(&p->comp_ref[i], s->td[0].counts.comp_ref[i][0],
s->counts.comp_ref[i][1], 20, 128); s->td[0].counts.comp_ref[i][1], 20, 128);
} }
if (s->s.h.comppredmode != PRED_COMPREF) { if (s->s.h.comppredmode != PRED_COMPREF) {
for (i = 0; i < 5; i++) { for (i = 0; i < 5; i++) {
uint8_t *pp = p->single_ref[i]; uint8_t *pp = p->single_ref[i];
unsigned (*c)[2] = s->counts.single_ref[i]; unsigned (*c)[2] = s->td[0].counts.single_ref[i];
adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128); adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128); adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
...@@ -113,7 +113,7 @@ void ff_vp9_adapt_probs(VP9Context *s) ...@@ -113,7 +113,7 @@ void ff_vp9_adapt_probs(VP9Context *s)
for (i = 0; i < 4; i++) for (i = 0; i < 4; i++)
for (j = 0; j < 4; j++) { for (j = 0; j < 4; j++) {
uint8_t *pp = p->partition[i][j]; uint8_t *pp = p->partition[i][j];
unsigned *c = s->counts.partition[i][j]; unsigned *c = s->td[0].counts.partition[i][j];
adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128); adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128); adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
...@@ -123,10 +123,10 @@ void ff_vp9_adapt_probs(VP9Context *s) ...@@ -123,10 +123,10 @@ void ff_vp9_adapt_probs(VP9Context *s)
// tx size // tx size
if (s->s.h.txfmmode == TX_SWITCHABLE) { if (s->s.h.txfmmode == TX_SWITCHABLE) {
for (i = 0; i < 2; i++) { for (i = 0; i < 2; i++) {
unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i]; unsigned *c16 = s->td[0].counts.tx16p[i], *c32 = s->td[0].counts.tx32p[i];
adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], adapt_prob(&p->tx8p[i], s->td[0].counts.tx8p[i][0],
s->counts.tx8p[i][1], 20, 128); s->td[0].counts.tx8p[i][1], 20, 128);
adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128); adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128); adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128); adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
...@@ -139,7 +139,7 @@ void ff_vp9_adapt_probs(VP9Context *s) ...@@ -139,7 +139,7 @@ void ff_vp9_adapt_probs(VP9Context *s)
if (s->s.h.filtermode == FILTER_SWITCHABLE) { if (s->s.h.filtermode == FILTER_SWITCHABLE) {
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
uint8_t *pp = p->filter[i]; uint8_t *pp = p->filter[i];
unsigned *c = s->counts.filter[i]; unsigned *c = s->td[0].counts.filter[i];
adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128); adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
adapt_prob(&pp[1], c[1], c[2], 20, 128); adapt_prob(&pp[1], c[1], c[2], 20, 128);
...@@ -149,7 +149,7 @@ void ff_vp9_adapt_probs(VP9Context *s) ...@@ -149,7 +149,7 @@ void ff_vp9_adapt_probs(VP9Context *s)
// inter modes // inter modes
for (i = 0; i < 7; i++) { for (i = 0; i < 7; i++) {
uint8_t *pp = p->mv_mode[i]; uint8_t *pp = p->mv_mode[i];
unsigned *c = s->counts.mv_mode[i]; unsigned *c = s->td[0].counts.mv_mode[i];
adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128); adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128); adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
...@@ -159,7 +159,7 @@ void ff_vp9_adapt_probs(VP9Context *s) ...@@ -159,7 +159,7 @@ void ff_vp9_adapt_probs(VP9Context *s)
// mv joints // mv joints
{ {
uint8_t *pp = p->mv_joint; uint8_t *pp = p->mv_joint;
unsigned *c = s->counts.mv_joint; unsigned *c = s->td[0].counts.mv_joint;
adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128); adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128); adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
...@@ -171,11 +171,11 @@ void ff_vp9_adapt_probs(VP9Context *s) ...@@ -171,11 +171,11 @@ void ff_vp9_adapt_probs(VP9Context *s)
uint8_t *pp; uint8_t *pp;
unsigned *c, (*c2)[2], sum; unsigned *c, (*c2)[2], sum;
adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0], adapt_prob(&p->mv_comp[i].sign, s->td[0].counts.mv_comp[i].sign[0],
s->counts.mv_comp[i].sign[1], 20, 128); s->td[0].counts.mv_comp[i].sign[1], 20, 128);
pp = p->mv_comp[i].classes; pp = p->mv_comp[i].classes;
c = s->counts.mv_comp[i].classes; c = s->td[0].counts.mv_comp[i].classes;
sum = c[1] + c[2] + c[3] + c[4] + c[5] + sum = c[1] + c[2] + c[3] + c[4] + c[5] +
c[6] + c[7] + c[8] + c[9] + c[10]; c[6] + c[7] + c[8] + c[9] + c[10];
adapt_prob(&pp[0], c[0], sum, 20, 128); adapt_prob(&pp[0], c[0], sum, 20, 128);
...@@ -193,39 +193,39 @@ void ff_vp9_adapt_probs(VP9Context *s) ...@@ -193,39 +193,39 @@ void ff_vp9_adapt_probs(VP9Context *s)
adapt_prob(&pp[8], c[7], c[8], 20, 128); adapt_prob(&pp[8], c[7], c[8], 20, 128);
adapt_prob(&pp[9], c[9], c[10], 20, 128); adapt_prob(&pp[9], c[9], c[10], 20, 128);
adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0], adapt_prob(&p->mv_comp[i].class0, s->td[0].counts.mv_comp[i].class0[0],
s->counts.mv_comp[i].class0[1], 20, 128); s->td[0].counts.mv_comp[i].class0[1], 20, 128);
pp = p->mv_comp[i].bits; pp = p->mv_comp[i].bits;
c2 = s->counts.mv_comp[i].bits; c2 = s->td[0].counts.mv_comp[i].bits;
for (j = 0; j < 10; j++) for (j = 0; j < 10; j++)
adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128); adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
for (j = 0; j < 2; j++) { for (j = 0; j < 2; j++) {
pp = p->mv_comp[i].class0_fp[j]; pp = p->mv_comp[i].class0_fp[j];
c = s->counts.mv_comp[i].class0_fp[j]; c = s->td[0].counts.mv_comp[i].class0_fp[j];
adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128); adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128); adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
adapt_prob(&pp[2], c[2], c[3], 20, 128); adapt_prob(&pp[2], c[2], c[3], 20, 128);
} }
pp = p->mv_comp[i].fp; pp = p->mv_comp[i].fp;
c = s->counts.mv_comp[i].fp; c = s->td[0].counts.mv_comp[i].fp;
adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128); adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128); adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
adapt_prob(&pp[2], c[2], c[3], 20, 128); adapt_prob(&pp[2], c[2], c[3], 20, 128);
if (s->s.h.highprecisionmvs) { if (s->s.h.highprecisionmvs) {
adapt_prob(&p->mv_comp[i].class0_hp, adapt_prob(&p->mv_comp[i].class0_hp,
s->counts.mv_comp[i].class0_hp[0], s->td[0].counts.mv_comp[i].class0_hp[0],
s->counts.mv_comp[i].class0_hp[1], 20, 128); s->td[0].counts.mv_comp[i].class0_hp[1], 20, 128);
adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0], adapt_prob(&p->mv_comp[i].hp, s->td[0].counts.mv_comp[i].hp[0],
s->counts.mv_comp[i].hp[1], 20, 128); s->td[0].counts.mv_comp[i].hp[1], 20, 128);
} }
} }
// y intra modes // y intra modes
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
uint8_t *pp = p->y_mode[i]; uint8_t *pp = p->y_mode[i];
unsigned *c = s->counts.y_mode[i], sum, s2; unsigned *c = s->td[0].counts.y_mode[i], sum, s2;
sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9]; sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128); adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
...@@ -250,7 +250,7 @@ void ff_vp9_adapt_probs(VP9Context *s) ...@@ -250,7 +250,7 @@ void ff_vp9_adapt_probs(VP9Context *s)
// uv intra modes // uv intra modes
for (i = 0; i < 10; i++) { for (i = 0; i < 10; i++) {
uint8_t *pp = p->uv_mode[i]; uint8_t *pp = p->uv_mode[i];
unsigned *c = s->counts.uv_mode[i], sum, s2; unsigned *c = s->td[0].counts.uv_mode[i], sum, s2;
sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9]; sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128); adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment