Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
6d0d1c4a
Commit
6d0d1c4a
authored
Mar 27, 2017
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
vp9: split out reconstruction functions in their own source file.
parent
b823bbc1
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
674 additions
and
634 deletions
+674
-634
Makefile
libavcodec/Makefile
+1
-1
vp9_mc_template.c
libavcodec/vp9_mc_template.c
+4
-2
vp9block.c
libavcodec/vp9block.c
+12
-631
vp9data.c
libavcodec/vp9data.c
+10
-0
vp9data.h
libavcodec/vp9data.h
+1
-0
vp9dec.h
libavcodec/vp9dec.h
+7
-0
vp9recon.c
libavcodec/vp9recon.c
+639
-0
No files found.
libavcodec/Makefile
View file @
6d0d1c4a
...
...
@@ -610,7 +610,7 @@ OBJS-$(CONFIG_VP8_DECODER) += vp8.o vp56rac.o
OBJS-$(CONFIG_VP8_CUVID_DECODER)
+=
cuvid.o
OBJS-$(CONFIG_VP8_MEDIACODEC_DECODER)
+=
mediacodecdec.o
OBJS-$(CONFIG_VP8_VAAPI_ENCODER)
+=
vaapi_encode_vp8.o
OBJS-$(CONFIG_VP9_DECODER)
+=
vp9.o
vp9data.o
vp9dsp.o
vp9lpf.o
\
OBJS-$(CONFIG_VP9_DECODER)
+=
vp9.o
vp9data.o
vp9dsp.o
vp9lpf.o
vp9recon.o
\
vp9block.o
vp9prob.o
vp9mvs.o
vp56rac.o
\
vp9dsp_8bpp.o
vp9dsp_10bpp.o
vp9dsp_12bpp.o
OBJS-$(CONFIG_VP9_CUVID_DECODER)
+=
cuvid.o
...
...
libavcodec/vp9_mc_template.c
View file @
6d0d1c4a
...
...
@@ -405,8 +405,10 @@ static void FN(inter_pred)(AVCodecContext *avctx)
}
}
else
{
int
bwl
=
bwlog_tab
[
0
][
b
->
bs
];
int
bw
=
bwh_tab
[
0
][
b
->
bs
][
0
]
*
4
,
bh
=
bwh_tab
[
0
][
b
->
bs
][
1
]
*
4
;
int
uvbw
=
bwh_tab
[
s
->
ss_h
][
b
->
bs
][
0
]
*
4
,
uvbh
=
bwh_tab
[
s
->
ss_v
][
b
->
bs
][
1
]
*
4
;
int
bw
=
ff_vp9_bwh_tab
[
0
][
b
->
bs
][
0
]
*
4
;
int
bh
=
ff_vp9_bwh_tab
[
0
][
b
->
bs
][
1
]
*
4
;
int
uvbw
=
ff_vp9_bwh_tab
[
s
->
ss_h
][
b
->
bs
][
0
]
*
4
;
int
uvbh
=
ff_vp9_bwh_tab
[
s
->
ss_v
][
b
->
bs
][
1
]
*
4
;
mc_luma_dir
(
s
,
mc
[
bwl
][
b
->
filter
][
0
],
s
->
dst
[
0
],
ls_y
,
ref1
->
data
[
0
],
ref1
->
linesize
[
0
],
tref1
,
...
...
libavcodec/vp9block.c
View file @
6d0d1c4a
...
...
@@ -31,16 +31,6 @@
#include "vp9data.h"
#include "vp9dec.h"
static
const
uint8_t
bwh_tab
[
2
][
N_BS_SIZES
][
2
]
=
{
{
{
16
,
16
},
{
16
,
8
},
{
8
,
16
},
{
8
,
8
},
{
8
,
4
},
{
4
,
8
},
{
4
,
4
},
{
4
,
2
},
{
2
,
4
},
{
2
,
2
},
{
2
,
1
},
{
1
,
2
},
{
1
,
1
},
},
{
{
8
,
8
},
{
8
,
4
},
{
4
,
8
},
{
4
,
4
},
{
4
,
2
},
{
2
,
4
},
{
2
,
2
},
{
2
,
1
},
{
1
,
2
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
}
};
static
av_always_inline
void
setctx_2d
(
uint8_t
*
ptr
,
int
w
,
int
h
,
ptrdiff_t
stride
,
int
v
)
{
...
...
@@ -103,8 +93,8 @@ static void decode_mode(AVCodecContext *avctx)
VP9Block
*
b
=
s
->
b
;
int
row
=
s
->
row
,
col
=
s
->
col
,
row7
=
s
->
row7
;
enum
TxfmMode
max_tx
=
max_tx_for_bl_bp
[
b
->
bs
];
int
bw4
=
bwh_tab
[
1
][
b
->
bs
][
0
],
w4
=
FFMIN
(
s
->
cols
-
col
,
bw4
);
int
bh4
=
bwh_tab
[
1
][
b
->
bs
][
1
],
h4
=
FFMIN
(
s
->
rows
-
row
,
bh4
),
y
;
int
bw4
=
ff_vp9_
bwh_tab
[
1
][
b
->
bs
][
0
],
w4
=
FFMIN
(
s
->
cols
-
col
,
bw4
);
int
bh4
=
ff_vp9_
bwh_tab
[
1
][
b
->
bs
][
1
],
h4
=
FFMIN
(
s
->
rows
-
row
,
bh4
),
y
;
int
have_a
=
row
>
0
,
have_l
=
col
>
s
->
tile_col_start
;
int
vref
,
filter_id
;
...
...
@@ -272,8 +262,8 @@ static void decode_mode(AVCodecContext *avctx)
b
->
mode
[
2
]
=
b
->
mode
[
1
]
=
b
->
mode
[
0
];
// FIXME this can probably be optimized
memset
(
a
,
b
->
mode
[
0
],
bwh_tab
[
0
][
b
->
bs
][
0
]);
memset
(
l
,
b
->
mode
[
0
],
bwh_tab
[
0
][
b
->
bs
][
1
]);
memset
(
a
,
b
->
mode
[
0
],
ff_vp9_
bwh_tab
[
0
][
b
->
bs
][
0
]);
memset
(
l
,
b
->
mode
[
0
],
ff_vp9_
bwh_tab
[
0
][
b
->
bs
][
1
]);
}
b
->
uvmode
=
vp8_rac_get_tree
(
&
s
->
c
,
ff_vp9_intramode_tree
,
ff_vp9_default_kf_uvmode_probs
[
b
->
mode
[
3
]]);
...
...
@@ -725,7 +715,7 @@ static void decode_mode(AVCodecContext *avctx)
}
#endif
switch
(
bwh_tab
[
1
][
b
->
bs
][
0
])
{
switch
(
ff_vp9_
bwh_tab
[
1
][
b
->
bs
][
0
])
{
#define SET_CTXS(dir, off, n) \
do { \
SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
...
...
@@ -748,7 +738,7 @@ static void decode_mode(AVCodecContext *avctx)
case
4
:
SET_CTXS
(
above
,
col
,
4
);
break
;
case
8
:
SET_CTXS
(
above
,
col
,
8
);
break
;
}
switch
(
bwh_tab
[
1
][
b
->
bs
][
1
])
{
switch
(
ff_vp9_
bwh_tab
[
1
][
b
->
bs
][
1
])
{
case
1
:
SET_CTXS
(
left
,
row7
,
1
);
break
;
case
2
:
SET_CTXS
(
left
,
row7
,
2
);
break
;
case
4
:
SET_CTXS
(
left
,
row7
,
4
);
break
;
...
...
@@ -983,7 +973,7 @@ static av_always_inline int decode_coeffs(AVCodecContext *avctx, int is8bitsperp
uint8_t
(
*
p
)[
6
][
11
]
=
s
->
prob
.
coef
[
b
->
tx
][
0
/* y */
][
!
b
->
intra
];
unsigned
(
*
c
)[
6
][
3
]
=
s
->
counts
.
coef
[
b
->
tx
][
0
/* y */
][
!
b
->
intra
];
unsigned
(
*
e
)[
6
][
2
]
=
s
->
counts
.
eob
[
b
->
tx
][
0
/* y */
][
!
b
->
intra
];
int
w4
=
bwh_tab
[
1
][
b
->
bs
][
0
]
<<
1
,
h4
=
bwh_tab
[
1
][
b
->
bs
][
1
]
<<
1
;
int
w4
=
ff_vp9_bwh_tab
[
1
][
b
->
bs
][
0
]
<<
1
,
h4
=
ff_vp9_
bwh_tab
[
1
][
b
->
bs
][
1
]
<<
1
;
int
end_x
=
FFMIN
(
2
*
(
s
->
cols
-
col
),
w4
);
int
end_y
=
FFMIN
(
2
*
(
s
->
rows
-
row
),
h4
);
int
n
,
pl
,
x
,
y
,
ret
;
...
...
@@ -1152,615 +1142,6 @@ static int decode_coeffs_16bpp(AVCodecContext *avctx)
return
decode_coeffs
(
avctx
,
0
);
}
static
av_always_inline
int
check_intra_mode
(
VP9Context
*
s
,
int
mode
,
uint8_t
**
a
,
uint8_t
*
dst_edge
,
ptrdiff_t
stride_edge
,
uint8_t
*
dst_inner
,
ptrdiff_t
stride_inner
,
uint8_t
*
l
,
int
col
,
int
x
,
int
w
,
int
row
,
int
y
,
enum
TxfmMode
tx
,
int
p
,
int
ss_h
,
int
ss_v
,
int
bytesperpixel
)
{
int
have_top
=
row
>
0
||
y
>
0
;
int
have_left
=
col
>
s
->
tile_col_start
||
x
>
0
;
int
have_right
=
x
<
w
-
1
;
int
bpp
=
s
->
s
.
h
.
bpp
;
static
const
uint8_t
mode_conv
[
10
][
2
/* have_left */
][
2
/* have_top */
]
=
{
[
VERT_PRED
]
=
{
{
DC_127_PRED
,
VERT_PRED
},
{
DC_127_PRED
,
VERT_PRED
}
},
[
HOR_PRED
]
=
{
{
DC_129_PRED
,
DC_129_PRED
},
{
HOR_PRED
,
HOR_PRED
}
},
[
DC_PRED
]
=
{
{
DC_128_PRED
,
TOP_DC_PRED
},
{
LEFT_DC_PRED
,
DC_PRED
}
},
[
DIAG_DOWN_LEFT_PRED
]
=
{
{
DC_127_PRED
,
DIAG_DOWN_LEFT_PRED
},
{
DC_127_PRED
,
DIAG_DOWN_LEFT_PRED
}
},
[
DIAG_DOWN_RIGHT_PRED
]
=
{
{
DIAG_DOWN_RIGHT_PRED
,
DIAG_DOWN_RIGHT_PRED
},
{
DIAG_DOWN_RIGHT_PRED
,
DIAG_DOWN_RIGHT_PRED
}
},
[
VERT_RIGHT_PRED
]
=
{
{
VERT_RIGHT_PRED
,
VERT_RIGHT_PRED
},
{
VERT_RIGHT_PRED
,
VERT_RIGHT_PRED
}
},
[
HOR_DOWN_PRED
]
=
{
{
HOR_DOWN_PRED
,
HOR_DOWN_PRED
},
{
HOR_DOWN_PRED
,
HOR_DOWN_PRED
}
},
[
VERT_LEFT_PRED
]
=
{
{
DC_127_PRED
,
VERT_LEFT_PRED
},
{
DC_127_PRED
,
VERT_LEFT_PRED
}
},
[
HOR_UP_PRED
]
=
{
{
DC_129_PRED
,
DC_129_PRED
},
{
HOR_UP_PRED
,
HOR_UP_PRED
}
},
[
TM_VP8_PRED
]
=
{
{
DC_129_PRED
,
VERT_PRED
},
{
HOR_PRED
,
TM_VP8_PRED
}
},
};
static
const
struct
{
uint8_t
needs_left
:
1
;
uint8_t
needs_top
:
1
;
uint8_t
needs_topleft
:
1
;
uint8_t
needs_topright
:
1
;
uint8_t
invert_left
:
1
;
}
edges
[
N_INTRA_PRED_MODES
]
=
{
[
VERT_PRED
]
=
{
.
needs_top
=
1
},
[
HOR_PRED
]
=
{
.
needs_left
=
1
},
[
DC_PRED
]
=
{
.
needs_top
=
1
,
.
needs_left
=
1
},
[
DIAG_DOWN_LEFT_PRED
]
=
{
.
needs_top
=
1
,
.
needs_topright
=
1
},
[
DIAG_DOWN_RIGHT_PRED
]
=
{
.
needs_left
=
1
,
.
needs_top
=
1
,
.
needs_topleft
=
1
},
[
VERT_RIGHT_PRED
]
=
{
.
needs_left
=
1
,
.
needs_top
=
1
,
.
needs_topleft
=
1
},
[
HOR_DOWN_PRED
]
=
{
.
needs_left
=
1
,
.
needs_top
=
1
,
.
needs_topleft
=
1
},
[
VERT_LEFT_PRED
]
=
{
.
needs_top
=
1
,
.
needs_topright
=
1
},
[
HOR_UP_PRED
]
=
{
.
needs_left
=
1
,
.
invert_left
=
1
},
[
TM_VP8_PRED
]
=
{
.
needs_left
=
1
,
.
needs_top
=
1
,
.
needs_topleft
=
1
},
[
LEFT_DC_PRED
]
=
{
.
needs_left
=
1
},
[
TOP_DC_PRED
]
=
{
.
needs_top
=
1
},
[
DC_128_PRED
]
=
{
0
},
[
DC_127_PRED
]
=
{
0
},
[
DC_129_PRED
]
=
{
0
}
};
av_assert2
(
mode
>=
0
&&
mode
<
10
);
mode
=
mode_conv
[
mode
][
have_left
][
have_top
];
if
(
edges
[
mode
].
needs_top
)
{
uint8_t
*
top
,
*
topleft
;
int
n_px_need
=
4
<<
tx
,
n_px_have
=
(((
s
->
cols
-
col
)
<<
!
ss_h
)
-
x
)
*
4
;
int
n_px_need_tr
=
0
;
if
(
tx
==
TX_4X4
&&
edges
[
mode
].
needs_topright
&&
have_right
)
n_px_need_tr
=
4
;
// if top of sb64-row, use s->intra_pred_data[] instead of
// dst[-stride] for intra prediction (it contains pre- instead of
// post-loopfilter data)
if
(
have_top
)
{
top
=
!
(
row
&
7
)
&&
!
y
?
s
->
intra_pred_data
[
p
]
+
(
col
*
(
8
>>
ss_h
)
+
x
*
4
)
*
bytesperpixel
:
y
==
0
?
&
dst_edge
[
-
stride_edge
]
:
&
dst_inner
[
-
stride_inner
];
if
(
have_left
)
topleft
=
!
(
row
&
7
)
&&
!
y
?
s
->
intra_pred_data
[
p
]
+
(
col
*
(
8
>>
ss_h
)
+
x
*
4
)
*
bytesperpixel
:
y
==
0
||
x
==
0
?
&
dst_edge
[
-
stride_edge
]
:
&
dst_inner
[
-
stride_inner
];
}
if
(
have_top
&&
(
!
edges
[
mode
].
needs_topleft
||
(
have_left
&&
top
==
topleft
))
&&
(
tx
!=
TX_4X4
||
!
edges
[
mode
].
needs_topright
||
have_right
)
&&
n_px_need
+
n_px_need_tr
<=
n_px_have
)
{
*
a
=
top
;
}
else
{
if
(
have_top
)
{
if
(
n_px_need
<=
n_px_have
)
{
memcpy
(
*
a
,
top
,
n_px_need
*
bytesperpixel
);
}
else
{
#define memset_bpp(c, i1, v, i2, num) do { \
if (bytesperpixel == 1) { \
memset(&(c)[(i1)], (v)[(i2)], (num)); \
} else { \
int n, val = AV_RN16A(&(v)[(i2) * 2]); \
for (n = 0; n < (num); n++) { \
AV_WN16A(&(c)[((i1) + n) * 2], val); \
} \
} \
} while (0)
memcpy
(
*
a
,
top
,
n_px_have
*
bytesperpixel
);
memset_bpp
(
*
a
,
n_px_have
,
(
*
a
),
n_px_have
-
1
,
n_px_need
-
n_px_have
);
}
}
else
{
#define memset_val(c, val, num) do { \
if (bytesperpixel == 1) { \
memset((c), (val), (num)); \
} else { \
int n; \
for (n = 0; n < (num); n++) { \
AV_WN16A(&(c)[n * 2], (val)); \
} \
} \
} while (0)
memset_val
(
*
a
,
(
128
<<
(
bpp
-
8
))
-
1
,
n_px_need
);
}
if
(
edges
[
mode
].
needs_topleft
)
{
if
(
have_left
&&
have_top
)
{
#define assign_bpp(c, i1, v, i2) do { \
if (bytesperpixel == 1) { \
(c)[(i1)] = (v)[(i2)]; \
} else { \
AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
} \
} while (0)
assign_bpp
(
*
a
,
-
1
,
topleft
,
-
1
);
}
else
{
#define assign_val(c, i, v) do { \
if (bytesperpixel == 1) { \
(c)[(i)] = (v); \
} else { \
AV_WN16A(&(c)[(i) * 2], (v)); \
} \
} while (0)
assign_val
((
*
a
),
-
1
,
(
128
<<
(
bpp
-
8
))
+
(
have_top
?
+
1
:
-
1
));
}
}
if
(
tx
==
TX_4X4
&&
edges
[
mode
].
needs_topright
)
{
if
(
have_top
&&
have_right
&&
n_px_need
+
n_px_need_tr
<=
n_px_have
)
{
memcpy
(
&
(
*
a
)[
4
*
bytesperpixel
],
&
top
[
4
*
bytesperpixel
],
4
*
bytesperpixel
);
}
else
{
memset_bpp
(
*
a
,
4
,
*
a
,
3
,
4
);
}
}
}
}
if
(
edges
[
mode
].
needs_left
)
{
if
(
have_left
)
{
int
n_px_need
=
4
<<
tx
,
i
,
n_px_have
=
(((
s
->
rows
-
row
)
<<
!
ss_v
)
-
y
)
*
4
;
uint8_t
*
dst
=
x
==
0
?
dst_edge
:
dst_inner
;
ptrdiff_t
stride
=
x
==
0
?
stride_edge
:
stride_inner
;
if
(
edges
[
mode
].
invert_left
)
{
if
(
n_px_need
<=
n_px_have
)
{
for
(
i
=
0
;
i
<
n_px_need
;
i
++
)
assign_bpp
(
l
,
i
,
&
dst
[
i
*
stride
],
-
1
);
}
else
{
for
(
i
=
0
;
i
<
n_px_have
;
i
++
)
assign_bpp
(
l
,
i
,
&
dst
[
i
*
stride
],
-
1
);
memset_bpp
(
l
,
n_px_have
,
l
,
n_px_have
-
1
,
n_px_need
-
n_px_have
);
}
}
else
{
if
(
n_px_need
<=
n_px_have
)
{
for
(
i
=
0
;
i
<
n_px_need
;
i
++
)
assign_bpp
(
l
,
n_px_need
-
1
-
i
,
&
dst
[
i
*
stride
],
-
1
);
}
else
{
for
(
i
=
0
;
i
<
n_px_have
;
i
++
)
assign_bpp
(
l
,
n_px_need
-
1
-
i
,
&
dst
[
i
*
stride
],
-
1
);
memset_bpp
(
l
,
0
,
l
,
n_px_need
-
n_px_have
,
n_px_need
-
n_px_have
);
}
}
}
else
{
memset_val
(
l
,
(
128
<<
(
bpp
-
8
))
+
1
,
4
<<
tx
);
}
}
return
mode
;
}
static
av_always_inline
void
intra_recon
(
AVCodecContext
*
avctx
,
ptrdiff_t
y_off
,
ptrdiff_t
uv_off
,
int
bytesperpixel
)
{
VP9Context
*
s
=
avctx
->
priv_data
;
VP9Block
*
b
=
s
->
b
;
int
row
=
s
->
row
,
col
=
s
->
col
;
int
w4
=
bwh_tab
[
1
][
b
->
bs
][
0
]
<<
1
,
step1d
=
1
<<
b
->
tx
,
n
;
int
h4
=
bwh_tab
[
1
][
b
->
bs
][
1
]
<<
1
,
x
,
y
,
step
=
1
<<
(
b
->
tx
*
2
);
int
end_x
=
FFMIN
(
2
*
(
s
->
cols
-
col
),
w4
);
int
end_y
=
FFMIN
(
2
*
(
s
->
rows
-
row
),
h4
);
int
tx
=
4
*
s
->
s
.
h
.
lossless
+
b
->
tx
,
uvtx
=
b
->
uvtx
+
4
*
s
->
s
.
h
.
lossless
;
int
uvstep1d
=
1
<<
b
->
uvtx
,
p
;
uint8_t
*
dst
=
s
->
dst
[
0
],
*
dst_r
=
s
->
s
.
frames
[
CUR_FRAME
].
tf
.
f
->
data
[
0
]
+
y_off
;
LOCAL_ALIGNED_32
(
uint8_t
,
a_buf
,
[
96
]);
LOCAL_ALIGNED_32
(
uint8_t
,
l
,
[
64
]);
for
(
n
=
0
,
y
=
0
;
y
<
end_y
;
y
+=
step1d
)
{
uint8_t
*
ptr
=
dst
,
*
ptr_r
=
dst_r
;
for
(
x
=
0
;
x
<
end_x
;
x
+=
step1d
,
ptr
+=
4
*
step1d
*
bytesperpixel
,
ptr_r
+=
4
*
step1d
*
bytesperpixel
,
n
+=
step
)
{
int
mode
=
b
->
mode
[
b
->
bs
>
BS_8x8
&&
b
->
tx
==
TX_4X4
?
y
*
2
+
x
:
0
];
uint8_t
*
a
=
&
a_buf
[
32
];
enum
TxfmType
txtp
=
ff_vp9_intra_txfm_type
[
mode
];
int
eob
=
b
->
skip
?
0
:
b
->
tx
>
TX_8X8
?
AV_RN16A
(
&
s
->
eob
[
n
])
:
s
->
eob
[
n
];
mode
=
check_intra_mode
(
s
,
mode
,
&
a
,
ptr_r
,
s
->
s
.
frames
[
CUR_FRAME
].
tf
.
f
->
linesize
[
0
],
ptr
,
s
->
y_stride
,
l
,
col
,
x
,
w4
,
row
,
y
,
b
->
tx
,
0
,
0
,
0
,
bytesperpixel
);
s
->
dsp
.
intra_pred
[
b
->
tx
][
mode
](
ptr
,
s
->
y_stride
,
l
,
a
);
if
(
eob
)
s
->
dsp
.
itxfm_add
[
tx
][
txtp
](
ptr
,
s
->
y_stride
,
s
->
block
+
16
*
n
*
bytesperpixel
,
eob
);
}
dst_r
+=
4
*
step1d
*
s
->
s
.
frames
[
CUR_FRAME
].
tf
.
f
->
linesize
[
0
];
dst
+=
4
*
step1d
*
s
->
y_stride
;
}
// U/V
w4
>>=
s
->
ss_h
;
end_x
>>=
s
->
ss_h
;
end_y
>>=
s
->
ss_v
;
step
=
1
<<
(
b
->
uvtx
*
2
);
for
(
p
=
0
;
p
<
2
;
p
++
)
{
dst
=
s
->
dst
[
1
+
p
];
dst_r
=
s
->
s
.
frames
[
CUR_FRAME
].
tf
.
f
->
data
[
1
+
p
]
+
uv_off
;
for
(
n
=
0
,
y
=
0
;
y
<
end_y
;
y
+=
uvstep1d
)
{
uint8_t
*
ptr
=
dst
,
*
ptr_r
=
dst_r
;
for
(
x
=
0
;
x
<
end_x
;
x
+=
uvstep1d
,
ptr
+=
4
*
uvstep1d
*
bytesperpixel
,
ptr_r
+=
4
*
uvstep1d
*
bytesperpixel
,
n
+=
step
)
{
int
mode
=
b
->
uvmode
;
uint8_t
*
a
=
&
a_buf
[
32
];
int
eob
=
b
->
skip
?
0
:
b
->
uvtx
>
TX_8X8
?
AV_RN16A
(
&
s
->
uveob
[
p
][
n
])
:
s
->
uveob
[
p
][
n
];
mode
=
check_intra_mode
(
s
,
mode
,
&
a
,
ptr_r
,
s
->
s
.
frames
[
CUR_FRAME
].
tf
.
f
->
linesize
[
1
],
ptr
,
s
->
uv_stride
,
l
,
col
,
x
,
w4
,
row
,
y
,
b
->
uvtx
,
p
+
1
,
s
->
ss_h
,
s
->
ss_v
,
bytesperpixel
);
s
->
dsp
.
intra_pred
[
b
->
uvtx
][
mode
](
ptr
,
s
->
uv_stride
,
l
,
a
);
if
(
eob
)
s
->
dsp
.
itxfm_add
[
uvtx
][
DCT_DCT
](
ptr
,
s
->
uv_stride
,
s
->
uvblock
[
p
]
+
16
*
n
*
bytesperpixel
,
eob
);
}
dst_r
+=
4
*
uvstep1d
*
s
->
s
.
frames
[
CUR_FRAME
].
tf
.
f
->
linesize
[
1
];
dst
+=
4
*
uvstep1d
*
s
->
uv_stride
;
}
}
}
static
void
intra_recon_8bpp
(
AVCodecContext
*
avctx
,
ptrdiff_t
y_off
,
ptrdiff_t
uv_off
)
{
intra_recon
(
avctx
,
y_off
,
uv_off
,
1
);
}
static
void
intra_recon_16bpp
(
AVCodecContext
*
avctx
,
ptrdiff_t
y_off
,
ptrdiff_t
uv_off
)
{
intra_recon
(
avctx
,
y_off
,
uv_off
,
2
);
}
static
av_always_inline
void
mc_luma_unscaled
(
VP9Context
*
s
,
vp9_mc_func
(
*
mc
)[
2
],
uint8_t
*
dst
,
ptrdiff_t
dst_stride
,
const
uint8_t
*
ref
,
ptrdiff_t
ref_stride
,
ThreadFrame
*
ref_frame
,
ptrdiff_t
y
,
ptrdiff_t
x
,
const
VP56mv
*
mv
,
int
bw
,
int
bh
,
int
w
,
int
h
,
int
bytesperpixel
)
{
int
mx
=
mv
->
x
,
my
=
mv
->
y
,
th
;
y
+=
my
>>
3
;
x
+=
mx
>>
3
;
ref
+=
y
*
ref_stride
+
x
*
bytesperpixel
;
mx
&=
7
;
my
&=
7
;
// FIXME bilinear filter only needs 0/1 pixels, not 3/4
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
th
=
(
y
+
bh
+
4
*
!!
my
+
7
)
>>
6
;
ff_thread_await_progress
(
ref_frame
,
FFMAX
(
th
,
0
),
0
);
// The arm/aarch64 _hv filters read one more row than what actually is
// needed, so switch to emulated edge one pixel sooner vertically
// (!!my * 5) than horizontally (!!mx * 4).
if
(
x
<
!!
mx
*
3
||
y
<
!!
my
*
3
||
x
+
!!
mx
*
4
>
w
-
bw
||
y
+
!!
my
*
5
>
h
-
bh
)
{
s
->
vdsp
.
emulated_edge_mc
(
s
->
edge_emu_buffer
,
ref
-
!!
my
*
3
*
ref_stride
-
!!
mx
*
3
*
bytesperpixel
,
160
,
ref_stride
,
bw
+
!!
mx
*
7
,
bh
+
!!
my
*
7
,
x
-
!!
mx
*
3
,
y
-
!!
my
*
3
,
w
,
h
);
ref
=
s
->
edge_emu_buffer
+
!!
my
*
3
*
160
+
!!
mx
*
3
*
bytesperpixel
;
ref_stride
=
160
;
}
mc
[
!!
mx
][
!!
my
](
dst
,
dst_stride
,
ref
,
ref_stride
,
bh
,
mx
<<
1
,
my
<<
1
);
}
static
av_always_inline
void
mc_chroma_unscaled
(
VP9Context
*
s
,
vp9_mc_func
(
*
mc
)[
2
],
uint8_t
*
dst_u
,
uint8_t
*
dst_v
,
ptrdiff_t
dst_stride
,
const
uint8_t
*
ref_u
,
ptrdiff_t
src_stride_u
,
const
uint8_t
*
ref_v
,
ptrdiff_t
src_stride_v
,
ThreadFrame
*
ref_frame
,
ptrdiff_t
y
,
ptrdiff_t
x
,
const
VP56mv
*
mv
,
int
bw
,
int
bh
,
int
w
,
int
h
,
int
bytesperpixel
)
{
int
mx
=
mv
->
x
*
(
1
<<
!
s
->
ss_h
),
my
=
mv
->
y
*
(
1
<<
!
s
->
ss_v
),
th
;
y
+=
my
>>
4
;
x
+=
mx
>>
4
;
ref_u
+=
y
*
src_stride_u
+
x
*
bytesperpixel
;
ref_v
+=
y
*
src_stride_v
+
x
*
bytesperpixel
;
mx
&=
15
;
my
&=
15
;
// FIXME bilinear filter only needs 0/1 pixels, not 3/4
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
th
=
(
y
+
bh
+
4
*
!!
my
+
7
)
>>
(
6
-
s
->
ss_v
);
ff_thread_await_progress
(
ref_frame
,
FFMAX
(
th
,
0
),
0
);
// The arm/aarch64 _hv filters read one more row than what actually is
// needed, so switch to emulated edge one pixel sooner vertically
// (!!my * 5) than horizontally (!!mx * 4).
if
(
x
<
!!
mx
*
3
||
y
<
!!
my
*
3
||
x
+
!!
mx
*
4
>
w
-
bw
||
y
+
!!
my
*
5
>
h
-
bh
)
{
s
->
vdsp
.
emulated_edge_mc
(
s
->
edge_emu_buffer
,
ref_u
-
!!
my
*
3
*
src_stride_u
-
!!
mx
*
3
*
bytesperpixel
,
160
,
src_stride_u
,
bw
+
!!
mx
*
7
,
bh
+
!!
my
*
7
,
x
-
!!
mx
*
3
,
y
-
!!
my
*
3
,
w
,
h
);
ref_u
=
s
->
edge_emu_buffer
+
!!
my
*
3
*
160
+
!!
mx
*
3
*
bytesperpixel
;
mc
[
!!
mx
][
!!
my
](
dst_u
,
dst_stride
,
ref_u
,
160
,
bh
,
mx
,
my
);
s
->
vdsp
.
emulated_edge_mc
(
s
->
edge_emu_buffer
,
ref_v
-
!!
my
*
3
*
src_stride_v
-
!!
mx
*
3
*
bytesperpixel
,
160
,
src_stride_v
,
bw
+
!!
mx
*
7
,
bh
+
!!
my
*
7
,
x
-
!!
mx
*
3
,
y
-
!!
my
*
3
,
w
,
h
);
ref_v
=
s
->
edge_emu_buffer
+
!!
my
*
3
*
160
+
!!
mx
*
3
*
bytesperpixel
;
mc
[
!!
mx
][
!!
my
](
dst_v
,
dst_stride
,
ref_v
,
160
,
bh
,
mx
,
my
);
}
else
{
mc
[
!!
mx
][
!!
my
](
dst_u
,
dst_stride
,
ref_u
,
src_stride_u
,
bh
,
mx
,
my
);
mc
[
!!
mx
][
!!
my
](
dst_v
,
dst_stride
,
ref_v
,
src_stride_v
,
bh
,
mx
,
my
);
}
}
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
px, py, pw, ph, bw, bh, w, h, i) \
mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
mv, bw, bh, w, h, bytesperpixel)
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, bw, bh, w, h, bytesperpixel)
#define SCALED 0
#define FN(x) x##_8bpp
#define BYTES_PER_PIXEL 1
#include "vp9_mc_template.c"
#undef FN
#undef BYTES_PER_PIXEL
#define FN(x) x##_16bpp
#define BYTES_PER_PIXEL 2
#include "vp9_mc_template.c"
#undef mc_luma_dir
#undef mc_chroma_dir
#undef FN
#undef BYTES_PER_PIXEL
#undef SCALED
static
av_always_inline
void
mc_luma_scaled
(
VP9Context
*
s
,
vp9_scaled_mc_func
smc
,
vp9_mc_func
(
*
mc
)[
2
],
uint8_t
*
dst
,
ptrdiff_t
dst_stride
,
const
uint8_t
*
ref
,
ptrdiff_t
ref_stride
,
ThreadFrame
*
ref_frame
,
ptrdiff_t
y
,
ptrdiff_t
x
,
const
VP56mv
*
in_mv
,
int
px
,
int
py
,
int
pw
,
int
ph
,
int
bw
,
int
bh
,
int
w
,
int
h
,
int
bytesperpixel
,
const
uint16_t
*
scale
,
const
uint8_t
*
step
)
{
if
(
s
->
s
.
frames
[
CUR_FRAME
].
tf
.
f
->
width
==
ref_frame
->
f
->
width
&&
s
->
s
.
frames
[
CUR_FRAME
].
tf
.
f
->
height
==
ref_frame
->
f
->
height
)
{
mc_luma_unscaled
(
s
,
mc
,
dst
,
dst_stride
,
ref
,
ref_stride
,
ref_frame
,
y
,
x
,
in_mv
,
bw
,
bh
,
w
,
h
,
bytesperpixel
);
}
else
{
#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
int
mx
,
my
;
int
refbw_m1
,
refbh_m1
;
int
th
;
VP56mv
mv
;
mv
.
x
=
av_clip
(
in_mv
->
x
,
-
(
x
+
pw
-
px
+
4
)
*
8
,
(
s
->
cols
*
8
-
x
+
px
+
3
)
*
8
);
mv
.
y
=
av_clip
(
in_mv
->
y
,
-
(
y
+
ph
-
py
+
4
)
*
8
,
(
s
->
rows
*
8
-
y
+
py
+
3
)
*
8
);
// BUG libvpx seems to scale the two components separately. This introduces
// rounding errors but we have to reproduce them to be exactly compatible
// with the output from libvpx...
mx
=
scale_mv
(
mv
.
x
*
2
,
0
)
+
scale_mv
(
x
*
16
,
0
);
my
=
scale_mv
(
mv
.
y
*
2
,
1
)
+
scale_mv
(
y
*
16
,
1
);
y
=
my
>>
4
;
x
=
mx
>>
4
;
ref
+=
y
*
ref_stride
+
x
*
bytesperpixel
;
mx
&=
15
;
my
&=
15
;
refbw_m1
=
((
bw
-
1
)
*
step
[
0
]
+
mx
)
>>
4
;
refbh_m1
=
((
bh
-
1
)
*
step
[
1
]
+
my
)
>>
4
;
// FIXME bilinear filter only needs 0/1 pixels, not 3/4
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
th
=
(
y
+
refbh_m1
+
4
+
7
)
>>
6
;
ff_thread_await_progress
(
ref_frame
,
FFMAX
(
th
,
0
),
0
);
// The arm/aarch64 _hv filters read one more row than what actually is
// needed, so switch to emulated edge one pixel sooner vertically
// (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
if
(
x
<
3
||
y
<
3
||
x
+
4
>=
w
-
refbw_m1
||
y
+
5
>=
h
-
refbh_m1
)
{
s
->
vdsp
.
emulated_edge_mc
(
s
->
edge_emu_buffer
,
ref
-
3
*
ref_stride
-
3
*
bytesperpixel
,
288
,
ref_stride
,
refbw_m1
+
8
,
refbh_m1
+
8
,
x
-
3
,
y
-
3
,
w
,
h
);
ref
=
s
->
edge_emu_buffer
+
3
*
288
+
3
*
bytesperpixel
;
ref_stride
=
288
;
}
smc
(
dst
,
dst_stride
,
ref
,
ref_stride
,
bh
,
mx
,
my
,
step
[
0
],
step
[
1
]);
}
}
static
av_always_inline
void
mc_chroma_scaled
(
VP9Context
*
s
,
vp9_scaled_mc_func
smc
,
vp9_mc_func
(
*
mc
)[
2
],
uint8_t
*
dst_u
,
uint8_t
*
dst_v
,
ptrdiff_t
dst_stride
,
const
uint8_t
*
ref_u
,
ptrdiff_t
src_stride_u
,
const
uint8_t
*
ref_v
,
ptrdiff_t
src_stride_v
,
ThreadFrame
*
ref_frame
,
ptrdiff_t
y
,
ptrdiff_t
x
,
const
VP56mv
*
in_mv
,
int
px
,
int
py
,
int
pw
,
int
ph
,
int
bw
,
int
bh
,
int
w
,
int
h
,
int
bytesperpixel
,
const
uint16_t
*
scale
,
const
uint8_t
*
step
)
{
if
(
s
->
s
.
frames
[
CUR_FRAME
].
tf
.
f
->
width
==
ref_frame
->
f
->
width
&&
s
->
s
.
frames
[
CUR_FRAME
].
tf
.
f
->
height
==
ref_frame
->
f
->
height
)
{
mc_chroma_unscaled
(
s
,
mc
,
dst_u
,
dst_v
,
dst_stride
,
ref_u
,
src_stride_u
,
ref_v
,
src_stride_v
,
ref_frame
,
y
,
x
,
in_mv
,
bw
,
bh
,
w
,
h
,
bytesperpixel
);
}
else
{
int
mx
,
my
;
int
refbw_m1
,
refbh_m1
;
int
th
;
VP56mv
mv
;
if
(
s
->
ss_h
)
{
// BUG https://code.google.com/p/webm/issues/detail?id=820
mv
.
x
=
av_clip
(
in_mv
->
x
,
-
(
x
+
pw
-
px
+
4
)
*
16
,
(
s
->
cols
*
4
-
x
+
px
+
3
)
*
16
);
mx
=
scale_mv
(
mv
.
x
,
0
)
+
(
scale_mv
(
x
*
16
,
0
)
&
~
15
)
+
(
scale_mv
(
x
*
32
,
0
)
&
15
);
}
else
{
mv
.
x
=
av_clip
(
in_mv
->
x
,
-
(
x
+
pw
-
px
+
4
)
*
8
,
(
s
->
cols
*
8
-
x
+
px
+
3
)
*
8
);
mx
=
scale_mv
(
mv
.
x
*
2
,
0
)
+
scale_mv
(
x
*
16
,
0
);
}
if
(
s
->
ss_v
)
{
// BUG https://code.google.com/p/webm/issues/detail?id=820
mv
.
y
=
av_clip
(
in_mv
->
y
,
-
(
y
+
ph
-
py
+
4
)
*
16
,
(
s
->
rows
*
4
-
y
+
py
+
3
)
*
16
);
my
=
scale_mv
(
mv
.
y
,
1
)
+
(
scale_mv
(
y
*
16
,
1
)
&
~
15
)
+
(
scale_mv
(
y
*
32
,
1
)
&
15
);
}
else
{
mv
.
y
=
av_clip
(
in_mv
->
y
,
-
(
y
+
ph
-
py
+
4
)
*
8
,
(
s
->
rows
*
8
-
y
+
py
+
3
)
*
8
);
my
=
scale_mv
(
mv
.
y
*
2
,
1
)
+
scale_mv
(
y
*
16
,
1
);
}
#undef scale_mv
y
=
my
>>
4
;
x
=
mx
>>
4
;
ref_u
+=
y
*
src_stride_u
+
x
*
bytesperpixel
;
ref_v
+=
y
*
src_stride_v
+
x
*
bytesperpixel
;
mx
&=
15
;
my
&=
15
;
refbw_m1
=
((
bw
-
1
)
*
step
[
0
]
+
mx
)
>>
4
;
refbh_m1
=
((
bh
-
1
)
*
step
[
1
]
+
my
)
>>
4
;
// FIXME bilinear filter only needs 0/1 pixels, not 3/4
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
th
=
(
y
+
refbh_m1
+
4
+
7
)
>>
(
6
-
s
->
ss_v
);
ff_thread_await_progress
(
ref_frame
,
FFMAX
(
th
,
0
),
0
);
// The arm/aarch64 _hv filters read one more row than what actually is
// needed, so switch to emulated edge one pixel sooner vertically
// (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
if
(
x
<
3
||
y
<
3
||
x
+
4
>=
w
-
refbw_m1
||
y
+
5
>=
h
-
refbh_m1
)
{
s
->
vdsp
.
emulated_edge_mc
(
s
->
edge_emu_buffer
,
ref_u
-
3
*
src_stride_u
-
3
*
bytesperpixel
,
288
,
src_stride_u
,
refbw_m1
+
8
,
refbh_m1
+
8
,
x
-
3
,
y
-
3
,
w
,
h
);
ref_u
=
s
->
edge_emu_buffer
+
3
*
288
+
3
*
bytesperpixel
;
smc
(
dst_u
,
dst_stride
,
ref_u
,
288
,
bh
,
mx
,
my
,
step
[
0
],
step
[
1
]);
s
->
vdsp
.
emulated_edge_mc
(
s
->
edge_emu_buffer
,
ref_v
-
3
*
src_stride_v
-
3
*
bytesperpixel
,
288
,
src_stride_v
,
refbw_m1
+
8
,
refbh_m1
+
8
,
x
-
3
,
y
-
3
,
w
,
h
);
ref_v
=
s
->
edge_emu_buffer
+
3
*
288
+
3
*
bytesperpixel
;
smc
(
dst_v
,
dst_stride
,
ref_v
,
288
,
bh
,
mx
,
my
,
step
[
0
],
step
[
1
]);
}
else
{
smc
(
dst_u
,
dst_stride
,
ref_u
,
src_stride_u
,
bh
,
mx
,
my
,
step
[
0
],
step
[
1
]);
smc
(
dst_v
,
dst_stride
,
ref_v
,
src_stride_v
,
bh
,
mx
,
my
,
step
[
0
],
step
[
1
]);
}
}
}
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
px, py, pw, ph, bw, bh, w, h, i) \
mc_luma_scaled(s, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
mc_chroma_scaled(s, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define SCALED 1
#define FN(x) x##_scaled_8bpp
#define BYTES_PER_PIXEL 1
#include "vp9_mc_template.c"
#undef FN
#undef BYTES_PER_PIXEL
#define FN(x) x##_scaled_16bpp
#define BYTES_PER_PIXEL 2
#include "vp9_mc_template.c"
#undef mc_luma_dir
#undef mc_chroma_dir
#undef FN
#undef BYTES_PER_PIXEL
#undef SCALED
static
av_always_inline
void
inter_recon
(
AVCodecContext
*
avctx
,
int
bytesperpixel
)
{
VP9Context
*
s
=
avctx
->
priv_data
;
VP9Block
*
b
=
s
->
b
;
int
row
=
s
->
row
,
col
=
s
->
col
;
if
(
s
->
mvscale
[
b
->
ref
[
0
]][
0
]
||
(
b
->
comp
&&
s
->
mvscale
[
b
->
ref
[
1
]][
0
]))
{
if
(
bytesperpixel
==
1
)
{
inter_pred_scaled_8bpp
(
avctx
);
}
else
{
inter_pred_scaled_16bpp
(
avctx
);
}
}
else
{
if
(
bytesperpixel
==
1
)
{
inter_pred_8bpp
(
avctx
);
}
else
{
inter_pred_16bpp
(
avctx
);
}
}
if
(
!
b
->
skip
)
{
/* mostly copied intra_recon() */
int
w4
=
bwh_tab
[
1
][
b
->
bs
][
0
]
<<
1
,
step1d
=
1
<<
b
->
tx
,
n
;
int
h4
=
bwh_tab
[
1
][
b
->
bs
][
1
]
<<
1
,
x
,
y
,
step
=
1
<<
(
b
->
tx
*
2
);
int
end_x
=
FFMIN
(
2
*
(
s
->
cols
-
col
),
w4
);
int
end_y
=
FFMIN
(
2
*
(
s
->
rows
-
row
),
h4
);
int
tx
=
4
*
s
->
s
.
h
.
lossless
+
b
->
tx
,
uvtx
=
b
->
uvtx
+
4
*
s
->
s
.
h
.
lossless
;
int
uvstep1d
=
1
<<
b
->
uvtx
,
p
;
uint8_t
*
dst
=
s
->
dst
[
0
];
// y itxfm add
for
(
n
=
0
,
y
=
0
;
y
<
end_y
;
y
+=
step1d
)
{
uint8_t
*
ptr
=
dst
;
for
(
x
=
0
;
x
<
end_x
;
x
+=
step1d
,
ptr
+=
4
*
step1d
*
bytesperpixel
,
n
+=
step
)
{
int
eob
=
b
->
tx
>
TX_8X8
?
AV_RN16A
(
&
s
->
eob
[
n
])
:
s
->
eob
[
n
];
if
(
eob
)
s
->
dsp
.
itxfm_add
[
tx
][
DCT_DCT
](
ptr
,
s
->
y_stride
,
s
->
block
+
16
*
n
*
bytesperpixel
,
eob
);
}
dst
+=
4
*
s
->
y_stride
*
step1d
;
}
// uv itxfm add
end_x
>>=
s
->
ss_h
;
end_y
>>=
s
->
ss_v
;
step
=
1
<<
(
b
->
uvtx
*
2
);
for
(
p
=
0
;
p
<
2
;
p
++
)
{
dst
=
s
->
dst
[
p
+
1
];
for
(
n
=
0
,
y
=
0
;
y
<
end_y
;
y
+=
uvstep1d
)
{
uint8_t
*
ptr
=
dst
;
for
(
x
=
0
;
x
<
end_x
;
x
+=
uvstep1d
,
ptr
+=
4
*
uvstep1d
*
bytesperpixel
,
n
+=
step
)
{
int
eob
=
b
->
uvtx
>
TX_8X8
?
AV_RN16A
(
&
s
->
uveob
[
p
][
n
])
:
s
->
uveob
[
p
][
n
];
if
(
eob
)
s
->
dsp
.
itxfm_add
[
uvtx
][
DCT_DCT
](
ptr
,
s
->
uv_stride
,
s
->
uvblock
[
p
]
+
16
*
n
*
bytesperpixel
,
eob
);
}
dst
+=
4
*
uvstep1d
*
s
->
uv_stride
;
}
}
}
}
static
void
inter_recon_8bpp
(
AVCodecContext
*
avctx
)
{
inter_recon
(
avctx
,
1
);
}
static
void
inter_recon_16bpp
(
AVCodecContext
*
avctx
)
{
inter_recon
(
avctx
,
2
);
}
static
av_always_inline
void
mask_edges
(
uint8_t
(
*
mask
)[
8
][
4
],
int
ss_h
,
int
ss_v
,
int
row_and_7
,
int
col_and_7
,
int
w
,
int
h
,
int
col_end
,
int
row_end
,
...
...
@@ -1891,7 +1272,7 @@ void ff_vp9_decode_block(AVCodecContext *avctx, int row, int col,
VP9Block
*
b
=
s
->
b
;
enum
BlockSize
bs
=
bl
*
3
+
bp
;
int
bytesperpixel
=
s
->
bytesperpixel
;
int
w4
=
bwh_tab
[
1
][
bs
][
0
],
h4
=
bwh_tab
[
1
][
bs
][
1
],
lvl
;
int
w4
=
ff_vp9_bwh_tab
[
1
][
bs
][
0
],
h4
=
ff_vp9_
bwh_tab
[
1
][
bs
][
1
],
lvl
;
int
emu
[
2
];
AVFrame
*
f
=
s
->
s
.
frames
[
CUR_FRAME
].
tf
.
f
;
...
...
@@ -2001,15 +1382,15 @@ void ff_vp9_decode_block(AVCodecContext *avctx, int row, int col,
}
if
(
b
->
intra
)
{
if
(
s
->
s
.
h
.
bpp
>
8
)
{
intra_recon_16bpp
(
avctx
,
yoff
,
uvoff
);
ff_vp9_
intra_recon_16bpp
(
avctx
,
yoff
,
uvoff
);
}
else
{
intra_recon_8bpp
(
avctx
,
yoff
,
uvoff
);
ff_vp9_
intra_recon_8bpp
(
avctx
,
yoff
,
uvoff
);
}
}
else
{
if
(
s
->
s
.
h
.
bpp
>
8
)
{
inter_recon_16bpp
(
avctx
);
ff_vp9_
inter_recon_16bpp
(
avctx
);
}
else
{
inter_recon_8bpp
(
avctx
);
ff_vp9_
inter_recon_8bpp
(
avctx
);
}
}
if
(
emu
[
0
])
{
...
...
libavcodec/vp9data.c
View file @
6d0d1c4a
...
...
@@ -22,6 +22,16 @@
#include "vp9.h"
#include "vp9data.h"
const
uint8_t
ff_vp9_bwh_tab
[
2
][
N_BS_SIZES
][
2
]
=
{
{
{
16
,
16
},
{
16
,
8
},
{
8
,
16
},
{
8
,
8
},
{
8
,
4
},
{
4
,
8
},
{
4
,
4
},
{
4
,
2
},
{
2
,
4
},
{
2
,
2
},
{
2
,
1
},
{
1
,
2
},
{
1
,
1
},
},
{
{
8
,
8
},
{
8
,
4
},
{
4
,
8
},
{
4
,
4
},
{
4
,
2
},
{
2
,
4
},
{
2
,
2
},
{
2
,
1
},
{
1
,
2
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
{
1
,
1
},
}
};
const
int8_t
ff_vp9_partition_tree
[
3
][
2
]
=
{
{
-
PARTITION_NONE
,
1
},
// '0'
{
-
PARTITION_H
,
2
},
// '10'
...
...
libavcodec/vp9data.h
View file @
6d0d1c4a
...
...
@@ -26,6 +26,7 @@
#include "vp9dec.h"
extern
const
uint8_t
ff_vp9_bwh_tab
[
2
][
N_BS_SIZES
][
2
];
extern
const
int8_t
ff_vp9_partition_tree
[
3
][
2
];
extern
const
uint8_t
ff_vp9_default_kf_partition_probs
[
4
][
4
][
3
];
extern
const
int8_t
ff_vp9_segmentation_tree
[
7
][
2
];
...
...
libavcodec/vp9dec.h
View file @
6d0d1c4a
...
...
@@ -206,4 +206,11 @@ void ff_vp9_decode_block(AVCodecContext *ctx, int row, int col,
void
ff_vp9_loopfilter_sb
(
AVCodecContext
*
avctx
,
VP9Filter
*
lflvl
,
int
row
,
int
col
,
ptrdiff_t
yoff
,
ptrdiff_t
uvoff
);
void
ff_vp9_intra_recon_8bpp
(
AVCodecContext
*
avctx
,
ptrdiff_t
y_off
,
ptrdiff_t
uv_off
);
void
ff_vp9_intra_recon_16bpp
(
AVCodecContext
*
avctx
,
ptrdiff_t
y_off
,
ptrdiff_t
uv_off
);
void
ff_vp9_inter_recon_8bpp
(
AVCodecContext
*
avctx
);
void
ff_vp9_inter_recon_16bpp
(
AVCodecContext
*
avctx
);
#endif
/* AVCODEC_VP9DEC_H */
libavcodec/vp9recon.c
0 → 100644
View file @
6d0d1c4a
/*
* VP9 compatible video decoder
*
* Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
* Copyright (C) 2013 Clément Bœsch <u pkh me>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/avassert.h"
#include "avcodec.h"
#include "internal.h"
#include "videodsp.h"
#include "vp9data.h"
#include "vp9dec.h"
/**
 * Prepare the top (*a) and left (l) edge-pixel arrays for one intra-predicted
 * transform block, and return the (possibly substituted) prediction mode.
 *
 * The mode is first remapped through mode_conv[] according to which neighbors
 * actually exist (e.g. VERT_PRED without a top edge becomes DC_127_PRED).
 * Then, for the resulting mode, the edges[] table says which edge pixels the
 * predictor needs; missing pixels are synthesized by replication or filled
 * with the bit-depth-scaled constants 127/128/129.
 *
 * @param s             decoder context
 * @param mode          intra prediction mode (0..9, checked by av_assert2)
 * @param a             in: points at a caller-provided aligned buffer for the
 *                      top edge; out: may be redirected to point directly at
 *                      frame/intra_pred_data memory when usable as-is
 * @param dst_edge      destination pixels in the frame buffer (block row 0 /
 *                      col 0 path), with stride_edge
 * @param dst_inner     destination pixels in the per-sb working buffer
 *                      (interior path), with stride_inner
 * @param l             left-edge buffer, filled bottom-up unless
 *                      edges[mode].invert_left (HOR_UP_PRED) asks top-down
 * @param col, x        block position: tile column units and sub-block x
 * @param w             block width in sub-block units (for have_right)
 * @param row, y        block position: sb row units and sub-block y
 * @param tx            transform size; edge length is 4 << tx pixels
 * @param p             plane index into s->intra_pred_data[]
 * @param ss_h, ss_v    chroma subsampling shifts for this plane
 * @param bytesperpixel 1 (8bpp) or 2 (high bit depth)
 * @return the remapped prediction mode to index intra_pred[] with
 */
static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
                                             uint8_t *dst_edge, ptrdiff_t stride_edge,
                                             uint8_t *dst_inner, ptrdiff_t stride_inner,
                                             uint8_t *l, int col, int x, int w,
                                             int row, int y, enum TxfmMode tx,
                                             int p, int ss_h, int ss_v, int bytesperpixel)
{
    int have_top = row > 0 || y > 0;
    int have_left = col > s->tile_col_start || x > 0;
    int have_right = x < w - 1;
    int bpp = s->s.h.bpp;
    // mode substitution when the needed edge is unavailable:
    // indexed [mode][have_left][have_top]
    static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
        [VERT_PRED]            = { { DC_127_PRED,          VERT_PRED            },
                                   { DC_127_PRED,          VERT_PRED            } },
        [HOR_PRED]             = { { DC_129_PRED,          DC_129_PRED          },
                                   { HOR_PRED,             HOR_PRED             } },
        [DC_PRED]              = { { DC_128_PRED,          TOP_DC_PRED          },
                                   { LEFT_DC_PRED,         DC_PRED              } },
        [DIAG_DOWN_LEFT_PRED]  = { { DC_127_PRED,          DIAG_DOWN_LEFT_PRED  },
                                   { DC_127_PRED,          DIAG_DOWN_LEFT_PRED  } },
        [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
                                   { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
        [VERT_RIGHT_PRED]      = { { VERT_RIGHT_PRED,      VERT_RIGHT_PRED      },
                                   { VERT_RIGHT_PRED,      VERT_RIGHT_PRED      } },
        [HOR_DOWN_PRED]        = { { HOR_DOWN_PRED,        HOR_DOWN_PRED        },
                                   { HOR_DOWN_PRED,        HOR_DOWN_PRED        } },
        [VERT_LEFT_PRED]       = { { DC_127_PRED,          VERT_LEFT_PRED       },
                                   { DC_127_PRED,          VERT_LEFT_PRED       } },
        [HOR_UP_PRED]          = { { DC_129_PRED,          DC_129_PRED          },
                                   { HOR_UP_PRED,          HOR_UP_PRED          } },
        [TM_VP8_PRED]          = { { DC_129_PRED,          VERT_PRED            },
                                   { HOR_PRED,             TM_VP8_PRED          } },
    };
    // per-mode edge requirements for the (already remapped) prediction mode
    static const struct {
        uint8_t needs_left:1;
        uint8_t needs_top:1;
        uint8_t needs_topleft:1;
        uint8_t needs_topright:1;
        uint8_t invert_left:1;
    } edges[N_INTRA_PRED_MODES] = {
        [VERT_PRED]            = { .needs_top = 1 },
        [HOR_PRED]             = { .needs_left = 1 },
        [DC_PRED]              = { .needs_top = 1, .needs_left = 1 },
        [DIAG_DOWN_LEFT_PRED]  = { .needs_top = 1, .needs_topright = 1 },
        [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1,
                                   .needs_topleft = 1 },
        [VERT_RIGHT_PRED]      = { .needs_left = 1, .needs_top = 1,
                                   .needs_topleft = 1 },
        [HOR_DOWN_PRED]        = { .needs_left = 1, .needs_top = 1,
                                   .needs_topleft = 1 },
        [VERT_LEFT_PRED]       = { .needs_top = 1, .needs_topright = 1 },
        [HOR_UP_PRED]          = { .needs_left = 1, .invert_left = 1 },
        [TM_VP8_PRED]          = { .needs_left = 1, .needs_top = 1,
                                   .needs_topleft = 1 },
        [LEFT_DC_PRED]         = { .needs_left = 1 },
        [TOP_DC_PRED]          = { .needs_top = 1 },
        [DC_128_PRED]          = { 0 },
        [DC_127_PRED]          = { 0 },
        [DC_129_PRED]          = { 0 }
    };

    av_assert2(mode >= 0 && mode < 10);
    mode = mode_conv[mode][have_left][have_top];
    if (edges[mode].needs_top) {
        uint8_t *top, *topleft;
        // pixels needed vs. pixels actually available to the right of col/x
        int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
        int n_px_need_tr = 0;

        if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
            n_px_need_tr = 4;

        // if top of sb64-row, use s->intra_pred_data[] instead of
        // dst[-stride] for intra prediction (it contains pre- instead of
        // post-loopfilter data)
        if (have_top) {
            top = !(row & 7) && !y ?
                s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
                y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
            if (have_left)
                topleft = !(row & 7) && !y ?
                    s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
                    y == 0 || x == 0 ? &dst_edge[-stride_edge] :
                    &dst_inner[-stride_inner];
        }

        // fast path: use the frame pixels in place when all required top
        // pixels (incl. topleft/topright if needed) are present and contiguous
        if (have_top &&
            (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
            (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
            n_px_need + n_px_need_tr <= n_px_have) {
            *a = top;
        } else {
            if (have_top) {
                if (n_px_need <= n_px_have) {
                    memcpy(*a, top, n_px_need * bytesperpixel);
                } else {
                    // replicate the last available pixel to the right
#define memset_bpp(c, i1, v, i2, num) do { \
    if (bytesperpixel == 1) { \
        memset(&(c)[(i1)], (v)[(i2)], (num)); \
    } else { \
        int n, val = AV_RN16A(&(v)[(i2) * 2]); \
        for (n = 0; n < (num); n++) { \
            AV_WN16A(&(c)[((i1) + n) * 2], val); \
        } \
    } \
} while (0)
                    memcpy(*a, top, n_px_have * bytesperpixel);
                    memset_bpp(*a, n_px_have, (*a), n_px_have - 1,
                               n_px_need - n_px_have);
                }
            } else {
                // no top edge at all: constant 127 scaled to bit depth
#define memset_val(c, val, num) do { \
    if (bytesperpixel == 1) { \
        memset((c), (val), (num)); \
    } else { \
        int n; \
        for (n = 0; n < (num); n++) { \
            AV_WN16A(&(c)[n * 2], (val)); \
        } \
    } \
} while (0)
                memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
            }
            if (edges[mode].needs_topleft) {
                if (have_left && have_top) {
#define assign_bpp(c, i1, v, i2) do { \
    if (bytesperpixel == 1) { \
        (c)[(i1)] = (v)[(i2)]; \
    } else { \
        AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
    } \
} while (0)
                    assign_bpp(*a, -1, topleft, -1);
                } else {
                    // synthesize topleft: 129 if a top edge exists, else 127
#define assign_val(c, i, v) do { \
    if (bytesperpixel == 1) { \
        (c)[(i)] = (v); \
    } else { \
        AV_WN16A(&(c)[(i) * 2], (v)); \
    } \
} while (0)
                    assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
                }
            }
            if (tx == TX_4X4 && edges[mode].needs_topright) {
                if (have_top && have_right &&
                    n_px_need + n_px_need_tr <= n_px_have) {
                    memcpy(&(*a)[4 * bytesperpixel],
                           &top[4 * bytesperpixel], 4 * bytesperpixel);
                } else {
                    // replicate rightmost top pixel into the topright slot
                    memset_bpp(*a, 4, *a, 3, 4);
                }
            }
        }
    }
    if (edges[mode].needs_left) {
        if (have_left) {
            int n_px_need = 4 << tx, i,
                n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
            uint8_t *dst = x == 0 ? dst_edge : dst_inner;
            ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;

            if (edges[mode].invert_left) {
                // HOR_UP_PRED wants the left column stored top-to-bottom
                if (n_px_need <= n_px_have) {
                    for (i = 0; i < n_px_need; i++)
                        assign_bpp(l, i, &dst[i * stride], -1);
                } else {
                    for (i = 0; i < n_px_have; i++)
                        assign_bpp(l, i, &dst[i * stride], -1);
                    memset_bpp(l, n_px_have, l, n_px_have - 1,
                               n_px_need - n_px_have);
                }
            } else {
                // default: left column stored bottom-to-top
                if (n_px_need <= n_px_have) {
                    for (i = 0; i < n_px_need; i++)
                        assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
                } else {
                    for (i = 0; i < n_px_have; i++)
                        assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
                    memset_bpp(l, 0, l, n_px_need - n_px_have,
                               n_px_need - n_px_have);
                }
            }
        } else {
            // no left edge: constant 129 scaled to bit depth
            memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
        }
    }

    return mode;
}
/**
 * Intra reconstruction of the current block: for every transform sub-block of
 * the luma plane and then of both chroma planes, prepare edge pixels via
 * check_intra_mode(), run the spatial predictor (s->dsp.intra_pred) and, when
 * there are coded coefficients (eob != 0), add the inverse transform
 * (s->dsp.itxfm_add).
 *
 * Two destination pointers are carried in parallel: dst walks the per-sb
 * working buffer (s->dst[], s->y_stride/uv_stride), dst_r walks the actual
 * frame buffer of s->s.frames[CUR_FRAME]; the frame pointer is only used as
 * the edge-pixel source inside check_intra_mode().
 *
 * @param avctx         codec context (priv_data is the VP9Context)
 * @param y_off         byte offset of this block in the frame's luma plane
 * @param uv_off        byte offset of this block in the frame's chroma planes
 * @param bytesperpixel 1 or 2; constant at each call site so the compiler can
 *                      specialize this always-inline body
 */
static av_always_inline void intra_recon(AVCodecContext *avctx, ptrdiff_t y_off,
                                         ptrdiff_t uv_off, int bytesperpixel)
{
    VP9Context *s = avctx->priv_data;
    VP9Block *b = s->b;
    int row = s->row, col = s->col;
    int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
    int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
    // clip the loop bounds so we never predict past the frame edge
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    // lossless streams use the WHT variants at itxfm_add[4 + tx]
    int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
    int uvstep1d = 1 << b->uvtx, p;
    uint8_t *dst = s->dst[0], *dst_r = s->s.frames[CUR_FRAME].tf.f->data[0] + y_off;
    LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
    LOCAL_ALIGNED_32(uint8_t, l, [64]);

    for (n = 0, y = 0; y < end_y; y += step1d) {
        uint8_t *ptr = dst, *ptr_r = dst_r;
        for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
             ptr_r += 4 * step1d * bytesperpixel, n += step) {
            // sub-8x8 blocks with 4x4 transforms have one mode per 4x4 unit
            int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
                               y * 2 + x : 0];
            uint8_t *a = &a_buf[32];
            enum TxfmType txtp = ff_vp9_intra_txfm_type[mode];
            // eob for >8x8 transforms is stored as a 16-bit value
            int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n])
                                                   : s->eob[n];

            mode = check_intra_mode(s, mode, &a, ptr_r,
                                    s->s.frames[CUR_FRAME].tf.f->linesize[0],
                                    ptr, s->y_stride, l,
                                    col, x, w4, row, y, b->tx, 0, 0, 0,
                                    bytesperpixel);
            s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
            if (eob)
                s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
                                           s->block + 16 * n * bytesperpixel,
                                           eob);
        }
        dst_r += 4 * step1d * s->s.frames[CUR_FRAME].tf.f->linesize[0];
        dst   += 4 * step1d * s->y_stride;
    }

    // U/V
    w4    >>= s->ss_h;
    end_x >>= s->ss_h;
    end_y >>= s->ss_v;
    step = 1 << (b->uvtx * 2);
    for (p = 0; p < 2; p++) {
        dst   = s->dst[1 + p];
        dst_r = s->s.frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
        for (n = 0, y = 0; y < end_y; y += uvstep1d) {
            uint8_t *ptr = dst, *ptr_r = dst_r;
            for (x = 0; x < end_x; x += uvstep1d,
                 ptr += 4 * uvstep1d * bytesperpixel,
                 ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
                // chroma uses a single mode for the whole block
                int mode = b->uvmode;
                uint8_t *a = &a_buf[32];
                int eob = b->skip ? 0 : b->uvtx > TX_8X8 ?
                    AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];

                mode = check_intra_mode(s, mode, &a, ptr_r,
                                        s->s.frames[CUR_FRAME].tf.f->linesize[1],
                                        ptr, s->uv_stride, l, col, x, w4, row, y,
                                        b->uvtx, p + 1, s->ss_h, s->ss_v,
                                        bytesperpixel);
                s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
                if (eob)
                    s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
                                                    s->uvblock[p] +
                                                        16 * n * bytesperpixel,
                                                    eob);
            }
            dst_r += 4 * uvstep1d * s->s.frames[CUR_FRAME].tf.f->linesize[1];
            dst   += 4 * uvstep1d * s->uv_stride;
        }
    }
}
/**
 * Public 8-bits-per-component entry point for intra reconstruction.
 * Forwards to the shared always-inline template with bytesperpixel == 1,
 * giving the compiler a specialized 8bpp instantiation.
 */
void ff_vp9_intra_recon_8bpp(AVCodecContext *avctx,
                             ptrdiff_t y_off, ptrdiff_t uv_off)
{
    intra_recon(avctx, y_off, uv_off, 1);
}
/**
 * Public high-bit-depth (16-bit component storage) entry point for intra
 * reconstruction. Forwards to the shared always-inline template with
 * bytesperpixel == 2, giving the compiler a specialized instantiation.
 */
void ff_vp9_intra_recon_16bpp(AVCodecContext *avctx,
                              ptrdiff_t y_off, ptrdiff_t uv_off)
{
    intra_recon(avctx, y_off, uv_off, 2);
}
/**
 * Luma motion compensation when the reference frame has the same dimensions
 * as the current frame (no MV scaling). Applies the 1/8-pel motion vector,
 * waits for the referenced rows to be decoded (frame-threading), and falls
 * back to emulated_edge_mc when the filter footprint would read outside the
 * reference plane.
 *
 * @param ref_frame     reference frame, synchronized via
 *                      ff_thread_await_progress()
 * @param y, x          block position in the reference plane (pixels)
 * @param mv            motion vector in 1/8-pel units
 * @param bw, bh        block width/height in pixels
 * @param w, h          reference plane dimensions
 * @param mc            [horizontal subpel?][vertical subpel?] filter table
 */
static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
                                              uint8_t *dst, ptrdiff_t dst_stride,
                                              const uint8_t *ref,
                                              ptrdiff_t ref_stride,
                                              ThreadFrame *ref_frame,
                                              ptrdiff_t y, ptrdiff_t x,
                                              const VP56mv *mv,
                                              int bw, int bh, int w, int h,
                                              int bytesperpixel)
{
    int mx = mv->x, my = mv->y, th;

    // integer part of the MV moves the read position ...
    y += my >> 3;
    x += mx >> 3;
    ref += y * ref_stride + x * bytesperpixel;
    // ... and the fractional part selects the subpel filter phase
    mx &= 7;
    my &= 7;
    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
    // we use +7 because the last 7 pixels of each sbrow can be changed in
    // the longest loopfilter of the next sbrow
    th = (y + bh + 4 * !!my + 7) >> 6;
    ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
    // The arm/aarch64 _hv filters read one more row than what actually is
    // needed, so switch to emulated edge one pixel sooner vertically
    // (!!my * 5) than horizontally (!!mx * 4).
    if (x < !!mx * 3 || y < !!my * 3 ||
        x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
                                 ref - !!my * 3 * ref_stride -
                                     !!mx * 3 * bytesperpixel,
                                 160, ref_stride,
                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);
        ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
        ref_stride = 160;
    }
    // luma filters take the subpel phase in 1/16 units, hence << 1
    mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
}
/**
 * Chroma motion compensation without reference scaling; U and V are filtered
 * with the same vector in one call. The luma MV is converted to chroma
 * resolution by doubling each component for a non-subsampled axis, which
 * yields 1/16-pel precision on subsampled axes.
 *
 * Mirrors mc_luma_unscaled(): await decoded reference rows, then either
 * filter in place or detour through emulated_edge_mc (done separately per
 * plane since edge_emu_buffer is reused for U and then V).
 */
static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
                                                uint8_t *dst_u, uint8_t *dst_v,
                                                ptrdiff_t dst_stride,
                                                const uint8_t *ref_u,
                                                ptrdiff_t src_stride_u,
                                                const uint8_t *ref_v,
                                                ptrdiff_t src_stride_v,
                                                ThreadFrame *ref_frame,
                                                ptrdiff_t y, ptrdiff_t x,
                                                const VP56mv *mv,
                                                int bw, int bh, int w, int h,
                                                int bytesperpixel)
{
    int mx = mv->x * (1 << !s->ss_h), my = mv->y * (1 << !s->ss_v), th;

    // integer part (1/16-pel here) positions the read; fraction picks phase
    y += my >> 4;
    x += mx >> 4;
    ref_u += y * src_stride_u + x * bytesperpixel;
    ref_v += y * src_stride_v + x * bytesperpixel;
    mx &= 15;
    my &= 15;
    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
    // we use +7 because the last 7 pixels of each sbrow can be changed in
    // the longest loopfilter of the next sbrow
    th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
    ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
    // The arm/aarch64 _hv filters read one more row than what actually is
    // needed, so switch to emulated edge one pixel sooner vertically
    // (!!my * 5) than horizontally (!!mx * 4).
    if (x < !!mx * 3 || y < !!my * 3 ||
        x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
                                 ref_u - !!my * 3 * src_stride_u -
                                     !!mx * 3 * bytesperpixel,
                                 160, src_stride_u,
                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);
        ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
        mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);

        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
                                 ref_v - !!my * 3 * src_stride_v -
                                     !!mx * 3 * bytesperpixel,
                                 160, src_stride_v,
                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);
        ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
        mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
    } else {
        mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
        mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
    }
}
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
px, py, pw, ph, bw, bh, w, h, i) \
mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
mv, bw, bh, w, h, bytesperpixel)
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, bw, bh, w, h, bytesperpixel)
#define SCALED 0
#define FN(x) x##_8bpp
#define BYTES_PER_PIXEL 1
#include "vp9_mc_template.c"
#undef FN
#undef BYTES_PER_PIXEL
#define FN(x) x##_16bpp
#define BYTES_PER_PIXEL 2
#include "vp9_mc_template.c"
#undef mc_luma_dir
#undef mc_chroma_dir
#undef FN
#undef BYTES_PER_PIXEL
#undef SCALED
/**
 * Luma motion compensation with reference-frame scaling support. If the
 * reference has the same dimensions as the current frame, delegates to
 * mc_luma_unscaled(); otherwise the MV is clipped, scaled through the
 * per-reference scale[] factors (Q14) and the block is filtered with the
 * scaled-MC function smc, stepping through the reference with step[] (Q4
 * increments per output pixel).
 *
 * NOTE: scale_mv() is defined here and #undef'd at the end of
 * mc_chroma_scaled() below — the two functions are textually coupled.
 *
 * @param px, py, pw, ph sub-block offset and prediction size used for MV
 *                       clipping bounds
 * @param scale          Q14 x/y scale factors for this reference
 * @param step           Q4 x/y source increments for this reference
 */
static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
                                            vp9_mc_func (*mc)[2],
                                            uint8_t *dst, ptrdiff_t dst_stride,
                                            const uint8_t *ref,
                                            ptrdiff_t ref_stride,
                                            ThreadFrame *ref_frame,
                                            ptrdiff_t y, ptrdiff_t x,
                                            const VP56mv *in_mv,
                                            int px, int py, int pw, int ph,
                                            int bw, int bh, int w, int h,
                                            int bytesperpixel,
                                            const uint16_t *scale,
                                            const uint8_t *step)
{
    if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
        s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
        mc_luma_unscaled(s, mc, dst, dst_stride, ref, ref_stride, ref_frame,
                         y, x, in_mv, bw, bh, w, h, bytesperpixel);
    } else {
#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
        int mx, my;
        int refbw_m1, refbh_m1;
        int th;
        VP56mv mv;

        // clip the MV so the (scaled) read stays near the visible frame
        mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8,
                       (s->cols * 8 - x + px + 3) * 8);
        mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8,
                       (s->rows * 8 - y + py + 3) * 8);
        // BUG libvpx seems to scale the two components separately. This introduces
        // rounding errors but we have to reproduce them to be exactly compatible
        // with the output from libvpx...
        mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
        my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);

        y = my >> 4;
        x = mx >> 4;
        ref += y * ref_stride + x * bytesperpixel;
        mx &= 15;
        my &= 15;
        // last reference column/row touched, relative to (x, y)
        refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
        refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
        // FIXME bilinear filter only needs 0/1 pixels, not 3/4
        // we use +7 because the last 7 pixels of each sbrow can be changed in
        // the longest loopfilter of the next sbrow
        th = (y + refbh_m1 + 4 + 7) >> 6;
        ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
        // The arm/aarch64 _hv filters read one more row than what actually is
        // needed, so switch to emulated edge one pixel sooner vertically
        // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
        if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
            s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
                                     ref - 3 * ref_stride - 3 * bytesperpixel,
                                     288, ref_stride,
                                     refbw_m1 + 8, refbh_m1 + 8,
                                     x - 3, y - 3, w, h);
            ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
            ref_stride = 288;
        }
        smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
    }
}
/**
 * Chroma motion compensation with reference-frame scaling support; the
 * chroma counterpart of mc_luma_scaled(), handling U and V in one call.
 * Same-size references delegate to mc_chroma_unscaled(). Otherwise each MV
 * component is clipped and scaled, with a separate formula per axis depending
 * on whether that axis is subsampled (the ss_h/ss_v branches reproduce a
 * known libvpx rounding bug, see the webm issue links below).
 *
 * Uses the scale_mv() macro defined in mc_luma_scaled() and #undef's it here.
 */
static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
                                              vp9_mc_func (*mc)[2],
                                              uint8_t *dst_u, uint8_t *dst_v,
                                              ptrdiff_t dst_stride,
                                              const uint8_t *ref_u,
                                              ptrdiff_t src_stride_u,
                                              const uint8_t *ref_v,
                                              ptrdiff_t src_stride_v,
                                              ThreadFrame *ref_frame,
                                              ptrdiff_t y, ptrdiff_t x,
                                              const VP56mv *in_mv,
                                              int px, int py, int pw, int ph,
                                              int bw, int bh, int w, int h,
                                              int bytesperpixel,
                                              const uint16_t *scale,
                                              const uint8_t *step)
{
    if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
        s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
        mc_chroma_unscaled(s, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
                           ref_v, src_stride_v, ref_frame,
                           y, x, in_mv, bw, bh, w, h, bytesperpixel);
    } else {
        int mx, my;
        int refbw_m1, refbh_m1;
        int th;
        VP56mv mv;

        if (s->ss_h) {
            // BUG https://code.google.com/p/webm/issues/detail?id=820
            mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 16,
                           (s->cols * 4 - x + px + 3) * 16);
            mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) +
                 (scale_mv(x * 32, 0) & 15);
        } else {
            mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8,
                           (s->cols * 8 - x + px + 3) * 8);
            mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
        }
        if (s->ss_v) {
            // BUG https://code.google.com/p/webm/issues/detail?id=820
            mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 16,
                           (s->rows * 4 - y + py + 3) * 16);
            my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) +
                 (scale_mv(y * 32, 1) & 15);
        } else {
            mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8,
                           (s->rows * 8 - y + py + 3) * 8);
            my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
        }
#undef scale_mv
        y = my >> 4;
        x = mx >> 4;
        ref_u += y * src_stride_u + x * bytesperpixel;
        ref_v += y * src_stride_v + x * bytesperpixel;
        mx &= 15;
        my &= 15;
        // last reference column/row touched, relative to (x, y)
        refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
        refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
        // FIXME bilinear filter only needs 0/1 pixels, not 3/4
        // we use +7 because the last 7 pixels of each sbrow can be changed in
        // the longest loopfilter of the next sbrow
        th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
        ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
        // The arm/aarch64 _hv filters read one more row than what actually is
        // needed, so switch to emulated edge one pixel sooner vertically
        // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
        if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
            s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
                                     ref_u - 3 * src_stride_u -
                                         3 * bytesperpixel,
                                     288, src_stride_u,
                                     refbw_m1 + 8, refbh_m1 + 8,
                                     x - 3, y - 3, w, h);
            ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
            smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);

            s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
                                     ref_v - 3 * src_stride_v -
                                         3 * bytesperpixel,
                                     288, src_stride_v,
                                     refbw_m1 + 8, refbh_m1 + 8,
                                     x - 3, y - 3, w, h);
            ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
            smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
        } else {
            smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my,
                step[0], step[1]);
            smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my,
                step[0], step[1]);
        }
    }
}
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
px, py, pw, ph, bw, bh, w, h, i) \
mc_luma_scaled(s, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
mc_chroma_scaled(s, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define SCALED 1
#define FN(x) x##_scaled_8bpp
#define BYTES_PER_PIXEL 1
#include "vp9_mc_template.c"
#undef FN
#undef BYTES_PER_PIXEL
#define FN(x) x##_scaled_16bpp
#define BYTES_PER_PIXEL 2
#include "vp9_mc_template.c"
#undef mc_luma_dir
#undef mc_chroma_dir
#undef FN
#undef BYTES_PER_PIXEL
#undef SCALED
/**
 * Inter reconstruction of the current block: run motion-compensated
 * prediction, then (unless the block is skipped) add the coded residual via
 * the inverse transforms.
 *
 * The prediction step dispatches to one of the four template instantiations
 * generated from vp9_mc_template.c — scaled vs. unscaled (chosen by whether
 * any used reference has a nonzero mvscale factor) crossed with 8bpp vs.
 * 16bpp storage.
 *
 * The residual-add loops mirror intra_recon(), but inter blocks always use
 * DCT_DCT and read the eob values unconditionally (skip was tested above).
 *
 * @param avctx         codec context (priv_data is the VP9Context)
 * @param bytesperpixel 1 or 2; constant at each call site so this
 *                      always-inline body specializes
 */
static av_always_inline void inter_recon(AVCodecContext *avctx, int bytesperpixel)
{
    VP9Context *s = avctx->priv_data;
    VP9Block *b = s->b;
    int row = s->row, col = s->col;

    if (s->mvscale[b->ref[0]][0] ||
        (b->comp && s->mvscale[b->ref[1]][0])) {
        if (bytesperpixel == 1) {
            inter_pred_scaled_8bpp(avctx);
        } else {
            inter_pred_scaled_16bpp(avctx);
        }
    } else {
        if (bytesperpixel == 1) {
            inter_pred_8bpp(avctx);
        } else {
            inter_pred_16bpp(avctx);
        }
    }

    if (!b->skip) {
        /* mostly copied intra_recon() */

        int w4 = ff_vp9_bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
        int h4 = ff_vp9_bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
        // clip the loop bounds so we never write past the frame edge
        int end_x = FFMIN(2 * (s->cols - col), w4);
        int end_y = FFMIN(2 * (s->rows - row), h4);
        // lossless streams use the WHT variants at itxfm_add[4 + tx]
        int tx = 4 * s->s.h.lossless + b->tx,
            uvtx = b->uvtx + 4 * s->s.h.lossless;
        int uvstep1d = 1 << b->uvtx, p;
        uint8_t *dst = s->dst[0];

        // y itxfm add
        for (n = 0, y = 0; y < end_y; y += step1d) {
            uint8_t *ptr = dst;
            for (x = 0; x < end_x; x += step1d,
                 ptr += 4 * step1d * bytesperpixel, n += step) {
                // eob for >8x8 transforms is stored as a 16-bit value
                int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];

                if (eob)
                    s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
                                                  s->block +
                                                      16 * n * bytesperpixel,
                                                  eob);
            }
            dst += 4 * s->y_stride * step1d;
        }

        // uv itxfm add
        end_x >>= s->ss_h;
        end_y >>= s->ss_v;
        step = 1 << (b->uvtx * 2);
        for (p = 0; p < 2; p++) {
            dst = s->dst[p + 1];
            for (n = 0, y = 0; y < end_y; y += uvstep1d) {
                uint8_t *ptr = dst;
                for (x = 0; x < end_x; x += uvstep1d,
                     ptr += 4 * uvstep1d * bytesperpixel, n += step) {
                    int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n])
                                               : s->uveob[p][n];

                    if (eob)
                        s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
                                                        s->uvblock[p] +
                                                            16 * n *
                                                            bytesperpixel,
                                                        eob);
                }
                dst += 4 * uvstep1d * s->uv_stride;
            }
        }
    }
}
/**
 * Public 8-bits-per-component entry point for inter reconstruction.
 * Forwards to the shared always-inline template with bytesperpixel == 1,
 * giving the compiler a specialized 8bpp instantiation.
 */
void ff_vp9_inter_recon_8bpp(AVCodecContext *avctx)
{
    inter_recon(avctx, 1);
}
/**
 * Public high-bit-depth (16-bit component storage) entry point for inter
 * reconstruction. Forwards to the shared always-inline template with
 * bytesperpixel == 2, giving the compiler a specialized instantiation.
 */
void ff_vp9_inter_recon_16bpp(AVCodecContext *avctx)
{
    inter_recon(avctx, 2);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment