Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
fab9df63
Commit
fab9df63
authored
Jan 24, 2014
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
dsputil: Split off global motion compensation bits into a separate context
parent
f23d26a6
Show whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
374 additions
and
274 deletions
+374
-274
Makefile
libavcodec/Makefile
+2
-2
dsputil.c
libavcodec/dsputil.c
+0
-91
dsputil.h
libavcodec/dsputil.h
+0
-16
mpegvideo.c
libavcodec/mpegvideo.c
+1
-0
mpegvideo.h
libavcodec/mpegvideo.h
+2
-0
mpegvideo_motion.c
libavcodec/mpegvideo_motion.c
+33
-34
mpegvideodsp.c
libavcodec/mpegvideodsp.c
+119
-0
mpegvideodsp.h
libavcodec/mpegvideodsp.h
+47
-0
Makefile
libavcodec/ppc/Makefile
+2
-2
dsputil_altivec.h
libavcodec/ppc/dsputil_altivec.h
+1
-2
dsputil_ppc.c
libavcodec/ppc/dsputil_ppc.c
+0
-2
mpegvideodsp.c
libavcodec/ppc/mpegvideodsp.c
+12
-3
Makefile
libavcodec/x86/Makefile
+2
-1
dsputil_init.c
libavcodec/x86/dsputil_init.c
+0
-2
dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+0
-114
dsputil_x86.h
libavcodec/x86/dsputil_x86.h
+0
-5
mpegvideodsp.c
libavcodec/x86/mpegvideodsp.c
+153
-0
No files found.
libavcodec/Makefile
View file @
fab9df63
...
@@ -63,8 +63,8 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += mpegaudiodsp.o \
...
@@ -63,8 +63,8 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += mpegaudiodsp.o \
mpegaudiodsp_data.o
\
mpegaudiodsp_data.o
\
mpegaudiodsp_fixed.o
\
mpegaudiodsp_fixed.o
\
mpegaudiodsp_float.o
mpegaudiodsp_float.o
OBJS-$(CONFIG_MPEGVIDEO)
+=
mpegvideo.o
mpegvideo
_motion.o
\
OBJS-$(CONFIG_MPEGVIDEO)
+=
mpegvideo.o
mpegvideo
dsp.o
\
mpegutils.o
mpeg
video_motion.o
mpeg
utils.o
OBJS-$(CONFIG_MPEGVIDEOENC)
+=
mpegvideo_enc.o
mpeg12data.o
\
OBJS-$(CONFIG_MPEGVIDEOENC)
+=
mpegvideo_enc.o
mpeg12data.o
\
motion_est.o
ratecontrol.o
motion_est.o
ratecontrol.o
OBJS-$(CONFIG_QPELDSP)
+=
qpeldsp.o
OBJS-$(CONFIG_QPELDSP)
+=
qpeldsp.o
...
...
libavcodec/dsputil.c
View file @
fab9df63
...
@@ -3,8 +3,6 @@
...
@@ -3,8 +3,6 @@
* Copyright (c) 2000, 2001 Fabrice Bellard
* Copyright (c) 2000, 2001 Fabrice Bellard
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
*
*
* gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of Libav.
* This file is part of Libav.
*
*
* Libav is free software; you can redistribute it and/or
* Libav is free software; you can redistribute it and/or
...
@@ -352,92 +350,6 @@ static int sum_abs_dctelem_c(int16_t *block)
...
@@ -352,92 +350,6 @@ static int sum_abs_dctelem_c(int16_t *block)
#define avg2(a, b) ((a + b + 1) >> 1)
#define avg2(a, b) ((a + b + 1) >> 1)
#define avg4(a, b, c, d) ((a + b + c + d + 2) >> 2)
#define avg4(a, b, c, d) ((a + b + c + d + 2) >> 2)
static
void
gmc1_c
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x16
,
int
y16
,
int
rounder
)
{
const
int
A
=
(
16
-
x16
)
*
(
16
-
y16
);
const
int
B
=
(
x16
)
*
(
16
-
y16
);
const
int
C
=
(
16
-
x16
)
*
(
y16
);
const
int
D
=
(
x16
)
*
(
y16
);
int
i
;
for
(
i
=
0
;
i
<
h
;
i
++
)
{
dst
[
0
]
=
(
A
*
src
[
0
]
+
B
*
src
[
1
]
+
C
*
src
[
stride
+
0
]
+
D
*
src
[
stride
+
1
]
+
rounder
)
>>
8
;
dst
[
1
]
=
(
A
*
src
[
1
]
+
B
*
src
[
2
]
+
C
*
src
[
stride
+
1
]
+
D
*
src
[
stride
+
2
]
+
rounder
)
>>
8
;
dst
[
2
]
=
(
A
*
src
[
2
]
+
B
*
src
[
3
]
+
C
*
src
[
stride
+
2
]
+
D
*
src
[
stride
+
3
]
+
rounder
)
>>
8
;
dst
[
3
]
=
(
A
*
src
[
3
]
+
B
*
src
[
4
]
+
C
*
src
[
stride
+
3
]
+
D
*
src
[
stride
+
4
]
+
rounder
)
>>
8
;
dst
[
4
]
=
(
A
*
src
[
4
]
+
B
*
src
[
5
]
+
C
*
src
[
stride
+
4
]
+
D
*
src
[
stride
+
5
]
+
rounder
)
>>
8
;
dst
[
5
]
=
(
A
*
src
[
5
]
+
B
*
src
[
6
]
+
C
*
src
[
stride
+
5
]
+
D
*
src
[
stride
+
6
]
+
rounder
)
>>
8
;
dst
[
6
]
=
(
A
*
src
[
6
]
+
B
*
src
[
7
]
+
C
*
src
[
stride
+
6
]
+
D
*
src
[
stride
+
7
]
+
rounder
)
>>
8
;
dst
[
7
]
=
(
A
*
src
[
7
]
+
B
*
src
[
8
]
+
C
*
src
[
stride
+
7
]
+
D
*
src
[
stride
+
8
]
+
rounder
)
>>
8
;
dst
+=
stride
;
src
+=
stride
;
}
}
void
ff_gmc_c
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
ox
,
int
oy
,
int
dxx
,
int
dxy
,
int
dyx
,
int
dyy
,
int
shift
,
int
r
,
int
width
,
int
height
)
{
int
y
,
vx
,
vy
;
const
int
s
=
1
<<
shift
;
width
--
;
height
--
;
for
(
y
=
0
;
y
<
h
;
y
++
)
{
int
x
;
vx
=
ox
;
vy
=
oy
;
for
(
x
=
0
;
x
<
8
;
x
++
)
{
// FIXME: optimize
int
index
;
int
src_x
=
vx
>>
16
;
int
src_y
=
vy
>>
16
;
int
frac_x
=
src_x
&
(
s
-
1
);
int
frac_y
=
src_y
&
(
s
-
1
);
src_x
>>=
shift
;
src_y
>>=
shift
;
if
((
unsigned
)
src_x
<
width
)
{
if
((
unsigned
)
src_y
<
height
)
{
index
=
src_x
+
src_y
*
stride
;
dst
[
y
*
stride
+
x
]
=
((
src
[
index
]
*
(
s
-
frac_x
)
+
src
[
index
+
1
]
*
frac_x
)
*
(
s
-
frac_y
)
+
(
src
[
index
+
stride
]
*
(
s
-
frac_x
)
+
src
[
index
+
stride
+
1
]
*
frac_x
)
*
frac_y
+
r
)
>>
(
shift
*
2
);
}
else
{
index
=
src_x
+
av_clip
(
src_y
,
0
,
height
)
*
stride
;
dst
[
y
*
stride
+
x
]
=
((
src
[
index
]
*
(
s
-
frac_x
)
+
src
[
index
+
1
]
*
frac_x
)
*
s
+
r
)
>>
(
shift
*
2
);
}
}
else
{
if
((
unsigned
)
src_y
<
height
)
{
index
=
av_clip
(
src_x
,
0
,
width
)
+
src_y
*
stride
;
dst
[
y
*
stride
+
x
]
=
((
src
[
index
]
*
(
s
-
frac_y
)
+
src
[
index
+
stride
]
*
frac_y
)
*
s
+
r
)
>>
(
shift
*
2
);
}
else
{
index
=
av_clip
(
src_x
,
0
,
width
)
+
av_clip
(
src_y
,
0
,
height
)
*
stride
;
dst
[
y
*
stride
+
x
]
=
src
[
index
];
}
}
vx
+=
dxx
;
vy
+=
dyx
;
}
ox
+=
dxy
;
oy
+=
dyy
;
}
}
static
inline
int
pix_abs16_c
(
MpegEncContext
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
static
inline
int
pix_abs16_c
(
MpegEncContext
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
int
line_size
,
int
h
)
{
{
...
@@ -1346,9 +1258,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
...
@@ -1346,9 +1258,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
c
->
sum_abs_dctelem
=
sum_abs_dctelem_c
;
c
->
sum_abs_dctelem
=
sum_abs_dctelem_c
;
c
->
gmc1
=
gmc1_c
;
c
->
gmc
=
ff_gmc_c
;
c
->
pix_sum
=
pix_sum_c
;
c
->
pix_sum
=
pix_sum_c
;
c
->
pix_norm1
=
pix_norm1_c
;
c
->
pix_norm1
=
pix_norm1_c
;
...
...
libavcodec/dsputil.h
View file @
fab9df63
...
@@ -34,10 +34,6 @@
...
@@ -34,10 +34,6 @@
extern
uint32_t
ff_square_tab
[
512
];
extern
uint32_t
ff_square_tab
[
512
];
void
ff_gmc_c
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
ox
,
int
oy
,
int
dxx
,
int
dxy
,
int
dyx
,
int
dyy
,
int
shift
,
int
r
,
int
width
,
int
height
);
struct
MpegEncContext
;
struct
MpegEncContext
;
/* Motion estimation:
/* Motion estimation:
* h is limited to { width / 2, width, 2 * width },
* h is limited to { width / 2, width, 2 * width },
...
@@ -84,18 +80,6 @@ typedef struct DSPContext {
...
@@ -84,18 +80,6 @@ typedef struct DSPContext {
uint8_t
*
pixels
/* align 8 */
,
uint8_t
*
pixels
/* align 8 */
,
int
line_size
);
int
line_size
);
int
(
*
sum_abs_dctelem
)(
int16_t
*
block
/* align 16 */
);
int
(
*
sum_abs_dctelem
)(
int16_t
*
block
/* align 16 */
);
/**
* translational global motion compensation.
*/
void
(
*
gmc1
)(
uint8_t
*
dst
/* align 8 */
,
uint8_t
*
src
/* align 1 */
,
int
srcStride
,
int
h
,
int
x16
,
int
y16
,
int
rounder
);
/**
* global motion compensation.
*/
void
(
*
gmc
)(
uint8_t
*
dst
/* align 8 */
,
uint8_t
*
src
/* align 1 */
,
int
stride
,
int
h
,
int
ox
,
int
oy
,
int
dxx
,
int
dxy
,
int
dyx
,
int
dyy
,
int
shift
,
int
r
,
int
width
,
int
height
);
int
(
*
pix_sum
)(
uint8_t
*
pix
,
int
line_size
);
int
(
*
pix_sum
)(
uint8_t
*
pix
,
int
line_size
);
int
(
*
pix_norm1
)(
uint8_t
*
pix
,
int
line_size
);
int
(
*
pix_norm1
)(
uint8_t
*
pix
,
int
line_size
);
...
...
libavcodec/mpegvideo.c
View file @
fab9df63
...
@@ -380,6 +380,7 @@ av_cold int ff_dct_common_init(MpegEncContext *s)
...
@@ -380,6 +380,7 @@ av_cold int ff_dct_common_init(MpegEncContext *s)
ff_blockdsp_init
(
&
s
->
bdsp
,
s
->
avctx
);
ff_blockdsp_init
(
&
s
->
bdsp
,
s
->
avctx
);
ff_dsputil_init
(
&
s
->
dsp
,
s
->
avctx
);
ff_dsputil_init
(
&
s
->
dsp
,
s
->
avctx
);
ff_hpeldsp_init
(
&
s
->
hdsp
,
s
->
avctx
->
flags
);
ff_hpeldsp_init
(
&
s
->
hdsp
,
s
->
avctx
->
flags
);
ff_mpegvideodsp_init
(
&
s
->
mdsp
);
ff_videodsp_init
(
&
s
->
vdsp
,
s
->
avctx
->
bits_per_raw_sample
);
ff_videodsp_init
(
&
s
->
vdsp
,
s
->
avctx
->
bits_per_raw_sample
);
s
->
dct_unquantize_h263_intra
=
dct_unquantize_h263_intra_c
;
s
->
dct_unquantize_h263_intra
=
dct_unquantize_h263_intra_c
;
...
...
libavcodec/mpegvideo.h
View file @
fab9df63
...
@@ -35,6 +35,7 @@
...
@@ -35,6 +35,7 @@
#include "get_bits.h"
#include "get_bits.h"
#include "h263dsp.h"
#include "h263dsp.h"
#include "hpeldsp.h"
#include "hpeldsp.h"
#include "mpegvideodsp.h"
#include "put_bits.h"
#include "put_bits.h"
#include "ratecontrol.h"
#include "ratecontrol.h"
#include "parser.h"
#include "parser.h"
...
@@ -351,6 +352,7 @@ typedef struct MpegEncContext {
...
@@ -351,6 +352,7 @@ typedef struct MpegEncContext {
BlockDSPContext
bdsp
;
BlockDSPContext
bdsp
;
DSPContext
dsp
;
///< pointers for accelerated dsp functions
DSPContext
dsp
;
///< pointers for accelerated dsp functions
HpelDSPContext
hdsp
;
HpelDSPContext
hdsp
;
MpegVideoDSPContext
mdsp
;
QpelDSPContext
qdsp
;
QpelDSPContext
qdsp
;
VideoDSPContext
vdsp
;
VideoDSPContext
vdsp
;
H263DSPContext
h263dsp
;
H263DSPContext
h263dsp
;
...
...
libavcodec/mpegvideo_motion.c
View file @
fab9df63
...
@@ -25,7 +25,6 @@
...
@@ -25,7 +25,6 @@
#include "libavutil/internal.h"
#include "libavutil/internal.h"
#include "avcodec.h"
#include "avcodec.h"
#include "dsputil.h"
#include "h261.h"
#include "h261.h"
#include "mpegutils.h"
#include "mpegutils.h"
#include "mpegvideo.h"
#include "mpegvideo.h"
...
@@ -72,9 +71,9 @@ static void gmc1_motion(MpegEncContext *s,
...
@@ -72,9 +71,9 @@ static void gmc1_motion(MpegEncContext *s,
}
}
if
((
motion_x
|
motion_y
)
&
7
)
{
if
((
motion_x
|
motion_y
)
&
7
)
{
s
->
dsp
.
gmc1
(
dest_y
,
ptr
,
linesize
,
16
,
s
->
m
dsp
.
gmc1
(
dest_y
,
ptr
,
linesize
,
16
,
motion_x
&
15
,
motion_y
&
15
,
128
-
s
->
no_rounding
);
motion_x
&
15
,
motion_y
&
15
,
128
-
s
->
no_rounding
);
s
->
dsp
.
gmc1
(
dest_y
+
8
,
ptr
+
8
,
linesize
,
16
,
s
->
m
dsp
.
gmc1
(
dest_y
+
8
,
ptr
+
8
,
linesize
,
16
,
motion_x
&
15
,
motion_y
&
15
,
128
-
s
->
no_rounding
);
motion_x
&
15
,
motion_y
&
15
,
128
-
s
->
no_rounding
);
}
else
{
}
else
{
int
dxy
;
int
dxy
;
...
@@ -115,7 +114,7 @@ static void gmc1_motion(MpegEncContext *s,
...
@@ -115,7 +114,7 @@ static void gmc1_motion(MpegEncContext *s,
ptr
=
s
->
edge_emu_buffer
;
ptr
=
s
->
edge_emu_buffer
;
emu
=
1
;
emu
=
1
;
}
}
s
->
dsp
.
gmc1
(
dest_cb
,
ptr
,
uvlinesize
,
8
,
s
->
m
dsp
.
gmc1
(
dest_cb
,
ptr
,
uvlinesize
,
8
,
motion_x
&
15
,
motion_y
&
15
,
128
-
s
->
no_rounding
);
motion_x
&
15
,
motion_y
&
15
,
128
-
s
->
no_rounding
);
ptr
=
ref_picture
[
2
]
+
offset
;
ptr
=
ref_picture
[
2
]
+
offset
;
...
@@ -127,7 +126,7 @@ static void gmc1_motion(MpegEncContext *s,
...
@@ -127,7 +126,7 @@ static void gmc1_motion(MpegEncContext *s,
s
->
h_edge_pos
>>
1
,
s
->
v_edge_pos
>>
1
);
s
->
h_edge_pos
>>
1
,
s
->
v_edge_pos
>>
1
);
ptr
=
s
->
edge_emu_buffer
;
ptr
=
s
->
edge_emu_buffer
;
}
}
s
->
dsp
.
gmc1
(
dest_cr
,
ptr
,
uvlinesize
,
8
,
s
->
m
dsp
.
gmc1
(
dest_cr
,
ptr
,
uvlinesize
,
8
,
motion_x
&
15
,
motion_y
&
15
,
128
-
s
->
no_rounding
);
motion_x
&
15
,
motion_y
&
15
,
128
-
s
->
no_rounding
);
}
}
...
@@ -150,13 +149,13 @@ static void gmc_motion(MpegEncContext *s,
...
@@ -150,13 +149,13 @@ static void gmc_motion(MpegEncContext *s,
oy
=
s
->
sprite_offset
[
0
][
1
]
+
s
->
sprite_delta
[
1
][
0
]
*
s
->
mb_x
*
16
+
oy
=
s
->
sprite_offset
[
0
][
1
]
+
s
->
sprite_delta
[
1
][
0
]
*
s
->
mb_x
*
16
+
s
->
sprite_delta
[
1
][
1
]
*
s
->
mb_y
*
16
;
s
->
sprite_delta
[
1
][
1
]
*
s
->
mb_y
*
16
;
s
->
dsp
.
gmc
(
dest_y
,
ptr
,
linesize
,
16
,
s
->
m
dsp
.
gmc
(
dest_y
,
ptr
,
linesize
,
16
,
ox
,
oy
,
ox
,
oy
,
s
->
sprite_delta
[
0
][
0
],
s
->
sprite_delta
[
0
][
1
],
s
->
sprite_delta
[
0
][
0
],
s
->
sprite_delta
[
0
][
1
],
s
->
sprite_delta
[
1
][
0
],
s
->
sprite_delta
[
1
][
1
],
s
->
sprite_delta
[
1
][
0
],
s
->
sprite_delta
[
1
][
1
],
a
+
1
,
(
1
<<
(
2
*
a
+
1
))
-
s
->
no_rounding
,
a
+
1
,
(
1
<<
(
2
*
a
+
1
))
-
s
->
no_rounding
,
s
->
h_edge_pos
,
s
->
v_edge_pos
);
s
->
h_edge_pos
,
s
->
v_edge_pos
);
s
->
dsp
.
gmc
(
dest_y
+
8
,
ptr
,
linesize
,
16
,
s
->
m
dsp
.
gmc
(
dest_y
+
8
,
ptr
,
linesize
,
16
,
ox
+
s
->
sprite_delta
[
0
][
0
]
*
8
,
ox
+
s
->
sprite_delta
[
0
][
0
]
*
8
,
oy
+
s
->
sprite_delta
[
1
][
0
]
*
8
,
oy
+
s
->
sprite_delta
[
1
][
0
]
*
8
,
s
->
sprite_delta
[
0
][
0
],
s
->
sprite_delta
[
0
][
1
],
s
->
sprite_delta
[
0
][
0
],
s
->
sprite_delta
[
0
][
1
],
...
@@ -173,7 +172,7 @@ static void gmc_motion(MpegEncContext *s,
...
@@ -173,7 +172,7 @@ static void gmc_motion(MpegEncContext *s,
s
->
sprite_delta
[
1
][
1
]
*
s
->
mb_y
*
8
;
s
->
sprite_delta
[
1
][
1
]
*
s
->
mb_y
*
8
;
ptr
=
ref_picture
[
1
];
ptr
=
ref_picture
[
1
];
s
->
dsp
.
gmc
(
dest_cb
,
ptr
,
uvlinesize
,
8
,
s
->
m
dsp
.
gmc
(
dest_cb
,
ptr
,
uvlinesize
,
8
,
ox
,
oy
,
ox
,
oy
,
s
->
sprite_delta
[
0
][
0
],
s
->
sprite_delta
[
0
][
1
],
s
->
sprite_delta
[
0
][
0
],
s
->
sprite_delta
[
0
][
1
],
s
->
sprite_delta
[
1
][
0
],
s
->
sprite_delta
[
1
][
1
],
s
->
sprite_delta
[
1
][
0
],
s
->
sprite_delta
[
1
][
1
],
...
@@ -181,7 +180,7 @@ static void gmc_motion(MpegEncContext *s,
...
@@ -181,7 +180,7 @@ static void gmc_motion(MpegEncContext *s,
s
->
h_edge_pos
>>
1
,
s
->
v_edge_pos
>>
1
);
s
->
h_edge_pos
>>
1
,
s
->
v_edge_pos
>>
1
);
ptr
=
ref_picture
[
2
];
ptr
=
ref_picture
[
2
];
s
->
dsp
.
gmc
(
dest_cr
,
ptr
,
uvlinesize
,
8
,
s
->
m
dsp
.
gmc
(
dest_cr
,
ptr
,
uvlinesize
,
8
,
ox
,
oy
,
ox
,
oy
,
s
->
sprite_delta
[
0
][
0
],
s
->
sprite_delta
[
0
][
1
],
s
->
sprite_delta
[
0
][
0
],
s
->
sprite_delta
[
0
][
1
],
s
->
sprite_delta
[
1
][
0
],
s
->
sprite_delta
[
1
][
1
],
s
->
sprite_delta
[
1
][
0
],
s
->
sprite_delta
[
1
][
1
],
...
...
libavcodec/mpegvideodsp.c
0 → 100644
View file @
fab9df63
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/common.h"
#include "mpegvideodsp.h"
/**
 * C reference for translational global motion compensation.
 *
 * Bilinearly interpolates an 8-pixel-wide, h-row block. Each output pixel
 * blends the source pixel at the same position with its right, lower and
 * lower-right neighbours, so 9 columns and h + 1 rows of src are read.
 *
 * @param dst     destination block; 8 bytes are written per row
 * @param src     source block
 * @param stride  distance in bytes between rows, used for both dst and src
 * @param h       number of rows to produce
 * @param x16     horizontal weight of the right-hand neighbours, in 1/16 units
 * @param y16     vertical weight of the lower neighbours, in 1/16 units
 * @param rounder constant added to the weighted sum before the final >> 8
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h,
                   int x16, int y16, int rounder)
{
    /* Weights of the four neighbours; they always add up to 16 * 16 = 256,
     * which is why the sum is normalised with a shift by 8. */
    const int w_top_left     = (16 - x16) * (16 - y16);
    const int w_top_right    = (x16)      * (16 - y16);
    const int w_bottom_left  = (16 - x16) * (y16);
    const int w_bottom_right = (x16)      * (y16);
    int row, col;

    for (row = 0; row < h; row++) {
        const uint8_t *below = src + stride;

        /* Columns are handled left to right, matching the write order of
         * the unrolled form. */
        for (col = 0; col < 8; col++) {
            dst[col] = (w_top_left     * src[col]       +
                        w_top_right    * src[col + 1]   +
                        w_bottom_left  * below[col]     +
                        w_bottom_right * below[col + 1] +
                        rounder) >> 8;
        }

        dst += stride;
        src += stride;
    }
}
/**
 * C reference for general (affine) global motion compensation.
 *
 * Produces an 8-pixel-wide, h-row block. For every output pixel a source
 * position is derived from a fixed-point coordinate: the coordinate is
 * shifted right by 16, its low `shift` bits become the sub-pel fraction and
 * the remaining bits the integer sample position. Positions outside the
 * picture are clamped with av_clip(), and interpolation is dropped along
 * any axis that had to be clamped.
 *
 * @param dst     destination block, written at dst[y * stride + x]
 * @param src     source plane, addressed from its top-left sample
 * @param stride  distance in bytes between rows, used for both dst and src
 * @param h       number of rows to produce
 * @param ox, oy  fixed-point source coordinate of the first output pixel
 * @param dxx     increment of the x coordinate per output column
 * @param dxy     increment of the x coordinate per output row
 * @param dyx     increment of the y coordinate per output column
 * @param dyy     increment of the y coordinate per output row
 * @param shift   number of sub-pel fraction bits
 * @param r       rounding constant added before the final >> (2 * shift)
 * @param width   source width; integer x positions are limited to width - 1
 * @param height  source height; integer y positions are limited to height - 1
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r,
              int width, int height)
{
    const int s     = 1 << shift;
    const int norm  = shift * 2;
    const int max_x = width  - 1;
    const int max_y = height - 1;
    int y;

    for (y = 0; y < h; y++) {
        uint8_t *out = dst + y * stride;
        int vx = ox;
        int vy = oy;
        int x;

        for (x = 0; x < 8; x++) { // FIXME: optimize
            const int pos_x  = vx >> 16;
            const int pos_y  = vy >> 16;
            const int frac_x = pos_x & (s - 1);
            const int frac_y = pos_y & (s - 1);
            const int src_x  = pos_x >> shift;
            const int src_y  = pos_y >> shift;
            /* The unsigned comparison rejects negative positions as well as
             * positions at or beyond the last column/row. */
            const int x_inside = (unsigned) src_x < max_x;
            const int y_inside = (unsigned) src_y < max_y;
            int index;

            if (x_inside && y_inside) {
                /* Fully inside: bilinear blend of four samples. */
                int top, bottom;

                index  = src_x + src_y * stride;
                top    = src[index]          * (s - frac_x) +
                         src[index + 1]      * frac_x;
                bottom = src[index + stride]     * (s - frac_x) +
                         src[index + stride + 1] * frac_x;
                out[x] = (top * (s - frac_y) + bottom * frac_y + r) >> norm;
            } else if (x_inside) {
                /* Row clamped: interpolate horizontally only. */
                index  = src_x + av_clip(src_y, 0, max_y) * stride;
                out[x] = ((src[index]     * (s - frac_x) +
                           src[index + 1] * frac_x) * s +
                          r) >> norm;
            } else if (y_inside) {
                /* Column clamped: interpolate vertically only. */
                index  = av_clip(src_x, 0, max_x) + src_y * stride;
                out[x] = ((src[index]          * (s - frac_y) +
                           src[index + stride] * frac_y) * s +
                          r) >> norm;
            } else {
                /* Both clamped: copy the nearest edge sample. */
                index  = av_clip(src_x, 0, max_x) +
                         av_clip(src_y, 0, max_y) * stride;
                out[x] = src[index];
            }

            vx += dxx;
            vy += dyx;
        }

        ox += dxy;
        oy += dyy;
    }
}
/**
 * Fill an MpegVideoDSPContext with the C reference implementations and
 * then give the architecture-specific initialisers a chance to override
 * individual function pointers.
 *
 * @param c context to initialise
 */
av_cold void ff_mpegvideodsp_init(MpegVideoDSPContext *c)
{
    /* Portable defaults; these must be set before any override below. */
    c->gmc  = ff_gmc_c;
    c->gmc1 = gmc1_c;

    /* ARCH_* are build-time constants, so untaken branches are dead code. */
    if (ARCH_PPC) {
        ff_mpegvideodsp_init_ppc(c);
    }
    if (ARCH_X86) {
        ff_mpegvideodsp_init_x86(c);
    }
}
libavcodec/mpegvideodsp.h
0 → 100644
View file @
fab9df63
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_MPEGVIDEODSP_H
#define AVCODEC_MPEGVIDEODSP_H
#include <stdint.h>
void
ff_gmc_c
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
ox
,
int
oy
,
int
dxx
,
int
dxy
,
int
dyx
,
int
dyy
,
int
shift
,
int
r
,
int
width
,
int
height
);
/**
 * Function pointers for the global motion compensation (GMC) routines used
 * by the MPEG video code. Filled in by ff_mpegvideodsp_init().
 */
typedef struct MpegVideoDSPContext {
    /**
     * translational global motion compensation.
     *
     * As implemented by the C reference: writes an 8-pixel-wide block of
     * h rows, each pixel being a bilinear blend of the source pixel and its
     * right, lower and lower-right neighbours.
     *
     * @param dst       destination block
     * @param src       source block
     * @param srcStride distance in bytes between rows (the C reference
     *                  advances both dst and src by it)
     * @param h         number of rows
     * @param x16       horizontal blend weight in 1/16 units
     * @param y16       vertical blend weight in 1/16 units
     * @param rounder   constant added to the weighted sum before it is
     *                  shifted right by 8
     */
    void (*gmc1)(uint8_t *dst /* align 8 */, uint8_t *src /* align 1 */,
                 int srcStride, int h, int x16, int y16, int rounder);
    /**
     * global motion compensation.
     *
     * As implemented by the C reference: writes an 8-pixel-wide block of
     * h rows, sampling src at a fixed-point position that advances by
     * (dxx, dyx) per output column and by (dxy, dyy) per output row,
     * starting from (ox, oy).
     *
     * @param dst     destination block
     * @param src     source plane
     * @param stride  distance in bytes between rows of dst and src
     * @param h       number of rows
     * @param ox, oy  fixed-point source position of the first output pixel
     * @param dxx, dxy, dyx, dyy  fixed-point position increments (see above)
     * @param shift   number of sub-pel fraction bits
     * @param r       rounding constant added before the result is shifted
     *                right by 2 * shift
     * @param width   source width used to bound the sample position
     * @param height  source height used to bound the sample position
     */
    void (*gmc)(uint8_t *dst /* align 8 */, uint8_t *src /* align 1 */,
                int stride, int h, int ox, int oy,
                int dxx, int dxy, int dyx, int dyy,
                int shift, int r, int width, int height);
} MpegVideoDSPContext;
void
ff_mpegvideodsp_init
(
MpegVideoDSPContext
*
c
);
void
ff_mpegvideodsp_init_ppc
(
MpegVideoDSPContext
*
c
);
void
ff_mpegvideodsp_init_x86
(
MpegVideoDSPContext
*
c
);
#endif
/* AVCODEC_MPEGVIDEODSP_H */
libavcodec/ppc/Makefile
View file @
fab9df63
...
@@ -10,7 +10,8 @@ OBJS-$(CONFIG_H264QPEL) += ppc/h264qpel.o
...
@@ -10,7 +10,8 @@ OBJS-$(CONFIG_H264QPEL) += ppc/h264qpel.o
OBJS-$(CONFIG_HPELDSP)
+=
ppc/hpeldsp_altivec.o
OBJS-$(CONFIG_HPELDSP)
+=
ppc/hpeldsp_altivec.o
OBJS-$(CONFIG_HUFFYUVDSP)
+=
ppc/huffyuvdsp_altivec.o
OBJS-$(CONFIG_HUFFYUVDSP)
+=
ppc/huffyuvdsp_altivec.o
OBJS-$(CONFIG_MPEGAUDIODSP)
+=
ppc/mpegaudiodsp_altivec.o
OBJS-$(CONFIG_MPEGAUDIODSP)
+=
ppc/mpegaudiodsp_altivec.o
OBJS-$(CONFIG_MPEGVIDEO)
+=
ppc/mpegvideo_altivec.o
OBJS-$(CONFIG_MPEGVIDEO)
+=
ppc/mpegvideo_altivec.o
\
ppc/mpegvideodsp.o
OBJS-$(CONFIG_VIDEODSP)
+=
ppc/videodsp_ppc.o
OBJS-$(CONFIG_VIDEODSP)
+=
ppc/videodsp_ppc.o
OBJS-$(CONFIG_VP3DSP)
+=
ppc/vp3dsp_altivec.o
OBJS-$(CONFIG_VP3DSP)
+=
ppc/vp3dsp_altivec.o
...
@@ -23,7 +24,6 @@ OBJS-$(CONFIG_VP8_DECODER) += ppc/vp8dsp_altivec.o
...
@@ -23,7 +24,6 @@ OBJS-$(CONFIG_VP8_DECODER) += ppc/vp8dsp_altivec.o
ALTIVEC-OBJS-$(CONFIG_DSPUTIL)
+=
ppc/dsputil_altivec.o
\
ALTIVEC-OBJS-$(CONFIG_DSPUTIL)
+=
ppc/dsputil_altivec.o
\
ppc/fdct_altivec.o
\
ppc/fdct_altivec.o
\
ppc/gmc_altivec.o
\
ppc/idct_altivec.o
\
ppc/idct_altivec.o
\
FFT-OBJS-$(HAVE_GNU_AS)
+=
ppc/fft_altivec_s.o
FFT-OBJS-$(HAVE_GNU_AS)
+=
ppc/fft_altivec_s.o
...
...
libavcodec/ppc/dsputil_altivec.h
View file @
fab9df63
...
@@ -28,8 +28,7 @@
...
@@ -28,8 +28,7 @@
#include "libavcodec/dsputil.h"
#include "libavcodec/dsputil.h"
void
ff_fdct_altivec
(
int16_t
*
block
);
void
ff_fdct_altivec
(
int16_t
*
block
);
void
ff_gmc1_altivec
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x16
,
int
y16
,
int
rounder
);
void
ff_idct_put_altivec
(
uint8_t
*
dest
,
int
line_size
,
int16_t
*
block
);
void
ff_idct_put_altivec
(
uint8_t
*
dest
,
int
line_size
,
int16_t
*
block
);
void
ff_idct_add_altivec
(
uint8_t
*
dest
,
int
line_size
,
int16_t
*
block
);
void
ff_idct_add_altivec
(
uint8_t
*
dest
,
int
line_size
,
int16_t
*
block
);
...
...
libavcodec/ppc/dsputil_ppc.c
View file @
fab9df63
...
@@ -35,8 +35,6 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx,
...
@@ -35,8 +35,6 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx,
if
(
PPC_ALTIVEC
(
av_get_cpu_flags
()))
{
if
(
PPC_ALTIVEC
(
av_get_cpu_flags
()))
{
ff_dsputil_init_altivec
(
c
,
avctx
,
high_bit_depth
);
ff_dsputil_init_altivec
(
c
,
avctx
,
high_bit_depth
);
c
->
gmc1
=
ff_gmc1_altivec
;
if
(
!
high_bit_depth
)
{
if
(
!
high_bit_depth
)
{
#if CONFIG_ENCODERS
#if CONFIG_ENCODERS
if
(
avctx
->
dct_algo
==
FF_DCT_AUTO
||
if
(
avctx
->
dct_algo
==
FF_DCT_AUTO
||
...
...
libavcodec/ppc/
gmc_altivec
.c
→
libavcodec/ppc/
mpegvideodsp
.c
View file @
fab9df63
...
@@ -23,11 +23,12 @@
...
@@ -23,11 +23,12 @@
#include "libavutil/mem.h"
#include "libavutil/mem.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h"
#include "libavutil/ppc/util_altivec.h"
#include "
dsputil_altivec
.h"
#include "
libavcodec/mpegvideodsp
.h"
#if HAVE_ALTIVEC
/* AltiVec-enhanced gmc1. ATM this code assumes stride is a multiple of 8
/* AltiVec-enhanced gmc1. ATM this code assumes stride is a multiple of 8
* to preserve proper dst alignment. */
* to preserve proper dst alignment. */
void
ff_
gmc1_altivec
(
uint8_t
*
dst
/* align 8 */
,
uint8_t
*
src
/* align1 */
,
static
void
gmc1_altivec
(
uint8_t
*
dst
/* align 8 */
,
uint8_t
*
src
/* align1 */
,
int
stride
,
int
h
,
int
x16
,
int
y16
,
int
rounder
)
int
stride
,
int
h
,
int
x16
,
int
y16
,
int
rounder
)
{
{
int
i
;
int
i
;
...
@@ -122,3 +123,11 @@ void ff_gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */,
...
@@ -122,3 +123,11 @@ void ff_gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */,
src
+=
stride
;
src
+=
stride
;
}
}
}
}
#endif
/* HAVE_ALTIVEC */
/**
 * PowerPC override hook for MpegVideoDSPContext: installs the AltiVec
 * gmc1 implementation when AltiVec support is compiled in, and leaves the
 * context untouched otherwise.
 *
 * NOTE(review): the selection here is compile-time only (HAVE_ALTIVEC).
 * The dsputil initialiser this assignment was moved out of set gmc1 inside
 * an `if (PPC_ALTIVEC(av_get_cpu_flags()))` check. Confirm whether a
 * runtime CPU check is still required for builds that may run on CPUs
 * without AltiVec.
 *
 * @param c context whose gmc1 pointer may be replaced
 */
av_cold void ff_mpegvideodsp_init_ppc(MpegVideoDSPContext *c)
{
#if HAVE_ALTIVEC
    c->gmc1 = gmc1_altivec;
#endif /* HAVE_ALTIVEC */
}
libavcodec/x86/Makefile
View file @
fab9df63
...
@@ -20,7 +20,8 @@ OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_init.o
...
@@ -20,7 +20,8 @@ OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_init.o
OBJS-$(CONFIG_HUFFYUVENCDSP)
+=
x86/huffyuvencdsp_mmx.o
OBJS-$(CONFIG_HUFFYUVENCDSP)
+=
x86/huffyuvencdsp_mmx.o
OBJS-$(CONFIG_LPC)
+=
x86/lpc.o
OBJS-$(CONFIG_LPC)
+=
x86/lpc.o
OBJS-$(CONFIG_MPEGAUDIODSP)
+=
x86/mpegaudiodsp.o
OBJS-$(CONFIG_MPEGAUDIODSP)
+=
x86/mpegaudiodsp.o
OBJS-$(CONFIG_MPEGVIDEO)
+=
x86/mpegvideo.o
OBJS-$(CONFIG_MPEGVIDEO)
+=
x86/mpegvideo.o
\
x86/mpegvideodsp.o
OBJS-$(CONFIG_MPEGVIDEOENC)
+=
x86/mpegvideoenc.o
OBJS-$(CONFIG_MPEGVIDEOENC)
+=
x86/mpegvideoenc.o
OBJS-$(CONFIG_QPELDSP)
+=
x86/qpeldsp_init.o
OBJS-$(CONFIG_QPELDSP)
+=
x86/qpeldsp_init.o
OBJS-$(CONFIG_VIDEODSP)
+=
x86/videodsp_init.o
OBJS-$(CONFIG_VIDEODSP)
+=
x86/videodsp_init.o
...
...
libavcodec/x86/dsputil_init.c
View file @
fab9df63
...
@@ -52,8 +52,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
...
@@ -52,8 +52,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
break
;
break
;
}
}
}
}
c
->
gmc
=
ff_gmc_mmx
;
#endif
/* HAVE_MMX_INLINE */
#endif
/* HAVE_MMX_INLINE */
}
}
...
...
libavcodec/x86/dsputil_mmx.c
View file @
fab9df63
...
@@ -260,118 +260,4 @@ void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
...
@@ -260,118 +260,4 @@ void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
}
}
}
}
void
ff_gmc_mmx
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
ox
,
int
oy
,
int
dxx
,
int
dxy
,
int
dyx
,
int
dyy
,
int
shift
,
int
r
,
int
width
,
int
height
)
{
const
int
w
=
8
;
const
int
ix
=
ox
>>
(
16
+
shift
);
const
int
iy
=
oy
>>
(
16
+
shift
);
const
int
oxs
=
ox
>>
4
;
const
int
oys
=
oy
>>
4
;
const
int
dxxs
=
dxx
>>
4
;
const
int
dxys
=
dxy
>>
4
;
const
int
dyxs
=
dyx
>>
4
;
const
int
dyys
=
dyy
>>
4
;
const
uint16_t
r4
[
4
]
=
{
r
,
r
,
r
,
r
};
const
uint16_t
dxy4
[
4
]
=
{
dxys
,
dxys
,
dxys
,
dxys
};
const
uint16_t
dyy4
[
4
]
=
{
dyys
,
dyys
,
dyys
,
dyys
};
const
uint64_t
shift2
=
2
*
shift
;
int
x
,
y
;
const
int
dxw
=
(
dxx
-
(
1
<<
(
16
+
shift
)))
*
(
w
-
1
);
const
int
dyh
=
(
dyy
-
(
1
<<
(
16
+
shift
)))
*
(
h
-
1
);
const
int
dxh
=
dxy
*
(
h
-
1
);
const
int
dyw
=
dyx
*
(
w
-
1
);
if
(
// non-constant fullpel offset (3% of blocks)
((
ox
^
(
ox
+
dxw
))
|
(
ox
^
(
ox
+
dxh
))
|
(
ox
^
(
ox
+
dxw
+
dxh
))
|
(
oy
^
(
oy
+
dyw
))
|
(
oy
^
(
oy
+
dyh
))
|
(
oy
^
(
oy
+
dyw
+
dyh
)))
>>
(
16
+
shift
)
||
// uses more than 16 bits of subpel mv (only at huge resolution)
(
dxx
|
dxy
|
dyx
|
dyy
)
&
15
||
(
unsigned
)
ix
>=
width
-
w
||
(
unsigned
)
iy
>=
height
-
h
)
{
// FIXME could still use mmx for some of the rows
ff_gmc_c
(
dst
,
src
,
stride
,
h
,
ox
,
oy
,
dxx
,
dxy
,
dyx
,
dyy
,
shift
,
r
,
width
,
height
);
return
;
}
src
+=
ix
+
iy
*
stride
;
__asm__
volatile
(
"movd %0, %%mm6
\n\t
"
"pxor %%mm7, %%mm7
\n\t
"
"punpcklwd %%mm6, %%mm6
\n\t
"
"punpcklwd %%mm6, %%mm6
\n\t
"
::
"r"
(
1
<<
shift
));
for
(
x
=
0
;
x
<
w
;
x
+=
4
)
{
uint16_t
dx4
[
4
]
=
{
oxs
-
dxys
+
dxxs
*
(
x
+
0
),
oxs
-
dxys
+
dxxs
*
(
x
+
1
),
oxs
-
dxys
+
dxxs
*
(
x
+
2
),
oxs
-
dxys
+
dxxs
*
(
x
+
3
)
};
uint16_t
dy4
[
4
]
=
{
oys
-
dyys
+
dyxs
*
(
x
+
0
),
oys
-
dyys
+
dyxs
*
(
x
+
1
),
oys
-
dyys
+
dyxs
*
(
x
+
2
),
oys
-
dyys
+
dyxs
*
(
x
+
3
)
};
for
(
y
=
0
;
y
<
h
;
y
++
)
{
__asm__
volatile
(
"movq %0, %%mm4
\n\t
"
"movq %1, %%mm5
\n\t
"
"paddw %2, %%mm4
\n\t
"
"paddw %3, %%mm5
\n\t
"
"movq %%mm4, %0
\n\t
"
"movq %%mm5, %1
\n\t
"
"psrlw $12, %%mm4
\n\t
"
"psrlw $12, %%mm5
\n\t
"
:
"+m"
(
*
dx4
),
"+m"
(
*
dy4
)
:
"m"
(
*
dxy4
),
"m"
(
*
dyy4
));
__asm__
volatile
(
"movq %%mm6, %%mm2
\n\t
"
"movq %%mm6, %%mm1
\n\t
"
"psubw %%mm4, %%mm2
\n\t
"
"psubw %%mm5, %%mm1
\n\t
"
"movq %%mm2, %%mm0
\n\t
"
"movq %%mm4, %%mm3
\n\t
"
"pmullw %%mm1, %%mm0
\n\t
"
// (s - dx) * (s - dy)
"pmullw %%mm5, %%mm3
\n\t
"
// dx * dy
"pmullw %%mm5, %%mm2
\n\t
"
// (s - dx) * dy
"pmullw %%mm4, %%mm1
\n\t
"
// dx * (s - dy)
"movd %4, %%mm5
\n\t
"
"movd %3, %%mm4
\n\t
"
"punpcklbw %%mm7, %%mm5
\n\t
"
"punpcklbw %%mm7, %%mm4
\n\t
"
"pmullw %%mm5, %%mm3
\n\t
"
// src[1, 1] * dx * dy
"pmullw %%mm4, %%mm2
\n\t
"
// src[0, 1] * (s - dx) * dy
"movd %2, %%mm5
\n\t
"
"movd %1, %%mm4
\n\t
"
"punpcklbw %%mm7, %%mm5
\n\t
"
"punpcklbw %%mm7, %%mm4
\n\t
"
"pmullw %%mm5, %%mm1
\n\t
"
// src[1, 0] * dx * (s - dy)
"pmullw %%mm4, %%mm0
\n\t
"
// src[0, 0] * (s - dx) * (s - dy)
"paddw %5, %%mm1
\n\t
"
"paddw %%mm3, %%mm2
\n\t
"
"paddw %%mm1, %%mm0
\n\t
"
"paddw %%mm2, %%mm0
\n\t
"
"psrlw %6, %%mm0
\n\t
"
"packuswb %%mm0, %%mm0
\n\t
"
"movd %%mm0, %0
\n\t
"
:
"=m"
(
dst
[
x
+
y
*
stride
])
:
"m"
(
src
[
0
]),
"m"
(
src
[
1
]),
"m"
(
src
[
stride
]),
"m"
(
src
[
stride
+
1
]),
"m"
(
*
r4
),
"m"
(
shift2
));
src
+=
stride
;
}
src
+=
4
-
h
*
stride
;
}
}
#endif
/* HAVE_INLINE_ASM */
#endif
/* HAVE_INLINE_ASM */
libavcodec/x86/dsputil_x86.h
View file @
fab9df63
...
@@ -41,9 +41,4 @@ void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
...
@@ -41,9 +41,4 @@ void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
void
ff_draw_edges_mmx
(
uint8_t
*
buf
,
int
wrap
,
int
width
,
int
height
,
void
ff_draw_edges_mmx
(
uint8_t
*
buf
,
int
wrap
,
int
width
,
int
height
,
int
w
,
int
h
,
int
sides
);
int
w
,
int
h
,
int
sides
);
void
ff_gmc_mmx
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
ox
,
int
oy
,
int
dxx
,
int
dxy
,
int
dyx
,
int
dyy
,
int
shift
,
int
r
,
int
width
,
int
height
);
#endif
/* AVCODEC_X86_DSPUTIL_X86_H */
#endif
/* AVCODEC_X86_DSPUTIL_X86_H */
libavcodec/x86/mpegvideodsp.c
0 → 100644
View file @
fab9df63
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/mpegvideodsp.h"
#if HAVE_INLINE_ASM
/**
 * MMX implementation of general global motion compensation for an
 * 8-pixel-wide, h-row block.
 *
 * The fast path only handles blocks whose integer source offset is the same
 * for every pixel, whose increments have their low 4 bits clear, and whose
 * integer offset passes the bounds checks below; everything else is
 * forwarded unchanged to ff_gmc_c(). Parameters have the same meaning as
 * for ff_gmc_c().
 *
 * The block is processed as two 4-pixel-wide column strips (x = 0 and
 * x = 4); within a strip, one row of 4 pixels is produced per iteration.
 */
static void gmc_mmx(uint8_t *dst, uint8_t *src,
                    int stride, int h, int ox, int oy,
                    int dxx, int dxy, int dyx, int dyy,
                    int shift, int r, int width, int height)
{
    const int w    = 8;
    /* integer source position of the block origin */
    const int ix   = ox >> (16 + shift);
    const int iy   = oy >> (16 + shift);
    /* offsets and increments pre-shifted by 4; they are stored in 16-bit
     * lanes below */
    const int oxs  = ox >> 4;
    const int oys  = oy >> 4;
    const int dxxs = dxx >> 4;
    const int dxys = dxy >> 4;
    const int dyxs = dyx >> 4;
    const int dyys = dyy >> 4;
    /* per-lane copies of the rounding constant and the per-row increments */
    const uint16_t r4[4]   = { r, r, r, r };
    const uint16_t dxy4[4] = { dxys, dxys, dxys, dxys };
    const uint16_t dyy4[4] = { dyys, dyys, dyys, dyys };
    const uint64_t shift2  = 2 * shift;
    int x, y;

    /* total coordinate drift across the block, relative to a pure
     * one-sample-per-pixel translation */
    const int dxw = (dxx - (1 << (16 + shift))) * (w - 1);
    const int dyh = (dyy - (1 << (16 + shift))) * (h - 1);
    const int dxh = dxy * (h - 1);
    const int dyw = dyx * (w - 1);

    if ( // non-constant fullpel offset (3% of blocks)
        ((ox ^ (ox + dxw)) | (ox ^ (ox + dxh)) | (ox ^ (ox + dxw + dxh)) |
         (oy ^ (oy + dyw)) | (oy ^ (oy + dyh)) | (oy ^ (oy + dyw + dyh))) >> (16 + shift) ||
        // uses more than 16 bits of subpel mv (only at huge resolution)
        (dxx | dxy | dyx | dyy) & 15 ||
        (unsigned) ix >= width - w ||
        (unsigned) iy >= height - h) {
        // FIXME could still use mmx for some of the rows
        ff_gmc_c(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy,
                 shift, r, width, height);
        return;
    }

    src += ix + iy * stride;

    /* mm6 = (1 << shift) replicated into all four 16-bit lanes, mm7 = 0.
     * Both registers are relied upon by the asm statements in the loop. */
    __asm__ volatile (
        "movd %0, %%mm6 \n\t"
        "pxor %%mm7, %%mm7 \n\t"
        "punpcklwd %%mm6, %%mm6 \n\t"
        "punpcklwd %%mm6, %%mm6 \n\t"
        :: "r" (1 << shift));

    for (x = 0; x < w; x += 4) {
        /* Per-lane coordinate accumulators for the four columns of this
         * strip. They start one row increment early because the increment
         * is added at the top of every row iteration. */
        uint16_t dx4[4] = { oxs - dxys + dxxs * (x + 0),
                            oxs - dxys + dxxs * (x + 1),
                            oxs - dxys + dxxs * (x + 2),
                            oxs - dxys + dxxs * (x + 3) };
        uint16_t dy4[4] = { oys - dyys + dyxs * (x + 0),
                            oys - dyys + dyxs * (x + 1),
                            oys - dyys + dyxs * (x + 2),
                            oys - dyys + dyxs * (x + 3) };

        for (y = 0; y < h; y++) {
            /* Advance the accumulators by one row, store them back, and
             * keep the upper 4 bits of each 16-bit lane in mm4 (x) and
             * mm5 (y) as the interpolation weights. */
            __asm__ volatile (
                "movq %0, %%mm4 \n\t"
                "movq %1, %%mm5 \n\t"
                "paddw %2, %%mm4 \n\t"
                "paddw %3, %%mm5 \n\t"
                "movq %%mm4, %0 \n\t"
                "movq %%mm5, %1 \n\t"
                "psrlw $12, %%mm4 \n\t"
                "psrlw $12, %%mm5 \n\t"
                : "+m" (*dx4), "+m" (*dy4)
                : "m" (*dxy4), "m" (*dyy4));

            /* Bilinear blend of four neighbouring source samples for four
             * adjacent pixels, using the weights left in mm4/mm5 by the
             * previous statement; the 4-byte result goes to dst. */
            __asm__ volatile (
                "movq %%mm6, %%mm2 \n\t"
                "movq %%mm6, %%mm1 \n\t"
                "psubw %%mm4, %%mm2 \n\t"
                "psubw %%mm5, %%mm1 \n\t"
                "movq %%mm2, %%mm0 \n\t"
                "movq %%mm4, %%mm3 \n\t"
                "pmullw %%mm1, %%mm0 \n\t" // (s - dx) * (s - dy)
                "pmullw %%mm5, %%mm3 \n\t" // dx * dy
                "pmullw %%mm5, %%mm2 \n\t" // (s - dx) * dy
                "pmullw %%mm4, %%mm1 \n\t" // dx * (s - dy)

                "movd %4, %%mm5 \n\t"
                "movd %3, %%mm4 \n\t"
                "punpcklbw %%mm7, %%mm5 \n\t"
                "punpcklbw %%mm7, %%mm4 \n\t"
                "pmullw %%mm5, %%mm3 \n\t" // src[1, 1] * dx * dy
                "pmullw %%mm4, %%mm2 \n\t" // src[0, 1] * (s - dx) * dy

                "movd %2, %%mm5 \n\t"
                "movd %1, %%mm4 \n\t"
                "punpcklbw %%mm7, %%mm5 \n\t"
                "punpcklbw %%mm7, %%mm4 \n\t"
                "pmullw %%mm5, %%mm1 \n\t" // src[1, 0] * dx * (s - dy)
                "pmullw %%mm4, %%mm0 \n\t" // src[0, 0] * (s - dx) * (s - dy)
                "paddw %5, %%mm1 \n\t"
                "paddw %%mm3, %%mm2 \n\t"
                "paddw %%mm1, %%mm0 \n\t"
                "paddw %%mm2, %%mm0 \n\t"

                "psrlw %6, %%mm0 \n\t"
                "packuswb %%mm0, %%mm0 \n\t"
                "movd %%mm0, %0 \n\t"

                : "=m" (dst[x + y * stride])
                : "m" (src[0]), "m" (src[1]),
                  "m" (src[stride]), "m" (src[stride + 1]),
                  "m" (*r4), "m" (shift2));
            src += stride;
        }
        /* rewind to the first row and step to the next 4-column strip */
        src += 4 - h * stride;
    }
}
#endif
/* HAVE_INLINE_ASM */
/**
 * x86 override hook for MpegVideoDSPContext: when inline assembly is
 * available at build time and the running CPU reports MMX, replace the
 * gmc pointer with the MMX implementation. Otherwise the context is left
 * as it was.
 *
 * @param c context whose gmc pointer may be replaced
 */
av_cold void ff_mpegvideodsp_init_x86(MpegVideoDSPContext *c)
{
#if HAVE_INLINE_ASM
    int detected_flags = av_get_cpu_flags();

    if (INLINE_MMX(detected_flags)) {
        c->gmc = gmc_mmx;
    }
#endif /* HAVE_INLINE_ASM */
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment