Merge remote-tracking branch 'qatar/master'

* qatar/master:
  x86: dsputil: port to cpuflags
  crc: av_crc() parameter names should match between .c, .h and doxygen
  avserver: replace av_read_packet with av_read_frame
  avserver: fix constness casting warnings

Conflicts:
	libavcodec/x86/dsputil.asm

Merged-by: Michael Niedermayer <michaelni@gmx.at>

Merge remote-tracking branch 'qatar/master'
* qatar/master:
  x86: dsputil: port to cpuflags
  crc: av_crc() parameter names should match between .c, .h and doxygen
  avserver: replace av_read_packet with av_read_frame
  avserver: fix constness casting warnings

Conflicts:
	libavcodec/x86/dsputil.asm

Merged-by: Michael Niedermayer <michaelni@gmx.at>
ff3b59c8 · Michael Niedermayer · f30cf51d · 8c3849bc · ff3b59c8 · ff3b59c8
Commit ff3b59c8 authored Nov 16, 2012 by Michael Niedermayer
Show whitespace changes
Inline Side-by-side

Showing with 134 additions and 139 deletions

ffserver.c ffserver.c +14 -11

dsputil.asm libavcodec/x86/dsputil.asm +105 -114

dsputil_mmx.c libavcodec/x86/dsputil_mmx.c +13 -13

crc.h libavutil/crc.h +2 -1

No files found.
--- a/ffserver.c
+++ b/ffserver.c
@@ -1490,7 +1490,8 @@ enum RedirType {
 /* parse http request and prepare header */
 static int http_parse_request(HTTPContext *c)
 {
-    char *p;
+    const char *p;
+    char *p1;
    enum RedirType redir_type;
    char cmd[32];
    char info[1024], filename[1024];
@@ -1501,10 +1502,10 @@ static int http_parse_request(HTTPContext *c)
    FFStream *stream;
    int i;
    char ratebuf[32];
-    char *useragent = 0;
+    const char *useragent = 0;
    p = c->buffer;
-    get_word(cmd, sizeof(cmd), (const char **)&p);
+    get_word(cmd, sizeof(cmd), &p);
    av_strlcpy(c->method, cmd, sizeof(c->method));
    if (!strcmp(cmd, "GET"))
@@ -1514,7 +1515,7 @@ static int http_parse_request(HTTPContext *c)
    else
        return -1;
-    get_word(url, sizeof(url), (const char **)&p);
+    get_word(url, sizeof(url), &p);
    av_strlcpy(c->url, url, sizeof(c->url));
    get_word(protocol, sizeof(protocol), (const char **)&p);
@@ -1527,10 +1528,10 @@ static int http_parse_request(HTTPContext *c)
        http_log("%s - - New connection: %s %s\n", inet_ntoa(c->from_addr.sin_addr), cmd, url);
    /* find the filename and the optional info string in the request */
-    p = strchr(url, '?');
+    p1 = strchr(url, '?');
-    if (p) {
+    if (p1) {
-        av_strlcpy(info, p, sizeof(info));
+        av_strlcpy(info, p1, sizeof(info));
-        *p = '\0';
+        *p1 = '\0';
    } else
        info[0] = '\0';
@@ -1649,7 +1650,7 @@ static int http_parse_request(HTTPContext *c)
    }
    if (redir_type != REDIR_NONE) {
-        char *hostinfo = 0;
+        const char *hostinfo = 0;
        for (p = c->buffer; *p && *p != '\r' && *p != '\n'; ) {
            if (av_strncasecmp(p, "Host:", 5) == 0) {
@@ -1783,7 +1784,7 @@ static int http_parse_request(HTTPContext *c)
        if (!stream->is_feed) {
            /* However it might be a status report from WMP! Let us log the
             * data as it might come in handy one day. */
-            char *logline = 0;
+            const char *logline = 0;
            int client_id = 0;
            for (p = c->buffer; *p && *p != '\r' && *p != '\n'; ) {
@@ -3594,6 +3595,8 @@ static void extract_mpeg4_header(AVFormatContext *infile)
    AVStream *st;
    const uint8_t *p;
+    infile->flags |= AVFMT_FLAG_NOFILLIN | AVFMT_FLAG_NOPARSE;
    mpeg4_count = 0;
    for(i=0;i<infile->nb_streams;i++) {
        st = infile->streams[i];
@@ -3607,7 +3610,7 @@ static void extract_mpeg4_header(AVFormatContext *infile)
    printf("MPEG4 without extra data: trying to find header in %s\n", infile->filename);
    while (mpeg4_count > 0) {
-        if (av_read_packet(infile, &pkt) < 0)
+        if (av_read_frame(infile, &pkt) < 0)
            break;
        st = infile->streams[pkt.stream_index];
        if (st->codec->codec_id == AV_CODEC_ID_MPEG4 &&

--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@@ -33,9 +33,9 @@ pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
 SECTION_TEXT
-%macro SCALARPRODUCT 1
+%macro SCALARPRODUCT 0
 ; int scalarproduct_int16(int16_t *v1, int16_t *v2, int order)
-cglobal scalarproduct_int16_%1, 3,3,3, v1, v2, order
+cglobal scalarproduct_int16, 3,3,3, v1, v2, order
    shl orderq, 1
    add v1q, orderq
    add v2q, orderq
@@ -62,7 +62,7 @@ cglobal scalarproduct_int16_%1, 3,3,3, v1, v2, order
    RET
 ; int scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)
-cglobal scalarproduct_and_madd_int16_%1, 4,4,8, v1, v2, v3, order, mul
+cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, order, mul
    shl orderq, 1
    movd    m7, mulm
 %if mmsize == 16
@@ -107,10 +107,10 @@ cglobal scalarproduct_and_madd_int16_%1, 4,4,8, v1, v2, v3, order, mul
    RET
 %endmacro
-INIT_MMX
+INIT_MMX mmxext
-SCALARPRODUCT mmxext
+SCALARPRODUCT
-INIT_XMM
+INIT_XMM sse2
-SCALARPRODUCT sse2
+SCALARPRODUCT
 %macro SCALARPRODUCT_LOOP 1
 align 16
@@ -158,7 +158,8 @@ align 16
 %endmacro
 ; int scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)
-cglobal scalarproduct_and_madd_int16_ssse3, 4,5,10, v1, v2, v3, order, mul
+INIT_XMM ssse3
+cglobal scalarproduct_and_madd_int16, 4,5,10, v1, v2, v3, order, mul
    shl orderq, 1
    movd    m7, mulm
    pshuflw m7, m7, 0
@@ -207,48 +208,60 @@ SCALARPRODUCT_LOOP 0
 ;                            const int16_t *window, unsigned int len)
 ;-----------------------------------------------------------------------------
-%macro REVERSE_WORDS_MMXEXT 1-2
+%macro REVERSE_WORDS 1-2
-    pshufw   %1, %1, 0x1B
+%if cpuflag(ssse3) && notcpuflag(atom)
-%endmacro
+    pshufb  %1, %2
+%elif cpuflag(sse2)
-%macro REVERSE_WORDS_SSE2 1-2
    pshuflw  %1, %1, 0x1B
    pshufhw  %1, %1, 0x1B
    pshufd   %1, %1, 0x4E
+%elif cpuflag(mmxext)
+    pshufw   %1, %1, 0x1B
+%endif
 %endmacro
-%macro REVERSE_WORDS_SSSE3 2
+%macro MUL16FIXED 3
-    pshufb  %1, %2
+%if cpuflag(ssse3) ; dst, src, unused
-%endmacro
+; dst = ((dst * src) + (1<<14)) >> 15
+    pmulhrsw   %1, %2
+%elif cpuflag(mmxext) ; dst, src, temp
 ; dst = (dst * src) >> 15
 ; pmulhw cuts off the bottom bit, so we have to lshift by 1 and add it back
 ; in from the pmullw result.
-%macro MUL16FIXED_MMXEXT 3 ; dst, src, temp
    mova    %3, %1
    pmulhw  %1, %2
    pmullw  %3, %2
    psrlw   %3, 15
    psllw   %1, 1
    por     %1, %3
+%endif
 %endmacro
-; dst = ((dst * src) + (1<<14)) >> 15
+%macro APPLY_WINDOW_INT16 1 ; %1 bitexact version
-%macro MUL16FIXED_SSSE3 3 ; dst, src, unused
+%if %1
-    pmulhrsw   %1, %2
+cglobal apply_window_int16, 4,5,6, output, input, window, offset, offset2
-%endmacro
+%else
+cglobal apply_window_int16_round, 4,5,6, output, input, window, offset, offset2
-%macro APPLY_WINDOW_INT16 3 ; %1=instruction set, %2=mmxext/sse2 bit exact version, %3=has_ssse3
+%endif
-cglobal apply_window_int16_%1, 4,5,6, output, input, window, offset, offset2
    lea     offset2q, [offsetq-mmsize]
-%if %2
+%if cpuflag(ssse3) && notcpuflag(atom)
-    mova          m5, [pd_16384]
-%elifidn %1, ssse3
    mova          m5, [pb_revwords]
    ALIGN 16
+%elif %1
+    mova          m5, [pd_16384]
 %endif
 .loop:
-%if %2
+%if cpuflag(ssse3)
+    ; This version does the 16x16->16 multiplication in-place without expanding
+    ; to 32-bit. The ssse3 version is bit-identical.
+    mova          m0, [windowq+offset2q]
+    mova          m1, [ inputq+offset2q]
+    pmulhrsw      m1, m0
+    REVERSE_WORDS m0, m5
+    pmulhrsw      m0, [ inputq+offsetq ]
+    mova  [outputq+offset2q], m1
+    mova  [outputq+offsetq ], m0
+%elif %1
    ; This version expands 16-bit to 32-bit, multiplies by the window,
    ; adds 16384 for rounding, right shifts 15, then repacks back to words to
    ; save to the output. The window is reversed for the second half.
@@ -284,16 +297,6 @@ cglobal apply_window_int16_%1, 4,5,6, output, input, window, offset, offset2
    psrad         m2, 15
    packssdw      m0, m2
    mova  [outputq+offsetq], m0
-%elif %3
-    ; This version does the 16x16->16 multiplication in-place without expanding
-    ; to 32-bit. The ssse3 version is bit-identical.
-    mova          m0, [windowq+offset2q]
-    mova          m1, [ inputq+offset2q]
-    pmulhrsw      m1, m0
-    REVERSE_WORDS m0, m5
-    pmulhrsw      m0, [ inputq+offsetq ]
-    mova  [outputq+offset2q], m1
-    mova  [outputq+offsetq ], m0
 %else
    ; This version does the 16x16->16 multiplication in-place without expanding
    ; to 32-bit. The mmxext and sse2 versions do not use rounding, and
@@ -313,22 +316,24 @@ cglobal apply_window_int16_%1, 4,5,6, output, input, window, offset, offset2
    REP_RET
 %endmacro
-INIT_MMX
+INIT_MMX mmxext
-%define REVERSE_WORDS REVERSE_WORDS_MMXEXT
+APPLY_WINDOW_INT16 0
-%define MUL16FIXED MUL16FIXED_MMXEXT
+INIT_XMM sse2
-APPLY_WINDOW_INT16 mmxext,     0, 0
+APPLY_WINDOW_INT16 0
-APPLY_WINDOW_INT16 mmxext_ba,  1, 0
-INIT_XMM
+INIT_MMX mmxext
-%define REVERSE_WORDS REVERSE_WORDS_SSE2
+APPLY_WINDOW_INT16 1
-APPLY_WINDOW_INT16 sse2,       0, 0
+INIT_XMM sse2
-APPLY_WINDOW_INT16 sse2_ba,    1, 0
+APPLY_WINDOW_INT16 1
-APPLY_WINDOW_INT16 ssse3_atom, 0, 1
+INIT_XMM ssse3
-%define REVERSE_WORDS REVERSE_WORDS_SSSE3
+APPLY_WINDOW_INT16 1
-APPLY_WINDOW_INT16 ssse3,      0, 1
+INIT_XMM ssse3, atom
+APPLY_WINDOW_INT16 1
 ; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
-cglobal add_hfyu_median_prediction_mmxext, 6,6,0, dst, top, diff, w, left, left_top
+INIT_MMX mmxext
+cglobal add_hfyu_median_prediction, 6,6,0, dst, top, diff, w, left, left_top
    movq    mm0, [topq]
    movq    mm2, mm0
    movd    mm4, [left_topq]
@@ -430,8 +435,8 @@ cglobal add_hfyu_median_prediction_mmxext, 6,6,0, dst, top, diff, w, left, left_
 %endmacro
 ; int add_hfyu_left_prediction(uint8_t *dst, const uint8_t *src, int w, int left)
-INIT_MMX
+INIT_MMX ssse3
-cglobal add_hfyu_left_prediction_ssse3, 3,3,7, dst, src, w, left
+cglobal add_hfyu_left_prediction, 3,3,7, dst, src, w, left
 .skip_prologue:
    mova    m5, [pb_7]
    mova    m4, [pb_zzzz3333zzzzbbbb]
@@ -440,8 +445,8 @@ cglobal add_hfyu_left_prediction_ssse3, 3,3,7, dst, src, w, left
    psllq   m0, 56
    ADD_HFYU_LEFT_LOOP 1, 1
-INIT_XMM
+INIT_XMM sse4
-cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left
+cglobal add_hfyu_left_prediction, 3,3,7, dst, src, w, left
    mova    m5, [pb_f]
    mova    m6, [pb_zzzzzzzz77777777]
    mova    m4, [pb_zzzz3333zzzzbbbb]
@@ -460,7 +465,8 @@ cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left
 ; float scalarproduct_float_sse(const float *v1, const float *v2, int len)
-cglobal scalarproduct_float_sse, 3,3,2, v1, v2, offset
+INIT_XMM sse
+cglobal scalarproduct_float, 3,3,2, v1, v2, offset
    neg offsetq
    shl offsetq, 2
    sub v1q, offsetq
@@ -1249,15 +1255,20 @@ INIT_YMM avx
 BUTTERFLIES_FLOAT_INTERLEAVE
 %endif
-INIT_XMM sse2
 ; %1 = aligned/unaligned
-%macro BSWAP_LOOPS_SSE2  1
+%macro BSWAP_LOOPS  1
    mov      r3, r2
    sar      r2, 3
    jz       .left4_%1
 .loop8_%1:
    mov%1    m0, [r1 +  0]
    mov%1    m1, [r1 + 16]
+%if cpuflag(ssse3)
+    pshufb   m0, m2
+    pshufb   m1, m2
+    mova     [r0 +  0], m0
+    mova     [r0 + 16], m1
+%else
    pshuflw  m0, m0, 10110001b
    pshuflw  m1, m1, 10110001b
    pshufhw  m0, m0, 10110001b
@@ -1272,8 +1283,9 @@ INIT_XMM sse2
    por      m3, m1
    mova     [r0 +  0], m2
    mova     [r0 + 16], m3
-    add      r1, 32
+%endif
    add      r0, 32
+    add      r1, 32
    dec      r2
    jnz      .loop8_%1
 .left4_%1:
@@ -1281,6 +1293,10 @@ INIT_XMM sse2
    and      r3, 4
    jz       .left
    mov%1    m0, [r1]
+%if cpuflag(ssse3)
+    pshufb   m0, m2
+    mova     [r0], m0
+%else
    pshuflw  m0, m0, 10110001b
    pshufhw  m0, m0, 10110001b
    mova     m2, m0
@@ -1288,72 +1304,29 @@ INIT_XMM sse2
    psrlw    m2, 8
    por      m2, m0
    mova     [r0], m2
+%endif
    add      r1, 16
    add      r0, 16
 %endmacro
 ; void bswap_buf(uint32_t *dst, const uint32_t *src, int w);
+%macro BSWAP32_BUF 0
+%if cpuflag(ssse3)
+cglobal bswap32_buf, 3,4,3
+    mov      r3, r1
+    mova     m2, [pb_bswap32]
+%else
 cglobal bswap32_buf, 3,4,5
    mov      r3, r1
+%endif
    and      r3, 15
    jz       .start_align
-    BSWAP_LOOPS_SSE2  u
+    BSWAP_LOOPS  u
    jmp      .left
 .start_align:
-    BSWAP_LOOPS_SSE2  a
+    BSWAP_LOOPS  a
 .left:
-    and      r2, 3
+%if cpuflag(ssse3)
-    jz       .end
-.loop2:
-    mov      r3d, [r1]
-    bswap    r3d
-    mov      [r0], r3d
-    add      r1, 4
-    add      r0, 4
-    dec      r2
-    jnz      .loop2
-.end:
-    RET
-; %1 = aligned/unaligned
-%macro BSWAP_LOOPS_SSSE3  1
-    mov      r3, r2
-    sar      r2, 3
-    jz       .left4_%1
-.loop8_%1:
-    mov%1    m0, [r1 +  0]
-    mov%1    m1, [r1 + 16]
-    pshufb   m0, m2
-    pshufb   m1, m2
-    mova     [r0 +  0], m0
-    mova     [r0 + 16], m1
-    add      r0, 32
-    add      r1, 32
-    dec      r2
-    jnz      .loop8_%1
-.left4_%1:
-    mov      r2, r3
-    and      r3, 4
-    jz       .left2
-    mov%1    m0, [r1]
-    pshufb   m0, m2
-    mova     [r0], m0
-    add      r1, 16
-    add      r0, 16
-%endmacro
-INIT_XMM ssse3
-; void bswap_buf(uint32_t *dst, const uint32_t *src, int w);
-cglobal bswap32_buf, 3,4,3
-    mov      r3, r1
-    mova     m2, [pb_bswap32]
-    and      r3, 15
-    jz       .start_align
-    BSWAP_LOOPS_SSSE3  u
-    jmp      .left2
-.start_align:
-    BSWAP_LOOPS_SSSE3  a
-.left2:
    mov      r3, r2
    and      r2, 2
    jz       .left1
@@ -1368,6 +1341,24 @@ cglobal bswap32_buf, 3,4,3
    mov      r2d, [r1]
    bswap    r2d
    mov      [r0], r2d
+%else
+    and      r2, 3
+    jz       .end
+.loop2:
+    mov      r3d, [r1]
+    bswap    r3d
+    mov      [r0], r3d
+    add      r1, 4
+    add      r0, 4
+    dec      r2
+    jnz      .loop2
+%endif
 .end:
    RET
+%endmacro
+INIT_XMM sse2
+BSWAP32_BUF
+INIT_XMM ssse3
+BSWAP32_BUF
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2471,15 +2471,15 @@ int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2,
                                              const int16_t *v3,
                                              int order, int mul);
-void ff_apply_window_int16_mmxext    (int16_t *output, const int16_t *input,
+void ff_apply_window_int16_round_mmxext(int16_t *output, const int16_t *input,
                                        const int16_t *window, unsigned int len);
-void ff_apply_window_int16_mmxext_ba (int16_t *output, const int16_t *input,
+void ff_apply_window_int16_round_sse2(int16_t *output, const int16_t *input,
                                      const int16_t *window, unsigned int len);
-void ff_apply_window_int16_sse2      (int16_t *output, const int16_t *input,
+void ff_apply_window_int16_mmxext(int16_t *output, const int16_t *input,
                                  const int16_t *window, unsigned int len);
-void ff_apply_window_int16_sse2_ba   (int16_t *output, const int16_t *input,
+void ff_apply_window_int16_sse2(int16_t *output, const int16_t *input,
                                const int16_t *window, unsigned int len);
-void ff_apply_window_int16_ssse3     (int16_t *output, const int16_t *input,
+void ff_apply_window_int16_ssse3(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len);
 void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input,
                                      const int16_t *window, unsigned int len);
@@ -2726,9 +2726,9 @@ static void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
    c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmxext;
    if (avctx->flags & CODEC_FLAG_BITEXACT) {
-        c->apply_window_int16 = ff_apply_window_int16_mmxext_ba;
-    } else {
        c->apply_window_int16 = ff_apply_window_int16_mmxext;
+    } else {
+        c->apply_window_int16 = ff_apply_window_int16_round_mmxext;
    }
 #endif /* HAVE_YASM */
 }
@@ -2912,9 +2912,9 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
        c->vector_clip_int32 = ff_vector_clip_int32_sse2;
    }
    if (avctx->flags & CODEC_FLAG_BITEXACT) {
-        c->apply_window_int16 = ff_apply_window_int16_sse2_ba;
-    } else if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
        c->apply_window_int16 = ff_apply_window_int16_sse2;
+    } else if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
+        c->apply_window_int16 = ff_apply_window_int16_round_sse2;
    }
    c->bswap_buf = ff_bswap32_buf_sse2;
 #endif /* HAVE_YASM */

--- a/libavutil/crc.h
+++ b/libavutil/crc.h
@@ -68,6 +68,7 @@ const AVCRC *av_crc_get_table(AVCRCId crc_id);
 *
 * @see av_crc_init() "le" parameter
 */
-uint32_t av_crc(const AVCRC *ctx, uint32_t start_crc, const uint8_t *buffer, size_t length) av_pure;
+uint32_t av_crc(const AVCRC *ctx, uint32_t crc,
+                const uint8_t *buffer, size_t length) av_pure;
 #endif /* AVUTIL_CRC_H */