Browse code

Merge remote-tracking branch 'qatar/master'

* qatar/master:
Fix compilation of iirfilter-test.
libx264: handle closed GOP codec flag
lavf: remove duplicate assignment in avformat_alloc_context.
lavf: use designated initializers for AVClasses.
flvdec: cleanup debug code
asfdec: fix possible overread on broken files.
asfdec: do not fall back to binary/generic search
asfdec: reindent after previous commit c7bd5ed
asfdec: fallback to binary search internally
mpegaudio: add _fixed suffix to some names
Modify x86util.asm to ease transitioning to 10-bit H.264 assembly.
dct: build dct32 as separate object files
qdm2: include correct header for rdft

Conflicts:
ffpresets/libx264-fast.ffpreset
ffpresets/libx264-fast_firstpass.ffpreset
ffpresets/libx264-faster.ffpreset
ffpresets/libx264-faster_firstpass.ffpreset
ffpresets/libx264-medium.ffpreset
ffpresets/libx264-medium_firstpass.ffpreset
ffpresets/libx264-placebo.ffpreset
ffpresets/libx264-placebo_firstpass.ffpreset
ffpresets/libx264-slow.ffpreset
ffpresets/libx264-slow_firstpass.ffpreset
ffpresets/libx264-slower.ffpreset
ffpresets/libx264-slower_firstpass.ffpreset
ffpresets/libx264-superfast.ffpreset
ffpresets/libx264-superfast_firstpass.ffpreset
ffpresets/libx264-ultrafast.ffpreset
ffpresets/libx264-ultrafast_firstpass.ffpreset
ffpresets/libx264-veryfast.ffpreset
ffpresets/libx264-veryfast_firstpass.ffpreset
ffpresets/libx264-veryslow.ffpreset
ffpresets/libx264-veryslow_firstpass.ffpreset
libavformat/flvdec.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>

Michael Niedermayer authored on 2011/05/18 12:42:42
Showing 29 changed files
... ...
@@ -2927,8 +2927,8 @@ enabled libvpx     && {
2927 2927
     enabled libvpx_encoder && { check_lib2 "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_enc_init_ver VPX_CQ" -lvpx ||
2928 2928
                                 die "ERROR: libvpx encoder version must be >=0.9.6"; } }
2929 2929
 enabled libx264    && require  libx264 x264.h x264_encoder_encode -lx264 &&
2930
-                      { check_cpp_condition x264.h "X264_BUILD >= 99" ||
2931
-                        die "ERROR: libx264 version must be >= 0.99."; }
2930
+                      { check_cpp_condition x264.h "X264_BUILD >= 115" ||
2931
+                        die "ERROR: libx264 version must be >= 0.115."; }
2932 2932
 enabled libxavs    && require  libxavs xavs.h xavs_encoder_encode -lxavs
2933 2933
 enabled libxvid    && require  libxvid xvid.h xvid_global -lxvidcore
2934 2934
 enabled mlib       && require  mediaLib mlib_types.h mlib_VectorSub_S16_U8_Mod -lmlib
... ...
@@ -1,5 +1,5 @@
1 1
 coder=0
2
-flags=+loop
2
+flags=+loop+cgop
3 3
 cmp=+chroma
4 4
 partitions=-parti8x8+parti4x4+partp8x8-partp4x4-partb8x8
5 5
 me_method=hex
... ...
@@ -1,5 +1,5 @@
1 1
 coder=1
2
-flags=+loop
2
+flags=+loop+cgop
3 3
 cmp=+chroma
4 4
 partitions=+parti8x8+parti4x4+partp8x8+partp4x4-partb8x8
5 5
 me_method=esa
... ...
@@ -1,5 +1,5 @@
1 1
 coder=1
2
-flags=+loop
2
+flags=+loop+cgop
3 3
 cmp=+chroma
4 4
 partitions=-parti8x8+parti4x4+partp8x8+partp4x4-partb8x8
5 5
 me_method=hex
... ...
@@ -1,5 +1,5 @@
1 1
 coder=1
2
-flags=+loop
2
+flags=+loop+cgop
3 3
 cmp=+chroma
4 4
 partitions=+parti8x8+parti4x4+partp8x8+partp4x4-partb8x8
5 5
 me_method=umh
... ...
@@ -1,5 +1,5 @@
1 1
 coder=1
2
-flags=+loop
2
+flags=+loop+cgop
3 3
 cmp=+chroma
4 4
 partitions=+parti8x8+parti4x4+partp8x8+partp4x4-partb8x8
5 5
 me_method=umh
... ...
@@ -1,5 +1,5 @@
1 1
 coder=0
2
-flags=+loop
2
+flags=+loop+cgop
3 3
 cmp=+chroma
4 4
 partitions=-parti8x8-parti4x4-partp8x8-partp4x4-partb8x8
5 5
 me_method=dia
... ...
@@ -28,7 +28,7 @@ OBJS-$(CONFIG_AANDCT)                  += aandcttab.o
28 28
 OBJS-$(CONFIG_AC3DSP)                  += ac3dsp.o
29 29
 OBJS-$(CONFIG_CRYSTALHD)               += crystalhd.o
30 30
 OBJS-$(CONFIG_ENCODERS)                += faandct.o jfdctfst.o jfdctint.o
31
-OBJS-$(CONFIG_DCT)                     += dct.o
31
+OBJS-$(CONFIG_DCT)                     += dct.o dct32_fixed.o dct32_float.o
32 32
 OBJS-$(CONFIG_DWT)                     += dwt.o
33 33
 OBJS-$(CONFIG_DXVA2)                   += dxva2.o
34 34
 FFT-OBJS-$(CONFIG_HARDCODED_TABLES)    += cos_tables.o cos_fixed_tables.o
... ...
@@ -30,9 +30,7 @@
30 30
 #include <math.h>
31 31
 #include "libavutil/mathematics.h"
32 32
 #include "dct.h"
33
-
34
-#define DCT32_FLOAT
35
-#include "dct32.c"
33
+#include "dct32.h"
36 34
 
37 35
 /* sin((M_PI * x / (2*n)) */
38 36
 #define SIN(s,n,x) (s->costab[(n) - (x)])
... ...
@@ -210,7 +208,7 @@ av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
210 210
         }
211 211
     }
212 212
 
213
-    s->dct32 = dct32;
213
+    s->dct32 = ff_dct32_float;
214 214
     if (HAVE_MMX)     ff_dct_init_mmx(s);
215 215
 
216 216
     return 0;
... ...
@@ -19,10 +19,19 @@
19 19
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 20
  */
21 21
 
22
-#ifdef DCT32_FLOAT
22
+#include "dct32.h"
23
+#include "mathops.h"
24
+
25
+#if DCT32_FLOAT
26
+#   define dct32 ff_dct32_float
23 27
 #   define FIXHR(x)       ((float)(x))
24 28
 #   define MULH3(x, y, s) ((s)*(y)*(x))
25 29
 #   define INTFLOAT float
30
+#else
31
+#   define dct32 ff_dct32_fixed
32
+#   define FIXHR(a)       ((int)((a) * (1LL<<32) + 0.5))
33
+#   define MULH3(x, y, s) MULH((s)*(x), y)
34
+#   define INTFLOAT int
26 35
 #endif
27 36
 
28 37
 
... ...
@@ -103,7 +112,7 @@
103 103
 #define ADD(a, b) val##a += val##b
104 104
 
105 105
 /* DCT32 without 1/sqrt(2) coef zero scaling. */
106
-static void dct32(INTFLOAT *out, const INTFLOAT *tab)
106
+void dct32(INTFLOAT *out, const INTFLOAT *tab)
107 107
 {
108 108
     INTFLOAT tmp0, tmp1;
109 109
 
110 110
new file mode 100644
... ...
@@ -0,0 +1,25 @@
0
+/*
1
+ * This file is part of Libav.
2
+ *
3
+ * Libav is free software; you can redistribute it and/or
4
+ * modify it under the terms of the GNU Lesser General Public
5
+ * License as published by the Free Software Foundation; either
6
+ * version 2.1 of the License, or (at your option) any later version.
7
+ *
8
+ * Libav is distributed in the hope that it will be useful,
9
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
+ * Lesser General Public License for more details.
12
+ *
13
+ * You should have received a copy of the GNU Lesser General Public
14
+ * License along with Libav; if not, write to the Free Software
15
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+ */
17
+
18
+#ifndef AVCODEC_DCT32_H
19
+#define AVCODEC_DCT32_H
20
+
21
+void ff_dct32_float(float *dst, const float *src);
22
+void ff_dct32_fixed(int *dst, const int *src);
23
+
24
+#endif
0 25
new file mode 100644
... ...
@@ -0,0 +1,20 @@
0
+/*
1
+ * This file is part of Libav.
2
+ *
3
+ * Libav is free software; you can redistribute it and/or
4
+ * modify it under the terms of the GNU Lesser General Public
5
+ * License as published by the Free Software Foundation; either
6
+ * version 2.1 of the License, or (at your option) any later version.
7
+ *
8
+ * Libav is distributed in the hope that it will be useful,
9
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
+ * Lesser General Public License for more details.
12
+ *
13
+ * You should have received a copy of the GNU Lesser General Public
14
+ * License along with Libav; if not, write to the Free Software
15
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+ */
17
+
18
+#define DCT32_FLOAT 0
19
+#include "dct32.c"
0 20
new file mode 100644
... ...
@@ -0,0 +1,20 @@
0
+/*
1
+ * This file is part of Libav.
2
+ *
3
+ * Libav is free software; you can redistribute it and/or
4
+ * modify it under the terms of the GNU Lesser General Public
5
+ * License as published by the Free Software Foundation; either
6
+ * version 2.1 of the License, or (at your option) any later version.
7
+ *
8
+ * Libav is distributed in the hope that it will be useful,
9
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11
+ * Lesser General Public License for more details.
12
+ *
13
+ * You should have received a copy of the GNU Lesser General Public
14
+ * License along with Libav; if not, write to the Free Software
15
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
+ */
17
+
18
+#define DCT32_FLOAT 1
19
+#include "dct32.c"
... ...
@@ -324,7 +324,7 @@ int main(void)
324 324
     int i;
325 325
     FILE* fd;
326 326
 
327
-    fcoeffs = ff_iir_filter_init_coeffs(FF_FILTER_TYPE_BUTTERWORTH,
327
+    fcoeffs = ff_iir_filter_init_coeffs(NULL, FF_FILTER_TYPE_BUTTERWORTH,
328 328
                                         FF_FILTER_MODE_LOWPASS, FILT_ORDER,
329 329
                                         cutoff_coeff, 0.0, 0.0);
330 330
     fstate  = ff_iir_filter_init_state(FILT_ORDER);
... ...
@@ -367,6 +367,8 @@ static av_cold int X264_init(AVCodecContext *avctx)
367 367
 
368 368
     x4->params.b_interlaced   = avctx->flags & CODEC_FLAG_INTERLACED_DCT;
369 369
 
370
+    x4->params.b_open_gop     = !(avctx->flags & CODEC_FLAG_CLOSED_GOP);
371
+
370 372
     x4->params.i_slice_count  = avctx->slices;
371 373
 
372 374
     x4->params.vui.b_fullrange = avctx->pix_fmt == PIX_FMT_YUVJ420P;
... ...
@@ -36,7 +36,7 @@
36 36
 
37 37
 void ff_mpc_init(void)
38 38
 {
39
-    ff_mpa_synth_init(ff_mpa_synth_window);
39
+    ff_mpa_synth_init_fixed(ff_mpa_synth_window_fixed);
40 40
 }
41 41
 
42 42
 /**
... ...
@@ -51,8 +51,8 @@ static void mpc_synth(MPCContext *c, int16_t *out, int channels)
51 51
     for(ch = 0;  ch < channels; ch++){
52 52
         samples_ptr = samples + ch;
53 53
         for(i = 0; i < SAMPLES_PER_BAND; i++) {
54
-            ff_mpa_synth_filter(c->synth_buf[ch], &(c->synth_buf_offset[ch]),
55
-                                ff_mpa_synth_window, &dither_state,
54
+            ff_mpa_synth_filter_fixed(c->synth_buf[ch], &(c->synth_buf_offset[ch]),
55
+                                ff_mpa_synth_window_fixed, &dither_state,
56 56
                                 samples_ptr, channels,
57 57
                                 c->sb_samples[ch][i]);
58 58
             samples_ptr += 32 * channels;
... ...
@@ -158,9 +158,9 @@ typedef struct HuffTable {
158 158
 
159 159
 int ff_mpa_l2_select_table(int bitrate, int nb_channels, int freq, int lsf);
160 160
 int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate);
161
-extern MPA_INT ff_mpa_synth_window[];
162
-void ff_mpa_synth_init(MPA_INT *window);
163
-void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
161
+extern MPA_INT ff_mpa_synth_window_fixed[];
162
+void ff_mpa_synth_init_fixed(MPA_INT *window);
163
+void ff_mpa_synth_filter_fixed(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
164 164
                          MPA_INT *window, int *dither_state,
165 165
                          OUT_INT *samples, int incr,
166 166
                          INTFLOAT sb_samples[SBLIMIT]);
... ...
@@ -33,9 +33,9 @@ int main(void)
33 33
 
34 34
     WRITE_ARRAY("static const", int8_t, table_4_3_exp);
35 35
     WRITE_ARRAY("static const", uint32_t, table_4_3_value);
36
-    WRITE_ARRAY("static const", uint32_t, exp_table);
36
+    WRITE_ARRAY("static const", uint32_t, exp_table_fixed);
37 37
     WRITE_ARRAY("static const", float, exp_table_float);
38
-    WRITE_2D_ARRAY("static const", uint32_t, expval_table);
38
+    WRITE_2D_ARRAY("static const", uint32_t, expval_table_fixed);
39 39
     WRITE_2D_ARRAY("static const", float, expval_table_float);
40 40
 
41 41
     return 0;
... ...
@@ -33,8 +33,8 @@
33 33
 #else
34 34
 static int8_t   table_4_3_exp[TABLE_4_3_SIZE];
35 35
 static uint32_t table_4_3_value[TABLE_4_3_SIZE];
36
-static uint32_t exp_table[512];
37
-static uint32_t expval_table[512][16];
36
+static uint32_t exp_table_fixed[512];
37
+static uint32_t expval_table_fixed[512][16];
38 38
 static float exp_table_float[512];
39 39
 static float expval_table_float[512][16];
40 40
 
... ...
@@ -59,10 +59,10 @@ static void mpegaudio_tableinit(void)
59 59
     for (exponent = 0; exponent < 512; exponent++) {
60 60
         for (value = 0; value < 16; value++) {
61 61
             double f = (double)value * cbrtf(value) * pow(2, (exponent - 400) * 0.25 + FRAC_BITS + 5);
62
-            expval_table[exponent][value] = llrint(f);
62
+            expval_table_fixed[exponent][value] = llrint(f);
63 63
             expval_table_float[exponent][value] = f;
64 64
         }
65
-        exp_table[exponent] = expval_table[exponent][1];
65
+        exp_table_fixed[exponent] = expval_table_fixed[exponent][1];
66 66
         exp_table_float[exponent] = expval_table_float[exponent][1];
67 67
     }
68 68
 }
... ...
@@ -29,6 +29,7 @@
29 29
 #include "get_bits.h"
30 30
 #include "dsputil.h"
31 31
 #include "mathops.h"
32
+#include "dct32.h"
32 33
 
33 34
 /*
34 35
  * TODO:
... ...
@@ -57,7 +58,7 @@
57 57
 #   define FIXHR(a)       ((int)((a) * (1LL<<32) + 0.5))
58 58
 #   define MULH3(x, y, s) MULH((s)*(x), y)
59 59
 #   define MULLx(x, y, s) MULL(x,y,s)
60
-#   define RENAME(a)      a
60
+#   define RENAME(a)      a ## _fixed
61 61
 #   define OUT_FMT AV_SAMPLE_FMT_S16
62 62
 #endif
63 63
 
... ...
@@ -68,12 +69,6 @@
68 68
 #include "mpegaudiodata.h"
69 69
 #include "mpegaudiodectab.h"
70 70
 
71
-#if CONFIG_FLOAT
72
-#    include "fft.h"
73
-#else
74
-#    include "dct32.c"
75
-#endif
76
-
77 71
 static void compute_antialias(MPADecodeContext *s, GranuleDef *g);
78 72
 static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window,
79 73
                                int *dither_state, OUT_INT *samples, int incr);
... ...
@@ -626,7 +621,7 @@ static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window,
626 626
    32 samples. */
627 627
 /* XXX: optimize by avoiding ring buffer usage */
628 628
 #if !CONFIG_FLOAT
629
-void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
629
+void ff_mpa_synth_filter_fixed(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
630 630
                          MPA_INT *window, int *dither_state,
631 631
                          OUT_INT *samples, int incr,
632 632
                          INTFLOAT sb_samples[SBLIMIT])
... ...
@@ -637,7 +632,7 @@ void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
637 637
     offset = *synth_buf_offset;
638 638
     synth_buf = synth_buf_ptr + offset;
639 639
 
640
-    dct32(synth_buf, sb_samples);
640
+    ff_dct32_fixed(synth_buf, sb_samples);
641 641
     apply_window_mp3_c(synth_buf, window, dither_state, samples, incr);
642 642
 
643 643
     offset = (offset - 32) & 511;
... ...
@@ -38,7 +38,7 @@
38 38
 #include "avcodec.h"
39 39
 #include "get_bits.h"
40 40
 #include "dsputil.h"
41
-#include "fft.h"
41
+#include "rdft.h"
42 42
 #include "mpegaudio.h"
43 43
 
44 44
 #include "qdm2data.h"
... ...
@@ -1616,8 +1616,8 @@ static void qdm2_synthesis_filter (QDM2Context *q, int index)
1616 1616
         OUT_INT *samples_ptr = samples + ch;
1617 1617
 
1618 1618
         for (i = 0; i < 8; i++) {
1619
-            ff_mpa_synth_filter(q->synth_buf[ch], &(q->synth_buf_offset[ch]),
1620
-                ff_mpa_synth_window, &dither_state,
1619
+            ff_mpa_synth_filter_fixed(q->synth_buf[ch], &(q->synth_buf_offset[ch]),
1620
+                ff_mpa_synth_window_fixed, &dither_state,
1621 1621
                 samples_ptr, q->nb_channels,
1622 1622
                 q->sb_samples[ch][(8 * index) + i]);
1623 1623
             samples_ptr += 32 * q->nb_channels;
... ...
@@ -1646,7 +1646,7 @@ static av_cold void qdm2_init(QDM2Context *q) {
1646 1646
     initialized = 1;
1647 1647
 
1648 1648
     qdm2_init_vlc();
1649
-    ff_mpa_synth_init(ff_mpa_synth_window);
1649
+    ff_mpa_synth_init_fixed(ff_mpa_synth_window_fixed);
1650 1650
     softclip_table_init();
1651 1651
     rnd_table_init();
1652 1652
     init_noise_samples();
... ...
@@ -59,12 +59,12 @@ SECTION .text
59 59
 %endmacro
60 60
 
61 61
 %macro HADAMARD8 0
62
-    SUMSUB_BADC       m0, m1, m2, m3
63
-    SUMSUB_BADC       m4, m5, m6, m7
64
-    SUMSUB_BADC       m0, m2, m1, m3
65
-    SUMSUB_BADC       m4, m6, m5, m7
66
-    SUMSUB_BADC       m0, m4, m1, m5
67
-    SUMSUB_BADC       m2, m6, m3, m7
62
+    SUMSUB_BADC       w, 0, 1, 2, 3
63
+    SUMSUB_BADC       w, 4, 5, 6, 7
64
+    SUMSUB_BADC       w, 0, 2, 1, 3
65
+    SUMSUB_BADC       w, 4, 6, 5, 7
66
+    SUMSUB_BADC       w, 0, 4, 1, 5
67
+    SUMSUB_BADC       w, 2, 6, 3, 7
68 68
 %endmacro
69 69
 
70 70
 %macro ABS1_SUM 3
... ...
@@ -59,11 +59,11 @@ SECTION .text
59 59
     movq         m2, [%2+16]
60 60
     movq         m3, [%2+24]
61 61
 
62
-    IDCT4_1D      0, 1, 2, 3, 4, 5
62
+    IDCT4_1D      w, 0, 1, 2, 3, 4, 5
63 63
     mova         m6, [pw_32]
64 64
     TRANSPOSE4x4W 0, 1, 2, 3, 4
65 65
     paddw        m0, m6
66
-    IDCT4_1D      0, 1, 2, 3, 4, 5
66
+    IDCT4_1D      w, 0, 1, 2, 3, 4, 5
67 67
     pxor         m7, m7
68 68
 
69 69
     STORE_DIFFx2 m0, m1, m4, m5, m7, 6, %1, %3
... ...
@@ -118,13 +118,13 @@ cglobal h264_idct_add_mmx, 3, 3, 0
118 118
 
119 119
     mova         m2, %1
120 120
     mova         m5, %2
121
-    SUMSUB_BA    m5, m2
122
-    SUMSUB_BA    m6, m5
123
-    SUMSUB_BA    m4, m2
124
-    SUMSUB_BA    m7, m6
125
-    SUMSUB_BA    m0, m4
126
-    SUMSUB_BA    m3, m2
127
-    SUMSUB_BA    m1, m5
121
+    SUMSUB_BA    w, 5, 2
122
+    SUMSUB_BA    w, 6, 5
123
+    SUMSUB_BA    w, 4, 2
124
+    SUMSUB_BA    w, 7, 6
125
+    SUMSUB_BA    w, 0, 4
126
+    SUMSUB_BA    w, 3, 2
127
+    SUMSUB_BA    w, 1, 5
128 128
     SWAP          7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567
129 129
 %endmacro
130 130
 
... ...
@@ -715,10 +715,10 @@ x264_add8x4_idct_sse2:
715 715
     movhps m1, [r2+40]
716 716
     movhps m2, [r2+48]
717 717
     movhps m3, [r2+56]
718
-    IDCT4_1D 0,1,2,3,4,5
718
+    IDCT4_1D w,0,1,2,3,4,5
719 719
     TRANSPOSE2x4x4W 0,1,2,3,4
720 720
     paddw m0, [pw_32]
721
-    IDCT4_1D 0,1,2,3,4,5
721
+    IDCT4_1D w,0,1,2,3,4,5
722 722
     pxor  m7, m7
723 723
     STORE_DIFFx2 m0, m1, m4, m5, m7, 6, r0, r3
724 724
     lea   r0, [r0+r3*2]
... ...
@@ -859,8 +859,8 @@ cglobal h264_idct_add8_sse2, 5, 7, 8
859 859
 ;void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul)
860 860
 
861 861
 %macro WALSH4_1D 5
862
-    SUMSUB_BADC m%4, m%3, m%2, m%1, m%5
863
-    SUMSUB_BADC m%4, m%2, m%3, m%1, m%5
862
+    SUMSUB_BADC w, %4, %3, %2, %1, %5
863
+    SUMSUB_BADC w, %4, %2, %3, %1, %5
864 864
     SWAP %1, %4, %3
865 865
 %endmacro
866 866
 
... ...
@@ -1106,10 +1106,10 @@ cglobal vp8_idct_dc_add4uv_mmx, 3, 3
1106 1106
 ;           %5/%6 are temporary registers
1107 1107
 ;           we assume m6/m7 have constant words 20091/17734 loaded in them
1108 1108
 %macro VP8_IDCT_TRANSFORM4x4_1D 6
1109
-    SUMSUB_BA           m%3, m%1, m%5     ;t0, t1
1109
+    SUMSUB_BA         w, %3,  %1,  %5     ;t0, t1
1110 1110
     VP8_MULTIPLY_SUMSUB m%2, m%4, m%5,m%6 ;t2, t3
1111
-    SUMSUB_BA           m%4, m%3, m%5     ;tmp0, tmp3
1112
-    SUMSUB_BA           m%2, m%1, m%5     ;tmp1, tmp2
1111
+    SUMSUB_BA         w, %4,  %3,  %5     ;tmp0, tmp3
1112
+    SUMSUB_BA         w, %2,  %1,  %5     ;tmp1, tmp2
1113 1113
     SWAP                 %4,  %1
1114 1114
     SWAP                 %4,  %3
1115 1115
 %endmacro
... ...
@@ -1181,8 +1181,8 @@ VP8_IDCT_ADD sse
1181 1181
 %endmacro
1182 1182
 
1183 1183
 %macro HADAMARD4_1D 4
1184
-    SUMSUB_BADC m%2, m%1, m%4, m%3
1185
-    SUMSUB_BADC m%4, m%2, m%3, m%1
1184
+    SUMSUB_BADC w, %2, %1, %4, %3
1185
+    SUMSUB_BADC w, %4, %2, %3, %1
1186 1186
     SWAP %1, %4, %3
1187 1187
 %endmacro
1188 1188
 
... ...
@@ -208,6 +208,17 @@
208 208
     pminub  %2, %4
209 209
 %endmacro
210 210
 
211
+%macro ABSD2_MMX 4
212
+    pxor    %3, %3
213
+    pxor    %4, %4
214
+    pcmpgtd %3, %1
215
+    pcmpgtd %4, %2
216
+    pxor    %1, %3
217
+    pxor    %2, %4
218
+    psubd   %1, %3
219
+    psubd   %2, %4
220
+%endmacro
221
+
211 222
 %macro ABSB_SSSE3 2
212 223
     pabsb   %1, %1
213 224
 %endmacro
... ...
@@ -230,12 +241,7 @@
230 230
 %macro SPLATB_MMX 3
231 231
     movd      %1, [%2-3] ;to avoid crossing a cacheline
232 232
     punpcklbw %1, %1
233
-%if mmsize==16
234
-    pshuflw   %1, %1, 0xff
235
-    punpcklqdq %1, %1
236
-%else
237
-    pshufw    %1, %1, 0xff
238
-%endif
233
+    SPLATW    %1, %1, 3
239 234
 %endmacro
240 235
 
241 236
 %macro SPLATB_SSSE3 3
... ...
@@ -243,125 +249,169 @@
243 243
     pshufb    %1, %3
244 244
 %endmacro
245 245
 
246
-%macro PALIGNR_MMX 4
247
-    %ifnidn %4, %2
246
+%macro PALIGNR_MMX 4-5 ; [dst,] src1, src2, imm, tmp
247
+    %define %%dst %1
248
+%if %0==5
249
+%ifnidn %1, %2
250
+    mova    %%dst, %2
251
+%endif
252
+    %rotate 1
253
+%endif
254
+%ifnidn %4, %2
248 255
     mova    %4, %2
249
-    %endif
250
-    %if mmsize == 8
251
-    psllq   %1, (8-%3)*8
256
+%endif
257
+%if mmsize==8
258
+    psllq   %%dst, (8-%3)*8
252 259
     psrlq   %4, %3*8
253
-    %else
254
-    pslldq  %1, 16-%3
260
+%else
261
+    pslldq  %%dst, 16-%3
255 262
     psrldq  %4, %3
256
-    %endif
257
-    por     %1, %4
263
+%endif
264
+    por     %%dst, %4
258 265
 %endmacro
259 266
 
260
-%macro PALIGNR_SSSE3 4
267
+%macro PALIGNR_SSSE3 4-5
268
+%if %0==5
269
+    palignr %1, %2, %3, %4
270
+%else
261 271
     palignr %1, %2, %3
272
+%endif
262 273
 %endmacro
263 274
 
264 275
 %macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from
265 276
 %ifnum %5
266
-    mova   m%1, m%5
267
-    mova   m%3, m%5
277
+    pand   m%3, m%5, m%4 ; src .. y6 .. y4
278
+    pand   m%1, m%5, m%2 ; dst .. y6 .. y4
268 279
 %else
269 280
     mova   m%1, %5
270
-    mova   m%3, m%1
281
+    pand   m%3, m%1, m%4 ; src .. y6 .. y4
282
+    pand   m%1, m%1, m%2 ; dst .. y6 .. y4
271 283
 %endif
272
-    pand   m%1, m%2 ; dst .. y6 .. y4
273
-    pand   m%3, m%4 ; src .. y6 .. y4
274
-    psrlw  m%2, 8   ; dst .. y7 .. y5
275
-    psrlw  m%4, 8   ; src .. y7 .. y5
284
+    psrlw  m%2, 8        ; dst .. y7 .. y5
285
+    psrlw  m%4, 8        ; src .. y7 .. y5
276 286
 %endmacro
277 287
 
278
-%macro SUMSUB_BA 2-3
279
-%if %0==2
280
-    paddw   %1, %2
281
-    paddw   %2, %2
282
-    psubw   %2, %1
288
+%macro SUMSUB_BA 3-4
289
+%if %0==3
290
+    padd%1  m%2, m%3
291
+    padd%1  m%3, m%3
292
+    psub%1  m%3, m%2
293
+%else
294
+%if avx_enabled == 0
295
+    mova    m%4, m%2
296
+    padd%1  m%2, m%3
297
+    psub%1  m%3, m%4
283 298
 %else
284
-    mova    %3, %1
285
-    paddw   %1, %2
286
-    psubw   %2, %3
299
+    padd%1  m%4, m%2, m%3
300
+    psub%1  m%3, m%2
301
+    SWAP    %2, %4
302
+%endif
287 303
 %endif
288 304
 %endmacro
289 305
 
290
-%macro SUMSUB_BADC 4-5
291
-%if %0==5
292
-    SUMSUB_BA %1, %2, %5
293
-    SUMSUB_BA %3, %4, %5
306
+%macro SUMSUB_BADC 5-6
307
+%if %0==6
308
+    SUMSUB_BA %1, %2, %3, %6
309
+    SUMSUB_BA %1, %4, %5, %6
294 310
 %else
295
-    paddw   %1, %2
296
-    paddw   %3, %4
297
-    paddw   %2, %2
298
-    paddw   %4, %4
299
-    psubw   %2, %1
300
-    psubw   %4, %3
311
+    padd%1  m%2, m%3
312
+    padd%1  m%4, m%5
313
+    padd%1  m%3, m%3
314
+    padd%1  m%5, m%5
315
+    psub%1  m%3, m%2
316
+    psub%1  m%5, m%4
301 317
 %endif
302 318
 %endmacro
303 319
 
304
-%macro SUMSUB2_AB 3
305
-    mova    %3, %1
306
-    paddw   %1, %1
307
-    paddw   %1, %2
308
-    psubw   %3, %2
309
-    psubw   %3, %2
320
+%macro SUMSUB2_AB 4
321
+%ifnum %3
322
+    psub%1  m%4, m%2, m%3
323
+    psub%1  m%4, m%3
324
+    padd%1  m%2, m%2
325
+    padd%1  m%2, m%3
326
+%else
327
+    mova    m%4, m%2
328
+    padd%1  m%2, m%2
329
+    padd%1  m%2, %3
330
+    psub%1  m%4, %3
331
+    psub%1  m%4, %3
332
+%endif
310 333
 %endmacro
311 334
 
312
-%macro SUMSUB2_BA 3
313
-    mova    m%3, m%1
314
-    paddw   m%1, m%2
315
-    paddw   m%1, m%2
316
-    psubw   m%2, m%3
317
-    psubw   m%2, m%3
335
+%macro SUMSUB2_BA 4
336
+%if avx_enabled == 0
337
+    mova    m%4, m%2
338
+    padd%1  m%2, m%3
339
+    padd%1  m%2, m%3
340
+    psub%1  m%3, m%4
341
+    psub%1  m%3, m%4
342
+%else
343
+    padd%1  m%4, m%2, m%3
344
+    padd%1  m%4, m%3
345
+    psub%1  m%3, m%2
346
+    psub%1  m%3, m%2
347
+    SWAP     %2,  %4
348
+%endif
318 349
 %endmacro
319 350
 
320
-%macro SUMSUBD2_AB 4
321
-    mova    %4, %1
322
-    mova    %3, %2
323
-    psraw   %2, 1  ; %2: %2>>1
324
-    psraw   %1, 1  ; %1: %1>>1
325
-    paddw   %2, %4 ; %2: %2>>1+%1
326
-    psubw   %1, %3 ; %1: %1>>1-%2
351
+%macro SUMSUBD2_AB 5
352
+%ifnum %4
353
+    psra%1  m%5, m%2, 1  ; %3: %3>>1
354
+    psra%1  m%4, m%3, 1  ; %2: %2>>1
355
+    padd%1  m%4, m%2     ; %3: %3>>1+%2
356
+    psub%1  m%5, m%3     ; %2: %2>>1-%3
357
+    SWAP     %2, %5
358
+    SWAP     %3, %4
359
+%else
360
+    mova    %5, m%2
361
+    mova    %4, m%3
362
+    psra%1  m%3, 1  ; %3: %3>>1
363
+    psra%1  m%2, 1  ; %2: %2>>1
364
+    padd%1  m%3, %5 ; %3: %3>>1+%2
365
+    psub%1  m%2, %4 ; %2: %2>>1-%3
366
+%endif
327 367
 %endmacro
328 368
 
329 369
 %macro DCT4_1D 5
330 370
 %ifnum %5
331
-    SUMSUB_BADC m%4, m%1, m%3, m%2; m%5
332
-    SUMSUB_BA   m%3, m%4, m%5
333
-    SUMSUB2_AB  m%1, m%2, m%5
371
+    SUMSUB_BADC w, %4, %1, %3, %2, %5
372
+    SUMSUB_BA   w, %3, %4, %5
373
+    SUMSUB2_AB  w, %1, %2, %5
334 374
     SWAP %1, %3, %4, %5, %2
335 375
 %else
336
-    SUMSUB_BADC m%4, m%1, m%3, m%2
337
-    SUMSUB_BA   m%3, m%4
338
-    mova       [%5], m%2
339
-    SUMSUB2_AB m%1, [%5], m%2
376
+    SUMSUB_BADC w, %4, %1, %3, %2
377
+    SUMSUB_BA   w, %3, %4
378
+    mova     [%5], m%2
379
+    SUMSUB2_AB  w, %1, [%5], %2
340 380
     SWAP %1, %3, %4, %2
341 381
 %endif
342 382
 %endmacro
343 383
 
344
-%macro IDCT4_1D 5-6
345
-%ifnum %5
346
-    SUMSUBD2_AB m%2, m%4, m%6, m%5
347
-    ; %2: %2>>1-%4 %4: %2+%4>>1
348
-    SUMSUB_BA   m%3, m%1, m%6
349
-    ; %3: %1+%3 %1: %1-%3
350
-    SUMSUB_BADC m%4, m%3, m%2, m%1, m%6
351
-    ; %4: %1+%3 + (%2+%4>>1)
352
-    ; %3: %1+%3 - (%2+%4>>1)
353
-    ; %2: %1-%3 + (%2>>1-%4)
354
-    ; %1: %1-%3 - (%2>>1-%4)
384
+%macro IDCT4_1D 6-7
385
+%ifnum %6
386
+    SUMSUBD2_AB %1, %3, %5, %7, %6
387
+    ; %3: %3>>1-%5 %5: %3+%5>>1
388
+    SUMSUB_BA   %1, %4, %2, %7
389
+    ; %4: %2+%4 %2: %2-%4
390
+    SUMSUB_BADC %1, %5, %4, %3, %2, %7
391
+    ; %5: %2+%4 + (%3+%5>>1)
392
+    ; %4: %2+%4 - (%3+%5>>1)
393
+    ; %3: %2-%4 + (%3>>1-%5)
394
+    ; %2: %2-%4 - (%3>>1-%5)
355 395
 %else
356
-    SUMSUBD2_AB m%2, m%4, [%5], [%5+16]
357
-    SUMSUB_BA   m%3, m%1
358
-    SUMSUB_BADC m%4, m%3, m%2, m%1
396
+%ifidn %1, w
397
+    SUMSUBD2_AB %1, %3, %5, [%6], [%6+16]
398
+%else
399
+    SUMSUBD2_AB %1, %3, %5, [%6], [%6+32]
400
+%endif
401
+    SUMSUB_BA   %1, %4, %2
402
+    SUMSUB_BADC %1, %5, %4, %3, %2
359 403
 %endif
360
-    SWAP %1, %4, %3
361
-    ; %1: %1+%3 + (%2+%4>>1) row0
362
-    ; %2: %1-%3 + (%2>>1-%4) row1
363
-    ; %3: %1-%3 - (%2>>1-%4) row2
364
-    ; %4: %1+%3 - (%2+%4>>1) row3
404
+    SWAP %2, %5, %4
405
+    ; %2: %2+%4 + (%3+%5>>1) row0
406
+    ; %3: %2-%4 + (%3>>1-%5) row1
407
+    ; %4: %2-%4 - (%3>>1-%5) row2
408
+    ; %5: %2+%4 - (%3+%5>>1) row3
365 409
 %endmacro
366 410
 
367 411
 
... ...
@@ -848,7 +848,10 @@ static int asf_read_frame_header(AVFormatContext *s, AVIOContext *pb){
848 848
     }
849 849
     if (asf->packet_flags & 0x01) {
850 850
         DO_2BITS(asf->packet_segsizetype >> 6, asf->packet_frag_size, 0); // 0 is illegal
851
-        if(asf->packet_frag_size > asf->packet_size_left - rsize){
851
+        if (rsize > asf->packet_size_left) {
852
+            av_log(s, AV_LOG_ERROR, "packet_replic_size is invalid\n");
853
+            return -1;
854
+        } else if(asf->packet_frag_size > asf->packet_size_left - rsize){
852 855
             if (asf->packet_frag_size > asf->packet_size_left - rsize + asf->packet_padsize) {
853 856
                 av_log(s, AV_LOG_ERROR, "packet_frag_size is invalid (%d-%d)\n", asf->packet_size_left, rsize);
854 857
                 return -1;
... ...
@@ -1261,21 +1264,22 @@ static int asf_read_seek(AVFormatContext *s, int stream_index, int64_t pts, int
1261 1261
     if (!asf->index_read)
1262 1262
         asf_build_simple_index(s, stream_index);
1263 1263
 
1264
-    if(!(asf->index_read && st->index_entries)){
1265
-        if(av_seek_frame_binary(s, stream_index, pts, flags)<0)
1266
-            return -1;
1267
-    }else{
1264
+    if((asf->index_read && st->index_entries)){
1268 1265
         index= av_index_search_timestamp(st, pts, flags);
1269
-        if(index<0)
1270
-            return -1;
1266
+        if(index >= 0) {
1267
+            /* find the position */
1268
+            pos = st->index_entries[index].pos;
1271 1269
 
1272
-        /* find the position */
1273
-        pos = st->index_entries[index].pos;
1274
-
1275
-        /* do the seek */
1276
-        av_log(s, AV_LOG_DEBUG, "SEEKTO: %"PRId64"\n", pos);
1277
-        avio_seek(s->pb, pos, SEEK_SET);
1270
+            /* do the seek */
1271
+            av_log(s, AV_LOG_DEBUG, "SEEKTO: %"PRId64"\n", pos);
1272
+            avio_seek(s->pb, pos, SEEK_SET);
1273
+            asf_reset_header(s);
1274
+            return 0;
1275
+        }
1278 1276
     }
1277
+    /* no index or seeking by index failed */
1278
+    if(av_seek_frame_binary(s, stream_index, pts, flags)<0)
1279
+        return -1;
1279 1280
     asf_reset_header(s);
1280 1281
     return 0;
1281 1282
 }
... ...
@@ -1290,4 +1294,5 @@ AVInputFormat ff_asf_demuxer = {
1290 1290
     asf_read_close,
1291 1291
     asf_read_seek,
1292 1292
     asf_read_pts,
1293
+    .flags = AVFMT_NOBINSEARCH | AVFMT_NOGENSEARCH,
1293 1294
 };
... ...
@@ -258,6 +258,8 @@ typedef struct AVFormatParameters {
258 258
 #define AVFMT_VARIABLE_FPS  0x0400 /**< Format allows variable fps. */
259 259
 #define AVFMT_NODIMENSIONS  0x0800 /**< Format does not need width/height */
260 260
 #define AVFMT_NOSTREAMS     0x1000 /**< Format does not require any streams */
261
+#define AVFMT_NOBINSEARCH   0x2000 /**< Format does not allow falling back to binary search via read_timestamp */
262
+#define AVFMT_NOGENSEARCH   0x4000 /**< Format does not allow falling back to generic search */
261 263
 
262 264
 typedef struct AVOutputFormat {
263 265
     const char *name;
... ...
@@ -375,7 +375,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
375 375
     size = avio_rb24(s->pb);
376 376
     dts = avio_rb24(s->pb);
377 377
     dts |= avio_r8(s->pb) << 24;
378
-//    av_log(s, AV_LOG_DEBUG, "type:%d, size:%d, dts:%d\n", type, size, dts);
378
+    av_dlog(s, "type:%d, size:%d, dts:%"PRId64"\n", type, size, dts);
379 379
     if (url_feof(s->pb))
380 380
         return AVERROR_EOF;
381 381
     avio_skip(s->pb, 3); /* stream id, always 0 */
... ...
@@ -421,7 +421,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
421 421
         st= create_stream(s, is_audio);
422 422
         s->ctx_flags &= ~AVFMTCTX_NOHEADER;
423 423
     }
424
-//    av_log(s, AV_LOG_DEBUG, "%d %X %d \n", is_audio, flags, st->discard);
424
+    av_dlog(s, "%d %X %d \n", is_audio, flags, st->discard);
425 425
     if(  (st->discard >= AVDISCARD_NONKEY && !((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY ||         is_audio))
426 426
        ||(st->discard >= AVDISCARD_BIDIR  &&  ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_DISP_INTER && !is_audio))
427 427
        || st->discard >= AVDISCARD_ALL
... ...
@@ -1742,10 +1742,12 @@ int av_seek_frame(AVFormatContext *s, int stream_index, int64_t timestamp, int f
1742 1742
         return 0;
1743 1743
     }
1744 1744
 
1745
-    if(s->iformat->read_timestamp)
1745
+    if(s->iformat->read_timestamp && !(s->iformat->flags & AVFMT_NOBINSEARCH))
1746 1746
         return av_seek_frame_binary(s, stream_index, timestamp, flags);
1747
-    else
1747
+    else if (!(s->iformat->flags & AVFMT_NOGENSEARCH))
1748 1748
         return av_seek_frame_generic(s, stream_index, timestamp, flags);
1749
+    else
1750
+        return -1;
1749 1751
 }
1750 1752
 
1751 1753
 int avformat_seek_file(AVFormatContext *s, int stream_index, int64_t min_ts, int64_t ts, int64_t max_ts, int flags)