Browse code

Merge remote-tracking branch 'qatar/master'

* qatar/master:
get_bits: remove x86 inline asm in A32 bitstream reader
doc: Remove outdated information about our issue tracker
avidec: Factor out the sync functionality.
fate-aac: Expand coverage.
ac3dsp: add x86-optimized versions of ac3dsp.extract_exponents().
ac3dsp: simplify extract_exponents() now that it does not need to do clipping.
ac3enc: clip coefficients after MDCT.
ac3enc: add int32_t array clipping function to DSPUtil, including x86 versions.
swscale: for >8bit scaling, read in native bit-depth.
matroskadec: matroska_read_seek after EBML_STOP leads to failure.
doxygen: fix usage of @file directive in libavutil/{dict,file}.h
doxygen: Help doxygen parser to understand the DECLARE_ALIGNED and offsetof macros

Conflicts:
doc/issue_tracker.txt
libavformat/avidec.c
libavutil/dict.h
libswscale/swscale.c
libswscale/utils.c
tests/ref/lavfi/pixfmts_scale

Merged-by: Michael Niedermayer <michaelni@gmx.at>

Michael Niedermayer authored on 2011/07/02 10:07:06
Showing 19 changed files
... ...
@@ -1160,6 +1160,7 @@ INCLUDE_FILE_PATTERNS  =
1160 1160
 
1161 1161
 PREDEFINED             = __attribute__(x)="" "RENAME(x)=x ## _TMPL" "DEF(x)=x ## _TMPL" \
1162 1162
                          HAVE_AV_CONFIG_H HAVE_MMX HAVE_MMX2 HAVE_AMD3DNOW \
1163
+                         "DECLARE_ALIGNED(a,t,n)=t n" "offsetof(x,y)=0x42" \
1163 1164
 
1164 1165
 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
1165 1166
 # this tag can be used to specify a list of macro names that should be expanded.
... ...
@@ -164,21 +164,8 @@ static void ac3_extract_exponents_c(uint8_t *exp, int32_t *coef, int nb_coefs)
164 164
     int i;
165 165
 
166 166
     for (i = 0; i < nb_coefs; i++) {
167
-        int e;
168 167
         int v = abs(coef[i]);
169
-        if (v == 0)
170
-            e = 24;
171
-        else {
172
-            e = 23 - av_log2(v);
173
-            if (e >= 24) {
174
-                e = 24;
175
-                coef[i] = 0;
176
-            } else if (e < 0) {
177
-                e = 0;
178
-                coef[i] = av_clip(coef[i], -16777215, 16777215);
179
-            }
180
-        }
181
-        exp[i] = e;
168
+        exp[i] = v ? 23 - av_log2(v) : 24;
182 169
     }
183 170
 }
184 171
 
... ...
@@ -50,12 +50,16 @@
50 50
 #if CONFIG_AC3ENC_FLOAT
51 51
 #define AC3_NAME(x) ff_ac3_float_ ## x
52 52
 #define MAC_COEF(d,a,b) ((d)+=(a)*(b))
53
+#define COEF_MIN (-16777215.0/16777216.0)
54
+#define COEF_MAX ( 16777215.0/16777216.0)
53 55
 typedef float SampleType;
54 56
 typedef float CoefType;
55 57
 typedef float CoefSumType;
56 58
 #else
57 59
 #define AC3_NAME(x) ff_ac3_fixed_ ## x
58 60
 #define MAC_COEF(d,a,b) MAC64(d,a,b)
61
+#define COEF_MIN -16777215
62
+#define COEF_MAX  16777215
59 63
 typedef int16_t SampleType;
60 64
 typedef int32_t CoefType;
61 65
 typedef int64_t CoefSumType;
... ...
@@ -104,6 +104,15 @@ static void scale_coefficients(AC3EncodeContext *s)
104 104
 }
105 105
 
106 106
 
107
+/**
108
+ * Clip MDCT coefficients to allowable range.
109
+ */
110
+static void clip_coefficients(DSPContext *dsp, int32_t *coef, unsigned int len)
111
+{
112
+    dsp->vector_clip_int32(coef, coef, COEF_MIN, COEF_MAX, len);
113
+}
114
+
115
+
107 116
 static av_cold int ac3_fixed_encode_init(AVCodecContext *avctx)
108 117
 {
109 118
     AC3EncodeContext *s = avctx->priv_data;
... ...
@@ -111,6 +111,15 @@ static void scale_coefficients(AC3EncodeContext *s)
111 111
 }
112 112
 
113 113
 
114
+/**
115
+ * Clip MDCT coefficients to allowable range.
116
+ */
117
+static void clip_coefficients(DSPContext *dsp, float *coef, unsigned int len)
118
+{
119
+    dsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len);
120
+}
121
+
122
+
114 123
 #if CONFIG_AC3_ENCODER
115 124
 AVCodec ff_ac3_float_encoder = {
116 125
     "ac3_float",
... ...
@@ -41,6 +41,8 @@ static void apply_window(DSPContext *dsp, SampleType *output,
41 41
 
42 42
 static int normalize_samples(AC3EncodeContext *s);
43 43
 
44
+static void clip_coefficients(DSPContext *dsp, CoefType *coef, unsigned int len);
45
+
44 46
 
45 47
 int AC3_NAME(allocate_sample_buffers)(AC3EncodeContext *s)
46 48
 {
... ...
@@ -171,8 +173,8 @@ static void apply_channel_coupling(AC3EncodeContext *s)
171 171
                 cpl_coef[i] += ch_coef[i];
172 172
         }
173 173
 
174
-        /* coefficients must be clipped to +/- 1.0 in order to be encoded */
175
-        s->dsp.vector_clipf(cpl_coef, cpl_coef, -1.0f, 1.0f, num_cpl_coefs);
174
+        /* coefficients must be clipped in order to be encoded */
175
+        clip_coefficients(&s->dsp, cpl_coef, num_cpl_coefs);
176 176
 
177 177
         /* scale coupling coefficients from float to 24-bit fixed-point */
178 178
         s->ac3dsp.float_to_fixed24(&block->fixed_coef[CPL_CH][cpl_start],
... ...
@@ -300,6 +302,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
300 300
         if (!block->cpl_in_use || !block->new_cpl_coords)
301 301
             continue;
302 302
 
303
+        clip_coefficients(&s->dsp, cpl_coords[blk][1], s->fbw_channels * 16);
303 304
         s->ac3dsp.float_to_fixed24(fixed_cpl_coords[blk][1],
304 305
                                    cpl_coords[blk][1],
305 306
                                    s->fbw_channels * 16);
... ...
@@ -433,7 +436,11 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, unsigned char *frame,
433 433
 
434 434
     apply_mdct(s);
435 435
 
436
-    scale_coefficients(s);
436
+    if (s->fixed_point)
437
+        scale_coefficients(s);
438
+
439
+    clip_coefficients(&s->dsp, s->blocks[0].mdct_coef[1],
440
+                      AC3_MAX_COEFS * AC3_MAX_BLOCKS * s->channels);
437 441
 
438 442
     s->cpl_on = s->cpl_enabled;
439 443
     ff_ac3_compute_coupling_strategy(s);
... ...
@@ -443,6 +450,9 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, unsigned char *frame,
443 443
 
444 444
     compute_rematrixing_strategy(s);
445 445
 
446
+    if (!s->fixed_point)
447
+        scale_coefficients(s);
448
+
446 449
     ff_ac3_apply_rematrixing(s);
447 450
 
448 451
     ff_ac3_process_exponents(s);
... ...
@@ -2664,6 +2664,22 @@ static void apply_window_int16_c(int16_t *output, const int16_t *input,
2664 2664
     }
2665 2665
 }
2666 2666
 
2667
+static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
2668
+                                int32_t max, unsigned int len)
2669
+{
2670
+    do {
2671
+        *dst++ = av_clip(*src++, min, max);
2672
+        *dst++ = av_clip(*src++, min, max);
2673
+        *dst++ = av_clip(*src++, min, max);
2674
+        *dst++ = av_clip(*src++, min, max);
2675
+        *dst++ = av_clip(*src++, min, max);
2676
+        *dst++ = av_clip(*src++, min, max);
2677
+        *dst++ = av_clip(*src++, min, max);
2678
+        *dst++ = av_clip(*src++, min, max);
2679
+        len -= 8;
2680
+    } while (len > 0);
2681
+}
2682
+
2667 2683
 #define W0 2048
2668 2684
 #define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
2669 2685
 #define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
... ...
@@ -3106,6 +3122,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
3106 3106
     c->scalarproduct_int16 = scalarproduct_int16_c;
3107 3107
     c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
3108 3108
     c->apply_window_int16 = apply_window_int16_c;
3109
+    c->vector_clip_int32 = vector_clip_int32_c;
3109 3110
     c->scalarproduct_float = scalarproduct_float_c;
3110 3111
     c->butterflies_float = butterflies_float_c;
3111 3112
     c->vector_fmul_scalar = vector_fmul_scalar_c;
... ...
@@ -553,6 +553,22 @@ typedef struct DSPContext {
553 553
     void (*apply_window_int16)(int16_t *output, const int16_t *input,
554 554
                                const int16_t *window, unsigned int len);
555 555
 
556
+    /**
557
+     * Clip each element in an array of int32_t to a given minimum and maximum value.
558
+     * @param dst  destination array
559
+     *             constraints: 16-byte aligned
560
+     * @param src  source array
561
+     *             constraints: 16-byte aligned
562
+     * @param min  minimum value
563
+     *             constraints: must be in the range [-(1<<24), 1<<24]
564
+     * @param max  maximum value
565
+     *             constraints: must be in the range [-(1<<24), 1<<24]
566
+     * @param len  number of elements in the array
567
+     *             constraints: multiple of 32 greater than zero
568
+     */
569
+    void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min,
570
+                              int32_t max, unsigned int len);
571
+
556 572
     /* rv30 functions */
557 573
     qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
558 574
     qpel_mc_func avg_rv30_tpel_pixels_tab[4][16];
... ...
@@ -201,19 +201,11 @@ static inline void skip_bits_long(GetBitContext *s, int n){
201 201
         }                                                               \
202 202
     } while (0)
203 203
 
204
-#if ARCH_X86
205
-#   define SKIP_CACHE(name, gb, num)                            \
206
-    __asm__("shldl %2, %1, %0          \n\t"                    \
207
-            "shll  %2, %1              \n\t"                    \
208
-            : "+r" (name##_cache0), "+r" (name##_cache1)        \
209
-            : "Ic" ((uint8_t)(num)))
210
-#else
211 204
 #   define SKIP_CACHE(name, gb, num) do {               \
212 205
         name##_cache0 <<= (num);                        \
213 206
         name##_cache0 |= NEG_USR32(name##_cache1,num);  \
214 207
         name##_cache1 <<= (num);                        \
215 208
     } while (0)
216
-#endif
217 209
 
218 210
 #   define SKIP_COUNTER(name, gb, num) name##_bit_count += (num)
219 211
 
... ...
@@ -32,6 +32,11 @@ cextern ac3_bap_bits
32 32
 pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768
33 33
 pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
34 34
 
35
+; used in ff_ac3_extract_exponents()
36
+pd_1:   times 4 dd 1
37
+pd_151: times 4 dd 151
38
+pb_shuf_4dwb: db 0, 4, 8, 12
39
+
35 40
 SECTION .text
36 41
 
37 42
 ;-----------------------------------------------------------------------------
... ...
@@ -346,3 +351,100 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
346 346
     movd       eax, m0
347 347
     add        eax, sumd
348 348
     RET
349
+
350
+;------------------------------------------------------------------------------
351
+; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs)
352
+;------------------------------------------------------------------------------
353
+
354
+%macro PABSD_MMX 2 ; src/dst, tmp
355
+    pxor     %2, %2
356
+    pcmpgtd  %2, %1
357
+    pxor     %1, %2
358
+    psubd    %1, %2
359
+%endmacro
360
+
361
+%macro PABSD_SSSE3 1-2 ; src/dst, unused
362
+    pabsd    %1, %1
363
+%endmacro
364
+
365
+%ifdef HAVE_AMD3DNOW
366
+INIT_MMX
367
+cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
368
+    add      expq, lenq
369
+    lea     coefq, [coefq+4*lenq]
370
+    neg      lenq
371
+    movq       m3, [pd_1]
372
+    movq       m4, [pd_151]
373
+.loop:
374
+    movq       m0, [coefq+4*lenq  ]
375
+    movq       m1, [coefq+4*lenq+8]
376
+    PABSD_MMX  m0, m2
377
+    PABSD_MMX  m1, m2
378
+    pslld      m0, 1
379
+    por        m0, m3
380
+    pi2fd      m2, m0
381
+    psrld      m2, 23
382
+    movq       m0, m4
383
+    psubd      m0, m2
384
+    pslld      m1, 1
385
+    por        m1, m3
386
+    pi2fd      m2, m1
387
+    psrld      m2, 23
388
+    movq       m1, m4
389
+    psubd      m1, m2
390
+    packssdw   m0, m0
391
+    packuswb   m0, m0
392
+    packssdw   m1, m1
393
+    packuswb   m1, m1
394
+    punpcklwd  m0, m1
395
+    movd  [expq+lenq], m0
396
+    add      lenq, 4
397
+    jl .loop
398
+    REP_RET
399
+%endif
400
+
401
+%macro AC3_EXTRACT_EXPONENTS 1
402
+cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
403
+    add     expq, lenq
404
+    lea    coefq, [coefq+4*lenq]
405
+    neg     lenq
406
+    mova      m2, [pd_1]
407
+    mova      m3, [pd_151]
408
+%ifidn %1, ssse3 ;
409
+    movd      m4, [pb_shuf_4dwb]
410
+%endif
411
+.loop:
412
+    ; move 4 32-bit coefs to xmm0
413
+    mova      m0, [coefq+4*lenq]
414
+    ; absolute value
415
+    PABSD     m0, m1
416
+    ; convert to float and extract exponents
417
+    pslld     m0, 1
418
+    por       m0, m2
419
+    cvtdq2ps  m1, m0
420
+    psrld     m1, 23
421
+    mova      m0, m3
422
+    psubd     m0, m1
423
+    ; move the lowest byte in each of 4 dwords to the low dword
424
+%ifidn %1, ssse3
425
+    pshufb    m0, m4
426
+%else
427
+    packssdw  m0, m0
428
+    packuswb  m0, m0
429
+%endif
430
+    movd  [expq+lenq], m0
431
+
432
+    add     lenq, 4
433
+    jl .loop
434
+    REP_RET
435
+%endmacro
436
+
437
+%ifdef HAVE_SSE
438
+INIT_XMM
439
+%define PABSD PABSD_MMX
440
+AC3_EXTRACT_EXPONENTS sse2
441
+%ifdef HAVE_SSSE3
442
+%define PABSD PABSD_SSSE3
443
+AC3_EXTRACT_EXPONENTS ssse3
444
+%endif
445
+%endif
... ...
@@ -44,6 +44,10 @@ extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned i
44 44
 
45 45
 extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
46 46
 
47
+extern void ff_ac3_extract_exponents_3dnow(uint8_t *exp, int32_t *coef, int nb_coefs);
48
+extern void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs);
49
+extern void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs);
50
+
47 51
 av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
48 52
 {
49 53
     int mm_flags = av_get_cpu_flags();
... ...
@@ -56,6 +60,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
56 56
         c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
57 57
     }
58 58
     if (mm_flags & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) {
59
+        c->extract_exponents = ff_ac3_extract_exponents_3dnow;
59 60
         if (!bit_exact) {
60 61
             c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
61 62
         }
... ...
@@ -72,6 +77,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
72 72
         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
73 73
         c->float_to_fixed24 = ff_float_to_fixed24_sse2;
74 74
         c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
75
+        c->extract_exponents = ff_ac3_extract_exponents_sse2;
75 76
         if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
76 77
             c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
77 78
             c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
... ...
@@ -79,6 +85,9 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
79 79
     }
80 80
     if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) {
81 81
         c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
82
+        if (!(mm_flags & AV_CPU_FLAG_ATOM)) {
83
+            c->extract_exponents = ff_ac3_extract_exponents_ssse3;
84
+        }
82 85
     }
83 86
 #endif
84 87
 }
... ...
@@ -2333,6 +2333,15 @@ int  ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, i
2333 2333
 
2334 2334
 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
2335 2335
 
2336
+void ff_vector_clip_int32_mmx     (int32_t *dst, const int32_t *src, int32_t min,
2337
+                                   int32_t max, unsigned int len);
2338
+void ff_vector_clip_int32_sse2    (int32_t *dst, const int32_t *src, int32_t min,
2339
+                                   int32_t max, unsigned int len);
2340
+void ff_vector_clip_int32_sse2_int(int32_t *dst, const int32_t *src, int32_t min,
2341
+                                   int32_t max, unsigned int len);
2342
+void ff_vector_clip_int32_sse41   (int32_t *dst, const int32_t *src, int32_t min,
2343
+                                   int32_t max, unsigned int len);
2344
+
2336 2345
 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2337 2346
 {
2338 2347
     int mm_flags = av_get_cpu_flags();
... ...
@@ -2473,6 +2482,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2473 2473
 
2474 2474
         c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
2475 2475
         c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
2476
+
2477
+        c->vector_clip_int32 = ff_vector_clip_int32_mmx;
2476 2478
 #endif
2477 2479
 
2478 2480
         if (mm_flags & AV_CPU_FLAG_MMX2) {
... ...
@@ -2756,6 +2767,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2756 2756
 #if HAVE_YASM
2757 2757
             c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
2758 2758
             c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
2759
+            if (mm_flags & AV_CPU_FLAG_ATOM) {
2760
+                c->vector_clip_int32 = ff_vector_clip_int32_sse2_int;
2761
+            } else {
2762
+                c->vector_clip_int32 = ff_vector_clip_int32_sse2;
2763
+            }
2759 2764
             if (avctx->flags & CODEC_FLAG_BITEXACT) {
2760 2765
                 c->apply_window_int16 = ff_apply_window_int16_sse2_ba;
2761 2766
             } else {
... ...
@@ -2781,6 +2797,13 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2781 2781
             }
2782 2782
 #endif
2783 2783
         }
2784
+
2785
+        if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
2786
+#if HAVE_YASM
2787
+            c->vector_clip_int32 = ff_vector_clip_int32_sse41;
2788
+#endif
2789
+        }
2790
+
2784 2791
 #if HAVE_AVX && HAVE_YASM
2785 2792
         if (mm_flags & AV_CPU_FLAG_AVX) {
2786 2793
             if (bit_depth == 10) {
... ...
@@ -1048,3 +1048,118 @@ emu_edge sse
1048 1048
 %ifdef ARCH_X86_32
1049 1049
 emu_edge mmx
1050 1050
 %endif
1051
+
1052
+;-----------------------------------------------------------------------------
1053
+; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
1054
+;                           int32_t max, unsigned int len)
1055
+;-----------------------------------------------------------------------------
1056
+
1057
+%macro PMINSD_MMX 3 ; dst, src, tmp
1058
+    mova      %3, %2
1059
+    pcmpgtd   %3, %1
1060
+    pxor      %1, %2
1061
+    pand      %1, %3
1062
+    pxor      %1, %2
1063
+%endmacro
1064
+
1065
+%macro PMAXSD_MMX 3 ; dst, src, tmp
1066
+    mova      %3, %1
1067
+    pcmpgtd   %3, %2
1068
+    pand      %1, %3
1069
+    pandn     %3, %2
1070
+    por       %1, %3
1071
+%endmacro
1072
+
1073
+%macro CLIPD_MMX 3-4 ; src/dst, min, max, tmp
1074
+    PMINSD_MMX %1, %3, %4
1075
+    PMAXSD_MMX %1, %2, %4
1076
+%endmacro
1077
+
1078
+%macro CLIPD_SSE2 3-4 ; src/dst, min (float), max (float), unused
1079
+    cvtdq2ps  %1, %1
1080
+    minps     %1, %3
1081
+    maxps     %1, %2
1082
+    cvtps2dq  %1, %1
1083
+%endmacro
1084
+
1085
+%macro CLIPD_SSE41 3-4 ;  src/dst, min, max, unused
1086
+    pminsd  %1, %3
1087
+    pmaxsd  %1, %2
1088
+%endmacro
1089
+
1090
+%macro SPLATD_MMX 1
1091
+    punpckldq  %1, %1
1092
+%endmacro
1093
+
1094
+%macro SPLATD_SSE2 1
1095
+    pshufd  %1, %1, 0
1096
+%endmacro
1097
+
1098
+%macro VECTOR_CLIP_INT32 4
1099
+cglobal vector_clip_int32_%1, 5,5,%2, dst, src, min, max, len
1100
+%ifidn %1, sse2
1101
+    cvtsi2ss  m4, minm
1102
+    cvtsi2ss  m5, maxm
1103
+%else
1104
+    movd      m4, minm
1105
+    movd      m5, maxm
1106
+%endif
1107
+    SPLATD    m4
1108
+    SPLATD    m5
1109
+.loop:
1110
+%assign %%i 1
1111
+%rep %3
1112
+    mova      m0,  [srcq+mmsize*0*%%i]
1113
+    mova      m1,  [srcq+mmsize*1*%%i]
1114
+    mova      m2,  [srcq+mmsize*2*%%i]
1115
+    mova      m3,  [srcq+mmsize*3*%%i]
1116
+%if %4
1117
+    mova      m7,  [srcq+mmsize*4*%%i]
1118
+    mova      m8,  [srcq+mmsize*5*%%i]
1119
+    mova      m9,  [srcq+mmsize*6*%%i]
1120
+    mova      m10, [srcq+mmsize*7*%%i]
1121
+%endif
1122
+    CLIPD  m0,  m4, m5, m6
1123
+    CLIPD  m1,  m4, m5, m6
1124
+    CLIPD  m2,  m4, m5, m6
1125
+    CLIPD  m3,  m4, m5, m6
1126
+%if %4
1127
+    CLIPD  m7,  m4, m5, m6
1128
+    CLIPD  m8,  m4, m5, m6
1129
+    CLIPD  m9,  m4, m5, m6
1130
+    CLIPD  m10, m4, m5, m6
1131
+%endif
1132
+    mova  [dstq+mmsize*0*%%i], m0
1133
+    mova  [dstq+mmsize*1*%%i], m1
1134
+    mova  [dstq+mmsize*2*%%i], m2
1135
+    mova  [dstq+mmsize*3*%%i], m3
1136
+%if %4
1137
+    mova  [dstq+mmsize*4*%%i], m7
1138
+    mova  [dstq+mmsize*5*%%i], m8
1139
+    mova  [dstq+mmsize*6*%%i], m9
1140
+    mova  [dstq+mmsize*7*%%i], m10
1141
+%endif
1142
+%assign %%i %%i+1
1143
+%endrep
1144
+    add     srcq, mmsize*4*(%3+%4)
1145
+    add     dstq, mmsize*4*(%3+%4)
1146
+    sub     lend, mmsize*(%3+%4)
1147
+    jg .loop
1148
+    REP_RET
1149
+%endmacro
1150
+
1151
+INIT_MMX
1152
+%define SPLATD SPLATD_MMX
1153
+%define CLIPD CLIPD_MMX
1154
+VECTOR_CLIP_INT32 mmx, 0, 1, 0
1155
+INIT_XMM
1156
+%define SPLATD SPLATD_SSE2
1157
+VECTOR_CLIP_INT32 sse2_int, 6, 1, 0
1158
+%define CLIPD CLIPD_SSE2
1159
+VECTOR_CLIP_INT32 sse2, 6, 2, 0
1160
+%define CLIPD CLIPD_SSE41
1161
+%ifdef m8
1162
+VECTOR_CLIP_INT32 sse41, 11, 1, 1
1163
+%else
1164
+VECTOR_CLIP_INT32 sse41, 6, 1, 0
1165
+%endif
... ...
@@ -861,13 +861,137 @@ static int get_stream_idx(int *d){
861 861
     }
862 862
 }
863 863
 
864
-static int avi_read_packet(AVFormatContext *s, AVPacket *pkt)
864
+static int avi_sync(AVFormatContext *s)
865 865
 {
866 866
     AVIContext *avi = s->priv_data;
867 867
     AVIOContext *pb = s->pb;
868 868
     int n, d[8];
869 869
     unsigned int size;
870 870
     int64_t i, sync;
871
+
872
+start_sync:
873
+    memset(d, -1, sizeof(int)*8);
874
+    for(i=sync=avio_tell(pb); !url_feof(pb); i++) {
875
+        int j;
876
+
877
+        for(j=0; j<7; j++)
878
+            d[j]= d[j+1];
879
+        d[7]= avio_r8(pb);
880
+
881
+        size= d[4] + (d[5]<<8) + (d[6]<<16) + (d[7]<<24);
882
+
883
+        n= get_stream_idx(d+2);
884
+//av_log(s, AV_LOG_DEBUG, "%X %X %X %X %X %X %X %X %"PRId64" %d %d\n", d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], i, size, n);
885
+        if(i + (uint64_t)size > avi->fsize || d[0]<0)
886
+            continue;
887
+
888
+        //parse ix##
889
+        if(  (d[0] == 'i' && d[1] == 'x' && n < s->nb_streams)
890
+        //parse JUNK
891
+           ||(d[0] == 'J' && d[1] == 'U' && d[2] == 'N' && d[3] == 'K')
892
+           ||(d[0] == 'i' && d[1] == 'd' && d[2] == 'x' && d[3] == '1')){
893
+            avio_skip(pb, size);
894
+//av_log(s, AV_LOG_DEBUG, "SKIP\n");
895
+            goto start_sync;
896
+        }
897
+
898
+        //parse stray LIST
899
+        if(d[0] == 'L' && d[1] == 'I' && d[2] == 'S' && d[3] == 'T'){
900
+            avio_skip(pb, 4);
901
+            goto start_sync;
902
+        }
903
+
904
+        n= get_stream_idx(d);
905
+
906
+        if(!((i-avi->last_pkt_pos)&1) && get_stream_idx(d+1) < s->nb_streams)
907
+            continue;
908
+
909
+        //detect ##ix chunk and skip
910
+        if(d[2] == 'i' && d[3] == 'x' && n < s->nb_streams){
911
+            avio_skip(pb, size);
912
+            goto start_sync;
913
+        }
914
+
915
+        //parse ##dc/##wb
916
+        if(n < s->nb_streams){
917
+            AVStream *st;
918
+            AVIStream *ast;
919
+            st = s->streams[n];
920
+            ast = st->priv_data;
921
+
922
+            if(s->nb_streams>=2){
923
+                AVStream *st1  = s->streams[1];
924
+                AVIStream *ast1= st1->priv_data;
925
+                //workaround for broken small-file-bug402.avi
926
+                if(   d[2] == 'w' && d[3] == 'b'
927
+                   && n==0
928
+                   && st ->codec->codec_type == AVMEDIA_TYPE_VIDEO
929
+                   && st1->codec->codec_type == AVMEDIA_TYPE_AUDIO
930
+                   && ast->prefix == 'd'*256+'c'
931
+                   && (d[2]*256+d[3] == ast1->prefix || !ast1->prefix_count)
932
+                  ){
933
+                    n=1;
934
+                    st = st1;
935
+                    ast = ast1;
936
+                    av_log(s, AV_LOG_WARNING, "Invalid stream + prefix combination, assuming audio.\n");
937
+                }
938
+            }
939
+
940
+
941
+            if(   (st->discard >= AVDISCARD_DEFAULT && size==0)
942
+               /*|| (st->discard >= AVDISCARD_NONKEY && !(pkt->flags & AV_PKT_FLAG_KEY))*/ //FIXME needs a little reordering
943
+               || st->discard >= AVDISCARD_ALL){
944
+                ast->frame_offset += get_duration(ast, size);
945
+                avio_skip(pb, size);
946
+                goto start_sync;
947
+            }
948
+
949
+            if (d[2] == 'p' && d[3] == 'c' && size<=4*256+4) {
950
+                int k = avio_r8(pb);
951
+                int last = (k + avio_r8(pb) - 1) & 0xFF;
952
+
953
+                avio_rl16(pb); //flags
954
+
955
+                for (; k <= last; k++)
956
+                    ast->pal[k] = avio_rb32(pb)>>8;// b + (g << 8) + (r << 16);
957
+                ast->has_pal= 1;
958
+                goto start_sync;
959
+            } else if(   ((ast->prefix_count<5 || sync+9 > i) && d[2]<128 && d[3]<128) ||
960
+                         d[2]*256+d[3] == ast->prefix /*||
961
+                         (d[2] == 'd' && d[3] == 'c') ||
962
+                         (d[2] == 'w' && d[3] == 'b')*/) {
963
+
964
+//av_log(s, AV_LOG_DEBUG, "OK\n");
965
+                if(d[2]*256+d[3] == ast->prefix)
966
+                    ast->prefix_count++;
967
+                else{
968
+                    ast->prefix= d[2]*256+d[3];
969
+                    ast->prefix_count= 0;
970
+                }
971
+
972
+                avi->stream_index= n;
973
+                ast->packet_size= size + 8;
974
+                ast->remaining= size;
975
+
976
+                if(size || !ast->sample_size){
977
+                    uint64_t pos= avio_tell(pb) - 8;
978
+                    if(!st->index_entries || !st->nb_index_entries || st->index_entries[st->nb_index_entries - 1].pos < pos){
979
+                        av_add_index_entry(st, pos, ast->frame_offset, size, 0, AVINDEX_KEYFRAME);
980
+                    }
981
+                }
982
+                return 0;
983
+            }
984
+        }
985
+    }
986
+
987
+    return AVERROR_EOF;
988
+}
989
+
990
+static int avi_read_packet(AVFormatContext *s, AVPacket *pkt)
991
+{
992
+    AVIContext *avi = s->priv_data;
993
+    AVIOContext *pb = s->pb;
994
+    int err;
871 995
     void* dstr;
872 996
 
873 997
     if (CONFIG_DV_DEMUXER && avi->dv_demux) {
... ...
@@ -1041,121 +1165,9 @@ resync:
1041 1041
         return size;
1042 1042
     }
1043 1043
 
1044
-    memset(d, -1, sizeof(int)*8);
1045
-    for(i=sync=avio_tell(pb); !url_feof(pb); i++) {
1046
-        int j;
1047
-
1048
-        for(j=0; j<7; j++)
1049
-            d[j]= d[j+1];
1050
-        d[7]= avio_r8(pb);
1051
-
1052
-        size= d[4] + (d[5]<<8) + (d[6]<<16) + (d[7]<<24);
1053
-
1054
-        n= get_stream_idx(d+2);
1055
-//av_log(s, AV_LOG_DEBUG, "%X %X %X %X %X %X %X %X %"PRId64" %d %d\n", d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], i, size, n);
1056
-        if(i + (uint64_t)size > avi->fsize || d[0]<0)
1057
-            continue;
1058
-
1059
-        //parse ix##
1060
-        if(  (d[0] == 'i' && d[1] == 'x' && n < s->nb_streams)
1061
-        //parse JUNK
1062
-           ||(d[0] == 'J' && d[1] == 'U' && d[2] == 'N' && d[3] == 'K')
1063
-           ||(d[0] == 'i' && d[1] == 'd' && d[2] == 'x' && d[3] == '1')){
1064
-            avio_skip(pb, size);
1065
-//av_log(s, AV_LOG_DEBUG, "SKIP\n");
1066
-            goto resync;
1067
-        }
1068
-
1069
-        //parse stray LIST
1070
-        if(d[0] == 'L' && d[1] == 'I' && d[2] == 'S' && d[3] == 'T'){
1071
-            avio_skip(pb, 4);
1072
-            goto resync;
1073
-        }
1074
-
1075
-        n= get_stream_idx(d);
1076
-
1077
-        if(!((i-avi->last_pkt_pos)&1) && get_stream_idx(d+1) < s->nb_streams)
1078
-            continue;
1079
-
1080
-        //detect ##ix chunk and skip
1081
-        if(d[2] == 'i' && d[3] == 'x' && n < s->nb_streams){
1082
-            avio_skip(pb, size);
1083
-            goto resync;
1084
-        }
1085
-
1086
-        //parse ##dc/##wb
1087
-        if(n < s->nb_streams){
1088
-            AVStream *st;
1089
-            AVIStream *ast;
1090
-            st = s->streams[n];
1091
-            ast = st->priv_data;
1092
-
1093
-            if(s->nb_streams>=2){
1094
-                AVStream *st1  = s->streams[1];
1095
-                AVIStream *ast1= st1->priv_data;
1096
-                //workaround for broken small-file-bug402.avi
1097
-                if(   d[2] == 'w' && d[3] == 'b'
1098
-                   && n==0
1099
-                   && st ->codec->codec_type == AVMEDIA_TYPE_VIDEO
1100
-                   && st1->codec->codec_type == AVMEDIA_TYPE_AUDIO
1101
-                   && ast->prefix == 'd'*256+'c'
1102
-                   && (d[2]*256+d[3] == ast1->prefix || !ast1->prefix_count)
1103
-                  ){
1104
-                    n=1;
1105
-                    st = st1;
1106
-                    ast = ast1;
1107
-                    av_log(s, AV_LOG_WARNING, "Invalid stream + prefix combination, assuming audio.\n");
1108
-                }
1109
-            }
1110
-
1111
-
1112
-            if(   (st->discard >= AVDISCARD_DEFAULT && size==0)
1113
-               /*|| (st->discard >= AVDISCARD_NONKEY && !(pkt->flags & AV_PKT_FLAG_KEY))*/ //FIXME needs a little reordering
1114
-               || st->discard >= AVDISCARD_ALL){
1115
-                ast->frame_offset += get_duration(ast, size);
1116
-                avio_skip(pb, size);
1117
-                goto resync;
1118
-            }
1119
-
1120
-            if (d[2] == 'p' && d[3] == 'c' && size<=4*256+4) {
1121
-                int k = avio_r8(pb);
1122
-                int last = (k + avio_r8(pb) - 1) & 0xFF;
1123
-
1124
-                avio_rl16(pb); //flags
1125
-
1126
-                for (; k <= last; k++)
1127
-                    ast->pal[k] = avio_rb32(pb)>>8;// b + (g << 8) + (r << 16);
1128
-                ast->has_pal= 1;
1129
-                goto resync;
1130
-            } else if(   ((ast->prefix_count<5 || sync+9 > i) && d[2]<128 && d[3]<128) ||
1131
-                         d[2]*256+d[3] == ast->prefix /*||
1132
-                         (d[2] == 'd' && d[3] == 'c') ||
1133
-                         (d[2] == 'w' && d[3] == 'b')*/) {
1134
-
1135
-//av_log(s, AV_LOG_DEBUG, "OK\n");
1136
-                if(d[2]*256+d[3] == ast->prefix)
1137
-                    ast->prefix_count++;
1138
-                else{
1139
-                    ast->prefix= d[2]*256+d[3];
1140
-                    ast->prefix_count= 0;
1141
-                }
1142
-
1143
-                avi->stream_index= n;
1144
-                ast->packet_size= size + 8;
1145
-                ast->remaining= size;
1146
-
1147
-                if(size || !ast->sample_size){
1148
-                    uint64_t pos= avio_tell(pb) - 8;
1149
-                    if(!st->index_entries || !st->nb_index_entries || st->index_entries[st->nb_index_entries - 1].pos < pos){
1150
-                        av_add_index_entry(st, pos, ast->frame_offset, size, 0, AVINDEX_KEYFRAME);
1151
-                    }
1152
-                }
1153
-                goto resync;
1154
-            }
1155
-        }
1156
-    }
1157
-
1158
-    return AVERROR_EOF;
1044
+    if ((err = avi_sync(s)) < 0)
1045
+        return err;
1046
+    goto resync;
1159 1047
 }
1160 1048
 
1161 1049
 /* XXX: We make the implicit supposition that the positions are sorted
... ...
@@ -1960,6 +1960,7 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
1960 1960
 
1961 1961
     if ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
1962 1962
         avio_seek(s->pb, st->index_entries[st->nb_index_entries-1].pos, SEEK_SET);
1963
+        matroska->current_id = 0;
1963 1964
         while ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
1964 1965
             matroska_clear_queue(matroska);
1965 1966
             if (matroska_parse_cluster(matroska) < 0)
... ...
@@ -1988,6 +1989,7 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
1988 1988
     }
1989 1989
 
1990 1990
     avio_seek(s->pb, st->index_entries[index_min].pos, SEEK_SET);
1991
+    matroska->current_id = 0;
1991 1992
     matroska->skip_to_keyframe = !(flags & AVSEEK_FLAG_ANY);
1992 1993
     matroska->skip_to_timecode = st->index_entries[index].timestamp;
1993 1994
     matroska->done = 0;
... ...
@@ -18,7 +18,8 @@
18 18
  */
19 19
 
20 20
 /**
21
- * @file Public dictionary API.
21
+ * @file
22
+ * Public dictionary API.
22 23
  * @deprecated
23 24
  *  AVDictionary is provided for compatibility with libav. It is both in
24 25
  *  implementation as well as API inefficient. It does not scale and is
... ...
@@ -22,7 +22,8 @@
22 22
 #include "avutil.h"
23 23
 
24 24
 /**
25
- * @file misc file utilities
25
+ * @file
26
+ * Misc file utilities.
26 27
  */
27 28
 
28 29
 /**
... ...
@@ -1783,53 +1783,6 @@ static void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
1783 1783
 
1784 1784
 #define input_pixel(pos) (isBE(origin) ? AV_RB16(pos) : AV_RL16(pos))
1785 1785
 
1786
-// FIXME Maybe dither instead.
1787
-static av_always_inline void
1788
-yuv9_OR_10ToUV_c_template(uint8_t *dstU, uint8_t *dstV,
1789
-                          const uint8_t *_srcU, const uint8_t *_srcV,
1790
-                          int width, enum PixelFormat origin, int depth)
1791
-{
1792
-    int i;
1793
-    const uint16_t *srcU = (const uint16_t *) _srcU;
1794
-    const uint16_t *srcV = (const uint16_t *) _srcV;
1795
-
1796
-    for (i = 0; i < width; i++) {
1797
-        dstU[i] = input_pixel(&srcU[i]) >> (depth - 8);
1798
-        dstV[i] = input_pixel(&srcV[i]) >> (depth - 8);
1799
-    }
1800
-}
1801
-
1802
-static av_always_inline void
1803
-yuv9_or_10ToY_c_template(uint8_t *dstY, const uint8_t *_srcY,
1804
-                         int width, enum PixelFormat origin, int depth)
1805
-{
1806
-    int i;
1807
-    const uint16_t *srcY = (const uint16_t*)_srcY;
1808
-
1809
-    for (i = 0; i < width; i++)
1810
-        dstY[i] = input_pixel(&srcY[i]) >> (depth - 8);
1811
-}
1812
-
1813
-#undef input_pixel
1814
-
1815
-#define YUV_NBPS(depth, BE_LE, origin) \
1816
-static void BE_LE ## depth ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
1817
-                                     const uint8_t *srcU, const uint8_t *srcV, \
1818
-                                     int width, uint32_t *unused) \
1819
-{ \
1820
-    yuv9_OR_10ToUV_c_template(dstU, dstV, srcU, srcV, width, origin, depth); \
1821
-} \
1822
-static void BE_LE ## depth ## ToY_c(uint8_t *dstY, const uint8_t *srcY, \
1823
-                                    int width, uint32_t *unused) \
1824
-{ \
1825
-    yuv9_or_10ToY_c_template(dstY, srcY, width, origin, depth); \
1826
-}
1827
-
1828
-YUV_NBPS( 9, LE, PIX_FMT_YUV420P9LE);
1829
-YUV_NBPS( 9, BE, PIX_FMT_YUV420P9BE);
1830
-YUV_NBPS(10, LE, PIX_FMT_YUV420P10LE);
1831
-YUV_NBPS(10, BE, PIX_FMT_YUV420P10BE);
1832
-
1833 1786
 static void bgr24ToY_c(int16_t *dst, const uint8_t *src,
1834 1787
                        int width, uint32_t *unused)
1835 1788
 {
... ...
@@ -2,10 +2,22 @@ FATE_AAC += fate-aac-al04_44
2 2
 fate-aac-al04_44: CMD = pcm -i $(SAMPLES)/aac/al04_44.mp4
3 3
 fate-aac-al04_44: REF = $(SAMPLES)/aac/al04_44.s16
4 4
 
5
+FATE_AAC += fate-aac-al05_44
6
+fate-aac-al05_44: CMD = pcm -i $(SAMPLES)/aac/al05_44.mp4
7
+fate-aac-al05_44: REF = $(SAMPLES)/aac/al05_44.s16
8
+
9
+FATE_AAC += fate-aac-al06_44
10
+fate-aac-al06_44: CMD = pcm -i $(SAMPLES)/aac/al06_44.mp4
11
+fate-aac-al06_44: REF = $(SAMPLES)/aac/al06_44.s16
12
+
5 13
 FATE_AAC += fate-aac-al07_96
6 14
 fate-aac-al07_96: CMD = pcm -i $(SAMPLES)/aac/al07_96.mp4
7 15
 fate-aac-al07_96: REF = $(SAMPLES)/aac/al07_96.s16
8 16
 
17
+FATE_AAC += fate-aac-al17_44
18
+fate-aac-al17_44: CMD = pcm -i $(SAMPLES)/aac/al17_44.mp4
19
+fate-aac-al17_44: REF = $(SAMPLES)/aac/al17_44.s16
20
+
9 21
 FATE_AAC += fate-aac-am00_88
10 22
 fate-aac-am00_88: CMD = pcm -i $(SAMPLES)/aac/am00_88.mp4
11 23
 fate-aac-am00_88: REF = $(SAMPLES)/aac/am00_88.s16