Browse code

Merge remote-tracking branch 'qatar/master'

* qatar/master: (23 commits)
fix AC3ENC_OPT_MODE_ON/OFF
h264: fix HRD parameters parsing
prores: implement multithreading.
prores: idct sse2/sse4 optimizations.
swscale: use aligned move for storage into temporary buffer.
prores: extract idct into its own dspcontext and merge with put_pixels.
h264: fix invalid shifts in init_cavlc_level_tab()
intfloat_readwrite: fix signed addition overflows
mov: do not misreport empty stts
mov: cosmetics, fix for and if spacing
id3v2: fix NULL pointer dereference
mov: read album_artist atom
mov: fix disc/track numbers and totals
doc: fix references to obsolete presets directories for avconv/ffmpeg
flashsv: return more meaningful error value
flashsv: fix typo in av_log() message
smacker: validate channels and sample format.
smacker: check buffer size before reading output size
smacker: validate number of channels
smacker: Separate audio flags from sample rates in smacker demuxer.
...

Conflicts:
cmdutils.h
doc/ffmpeg.texi
libavcodec/Makefile
libavcodec/motion_est_template.c
libavformat/id3v2.c
libavformat/mov.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>

Michael Niedermayer authored on 2011/10/12 12:33:52
Showing 27 changed files
... ...
@@ -314,7 +314,7 @@ OBJS-$(CONFIG_PNG_ENCODER)             += png.o pngenc.o
314 314
 OBJS-$(CONFIG_PPM_DECODER)             += pnmdec.o pnm.o
315 315
 OBJS-$(CONFIG_PPM_ENCODER)             += pnmenc.o pnm.o
316 316
 OBJS-$(CONFIG_PRORES_GPL_DECODER)      += proresdec_gpl.o
317
-OBJS-$(CONFIG_PRORES_LGPL_DECODER)     += proresdec_lgpl.o
317
+OBJS-$(CONFIG_PRORES_LGPL_DECODER)     += proresdec_lgpl.o proresdsp.o
318 318
 OBJS-$(CONFIG_PTX_DECODER)             += ptx.o
319 319
 OBJS-$(CONFIG_QCELP_DECODER)           += qcelpdec.o celp_math.o         \
320 320
                                           celp_filters.o acelp_vectors.o \
... ...
@@ -73,8 +73,8 @@ typedef int64_t CoefSumType;
73 73
 #define AC3ENC_OPT_OFF              0
74 74
 #define AC3ENC_OPT_ON               1
75 75
 #define AC3ENC_OPT_NOT_INDICATED    0
76
-#define AC3ENC_OPT_MODE_ON          1
77
-#define AC3ENC_OPT_MODE_OFF         2
76
+#define AC3ENC_OPT_MODE_ON          2
77
+#define AC3ENC_OPT_MODE_OFF         1
78 78
 
79 79
 /* specific option values */
80 80
 #define AC3ENC_OPT_LARGE_ROOM       1
... ...
@@ -144,6 +144,41 @@ void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_s
144 144
     }
145 145
 }
146 146
 
147
+void ff_init_scantable_permutation(uint8_t *idct_permutation,
148
+                                   int idct_permutation_type)
149
+{
150
+    int i;
151
+
152
+    switch(idct_permutation_type){
153
+    case FF_NO_IDCT_PERM:
154
+        for(i=0; i<64; i++)
155
+            idct_permutation[i]= i;
156
+        break;
157
+    case FF_LIBMPEG2_IDCT_PERM:
158
+        for(i=0; i<64; i++)
159
+            idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
160
+        break;
161
+    case FF_SIMPLE_IDCT_PERM:
162
+        for(i=0; i<64; i++)
163
+            idct_permutation[i]= simple_mmx_permutation[i];
164
+        break;
165
+    case FF_TRANSPOSE_IDCT_PERM:
166
+        for(i=0; i<64; i++)
167
+            idct_permutation[i]= ((i&7)<<3) | (i>>3);
168
+        break;
169
+    case FF_PARTTRANS_IDCT_PERM:
170
+        for(i=0; i<64; i++)
171
+            idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
172
+        break;
173
+    case FF_SSE2_IDCT_PERM:
174
+        for(i=0; i<64; i++)
175
+            idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
176
+        break;
177
+    default:
178
+        av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
179
+    }
180
+}
181
+
147 182
 static int pix_sum_c(uint8_t * pix, int line_size)
148 183
 {
149 184
     int s, i, j;
... ...
@@ -3107,32 +3142,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
3107 3107
             c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
3108 3108
     }
3109 3109
 
3110
-    switch(c->idct_permutation_type){
3111
-    case FF_NO_IDCT_PERM:
3112
-        for(i=0; i<64; i++)
3113
-            c->idct_permutation[i]= i;
3114
-        break;
3115
-    case FF_LIBMPEG2_IDCT_PERM:
3116
-        for(i=0; i<64; i++)
3117
-            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
3118
-        break;
3119
-    case FF_SIMPLE_IDCT_PERM:
3120
-        for(i=0; i<64; i++)
3121
-            c->idct_permutation[i]= simple_mmx_permutation[i];
3122
-        break;
3123
-    case FF_TRANSPOSE_IDCT_PERM:
3124
-        for(i=0; i<64; i++)
3125
-            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
3126
-        break;
3127
-    case FF_PARTTRANS_IDCT_PERM:
3128
-        for(i=0; i<64; i++)
3129
-            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
3130
-        break;
3131
-    case FF_SSE2_IDCT_PERM:
3132
-        for(i=0; i<64; i++)
3133
-            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
3134
-        break;
3135
-    default:
3136
-        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
3137
-    }
3110
+    ff_init_scantable_permutation(c->idct_permutation,
3111
+                                  c->idct_permutation_type);
3138 3112
 }
... ...
@@ -204,6 +204,8 @@ typedef struct ScanTable{
204 204
 } ScanTable;
205 205
 
206 206
 void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
207
+void ff_init_scantable_permutation(uint8_t *idct_permutation,
208
+                                   int idct_permutation_type);
207 209
 
208 210
 #define EMULATED_EDGE(depth) \
209 211
 void ff_emulated_edge_mc_ ## depth (uint8_t *buf, const uint8_t *src, int linesize,\
... ...
@@ -301,7 +301,7 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data,
301 301
     /* check for changes of image width and image height */
302 302
     if (avctx->width != s->image_width || avctx->height != s->image_height) {
303 303
         av_log(avctx, AV_LOG_ERROR,
304
-               "Frame width or height differs from first frames!\n");
304
+               "Frame width or height differs from first frame!\n");
305 305
         av_log(avctx, AV_LOG_ERROR, "fh = %d, fv %d  vs  ch = %d, cv = %d\n",
306 306
                avctx->height, avctx->width, s->image_height, s->image_width);
307 307
         return AVERROR_INVALIDDATA;
... ...
@@ -367,7 +367,7 @@ static int flashsv_decode_frame(AVCodecContext *avctx, void *data,
367 367
                 if (s->color_depth != 0 && s->color_depth != 2) {
368 368
                     av_log(avctx, AV_LOG_ERROR,
369 369
                            "%dx%d invalid color depth %d\n", i, j, s->color_depth);
370
-                    return -1;
370
+                    return AVERROR_INVALIDDATA;
371 371
                 }
372 372
 
373 373
                 if (has_diff) {
... ...
@@ -75,6 +75,20 @@ static inline int get_ue_golomb(GetBitContext *gb){
75 75
     }
76 76
 }
77 77
 
78
+/**
79
+ * Read an unsigned Exp-Golomb code in the range 0 to UINT32_MAX-1.
80
+ */
81
+static inline unsigned get_ue_golomb_long(GetBitContext *gb)
82
+{
83
+    unsigned buf, log;
84
+
85
+    buf = show_bits_long(gb, 32);
86
+    log = 31 - av_log2(buf);
87
+    skip_bits_long(gb, log);
88
+
89
+    return get_bits_long(gb, log + 1) - 1;
90
+}
91
+
78 92
  /**
79 93
  * read unsigned exp golomb code, constrained to a max of 31.
80 94
  * the return value is undefined if the stored value exceeds 31.
... ...
@@ -298,17 +298,18 @@ static inline int pred_non_zero_count(H264Context *h, int n){
298 298
 }
299 299
 
300 300
 static av_cold void init_cavlc_level_tab(void){
301
-    int suffix_length, mask;
301
+    int suffix_length;
302 302
     unsigned int i;
303 303
 
304 304
     for(suffix_length=0; suffix_length<7; suffix_length++){
305 305
         for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
306 306
             int prefix= LEVEL_TAB_BITS - av_log2(2*i);
307
-            int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
308 307
 
309
-            mask= -(level_code&1);
310
-            level_code= (((2+level_code)>>1) ^ mask) - mask;
311 308
             if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
309
+                int level_code = (prefix << suffix_length) +
310
+                    (i >> (av_log2(i) - suffix_length)) - (1 << suffix_length);
311
+                int mask = -(level_code&1);
312
+                level_code = (((2 + level_code) >> 1) ^ mask) - mask;
312 313
                 cavlc_level_tab[suffix_length][i][0]= level_code;
313 314
                 cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
314 315
             }else if(prefix + 1 <= LEVEL_TAB_BITS){
... ...
@@ -143,8 +143,8 @@ static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
143 143
     get_bits(&s->gb, 4); /* bit_rate_scale */
144 144
     get_bits(&s->gb, 4); /* cpb_size_scale */
145 145
     for(i=0; i<cpb_count; i++){
146
-        get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
147
-        get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
146
+        get_ue_golomb_long(&s->gb); /* bit_rate_value_minus1 */
147
+        get_ue_golomb_long(&s->gb); /* cpb_size_value_minus1 */
148 148
         get_bits1(&s->gb);     /* cbr_flag */
149 149
     }
150 150
     sps->initial_cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
... ...
@@ -494,6 +494,7 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
494 494
     unsigned int pps_id= get_ue_golomb(&s->gb);
495 495
     PPS *pps;
496 496
     const int qp_bd_offset = 6*(h->sps.bit_depth_luma-8);
497
+    int bits_left;
497 498
 
498 499
     if(pps_id >= MAX_PPS_COUNT) {
499 500
         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
... ...
@@ -570,6 +571,7 @@ int ff_h264_decode_picture_parameter_set(H264Context *h, int bit_length){
570 570
     memcpy(pps->scaling_matrix4, h->sps_buffers[pps->sps_id]->scaling_matrix4, sizeof(pps->scaling_matrix4));
571 571
     memcpy(pps->scaling_matrix8, h->sps_buffers[pps->sps_id]->scaling_matrix8, sizeof(pps->scaling_matrix8));
572 572
 
573
+    bits_left = bit_length - get_bits_count(&s->gb);
573 574
     if(get_bits_count(&s->gb) < bit_length){
574 575
         pps->transform_8x8_mode= get_bits1(&s->gb);
575 576
         decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
... ...
@@ -52,7 +52,7 @@ static inline int sad_hpel_motion_search(MpegEncContext * s,
52 52
                                   int src_index, int ref_index,
53 53
                                   int size, int h);
54 54
 
55
-static inline int update_map_generation(MotionEstContext *c)
55
+static inline unsigned update_map_generation(MotionEstContext *c)
56 56
 {
57 57
     c->map_generation+= 1<<(ME_MAP_MV_BITS*2);
58 58
     if(c->map_generation==0){
... ...
@@ -90,8 +90,8 @@ static int hpel_motion_search(MpegEncContext * s,
90 90
                      + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
91 91
 
92 92
 #if 1
93
-        int key;
94
-        int map_generation= c->map_generation;
93
+        unsigned key;
94
+        unsigned map_generation= c->map_generation;
95 95
 #ifndef NDEBUG
96 96
         uint32_t *map= c->map;
97 97
 #endif
... ...
@@ -210,7 +210,7 @@ static int qpel_motion_search(MpegEncContext * s,
210 210
     const int mx = *mx_ptr;
211 211
     const int my = *my_ptr;
212 212
     const int penalty_factor= c->sub_penalty_factor;
213
-    const int map_generation= c->map_generation;
213
+    const unsigned map_generation = c->map_generation;
214 214
     const int subpel_quality= c->avctx->me_subpel_quality;
215 215
     uint32_t *map= c->map;
216 216
     me_cmp_func cmpf, chroma_cmpf;
... ...
@@ -356,7 +356,7 @@ static int qpel_motion_search(MpegEncContext * s,
356 356
 
357 357
 #define CHECK_MV(x,y)\
358 358
 {\
359
-    const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
359
+    const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
360 360
     const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
361 361
     assert((x) >= xmin);\
362 362
     assert((x) <= xmax);\
... ...
@@ -384,7 +384,7 @@ static int qpel_motion_search(MpegEncContext * s,
384 384
 
385 385
 #define CHECK_MV_DIR(x,y,new_dir)\
386 386
 {\
387
-    const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
387
+    const unsigned key = ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
388 388
     const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
389 389
 /*printf("check_mv_dir %d %d %d\n", x, y, new_dir);*/\
390 390
     if(map[index]!=key){\
... ...
@@ -422,13 +422,13 @@ static av_always_inline int small_diamond_search(MpegEncContext * s, int *best,
422 422
     int next_dir=-1;
423 423
     LOAD_COMMON
424 424
     LOAD_COMMON2
425
-    int map_generation= c->map_generation;
425
+    unsigned map_generation = c->map_generation;
426 426
 
427 427
     cmpf= s->dsp.me_cmp[size];
428 428
     chroma_cmpf= s->dsp.me_cmp[size+1];
429 429
 
430 430
     { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
431
-        const int key= (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
431
+        const unsigned key = (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
432 432
         const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
433 433
         if(map[index]!=key){ //this will be executed only very rarely
434 434
             score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
... ...
@@ -464,7 +464,7 @@ static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
464 464
     int dia_size;
465 465
     LOAD_COMMON
466 466
     LOAD_COMMON2
467
-    int map_generation= c->map_generation;
467
+    unsigned map_generation = c->map_generation;
468 468
 
469 469
     cmpf= s->dsp.me_cmp[size];
470 470
     chroma_cmpf= s->dsp.me_cmp[size+1];
... ...
@@ -505,7 +505,7 @@ static int hex_search(MpegEncContext * s, int *best, int dmin,
505 505
     me_cmp_func cmpf, chroma_cmpf;
506 506
     LOAD_COMMON
507 507
     LOAD_COMMON2
508
-    int map_generation= c->map_generation;
508
+    unsigned map_generation = c->map_generation;
509 509
     int x,y,d;
510 510
     const int dec= dia_size & (dia_size-1);
511 511
 
... ...
@@ -539,7 +539,7 @@ static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
539 539
     me_cmp_func cmpf, chroma_cmpf;
540 540
     LOAD_COMMON
541 541
     LOAD_COMMON2
542
-    int map_generation= c->map_generation;
542
+    unsigned map_generation = c->map_generation;
543 543
     int x,y,i,d;
544 544
     int dia_size= c->dia_size&0xFF;
545 545
     const int dec= dia_size & (dia_size-1);
... ...
@@ -577,7 +577,7 @@ static int umh_search(MpegEncContext * s, int *best, int dmin,
577 577
     me_cmp_func cmpf, chroma_cmpf;
578 578
     LOAD_COMMON
579 579
     LOAD_COMMON2
580
-    int map_generation= c->map_generation;
580
+    unsigned map_generation = c->map_generation;
581 581
     int x,y,x2,y2, i, j, d;
582 582
     const int dia_size= c->dia_size&0xFE;
583 583
     static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
... ...
@@ -624,7 +624,7 @@ static int full_search(MpegEncContext * s, int *best, int dmin,
624 624
     me_cmp_func cmpf, chroma_cmpf;
625 625
     LOAD_COMMON
626 626
     LOAD_COMMON2
627
-    int map_generation= c->map_generation;
627
+    unsigned map_generation = c->map_generation;
628 628
     int x,y, d;
629 629
     const int dia_size= c->dia_size&0xFF;
630 630
 
... ...
@@ -653,7 +653,7 @@ static int full_search(MpegEncContext * s, int *best, int dmin,
653 653
 
654 654
 #define SAB_CHECK_MV(ax,ay)\
655 655
 {\
656
-    const int key= ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
656
+    const unsigned key = ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
657 657
     const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
658 658
 /*printf("sab check %d %d\n", ax, ay);*/\
659 659
     if(map[index]!=key){\
... ...
@@ -692,7 +692,7 @@ static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
692 692
     int i, j;
693 693
     LOAD_COMMON
694 694
     LOAD_COMMON2
695
-    int map_generation= c->map_generation;
695
+    unsigned map_generation = c->map_generation;
696 696
 
697 697
     cmpf= s->dsp.me_cmp[size];
698 698
     chroma_cmpf= s->dsp.me_cmp[size+1];
... ...
@@ -777,7 +777,7 @@ static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
777 777
     int dia_size;
778 778
     LOAD_COMMON
779 779
     LOAD_COMMON2
780
-    int map_generation= c->map_generation;
780
+    unsigned map_generation = c->map_generation;
781 781
 
782 782
     cmpf= s->dsp.me_cmp[size];
783 783
     chroma_cmpf= s->dsp.me_cmp[size+1];
... ...
@@ -869,7 +869,7 @@ static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int
869 869
     int d;                   ///< the score (cmp + penalty) of any given mv
870 870
     int dmin;                /**< the best value of d, i.e. the score
871 871
                                corresponding to the mv stored in best[]. */
872
-    int map_generation;
872
+    unsigned map_generation;
873 873
     int penalty_factor;
874 874
     const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
875 875
     const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv before passing FIXME
... ...
@@ -997,7 +997,7 @@ static int epzs_motion_search4(MpegEncContext * s,
997 997
     MotionEstContext * const c= &s->me;
998 998
     int best[2]={0, 0};
999 999
     int d, dmin;
1000
-    int map_generation;
1000
+    unsigned map_generation;
1001 1001
     const int penalty_factor= c->penalty_factor;
1002 1002
     const int size=1;
1003 1003
     const int h=8;
... ...
@@ -1057,7 +1057,7 @@ static int epzs_motion_search2(MpegEncContext * s,
1057 1057
     MotionEstContext * const c= &s->me;
1058 1058
     int best[2]={0, 0};
1059 1059
     int d, dmin;
1060
-    int map_generation;
1060
+    unsigned map_generation;
1061 1061
     const int penalty_factor= c->penalty_factor;
1062 1062
     const int size=0; //FIXME pass as arg
1063 1063
     const int h=8;
... ...
@@ -156,7 +156,7 @@ typedef struct MotionEstContext{
156 156
     int best_bits;
157 157
     uint32_t *map;                     ///< map to avoid duplicate evaluations
158 158
     uint32_t *score_map;               ///< map to store the scores
159
-    int map_generation;
159
+    unsigned map_generation;
160 160
     int pre_penalty_factor;
161 161
     int penalty_factor;                /**< an estimate of the bits required to
162 162
                                         code a given mv value, e.g. (1,0) takes
... ...
@@ -34,17 +34,19 @@
34 34
 
35 35
 #include "libavutil/intmath.h"
36 36
 #include "avcodec.h"
37
-#include "dsputil.h"
37
+#include "proresdsp.h"
38 38
 #include "get_bits.h"
39 39
 
40
-#define BITS_PER_SAMPLE 10                              ///< output precision of that decoder
41
-#define BIAS     (1 << (BITS_PER_SAMPLE - 1))           ///< bias value for converting signed pixels into unsigned ones
42
-#define CLIP_MIN (1 << (BITS_PER_SAMPLE - 8))           ///< minimum value for clipping resulting pixels
43
-#define CLIP_MAX (1 << BITS_PER_SAMPLE) - CLIP_MIN - 1  ///< maximum value for clipping resulting pixels
44
-
40
+typedef struct {
41
+    const uint8_t *index;            ///< pointers to the data of this slice
42
+    int slice_num;
43
+    int x_pos, y_pos;
44
+    int slice_width;
45
+    DECLARE_ALIGNED(16, DCTELEM, blocks[8 * 4 * 64]);
46
+} ProresThreadData;
45 47
 
46 48
 typedef struct {
47
-    DSPContext dsp;
49
+    ProresDSPContext dsp;
48 50
     AVFrame    picture;
49 51
     ScanTable  scantable;
50 52
     int        scantable_type;           ///< -1 = uninitialized, 0 = progressive, 1/2 = interlaced
... ...
@@ -57,9 +59,9 @@ typedef struct {
57 57
     int        prev_slice_sf;            ///< scalefactor of the previous decoded slice
58 58
     DECLARE_ALIGNED(16, int16_t, qmat_luma_scaled[64]);
59 59
     DECLARE_ALIGNED(16, int16_t, qmat_chroma_scaled[64]);
60
-    DECLARE_ALIGNED(16, DCTELEM, blocks[8 * 4 * 64]);
61 60
     int        total_slices;            ///< total number of slices in a picture
62
-    const uint8_t **slice_data_index;   ///< array of pointers to the data of each slice
61
+    ProresThreadData *slice_data;
62
+    int        pic_num;
63 63
     int        chroma_factor;
64 64
     int        mb_chroma_factor;
65 65
     int        num_chroma_blocks;       ///< number of chrominance blocks in a macroblock
... ...
@@ -100,12 +102,12 @@ static av_cold int decode_init(AVCodecContext *avctx)
100 100
     ProresContext *ctx = avctx->priv_data;
101 101
 
102 102
     ctx->total_slices     = 0;
103
-    ctx->slice_data_index = 0;
103
+    ctx->slice_data       = NULL;
104 104
 
105 105
     avctx->pix_fmt = PIX_FMT_YUV422P10; // set default pixel format
106 106
 
107
-    avctx->bits_per_raw_sample = BITS_PER_SAMPLE;
108
-    dsputil_init(&ctx->dsp, avctx);
107
+    avctx->bits_per_raw_sample = PRORES_BITS_PER_SAMPLE;
108
+    ff_proresdsp_init(&ctx->dsp);
109 109
 
110 110
     avctx->coded_frame = &ctx->picture;
111 111
     avcodec_get_frame_defaults(&ctx->picture);
... ...
@@ -271,9 +273,9 @@ static int decode_picture_header(ProresContext *ctx, const uint8_t *buf,
271 271
     }
272 272
 
273 273
     if (ctx->total_slices != num_slices) {
274
-        av_freep(&ctx->slice_data_index);
275
-        ctx->slice_data_index = av_malloc((num_slices + 1) * sizeof(uint8_t*));
276
-        if (!ctx->slice_data_index)
274
+        av_freep(&ctx->slice_data);
275
+        ctx->slice_data = av_malloc((num_slices + 1) * sizeof(ctx->slice_data[0]));
276
+        if (!ctx->slice_data)
277 277
             return AVERROR(ENOMEM);
278 278
         ctx->total_slices = num_slices;
279 279
     }
... ...
@@ -288,10 +290,10 @@ static int decode_picture_header(ProresContext *ctx, const uint8_t *buf,
288 288
     data_ptr = index_ptr + num_slices * 2;
289 289
 
290 290
     for (i = 0; i < num_slices; i++) {
291
-        ctx->slice_data_index[i] = data_ptr;
291
+        ctx->slice_data[i].index = data_ptr;
292 292
         data_ptr += AV_RB16(index_ptr + i * 2);
293 293
     }
294
-    ctx->slice_data_index[i] = data_ptr;
294
+    ctx->slice_data[i].index = data_ptr;
295 295
 
296 296
     if (data_ptr > buf + data_size) {
297 297
         av_log(avctx, AV_LOG_ERROR, "out of slice data\n");
... ...
@@ -449,52 +451,11 @@ static inline void decode_ac_coeffs(GetBitContext *gb, DCTELEM *out,
449 449
 }
450 450
 
451 451
 
452
-#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX))
453
-
454
-/**
455
- * Add bias value, clamp and output pixels of a slice
456
- */
457
-static void put_pixels(const DCTELEM *in, uint16_t *out, int stride,
458
-                       int mbs_per_slice, int blocks_per_mb)
459
-{
460
-    int mb, x, y, src_offset, dst_offset;
461
-    const DCTELEM *src1, *src2;
462
-    uint16_t *dst1, *dst2;
463
-
464
-    src1 = in;
465
-    src2 = in + (blocks_per_mb << 5);
466
-    dst1 = out;
467
-    dst2 = out + (stride << 3);
468
-
469
-    for (mb = 0; mb < mbs_per_slice; mb++) {
470
-        for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) {
471
-            for (x = 0; x < 8; x++) {
472
-                src_offset = (y << 3) + x;
473
-
474
-                dst1[dst_offset + x] = CLIP_AND_BIAS(src1[src_offset]);
475
-                dst2[dst_offset + x] = CLIP_AND_BIAS(src2[src_offset]);
476
-
477
-                if (blocks_per_mb > 2) {
478
-                    dst1[dst_offset + x + 8] =
479
-                        CLIP_AND_BIAS(src1[src_offset + 64]);
480
-                    dst2[dst_offset + x + 8] =
481
-                        CLIP_AND_BIAS(src2[src_offset + 64]);
482
-                }
483
-            }
484
-        }
485
-
486
-        src1 += blocks_per_mb << 6;
487
-        src2 += blocks_per_mb << 6;
488
-        dst1 += blocks_per_mb << 2;
489
-        dst2 += blocks_per_mb << 2;
490
-    }
491
-}
492
-
493
-
494 452
 /**
495 453
  * Decode a slice plane (luma or chroma).
496 454
  */
497
-static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
455
+static void decode_slice_plane(ProresContext *ctx, ProresThreadData *td,
456
+                               const uint8_t *buf,
498 457
                                int data_size, uint16_t *out_ptr,
499 458
                                int linesize, int mbs_per_slice,
500 459
                                int blocks_per_mb, int plane_size_factor,
... ...
@@ -502,43 +463,47 @@ static void decode_slice_plane(ProresContext *ctx, const uint8_t *buf,
502 502
 {
503 503
     GetBitContext gb;
504 504
     DCTELEM *block_ptr;
505
-    int i, blk_num, blocks_per_slice;
505
+    int mb_num, blocks_per_slice;
506 506
 
507 507
     blocks_per_slice = mbs_per_slice * blocks_per_mb;
508 508
 
509
-    memset(ctx->blocks, 0, 8 * 4 * 64 * sizeof(*ctx->blocks));
509
+    memset(td->blocks, 0, 8 * 4 * 64 * sizeof(*td->blocks));
510 510
 
511 511
     init_get_bits(&gb, buf, data_size << 3);
512 512
 
513
-    decode_dc_coeffs(&gb, ctx->blocks, blocks_per_slice);
513
+    decode_dc_coeffs(&gb, td->blocks, blocks_per_slice);
514 514
 
515
-    decode_ac_coeffs(&gb, ctx->blocks, blocks_per_slice,
515
+    decode_ac_coeffs(&gb, td->blocks, blocks_per_slice,
516 516
                      plane_size_factor, ctx->scantable.permutated);
517 517
 
518 518
     /* inverse quantization, inverse transform and output */
519
-    block_ptr = ctx->blocks;
520
-
521
-    for (blk_num = 0; blk_num < blocks_per_slice; blk_num++, block_ptr += 64) {
522
-        /* TODO: the correct solution shoud be (block_ptr[i] * qmat[i]) >> 1
523
-         * and the input of the inverse transform should be scaled by 2
524
-         * in order to avoid rounding errors.
525
-         * Due to the fact the existing Libav transforms are incompatible with
526
-         * that input I temporally introduced the coarse solution below... */
527
-        for (i = 0; i < 64; i++)
528
-            block_ptr[i] = (block_ptr[i] * qmat[i]) >> 2;
529
-
530
-        ctx->dsp.idct(block_ptr);
519
+    block_ptr = td->blocks;
520
+
521
+    for (mb_num = 0; mb_num < mbs_per_slice; mb_num++, out_ptr += blocks_per_mb * 4) {
522
+        ctx->dsp.idct_put(out_ptr,                    linesize, block_ptr, qmat);
523
+        block_ptr += 64;
524
+        if (blocks_per_mb > 2) {
525
+            ctx->dsp.idct_put(out_ptr + 8,            linesize, block_ptr, qmat);
526
+            block_ptr += 64;
527
+        }
528
+        ctx->dsp.idct_put(out_ptr + linesize * 4,     linesize, block_ptr, qmat);
529
+        block_ptr += 64;
530
+        if (blocks_per_mb > 2) {
531
+            ctx->dsp.idct_put(out_ptr + linesize * 4 + 8, linesize, block_ptr, qmat);
532
+            block_ptr += 64;
533
+        }
531 534
     }
532
-
533
-    put_pixels(ctx->blocks, out_ptr, linesize >> 1, mbs_per_slice,
534
-               blocks_per_mb);
535 535
 }
536 536
 
537 537
 
538
-static int decode_slice(ProresContext *ctx, int pic_num, int slice_num,
539
-                        int mb_x_pos, int mb_y_pos, int mbs_per_slice,
540
-                        AVCodecContext *avctx)
538
+static int decode_slice(AVCodecContext *avctx, ProresThreadData *td)
541 539
 {
540
+    ProresContext *ctx = avctx->priv_data;
541
+    int mb_x_pos  = td->x_pos;
542
+    int mb_y_pos  = td->y_pos;
543
+    int pic_num   = ctx->pic_num;
544
+    int slice_num = td->slice_num;
545
+    int mbs_per_slice = td->slice_width;
542 546
     const uint8_t *buf;
543 547
     uint8_t *y_data, *u_data, *v_data;
544 548
     AVFrame *pic = avctx->coded_frame;
... ...
@@ -546,8 +511,8 @@ static int decode_slice(ProresContext *ctx, int pic_num, int slice_num,
546 546
     int slice_data_size, hdr_size, y_data_size, u_data_size, v_data_size;
547 547
     int y_linesize, u_linesize, v_linesize;
548 548
 
549
-    buf             = ctx->slice_data_index[slice_num];
550
-    slice_data_size = ctx->slice_data_index[slice_num + 1] - buf;
549
+    buf             = ctx->slice_data[slice_num].index;
550
+    slice_data_size = ctx->slice_data[slice_num + 1].index - buf;
551 551
 
552 552
     slice_width_factor = av_log2(mbs_per_slice);
553 553
 
... ...
@@ -593,20 +558,20 @@ static int decode_slice(ProresContext *ctx, int pic_num, int slice_num,
593 593
     if (ctx->qmat_changed || sf != ctx->prev_slice_sf) {
594 594
         ctx->prev_slice_sf = sf;
595 595
         for (i = 0; i < 64; i++) {
596
-            ctx->qmat_luma_scaled[i]   = ctx->qmat_luma[i]   * sf;
597
-            ctx->qmat_chroma_scaled[i] = ctx->qmat_chroma[i] * sf;
596
+            ctx->qmat_luma_scaled[ctx->dsp.idct_permutation[i]]   = ctx->qmat_luma[i]   * sf;
597
+            ctx->qmat_chroma_scaled[ctx->dsp.idct_permutation[i]] = ctx->qmat_chroma[i] * sf;
598 598
         }
599 599
     }
600 600
 
601 601
     /* decode luma plane */
602
-    decode_slice_plane(ctx, buf + hdr_size, y_data_size,
602
+    decode_slice_plane(ctx, td, buf + hdr_size, y_data_size,
603 603
                        (uint16_t*) (y_data + (mb_y_pos << 4) * y_linesize +
604 604
                                     (mb_x_pos << 5)), y_linesize,
605 605
                        mbs_per_slice, 4, slice_width_factor + 2,
606 606
                        ctx->qmat_luma_scaled);
607 607
 
608 608
     /* decode U chroma plane */
609
-    decode_slice_plane(ctx, buf + hdr_size + y_data_size, u_data_size,
609
+    decode_slice_plane(ctx, td, buf + hdr_size + y_data_size, u_data_size,
610 610
                        (uint16_t*) (u_data + (mb_y_pos << 4) * u_linesize +
611 611
                                     (mb_x_pos << ctx->mb_chroma_factor)),
612 612
                        u_linesize, mbs_per_slice, ctx->num_chroma_blocks,
... ...
@@ -614,7 +579,7 @@ static int decode_slice(ProresContext *ctx, int pic_num, int slice_num,
614 614
                        ctx->qmat_chroma_scaled);
615 615
 
616 616
     /* decode V chroma plane */
617
-    decode_slice_plane(ctx, buf + hdr_size + y_data_size + u_data_size,
617
+    decode_slice_plane(ctx, td, buf + hdr_size + y_data_size + u_data_size,
618 618
                        v_data_size,
619 619
                        (uint16_t*) (v_data + (mb_y_pos << 4) * v_linesize +
620 620
                                     (mb_x_pos << ctx->mb_chroma_factor)),
... ...
@@ -633,6 +598,7 @@ static int decode_picture(ProresContext *ctx, int pic_num,
633 633
 
634 634
     slice_num = 0;
635 635
 
636
+    ctx->pic_num = pic_num;
636 637
     for (y_pos = 0; y_pos < ctx->num_y_mbs; y_pos++) {
637 638
         slice_width = 1 << ctx->slice_width_factor;
638 639
 
... ...
@@ -641,15 +607,18 @@ static int decode_picture(ProresContext *ctx, int pic_num,
641 641
             while (ctx->num_x_mbs - x_pos < slice_width)
642 642
                 slice_width >>= 1;
643 643
 
644
-            if (decode_slice(ctx, pic_num, slice_num, x_pos, y_pos,
645
-                             slice_width, avctx) < 0)
646
-                return -1;
644
+            ctx->slice_data[slice_num].slice_num   = slice_num;
645
+            ctx->slice_data[slice_num].x_pos       = x_pos;
646
+            ctx->slice_data[slice_num].y_pos       = y_pos;
647
+            ctx->slice_data[slice_num].slice_width = slice_width;
647 648
 
648 649
             slice_num++;
649 650
         }
650 651
     }
651 652
 
652
-    return 0;
653
+    return avctx->execute(avctx, (void *) decode_slice,
654
+                          ctx->slice_data, NULL, slice_num,
655
+                          sizeof(ctx->slice_data[0]));
653 656
 }
654 657
 
655 658
 
... ...
@@ -712,7 +681,7 @@ static av_cold int decode_close(AVCodecContext *avctx)
712 712
     if (ctx->picture.data[0])
713 713
         avctx->release_buffer(avctx, &ctx->picture);
714 714
 
715
-    av_freep(&ctx->slice_data_index);
715
+    av_freep(&ctx->slice_data);
716 716
 
717 717
     return 0;
718 718
 }
... ...
@@ -726,6 +695,6 @@ AVCodec ff_prores_lgpl_decoder = {
726 726
     .init           = decode_init,
727 727
     .close          = decode_close,
728 728
     .decode         = decode_frame,
729
-    .capabilities   = CODEC_CAP_DR1,
729
+    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_SLICE_THREADS,
730 730
     .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)")
731 731
 };
732 732
new file mode 100644
... ...
@@ -0,0 +1,63 @@
0
+/*
1
+ * Apple ProRes compatible decoder
2
+ *
3
+ * Copyright (c) 2010-2011 Maxim Poliakovski
4
+ *
5
+ * This file is part of Libav.
6
+ *
7
+ * Libav is free software; you can redistribute it and/or
8
+ * modify it under the terms of the GNU Lesser General Public
9
+ * License as published by the Free Software Foundation; either
10
+ * version 2.1 of the License, or (at your option) any later version.
11
+ *
12
+ * Libav is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
+ * Lesser General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU Lesser General Public
18
+ * License along with Libav; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ */
21
+
22
+#include "proresdsp.h"
23
+#include "simple_idct.h"
24
+
25
+#define BIAS     (1 << (PRORES_BITS_PER_SAMPLE - 1))           ///< bias value for converting signed pixels into unsigned ones
26
+#define CLIP_MIN (1 << (PRORES_BITS_PER_SAMPLE - 8))           ///< minimum value for clipping resulting pixels
27
+#define CLIP_MAX ((1 << PRORES_BITS_PER_SAMPLE) - CLIP_MIN - 1) ///< maximum value for clipping resulting pixels (fully parenthesized so it expands safely inside larger expressions)
28
+
29
+#define CLIP_AND_BIAS(x) (av_clip((x) + BIAS, CLIP_MIN, CLIP_MAX))
30
+
31
+/**
32
+ * Add bias value, clamp and output pixels of a slice
33
+ */
34
+static void put_pixels(uint16_t *dst, int stride, const DCTELEM *in)
35
+{
36
+    int x, y, src_offset, dst_offset;
37
+
38
+    for (y = 0, dst_offset = 0; y < 8; y++, dst_offset += stride) {
39
+        for (x = 0; x < 8; x++) {
40
+            src_offset = (y << 3) + x;
41
+
42
+            dst[dst_offset + x] = CLIP_AND_BIAS(in[src_offset]);
43
+        }
44
+    }
45
+}
46
+
47
+static void prores_idct_put_c(uint16_t *out, int linesize, DCTELEM *block, const int16_t *qmat)
48
+{
49
+    ff_prores_idct(block, qmat);
50
+    put_pixels(out, linesize >> 1, block);
51
+}
52
+
53
+void ff_proresdsp_init(ProresDSPContext *dsp)
54
+{
55
+    dsp->idct_put = prores_idct_put_c;
56
+    dsp->idct_permutation_type = FF_NO_IDCT_PERM;
57
+
58
+    if (HAVE_MMX) ff_proresdsp_x86_init(dsp);
59
+
60
+    ff_init_scantable_permutation(dsp->idct_permutation,
61
+                                  dsp->idct_permutation_type);
62
+}
0 63
new file mode 100644
... ...
@@ -0,0 +1,40 @@
0
+/*
1
+ * Apple ProRes compatible decoder
2
+ *
3
+ * Copyright (c) 2010-2011 Maxim Poliakovski
4
+ *
5
+ * This file is part of Libav.
6
+ *
7
+ * Libav is free software; you can redistribute it and/or
8
+ * modify it under the terms of the GNU Lesser General Public
9
+ * License as published by the Free Software Foundation; either
10
+ * version 2.1 of the License, or (at your option) any later version.
11
+ *
12
+ * Libav is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
+ * Lesser General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU Lesser General Public
18
+ * License along with Libav; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ */
21
+
22
+#ifndef AVCODEC_PRORESDSP_H
23
+#define AVCODEC_PRORESDSP_H
24
+
25
+#include "dsputil.h"
26
+
27
+#define PRORES_BITS_PER_SAMPLE 10 ///< output precision of prores decoder
28
+
29
+typedef struct {
30
+    int idct_permutation_type;
31
+    uint8_t idct_permutation[64];
32
+    void (* idct_put) (uint16_t *out, int linesize, DCTELEM *block, const int16_t *qmat);
33
+} ProresDSPContext;
34
+
35
+void ff_proresdsp_init(ProresDSPContext *dsp);
36
+
37
+void ff_proresdsp_x86_init(ProresDSPContext *dsp);
38
+
39
+#endif /* AVCODEC_PRORESDSP_H */
... ...
@@ -221,3 +221,20 @@ void ff_simple_idct44_add(uint8_t *dest, int line_size, DCTELEM *block)
221 221
         idct4col_add(dest + i, line_size, block + i);
222 222
     }
223 223
 }
224
+
225
+void ff_prores_idct(DCTELEM *block, const int16_t *qmat)
226
+{
227
+    int i;
228
+
229
+    for (i = 0; i < 64; i++)
230
+        block[i] *= qmat[i];
231
+
232
+    for (i = 0; i < 8; i++)
233
+        idctRowCondDC_10(block + i*8);
234
+
235
+    for (i = 0; i < 64; i++)
236
+        block[i] >>= 2;
237
+
238
+    for (i = 0; i < 8; i++)
239
+        idctSparseCol_10(block + i);
240
+}
... ...
@@ -38,6 +38,12 @@ void ff_simple_idct_8(DCTELEM *block);
38 38
 void ff_simple_idct_put_10(uint8_t *dest, int line_size, DCTELEM *block);
39 39
 void ff_simple_idct_add_10(uint8_t *dest, int line_size, DCTELEM *block);
40 40
 void ff_simple_idct_10(DCTELEM *block);
41
+/**
42
+ * Special version of ff_simple_idct_10() which does dequantization
43
+ * and shifts the intermediate values right by 2 more bits between the two
44
+ * IDCT passes to account for larger scale of input coefficients.
45
+ */
46
+void ff_prores_idct(DCTELEM *block, const int16_t *qmat);
41 47
 
42 48
 void ff_simple_idct_mmx(int16_t *block);
43 49
 void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, int16_t *block);
... ...
@@ -194,14 +194,16 @@ static void decode_parameters(SiprParameters* parms, GetBitContext *pgb,
194 194
 {
195 195
     int i, j;
196 196
 
197
-    parms->ma_pred_switch           = get_bits(pgb, p->ma_predictor_bits);
197
+    if (p->ma_predictor_bits)
198
+        parms->ma_pred_switch       = get_bits(pgb, p->ma_predictor_bits);
198 199
 
199 200
     for (i = 0; i < 5; i++)
200 201
         parms->vq_indexes[i]        = get_bits(pgb, p->vq_indexes_bits[i]);
201 202
 
202 203
     for (i = 0; i < p->subframe_count; i++) {
203 204
         parms->pitch_delay[i]       = get_bits(pgb, p->pitch_delay_bits[i]);
204
-        parms->gp_index[i]          = get_bits(pgb, p->gp_index_bits);
205
+        if (p->gp_index_bits)
206
+            parms->gp_index[i]      = get_bits(pgb, p->gp_index_bits);
205 207
 
206 208
         for (j = 0; j < p->number_of_fc_indexes; j++)
207 209
             parms->fc_indexes[i][j] = get_bits(pgb, p->fc_index_bits[j]);
... ...
@@ -560,6 +560,10 @@ static av_cold int decode_end(AVCodecContext *avctx)
560 560
 
561 561
 static av_cold int smka_decode_init(AVCodecContext *avctx)
562 562
 {
563
+    if (avctx->channels < 1 || avctx->channels > 2) {
564
+        av_log(avctx, AV_LOG_ERROR, "invalid number of channels\n");
565
+        return AVERROR(EINVAL);
566
+    }
563 567
     avctx->channel_layout = (avctx->channels==2) ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
564 568
     avctx->sample_fmt = avctx->bits_per_coded_sample == 8 ? AV_SAMPLE_FMT_U8 : AV_SAMPLE_FMT_S16;
565 569
     return 0;
... ...
@@ -583,6 +587,11 @@ static int smka_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
583 583
     int bits, stereo;
584 584
     int pred[2] = {0, 0};
585 585
 
586
+    if (buf_size <= 4) {
587
+        av_log(avctx, AV_LOG_ERROR, "packet is too small\n");
588
+        return AVERROR(EINVAL);
589
+    }
590
+
586 591
     unp_size = AV_RL32(buf);
587 592
 
588 593
     init_get_bits(&gb, buf + 4, (buf_size - 4) * 8);
... ...
@@ -598,6 +607,14 @@ static int smka_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
598 598
         av_log(avctx, AV_LOG_ERROR, "Frame is too large to fit in buffer\n");
599 599
         return -1;
600 600
     }
601
+    if (stereo ^ (avctx->channels != 1)) {
602
+        av_log(avctx, AV_LOG_ERROR, "channels mismatch\n");
603
+        return AVERROR(EINVAL);
604
+    }
605
+    if (bits && avctx->sample_fmt == AV_SAMPLE_FMT_U8) {
606
+        av_log(avctx, AV_LOG_ERROR, "sample format mismatch\n");
607
+        return AVERROR(EINVAL);
608
+    }
601 609
 
602 610
     memset(vlc, 0, sizeof(VLC) * 4);
603 611
     memset(h, 0, sizeof(HuffContext) * 4);
... ...
@@ -34,6 +34,8 @@ MMX-OBJS-$(CONFIG_ENCODERS)            += x86/dsputilenc_mmx.o
34 34
 YASM-OBJS-$(CONFIG_ENCODERS)           += x86/dsputilenc_yasm.o
35 35
 MMX-OBJS-$(CONFIG_GPL)                 += x86/idct_mmx.o
36 36
 MMX-OBJS-$(CONFIG_LPC)                 += x86/lpc_mmx.o
37
+YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER)     += x86/proresdsp.o
38
+MMX-OBJS-$(CONFIG_PRORES_LGPL_DECODER)      += x86/proresdsp-init.o
37 39
 MMX-OBJS-$(CONFIG_DWT)                 += x86/snowdsp_mmx.o
38 40
 MMX-OBJS-$(CONFIG_VC1_DECODER)         += x86/vc1dsp_mmx.o
39 41
 YASM-OBJS-$(CONFIG_VP3_DECODER)        += x86/vp3dsp.o
... ...
@@ -64,6 +64,8 @@ DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_64 ) = {0x0040004000400040ULL, 0x00400
64 64
 DECLARE_ALIGNED(8,  const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
65 65
 DECLARE_ALIGNED(8,  const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
66 66
 DECLARE_ALIGNED(8,  const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
67
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_512) = {0x0200020002000200ULL, 0x0200020002000200ULL};
68
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_1019)= {0x03FB03FB03FB03FBULL, 0x03FB03FB03FB03FBULL};
67 69
 
68 70
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_0  ) = {0x0000000000000000ULL, 0x0000000000000000ULL};
69 71
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_1  ) = {0x0101010101010101ULL, 0x0101010101010101ULL};
70 72
new file mode 100644
... ...
@@ -0,0 +1,54 @@
0
+/*
1
+ * Apple ProRes compatible decoder
2
+ *
3
+ * Copyright (c) 2010-2011 Maxim Poliakovski
4
+ *
5
+ * This file is part of Libav.
6
+ *
7
+ * Libav is free software; you can redistribute it and/or
8
+ * modify it under the terms of the GNU Lesser General Public
9
+ * License as published by the Free Software Foundation; either
10
+ * version 2.1 of the License, or (at your option) any later version.
11
+ *
12
+ * Libav is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
+ * Lesser General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU Lesser General Public
18
+ * License along with Libav; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
+ */
21
+
22
+#include "libavcodec/proresdsp.h"
23
+
24
+void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
25
+                                DCTELEM *block, const int16_t *qmat); /* qmat: per-coefficient dequantization multipliers */
26
+void ff_prores_idct_put_10_sse4(uint16_t *dst, int linesize,
27
+                                DCTELEM *block, const int16_t *qmat);
28
+void ff_prores_idct_put_10_avx (uint16_t *dst, int linesize,
29
+                                DCTELEM *block, const int16_t *qmat);
30
+
31
+void ff_proresdsp_x86_init(ProresDSPContext *dsp)
32
+{
33
+#if ARCH_X86_64
34
+    int flags = av_get_cpu_flags();
35
+
36
+    if (flags & AV_CPU_FLAG_SSE2) {
37
+        dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
38
+        dsp->idct_put = ff_prores_idct_put_10_sse2;
39
+    }
40
+
41
+    if (flags & AV_CPU_FLAG_SSE4) {
42
+        dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
43
+        dsp->idct_put = ff_prores_idct_put_10_sse4;
44
+    }
45
+
46
+#if HAVE_AVX
47
+    if (flags & AV_CPU_FLAG_AVX) {
48
+        dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
49
+        dsp->idct_put = ff_prores_idct_put_10_avx;
50
+    }
51
+#endif
52
+#endif
53
+}
0 54
new file mode 100644
... ...
@@ -0,0 +1,432 @@
0
+;******************************************************************************
1
+;* x86-SIMD-optimized IDCT for prores
2
+;* this is identical to "simple" IDCT except for the clip range
3
+;*
4
+;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
5
+;*
6
+;* This file is part of Libav.
7
+;*
8
+;* Libav is free software; you can redistribute it and/or
9
+;* modify it under the terms of the GNU Lesser General Public
10
+;* License as published by the Free Software Foundation; either
11
+;* version 2.1 of the License, or (at your option) any later version.
12
+;*
13
+;* Libav is distributed in the hope that it will be useful,
14
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
+;* Lesser General Public License for more details.
17
+;*
18
+;* You should have received a copy of the GNU Lesser General Public
19
+;* License along with Libav; if not, write to the Free Software
20
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
+;******************************************************************************
22
+
23
+%include "x86inc.asm"
24
+%include "x86util.asm"
25
+
26
+%define W1sh2 22725 ; W1 = 90901 = 22725<<2 + 1
27
+%define W2sh2 21407 ; W2 = 85627 = 21407<<2 - 1
28
+%define W3sh2 19265 ; W3 = 77062 = 19265<<2 + 2
29
+%define W4sh2 16384 ; W4 = 65535 = 16384<<2 - 1
30
+%define W5sh2 12873 ; W5 = 51491 = 12873<<2 - 1
31
+%define W6sh2  8867 ; W6 = 35468 =  8867<<2
32
+%define W7sh2  4520 ; W7 = 18081 =  4520<<2 + 1
33
+
34
+%ifdef ARCH_X86_64
35
+
36
+SECTION_RODATA
37
+
38
+w4_plus_w2: times 4 dw W4sh2, +W2sh2
39
+w4_min_w2:  times 4 dw W4sh2, -W2sh2
40
+w4_plus_w6: times 4 dw W4sh2, +W6sh2
41
+w4_min_w6:  times 4 dw W4sh2, -W6sh2
42
+w1_plus_w3: times 4 dw W1sh2, +W3sh2
43
+w3_min_w1:  times 4 dw W3sh2, -W1sh2
44
+w7_plus_w3: times 4 dw W7sh2, +W3sh2
45
+w3_min_w7:  times 4 dw W3sh2, -W7sh2
46
+w1_plus_w5: times 4 dw W1sh2, +W5sh2
47
+w5_min_w1:  times 4 dw W5sh2, -W1sh2
48
+w5_plus_w7: times 4 dw W5sh2, +W7sh2
49
+w7_min_w5:  times 4 dw W7sh2, -W5sh2
50
+row_round:  times 8 dw (1<<14)
51
+
52
+cextern pw_4
53
+cextern pw_8
54
+cextern pw_512
55
+cextern pw_1019
56
+
57
+section .text align=16
58
+
59
+; interleave data while maintaining source
60
+; %1=type, %2=dstlo, %3=dsthi, %4=src, %5=interleave
61
+%macro SBUTTERFLY3 5
62
+    punpckl%1   m%2, m%4, m%5
63
+    punpckh%1   m%3, m%4, m%5
64
+%endmacro
65
+
66
+; %1/%2=src1/dst1, %3/%4=dst2, %5/%6=src2, %7=shift
67
+; action: %3/%4 = %1/%2 - %5/%6; %1/%2 += %5/%6
68
+;         %1/%2/%3/%4 >>= %7; dword -> word (in %1/%3)
69
+%macro SUMSUB_SHPK 7
70
+    psubd       %3,  %1,  %5       ; { a0 - b0 }[0-3]
71
+    psubd       %4,  %2,  %6       ; { a0 - b0 }[4-7]
72
+    paddd       %1,  %5            ; { a0 + b0 }[0-3]
73
+    paddd       %2,  %6            ; { a0 + b0 }[4-7]
74
+    psrad       %1,  %7
75
+    psrad       %2,  %7
76
+    psrad       %3,  %7
77
+    psrad       %4,  %7
78
+    packssdw    %1,  %2            ; row[0]
79
+    packssdw    %3,  %4            ; row[7]
80
+%endmacro
81
+
82
+; %1 = row or col (for rounding variable)
83
+; %2 = number of bits to shift at the end
84
+; %3 = optimization
85
+%macro IDCT_1D 3
86
+    ; a0 = (W4 * row[0]) + (1 << (15 - 1));
87
+    ; a1 = a0;
88
+    ; a2 = a0;
89
+    ; a3 = a0;
90
+    ; a0 += W2 * row[2];
91
+    ; a1 += W6 * row[2];
92
+    ; a2 -= W6 * row[2];
93
+    ; a3 -= W2 * row[2];
94
+%ifidn %1, col
95
+    paddw       m10,[pw_8]
96
+%endif
97
+    SBUTTERFLY3 wd,  0,  1, 10,  8 ; { row[0], row[2] }[0-3]/[4-7]
98
+%ifidn %1, row
99
+    psubw       m10,[row_round]
100
+%endif
101
+    SIGNEXTEND  m8,  m9,  m14      ; { row[2] }[0-3] / [4-7]
102
+    SIGNEXTEND  m10, m11, m14      ; { row[0] }[0-3] / [4-7]
103
+    pmaddwd     m2,  m0, [w4_plus_w6]
104
+    pmaddwd     m3,  m1, [w4_plus_w6]
105
+    pmaddwd     m4,  m0, [w4_min_w6]
106
+    pmaddwd     m5,  m1, [w4_min_w6]
107
+    pmaddwd     m6,  m0, [w4_min_w2]
108
+    pmaddwd     m7,  m1, [w4_min_w2]
109
+    pmaddwd     m0, [w4_plus_w2]
110
+    pmaddwd     m1, [w4_plus_w2]
111
+    pslld       m2,  2
112
+    pslld       m3,  2
113
+    pslld       m4,  2
114
+    pslld       m5,  2
115
+    pslld       m6,  2
116
+    pslld       m7,  2
117
+    pslld       m0,  2
118
+    pslld       m1,  2
119
+
120
+    ; a0: -1*row[0]-1*row[2]
121
+    ; a1: -1*row[0]
122
+    ; a2: -1*row[0]
123
+    ; a3: -1*row[0]+1*row[2]
124
+    psubd       m2,  m10           ; a1[0-3]
125
+    psubd       m3,  m11           ; a1[4-7]
126
+    psubd       m4,  m10           ; a2[0-3]
127
+    psubd       m5,  m11           ; a2[4-7]
128
+    psubd       m0,  m10
129
+    psubd       m1,  m11
130
+    psubd       m6,  m10
131
+    psubd       m7,  m11
132
+    psubd       m0,  m8            ; a0[0-3]
133
+    psubd       m1,  m9            ; a0[4-7]
134
+    paddd       m6,  m8            ; a3[0-3]
135
+    paddd       m7,  m9            ; a3[4-7]
136
+
137
+    ; a0 +=   W4*row[4] + W6*row[6]; i.e. -1*row[4]
138
+    ; a1 -=   W4*row[4] + W2*row[6]; i.e. -1*row[4]-1*row[6]
139
+    ; a2 -=   W4*row[4] - W2*row[6]; i.e. -1*row[4]+1*row[6]
140
+    ; a3 +=   W4*row[4] - W6*row[6]; i.e. -1*row[4]
141
+    SBUTTERFLY3 wd,  8,  9, 13, 12 ; { row[4], row[6] }[0-3]/[4-7]
142
+    SIGNEXTEND  m13, m14, m10      ; { row[4] }[0-3] / [4-7]
143
+    pmaddwd     m10, m8, [w4_plus_w6]
144
+    pmaddwd     m11, m9, [w4_plus_w6]
145
+    pslld       m10, 2
146
+    pslld       m11, 2
147
+    psubd       m10,  m13
148
+    psubd       m11,  m14
149
+    paddd       m0,  m10            ; a0[0-3]
150
+    paddd       m1,  m11            ; a0[4-7]
151
+    pmaddwd     m10, m8, [w4_min_w6]
152
+    pmaddwd     m11, m9, [w4_min_w6]
153
+    pslld       m10, 2
154
+    pslld       m11, 2
155
+    psubd       m10, m13
156
+    psubd       m11, m14
157
+    paddd       m6,  m10           ; a3[0-3]
158
+    paddd       m7,  m11           ; a3[4-7]
159
+    pmaddwd     m10, m8, [w4_min_w2]
160
+    pmaddwd     m11, m9, [w4_min_w2]
161
+    pmaddwd     m8, [w4_plus_w2]
162
+    pmaddwd     m9, [w4_plus_w2]
163
+    pslld       m10, 2
164
+    pslld       m11, 2
165
+    pslld       m8,  2
166
+    pslld       m9,  2
167
+    psubd       m10, m13
168
+    psubd       m11, m14
169
+    psubd       m8,  m13
170
+    psubd       m9,  m14
171
+    psubd       m4,  m10           ; a2[0-3] intermediate
172
+    psubd       m5,  m11           ; a2[4-7] intermediate
173
+    psubd       m2,  m8            ; a1[0-3] intermediate
174
+    psubd       m3,  m9            ; a1[4-7] intermediate
175
+    SIGNEXTEND  m12, m13, m10      ; { row[6] }[0-3] / [4-7]
176
+    psubd       m4,  m12           ; a2[0-3]
177
+    psubd       m5,  m13           ; a2[4-7]
178
+    paddd       m2,  m12           ; a1[0-3]
179
+    paddd       m3,  m13           ; a1[4-7]
180
+
181
+    ; load/store
182
+    mova   [r2+  0], m0
183
+    mova   [r2+ 32], m2
184
+    mova   [r2+ 64], m4
185
+    mova   [r2+ 96], m6
186
+    mova        m10,[r2+ 16]       ; { row[1] }[0-7]
187
+    mova        m8, [r2+ 48]       ; { row[3] }[0-7]
188
+    mova        m13,[r2+ 80]       ; { row[5] }[0-7]
189
+    mova        m14,[r2+112]       ; { row[7] }[0-7]
190
+    mova   [r2+ 16], m1
191
+    mova   [r2+ 48], m3
192
+    mova   [r2+ 80], m5
193
+    mova   [r2+112], m7
194
+%ifidn %1, row
195
+    pmullw      m10,[r3+ 16]
196
+    pmullw      m8, [r3+ 48]
197
+    pmullw      m13,[r3+ 80]
198
+    pmullw      m14,[r3+112]
199
+%endif
200
+
201
+    ; b0 = MUL(W1, row[1]);
202
+    ; MAC(b0, W3, row[3]);
203
+    ; b1 = MUL(W3, row[1]);
204
+    ; MAC(b1, -W7, row[3]);
205
+    ; b2 = MUL(W5, row[1]);
206
+    ; MAC(b2, -W1, row[3]);
207
+    ; b3 = MUL(W7, row[1]);
208
+    ; MAC(b3, -W5, row[3]);
209
+    SBUTTERFLY3 wd,  0,  1, 10, 8  ; { row[1], row[3] }[0-3]/[4-7]
210
+    SIGNEXTEND  m10, m11, m12      ; { row[1] }[0-3] / [4-7]
211
+    SIGNEXTEND  m8,  m9,  m12      ; { row[3] }[0-3] / [4-7]
212
+    pmaddwd     m2,  m0, [w3_min_w7]
213
+    pmaddwd     m3,  m1, [w3_min_w7]
214
+    pmaddwd     m4,  m0, [w5_min_w1]
215
+    pmaddwd     m5,  m1, [w5_min_w1]
216
+    pmaddwd     m6,  m0, [w7_min_w5]
217
+    pmaddwd     m7,  m1, [w7_min_w5]
218
+    pmaddwd     m0, [w1_plus_w3]
219
+    pmaddwd     m1, [w1_plus_w3]
220
+    pslld       m2,  2
221
+    pslld       m3,  2
222
+    pslld       m4,  2
223
+    pslld       m5,  2
224
+    pslld       m6,  2
225
+    pslld       m7,  2
226
+    pslld       m0,  2
227
+    pslld       m1,  2
228
+
229
+    ; b0: +1*row[1]+2*row[3]
230
+    ; b1: +2*row[1]-1*row[3]
231
+    ; b2: -1*row[1]-1*row[3]
232
+    ; b3: +1*row[1]+1*row[3]
233
+    psubd       m2,  m8
234
+    psubd       m3,  m9
235
+    paddd       m0,  m8
236
+    paddd       m1,  m9
237
+    paddd       m8,  m10           ; { row[1] + row[3] }[0-3]
238
+    paddd       m9,  m11           ; { row[1] + row[3] }[4-7]
239
+    paddd       m10, m10
240
+    paddd       m11, m11
241
+    paddd       m0,  m8            ; b0[0-3]
242
+    paddd       m1,  m9            ; b0[4-7]
243
+    paddd       m2,  m10           ; b1[0-3]
244
+    paddd       m3,  m11           ; b1[4-7]
245
+    psubd       m4,  m8            ; b2[0-3]
246
+    psubd       m5,  m9            ; b2[4-7]
247
+    paddd       m6,  m8            ; b3[0-3]
248
+    paddd       m7,  m9            ; b3[4-7]
249
+
250
+    ; MAC(b0,  W5, row[5]);
251
+    ; MAC(b0,  W7, row[7]);
252
+    ; MAC(b1, -W1, row[5]);
253
+    ; MAC(b1, -W5, row[7]);
254
+    ; MAC(b2,  W7, row[5]);
255
+    ; MAC(b2,  W3, row[7]);
256
+    ; MAC(b3,  W3, row[5]);
257
+    ; MAC(b3, -W1, row[7]);
258
+    SBUTTERFLY3 wd,  8,  9, 13, 14 ; { row[5], row[7] }[0-3]/[4-7]
259
+    SIGNEXTEND  m13, m12, m11      ; { row[5] }[0-3] / [4-7]
260
+    SIGNEXTEND  m14, m11, m10      ; { row[7] }[0-3] / [4-7]
261
+
262
+    ; b0: -1*row[5]+1*row[7]
263
+    ; b1: -1*row[5]+1*row[7]
264
+    ; b2: +1*row[5]+2*row[7]
265
+    ; b3: +2*row[5]-1*row[7]
266
+    paddd       m4,  m13
267
+    paddd       m5,  m12
268
+    paddd       m6,  m13
269
+    paddd       m7,  m12
270
+    psubd       m13, m14           ; { row[5] - row[7] }[0-3]
271
+    psubd       m12, m11           ; { row[5] - row[7] }[4-7]
272
+    paddd       m14, m14
273
+    paddd       m11, m11
274
+    psubd       m0,  m13
275
+    psubd       m1,  m12
276
+    psubd       m2,  m13
277
+    psubd       m3,  m12
278
+    paddd       m4,  m14
279
+    paddd       m5,  m11
280
+    paddd       m6,  m13
281
+    paddd       m7,  m12
282
+
283
+    pmaddwd     m10, m8, [w1_plus_w5]
284
+    pmaddwd     m11, m9, [w1_plus_w5]
285
+    pmaddwd     m12, m8, [w5_plus_w7]
286
+    pmaddwd     m13, m9, [w5_plus_w7]
287
+    pslld       m10, 2
288
+    pslld       m11, 2
289
+    pslld       m12,  2
290
+    pslld       m13,  2
291
+    psubd       m2,  m10           ; b1[0-3]
292
+    psubd       m3,  m11           ; b1[4-7]
293
+    paddd       m0,  m12            ; b0[0-3]
294
+    paddd       m1,  m13            ; b0[4-7]
295
+    pmaddwd     m12, m8, [w7_plus_w3]
296
+    pmaddwd     m13, m9, [w7_plus_w3]
297
+    pmaddwd     m8, [w3_min_w1]
298
+    pmaddwd     m9, [w3_min_w1]
299
+    pslld       m12, 2
300
+    pslld       m13, 2
301
+    pslld       m8,  2
302
+    pslld       m9,  2
303
+    paddd       m4,  m12           ; b2[0-3]
304
+    paddd       m5,  m13           ; b2[4-7]
305
+    paddd       m6,  m8            ; b3[0-3]
306
+    paddd       m7,  m9            ; b3[4-7]
307
+
308
+    ; row[0] = (a0 + b0) >> 15;
309
+    ; row[7] = (a0 - b0) >> 15;
310
+    ; row[1] = (a1 + b1) >> 15;
311
+    ; row[6] = (a1 - b1) >> 15;
312
+    ; row[2] = (a2 + b2) >> 15;
313
+    ; row[5] = (a2 - b2) >> 15;
314
+    ; row[3] = (a3 + b3) >> 15;
315
+    ; row[4] = (a3 - b3) >> 15;
316
+    mova        m8, [r2+ 0]        ; a0[0-3]
317
+    mova        m9, [r2+16]        ; a0[4-7]
318
+    SUMSUB_SHPK m8,  m9,  m10, m11, m0,  m1,  %2
319
+    mova        m0, [r2+32]        ; a1[0-3]
320
+    mova        m1, [r2+48]        ; a1[4-7]
321
+    SUMSUB_SHPK m0,  m1,  m9,  m11, m2,  m3,  %2
322
+    mova        m1, [r2+64]        ; a2[0-3]
323
+    mova        m2, [r2+80]        ; a2[4-7]
324
+    SUMSUB_SHPK m1,  m2,  m11, m3,  m4,  m5,  %2
325
+    mova        m2, [r2+96]        ; a3[0-3]
326
+    mova        m3, [r2+112]       ; a3[4-7]
327
+    SUMSUB_SHPK m2,  m3,  m4,  m5,  m6,  m7,  %2
328
+%endmacro
329
+
330
+; void prores_idct_put_10_<opt>(uint16_t *pixels, int stride,
331
+;                               DCTELEM *block, const int16_t *qmat);
332
+%macro idct_put_fn 2
333
+cglobal prores_idct_put_10_%1, 4, 4, %2
334
+    movsxd      r1,  r1d
335
+    pxor        m15, m15           ; zero
336
+
337
+    ; for (i = 0; i < 8; i++)
338
+    ;     idctRowCondDC(block + i*8);
339
+    mova        m10,[r2+ 0]        ; { row[0] }[0-7]
340
+    mova        m8, [r2+32]        ; { row[2] }[0-7]
341
+    mova        m13,[r2+64]        ; { row[4] }[0-7]
342
+    mova        m12,[r2+96]        ; { row[6] }[0-7]
343
+
344
+    pmullw      m10,[r3+ 0]
345
+    pmullw      m8, [r3+32]
346
+    pmullw      m13,[r3+64]
347
+    pmullw      m12,[r3+96]
348
+
349
+    IDCT_1D     row, 17,  %1
350
+
351
+    ; transpose for second part of IDCT
352
+    TRANSPOSE8x8W 8, 0, 1, 2, 4, 11, 9, 10, 3
353
+    mova   [r2+ 16], m0
354
+    mova   [r2+ 48], m2
355
+    mova   [r2+ 80], m11
356
+    mova   [r2+112], m10
357
+    SWAP         8,  10
358
+    SWAP         1,   8
359
+    SWAP         4,  13
360
+    SWAP         9,  12
361
+
362
+    ; for (i = 0; i < 8; i++)
363
+    ;     idctSparseColAdd(dest + i, line_size, block + i);
364
+    IDCT_1D     col, 20,  %1
365
+
366
+    ; clip/store
367
+    mova        m6, [pw_512]
368
+    mova        m3, [pw_4]
369
+    mova        m5, [pw_1019]
370
+    paddw       m8,  m6
371
+    paddw       m0,  m6
372
+    paddw       m1,  m6
373
+    paddw       m2,  m6
374
+    paddw       m4,  m6
375
+    paddw       m11, m6
376
+    paddw       m9,  m6
377
+    paddw       m10, m6
378
+    pmaxsw      m8,  m3
379
+    pmaxsw      m0,  m3
380
+    pmaxsw      m1,  m3
381
+    pmaxsw      m2,  m3
382
+    pmaxsw      m4,  m3
383
+    pmaxsw      m11, m3
384
+    pmaxsw      m9,  m3
385
+    pmaxsw      m10, m3
386
+    pminsw      m8,  m5
387
+    pminsw      m0,  m5
388
+    pminsw      m1,  m5
389
+    pminsw      m2,  m5
390
+    pminsw      m4,  m5
391
+    pminsw      m11, m5
392
+    pminsw      m9,  m5
393
+    pminsw      m10, m5
394
+
395
+    lea         r2, [r1*3]
396
+    mova  [r0     ], m8
397
+    mova  [r0+r1  ], m0
398
+    mova  [r0+r1*2], m1
399
+    mova  [r0+r2  ], m2
400
+    lea         r0, [r0+r1*4]
401
+    mova  [r0     ], m4
402
+    mova  [r0+r1  ], m11
403
+    mova  [r0+r1*2], m9
404
+    mova  [r0+r2  ], m10
405
+    RET
406
+%endmacro
407
+
408
+%macro signextend_sse2 3 ; dstlow, dsthigh, tmp
409
+    pxor        %3,  %3
410
+    pcmpgtw     %3,  %1
411
+    mova        %2,  %1
412
+    punpcklwd   %1,  %3
413
+    punpckhwd   %2,  %3
414
+%endmacro
415
+
416
+%macro signextend_sse4 2-3 ; dstlow, dsthigh
417
+    movhlps     %2,  %1
418
+    pmovsxwd    %1,  %1
419
+    pmovsxwd    %2,  %2
420
+%endmacro
421
+
422
+INIT_XMM
423
+%define SIGNEXTEND signextend_sse2
424
+idct_put_fn sse2, 16
425
+INIT_XMM
426
+%define SIGNEXTEND signextend_sse4
427
+idct_put_fn sse4, 16
428
+INIT_AVX
429
+idct_put_fn avx,  16
430
+
431
+%endif
... ...
@@ -360,7 +360,7 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t
360 360
     AVIOContext *pbx;
361 361
     unsigned char *buffer = NULL;
362 362
     int buffer_size = 0;
363
-    void (*extra_func)(AVFormatContext*, AVIOContext*, int, char*, ID3v2ExtraMeta**) = NULL;
363
+    const ID3v2EMFunc *extra_func;
364 364
 
365 365
     switch (version) {
366 366
     case 2:
... ...
@@ -432,7 +432,7 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t
432 432
             av_log(s, AV_LOG_WARNING, "Skipping encrypted/compressed ID3v2 frame %s.\n", tag);
433 433
             avio_skip(s->pb, tlen);
434 434
         /* check for text tag or supported special meta tag */
435
-        } else if (tag[0] == 'T' || (extra_meta && (extra_func = get_extra_meta_func(tag, isv34)->read))) {
435
+        } else if (tag[0] == 'T' || (extra_meta && (extra_func = get_extra_meta_func(tag, isv34)))) {
436 436
             if (unsync || tunsync) {
437 437
                 int i, j;
438 438
                 av_fast_malloc(&buffer, &buffer_size, tlen);
... ...
@@ -458,7 +458,7 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t
458 458
                 read_ttag(s, pbx, tlen, tag);
459 459
             else
460 460
                 /* parse special meta tag */
461
-                extra_func(s, pbx, tlen, tag, extra_meta);
461
+                extra_func->read(s, pbx, tlen, tag, extra_meta);
462 462
         }
463 463
         else if (!tag[0]) {
464 464
             if (tag[1])
... ...
@@ -521,11 +521,11 @@ void ff_id3v2_read(AVFormatContext *s, const char *magic)
521 521
 void ff_id3v2_free_extra_meta(ID3v2ExtraMeta **extra_meta)
522 522
 {
523 523
     ID3v2ExtraMeta *current = *extra_meta, *next;
524
-    void (*free_func)(void *);
524
+    const ID3v2EMFunc *extra_func;
525 525
 
526 526
     while (current) {
527
-        if ((free_func = get_extra_meta_func(current->tag, 1)->free))
528
-            free_func(current->data);
527
+        if ((extra_func = get_extra_meta_func(current->tag, 1)))
528
+            extra_func->free(current->data);
529 529
         next = current->next;
530 530
         av_freep(&current);
531 531
         current = next;
... ...
@@ -81,15 +81,20 @@ typedef struct MOVParseTableEntry {
81 81
 
82 82
 static const MOVParseTableEntry mov_default_parse_table[];
83 83
 
84
-static int mov_metadata_track_or_disc_number(MOVContext *c, AVIOContext *pb, unsigned len, const char *type)
84
+static int mov_metadata_track_or_disc_number(MOVContext *c, AVIOContext *pb,
85
+                                             unsigned len, const char *key)
85 86
 {
86 87
     char buf[16];
87 88
 
89
+    short current, total;
88 90
     avio_rb16(pb); // unknown
89
-    snprintf(buf, sizeof(buf), "%d", avio_rb16(pb));
90
-    av_dict_set(&c->fc->metadata, type, buf, 0);
91
-
92
-    avio_rb16(pb); // total tracks/discs
91
+    current = avio_rb16(pb);
92
+    total = avio_rb16(pb);
93
+    if (!total)
94
+        snprintf(buf, sizeof(buf), "%d", current);
95
+    else
96
+        snprintf(buf, sizeof(buf), "%d/%d", current, total);
97
+    av_dict_set(&c->fc->metadata, key, buf, 0);
93 98
 
94 99
     return 0;
95 100
 }
... ...
@@ -140,13 +145,13 @@ static int mov_read_udta_string(MOVContext *c, AVIOContext *pb, MOVAtom atom)
140 140
     const char *key = NULL;
141 141
     uint16_t str_size, langcode = 0;
142 142
     uint32_t data_type = 0;
143
-    int (*parse)(MOVContext*, AVIOContext*, unsigned, const char *) = NULL;
143
+    int (*parse)(MOVContext*, AVIOContext*, unsigned, const char*) = NULL;
144 144
 
145 145
     switch (atom.type) {
146 146
     case MKTAG(0xa9,'n','a','m'): key = "title";     break;
147 147
     case MKTAG(0xa9,'a','u','t'):
148 148
     case MKTAG(0xa9,'A','R','T'): key = "artist";    break;
149
-    case MKTAG( 'a','A','R','T'): key = "album_artist";break;
149
+    case MKTAG( 'a','A','R','T'): key = "album_artist";    break;
150 150
     case MKTAG(0xa9,'w','r','t'): key = "composer";  break;
151 151
     case MKTAG( 'c','p','r','t'):
152 152
     case MKTAG(0xa9,'c','p','y'): key = "copyright"; break;
... ...
@@ -268,7 +273,7 @@ static int mov_read_default(MOVContext *c, AVIOContext *pb, MOVAtom atom)
268 268
         int (*parse)(MOVContext*, AVIOContext*, MOVAtom) = NULL;
269 269
         a.size = atom.size;
270 270
         a.type=0;
271
-        if(atom.size >= 8) {
271
+        if (atom.size >= 8) {
272 272
             a.size = avio_rb32(pb);
273 273
             a.type = avio_rl32(pb);
274 274
         }
... ...
@@ -285,7 +290,7 @@ static int mov_read_default(MOVContext *c, AVIOContext *pb, MOVAtom atom)
285 285
                 break;
286 286
         }
287 287
         a.size -= 8;
288
-        if(a.size < 0)
288
+        if (a.size < 0)
289 289
             break;
290 290
         a.size = FFMIN(a.size, atom.size - total_size);
291 291
 
... ...
@@ -452,11 +457,11 @@ static int mov_read_hdlr(MOVContext *c, AVIOContext *pb, MOVAtom atom)
452 452
 
453 453
     if     (type == MKTAG('v','i','d','e'))
454 454
         st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
455
-    else if(type == MKTAG('s','o','u','n'))
455
+    else if (type == MKTAG('s','o','u','n'))
456 456
         st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
457
-    else if(type == MKTAG('m','1','a',' '))
457
+    else if (type == MKTAG('m','1','a',' '))
458 458
         st->codec->codec_id = CODEC_ID_MP2;
459
-    else if((type == MKTAG('s','u','b','p')) || (type == MKTAG('c','l','c','p')))
459
+    else if ((type == MKTAG('s','u','b','p')) || (type == MKTAG('c','l','c','p')))
460 460
         st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
461 461
 
462 462
     avio_rb32(pb); /* component  manufacture */
... ...
@@ -553,7 +558,7 @@ static int mov_read_pasp(MOVContext *c, AVIOContext *pb, MOVAtom atom)
553 553
 /* this atom contains actual media data */
554 554
 static int mov_read_mdat(MOVContext *c, AVIOContext *pb, MOVAtom atom)
555 555
 {
556
-    if(atom.size == 0) /* wrong one (MP4) */
556
+    if (atom.size == 0) /* wrong one (MP4) */
557 557
         return 0;
558 558
     c->found_mdat=1;
559 559
     return 0; /* now go for moov */
... ...
@@ -707,7 +712,7 @@ static int mov_read_smi(MOVContext *c, AVIOContext *pb, MOVAtom atom)
707 707
         return 0;
708 708
     st = c->fc->streams[c->fc->nb_streams-1];
709 709
 
710
-    if((uint64_t)atom.size > (1<<30))
710
+    if ((uint64_t)atom.size > (1<<30))
711 711
         return -1;
712 712
 
713 713
     // currently SVQ3 decoder expect full STSD header - so let's fake it
... ...
@@ -771,10 +776,10 @@ static int mov_read_extradata(MOVContext *c, AVIOContext *pb, MOVAtom atom,
771 771
         return 0; /* unexpected codec_id - don't mess with extradata */
772 772
 
773 773
     size= (uint64_t)st->codec->extradata_size + atom.size + 8 + FF_INPUT_BUFFER_PADDING_SIZE;
774
-    if(size > INT_MAX || (uint64_t)atom.size > INT_MAX)
774
+    if (size > INT_MAX || (uint64_t)atom.size > INT_MAX)
775 775
         return -1;
776 776
     buf= av_realloc(st->codec->extradata, size);
777
-    if(!buf)
777
+    if (!buf)
778 778
         return -1;
779 779
     st->codec->extradata= buf;
780 780
     buf+= st->codec->extradata_size;
... ...
@@ -814,7 +819,7 @@ static int mov_read_wave(MOVContext *c, AVIOContext *pb, MOVAtom atom)
814 814
         return 0;
815 815
     st = c->fc->streams[c->fc->nb_streams-1];
816 816
 
817
-    if((uint64_t)atom.size > (1<<30))
817
+    if ((uint64_t)atom.size > (1<<30))
818 818
         return -1;
819 819
 
820 820
     if (st->codec->codec_id == CODEC_ID_QDM2 || st->codec->codec_id == CODEC_ID_QDMC) {
... ...
@@ -845,7 +850,7 @@ static int mov_read_glbl(MOVContext *c, AVIOContext *pb, MOVAtom atom)
845 845
         return 0;
846 846
     st = c->fc->streams[c->fc->nb_streams-1];
847 847
 
848
-    if((uint64_t)atom.size > (1<<30))
848
+    if ((uint64_t)atom.size > (1<<30))
849 849
         return -1;
850 850
 
851 851
     av_free(st->codec->extradata);
... ...
@@ -872,7 +877,7 @@ static int mov_read_strf(MOVContext *c, AVIOContext *pb, MOVAtom atom)
872 872
         return 0;
873 873
     st = c->fc->streams[c->fc->nb_streams-1];
874 874
 
875
-    if((uint64_t)atom.size > (1<<30))
875
+    if ((uint64_t)atom.size > (1<<30))
876 876
         return -1;
877 877
 
878 878
     av_free(st->codec->extradata);
... ...
@@ -901,7 +906,7 @@ static int mov_read_stco(MOVContext *c, AVIOContext *pb, MOVAtom atom)
901 901
 
902 902
     entries = avio_rb32(pb);
903 903
 
904
-    if(entries >= UINT_MAX/sizeof(int64_t))
904
+    if (entries >= UINT_MAX/sizeof(int64_t))
905 905
         return -1;
906 906
 
907 907
     sc->chunk_offsets = av_malloc(entries * sizeof(int64_t));
... ...
@@ -910,10 +915,10 @@ static int mov_read_stco(MOVContext *c, AVIOContext *pb, MOVAtom atom)
910 910
     sc->chunk_count = entries;
911 911
 
912 912
     if      (atom.type == MKTAG('s','t','c','o'))
913
-        for(i=0; i<entries; i++)
913
+        for (i=0; i<entries; i++)
914 914
             sc->chunk_offsets[i] = avio_rb32(pb);
915 915
     else if (atom.type == MKTAG('c','o','6','4'))
916
-        for(i=0; i<entries; i++)
916
+        for (i=0; i<entries; i++)
917 917
             sc->chunk_offsets[i] = avio_rb64(pb);
918 918
     else
919 919
         return -1;
... ...
@@ -967,7 +972,7 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)
967 967
     st = c->fc->streams[c->fc->nb_streams-1];
968 968
     sc = st->priv_data;
969 969
 
970
-    for(pseudo_stream_id=0; pseudo_stream_id<entries; pseudo_stream_id++) {
970
+    for (pseudo_stream_id=0; pseudo_stream_id<entries; pseudo_stream_id++) {
971 971
         //Parsing Sample description table
972 972
         enum CodecID id;
973 973
         int dref_id = 1;
... ...
@@ -1015,9 +1020,9 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)
1015 1015
                 id = ff_codec_get_id(ff_codec_bmp_tags, format);
1016 1016
             if (id > 0)
1017 1017
                 st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
1018
-            else if(st->codec->codec_type == AVMEDIA_TYPE_DATA){
1018
+            else if (st->codec->codec_type == AVMEDIA_TYPE_DATA){
1019 1019
                 id = ff_codec_get_id(ff_codec_movsubtitle_tags, format);
1020
-                if(id > 0)
1020
+                if (id > 0)
1021 1021
                     st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
1022 1022
             }
1023 1023
         }
... ...
@@ -1026,7 +1031,7 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)
1026 1026
                 (format >> 0) & 0xff, (format >> 8) & 0xff, (format >> 16) & 0xff,
1027 1027
                 (format >> 24) & 0xff, st->codec->codec_type);
1028 1028
 
1029
-        if(st->codec->codec_type==AVMEDIA_TYPE_VIDEO) {
1029
+        if (st->codec->codec_type==AVMEDIA_TYPE_VIDEO) {
1030 1030
             unsigned int color_depth, len;
1031 1031
             int color_greyscale;
1032 1032
 
... ...
@@ -1132,7 +1137,7 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)
1132 1132
                 }
1133 1133
                 sc->has_palette = 1;
1134 1134
             }
1135
-        } else if(st->codec->codec_type==AVMEDIA_TYPE_AUDIO) {
1135
+        } else if (st->codec->codec_type==AVMEDIA_TYPE_AUDIO) {
1136 1136
             int bits_per_sample, flags;
1137 1137
             uint16_t version = avio_rb16(pb);
1138 1138
 
... ...
@@ -1151,13 +1156,13 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)
1151 1151
 
1152 1152
             //Read QT version 1 fields. In version 0 these do not exist.
1153 1153
             av_dlog(c->fc, "version =%d, isom =%d\n",version,c->isom);
1154
-            if(!c->isom) {
1155
-                if(version==1) {
1154
+            if (!c->isom) {
1155
+                if (version==1) {
1156 1156
                     sc->samples_per_frame = avio_rb32(pb);
1157 1157
                     avio_rb32(pb); /* bytes per packet */
1158 1158
                     sc->bytes_per_frame = avio_rb32(pb);
1159 1159
                     avio_rb32(pb); /* bytes per sample */
1160
-                } else if(version==2) {
1160
+                } else if (version==2) {
1161 1161
                     avio_rb32(pb); /* sizeof struct only */
1162 1162
                     st->codec->sample_rate = av_int2dbl(avio_rb64(pb)); /* float 64 */
1163 1163
                     st->codec->channels = avio_rb32(pb);
... ...
@@ -1212,7 +1217,7 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)
1212 1212
                 st->codec->bits_per_coded_sample = bits_per_sample;
1213 1213
                 sc->sample_size = (bits_per_sample >> 3) * st->codec->channels;
1214 1214
             }
1215
-        } else if(st->codec->codec_type==AVMEDIA_TYPE_SUBTITLE){
1215
+        } else if (st->codec->codec_type==AVMEDIA_TYPE_SUBTITLE){
1216 1216
             // ttxt stsd contains display flags, justification, background
1217 1217
             // color, fonts, and default styles, so fake an atom to read it
1218 1218
             MOVAtom fake_atom = { .size = size - (avio_tell(pb) - start_pos) };
... ...
@@ -1245,7 +1250,7 @@ int ff_mov_read_stsd_entries(MOVContext *c, AVIOContext *pb, int entries)
1245 1245
             avio_skip(pb, a.size);
1246 1246
     }
1247 1247
 
1248
-    if(st->codec->codec_type==AVMEDIA_TYPE_AUDIO && st->codec->sample_rate==0 && sc->time_scale>1)
1248
+    if (st->codec->codec_type==AVMEDIA_TYPE_AUDIO && st->codec->sample_rate==0 && sc->time_scale>1)
1249 1249
         st->codec->sample_rate= sc->time_scale;
1250 1250
 
1251 1251
     /* special codec parameters handling */
... ...
@@ -1337,14 +1342,14 @@ static int mov_read_stsc(MOVContext *c, AVIOContext *pb, MOVAtom atom)
1337 1337
 
1338 1338
     av_dlog(c->fc, "track[%i].stsc.entries = %i\n", c->fc->nb_streams-1, entries);
1339 1339
 
1340
-    if(entries >= UINT_MAX / sizeof(*sc->stsc_data))
1340
+    if (entries >= UINT_MAX / sizeof(*sc->stsc_data))
1341 1341
         return -1;
1342 1342
     sc->stsc_data = av_malloc(entries * sizeof(*sc->stsc_data));
1343 1343
     if (!sc->stsc_data)
1344 1344
         return AVERROR(ENOMEM);
1345 1345
     sc->stsc_count = entries;
1346 1346
 
1347
-    for(i=0; i<entries; i++) {
1347
+    for (i=0; i<entries; i++) {
1348 1348
         sc->stsc_data[i].first = avio_rb32(pb);
1349 1349
         sc->stsc_data[i].count = avio_rb32(pb);
1350 1350
         sc->stsc_data[i].id = avio_rb32(pb);
... ...
@@ -1399,14 +1404,14 @@ static int mov_read_stss(MOVContext *c, AVIOContext *pb, MOVAtom atom)
1399 1399
 
1400 1400
     av_dlog(c->fc, "keyframe_count = %d\n", entries);
1401 1401
 
1402
-    if(entries >= UINT_MAX / sizeof(int))
1402
+    if (entries >= UINT_MAX / sizeof(int))
1403 1403
         return -1;
1404 1404
     sc->keyframes = av_malloc(entries * sizeof(int));
1405 1405
     if (!sc->keyframes)
1406 1406
         return AVERROR(ENOMEM);
1407 1407
     sc->keyframe_count = entries;
1408 1408
 
1409
-    for(i=0; i<entries; i++) {
1409
+    for (i=0; i<entries; i++) {
1410 1410
         sc->keyframes[i] = avio_rb32(pb);
1411 1411
         //av_dlog(c->fc, "keyframes[]=%d\n", sc->keyframes[i]);
1412 1412
     }
... ...
@@ -1474,7 +1479,7 @@ static int mov_read_stsz(MOVContext *c, AVIOContext *pb, MOVAtom atom)
1474 1474
 
1475 1475
     init_get_bits(&gb, buf, 8*num_bytes);
1476 1476
 
1477
-    for(i=0; i<entries; i++)
1477
+    for (i=0; i<entries; i++)
1478 1478
         sc->sample_sizes[i] = get_bits_long(&gb, field_size);
1479 1479
 
1480 1480
     av_free(buf);
... ...
@@ -1498,16 +1503,19 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
1498 1498
     avio_rb24(pb); /* flags */
1499 1499
     entries = avio_rb32(pb);
1500 1500
 
1501
-    av_dlog(c->fc, "track[%i].stts.entries = %i\n", c->fc->nb_streams-1, entries);
1501
+    av_dlog(c->fc, "track[%i].stts.entries = %i\n",
1502
+            c->fc->nb_streams-1, entries);
1503
+
1504
+    if (!entries || entries >= UINT_MAX / sizeof(*sc->stts_data))
1505
+        return AVERROR(EINVAL);
1502 1506
 
1503
-    if(entries >= UINT_MAX / sizeof(*sc->stts_data))
1504
-        return -1;
1505 1507
     sc->stts_data = av_malloc(entries * sizeof(*sc->stts_data));
1506 1508
     if (!sc->stts_data)
1507 1509
         return AVERROR(ENOMEM);
1510
+
1508 1511
     sc->stts_count = entries;
1509 1512
 
1510
-    for(i=0; i<entries; i++) {
1513
+    for (i=0; i<entries; i++) {
1511 1514
         int sample_duration;
1512 1515
         int sample_count;
1513 1516
 
... ...
@@ -1521,14 +1529,15 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
1521 1521
         sc->stts_data[i].count= sample_count;
1522 1522
         sc->stts_data[i].duration= sample_duration;
1523 1523
 
1524
-        av_dlog(c->fc, "sample_count=%d, sample_duration=%d\n",sample_count,sample_duration);
1524
+        av_dlog(c->fc, "sample_count=%d, sample_duration=%d\n",
1525
+                sample_count, sample_duration);
1525 1526
 
1526 1527
         duration+=(int64_t)sample_duration*sample_count;
1527 1528
         total_sample_count+=sample_count;
1528 1529
     }
1529 1530
 
1530 1531
     st->nb_frames= total_sample_count;
1531
-    if(duration)
1532
+    if (duration)
1532 1533
         st->duration= duration;
1533 1534
     return 0;
1534 1535
 }
... ...
@@ -1550,14 +1559,14 @@ static int mov_read_ctts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
1550 1550
 
1551 1551
     av_dlog(c->fc, "track[%i].ctts.entries = %i\n", c->fc->nb_streams-1, entries);
1552 1552
 
1553
-    if(entries >= UINT_MAX / sizeof(*sc->ctts_data))
1553
+    if (entries >= UINT_MAX / sizeof(*sc->ctts_data))
1554 1554
         return -1;
1555 1555
     sc->ctts_data = av_malloc(entries * sizeof(*sc->ctts_data));
1556 1556
     if (!sc->ctts_data)
1557 1557
         return AVERROR(ENOMEM);
1558 1558
     sc->ctts_count = entries;
1559 1559
 
1560
-    for(i=0; i<entries; i++) {
1560
+    for (i=0; i<entries; i++) {
1561 1561
         int count    =avio_rb32(pb);
1562 1562
         int duration =avio_rb32(pb);
1563 1563
 
... ...
@@ -1640,7 +1649,7 @@ static void mov_build_index(MOVContext *mov, AVStream *st)
1640 1640
                 if (keyframe)
1641 1641
                     distance = 0;
1642 1642
                 sample_size = sc->sample_size > 0 ? sc->sample_size : sc->sample_sizes[current_sample];
1643
-                if(sc->pseudo_stream_id == -1 ||
1643
+                if (sc->pseudo_stream_id == -1 ||
1644 1644
                    sc->stsc_data[stsc_index].id - 1 == sc->pseudo_stream_id) {
1645 1645
                     AVIndexEntry *e = &st->index_entries[st->nb_index_entries++];
1646 1646
                     e->pos = current_offset;
... ...
@@ -2209,9 +2218,9 @@ static int mov_read_cmov(MOVContext *c, AVIOContext *pb, MOVAtom atom)
2209 2209
         return AVERROR(ENOMEM);
2210 2210
     }
2211 2211
     avio_read(pb, cmov_data, cmov_len);
2212
-    if(uncompress (moov_data, (uLongf *) &moov_len, (const Bytef *)cmov_data, cmov_len) != Z_OK)
2212
+    if (uncompress (moov_data, (uLongf *) &moov_len, (const Bytef *)cmov_data, cmov_len) != Z_OK)
2213 2213
         goto free_and_return;
2214
-    if(ffio_init_context(&ctx, moov_data, moov_len, 0, NULL, NULL, NULL, NULL) != 0)
2214
+    if (ffio_init_context(&ctx, moov_data, moov_len, 0, NULL, NULL, NULL, NULL) != 0)
2215 2215
         goto free_and_return;
2216 2216
     atom.type = MKTAG('m','o','o','v');
2217 2217
     atom.size = moov_len;
... ...
@@ -2240,10 +2249,10 @@ static int mov_read_elst(MOVContext *c, AVIOContext *pb, MOVAtom atom)
2240 2240
     avio_rb24(pb); /* flags */
2241 2241
     edit_count = avio_rb32(pb); /* entries */
2242 2242
 
2243
-    if((uint64_t)edit_count*12+8 > atom.size)
2243
+    if ((uint64_t)edit_count*12+8 > atom.size)
2244 2244
         return -1;
2245 2245
 
2246
-    for(i=0; i<edit_count; i++){
2246
+    for (i=0; i<edit_count; i++){
2247 2247
         int64_t time;
2248 2248
         int64_t duration;
2249 2249
         if (version == 1) {
... ...
@@ -2259,7 +2268,7 @@ static int mov_read_elst(MOVContext *c, AVIOContext *pb, MOVAtom atom)
2259 2259
         }
2260 2260
     }
2261 2261
 
2262
-    if(edit_count > 1)
2262
+    if (edit_count > 1)
2263 2263
         av_log(c->fc, AV_LOG_WARNING, "multiple edit list entries, "
2264 2264
                "a/v desync might occur, patch welcome\n");
2265 2265
 
... ...
@@ -2342,7 +2351,7 @@ static int mov_probe(AVProbeData *p)
2342 2342
 
2343 2343
     /* check file header */
2344 2344
     offset = 0;
2345
-    for(;;) {
2345
+    for (;;) {
2346 2346
         /* ignore invalid offset */
2347 2347
         if ((offset + 8) > (unsigned int)p->buf_size)
2348 2348
             return score;
... ...
@@ -2450,7 +2459,7 @@ static int mov_read_header(AVFormatContext *s, AVFormatParameters *ap)
2450 2450
 
2451 2451
     mov->fc = s;
2452 2452
     /* .mov and .mp4 aren't streamable anyway (only progressive download if moov is before mdat) */
2453
-    if(pb->seekable)
2453
+    if (pb->seekable)
2454 2454
         atom.size = avio_size(pb);
2455 2455
     else
2456 2456
         atom.size = INT64_MAX;
... ...
@@ -2661,7 +2670,7 @@ static int mov_read_close(AVFormatContext *s)
2661 2661
     }
2662 2662
 
2663 2663
     if (mov->dv_demux) {
2664
-        for(i = 0; i < mov->dv_fctx->nb_streams; i++) {
2664
+        for (i = 0; i < mov->dv_fctx->nb_streams; i++) {
2665 2665
             av_freep(&mov->dv_fctx->streams[i]->codec);
2666 2666
             av_freep(&mov->dv_fctx->streams[i]);
2667 2667
         }
... ...
@@ -31,11 +31,11 @@
31 31
 #define SMACKER_FLAG_RING_FRAME 0x01
32 32
 
33 33
 enum SAudFlags {
34
-    SMK_AUD_PACKED  = 0x80000000,
35
-    SMK_AUD_16BITS  = 0x20000000,
36
-    SMK_AUD_STEREO  = 0x10000000,
37
-    SMK_AUD_BINKAUD = 0x08000000,
38
-    SMK_AUD_USEDCT  = 0x04000000
34
+    SMK_AUD_PACKED  = 0x80,
35
+    SMK_AUD_16BITS  = 0x20,
36
+    SMK_AUD_STEREO  = 0x10,
37
+    SMK_AUD_BINKAUD = 0x08,
38
+    SMK_AUD_USEDCT  = 0x04
39 39
 };
40 40
 
41 41
 typedef struct SmackerContext {
... ...
@@ -48,6 +48,7 @@ typedef struct SmackerContext {
48 48
     uint32_t audio[7];
49 49
     uint32_t treesize;
50 50
     uint32_t mmap_size, mclr_size, full_size, type_size;
51
+    uint8_t  aflags[7];
51 52
     uint32_t rates[7];
52 53
     uint32_t pad;
53 54
     /* frame info */
... ...
@@ -129,8 +130,10 @@ static int smacker_read_header(AVFormatContext *s, AVFormatParameters *ap)
129 129
     smk->mclr_size = avio_rl32(pb);
130 130
     smk->full_size = avio_rl32(pb);
131 131
     smk->type_size = avio_rl32(pb);
132
-    for(i = 0; i < 7; i++)
133
-        smk->rates[i] = avio_rl32(pb);
132
+    for(i = 0; i < 7; i++) {
133
+        smk->rates[i]  = avio_rl24(pb);
134
+        smk->aflags[i] = avio_r8(pb);
135
+    }
134 136
     smk->pad = avio_rl32(pb);
135 137
     /* setup data */
136 138
     if(smk->frames > 0xFFFFFF) {
... ...
@@ -173,23 +176,23 @@ static int smacker_read_header(AVFormatContext *s, AVFormatParameters *ap)
173 173
     /* handle possible audio streams */
174 174
     for(i = 0; i < 7; i++) {
175 175
         smk->indexes[i] = -1;
176
-        if(smk->rates[i] & 0xFFFFFF){
176
+        if (smk->rates[i]) {
177 177
             ast[i] = av_new_stream(s, 0);
178 178
             smk->indexes[i] = ast[i]->index;
179 179
             ast[i]->codec->codec_type = AVMEDIA_TYPE_AUDIO;
180
-            if (smk->rates[i] & SMK_AUD_BINKAUD) {
180
+            if (smk->aflags[i] & SMK_AUD_BINKAUD) {
181 181
                 ast[i]->codec->codec_id = CODEC_ID_BINKAUDIO_RDFT;
182
-            } else if (smk->rates[i] & SMK_AUD_USEDCT) {
182
+            } else if (smk->aflags[i] & SMK_AUD_USEDCT) {
183 183
                 ast[i]->codec->codec_id = CODEC_ID_BINKAUDIO_DCT;
184
-            } else if (smk->rates[i] & SMK_AUD_PACKED){
184
+            } else if (smk->aflags[i] & SMK_AUD_PACKED){
185 185
                 ast[i]->codec->codec_id = CODEC_ID_SMACKAUDIO;
186 186
                 ast[i]->codec->codec_tag = MKTAG('S', 'M', 'K', 'A');
187 187
             } else {
188 188
                 ast[i]->codec->codec_id = CODEC_ID_PCM_U8;
189 189
             }
190
-            ast[i]->codec->channels = (smk->rates[i] & SMK_AUD_STEREO) ? 2 : 1;
191
-            ast[i]->codec->sample_rate = smk->rates[i] & 0xFFFFFF;
192
-            ast[i]->codec->bits_per_coded_sample = (smk->rates[i] & SMK_AUD_16BITS) ? 16 : 8;
190
+            ast[i]->codec->channels = (smk->aflags[i] & SMK_AUD_STEREO) ? 2 : 1;
191
+            ast[i]->codec->sample_rate = smk->rates[i];
192
+            ast[i]->codec->bits_per_coded_sample = (smk->aflags[i] & SMK_AUD_16BITS) ? 16 : 8;
193 193
             if(ast[i]->codec->bits_per_coded_sample == 16 && ast[i]->codec->codec_id == CODEC_ID_PCM_U8)
194 194
                 ast[i]->codec->codec_id = CODEC_ID_PCM_S16LE;
195 195
             av_set_pts_info(ast[i], 64, 1, ast[i]->codec->sample_rate
... ...
@@ -30,13 +30,13 @@
30 30
 #include "intfloat_readwrite.h"
31 31
 
32 32
 double av_int2dbl(int64_t v){
33
-    if(v+v > 0xFFEULL<<52)
33
+    if((uint64_t)v+v > 0xFFEULL<<52)
34 34
         return NAN;
35 35
     return ldexp(((v&((1LL<<52)-1)) + (1LL<<52)) * (v>>63|1), (v>>52&0x7FF)-1075);
36 36
 }
37 37
 
38 38
 float av_int2flt(int32_t v){
39
-    if(v+v > 0xFF000000U)
39
+    if((uint32_t)v+v > 0xFF000000U)
40 40
         return NAN;
41 41
     return ldexp(((v&0x7FFFFF) + (1<<23)) * (v>>31|1), (v>>23&0xFF)-150);
42 42
 }
... ...
@@ -369,7 +369,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7
369 369
     cvtps2dq      m0, m0
370 370
 %endif ; mmx/sse2/ssse3/sse4
371 371
 %ifnidn %3, X
372
-    movu [r1+r2*(4>>r2shr)], m0
372
+    mova [r1+r2*(4>>r2shr)], m0
373 373
 %else ; %3 == X
374 374
     movq   [r1+r2*4], m0
375 375
 %endif ; %3 ==/!= X