Browse code

optimization

Originally committed as revision 3639 to svn://svn.ffmpeg.org/ffmpeg/trunk

Michael Niedermayer authored on 2004/10/26 12:12:21
Showing 2 changed files
... ...
@@ -69,6 +69,25 @@ const uint8_t ff_h264_lps_state[64]= {
69 69
  36,36,37,37,37,38,38,63,
70 70
 };
71 71
 
72
+const uint8_t ff_h264_norm_shift[256]= { /* lookup: entry 0 is 8, entry v>0 is 7 - floor(log2(v)), so every entry from 128 upward is 0 */
73
+ 8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4,
74
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
75
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
76
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
77
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
78
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
79
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
80
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
81
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
82
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
83
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
84
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
85
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
86
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
87
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
88
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
89
+};
90
+
72 91
 /**
73 92
  *
74 93
  * @param buf_size size of buf in bits
... ...
@@ -95,10 +114,14 @@ void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size){
95 95
     c->bytestream= buf;
96 96
     c->bytestream_end= buf + buf_size;
97 97
 
98
-    c->low= *c->bytestream++;
99
-    c->low= (c->low<<9) + ((*c->bytestream++)<<1);
100
-    c->range= 0x1FE00;
101
-    c->bits_left= 7;
98
+#if CABAC_BITS == 16
99
+    c->low =  (*c->bytestream++)<<18;
100
+    c->low+=  (*c->bytestream++)<<10;
101
+#else
102
+    c->low =  (*c->bytestream++)<<10;
103
+#endif
104
+    c->low+= ((*c->bytestream++)<<2) + 2;
105
+    c->range= 0x1FE<<(CABAC_BITS + 1);
102 106
 }
103 107
 
104 108
 void ff_init_cabac_states(CABACContext *c, uint8_t const (*lps_range)[4], 
... ...
@@ -107,8 +130,8 @@ void ff_init_cabac_states(CABACContext *c, uint8_t const (*lps_range)[4],
107 107
     
108 108
     for(i=0; i<state_count; i++){
109 109
         for(j=0; j<4; j++){ //FIXME check if this is worth the 1 shift we save
110
-            c->lps_range[2*i+0][j]=
111
-            c->lps_range[2*i+1][j]= lps_range[i][j];
110
+            c->lps_range[2*i+0][j+4]=
111
+            c->lps_range[2*i+1][j+4]= lps_range[i][j];
112 112
         }
113 113
 
114 114
         c->mps_state[2*i+0]= 2*mps_state[i];
... ...
@@ -27,6 +27,9 @@
27 27
 #undef NDEBUG
28 28
 #include <assert.h>
29 29
 
30
+#define CABAC_BITS 8
31
+#define CABAC_MASK ((1<<CABAC_BITS)-1)
32
+
30 33
 typedef struct CABACContext{
31 34
     int low;
32 35
     int range;
... ...
@@ -34,19 +37,20 @@ typedef struct CABACContext{
34 34
 #ifdef STRICT_LIMITS
35 35
     int symCount;
36 36
 #endif
37
-    uint8_t lps_range[2*64][4];   ///< rangeTabLPS
37
+    uint8_t lps_range[2*65][4];   ///< rangeTabLPS
38 38
     uint8_t lps_state[2*64];      ///< transIdxLPS
39 39
     uint8_t mps_state[2*64];      ///< transIdxMPS
40 40
     const uint8_t *bytestream_start;
41 41
     const uint8_t *bytestream;
42 42
     const uint8_t *bytestream_end;
43
-    int bits_left;                ///<
44 43
     PutBitContext pb;
45 44
 }CABACContext;
46 45
 
47 46
 extern const uint8_t ff_h264_lps_range[64][4];
48 47
 extern const uint8_t ff_h264_mps_state[64];
49 48
 extern const uint8_t ff_h264_lps_state[64];
49
+extern const uint8_t ff_h264_norm_shift[256];
50
+
50 51
 
51 52
 void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
52 53
 void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
... ...
@@ -80,7 +84,7 @@ static inline void renorm_cabac_encoder(CABACContext *c){
80 80
 }
81 81
 
82 82
 static inline void put_cabac(CABACContext *c, uint8_t * const state, int bit){
83
-    int RangeLPS= c->lps_range[*state][((c->range)>>6)&3];
83
+    int RangeLPS= c->lps_range[*state][c->range>>6];
84 84
     
85 85
     if(bit == ((*state)&1)){
86 86
         c->range -= RangeLPS;
... ...
@@ -249,63 +253,101 @@ static inline void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int ma
249 249
     }
250 250
 }
251 251
 
252
+static void refill(CABACContext *c){ /* Feeds the next CABAC_BITS/8 input byte(s) into c->low; once bytestream has reached bytestream_end nothing is added, but the mask subtraction and pointer advance below still happen */
253
+    if(c->bytestream < c->bytestream_end)
254
+#if CABAC_BITS == 16
255
+        c->low+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1); /* each byte is shifted on its own (<<9 and <<1), the same layout refill2 uses; NOTE(review): only bytestream[0] is bounds-checked before bytestream[1] is read */
256
+#else
257
+        c->low+= c->bytestream[0]<<1;
258
+#endif
259
+    c->low -= CABAC_MASK; /* unconditional: applied whether or not a byte was read above */
260
+    c->bytestream+= CABAC_BITS/8; /* advances even past bytestream_end */
261
+}
262
+
263
+static void refill2(CABACContext *c){ /* Variant of refill for callers that shifted c->low by several bits at once: the new input is added at a shift derived from the lowest set bit of c->low */
264
+    int i, x;
265
+
266
+    x= c->low ^ (c->low-1); /* mask covering the lowest set bit of c->low and every bit below it */
267
+    i= 8 - ff_h264_norm_shift[x>>(CABAC_BITS+1)]; /* shift at which the new data is placed, 0..8 given the table's value range */
268
+
269
+    x= -CABAC_MASK; /* x is reused: from here on it holds the value to add to c->low */
270
+    
271
+    if(c->bytestream < c->bytestream_end)
272
+#if CABAC_BITS == 16
273
+        x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
274
+#else
275
+        x+= c->bytestream[0]<<1;
276
+#endif
277
+    
278
+    c->low += x*(1<<i); /* same value as x<<i, but x is negative when no byte was read or the byte term is below CABAC_MASK, and left-shifting a negative int is undefined in C */
279
+    c->bytestream+= CABAC_BITS/8; /* advances even past bytestream_end */
280
+}
281
+
282
+
252 283
 static inline void renorm_cabac_decoder(CABACContext *c){ /* Doubles c->range and c->low until range is at least 0x200<<CABAC_BITS, calling refill() after any doubling that leaves the low CABAC_MASK bits of c->low all zero */
253
-    while(c->range < 0x10000){
284
+    while(c->range < (0x200 << CABAC_BITS)){
254 285
         c->range+= c->range;
255 286
         c->low+= c->low;
256
-        if(--c->bits_left == 0){
257
-            if(c->bytestream < c->bytestream_end)
258
-                c->low+= *c->bytestream;
259
-            c->bytestream++;
260
-            c->bits_left= 8;
261
-        }
287
+        if(!(c->low & CABAC_MASK)) /* replaces the removed bits_left counter: the refill point is detected from c->low itself */
288
+            refill(c);
262 289
     }
263 290
 }
264 291
 
292
+static inline void renorm_cabac_decoder_once(CABACContext *c){ /* Loop-free renormalisation: doubles c->range and c->low at most once, then refills if the low CABAC_MASK bits of c->low are all zero */
293
+    int mask= (c->range - (0x200 << CABAC_BITS))>>31; /* presumably all-ones when range is below the threshold and 0 otherwise; relies on 32-bit int and an arithmetic right shift of negative values - TODO confirm for the target compilers */
294
+    c->range+= c->range&mask; /* adds range to itself only when mask is all-ones */
295
+    c->low  += c->low  &mask;
296
+    if(!(c->low & CABAC_MASK)) /* checked even when no doubling took place */
297
+        refill(c);
298
+}
299
+
265 300
 static inline int get_cabac(CABACContext *c, uint8_t * const state){ /* Decodes one context-coded bit: updates *state through the mps_state/lps_state tables, renormalises the decoder and returns the bit */
266
-    int RangeLPS= c->lps_range[*state][((c->range)>>14)&3]<<8;
267
-    int bit;
301
+    int RangeLPS= c->lps_range[*state][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1); /* column index is no longer masked with &3; ff_init_cabac_states stores the entries at columns j+4 to match */
302
+    int bit, lps_mask;
268 303
     
269 304
     c->range -= RangeLPS;
305
+#if 1 /* branching variant, the one that is compiled; the #else part is a branch-free alternative that is currently compiled out */
270 306
     if(c->low < c->range){
271 307
         bit= (*state)&1;
272 308
         *state= c->mps_state[*state];
309
+        renorm_cabac_decoder_once(c); /* this path renormalises with at most one doubling */
273 310
     }else{
311
+//        int shift= ff_h264_norm_shift[RangeLPS>>17];
274 312
         bit= ((*state)&1)^1;
275 313
         c->low -= c->range;
276
-        c->range = RangeLPS;
277 314
         *state= c->lps_state[*state];
315
+        c->range = RangeLPS;
316
+        renorm_cabac_decoder(c); /* looping renormalisation: range was just replaced by RangeLPS */
317
+/*        c->range = RangeLPS<<shift;
318
+        c->low <<= shift;
319
+        if(!(c->low & 0xFFFF)){
320
+            refill2(c);
321
+        }*/
278 322
     }
279
-    renorm_cabac_decoder(c);
323
+#else
324
+    lps_mask= (c->range - c->low)>>31; /* presumably all-ones when low exceeds range, else 0; relies on 32-bit int and arithmetic right shift - TODO confirm */
280 325
     
281
-    return bit;    
282
-}
283
-
284
-static inline int get_cabac_static(CABACContext *c, int RangeLPS){
285
-    int bit;
326
+    c->low -= c->range & lps_mask;
327
+    c->range += (RangeLPS - c->range) & lps_mask;
286 328
     
287
-    c->range -= RangeLPS;
288
-    if(c->low < c->range){
289
-        bit= 0;
290
-    }else{
291
-        bit= 1;
292
-        c->low -= c->range;
293
-        c->range = RangeLPS;
294
-    }
295
-    renorm_cabac_decoder(c);
329
+    bit= ((*state)^lps_mask)&1;
330
+    *state= c->mps_state[(*state) - (128&lps_mask)]; /* NOTE(review): with lps_mask all-ones the index is *state-128, i.e. negative; presumably relies on lps_state sitting directly before mps_state in CABACContext - confirm before enabling this branch */
296 331
     
332
+    lps_mask= ff_h264_norm_shift[c->range>>(CABAC_BITS+2)]; /* lps_mask is reused here as a shift count */
333
+    c->range<<= lps_mask;
334
+    c->low  <<= lps_mask;
335
+    if(!(c->low & CABAC_MASK))
336
+        refill2(c);
337
+#endif
338
+
297 339
     return bit;    
298 340
 }
299 341
 
300 342
 static inline int get_cabac_bypass(CABACContext *c){
301 343
     c->low += c->low;
302 344
 
303
-    if(--c->bits_left == 0){
304
-        if(c->bytestream < c->bytestream_end)
305
-            c->low+= *c->bytestream;
306
-        c->bytestream++;
307
-        c->bits_left= 8;
308
-    }
345
+    if(!(c->low & CABAC_MASK))
346
+        refill(c);
309 347
     
310 348
     if(c->low < c->range){
311 349
         return 0;
... ...
@@ -320,9 +362,9 @@ static inline int get_cabac_bypass(CABACContext *c){
320 320
  * @return the number of bytes read or 0 if no end
321 321
  */
322 322
 static inline int get_cabac_terminate(CABACContext *c){
323
-    c->range -= 2<<8;
323
+    c->range -= 4<<CABAC_BITS;
324 324
     if(c->low < c->range){
325
-        renorm_cabac_decoder(c);    
325
+        renorm_cabac_decoder_once(c);
326 326
         return 0;
327 327
     }else{
328 328
         return c->bytestream - c->bytestream_start;