Browse code

get_bits: remove A32 variant

The A32 bitstream reader variant is only used on ARMv5 and for
ProRes, due to the larger bit cache that decoder requires.

In benchmarks on ARMv5 (Marvell Sheeva) with gcc 4.6, the only
statistically significant difference between ALT and A32 is
a 4% advantage for ALT in FLAC decoding. There is thus no longer
any reason to keep the A32 reader from this point of view.

This patch adds an option to the ALT reader that increases the bit
cache to 32 bits, as required by the ProRes decoder. Benchmarking
shows no significant change in speed on Intel i7. Again, the
A32 reader fails to justify its existence.

Signed-off-by: Mans Rullgard <mans@mansr.com>
(cherry picked from commit a1e98f198e9db4e5ddfc2f777014179d3d7bc4d2)

Conflicts:

libavcodec/get_bits.h

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>

Mans Rullgard authored on 2011/12/17 06:19:50
Showing 4 changed files
... ...
@@ -37,7 +37,7 @@
37 37
  * @file
38 38
  * DV codec.
39 39
  */
40
-#define ALT_BITSTREAM_READER
40
+
41 41
 #include "libavutil/pixdesc.h"
42 42
 #include "avcodec.h"
43 43
 #include "dsputil.h"
... ...
@@ -35,31 +35,11 @@
35 35
 #include "libavutil/log.h"
36 36
 #include "mathops.h"
37 37
 
38
-#if defined(ALT_BITSTREAM_READER_LE) && !defined(ALT_BITSTREAM_READER)
39
-#   define ALT_BITSTREAM_READER
40
-#endif
41
-
42
-#if !defined(A32_BITSTREAM_READER) && !defined(ALT_BITSTREAM_READER)
43
-#   if ARCH_ARM && !HAVE_FAST_UNALIGNED
44
-#       define A32_BITSTREAM_READER
45
-#   else
46
-#       define ALT_BITSTREAM_READER
47
-//#define A32_BITSTREAM_READER
48
-#   endif
49
-#endif
50
-
51 38
 /* bit input */
52 39
 /* buffer, buffer_end and size_in_bits must be present and used by every reader */
53 40
 typedef struct GetBitContext {
54 41
     const uint8_t *buffer, *buffer_end;
55
-#ifdef ALT_BITSTREAM_READER
56 42
     int index;
57
-#elif defined A32_BITSTREAM_READER
58
-    uint32_t *buffer_ptr;
59
-    uint32_t cache0;
60
-    uint32_t cache1;
61
-    int bit_count;
62
-#endif
63 43
     int size_in_bits;
64 44
 } GetBitContext;
65 45
 
... ...
@@ -122,8 +102,11 @@ LAST_SKIP_BITS(name, gb, num)
122 122
 for examples see get_bits, show_bits, skip_bits, get_vlc
123 123
 */
124 124
 
125
-#ifdef ALT_BITSTREAM_READER
125
+#ifdef LONG_BITSTREAM_READER
126
+#   define MIN_CACHE_BITS 32
127
+#else
126 128
 #   define MIN_CACHE_BITS 25
129
+#endif
127 130
 
128 131
 #   define OPEN_READER(name, gb)                \
129 132
     unsigned int name##_index = (gb)->index;    \
... ...
@@ -132,13 +115,23 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
132 132
 #   define CLOSE_READER(name, gb) (gb)->index = name##_index
133 133
 
134 134
 # ifdef ALT_BITSTREAM_READER_LE
135
+# ifdef LONG_BITSTREAM_READER
136
+#   define UPDATE_CACHE(name, gb) \
137
+    name##_cache = AV_RL64((gb)->buffer+(name##_index>>3)) >> (name##_index&0x07)
138
+# else
135 139
 #   define UPDATE_CACHE(name, gb) \
136 140
     name##_cache = AV_RL32(((const uint8_t *)(gb)->buffer)+(name##_index>>3)) >> (name##_index&0x07)
141
+# endif
137 142
 
138 143
 #   define SKIP_CACHE(name, gb, num) name##_cache >>= (num)
139 144
 # else
145
+# ifdef LONG_BITSTREAM_READER
146
+#   define UPDATE_CACHE(name, gb) \
147
+    name##_cache = AV_RB64((gb)->buffer+(name##_index >> 3)) >> (32 - (name##_index & 0x07))
148
+# else
140 149
 #   define UPDATE_CACHE(name, gb) \
141 150
     name##_cache = AV_RB32(((const uint8_t *)(gb)->buffer)+(name##_index>>3)) << (name##_index&0x07)
151
+# endif
142 152
 
143 153
 #   define SKIP_CACHE(name, gb, num) name##_cache <<= (num)
144 154
 # endif
... ...
@@ -174,80 +167,6 @@ static inline void skip_bits_long(GetBitContext *s, int n){
174 174
     s->index += n;
175 175
 }
176 176
 
177
-#elif defined A32_BITSTREAM_READER
178
-
179
-#   define MIN_CACHE_BITS 32
180
-
181
-#   define OPEN_READER(name, gb)                        \
182
-    int name##_bit_count        = (gb)->bit_count;      \
183
-    uint32_t name##_cache0      = (gb)->cache0;         \
184
-    uint32_t name##_cache1      = (gb)->cache1;         \
185
-    uint32_t *name##_buffer_ptr = (gb)->buffer_ptr
186
-
187
-#   define CLOSE_READER(name, gb) do {          \
188
-        (gb)->bit_count  = name##_bit_count;    \
189
-        (gb)->cache0     = name##_cache0;       \
190
-        (gb)->cache1     = name##_cache1;       \
191
-        (gb)->buffer_ptr = name##_buffer_ptr;   \
192
-    } while (0)
193
-
194
-#   define UPDATE_CACHE(name, gb) do {                                  \
195
-        if(name##_bit_count > 0){                                       \
196
-            const uint32_t next = av_be2ne32(*name##_buffer_ptr);       \
197
-            name##_cache0 |= NEG_USR32(next, name##_bit_count);         \
198
-            name##_cache1 |= next << name##_bit_count;                  \
199
-            name##_buffer_ptr++;                                        \
200
-            name##_bit_count -= 32;                                     \
201
-        }                                                               \
202
-    } while (0)
203
-
204
-#if ARCH_X86
205
-#   define SKIP_CACHE(name, gb, num)                            \
206
-    __asm__("shldl %2, %1, %0          \n\t"                    \
207
-            "shll  %2, %1              \n\t"                    \
208
-            : "+r" (name##_cache0), "+r" (name##_cache1)        \
209
-            : "Ic" ((uint8_t)(num)))
210
-#else
211
-#   define SKIP_CACHE(name, gb, num) do {               \
212
-        name##_cache0 <<= (num);                        \
213
-        name##_cache0 |= NEG_USR32(name##_cache1,num);  \
214
-        name##_cache1 <<= (num);                        \
215
-    } while (0)
216
-#endif
217
-
218
-#   define SKIP_COUNTER(name, gb, num) name##_bit_count += (num)
219
-
220
-#   define SKIP_BITS(name, gb, num) do {        \
221
-        SKIP_CACHE(name, gb, num);              \
222
-        SKIP_COUNTER(name, gb, num);            \
223
-    } while (0)
224
-
225
-#   define LAST_SKIP_BITS(name, gb, num)  SKIP_BITS(name, gb, num)
226
-#   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
227
-
228
-#   define SHOW_UBITS(name, gb, num) NEG_USR32(name##_cache0, num)
229
-
230
-#   define SHOW_SBITS(name, gb, num) NEG_SSR32(name##_cache0, num)
231
-
232
-#   define GET_CACHE(name, gb) name##_cache0
233
-
234
-static inline int get_bits_count(const GetBitContext *s) {
235
-    return ((uint8_t*)s->buffer_ptr - s->buffer)*8 - 32 + s->bit_count;
236
-}
237
-
238
-static inline void skip_bits_long(GetBitContext *s, int n){
239
-    OPEN_READER(re, s);
240
-    re_bit_count += n;
241
-    re_buffer_ptr += re_bit_count>>5;
242
-    re_bit_count &= 31;
243
-    re_cache0 = av_be2ne32(re_buffer_ptr[-1]) << re_bit_count;
244
-    re_cache1 = 0;
245
-    UPDATE_CACHE(re, s);
246
-    CLOSE_READER(re, s);
247
-}
248
-
249
-#endif
250
-
251 177
 /**
252 178
  * read mpeg1 dc style vlc (sign bit + mantissa with no MSB).
253 179
  * if MSB not set it is negative
... ...
@@ -309,7 +228,6 @@ static inline void skip_bits(GetBitContext *s, int n){
309 309
 }
310 310
 
311 311
 static inline unsigned int get_bits1(GetBitContext *s){
312
-#ifdef ALT_BITSTREAM_READER
313 312
     unsigned int index = s->index;
314 313
     uint8_t result = s->buffer[index>>3];
315 314
 #ifdef ALT_BITSTREAM_READER_LE
... ...
@@ -323,9 +241,6 @@ static inline unsigned int get_bits1(GetBitContext *s){
323 323
     s->index = index;
324 324
 
325 325
     return result;
326
-#else
327
-    return get_bits(s, 1);
328
-#endif
329 326
 }
330 327
 
331 328
 static inline unsigned int show_bits1(GetBitContext *s){
... ...
@@ -400,13 +315,7 @@ static inline void init_get_bits(GetBitContext *s,
400 400
     s->buffer       = buffer;
401 401
     s->size_in_bits = bit_size;
402 402
     s->buffer_end   = buffer + buffer_size;
403
-#ifdef ALT_BITSTREAM_READER
404 403
     s->index        = 0;
405
-#elif defined A32_BITSTREAM_READER
406
-    s->buffer_ptr   = (uint32_t*)((intptr_t)buffer & ~3);
407
-    s->bit_count    = 32 +     8*((intptr_t)buffer &  3);
408
-    skip_bits_long(s, 0);
409
-#endif
410 404
 }
411 405
 
412 406
 static inline void align_get_bits(GetBitContext *s)
... ...
@@ -35,7 +35,6 @@
35 35
 #include <stddef.h>
36 36
 #include <stdio.h>
37 37
 
38
-#define ALT_BITSTREAM_READER
39 38
 #include "avcodec.h"
40 39
 #include "get_bits.h"
41 40
 #include "dsputil.h"
... ...
@@ -28,7 +28,7 @@
28 28
  * @see http://wiki.multimedia.cx/index.php?title=Apple_ProRes
29 29
  */
30 30
 
31
-#define A32_BITSTREAM_READER // some ProRes vlc codes require up to 28 bits to be read at once
31
+#define LONG_BITSTREAM_READER // some ProRes vlc codes require up to 28 bits to be read at once
32 32
 
33 33
 #include <stdint.h>
34 34