Browse code

* Detect "restrict" keyword
* Use "restrict" in some dsputil routines (yields large speedup)

Originally committed as revision 716 to svn://svn.ffmpeg.org/ffmpeg/trunk

Falk Hüffner authored on 2002/07/04 10:47:32
Showing 2 changed files
... ...
@@ -191,6 +191,15 @@ EOF
191 191
 $cc -o $TMPO $TMPC 2> /dev/null || _memalign=no
192 192
 fi
193 193
 
194
+_restrict=
195
+for restrict_keyword in restrict __restrict__ __restrict; do
196
+  echo "void foo(char * $restrict_keyword p);" > $TMPC
197
+  if $cc -c -o $TMPO $TMPC 2> /dev/null; then
198
+    _restrict=$restrict_keyword
199
+    break;
200
+  fi
201
+done
202
+
194 203
 if test x"$1" = x"-h" -o x"$1" = x"--help" ; then
195 204
 cat << EOF
196 205
 
... ...
@@ -351,6 +360,8 @@ if test "$simpleidct" = "yes" ; then
351 351
   echo "#define SIMPLE_IDCT 1" >> $TMPH
352 352
 fi
353 353
 
354
+echo "#define restrict $_restrict" >> $TMPH
355
+
354 356
 # build tree in object directory if source path is different from current one
355 357
 if test "$source_path_used" = "yes" ; then
356 358
     DIRS="libav libavcodec libavcodec/alpha libavcodec/armv4l libavcodec/i386 \
... ...
@@ -21,6 +21,12 @@
21 21
 #include "avcodec.h"
22 22
 #include "dsputil.h"
23 23
 #include "simple_idct.h"
24
+#include "config.h"
25
+
26
+/* Suppress restrict if it was not defined in config.h  */
27
+#ifndef restrict
28
+#define restrict
29
+#endif
24 30
 
25 31
 void (*ff_idct)(DCTELEM *block);
26 32
 void (*ff_idct_put)(UINT8 *dest, int line_size, DCTELEM *block);
... ...
@@ -159,96 +165,86 @@ static void build_zigzag_end(void)
159 159
     }
160 160
 }
161 161
 
162
-void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size)
162
+void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
163 163
 {
164
-    DCTELEM *p;
165
-    const UINT8 *pix;
166 164
     int i;
167 165
 
168 166
     /* read the pixels */
169
-    p = block;
170
-    pix = pixels;
171 167
     for(i=0;i<8;i++) {
172
-        p[0] = pix[0];
173
-        p[1] = pix[1];
174
-        p[2] = pix[2];
175
-        p[3] = pix[3];
176
-        p[4] = pix[4];
177
-        p[5] = pix[5];
178
-        p[6] = pix[6];
179
-        p[7] = pix[7];
180
-        pix += line_size;
181
-        p += 8;
168
+        block[0] = pixels[0];
169
+        block[1] = pixels[1];
170
+        block[2] = pixels[2];
171
+        block[3] = pixels[3];
172
+        block[4] = pixels[4];
173
+        block[5] = pixels[5];
174
+        block[6] = pixels[6];
175
+        block[7] = pixels[7];
176
+        pixels += line_size;
177
+        block += 8;
182 178
     }
183 179
 }
184 180
 
185
-void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride){
186
-    DCTELEM *p;
181
+void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2,
182
+		   int stride){
187 183
     int i;
188 184
 
189 185
     /* read the pixels */
190
-    p = block;
191 186
     for(i=0;i<8;i++) {
192
-        p[0] = s1[0] - s2[0];
193
-        p[1] = s1[1] - s2[1];
194
-        p[2] = s1[2] - s2[2];
195
-        p[3] = s1[3] - s2[3];
196
-        p[4] = s1[4] - s2[4];
197
-        p[5] = s1[5] - s2[5];
198
-        p[6] = s1[6] - s2[6];
199
-        p[7] = s1[7] - s2[7];
187
+        block[0] = s1[0] - s2[0];
188
+        block[1] = s1[1] - s2[1];
189
+        block[2] = s1[2] - s2[2];
190
+        block[3] = s1[3] - s2[3];
191
+        block[4] = s1[4] - s2[4];
192
+        block[5] = s1[5] - s2[5];
193
+        block[6] = s1[6] - s2[6];
194
+        block[7] = s1[7] - s2[7];
200 195
         s1 += stride;
201 196
         s2 += stride;
202
-        p += 8;
197
+        block += 8;
203 198
     }
204 199
 }
205 200
 
206 201
 
207
-void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
202
+void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
203
+                          int line_size)
208 204
 {
209
-    const DCTELEM *p;
210
-    UINT8 *pix;
211 205
     int i;
212 206
     UINT8 *cm = cropTbl + MAX_NEG_CROP;
213 207
     
214 208
     /* read the pixels */
215
-    p = block;
216
-    pix = pixels;
217 209
     for(i=0;i<8;i++) {
218
-        pix[0] = cm[p[0]];
219
-        pix[1] = cm[p[1]];
220
-        pix[2] = cm[p[2]];
221
-        pix[3] = cm[p[3]];
222
-        pix[4] = cm[p[4]];
223
-        pix[5] = cm[p[5]];
224
-        pix[6] = cm[p[6]];
225
-        pix[7] = cm[p[7]];
226
-        pix += line_size;
227
-        p += 8;
210
+        pixels[0] = cm[block[0]];
211
+        pixels[1] = cm[block[1]];
212
+        pixels[2] = cm[block[2]];
213
+        pixels[3] = cm[block[3]];
214
+        pixels[4] = cm[block[4]];
215
+        pixels[5] = cm[block[5]];
216
+        pixels[6] = cm[block[6]];
217
+        pixels[7] = cm[block[7]];
218
+
219
+        pixels += line_size;
220
+        block += 8;
228 221
     }
229 222
 }
230 223
 
231
-void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
224
+void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
225
+                          int line_size)
232 226
 {
233
-    const DCTELEM *p;
234
-    UINT8 *pix;
235 227
     int i;
236 228
     UINT8 *cm = cropTbl + MAX_NEG_CROP;
237 229
     
238 230
     /* read the pixels */
239
-    p = block;
240
-    pix = pixels;
241 231
     for(i=0;i<8;i++) {
242
-        pix[0] = cm[pix[0] + p[0]];
243
-        pix[1] = cm[pix[1] + p[1]];
244
-        pix[2] = cm[pix[2] + p[2]];
245
-        pix[3] = cm[pix[3] + p[3]];
246
-        pix[4] = cm[pix[4] + p[4]];
247
-        pix[5] = cm[pix[5] + p[5]];
248
-        pix[6] = cm[pix[6] + p[6]];
249
-        pix[7] = cm[pix[7] + p[7]];
250
-        pix += line_size;
251
-        p += 8;
232
+        pixels[0] = cm[pixels[0] + block[0]];
233
+        pixels[1] = cm[pixels[1] + block[1]];
234
+        pixels[2] = cm[pixels[2] + block[2]];
235
+        pixels[3] = cm[pixels[3] + block[3]];
236
+        pixels[4] = cm[pixels[4] + block[4]];
237
+        pixels[5] = cm[pixels[5] + block[5]];
238
+        pixels[6] = cm[pixels[6] + block[6]];
239
+        pixels[7] = cm[pixels[7] + block[7]];
240
+        pixels += line_size;
241
+        block += 8;
252 242
     }
253 243
 }
254 244