Browse code

altivec support for snow

Originally committed as revision 5228 to svn://svn.ffmpeg.org/ffmpeg/trunk

Luca Barbato authored on 2006/03/27 21:51:19
Showing 5 changed files
... ...
@@ -212,3 +212,4 @@ BeOS                                    Francois Revol
212 212
 i386                                    Michael Niedermayer
213 213
 Mac OS X / PowerPC                      Romain Dolbeau
214 214
 Amiga / PowerPC                         Colin Ward
215
+Linux / PowerPC                         Luca Barbato
... ...
@@ -388,7 +388,7 @@ endif
388 388
 ifeq ($(TARGET_ALTIVEC),yes)
389 389
 OBJS += ppc/dsputil_altivec.o ppc/mpegvideo_altivec.o ppc/idct_altivec.o \
390 390
         ppc/fft_altivec.o ppc/gmc_altivec.o ppc/fdct_altivec.o \
391
-        ppc/dsputil_h264_altivec.o
391
+        ppc/dsputil_h264_altivec.o ppc/dsputil_snow_altivec.o
392 392
 endif
393 393
 
394 394
 ifeq ($(TARGET_ARCH_SH4),yes)
... ...
@@ -21,35 +21,6 @@
21 21
 #include "../snow.h"
22 22
 #include "mmx.h"
23 23
 
24
-static void always_inline snow_interleave_line_header(int * i, int width, DWTELEM * low, DWTELEM * high){
25
-    (*i) = (width) - 2;
26
-
27
-    if (width & 1){
28
-        low[(*i)+1] = low[((*i)+1)>>1];
29
-        (*i)--;
30
-    }
31
-}
32
-
33
-static void always_inline snow_horizontal_compose_lift_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){
34
-    for(; i<w; i++){
35
-        dst[i] = src[i] - ((mul * (ref[i] + ref[i + 1]) + add) >> shift);
36
-    }
37
-
38
-    if((width^lift_high)&1){
39
-        dst[w] = src[w] - ((mul * 2 * ref[w] + add) >> shift);
40
-    }
41
-}
42
-
43
-static void always_inline snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){
44
-        for(; i<w; i++){
45
-            dst[i] = src[i] - (((-(ref[i] + ref[(i+1)])+W_BO) - 4 * src[i]) >> W_BS);
46
-        }
47
-
48
-        if(width&1){
49
-            dst[w] = src[w] - (((-2 * ref[w] + W_BO) - 4 * src[w]) >> W_BS);
50
-        }
51
-}
52
-
53 24
 void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width){
54 25
     const int w2= (width+1)>>1;
55 26
     // SSE2 code runs faster with pointers aligned on a 32-byte boundary.
... ...
@@ -30,6 +30,17 @@ extern void fdct_altivec(int16_t *block);
30 30
 extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
31 31
 extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
32 32
 
33
+extern void ff_snow_horizontal_compose97i_altivec(DWTELEM *b, int width);
34
+extern void ff_snow_vertical_compose97i_altivec(DWTELEM *b0, DWTELEM *b1,
35
+                                                DWTELEM *b2, DWTELEM *b3,
36
+                                                DWTELEM *b4, DWTELEM *b5,
37
+                                                int width);
38
+extern void ff_snow_inner_add_yblock_altivec(uint8_t *obmc, const int obmc_stride,
39
+                                          uint8_t * * block, int b_w, int b_h,
40
+                                          int src_x, int src_y, int src_stride,
41
+                                          slice_buffer * sb, int add,
42
+                                          uint8_t * dst8);
43
+
33 44
 int mm_flags = 0;
34 45
 
35 46
 int mm_support(void)
... ...
@@ -297,6 +308,11 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
297 297
         c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
298 298
 #endif
299 299
 
300
+
301
+        c->horizontal_compose97i = ff_snow_horizontal_compose97i_altivec;
302
+        c->vertical_compose97i = ff_snow_vertical_compose97i_altivec;
303
+        c->inner_add_yblock = ff_snow_inner_add_yblock_altivec;
304
+
300 305
 #ifdef CONFIG_ENCODERS
301 306
         if (avctx->dct_algo == FF_DCT_AUTO ||
302 307
             avctx->dct_algo == FF_DCT_ALTIVEC)
... ...
@@ -120,4 +120,43 @@ extern void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, D
120 120
 extern void ff_snow_horizontal_compose97i(DWTELEM *b, int width);
121 121
 extern void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
122 122
 
123
+
124
+/* C bits used by mmx/sse2/altivec */
125
+
126
+static always_inline void snow_interleave_line_header(int * i, int width, DWTELEM * low, DWTELEM * high){
127
+    (*i) = (width) - 2;
128
+
129
+    if (width & 1){
130
+        low[(*i)+1] = low[((*i)+1)>>1];
131
+        (*i)--;
132
+    }
133
+}
134
+
135
+static always_inline void snow_interleave_line_footer(int * i, DWTELEM * low, DWTELEM * high){
136
+    for (; (*i)>=0; (*i)-=2){
137
+        low[(*i)+1] = high[(*i)>>1];
138
+        low[*i] = low[(*i)>>1];
139
+    }
140
+}
141
+
142
+static always_inline void snow_horizontal_compose_lift_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w, int lift_high, int mul, int add, int shift){
143
+    for(; i<w; i++){
144
+        dst[i] = src[i] - ((mul * (ref[i] + ref[i + 1]) + add) >> shift);
145
+    }
146
+
147
+    if((width^lift_high)&1){
148
+        dst[w] = src[w] - ((mul * 2 * ref[w] + add) >> shift);
149
+    }
150
+}
151
+
152
+static always_inline void snow_horizontal_compose_liftS_lead_out(int i, DWTELEM * dst, DWTELEM * src, DWTELEM * ref, int width, int w){
153
+        for(; i<w; i++){
154
+            dst[i] = src[i] - (((-(ref[i] + ref[(i+1)])+W_BO) - 4 * src[i]) >> W_BS);
155
+        }
156
+
157
+        if(width&1){
158
+            dst[w] = src[w] - (((-2 * ref[w] + W_BO) - 4 * src[w]) >> W_BS);
159
+        }
160
+}
161
+
123 162
 #endif